# Training different models

## Simple training
This script is using the sigmoid activation function and mean squared error for loss calculation.
Were training on one image at the time.

### Imports

In [5]:
from utils import *
from model.layers import Dense
from utils.data_loader import load_data
from utils.losses import mse, mse_grad
import numpy as np
from utils.evaluation import evaluate_accuracy

### Load data

In [4]:
X_train, y_train, X_test, y_test = load_data()

### Training model

In [None]:
dense_1 = Dense(X_train[0].shape[0], 100)
dense_2 = Dense(100, 10)

epochs = 30

for i in range(epochs):
    p = np.random.permutation(len(X_train))
    X_train = X_train[p]
    y_train = y_train[p]

    for X, y in zip(X_train, y_train):

        out = dense_1.forward(X)
        out = dense_2.forward(out)

        grad = mse_grad(out, y)

        dX = dense_2.backward(grad, 0.01)
        dX = dense_1.backward(dX, 0.01)

    loss = mse(out, y)

    print("epoch: ", i, "loss: ", loss)

    dense_1.save("./models/simple/dense_1.npz")
    dense_2.save("./models/simple/dense_2.npz")


epoch:  0 loss:  0.0922920779015491
epoch:  1 loss:  0.09042523414056093
epoch:  2 loss:  0.013854867560518958
epoch:  3 loss:  0.01036838883898498
epoch:  4 loss:  0.021985148407419358
epoch:  5 loss:  0.006846822694639808
epoch:  6 loss:  0.13388006758023208
epoch:  7 loss:  0.006629105956671835
epoch:  8 loss:  0.007127874908378784
epoch:  9 loss:  0.0017027190879862317
epoch:  10 loss:  0.006320170837347762
epoch:  11 loss:  9.155376243076541e-05
epoch:  12 loss:  0.0018181721433894339
epoch:  13 loss:  0.0005045028409909351
epoch:  14 loss:  0.004043978432020104
epoch:  15 loss:  0.11243815239145731
epoch:  16 loss:  0.009480624637415159
epoch:  17 loss:  0.007609493562962813
epoch:  18 loss:  0.0013109059428833818
epoch:  19 loss:  0.0005040223631130234
epoch:  20 loss:  0.000112003118934931
epoch:  21 loss:  0.00017206885779658734
epoch:  22 loss:  0.00020065608868448872
epoch:  23 loss:  0.0005366107329037025
epoch:  24 loss:  0.001156667445116667
epoch:  25 loss:  0.0001502685

### Evaluate accuracy

In [None]:
acc = evaluate_accuracy(dense_1, dense_2, X_test[:1000], y_test[:1000])
acc

### Evaluate accuracy on saved model

In [None]:
dense_1 = Dense(784, 100)
dense_2 = Dense(100, 10)

dense_1.load("./models/simple/dense_1.npz")
dense_2.load("./models/simple/dense_2.npz")

acc = evaluate_accuracy(dense_1, dense_2, X_test[:1000], y_test[:1000])

acc

Accuracy:  0.918


## Softmax and cross entropy

This script is using the sigmoid activation for first layer and softmax activation for output layer and cross entropy for loss calculation.
Were training on one image at the time.

### Imports

In [7]:
from utils import *
from model.layers import Dense
from utils.data_loader import load_data
from utils.losses import mse, mse_grad, cross_entropy_grad, cross_entropy
from utils.evaluation import evaluate_accuracy
import numpy as np


### Load data

In [8]:
X_train, y_train, X_test, y_test = load_data()

### Training model

In [None]:
dense_1 = Dense(X_train[0].shape[0], 100, "sigmoid")
dense_2 = Dense(100, 10, "softmax")

epochs = 30

for i in range(epochs):
    p = np.random.permutation(len(X_train))
    X_train = X_train[p]
    y_train = y_train[p]

    for X, y in zip(X_train, y_train):

        out = dense_1.forward(X)
        out = dense_2.forward(out)

        grad = cross_entropy_grad(out, y)

        dX = dense_2.backward(grad, 0.01)
        dX = dense_1.backward(dX, 0.01)

    loss = cross_entropy(out, y)
    acc = evaluate_accuracy(dense_1, dense_2, X_test[:1000], y_test[:1000])

    print("epoch: ", i, "loss: ", loss, "Accuracy: ", acc)

    dense_1.save("./models/softmax/dense_1.npz")
    dense_2.save("./models/softmax/dense_2.npz")


epoch:  0 loss:  0.0038256303087737566 Accuracy:  0.915
epoch:  1 loss:  0.0033906071368934327 Accuracy:  0.944
epoch:  2 loss:  0.0019063486556675205 Accuracy:  0.954
epoch:  3 loss:  0.0002848087100886635 Accuracy:  0.965
epoch:  4 loss:  0.006117796719264184 Accuracy:  0.966
epoch:  5 loss:  0.1139328438817871 Accuracy:  0.971
epoch:  6 loss:  0.0009242581968288125 Accuracy:  0.977
epoch:  7 loss:  0.007481818384311697 Accuracy:  0.975
epoch:  8 loss:  5.1670203206042384e-05 Accuracy:  0.974
epoch:  9 loss:  9.947181199495979e-05 Accuracy:  0.974
epoch:  10 loss:  0.014731081704524522 Accuracy:  0.976
epoch:  11 loss:  0.00013068033821757086 Accuracy:  0.975
epoch:  12 loss:  0.0016603583141613493 Accuracy:  0.976
epoch:  13 loss:  9.942559990606685e-06 Accuracy:  0.979
epoch:  14 loss:  0.02209481126883705 Accuracy:  0.974
epoch:  15 loss:  4.105071416438863e-06 Accuracy:  0.974
epoch:  16 loss:  3.1278210341952184e-05 Accuracy:  0.974
epoch:  17 loss:  0.00016208289751050534 Accur

### Evaluate accuracy

In [12]:
acc = evaluate_accuracy(dense_1, dense_2, X_test[:1000], y_test[:1000])

print("Accuracy: ", acc)

Accuracy:  0.979


### Evaluate accuracy on saved model

In [13]:
dense_1 = Dense(784, 100)
dense_2 = Dense(100, 10)

dense_1.load("./models/softmax/dense_1.npz")
dense_2.load("./models/softmax/dense_2.npz")

acc = evaluate_accuracy(dense_1, dense_2, X_test[:1000], y_test[:1000])

acc

0.979

## learning rate decay

This script is using the sigmoid activation for first layer and softmax activation for output layer and cross entropy for loss calculation.
Were training on one image at the time.

Were adding learning rate decay to try to make an even better model

### Imports

In [14]:
from utils import *
from model.layers import Dense
from utils.data_loader import load_data
from utils.losses import mse, mse_grad, cross_entropy_grad, cross_entropy
from utils.evaluation import evaluate_accuracy
import numpy as np

### Load data

In [15]:
X_train, y_train, X_test, y_test = load_data()

### Training model

In [None]:
dense_1 = Dense(X_train[0].shape[0], 100, "sigmoid")
dense_2 = Dense(100, 10, "softmax")

epochs = 30
initial_lr = 0.01
decay_rate = 0.95

for i in range(epochs):
    p = np.random.permutation(len(X_train))
    X_train = X_train[p]
    y_train = y_train[p]

    learning_rate = initial_lr * (decay_rate ** i)

    for X, y in zip(X_train, y_train):

        out = dense_1.forward(X)
        out = dense_2.forward(out)

        grad = cross_entropy_grad(out, y)

        dX = dense_2.backward(grad, learning_rate)
        dX = dense_1.backward(dX, learning_rate)

    loss = cross_entropy(out, y)
    acc = evaluate_accuracy(dense_1, dense_2, X_test[:1000], y_test[:1000])

    print("epoch: ", i, "loss: ", loss, "Accuracy: ", acc)

    dense_1.save("./models/lr_decay/dense_1.npz")
    dense_2.save("./models/lr_decay/dense_2.npz")


epoch:  0 loss:  0.010605580673544073 Accuracy:  0.925
epoch:  1 loss:  0.02131413016532393 Accuracy:  0.943
epoch:  2 loss:  0.01539930392553042 Accuracy:  0.953
epoch:  3 loss:  0.00985713964095468 Accuracy:  0.958
epoch:  4 loss:  0.010076524324651675 Accuracy:  0.966
epoch:  5 loss:  0.0015672583357207567 Accuracy:  0.966
epoch:  6 loss:  0.11126626243545341 Accuracy:  0.97
epoch:  7 loss:  0.0010763619951474647 Accuracy:  0.966
epoch:  8 loss:  0.030922778588279024 Accuracy:  0.972
epoch:  9 loss:  0.0043083440734872185 Accuracy:  0.973
epoch:  10 loss:  0.00039955210550091075 Accuracy:  0.97
epoch:  11 loss:  0.0008978531914805315 Accuracy:  0.972
epoch:  12 loss:  0.004310596590062158 Accuracy:  0.973
epoch:  13 loss:  0.01884402450074211 Accuracy:  0.976
epoch:  14 loss:  0.0011406176331032365 Accuracy:  0.975
epoch:  15 loss:  0.00012711632455744842 Accuracy:  0.972
epoch:  16 loss:  7.701968979878762e-05 Accuracy:  0.976
epoch:  17 loss:  0.00024010889421543998 Accuracy:  0.9

### Evaluate accuracy

In [17]:
acc = evaluate_accuracy(dense_1, dense_2, X_test[:1000], y_test[:1000])

print("Accuracy: ", acc)

Accuracy:  0.977


### Evaluate accuracy on saved model

In [21]:
dense_1 = Dense(784, 100)
dense_2 = Dense(100, 10)

dense_1.load("./models/lr_decay/dense_1.npz")
dense_2.load("./models/lr_decay/dense_2.npz")

acc = evaluate_accuracy(dense_1, dense_2, X_test[:1000], y_test[:1000])

acc

0.977

## Try more layers, different sizes etc

This script is using the sigmoid activation for first layer and softmax activation for output layer and cross entropy for loss calculation.
Were training on one image at the time.

try adding some more layers and different shapes to the layers

### Imports

In [22]:
from utils import *
from model.layers import Dense
from utils.data_loader import load_data
from utils.losses import mse, mse_grad, cross_entropy_grad, cross_entropy
from utils.evaluation import evaluate_accuracy
import numpy as np

### Load data

In [23]:
X_train, y_train, X_test, y_test = load_data()

### Training model

In [26]:
import numpy as np
from model.layers import Dense

def evaluate_accuracy(dense_1, dense_2, dense_3, X, y_true):
    correct = 0
    for i in range(len(X)):
        out = dense_1.forward(X[i])
        out = dense_2.forward(out)
        out = dense_3.forward(out)
        if np.argmax(out) == np.argmax(y_true[i]):
            correct += 1
    return correct / len(X)

In [28]:
dense_1 = Dense(X_train[0].shape[0], 256, "sigmoid")
dense_2 = Dense(256, 128, "sigmoid")
dense_3 = Dense(128, 10, "softmax")

epochs = 30
initial_lr = 0.01
decay_rate = 0.95

for i in range(epochs):
    p = np.random.permutation(len(X_train))
    X_train = X_train[p]
    y_train = y_train[p]

    learning_rate = initial_lr * (decay_rate ** i)

    for X, y in zip(X_train, y_train):

        out = dense_1.forward(X)
        out = dense_2.forward(out)
        out = dense_3.forward(out)

        grad = cross_entropy_grad(out, y)

        dX = dense_3.backward(grad, learning_rate)
        dX = dense_2.backward(dX, learning_rate)
        dX = dense_1.backward(dX, learning_rate)

    loss = cross_entropy(out, y)
    acc = evaluate_accuracy(dense_1, dense_2, dense_3, X_test[:1000], y_test[:1000])

    print("epoch: ", i, "loss: ", loss, "Accuracy: ", acc)

    dense_1.save("./models/more_layers/dense_1.npz")
    dense_2.save("./models/more_layers/dense_2.npz")
    dense_3.save("./models/more_layers/dense_3.npz")


epoch:  0 loss:  1.8421974956017724 Accuracy:  0.43
epoch:  1 loss:  0.0829560199307541 Accuracy:  0.87
epoch:  2 loss:  0.004339661835644443 Accuracy:  0.923
epoch:  3 loss:  0.007710187876310219 Accuracy:  0.944
epoch:  4 loss:  0.005725956550606591 Accuracy:  0.944
epoch:  5 loss:  0.016418570194419602 Accuracy:  0.959
epoch:  6 loss:  0.005867829269940568 Accuracy:  0.955
epoch:  7 loss:  0.0009160931106664936 Accuracy:  0.966
epoch:  8 loss:  4.3901679667656864e-05 Accuracy:  0.967
epoch:  9 loss:  0.01749336033412857 Accuracy:  0.969
epoch:  10 loss:  0.000797194476737851 Accuracy:  0.967
epoch:  11 loss:  0.03657499929632651 Accuracy:  0.972
epoch:  12 loss:  0.0002060127163239746 Accuracy:  0.975
epoch:  13 loss:  0.0011740604728405887 Accuracy:  0.976
epoch:  14 loss:  4.340229766881412e-05 Accuracy:  0.977
epoch:  15 loss:  0.13236464521028576 Accuracy:  0.978
epoch:  16 loss:  0.0011332921317126897 Accuracy:  0.976
epoch:  17 loss:  0.0003567290035664898 Accuracy:  0.977
epo

### Evaluate accuracy

In [29]:
acc = evaluate_accuracy(dense_1, dense_2, dense_3, X_test[:1000], y_test[:1000])

acc

0.98

### Evaluate accuracy on saved model

In [30]:
dense_1 = Dense(784, 100)
dense_2 = Dense(100, 10)

dense_1.load("./models/more_layers/dense_1.npz")
dense_2.load("./models/more_layers/dense_2.npz")
dense_3.load("./models/more_layers/dense_3.npz")


acc = evaluate_accuracy(dense_1, dense_2, dense_3, X_test[:1000], y_test[:1000])

acc

0.98