# Train

In [1]:
from network.activation import *
from network.layer import *
from network.loss import *
from network.optimizer import *
from network.sequential import *
from network.metric import *

import numpy as np
import mnist.mnist as mnist

np.random.seed(30)

In [2]:

def train(
    model: Sequential,
    X: np.ndarray,
    Y: np.ndarray,
    X_val: np.ndarray,
    Y_val: np.ndarray,
    epochs: int,
    batch_size: int,
    optimizer: Optimizer,
    loss_function: Loss,
    metric_function : Metric
) -> None:
    shuffled_indexes = np.random.permutation(len(X))
    X = X[shuffled_indexes]
    Y = Y[shuffled_indexes]

    for epoch in range(epochs):
        loss = 0
        metric = 0
        batches_steps = range(0, len(X), batch_size)
        total_steps = len(batches_steps)

        for i in batches_steps:
            x_batch = X[i:i+batch_size]
            y_batch = Y[i:i+batch_size]
            
						# Forward pass
            y_hat = model.forward({'X' : x_batch})
						# Compute loss
            batch_loss = loss_function.forward({'Y' : y_batch, 'Y_hat' : y_hat})
            batch_metric = metric_function.compute({'Y' : y_batch, 'Y_hat' : y_hat})
            # Compute gradients
            loss_function.backward()
						# Backward pass
            model.backward({'dY' : loss_function.gradients['dY_hat']})
						# Update parameters
            optimizer.update(model)
						# Accumulate batch loss mean
            loss += batch_loss.mean()
            metric += batch_metric
        
        y_hat = model.forward({'X': X_val})
        val_loss = loss_function.forward({'Y': Y_val, 'Y_hat': y_hat}).mean()
        val_metric = metric_function.compute({'Y': Y_val, 'Y_hat': y_hat})

        
        print(f"Train ==> Epoch {epoch+1}/{epochs} loss: {loss/total_steps} accuracy: {metric/total_steps}")
        print(f"Validation ==> Epoch {epoch+1}/{epochs} loss: {val_loss} accuracy: {val_metric}")


In [3]:
# Load mnist
x_train, y_train, x_test, y_test = mnist.load('mnist/mnist.pkl')

# Normalize data
x_train = x_train / 255
x_test = x_test / 255
# Add an extra dimension
y_test = np.eye(10)[y_test].squeeze()
y_train = np.eye(10)[y_train].squeeze()

model = Sequential([
    Dense(784, 64),
    ReLU(),
    Dense(64, 64),
    ReLU(),
    Dense(64, 10),
		Softmax()
])

optimizer = StoichasticGradientDescent(learning_rate=0.001)
loss = CategoricalCrossEntropy()
metric = Accuracy()

try:
	train(model, x_train, y_train, x_test, y_test, 100, 64, optimizer, loss, metric)
	model.save("model.pkl")
except KeyboardInterrupt:
	model.save("model.pkl")

Train ==> Epoch 1/100 loss: 1.8432929935052578 accuracy: 0.3154817430703625
Validation ==> Epoch 1/100 loss: 0.6600128183583741 accuracy: 0.7962
Train ==> Epoch 2/100 loss: 0.5306899705457799 accuracy: 0.8410680970149254
Validation ==> Epoch 2/100 loss: 0.3627925822811711 accuracy: 0.8943
Train ==> Epoch 3/100 loss: 0.31385531090210644 accuracy: 0.9094649520255863
Validation ==> Epoch 3/100 loss: 0.2476188152067784 accuracy: 0.9275
Train ==> Epoch 4/100 loss: 0.22334185268392037 accuracy: 0.935717617270789
Validation ==> Epoch 4/100 loss: 0.1920679292779632 accuracy: 0.9436
Train ==> Epoch 5/100 loss: 0.17426181721179218 accuracy: 0.9499933368869936
Validation ==> Epoch 5/100 loss: 0.15855112654500286 accuracy: 0.9537
Train ==> Epoch 6/100 loss: 0.14458567766623695 accuracy: 0.9582889125799574
Validation ==> Epoch 6/100 loss: 0.14020652179012613 accuracy: 0.9583
Train ==> Epoch 7/100 loss: 0.12442974151068335 accuracy: 0.9642190831556503
Validation ==> Epoch 7/100 loss: 0.1298986572037