# Train

In [14]:
from network.activation import *
from network.layer import *
from network.loss import *
from network.optimizer import *
from network.sequential import *
from network.metric import *

import numpy as np
import mnist.mnist as mnist

np.random.seed(30)

In [15]:

def train(
    model: Sequential,
    X: np.ndarray,
    Y: np.ndarray,
    X_val: np.ndarray,
    Y_val: np.ndarray,
    epochs: int,
    batch_size: int,
    optimizer: Optimizer,
    loss_function: Loss,
    metric_function : Metric
) -> None:
    

    for epoch in range(epochs):
        loss = 0
        metric = 0
        batches_steps = range(0, len(X), batch_size)
        total_steps = len(batches_steps)
        shuffled_indexes = np.random.permutation(len(X))
        X = X[shuffled_indexes]
        Y = Y[shuffled_indexes]
        for i in batches_steps:
            x_batch = X[i:i+batch_size]
            y_batch = Y[i:i+batch_size]
            
						# Forward pass
            y_hat = model.forward({'X' : x_batch})
						# Compute loss
            batch_loss = loss_function.forward({'Y' : y_batch, 'Y_hat' : y_hat})
            batch_metric = metric_function.compute({'Y' : y_batch, 'Y_hat' : y_hat})
            # Compute gradients
            loss_function.backward()
						# Backward pass
            model.backward({'dY' : loss_function.gradients['dY_hat']})
						# Update parameters
            optimizer.update(model)
						# Accumulate batch loss mean
            loss += batch_loss.mean()
            metric += batch_metric
        
        y_hat = model.forward({'X': X_val})
        val_loss = loss_function.forward({'Y': Y_val, 'Y_hat': y_hat}).mean()
        val_metric = metric_function.compute({'Y': Y_val, 'Y_hat': y_hat})

        
        print(f"Train ==> Epoch {epoch+1}/{epochs} loss: {loss/total_steps} accuracy: {metric/total_steps}")
        print(f"Validation ==> Epoch {epoch+1}/{epochs} loss: {val_loss} accuracy: {val_metric}")


In [16]:
# Load mnist
x_train, y_train, x_test, y_test = mnist.load('mnist/mnist.pkl')

# Normalize data
x_train = x_train / 255
x_test = x_test / 255
# Add an extra dimension
y_test = np.eye(10)[y_test].squeeze()
y_train = np.eye(10)[y_train].squeeze()

model = Sequential([
    Dense(784, 256),
	  Sigmoid(),
    Dense(256, 16),
	  Sigmoid(),
    Dense(16, 10),
		Softmax()
])

optimizer = StoichasticGradientDescent(learning_rate=0.001)
loss = CategoricalCrossEntropy()
metric = Accuracy()

try:
	train(model, x_train, y_train, x_test, y_test, 1450, 64, optimizer, loss, metric)
	model.save("model_3.pkl")
except KeyboardInterrupt:
	model.save("model_3.pkl")

Train ==> Epoch 1/1450 loss: 2.302316904598372 accuracy: 0.10869203091684435
Validation ==> Epoch 1/1450 loss: 2.301648054580215 accuracy: 0.1135
Train ==> Epoch 2/1450 loss: 2.302055676444978 accuracy: 0.11072428038379531
Validation ==> Epoch 2/1450 loss: 2.3018917603204736 accuracy: 0.1135
Train ==> Epoch 3/1450 loss: 2.301832817383373 accuracy: 0.11097414712153518
Validation ==> Epoch 3/1450 loss: 2.301410456892137 accuracy: 0.1135
Train ==> Epoch 4/1450 loss: 2.301539016865679 accuracy: 0.11084088486140725
Validation ==> Epoch 4/1450 loss: 2.3008094285056546 accuracy: 0.1135
Train ==> Epoch 5/1450 loss: 2.300629541487464 accuracy: 0.11240671641791045
Validation ==> Epoch 5/1450 loss: 2.2994235620866994 accuracy: 0.1135
Train ==> Epoch 6/1450 loss: 2.2965104779208136 accuracy: 0.11483875266524521
Validation ==> Epoch 6/1450 loss: 2.287395528871814 accuracy: 0.1148
Train ==> Epoch 7/1450 loss: 2.1573341937342576 accuracy: 0.22478011727078892
Validation ==> Epoch 7/1450 loss: 1.884863