# Train

# In [1]:
from network.activation import *
from network.layer import *
from network.loss import *
from network.optimizer import *
from network.sequential import *
import numpy as np
import mnist.mnist as mnist

# Seed NumPy's global RNG so that code relying on np.random (for example the
# per-epoch np.random.permutation shuffle in train()) is repeatable across runs.
np.random.seed(30)

# In [2]:

def train(
    model: Sequential,
    X: np.ndarray,
    Y: np.ndarray,
    X_val: np.ndarray,
    Y_val: np.ndarray,
    epochs: int,
    batch_size: int,
    optimizer: Optimizer,
    loss_function: Loss
) -> None:
    """Run the epoch / mini-batch loop and print train and validation loss.

    For every epoch the training data is reshuffled, split into consecutive
    mini-batches, and each batch is pushed through ``model.forward`` and
    ``loss_function.forward``. The mean of the per-batch mean losses is
    printed together with the mean loss on the validation data.

    NOTE(review): the gradient computation, backward pass and parameter
    update are still unimplemented (see the TODO in the batch loop), so
    ``optimizer`` is currently unused and the model's parameters are not
    changed by this function.

    Args:
        model: Network exposing ``forward({'X': ...})``.
        X: Training inputs, indexed along the first axis.
        Y: Training targets, aligned with ``X`` along the first axis.
        X_val: Validation inputs.
        Y_val: Validation targets.
        epochs: Number of passes over the training data.
        batch_size: Maximum number of samples per mini-batch; must be > 0.
        optimizer: Optimizer intended for the parameter update (unused for
            now, see the note above).
        loss_function: Loss exposing ``forward({'Y': ..., 'Y_hat': ...})``
            whose result supports ``.mean()``.

    Raises:
        ValueError: If ``batch_size`` is not strictly positive.
    """
    if batch_size <= 0:
        raise ValueError(
            f"batch_size must be a positive integer, got {batch_size}"
        )

    # The batch start offsets only depend on len(X) and batch_size, both of
    # which are constant across epochs, so compute them once.
    n_samples = len(X)
    batches_steps = range(0, n_samples, batch_size)
    total_steps = len(batches_steps)

    for epoch in range(epochs):
        loss = 0.0

        # Fancy indexing returns new arrays, so the caller's X / Y are never
        # reordered in place; only the local names are rebound.
        shuffled_indexes = np.random.permutation(n_samples)
        X = X[shuffled_indexes]
        Y = Y[shuffled_indexes]

        for i in batches_steps:
            x_batch = X[i:i+batch_size]
            y_batch = Y[i:i+batch_size]

            # Forward pass
            y_hat = model.forward({'X': x_batch})
            # Compute loss
            batch_loss = loss_function.forward({'Y': y_batch, 'Y_hat': y_hat})

            # TODO: Compute gradients
            # TODO: Backward pass
            # TODO: Update parameters
            # (left unimplemented: the backward / optimizer APIs are not
            # visible from this cell)

            # Accumulate batch loss mean
            loss += batch_loss.mean()

        y_hat = model.forward({'X': X_val})
        val_loss = loss_function.forward({'Y': Y_val, 'Y_hat': y_hat})
        val_loss = val_loss.mean()

        # With no training samples there are no batches to average over;
        # report NaN instead of dividing by zero.
        train_loss = loss / total_steps if total_steps else float("nan")

        print(f"Train ==> Epoch {epoch+1}/{epochs} loss: {train_loss}")
        print(f"Validation ==> Epoch {epoch+1}/{epochs} loss: {val_loss}")


# In [None]:
# Load the MNIST train / test splits from the pickled dataset
x_train, y_train, x_test, y_test = mnist.load('mnist/mnist.pkl')

# Scale the inputs by 1/255 (presumably 8-bit pixel intensities -- confirm
# against mnist.load)
x_train, x_test = x_train / 255, x_test / 255

# One-hot encode the labels: pick row `label` of a 10x10 identity matrix,
# then squeeze away any singleton axes left over from the label array
one_hot_rows = np.eye(10)
y_test = one_hot_rows[y_test].squeeze()
y_train = one_hot_rows[y_train].squeeze()

# 784 -> 256 -> 256 -> 10 fully connected network with a softmax output
layers = [
    Dense(784, 256),
    ReLU(),
    Dense(256, 256),
    ReLU(),
    Dense(256, 10),
    Softmax(),
]
model = Sequential(layers)

optimizer = StoichasticGradientDescent(learning_rate=0.001)
loss = MeanSquaredError()

# The test split doubles as the validation data passed to train().
# The model is saved after a completed run, and also when training is
# interrupted from the keyboard, so partial progress is not lost.
try:
    train(
        model=model,
        X=x_train,
        Y=y_train,
        X_val=x_test,
        Y_val=y_test,
        epochs=10,
        batch_size=64,
        optimizer=optimizer,
        loss_function=loss,
    )
    model.save("model.pkl")
except KeyboardInterrupt:
    model.save("model.pkl")