In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from src.preprocessing import preprocess
from nn.model import NeuralNetwork
from nn.learning_policies.circular_lr import CircularLR


In [2]:
train = pd.read_csv('data/mnist_train.csv')
val = pd.read_csv('data/mnist_test.csv')

X_val, y_val = preprocess(val)
X_train, y_train = preprocess(train)

X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, shuffle=True)
X_train, X_test, X_val, y_train, y_test, y_val = np.array(X_train), np.array(X_test), np.array(X_val), np.array(y_train),np.array(y_test),np.array(y_val)

In [3]:
# Neural network parameters
input_size = X_train.shape[1]
hidden_size = 128
output_size = 10
epochs = 5
batch_size = 64


initial_learning_rate = 0.01
max_learning_rate = 0.01
step_size = 1

# Create a Neural Network
model = NeuralNetwork(input_size, hidden_size, output_size)
learning_rate_scheduler = CircularLR(initial_learning_rate, max_learning_rate, step_size)

# Training loop
for epoch in range(epochs):
    for i in range(0, len(X_train), batch_size):
        # Mini-batch
        X_batch = X_train[i:i+batch_size]
        y_batch = y_train[i:i+batch_size]

        # Forward pass
        output = model.forward(X_batch)

        # Compute MSE
        loss = np.mean(0.5 * np.sum((output - y_batch) ** 2, axis=1))

        # Backward pass
        grad_output = output - y_batch
        model.backward(grad_output)

        learning_rate = learning_rate_scheduler.update_learning_rate()

        # Update weights and biases using gradient descent
        model.linear1.weights -= learning_rate * model.linear1.grad_weights / X_batch.shape[0]
        model.linear1.biases -= learning_rate * model.linear1.grad_biases / X_batch.shape[0]
        model.linear2.weights -= learning_rate * model.linear2.grad_weights / X_batch.shape[0]
        model.linear2.biases -= learning_rate * model.linear2.grad_biases / X_batch.shape[0]

    # Print loss for each epoch
    print(f'Epoch [{epoch + 1}/{epochs}], Training loss: {loss:.2} Val acc {model.get_accuracy(X_val, y_val):.2}')


Epoch [1/5], Training loss: 0.13 Val acc 0.78
Epoch [2/5], Training loss: 0.11 Val acc 0.83
Epoch [3/5], Training loss: 0.083 Val acc 0.84
Epoch [4/5], Training loss: 0.076 Val acc 0.85
Epoch [5/5], Training loss: 0.076 Val acc 0.86
