In [None]:
import homemade_tensors as ht
from homemade_nn import MultilayerPerceptron, StochasticGradientDescent, CrossEntropyLoss
import numpy as np
import pandas as pd
import warnings

# Silence every warning (no category or module filter is given) for the rest
# of the kernel session.
# NOTE(review): this also hides numerical warnings such as overflow or
# divide-by-zero during training; consider narrowing to a specific category.
warnings.filterwarnings("ignore")

## Loading data

You can download the MNIST dataset in CSV format from [Kaggle](https://www.kaggle.com/datasets/oddrationale/mnist-in-csv). Place `mnist_train.csv` and `mnist_test.csv` in the `../data/` directory.

In [None]:
# Full training split; the path is relative to the notebook's directory.
train_df = pd.read_csv("../data/mnist_train.csv")
# Only the first 1000 test rows are used. `nrows` stops parsing after them
# instead of loading the whole file and slicing it afterwards.
test_df = pd.read_csv("../data/mnist_test.csv", nrows=1000)

## Defining our model and hyperparameters

Since this is a classification problem, we will use the cross-entropy loss. If we were doing regression instead, we might use something like the mean squared error (MSE) loss.

The optimizer we will use is stochastic gradient descent, the most basic kind of optimizer. It updates each parameter $w$ with the formula below, where $L$ is the loss and $\alpha$ is the learning rate.

$$w_{t+1}=w_{t}-\frac{\partial L}{\partial w}\alpha$$

A more popular optimizer we could use instead is Adam.

In [None]:
# --- Tunable hyperparameters ---
LEARNING_RATE = 0.001  # step size, handed to the optimizer as `lr`
EPOCHS = 20            # number of passes the training cell makes over the data
BATCH_SIZE = 128       # rows per chunk when the training set is split up
TEST_SIZE = 1          # rows per chunk when the test set is split up

# --- Model, optimizer and loss ---
net = MultilayerPerceptron()
optim = StochasticGradientDescent(net.parameters(), lr=LEARNING_RATE)
criterion = CrossEntropyLoss()

## Loading and formatting our data

Our image data will have the shape $(B \times C \times HW)$, where $B$ is the batch size, $C$ is the number of color channels (1 in our case), and $H$ and $W$ are the height and width of the image (our images are 28 by 28, so $HW$ is 784).

In [None]:
NUM_CLASSES = 10  # width of each one-hot label row (digits 0-9)
PIXEL_MAX = 255   # divisor that scales raw pixel values into [0, 1]


def _to_batches(samples, batch_size):
    """Split a 2-D array into a list of ``ht.Tensor`` batches.

    Args:
        samples: 2-D NumPy array with one row per sample.
        batch_size: Maximum number of rows per batch; the final batch is
            shorter when ``len(samples)`` is not a multiple of it.

    Returns:
        A list of ``ht.Tensor`` objects, each built from a copy of up to
        ``batch_size`` consecutive rows with a singleton axis inserted at
        position 1, i.e. an array of shape ``(rows, 1, features)``.
    """
    return [
        ht.Tensor(np.array(samples[start:start + batch_size][:, np.newaxis, :]))
        for start in range(0, len(samples), batch_size)
    ]


def _one_hot_label_batches(df, batch_size):
    """Return the ``label`` column of ``df`` as batched one-hot tensors."""
    # Indexing the identity matrix by class id picks out the one-hot row.
    return _to_batches(np.eye(NUM_CLASSES)[df["label"].to_numpy()], batch_size)


def _pixel_batches(df, batch_size):
    """Return every non-label column of ``df`` as batched, rescaled tensors."""
    return _to_batches(df.drop(["label"], axis=1).to_numpy() / PIXEL_MAX, batch_size)


# Same four names and the same order of work as before, now built by shared helpers.
train_labels = _one_hot_label_batches(train_df, BATCH_SIZE)
train_data = _pixel_batches(train_df, BATCH_SIZE)

test_labels = _one_hot_label_batches(test_df, TEST_SIZE)
test_data = _pixel_batches(test_df, TEST_SIZE)

## Training our model

In [None]:
# Run EPOCHS full passes over the pre-batched training set, taking one
# optimizer step per batch.
# NOTE(review): no gradient reset is visible between steps; confirm that
# backward() or optim.step() clears previously accumulated gradients.
for epoch in range(EPOCHS):
    for label, image in zip(train_labels, train_data):
        # Get the predicted results from the network
        result = net.forward(image)
        
        # Calculate the loss
        loss = criterion(result, label)
        
        # Backpropagate through the network
        loss.backward()
        
        # Update the network's parameters
        optim.step()

    # `loss` here is the value from the last batch of the epoch only, not an
    # average over the epoch.
    # NOTE(review): the reason for the -1/10 scaling is not visible in this
    # notebook; confirm it against CrossEntropyLoss.
    print(f"Epoch {epoch + 1} Loss: {-1 / 10 * loss.data}")

## Testing our model

We use a separate dataset to test the accuracy of our model. This ensures that our model has not overfit to the original training data.

In [None]:
correct = 0  # predictions whose arg-max class matches the label's
total = 0    # samples actually evaluated

# Count how often the network's highest-scoring class equals the true class.
for label, image in zip(test_labels, test_data):
    result = net.forward(image)
    prediction = np.argmax(result.data, axis=-1)
    target = np.argmax(label.data, axis=-1)
    correct += np.count_nonzero(prediction == target)
    # `target` holds one class index per sample in this batch, so its size is
    # the batch's sample count.
    total += target.size

# Divide by samples seen, not by len(test_data): that is the number of
# batches, which equals the sample count only when TEST_SIZE == 1.
print(f"Accuracy: {correct / total * 100}%")