# Simple MNIST implementation
The goal of this notebook is to train a simple neural network to classify MNIST digits.  
This is meant to be a simple implementation of an MDL from scratch, used to test the implementation of the layers (except the convolution layer) and the losses.  

## Setup

### Imports

In [1]:
from functools import reduce
from os.path import join

import plotly.express as px
import kagglehub
import numpy as np

from optimizers import SGD
from metrics import accuracy
from losses import BinaryCrossentropy
from layers import Linear, Relu, Sigmoid

### Data extraction

In [2]:
# Fetch the MNIST dataset through kagglehub, then build the path of each of the
# four raw IDX files relative to the download directory.
dataset_path = kagglehub.dataset_download("hojjatk/mnist-dataset")
train_image_path, train_labels_path, test_image_path, test_labels_path = (
    join(dataset_path, idx_file)
    for idx_file in (
        'train-images.idx3-ubyte',
        'train-labels.idx1-ubyte',
        't10k-images.idx3-ubyte',
        't10k-labels.idx1-ubyte',
    )
)

def load_images(path) -> np.ndarray:
    """Read a raw image file into a matrix of flattened, rescaled images.

    The first 16 bytes of the file are skipped; the remaining bytes are read
    as unsigned 8-bit values, grouped into rows of 28*28 values and divided
    by 255.

    Args:
        path: Location of the image file to read.

    Returns:
        A 2-D float array with one row of 784 values per image.
    """
    header_size = 16
    pixels_per_image = 28**2
    with open(path, 'rb') as image_file:
        raw_bytes = np.frombuffer(image_file.read(), dtype=np.uint8)
    pixels = raw_bytes[header_size:]
    return pixels.reshape(-1, pixels_per_image) / 255

def load_labels(path) -> np.ndarray:
    """Read a raw label file and one-hot encode its labels.

    The first 8 bytes of the file are skipped; every remaining byte is used
    as a class index in the range 0-9.

    Args:
        path: Location of the label file to read.

    Returns:
        A 2-D float array with one row per label and 10 columns, holding 1
        in the column of the label and 0 elsewhere.
    """
    header_size = 8
    nb_classes = 10
    with open(path, 'rb') as label_file:
        raw_bytes = np.frombuffer(label_file.read(), dtype=np.uint8)
    digits = raw_bytes[header_size:]
    # Scatter a single 1 per row, at the column given by that row's label.
    one_hot = np.zeros((digits.size, nb_classes))
    one_hot[np.arange(digits.size), digits] = 1
    return one_hot

# Decode the raw files: flattened pixel matrices for the images and one-hot
# matrices for the labels, for both the training and the test split.
train_dataset, train_labels = load_images(train_image_path), load_labels(train_labels_path)
test_dataset, test_labels = load_images(test_image_path), load_labels(test_labels_path)




In [3]:
# Sanity check: turn the one-hot rows back into digit indices.
train_labels.argmax(axis=1)

array([5, 0, 4, ..., 5, 6, 8])

In [4]:
# Show the first training sample, restored from its flat row to a 28x28 grid.
px.imshow(np.reshape(train_dataset[0], (28, 28)))

## Model definition

In [5]:
# Layer widths, named so the architecture can be tuned in one place.
INPUT_SIZE = 28**2  # one input per pixel of a flattened 28x28 image
HIDDEN_SIZE = 64
NB_CLASSES = 10  # one output per column of the one-hot labels

# The network is an ordered list of layers; data flows through them in list order.
nn: list[Linear|Relu|Sigmoid] = [
    Linear(INPUT_SIZE, HIDDEN_SIZE),
    Relu(),
    Linear(HIDDEN_SIZE, NB_CLASSES),
    Sigmoid(),
]
loss = BinaryCrossentropy()

In [6]:
# Baseline before training: feed the whole training set through every layer in
# order, then compare the strongest output of each row with the true digit.
activations = reduce(lambda x, layer: layer.forward(x), nn, train_dataset)
y_pred = activations.argmax(axis=1)
y_true = train_labels.argmax(axis=1)
accuracy_score = (y_pred == y_true).mean()

accuracy_score

np.float64(0.0912)

In [7]:
# Share of each digit class in the training labels: per-column totals divided
# by the grand total.
np.sum(train_labels, axis=0) / np.sum(train_labels)

array([0.09871667, 0.11236667, 0.0993    , 0.10218333, 0.09736667,
       0.09035   , 0.09863333, 0.10441667, 0.09751667, 0.09915   ])

In [8]:
# Randomize the dataset and labels.
# Both arrays are indexed with the same permutation so each image keeps its label.
# A seeded generator makes the shuffle reproducible from a fresh kernel.
SHUFFLE_SEED = 42
shuffle_rng = np.random.default_rng(SHUFFLE_SEED)
indices = shuffle_rng.permutation(train_dataset.shape[0])
train_dataset = train_dataset[indices]
train_labels = train_labels[indices]

## Training

In [11]:
# Training hyper-parameters.
NB_EPOCHS = 30
LEARNING_RATE = 0.02
BATCH_SIZE = 128

# Train the network in place with SGD. The optimizer is built from
# LEARNING_RATE so that editing the constant above actually takes effect.
# The returned statistics are plotted in the next cell.
training_stats = (
    SGD(LEARNING_RATE)
    .optimize_nn(
        nn,
        train_dataset,
        train_labels,
        NB_EPOCHS,
        BATCH_SIZE,
        loss,
        [accuracy]
    )
)

Output()

In [13]:
# Reshape the statistics to long form (one row per epoch/metric pair) so that
# loss and accuracy can each be drawn in their own facet row.
fig = (
    px.scatter(
        training_stats.melt(id_vars="epoch", value_vars=["loss", "accuracy"]),
        x="epoch",
        y="value",
        color="variable",
        facet_row="variable",
        title="Training Metrics Over Batches",
        height=600,
    )
    # Give each facet its own y-scale and keep tick labels visible on both.
    .update_yaxes(matches=None, showticklabels=True)
)
fig.show()