In [1]:
from functools import reduce

import numpy as np
import pandas as pd
import plotly.express as px
from rich.progress import track

from time_utils import time_to_exec, print_time_dict
from losses import BinaryCrossentropy
from cifar_10_dataset_loading import load_cifar_10
from layers import Convolutional, Linear, Relu, Sigmoid, Flatten, Softmax, MaxPool

## Setup

### Data extraction

In [2]:
# Load the CIFAR-10 train/test images and labels through the project loader.
# NOTE(review): labels are presumably one-hot encoded (they are argmax-ed along
# axis 1 later in this notebook) — confirm against cifar_10_dataset_loading.
x_train, y_train, x_test, y_test = load_cifar_10()

In [3]:
# Position of the largest entry in each label row; this is the integer class id
# if the labels are one-hot (assumed — TODO confirm).
classes = y_train.argmax(axis=1)

In [4]:
# Scale pixel values by 1/255: inputs in [0, 255] end up in [0, 1] (not [-1, 1]).
# NOTE(review): if a [-1, 1] range was intended, use `x_train / 127.5 - 1` instead.
# NOTE: x_test is not rescaled in this cell, and re-running the cell divides
# x_train by 255 again.
x_train = x_train / 255  
x_train.dtype

dtype('float64')

In [5]:
# Hand-picked indices into the training set; the resulting nine-sample subset
# (x, y) is what the training loop below iterates over.
IMGS_IDX = [0, 351, 5673, 5494, 32, 55, 66, 776, 564]
x = x_train[IMGS_IDX]
y = y_train[IMGS_IDX]
# Show the selected images as a grid, faceting on the first (sample) axis, 4 per row.
px.imshow(x, facet_col=0, facet_col_wrap=4)

### Model declaration

In [6]:
# Layer stack of the model: forward() applies the layers in order, backward()
# walks them in reverse.
# NOTE(review): the tuple arguments and the 360 input width are taken as-is;
# 360 presumably equals 6 * 6 * 10 for 32x32x3 inputs with unpadded stride-1
# convolutions — TODO confirm against layers.py.
nn: list[Convolutional|Relu|MaxPool|Flatten|Linear|Sigmoid|Softmax] = [
    Convolutional((10, 5, 5, 3)),
    Relu(),
    MaxPool((2, 2)),
    Convolutional((10, 3, 3, 10)),
    Relu(),
    MaxPool((2, 2)),
    Flatten(),
    Linear(360, 64),
    Relu(),
    # One output unit per label column.
    Linear(64, y.shape[1]),
    Softmax(),
]
# NOTE(review): the network ends in Softmax but is trained with
# BinaryCrossentropy — confirm this pairing is intended.
loss = BinaryCrossentropy()

In [7]:
def forward(activations:np.ndarray, nn=nn) -> np.ndarray:
    """Run `activations` through each layer of `nn` in order.

    Args:
        activations: Input handed to the first layer.
        nn: Sequence of layers exposing `forward(x)`; defaults to the
            notebook's model as bound when this cell was executed.

    Returns:
        The output of the last layer (or `activations` unchanged if `nn`
        is empty).
    """
    output = activations
    for layer in nn:
        output = layer.forward(output)
    return output

def backward(gradients:np.ndarray, learning_rate:float, nn=nn) -> np.ndarray:
    """Propagate `gradients` through the layers of `nn` from last to first.

    Args:
        gradients: Gradient handed to the last layer.
        learning_rate: Passed unchanged to every layer's `backward` call.
        nn: Sequence of layers exposing `backward(g, learning_rate)`;
            defaults to the notebook's model as bound when this cell was
            executed.

    Returns:
        Whatever the first layer's `backward` returns (or `gradients`
        unchanged if `nn` is empty).
    """
    upstream = gradients
    for layer in reversed(nn):
        upstream = layer.backward(upstream, learning_rate)
    return upstream

## Training

In [8]:
# One dict of metrics per completed epoch, filled by the training loop below.
# Kept in its own cell, so re-running the training cell appends to the existing
# history instead of resetting it.
training_stats:list[dict] = []

In [9]:
NB_EPOCHS = 1000
LEARNING_RATE = 0.005
BATCH_SIZE = 128
# Number of leading output columns whose mean activation is logged each epoch.
NB_TRACKED_CLASSES = 6

for epoch in track(range(NB_EPOCHS), description="Training..."):
    # One pass over the training subset in mini-batches.
    for i in range(0, x.shape[0], BATCH_SIZE):
        batch = x[i:i+BATCH_SIZE]
        labels = y[i:i+BATCH_SIZE]
        activations = forward(batch)
        gradients = loss.backward(activations, labels)
        backward(gradients, LEARNING_RATE)

    # Evaluate on the whole subset after this epoch's backward passes.
    activations = forward(x)
    epoch_stats = {
        "loss": loss.forward(activations, y),
        # Compare against the full label set `y`, not `labels`: `labels` only
        # holds the last mini-batch and does not line up with `activations`
        # as soon as the data spans more than one batch.
        "accuracy": (activations.argmax(axis=1) == y.argmax(axis=1)).mean(),
        **{
            f"{class_idx}_class_mean_actiavation": activations[:, class_idx].mean()
            for class_idx in range(NB_TRACKED_CLASSES)
        },
    }

    # Track the mean parameter value of every trainable layer, keyed by its
    # position in the stack.
    for i, layer in enumerate(nn):
        if isinstance(layer, Linear):
            epoch_stats[f"Linear_{i}"] = layer.weights.mean()
        if isinstance(layer, Convolutional):
            epoch_stats[f"Convolutional_{i}"] = layer.kernels.mean()

    training_stats.append(epoch_stats)

Output()

In [10]:
# Tabulate the per-epoch records and tag each row with its epoch number.
training_stats_df = pd.DataFrame.from_records(training_stats)
training_stats_df = training_stats_df.assign(epoch=range(len(training_stats)))

# Reshape to long form: one (epoch, variable, value) row per logged metric,
# covering every column except `epoch`.
metric_columns = training_stats_df.columns.difference(["epoch"])
training_stats_long_df = training_stats_df.melt(
    id_vars="epoch",
    value_vars=metric_columns,
)

# One facet row per metric, each coloured by metric name.
fig = px.scatter(
    training_stats_long_df,
    x="epoch",
    y="value",
    color="variable",
    facet_row="variable",
    title="Training Metrics Over epochs",
    height=1200,
)
# Unlink the y-axes so every metric gets its own scale.
fig.update_yaxes(matches=None)
fig.show()