In [1]:
from functools import reduce

import numpy as np
import pandas as pd
import plotly.express as px
from rich.progress import track

from losses import BinaryCrossentropy
from cifar_10_dataset_loading import load_cifar_10
from layers import Convolutional, Linear, Relu, Sigmoid, Flatten, Softmax

In [2]:
# Load CIFAR-10 once and unpack the four arrays returned by the project loader.
cifar_10_splits = load_cifar_10()
x_train, y_train, x_test, y_test = cifar_10_splits

In [3]:
# Class id of every training sample: the position of the largest entry in each label row
# (presumably one-hot labels — TODO confirm against the loader).
classes = np.argmax(y_train, axis=1)
# Positions of the training samples whose class id is 0 (the plane class).
planes_idx = np.flatnonzero(classes == 0)

In [4]:
# Rescale the images by 1/255 (presumably 8-bit pixel values in [0, 255] — TODO confirm).
# The guard keeps this cell idempotent: without it, every re-execution would divide the
# already-normalized images by 255 again and silently shrink the inputs.
if x_train.max() > 1:
    x_train = x_train / 255
x_train.dtype

dtype('float64')

In [5]:
# Hand-picked training-set indices; the rest of the notebook trains on these images only.
IMGS_IDX = [0, 351, 5673, 5494]
x, y = x_train[IMGS_IDX], y_train[IMGS_IDX]
# Display the selected images, one facet per entry along axis 0.
px.imshow(x, facet_col=0)

In [6]:
# Baseline fully-connected network kept for reference (not used below).
# Converges on the two image samples with lr = 0.01 and ~300 epochs.
# test_nn: list[Convolutional|Relu|Linear|Sigmoid] = [
#     Flatten(),
#     Linear(np.prod(x.shape[1:]), 64),
#     Relu(),
#     Linear(64, y.shape[1]),
#     Softmax(),
# ]

# Network trained in this notebook. The layers are applied in list order by `forward`
# and in reverse order by `backward`.
nn: list[Convolutional|Relu|Linear|Sigmoid|Flatten|Softmax] = [
    Convolutional((10, 5, 5, 3)),
    Relu(),
    Convolutional((10, 3, 3, 10)),
    Relu(),
    Flatten(),
    # 6760 must equal the flattened output size of the second convolution; presumably
    # 26 * 26 * 10 for 32x32 inputs with unpadded, stride-1 kernels — TODO confirm.
    # It is NOT np.prod(x.shape[1:]), which is the size of a raw input image.
    Linear(6760, 64),
    Relu(),
    # One output unit per label column.
    Linear(64, y.shape[1]),
    Softmax(),
]
# NOTE(review): the network ends in a Softmax over y.shape[1] outputs; confirm that
# BinaryCrossentropy is the intended loss for that output.
loss = BinaryCrossentropy()

In [7]:
# operator.methodcaller binds its arguments when it is created, so it cannot replace the
# lambda below: the value passed to each layer changes at every step of the fold.
def forward(activations:np.ndarray, nn=None) -> np.ndarray:
    """Feed `activations` through every layer of the network, first to last.

    Args:
        activations: Input handed to the first layer's `forward` method.
        nn: Iterable of layers exposing `forward(x)`. When omitted, the
            notebook-level `nn` is looked up at call time, so re-running the
            cell that builds the network is picked up without having to
            re-define this function (a `nn=nn` default would keep pointing at
            the network that existed when this cell was executed).

    Returns:
        The output of the last layer, or `activations` itself if there are no layers.
    """
    layers = globals()["nn"] if nn is None else nn
    return reduce(lambda x, l: l.forward(x), layers, activations)

def backward(gradients:np.ndarray, learning_rate:float, nn=None) -> np.ndarray:
    """Propagate `gradients` through the network from the last layer to the first.

    Each layer's `backward(gradient, learning_rate)` is called in reverse list
    order and its return value is passed on to the layer before it.

    Args:
        gradients: Gradient handed to the last layer's `backward` method.
        learning_rate: Forwarded unchanged to every layer's `backward` call.
        nn: Reversible sequence of layers exposing `backward(g, learning_rate)`.
            When omitted, the notebook-level `nn` is looked up at call time, so
            a freshly rebuilt network is used without re-defining this function
            (a `nn=nn` default would stay bound to the old network).

    Returns:
        The value returned by the first layer's `backward`, or `gradients`
        itself if there are no layers.
    """
    layers = globals()["nn"] if nn is None else nn
    return reduce(lambda g, l: l.backward(g, learning_rate), reversed(layers), gradients)

def compute_accuracy(activation:np.ndarray, labels:np.ndarray) -> float:
    """Return the fraction of rows whose arg-max column agrees in both arrays.

    Args:
        activation: Network outputs, one row per sample.
        labels: Target rows aligned with `activation`.

    Returns:
        Mean of the row-wise "predicted class == expected class" comparison.
    """
    predicted_classes = activation.argmax(axis=1)
    expected_classes = labels.argmax(axis=1)
    is_correct = predicted_classes == expected_classes
    return is_correct.mean()

In [8]:
# Per-epoch metric records. Created in its own cell, so re-running the training cell
# appends to the existing history instead of resetting it.
stats: list[dict] = list()

In [9]:
# Training hyper-parameters.
NB_EPOCHS = 3000
LEARNING_RATE = 0.005
BATCH_SIZE = 128
# Output columns tracked during training: 0 is the plane class (see `planes_idx`)
# and 6 is the frog class in the CIFAR-10 label order.
TARGET_CLASSES = [0, 6]
PLANE_CLASS, FROG_CLASS = TARGET_CLASSES

for epoch in track(range(NB_EPOCHS)):
    # One pass of mini-batch updates over the selected images.
    for i in range(0, x.shape[0], BATCH_SIZE):
        batch = x[i:i+BATCH_SIZE]
        labels = y[i:i+BATCH_SIZE]
        activations = forward(batch)
        gradients = loss.backward(activations, labels)
        backward(gradients, LEARNING_RATE)
    # Evaluate once per epoch on the whole selection, after the weight updates, so
    # that every statistic recorded below describes the same forward pass.
    dataset_activations = forward(x)
    mean_dataset_loss = loss.forward(dataset_activations, y)
    stats.append({
        "losses": mean_dataset_loss,
        "accuracies": compute_accuracy(dataset_activations, y),
        # The misspelled keys are kept on purpose: the plotting cell selects these exact
        # column names. The frog/plane columns were previously swapped and read from the
        # stale, pre-update activations of the last mini-batch.
        "frog_mean_actiavation": dataset_activations[:, FROG_CLASS].mean(),
        "plane_mean_actiavation": dataset_activations[:, PLANE_CLASS].mean(),
    })

In [10]:
# Columns to plot; the names must match the keys recorded by the training loop exactly.
metric_columns = ["losses", "accuracies",  "frog_mean_actiavation", "plane_mean_actiavation"]

# One row per epoch, then reshaped to long form: one row per (epoch, metric) pair.
metrics_per_epoch = pd.DataFrame.from_records(stats)
metrics_per_epoch = metrics_per_epoch.assign(epoch=range(len(stats)))
metrics_long = metrics_per_epoch.melt(id_vars="epoch", value_vars=metric_columns)

fig = px.scatter(
    metrics_long,
    x="epoch",
    y="value",
    color="variable",
    facet_row="variable",
    title="Training Metrics Over epochs",
    height=750,
)
# Give every facet its own y-range; the metrics live on very different scales.
fig = fig.update_yaxes(matches=None)
fig.show()