In [12]:
from copy import deepcopy
from functools import reduce

import numpy as np
from numpy import ndarray
import plotly.express as px

from optimizers import Adam
from losses import BinaryCrossentropy
from cifar_10_dataset_loading import load_cifar_10
from metrics import accuracy, nn_params_stats, activations_stats
from layers import Convolutional, Linear, Relu, BatchNorm, Flatten, Softmax, MaxPool, Layer

## Setup

### Data extraction

In [13]:
# Load the CIFAR-10 train/test images and labels through the project loader.
x_train, y_train, x_test, y_test = load_cifar_10()

In [14]:
# Integer class index of every training sample (position of the largest label entry).
# NOTE(review): `classes` is not referenced anywhere in the visible part of the notebook.
classes = y_train.argmax(axis=1)

In [15]:
# Change range from [0, 255] to [0, 1] (dividing by 255 does not produce negative values).
# NOTE: this overwrites `x_train`, so re-running the cell divides by 255 a second time.
x_train = x_train / 255  
x_train.dtype

dtype('float64')

In [16]:
# Hand-picked training indices used as a tiny sample batch for plotting and a smoke test.
IMGS_IDX = [0, 1, 5673, 5494, 2, 55, 66, 776, 564]
x, y = x_train[IMGS_IDX], y_train[IMGS_IDX]
# One facet per image (axis 0), wrapped after four columns.
px.imshow(x, facet_col=0, facet_col_wrap=4)

### Model declaration

In [17]:
def create_nn(nb_classes: "int | None" = None) -> list[Layer]:
    """Build a newly constructed layer stack for the image classifier.

    The stack is two (Convolutional -> BatchNorm -> Relu -> MaxPool) stages,
    followed by Flatten, a 128-unit Linear layer with Relu, and a Linear
    output layer with Softmax.

    Args:
        nb_classes: Width of the output layer. When ``None`` (the default) the
            width is read from the global sample label batch ``y``
            (``y.shape[1]``), which is what this function always did before
            the parameter existed.

    Returns:
        A new list of layer instances in forward order.
    """
    if nb_classes is None:
        # Backward-compatible fallback: relies on the notebook-level `y` being defined.
        nb_classes = y.shape[1]
    return [
        # Shape tuples presumably read (n_kernels, height, width, in_channels) — TODO confirm in layers.
        Convolutional((10, 5, 5, 3)),
        BatchNorm(),
        Relu(),
        MaxPool((2, 2)),
        Convolutional((32, 3, 3, 10)),
        BatchNorm(),
        Relu(),
        MaxPool((2, 2)),
        Flatten(),
        # 1152 = 6 * 6 * 32; consistent with 32x32 inputs going through unpadded,
        # stride-1 convolutions and 2x2 pooling — TODO confirm against the layers module.
        Linear(1152, 128),
        Relu(),
        Linear(128, nb_classes),
        Softmax(),
    ]

## Training

In [18]:
# Smoke test: fit a fresh network on the nine hand-picked samples (`x`, `y`) only.
# batch_size is larger than that sample count.
# NOTE(review): a Softmax output is paired with BinaryCrossentropy here — confirm this
# is intended rather than a categorical cross-entropy.
training_stats = Adam(
    create_nn(),
    x,
    y,
    BinaryCrossentropy(),
    starting_lr=0.015,
    lr_decay=0.0005,
    momentum_weight=0.9,
    ada_grad_weight=0.9,
).optimize_nn(
    epochs=10,
    batch_size=128,
    plt_x="epoch",
    plt_ys=["loss", "accuracy"],
)

FigureWidget({
    'data': [{'hovertemplate': 'variable=loss<br>epoch=%{x}<br>value=%{y}<extra></extra>',
              'legendgroup': 'loss',
              'marker': {'color': '#636efa', 'symbol': 'circle'},
              'mode': 'markers',
              'name': 'loss',
              'orientation': 'v',
              'showlegend': True,
              'type': 'scatter',
              'uid': 'ddf5ff33-7e83-4bc8-8127-28dd6871db13',
              'x': array([0]),
              'xaxis': 'x2',
              'y': array([0.32506977]),
              'yaxis': 'y2'},
             {'hovertemplate': 'variable=accuracy<br>epoch=%{x}<br>value=%{y}<extra></extra>',
              'legendgroup': 'accuracy',
              'marker': {'color': '#EF553B', 'symbol': 'circle'},
              'mode': 'markers',
              'name': 'accuracy',
              'orientation': 'v',
              'showlegend': True,
              'type': 'scatter',
              'uid': '4a28084d-1f28-4d75-b499-83f15a9a9f4d',
     

In [19]:
N_SAMPLES_PER_CLASS = 500
NB_CLASSES = 10

# Boolean mask of shape (n_samples, NB_CLASSES): entry [i, c] is True when the
# largest label entry of sample i sits at position c.
class_masks = y_train.argmax(1, keepdims=True) == np.arange(NB_CLASSES)


def take_first_n_of_class(data, class_idx):
    """Return the first N_SAMPLES_PER_CLASS rows of `data` that belong to class `class_idx`.

    `data` must be indexed along axis 0 in the same order as `y_train`.
    """
    return data[class_masks[:, class_idx]][:N_SAMPLES_PER_CLASS]


def take_first_n_of_each_class(data):
    """Concatenate the per-class selections for classes 0..NB_CLASSES-1 along axis 0.

    The result is grouped by class (all class-0 rows first, then class 1, ...).
    """
    return np.concatenate(
        [take_first_n_of_class(data, class_idx) for class_idx in range(NB_CLASSES)],
        axis=0,
    )


# Class-balanced training subset; images and labels are selected with the same masks,
# so they stay aligned row by row.
# NOTE(review): rows are ordered by class — confirm that the optimizer shuffles batches.
even_x_train = take_first_n_of_each_class(x_train)
even_y_train = take_first_n_of_each_class(y_train)
even_x_train.shape

(5000, 32, 32, 3)

In [None]:
# Baseline run on the class-balanced subset: a fresh network plus its Adam optimizer.
bad_nn = create_nn()
bad_optimizer = Adam(
    bad_nn, even_x_train, even_y_train, BinaryCrossentropy(),
    starting_lr=0.025,
    lr_decay=0.00015,
    momentum_weight=0.85,
    ada_grad_weight=0.999,
)

In [None]:
# Train the baseline network and keep the statistics returned by the optimizer.
# Loss, accuracy and learning rate are requested as plotted series against the epoch.
bad_training_stats = bad_optimizer.optimize_nn(
    epochs=100,
    batch_size=200,
    metrics=[accuracy, nn_params_stats, activations_stats],
    plt_x="epoch",
    plt_ys=["loss", "accuracy", "learning_rate"],
    height=850,
)

FigureWidget({
    'data': [{'hovertemplate': 'variable=loss<br>epoch=%{x}<br>value=%{y}<extra></extra>',
              'legendgroup': 'loss',
              'marker': {'color': '#636efa', 'symbol': 'circle'},
              'mode': 'markers',
              'name': 'loss',
              'orientation': 'v',
              'showlegend': True,
              'type': 'scatter',
              'uid': '1b916d1a-6596-4428-b7cf-a0d4d05e17d3',
              'x': array([0]),
              'xaxis': 'x3',
              'y': array([0.32507915]),
              'yaxis': 'y3'},
             {'hovertemplate': 'variable=accuracy<br>epoch=%{x}<br>value=%{y}<extra></extra>',
              'legendgroup': 'accuracy',
              'marker': {'color': '#EF553B', 'symbol': 'circle'},
              'mode': 'markers',
              'name': 'accuracy',
              'orientation': 'v',
              'showlegend': True,
              'type': 'scatter',
              'uid': '2bc74049-88bc-4ad0-a470-0a8bdf4e8758',
     

In [None]:
# Random-restart search: keep training freshly initialised networks until one of
# them exceeds TARGET_ACCURACY, remembering the best run seen so far.
# Accuracy is reported as a fraction (a recorded run printed 0.586), so the
# threshold must be a fraction too; the previous `> 50` could never be satisfied.
TARGET_ACCURACY = 0.5

best_accuracy = 0
# Pre-initialise so these names exist even when no run improves on `best_accuracy`.
best_starting_nn = best_nn = best_optimizer = best_training_stats = None
iteration = 0
try:
    while True:
        starting_nn = create_nn()
        # Keep the initial network next to the copy handed to the optimizer
        # (presumably the optimizer updates `nn` in place — TODO confirm).
        nn = deepcopy(starting_nn)
        optimizer = Adam(
            nn,
            even_x_train,
            even_y_train,
            BinaryCrossentropy(),
            starting_lr=0.025,
            lr_decay=0.0005,
            momentum_weight=0.9,
            ada_grad_weight=0.999,
        )
        training_stats = optimizer.optimize_nn(
            epochs=50,
            batch_size=100,
            metrics=[accuracy, nn_params_stats, activations_stats],
        )
        run_accuracy = training_stats["accuracy"].max()
        print(iteration, "accuracy:", run_accuracy)
        if run_accuracy > best_accuracy:
            best_starting_nn, best_nn, best_optimizer, best_training_stats = starting_nn, nn, optimizer, training_stats
            best_accuracy = run_accuracy
        if run_accuracy > TARGET_ACCURACY:
            break
        iteration += 1
except KeyboardInterrupt:
    # Manual stop between runs: fall through and report the best result so far.
    print()
print("Best accuracy:", best_accuracy)

Caught KeyboardInterrupt exception, returning training metrics.
0 accuracy: 0.586


In [None]:
# Show the layer stack of the most recently trained network.
nn

[<layers.Convolutional at 0x7fc6a629ff80>,
 <layers.BatchNorm at 0x7fc6a629c860>,
 <layers.Relu at 0x7fc6a629d6d0>,
 <layers.MaxPool at 0x7fc6a629fef0>,
 <layers.Convolutional at 0x7fc6a629fbf0>,
 <layers.BatchNorm at 0x7fc6a660ef00>,
 <layers.Relu at 0x7fc6ac160ad0>,
 <layers.MaxPool at 0x7fc6a629c6b0>,
 <layers.Flatten at 0x7fc6a629fd10>,
 <layers.Linear at 0x7fc6a629fce0>,
 <layers.Relu at 0x7fc6ac45ffb0>,
 <layers.Linear at 0x7fc6a629d160>,
 <layers.Softmax at 0x7fc6a629c7d0>]

In [None]:
# Display training image 5673, the sample inspected in the following cells.
px.imshow(x_train[5673])

In [None]:
# Output of the first two layers (Convolutional, then BatchNorm) for sample 5673.
img = x_train[[5673]]  # list index keeps the leading batch axis (batch of one image)
convs = nn[1].forward(nn[0].forward(img))
# One facet per entry of axis 2 of the first (only) item in the batch.
px.imshow(convs[0], facet_col=2)

In [None]:
# Feed the single-image batch through every layer in order; the last layer is Softmax,
# so the result is one row of class scores. Append `.argmax(axis=1)` for the class index.
reduce(lambda activations, layer: layer.forward(activations), nn, img)

array([[0.34507341, 0.10575061, 0.0240158 , 0.03988019, 0.00964173,
        0.02572158, 0.00364022, 0.04975087, 0.24413487, 0.15239071]])

In [None]:
# Ground-truth label vector of sample 5673, for comparison with the prediction above.
y_train[5673]

array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [None]:
# Show the layer stack again (same inspection as an earlier cell).
nn

[<layers.Convolutional at 0x7fc6a629ff80>,
 <layers.BatchNorm at 0x7fc6a629c860>,
 <layers.Relu at 0x7fc6a629d6d0>,
 <layers.MaxPool at 0x7fc6a629fef0>,
 <layers.Convolutional at 0x7fc6a629fbf0>,
 <layers.BatchNorm at 0x7fc6a660ef00>,
 <layers.Relu at 0x7fc6ac160ad0>,
 <layers.MaxPool at 0x7fc6a629c6b0>,
 <layers.Flatten at 0x7fc6a629fd10>,
 <layers.Linear at 0x7fc6a629fce0>,
 <layers.Relu at 0x7fc6ac45ffb0>,
 <layers.Linear at 0x7fc6a629d160>,
 <layers.Softmax at 0x7fc6a629c7d0>]

In [None]:
def min_max_normalize(arr: np.ndarray, axis: "int | tuple[int, ...] | None") -> np.ndarray:
    """Rescale `arr` linearly to [0, 1] over the given axis or axes.

    Args:
        arr: Array (or array-like) of numeric values.
        axis: Axis or axes over which the minimum and maximum are taken.
            ``None`` normalises over the whole array.

    Returns:
        An array with the same shape as `arr`. Every slice along `axis` has
        minimum 0 and maximum 1; a slice whose values are all equal maps to 0
        instead of producing NaN from a division by zero.
    """
    arr = np.asarray(arr)
    # keepdims keeps the reduced axes as size 1, so the statistics broadcast back
    # against `arr` for any choice of axis, not only the leading one.
    lowest = arr.min(axis=axis, keepdims=True)
    span = arr.max(axis=axis, keepdims=True) - lowest
    # Constant slices have zero span; divide by 1 there so they come out as 0.
    safe_span = np.where(span == 0, 1, span)
    return (arr - lowest) / safe_span


In [None]:
img = x_train[[5673]]  # list index keeps the leading batch axis (batch of one image)
# Activations after the first six layers:
# Convolutional, BatchNorm, Relu, MaxPool, Convolutional, BatchNorm.
convs = reduce(lambda activations, layer: layer.forward(activations), nn[:6], img)
# Min-max scaling over axis 3 was tried here and is currently disabled,
# so the raw activations are plotted.
normed_convs = convs
# Sum over axis 3 and show the map of the first (only) item in the batch.
px.imshow(normed_convs.sum(axis=3)[0])

In [None]:
# Index 0 of the last kernel axis for the first layer, min-max scaled over axis 0
# and drawn with one facet per entry of axis 0.
px.imshow(min_max_normalize(nn[0].kernels[..., 0], 0), facet_col=0)