In [27]:
import numpy as np
from numpy import ndarray
import plotly.express as px

from optimizers import Adam
from losses import BinaryCrossentropy
from cifar_10_dataset_loading import load_cifar_10
from metrics import accuracy, nn_params_stats, activations_stats
from layers import Convolutional, Linear, Relu, BatchNorm, Flatten, Softmax, MaxPool, Layer, LeakyRelu

## Setup

### Data extraction

In [28]:
# Load the CIFAR-10 train/test images and labels through the project loader.
x_train, y_train, x_test, y_test = load_cifar_10()

In [29]:
# Integer class index of every training sample (position of the maximum along axis 1 of the labels).
classes = np.argmax(y_train, axis=1)

In [30]:
# Scale pixel values by 1/255: raw values in [0, 255] end up in [0, 1] (not [-1, 1]).
# NOTE(review): x_test is not rescaled in this cell — confirm it is normalised elsewhere before evaluation.
# NOTE(review): this cell is not idempotent; re-running it divides x_train by 255 again.
x_train = x_train / 255  
x_train.dtype

dtype('float64')

In [31]:
# Hand-picked sample indices used for a quick visual check of the training images.
IMGS_IDX = [0, 1, 5673, 5494, 2, 55, 66, 776, 564]
x = x_train[IMGS_IDX]
y = y_train[IMGS_IDX]
# One facet per selected image (axis 0), wrapped every 4 columns.
px.imshow(x, facet_col=0, facet_col_wrap=4)

In [32]:
NB_SAMPLES_PER_CLASS = 1000
NB_CLASSES = 10

# Boolean table with one row per training sample and one column per class:
# True where the sample's label argmax equals the column's class index.
class_masks = y_train.argmax(1, keepdims=True) == np.arange(NB_CLASSES)


def take_first_n_of_class(data, class_idx):
    """Return the first NB_SAMPLES_PER_CLASS rows of `data` whose class is `class_idx`."""
    rows_of_class = data[class_masks[:, class_idx]]
    return rows_of_class[:NB_SAMPLES_PER_CLASS]


def take_first_n_of_each_class(data):
    """Concatenate, in class order, the first NB_SAMPLES_PER_CLASS rows of every class."""
    per_class_chunks = [
        take_first_n_of_class(data, class_idx)
        for class_idx in range(NB_CLASSES)
    ]
    return np.concatenate(per_class_chunks, axis=0)


x_train_subset = take_first_n_of_each_class(x_train)
y_train_subset = take_first_n_of_each_class(y_train)
x_train_subset.shape

(10000, 32, 32, 3)

### Model declaration

In [33]:
def create_nn() -> list[Layer]:
    """Build a newly constructed layer stack for the CIFAR-10 classifier.

    Architecture: Convolutional -> BatchNorm -> Relu -> MaxPool -> Flatten
    -> Linear -> LeakyRelu -> Linear -> Softmax.

    The size of the output layer is taken from the label array `y_train`
    (its second axis) rather than from a scratch preview variable, so the
    function only depends on the loaded dataset.

    Returns:
        The layers in forward order.
    """
    # Flattened size after the conv/pool block: the pooled feature map for a
    # 32x32 input is 13x13 with 10 channels, i.e. 13 * 13 * 10 = 1690.
    flattened_size = 13 * 13 * 10
    nb_outputs = y_train.shape[1]
    return [
        Convolutional((10, 7, 7, 3)),
        BatchNorm(),
        Relu(),
        MaxPool((2, 2)),
        # Disabled second conv block. NOTE(review): re-enabling it changes the
        # flattened size, so `flattened_size` above must be updated as well.
        # Convolutional((20, 3, 3, 10)),
        # BatchNorm(),
        # Relu(),
        # MaxPool((2, 2)),
        Flatten(),
        Linear(flattened_size, 300),
        LeakyRelu(),
        Linear(300, nb_outputs),
        Softmax(),
    ]

## Optimizer

In [34]:
def create_optimizer(nn:list[Layer]) -> Adam:
    """Create an Adam optimizer bound to `nn` and the training data.

    NOTE(review): this trains on the full `x_train` / `y_train`; the
    class-balanced `x_train_subset` / `y_train_subset` built earlier are not
    used here — confirm that is intended.
    NOTE(review): BinaryCrossentropy is paired with a Softmax output layer —
    confirm this is the intended loss.

    Args:
        nn: The layers the optimizer will update.

    Returns:
        The configured Adam instance.
    """
    hyperparameters = {
        "starting_lr": 0.015,
        "lr_decay": 0.0001,
        "momentum_weight": 0.8,
        "ada_grad_weight": 0.999,
        "l2_weight": 0.0,
    }
    return Adam(nn, x_train, y_train, BinaryCrossentropy(), **hyperparameters)

## Training

In [35]:
# Build the network and the optimizer that will train it.
first_nn = create_nn()
first_optimizer = create_optimizer(first_nn)

In [36]:
from numpy import array_split as split
from functools import reduce

from constants import MAX_NB_SAMPLES

# Number of chunks the test set is split into for evaluation, derived from
# the MAX_NB_SAMPLES limit (rounded up so every sample is covered).
NB_TEST_BATCHES = int(np.ceil(len(x_test) / MAX_NB_SAMPLES))
# Loss instance used by test_accuracy to compute the test loss.
loss = BinaryCrossentropy()

def test_accuracy(metric_line:dict, nn:list[Layer], **_) -> dict:
    """Evaluate `nn` on the test set and record the results in `metric_line`.

    The test set is processed in NB_TEST_BATCHES chunks; the per-chunk
    predictions are concatenated before computing the metrics.

    Args:
        metric_line: Metrics dict, updated in place with the keys
            "test_accuracy" and "test_loss".
        nn: Layers applied in order via their `forward` method.
        **_: Extra keyword arguments are accepted and ignored.

    Returns:
        The same `metric_line` dict that was passed in.
    """
    def predict(batch):
        # Chain the forward pass of every layer over one chunk.
        output = batch
        for layer in nn:
            output = layer.forward(output)
        return output

    predictions = np.concatenate(
        [predict(batch) for batch in split(x_test, NB_TEST_BATCHES)]
    )
    is_correct = predictions.argmax(1) == y_test.argmax(1)
    metric_line["test_accuracy"] =  np.mean(is_correct)
    metric_line["test_loss"] = loss.forward(predictions, y_test)
    return metric_line

# Baseline evaluation of the newly built network, before the training cell runs.
test_accuracy({}, first_nn)

{'test_accuracy': np.float64(0.1001),
 'test_loss': np.float64(0.3250793109445218)}

In [37]:
# Run the optimizer for up to 35 epochs with batches of 500 samples. Both
# metric callbacks are passed along, and the listed series are plotted
# against the epoch.
bad_training_stats = first_optimizer.optimize_nn(
    epochs=35,
    batch_size=500,
    metrics=[accuracy, test_accuracy],
    plt_x="epoch",
    plt_ys=["loss", "accuracy", "test_accuracy", "test_loss", "learning_rate"],
    height=600,
)

FigureWidget({
    'data': [{'hovertemplate': 'variable=loss<br>epoch=%{x}<br>value=%{y}<extra></extra>',
              'legendgroup': 'loss',
              'line': {'color': '#636efa', 'dash': 'solid'},
              'marker': {'symbol': 'circle'},
              'mode': 'lines+markers',
              'name': 'loss',
              'orientation': 'v',
              'showlegend': True,
              'type': 'scatter',
              'uid': '326e24a0-c52c-406d-8299-8395933d1079',
              'x': array([0]),
              'xaxis': 'x5',
              'y': array([0.32507974]),
              'yaxis': 'y5'},
             {'hovertemplate': 'variable=accuracy<br>epoch=%{x}<br>value=%{y}<extra></extra>',
              'legendgroup': 'accuracy',
              'line': {'color': '#EF553B', 'dash': 'solid'},
              'marker': {'symbol': 'circle'},
              'mode': 'lines+markers',
              'name': 'accuracy',
              'orientation': 'v',
              'showlegend': True,
     

Caught KeyboardInterrupt exception, returning training metrics.


In [None]:
# Re-evaluate on the test set after the (interrupted) training run.
test_accuracy({}, first_nn)

{'test_accuracy': np.float64(0.5752),
 'test_loss': np.float64(0.21304661202863814)}

In [38]:
# Shorter alias for the trained network, used by the inspection cells below.
nn = first_nn

In [39]:
# Show the layer objects in forward order.
nn

[<layers.Convolutional at 0x7f291421c260>,
 <layers.BatchNorm at 0x7f28cca0d3a0>,
 <layers.Relu at 0x7f28cca0c710>,
 <layers.MaxPool at 0x7f28cca0c6b0>,
 <layers.Flatten at 0x7f28cca0d280>,
 <layers.Linear at 0x7f28cca0e1b0>,
 <layers.LeakyRelu at 0x7f291421c8c0>,
 <layers.Linear at 0x7f291421c740>,
 <layers.Softmax at 0x7f28cca0f470>]

In [40]:
# Index of the training image used as the running example in the visualisations below.
TARGET_IMG = 50
px.imshow(x_train[TARGET_IMG])

In [41]:
from functools import reduce

# Push the first 1000 training images through the first four layers
# (Convolutional -> BatchNorm -> Relu -> MaxPool), then show the feature maps
# of the target image, one facet per output channel (axis 2 of a single sample).
imgs = x_train[:1000]
convs = reduce(lambda out, layer: layer.forward(out), nn[:4], imgs)
px.imshow(convs[TARGET_IMG], facet_col=2, facet_col_wrap=10)

In [42]:
# Total activation per output channel: sum over the sample axis and both spatial axes.
convs.sum(axis=(0, 1, 2))

array([27199.05119103, 21432.38680741, 19785.10114154, 18707.86752072,
       11649.36508566, 13797.36918632, 10777.03740412, 17596.69079882,
       14345.8012749 , 22651.97941909])

In [43]:
# Output of the first seven layers (up to and including LeakyRelu) for the
# same images, summed over the sample axis: one total per hidden unit.
activations = reduce(lambda out, layer: layer.forward(out), nn[:7], imgs)
activations.sum(axis=0)

array([-7.13977724e+01, -6.06170104e+01, -5.23185803e+01, -1.47099403e+02,
       -6.21879702e+01, -4.28604223e+01, -3.30884810e+02, -3.44013493e+02,
       -6.79475293e+01, -8.00582551e+01,  3.37414252e+02, -4.57531364e+01,
        3.21217285e+02, -8.20533576e+01,  1.40154616e+03, -4.92249654e+01,
       -3.72762449e+02, -7.51115042e+01, -8.36093648e+01, -7.65842494e+01,
       -1.51951876e+02, -2.34071774e+02, -2.03447268e+02,  2.26967407e+02,
       -4.88734517e+01, -2.19456101e+02, -8.02302404e+01, -2.83127235e+02,
       -1.10277948e+02, -5.26100309e+01, -9.02921441e+01, -3.15491500e+02,
       -3.95548045e+01, -1.55860596e+02, -2.63904374e+02, -1.57951530e+02,
       -1.87273087e+02, -2.50623129e+02, -1.33376991e+02,  2.39912925e+02,
       -1.87386901e+02, -1.96353492e+02, -7.50691648e+01, -5.76626773e+01,
       -7.54944296e+01, -9.19227012e+01, -4.26370919e+01, -1.64410870e+02,
       -8.32542940e+01, -1.41509706e+02, -7.69447940e+01, -5.30198083e+01,
       -6.94616263e+01, -

In [44]:
# Indexing with a list keeps the leading sample axis, giving a one-sample batch.
convs[[250]].shape

(1, 13, 13, 10)

In [45]:
def min_max_normalize(arr:np.ndarray, axis:tuple) -> np.ndarray:
    """Rescale `arr` to the [0, 1] range along `axis`.

    Each slice is shifted by its minimum and divided by its value range
    (max - min). A constant slice has a zero range; it maps to all zeros
    instead of producing NaN from a 0/0 division.

    Args:
        arr: Array to normalise.
        axis: Axis or axes over which the minimum and maximum are taken.

    Returns:
        A new array with the same shape as `arr`.
    """
    lowest = arr.min(axis, keepdims=True)
    value_range = arr.max(axis, keepdims=True) - lowest
    # Replace a zero range with 1: the numerator is 0 there, so the result is 0.
    safe_range = np.where(value_range == 0, 1, value_range)
    return (arr - lowest) / safe_range


# Rescale the first layer's kernels to [0, 1] using their global min and max
# so they can be rendered as images, one facet per kernel (axis 0).
MIN = nn[0].kernels.min()
MAX = nn[0].kernels.max()
normed_kernels = (nn[0].kernels - MIN) / (MAX - MIN)

px.imshow(normed_kernels, facet_col=0, facet_col_wrap=10)

In [46]:
# Biases of the first (Convolutional) layer.
nn[0].biases

array([[[[ 2.19863676e-11, -8.67689910e-12, -3.11132557e-13,
          -7.66046026e-12,  1.14822841e-11,  1.15214143e-12,
           9.82631410e-12,  1.31738999e-11,  1.07837528e-11,
          -1.18525174e-11]]]])

In [47]:
# `gamma` parameter of the BatchNorm layer (nn[1]).
nn[1].gamma

array([[[[0.52779861, 0.41050551, 0.25418172, 0.43037644, 0.32722252,
          0.33699421, 0.49522439, 0.35472762, 0.47563867, 0.3049408 ]]]])

In [48]:
# `beta` parameter of the BatchNorm layer (nn[1]).
nn[1].beta

array([[[[-0.36852564, -0.2544772 , -0.00366403, -0.30026435,
          -0.24406324, -0.19220797, -0.52037788, -0.14986187,
          -0.40846846, -0.12176323]]]])