In [1]:
import numpy as np
from numpy.typing import NDArray

In [14]:
from deep_learner import Tensor
from deep_learner.datasets import cifar10
from deep_learner.metrics.accuracy import accuracy
from deep_learner.nn import (
    CrossEntropyLoss,
    Dropout,
    Linear,
    Module,
    ReLU,
    Sequential,
    Softmax,
)
from deep_learner.nn.optimizer.sgd import SGD
from deep_learner.utils import batch

In [3]:
train_X, train_Y, test_X, test_Y = cifar10()

In [4]:
def describe(data: NDArray) -> None:
    print("Shape:", data.shape)
    print(f"Min: {data.min()}, Max: {data.max()}")
    print(f"Mean: {data.mean()}, Standard deviation: {data.std()}")

In [5]:
describe(train_X), describe(test_X)

Shape: (50000, 3072)
Min: 0.0, Max: 1.0
Mean: 0.4733648896217346, Standard deviation: 0.25156906247138977
Shape: (10000, 3072)
Min: 0.0, Max: 1.0
Mean: 0.4765852391719818, Standard deviation: 0.25121963024139404


(None, None)

In [6]:
def preprocessing(train_data: NDArray, test_data: NDArray) -> tuple[NDArray, NDArray]:
    return (train_data - train_data.mean()) / train_data.std(), (
        test_data - test_data.mean()
    ) / test_data.std()

In [7]:
p_train_X, p_test_X = preprocessing(train_X, test_X)

describe(p_train_X), describe(p_test_X)

Shape: (50000, 3072)
Min: -1.8816498517990112, Max: 2.0934016704559326
Mean: -7.475010534108151e-06, Standard deviation: 1.0
Shape: (10000, 3072)
Min: -1.8970860242843628, Max: 2.0834946632385254
Mean: -1.214090957546432e-06, Standard deviation: 1.0000001192092896


(None, None)

In [8]:
train_Y.shape, train_Y[:10]

((50000, 10),
 array([[0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
        [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]], dtype=uint8))

In [17]:
class MyModel(Module):
    def __init__(self):
        super().__init__()

        self.linear1 = Linear(n_in=3072, n_out=256)
        self.linear2 = Linear(n_in=256, n_out=256)
        self.linear3 = Linear(n_in=256, n_out=10)

        # self.skip_connection = Linear(n_in=256, n_out=256)

        self.relu = ReLU()

        self.softmax = Softmax()

    def forward(self, x: Tensor) -> Tensor:
        return self.softmax(
            self.linear3(self.relu(self.linear2(self.relu(self.linear1(x)))))
        )


model = Sequential(
    Linear(n_in=3072, n_out=256),
    ReLU(),
    Linear(n_in=256, n_out=256),
    ReLU(),
    Dropout(drop_proba=0.4),
    Linear(n_in=256, n_out=10),
    Softmax(),
)

loss_fn = CrossEntropyLoss()

In [25]:
from collections.abc import Generator, Sequence
from itertools import product
from typing import Any


def grid_search(
    hyperparameters: dict[str, Sequence[Any]],
) -> Generator[dict, None, None]:
    for combination in product(*hyperparameters.values()):
        yield {
            key: param for key, param in zip(hyperparameters, combination, strict=False)
        }

In [26]:
for param in grid_search({"lr": [1e-1, 1e-2], "batch_size": [16, 32, 128]}):
    print(param)

{'lr': 0.1, 'batch_size': 16}
{'lr': 0.1, 'batch_size': 32}
{'lr': 0.1, 'batch_size': 128}
{'lr': 0.01, 'batch_size': 16}
{'lr': 0.01, 'batch_size': 32}
{'lr': 0.01, 'batch_size': 128}


In [18]:
model.train()

optimizer = SGD(model, learning_rate=1e-2)

for epoch in range(100):
    cum_loss = Tensor(0)
    num_batches = 0
    train_accuracy = Tensor(0)

    for batch_X, batch_Y in batch(p_train_X, train_Y, batch_size=256):
        optimizer.zero_grad()

        X = Tensor(batch_X)
        Y = Tensor(batch_Y)

        predictions: Tensor = model(X)
        loss = loss_fn(predictions, Y)

        loss.backward()

        optimizer.step()

        cum_loss += loss
        num_batches += 1

        train_accuracy += accuracy(
            Tensor(np.argmax(predictions.data, axis=-1)),
            Tensor(np.argmax(Y.data, axis=-1)),
        )

    print(
        f"{epoch=}, train_accuracy={train_accuracy.data / num_batches:.2%}, cum_loss={cum_loss.data:.4f}"
    )

epoch=0, train_accuracy=23.90%, cum_loss=417.8727
epoch=1, train_accuracy=32.16%, cum_loss=375.4670
epoch=2, train_accuracy=35.69%, cum_loss=355.7024
epoch=3, train_accuracy=38.03%, cum_loss=343.3465
epoch=4, train_accuracy=39.88%, cum_loss=334.1354
epoch=5, train_accuracy=41.28%, cum_loss=326.9482
epoch=6, train_accuracy=42.22%, cum_loss=320.6745
epoch=7, train_accuracy=43.57%, cum_loss=315.4324
epoch=8, train_accuracy=44.39%, cum_loss=310.1797
epoch=9, train_accuracy=45.04%, cum_loss=305.9832
epoch=10, train_accuracy=45.96%, cum_loss=302.1980
epoch=11, train_accuracy=46.62%, cum_loss=297.8916
epoch=12, train_accuracy=47.32%, cum_loss=294.3786
epoch=13, train_accuracy=47.97%, cum_loss=290.5426
epoch=14, train_accuracy=48.51%, cum_loss=287.4820
epoch=15, train_accuracy=49.35%, cum_loss=283.8905
epoch=16, train_accuracy=49.79%, cum_loss=281.0463
epoch=17, train_accuracy=50.33%, cum_loss=278.0836
epoch=18, train_accuracy=51.16%, cum_loss=275.1289
epoch=19, train_accuracy=51.49%, cum_loss