# Import

In [None]:
import torch
import torch.nn as nn
from torch.optim import SGD
from torch.utils.data import Dataset, DataLoader, RandomSampler
import torchvision
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

## Import Data

In [None]:
class CTDataset(Dataset):
    """Dataset backed by an ``(x, y)`` pair stored in a single ``torch.load`` file.

    On construction the inputs are divided by 255 and the labels are
    one-hot encoded over 10 classes and converted to Python ``float``
    precision (float64).
    """

    def __init__(self, filepath):
        """Load the tensors at ``filepath`` and preprocess them.

        Args:
            filepath: Location handed to ``torch.load``; the loaded object
                must unpack into exactly two values (inputs, labels).
        """
        # NOTE(review): torch.load relies on pickle under the hood -- only
        # point this at files from a trusted source.
        inputs, labels = torch.load(filepath)
        # Presumably 8-bit pixel intensities, rescaled here to [0, 1] -- TODO confirm.
        self.x = inputs / 255.
        encoded = nn.functional.one_hot(labels, num_classes=10)
        self.y = encoded.to(float)

    def __len__(self):
        """Return the number of samples (size of the leading axis of ``x``)."""
        return self.x.size(0)

    def __getitem__(self, ix):
        """Return the ``(input, one-hot label)`` pair(s) selected by ``ix``."""
        sample = self.x[ix]
        target = self.y[ix]
        return sample, target

In [None]:
# Data source (presumably the archive that ./MNIST/*.pt was extracted from):
# https://www.di.ens.fr/~lelarge/MNIST.tar.gz
# Build one dataset per split; each file is loaded and preprocessed by CTDataset.
train_ds = CTDataset('./MNIST/training.pt')
test_ds = CTDataset('./MNIST/test.pt')

In [None]:
# Random sampler over the training set, configured with num_samples=10_000
# (so one pass over the sampler yields 10,000 indices rather than the full set).
random_sampler = RandomSampler(train_ds, num_samples=10_000)

In [None]:
# Mini-batch size used by the training loader.
batch_size = 32
# Training loader: batches of `batch_size`, with indices supplied by `random_sampler`.
train_dl = DataLoader(train_ds, batch_size=batch_size, sampler=random_sampler)

In [None]:
class MyNeuralNet(nn.Module):
    """Fully connected classifier: 784 -> 100 -> 50 -> 10 with ReLU in between.

    The input is flattened to rows of ``28**2`` values before the first
    layer; the last layer's output is returned without an activation.
    """

    def __init__(self):
        super().__init__()
        self.Matrix1 = nn.Linear(28**2, 100)
        self.Matrix2 = nn.Linear(100, 50)
        self.Matrix3 = nn.Linear(50, 10)
        self.R = nn.ReLU()

    def forward(self, x):
        """Compute the 10 output scores for ``x``.

        Args:
            x: Tensor whose elements can be viewed as rows of ``28**2``
                values, e.g. ``(N, 28, 28)``, ``(N, 784)`` or a single
                ``(28, 28)`` image.

        Returns:
            ``(N, 10)`` scores when ``x`` has three or more dimensions
            (including ``N == 1``). For inputs with fewer dimensions the
            result is squeezed, so a single ``(28, 28)`` image yields a
            ``(10,)`` vector.
        """
        # An input with >= 3 dims carries an explicit batch axis. Squeezing
        # unconditionally used to drop that axis for a batch of one, which
        # breaks per-batch losses and ``argmax(axis=1)`` downstream.
        has_batch_axis = x.dim() >= 3
        x = x.view(-1, 28**2)
        x = self.R(self.Matrix1(x))
        x = self.R(self.Matrix2(x))
        x = self.Matrix3(x)
        return x if has_batch_axis else x.squeeze()

In [None]:
# Single network instance shared by the cells below.
model = MyNeuralNet()

## Train

In [None]:
def train_model(dataloader, model, loss, optimizer, n_epochs=20):
    """Train ``model`` with the supplied optimizer and record per-batch loss.

    Args:
        dataloader: Sized iterable yielding ``(x, y)`` batches.
        model: Module to train. It is put in training mode for the loop and
            switched to eval mode before returning.
        loss: Callable invoked as ``loss(model(x), y)``; its result must
            support ``.backward()`` and ``.item()``.
        optimizer: Optimizer whose ``zero_grad()`` / ``step()`` drive the
            weight updates. It is the only optimizer used here.
        n_epochs: Number of passes over ``dataloader``.

    Returns:
        Tuple ``(epochs, losses)`` of 1-D NumPy arrays with one entry per
        processed batch: the fractional epoch position
        ``epoch + i / len(dataloader)`` (epochs counted from 1) and the
        loss value of that batch.
    """
    model.train()

    # Per-batch training history.
    losses = []
    epochs = []

    n_batches = len(dataloader)

    for epoch in range(1, n_epochs+1):
        for i, (x, y) in enumerate(dataloader):
            # Update the weights of the network
            optimizer.zero_grad()
            loss_value = loss(model(x), y)
            loss_value.backward()
            optimizer.step()
            # Store training data; i / n_batches places the batch within its epoch
            epochs.append(epoch+i/n_batches)
            losses.append(loss_value.item())

        print(f'Epoch {epoch}/{n_epochs} Completed')

    model.eval()
    return np.array(epochs), np.array(losses)

In [None]:
# List the optimizer classes exposed by torch.optim and print each one's
# default hyper-parameters (instantiated on the model with lr=0.01).
optim_class = torch.optim
optim_children = dir(optim_class)
# dir() returns a sorted list, and uppercase letters sort before "_", so
# everything ahead of the first underscore-prefixed name starts with a
# capital letter -- assumed here to be the optimizer classes.
no_of_optimizers = [o.startswith("_") for o in optim_children].index(True)
# Drop the abstract base class; it is not a usable optimizer on its own.
optimizers = [o for o in optim_children[:no_of_optimizers] if o != "Optimizer"]

# The loop variable is deliberately not named `optimizer`: that global holds
# the optimizer object handed to train_model and must not be rebound to a string.
for optimizer_name in optimizers:
    print(getattr(optim_class, optimizer_name)(model.parameters(), lr=0.01))

ASGD (
Parameter Group 0
    alpha: 0.75
    capturable: False
    differentiable: False
    foreach: None
    lambd: 0.0001
    lr: 0.01
    maximize: False
    t0: 1000000.0
    weight_decay: 0
)
Adadelta (
Parameter Group 0
    differentiable: False
    eps: 1e-06
    foreach: None
    lr: 0.01
    maximize: False
    rho: 0.9
    weight_decay: 0
)
Adagrad (
Parameter Group 0
    differentiable: False
    eps: 1e-10
    foreach: None
    initial_accumulator_value: 0
    lr: 0.01
    lr_decay: 0
    maximize: False
    weight_decay: 0
)
Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.01
    maximize: False
    weight_decay: 0
)
AdamW (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.01
    maximize: False
    weight_decay: 0.01
)
Adamax (
Pa

In [None]:
# this is our project
optimizer = SGD(model.parameters(), lr=0.01)

In [None]:
# Training objective and number of passes over the training loader.
loss = nn.CrossEntropyLoss()
n_epochs = 20

In [None]:
# Run training; returns per-batch arrays of fractional epoch positions and loss values.
epoch_data, loss_data = train_model(train_dl, model, loss, optimizer, n_epochs)

In [None]:
# Average the loss across all the data per epoch to get the total loss
epoch_data_avgd = epoch_data.reshape(n_epochs,-1).mean(axis=1)
loss_data_avgd = loss_data.reshape(n_epochs,-1).mean(axis=1)

In [None]:
# Line chart of the epoch-averaged loss. The x axis spans the averaged epoch
# positions; the y axis starts at 0 and leaves 10% headroom above the maximum.
fig = px.line(
    x=epoch_data_avgd,
    y=loss_data_avgd,
    markers=True,
    title="Cross Entropy (avgd per epoch)",
    labels=dict(x="Epoch", y="Loss"),
    range_x=[epoch_data_avgd.min(), epoch_data_avgd.max()],
    range_y=[0, loss_data_avgd.max()*1.1],
)
# Optional dotted-line styling, currently disabled:
#fig.update_traces(patch={"line": {"dash": "dot"}})
fig.show()

## Test

In [None]:
# Number of test samples to run through the model and display below.
test_count = 10

In [None]:
# Take the first `test_count` test samples and predict a class index for each
# (position of the largest of the model's output scores).
xs, ys = test_ds[:test_count]
# Inference only: skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    yhats = model(xs).argmax(axis=1)

In [None]:
# Show the test images in a grid, each titled with its predicted digit.
cols = 4
rows = np.ceil(test_count/cols).astype(int)

# squeeze=False always yields a 2-D array of axes, even for a single row or column.
fig, ax = plt.subplots(rows, cols, figsize=(10,5), squeeze=False)
panels = ax.ravel()
for i in range(test_count):
    panels[i].imshow(xs[i])
    panels[i].set_title(f'Predicted Digit: {yhats[i]}')
# The grid can hold more panels than there are samples; hide the leftovers
# instead of showing empty axes.
for unused_panel in panels[test_count:]:
    unused_panel.set_axis_off()
fig.tight_layout()
plt.show()