In [None]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

from tqdm import trange

In [None]:
class MLP(nn.Module):
    """Fully connected classifier: ReLU hidden layers followed by log-softmax.

    Args:
        h_sizes: Sequence of layer widths. ``h_sizes[0]`` is the input size and
            every consecutive pair ``(h_sizes[k], h_sizes[k+1])`` defines one
            hidden ``nn.Linear`` layer.
        out_size: Number of output classes.
        device: Device on which the parameters are placed. When omitted, CUDA
            is used if it is available (matching the previous hard-coded
            ``.cuda()`` behaviour) and the CPU is used otherwise, so the model
            can also be built on machines without a GPU.
    """

    def __init__(self, h_sizes, out_size, device=None):
        super().__init__()
        if device is None:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # One Linear layer per consecutive pair of sizes.
        self.layers = nn.ModuleList(
            [nn.Linear(n_in, n_out) for n_in, n_out in zip(h_sizes[:-1], h_sizes[1:])]
        )
        self.out = nn.Linear(h_sizes[-1], out_size)
        self.logsoftmax = nn.LogSoftmax(dim=1)

        # Move every registered parameter in one go instead of per layer.
        self.to(device)

    def forward(self, x):
        """Return per-class log-probabilities for a ``(batch, features)`` input."""
        for layer in self.layers:
            x = F.relu(layer(x))
        return self.logsoftmax(self.out(x))

In [None]:
# Load the MNIST train and test splits from their CSV files into DataFrames.
X, X_test = (
    pd.read_csv("/kaggle/input/mnist-in-csv/mnist_train.csv"),
    pd.read_csv("/kaggle/input/mnist-in-csv/mnist_test.csv"),
)

In [None]:
# Preview the training row at position 1: column 0 is shown as its label and
# the remaining columns are reshaped into a 28x28 image.
digit_label = X.iloc[1, 0]
digit_pixels = X.iloc[1, 1:].to_numpy()
plt.imshow(digit_pixels.reshape((28, 28)))
plt.title(f"{digit_label} sample")
plt.show()

In [None]:
# Training hyper-parameters.
N_EPOCH = 100
BATCH_SIZE = 60_000
# Number of full batches that fit in the training frame (any remainder is dropped).
N_BATCHES = len(X) // BATCH_SIZE

In [None]:
model = MLP((28 * 28, 1024, 1024), 10)
loss = nn.NLLLoss()
opt = optim.SGD(model.parameters(), lr=1e-3)

# Follow whichever device the model's parameters live on instead of assuming CUDA.
device = next(model.parameters()).device

# Convert the DataFrames to tensors once: this work does not change between
# steps, so there is no reason to redo it inside the loop.
# NOTE(review): pixel values are fed to the network as read from the CSV (no
# rescaling), which keeps this cell consistent with the inference cell below.
train_data = torch.as_tensor(
    X.iloc[:, 1:].to_numpy(), dtype=torch.float32, device=device
).reshape(-1, 28 * 28)
train_labels = torch.as_tensor(X.iloc[:, 0].to_numpy(), dtype=torch.int64, device=device)
test_data = torch.as_tensor(
    X_test.iloc[:, 1:].to_numpy(), dtype=torch.float32, device=device
).reshape(-1, 28 * 28)
test_labels = torch.as_tensor(X_test.iloc[:, 0].to_numpy(), dtype=torch.int64, device=device)

r = trange(N_EPOCH * N_BATCHES)
losses = []
accuracies = []
for j in r:
    # Cycle through the batches; previously the index was pinned to 0 so only
    # the first batch was ever used.
    i = j % N_BATCHES
    batch_data = train_data[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
    batch_labels = train_labels[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]

    model.train()
    # Clear gradients from the previous step; without this, backward() keeps
    # accumulating into .grad and every update uses the sum of all past gradients.
    opt.zero_grad()
    log_probs = model(batch_data)
    loss_val = loss(log_probs, batch_labels)
    loss_val.backward()
    opt.step()

    # Evaluate on the full test set after every optimisation step.
    model.eval()
    with torch.no_grad():
        preds = model(test_data).argmax(dim=1)
        # Reduce on the tensor itself; Python's builtin sum() would iterate
        # element by element over a device tensor.
        acc = (preds == test_labels).sum().item() / X_test.shape[0]

    losses.append(loss_val.item())
    accuracies.append(acc * 100)
    r.set_description(f"loss: {loss_val.item():.2f}\t acc: {acc*100:.2f}%\t")

In [None]:
# One value is recorded per optimisation step, so size the x-axis from the
# recorded history rather than assuming it always has exactly N_EPOCH entries.
steps = range(len(losses))
plt.plot(steps, losses, label="Training loss")
plt.plot(steps, accuracies, label="Test accuracy (%)")
plt.xlabel("Training step")
plt.legend()
plt.title("Training loss and testing accuracy over the training")
plt.show()

The previous plot suggests that training converges at around epoch 20. Next, we take a closer look at the model's prediction on a random test sample.

In [None]:
# Pick a random row of the test frame: column 0 is the label, the rest are pixels.
rand_idx = np.random.randint(X_test.shape[0])
sample = X_test.iloc[rand_idx, 1:].to_numpy()
label = X_test.iloc[rand_idx, 0]

model.eval()
with torch.no_grad():
    # Build the (1, 784) input directly from the array on the model's own
    # device; wrapping the ndarray in a Python list first is slow and triggers
    # a PyTorch warning, and hard-coding .cuda() breaks on CPU-only machines.
    device = next(model.parameters()).device
    sample_tensor = torch.as_tensor(sample.astype(np.float32), device=device).unsqueeze(0)
    # Log-probabilities for the single sample (batch dimension dropped).
    preds = model(sample_tensor)[0]

plt.imshow(sample.reshape((28, 28)))
plt.show()

In [None]:
# Turn the model's log-probabilities into a NumPy array of probabilities.
# The type guard keeps the cell idempotent: on a re-run `preds` is already an
# ndarray and must not be exponentiated a second time.
if isinstance(preds, torch.Tensor):
    preds = preds.exp().cpu().numpy()
# seaborn >= 0.12 only accepts x/y as keyword arguments.
sns.barplot(x=list(range(10)), y=preds)
plt.title(f"Probabilities for sample of label {label}")
plt.show()

In [None]:
# The index of the largest entry in `preds` is taken as the predicted class.
prediction = preds.argmax()
# Bare f-string as the last expression so the notebook shows it as the cell output.
f"Predicted label {prediction} for label {label}"

In [None]:
# Detach the first hidden layer's weight matrix from autograd, copy it to the
# host as a NumPy array, and render it as an image.
first_layer = model.layers[0]
weights = first_layer.weight.detach().cpu().numpy()
plt.imshow(weights)
plt.title("Weights of the first linear layer")
plt.show()