## Task 2.4: MLP in PyTorch

ITU KSADMAL1KU-NLP - Advanced Machine Learning for NLP in KCS 2024

by Stefan Heinrich, Bertram Højer, Christian H. Rasmussen, & material by Kevin Murphy.

All info and static material: https://learnit.itu.dk/course/view.php?id=3024579

-------------------------------------------------------------------------------

This notebook is a prototypical blueprint for Deep Learning frameworks, usually following four steps:
- Data loading and preprocessing (often including Exploratory Data Analysis (EDA))
- Building a model by using the TensorFlow or PyTorch API
- Training a model (including initialising) until termination (often: convergence)
- Analysing the model (often including various steps to achieve interpretability of the model)

In the Advanced Machine Learning course, we will cover these steps in detail and will often revisit this basic framework.

In [None]:
# @title #### Import dependencies

from __future__ import absolute_import, division, print_function, unicode_literals

from IPython import display
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

# Seed every random number generator the notebook draws from. Seeding NumPy
# alone is not enough: the DataLoader shuffling, torch.randint and the
# initial layer weights all come from PyTorch's own generator.
np.random.seed(0)  # test with different seeds (for re-running everything)!
torch.manual_seed(0)

#### Load the data

In [None]:
# Number of images per training mini-batch.
batch_size = 32

# Preprocessing pipeline: its single step turns each image into a tensor.
transform = transforms.Compose([transforms.ToTensor()])

# Training split: downloaded into ../data if necessary, then served in
# shuffled mini-batches of `batch_size` by two worker processes.
trainset = torchvision.datasets.MNIST(
    root='../data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

# Test split: same location and preprocessing, but served one image at a
# time and without shuffling.
testset = torchvision.datasets.MNIST(
    root='../data',
    train=False,
    download=True,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=1,
    shuffle=False,
    num_workers=2,
)

# Preview: draw eight randomly picked training images on a 2 x 4 grid,
# each one titled with its class label.
cols, rows = 4, 2
figure = plt.figure(figsize=(10, 5))
for i in range(1, cols * rows + 1):
    # Pick a random position in the training set.
    sample_idx = torch.randint(len(trainset), size=(1,)).item()
    img, label = trainset[sample_idx]

    ax = figure.add_subplot(rows, cols, i)
    ax.set_title(f"Class {label}")
    ax.axis("off")
    # squeeze() drops the size-1 dimensions before the image is displayed.
    ax.imshow(img.squeeze(), cmap="gray")
plt.show()

#### Build the model

In [None]:
#def get_output_shape(layer, img_size):
#    return layer(torch.rand(*(img_size))).data.shape

class MNIST_model(nn.Module):
    """Fully connected classifier with two ReLU hidden layers.

    Args:
        img_size: Side length of the square input image; the first layer
            expects ``img_size * img_size`` features per sample.
        fc1_out: Number of units in the first hidden layer.
        fc2_out: Number of units in the second hidden layer.
        class_out: Number of output classes.
    """

    def __init__(self, img_size, fc1_out, fc2_out, class_out):
        super().__init__()

        self.hidden_layer_1 = nn.Linear(in_features=img_size * img_size, out_features=fc1_out)
        self.hidden_layer_2 = nn.Linear(in_features=fc1_out, out_features=fc2_out)
        self.output_layer = nn.Linear(in_features=fc2_out, out_features=class_out)

    def forward(self, img):
        """Return unnormalised class scores (logits) for a batch of images.

        No softmax is applied here: ``nn.CrossEntropyLoss`` expects raw
        logits and applies log-softmax itself, so normalising inside the
        model would squash the scores twice and weaken the gradients.
        Call ``F.softmax(logits, dim=1)`` on the result when probabilities
        are needed; the arg-max prediction is the same either way.

        Args:
            img: Batch of images with the batch along dimension 0.

        Returns:
            Tensor of shape ``(batch, class_out)`` holding the logits.
        """
        # Flatten every image into one long vector; start_dim=1 keeps the
        # leading batch dimension intact.
        img = img.flatten(start_dim=1)

        x = F.relu(self.hidden_layer_1(img))
        x = F.relu(self.hidden_layer_2(x))

        return self.output_layer(x)

# Run on the first CUDA GPU when one is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Read the model dimensions off the training data rather than hard-coding
# them: the size of dimension 1 of the data tensor and the number of
# distinct target values.
img_size = trainloader.dataset.data.shape[1]
class_out = len(trainloader.dataset.targets.unique())

# Two hidden layers of 128 units each, moved onto the selected device.
model = MNIST_model(img_size, fc1_out=128, fc2_out=128, class_out=class_out).to(device)

#### Train the model

In [None]:
# Hyperparameters: number of passes over the training data, plus the step
# size and momentum handed to the optimiser.
epochs = 5
learning_rate = 0.001
momentum = 0.9

# Training setup: cross-entropy loss, minimised by SGD with momentum over
# all parameters of the model.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=momentum,
)

# Training loop

def get_accuracy(logit, target, batch_size):
    """Return the percentage of correct predictions for one batch.

    Args:
        logit: Per-class scores with the classes along dimension 1.
        target: True class indices; the predictions are reshaped to the
            shape of this tensor before being compared.
        batch_size: Denominator used for the percentage.

    Returns:
        ``100 * (number of correct predictions) / batch_size`` as a
        Python float.
    """
    # The predicted class is the index of the highest score in each row.
    _, predicted = torch.max(logit, dim=1)
    hits = predicted.view_as(target) == target
    return (100.0 * hits.sum() / batch_size).item()

for epoch in range(epochs):
    # Put the model in training mode
    model.train()

    train_running_loss = 0.0
    train_acc = 0.0
    for images, labels in trainloader:
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass and loss for this mini-batch.
        logits = model(images)
        loss = criterion(logits, labels)

        # Clear the gradients left over from the previous step, then
        # backpropagate the current loss.
        optimizer.zero_grad()
        loss.backward()

        # update model params
        optimizer.step()

        train_running_loss += loss.item()
        # Use the actual number of samples in this batch: the final batch
        # can be smaller than the configured batch size.
        train_acc += get_accuracy(logits, labels, labels.size(0))

    # Average over the number of batches. Dividing by the last loop index
    # instead would be off by one and inflate both numbers.
    num_batches = len(trainloader)
    print('Epoch: %d | Loss: %.4f | Train Accuracy: %.2f' \
          %(epoch, train_running_loss / num_batches, train_acc / num_batches))

# Evaluate the trained model on the test set.
model = model.eval()
test_acc = 0.0
# Gradients are not needed for evaluation; disabling them avoids building
# the autograd graph for every forward pass.
with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        test_acc += get_accuracy(outputs, labels, labels.size(0))

# Average over the number of test batches; dividing by the last loop index
# would be off by one.
print(f"Test Accuracy: {test_acc / len(testloader)}, in epoch: {epoch}")

#### Analyse the model

In [None]:
# @title ##### Prediction and visualisation
# Switch the model to evaluation mode before running the analysis cells.
model = model.eval()

In [None]:
# Derive a histogram over the 10 classes
# by counting over the predictions on the test data

# hist[true_class][predicted_class] -> number of test images
hist = {k: {l: 0 for l in range(10)} for k in range(10)}

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Take the arg-max per sample (dim=1) rather than over the whole
        # output tensor, so batches with more than one image are counted
        # correctly as well.
        preds = outputs.argmax(dim=1)
        for true_cls, pred_cls in zip(labels.tolist(), preds.tolist()):
            hist[true_cls][pred_cls] += 1

In [None]:
# Set the font size before any artist is created: rcParams only apply to
# objects created afterwards, so updating them after plotting would leave
# this figure unchanged.
plt.rcParams.update({'font.size': 12})

# One bar chart per true class on a 3 x 4 grid, showing how often each
# class was predicted for it.
fig = plt.figure(figsize=(16, 15))
for idx, (key, val) in enumerate(hist.items()):
    ax = fig.add_subplot(3, 4, idx + 1)
    ax.bar(list(val.keys()), list(val.values()), color='r')
    ax.set_title(f"Prediction for {key}")
    ax.set_xticks(range(0, 10))
    # Fixed y-range so the subplots are directly comparable.
    ax.set_ylim(0, 1200)

plt.show()