In [1]:
#get relevant modules

import torch 
from PIL import Image
from torch import nn, optim
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
from torchvision import datasets, transforms
from copy import deepcopy

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
#get working dataset and format it accordingly

#MNIST
transform = transforms.ToTensor()
dataset = datasets.MNIST(root="data", download=True, train=True, transform=transform)

#define train and test sets
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

#split the dataset
train_set, test_set = torch.utils.data.random_split(dataset, [train_size, test_size])
train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
test_loader = DataLoader(test_set, batch_size=32, shuffle=False)

In [3]:
#define the model

class Layer(nn.Module):
  def __init__(self, in_channels, out_channels, kernel_size, stride, padding, num_features):
    super().__init__()

    self.output = nn.Sequential(
      nn.Conv2d(in_channels=in_channels, out_channels=out_channels, 
                kernel_size=kernel_size, stride=stride, padding=padding),
      nn.ReLU(),
      nn.BatchNorm2d(num_features=num_features)
    )

  def forward(self, x):
    x = self.output(x)
    return x


class Block(nn.Module):
  def __init__(self, in_channels, out_channels, kernel_size, stride, padding, num_features):
    super().__init__()

    self.layer_1 = Layer(in_channels, out_channels, kernel_size, stride, padding, num_features)
    self.layer_2 = Layer(in_channels, out_channels, kernel_size, stride, padding, num_features)

  def forward(self, x):
            
      #layer 1
      x = self.layer_1(x)

      #layer 2
      x = self.layer_2(x)
      
      return x

class ResNet(nn.Module):
  def __init__(self, in_channels, out_channels, kernel_size, stride, padding, num_features):
    super().__init__()

    self.initial_layer = nn.Sequential(
          nn.Conv2d(in_channels=1, out_channels=64, kernel_size=7, stride=2, padding=3),
          nn.ReLU(),
          nn.BatchNorm2d(num_features=64),
          nn.MaxPool2d(kernel_size=2, stride=2)
        )
    
    self.block_1 = Block(in_channels, out_channels, kernel_size, stride, padding, num_features)
    self.block_2 = Block(in_channels, out_channels, kernel_size, stride, padding, num_features)
    self.block_3 = Block(in_channels, out_channels, kernel_size, stride, padding, num_features)

    self.output = nn.Sequential(
      nn.Flatten(),
      nn.Linear(64*7*7, 10)
    )

  def forward(self, x):
    #initial conv layer + pooling
    x = self.initial_layer(x)

    #blocks
    y = x
    x = self.block_1(x)
    x = torch.add(x, y) #skip connection: F(x) + x

    y = x
    x = self.block_2(x)
    x = torch.add(x, y)

    y = x
    x = self.block_3(x)
    x = torch.add(x, y)

    #flatten then apply linear layer
    x = self.output(y)
    return x

In [4]:
#train the model

model = ResNet(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, num_features=64)

criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.0001)
model.train()

best_loss = 1
all_losses = []
for epoch in range(10):
  for inputs, targets in train_loader:

    outputs = model(inputs)
    loss = criterion(outputs, targets)
    all_losses.append(loss.item())

    if loss < best_loss:
       best_model = deepcopy(model)
       best_loss = loss

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  print(f"Epoch {epoch}: loss = {loss.item()}")

Epoch 0: loss = 0.015224847942590714
Epoch 1: loss = 0.04075232893228531
Epoch 2: loss = 0.00402669096365571
Epoch 3: loss = 0.003790066111832857
Epoch 4: loss = 0.007529288996011019
Epoch 5: loss = 0.031100675463676453
Epoch 6: loss = 0.0010145916603505611
Epoch 7: loss = 0.0032656118273735046
Epoch 8: loss = 5.103349849377992e-06
Epoch 9: loss = 0.0072031812742352486


In [5]:
#test the last model

model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, targets in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

print(f"Accuracy on test set: {100 * correct / total:.2f}%")

Accuracy on test set: 98.72%


In [6]:
#test the model with the lowest loss during training

best_model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, targets in test_loader:
        outputs = best_model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

print(f"Accuracy on test set: {100 * correct / total:.2f}%")

Accuracy on test set: 98.76%
