ECS6P9U/P: NEURAL NETWORKS & DEEP LEARNING

2021/22 – Semester 2

Dr. Yorgos Tzimiropoulos

Spring 2022 - Coursework (Notebook)

Jordan Akisanya – 200884501

In [12]:
import my_utils as mu
import torch
from torch import nn
from IPython import display
import pandas as pd
from torch.utils.tensorboard import SummaryWriter
from collections import OrderedDict, namedtuple
from itertools import product
import time
import json

## 1. Create the Model

In [13]:
class MixerBlock(nn.Module):
    def __init__(self, num_patches, num_channels, patches_mlp_dim, channels_mlp_dim, dropout_p):
        super(MixerBlock, self).__init__()
        self.num_patches = num_patches
        self.num_channels = num_channels
        self.patches_mlp_dim = patches_mlp_dim
        self.channels_mlp_dim = channels_mlp_dim
        self.dropout_p = dropout_p

        # (1) token-mixing MLP
        self.MLP1 = nn.Sequential(nn.Linear(self.num_patches, self.patches_mlp_dim),
                                  nn.ReLU(),
                                  nn.Dropout(self.dropout_p), 
                                  nn.Linear(self.patches_mlp_dim, self.num_patches),
                                  nn.Dropout(self.dropout_p))
        # (2) channel-mixing MLP
        self.MLP2 = nn.Sequential(nn.Linear(self.num_channels, self.channels_mlp_dim),
                                  nn.ReLU(),
                                  nn.Dropout(self.dropout_p), 
                                  nn.Linear(self.channels_mlp_dim, self.num_channels),
                                  nn.Dropout(self.dropout_p))

        self.norm1 = nn.LayerNorm(self.num_channels)
        self.norm2 = nn.LayerNorm(self.num_channels)
        
    def forward(self, x):
        y = self.norm1(x)
        y = y.transpose(2, 1)
        y = self.MLP1(y) 
        y = y.transpose(2, 1) 
        x = x + y             
        y = self.norm2(x) 
        return x + self.MLP2(y) 
    
class MlpMixer(nn.Module):
    def __init__(self, num_classes, num_blocks, patch_size, num_patches, 
                 num_channels, patches_mlp_dim, channels_mlp_dim, dropout_p):
        super(MlpMixer, self).__init__()
        self.num_classes = num_classes
        self.num_blocks = num_blocks
        self.patch_size = patch_size
        self.num_patches = num_patches
        self.num_channels = num_channels
        self.patches_mlp_dim = patches_mlp_dim
        self.channels_mlp_dim = channels_mlp_dim
        self.dropout_p = dropout_p
        
        self.MixerBlock = MixerBlock(self.num_patches, self.num_channels,
                                     self.patches_mlp_dim, self.channels_mlp_dim, 
                                     self.dropout_p)
        
        self.stem_mlp = nn.Sequential(nn.Linear(self.num_channels, self.num_channels),
                                      nn.ReLU())
            
        self.norm1 = nn.LayerNorm(num_channels)
        self.out = nn.Linear(self.num_channels, self.num_classes)

    def forward(self, x):
        # Stem
        x = mu.F.unfold(x, kernel_size=self.patch_size, stride=self.patch_size)
        x = x.permute(0, 2, 1)
        x = self.stem_mlp(x)

        # Backbone
        for _ in range(self.num_blocks): 
            x = self.MixerBlock(x)
        x = self.norm1(x)

        # Logits out
        x = torch.mean(x, dim=1)
        logits = self.out(x)
        return logits


## 2. Training and Evaluation Functions/Classes

In [14]:
# Define accuracy function
# Source: my_utils
def accuracy(y_hat, y):  #y_hat is a matrix; 2nd dimension stores prediction scores for each class.
    """Compute the number of correct predictions."""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1) # Predicted class is the index of max score         
    cmp = (y_hat.type(y.dtype) == y)  # because`==` is sensitive to data types
    return float(torch.sum(cmp)) # Taking the sum yields the number of correct predictions.

In [15]:
# Define accumulator class
# Source: my_utils
class Accumulator:  
    """For accumulating sums over `n` variables."""
    def __init__(self, n):
        self.data = [0.0] * n # [0, 0, ..., 0]
    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]
    def reset(self):
        self.data = [0.0] * len(self.data)
    def __getitem__(self, idx):
        return self.data[idx]

In [16]:
# Define accuracy evaluation function
# Source: my_utils 
def evaluate_accuracy(net, data_iter, device): 
    """Compute the accuracy for a model on a dataset."""
    metric = Accumulator(2)  # No. of correct predictions, no. of predictions
    for _, (X, y) in enumerate(data_iter):
        (X, y) = (X.to(device), y.to(device))
        metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]

In [17]:
# Define training function 
# Source: my_utils
def train_epoch_ch3(net, train_iter, loss, optimizer, device):  
    """The training function for one epoch."""
    # Set the model to training mode
    if isinstance(net, torch.nn.Module):
        net.train()
    # Sum of training loss, sum of training accuracy, no. of examples
    metric = Accumulator(3)
    for X, y in train_iter:
        X = X.to(device)
        y = y.to(device)
        # Compute gradients and update parameters
        y_hat = net(X)
        l = loss(y_hat, y)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        metric.add(float(l) * len(y), accuracy(y_hat, y), y.size().numel())
    # Return training loss and training accuracy
    return metric[0] / metric[2], metric[1] / metric[2]

In [18]:
# Define the epoch class
# Source: github.com/ElisonSherton/Deep_Learning_Using_PyTorch
class Epoch():
    def __init__(self):
        # Keeps track of which epoch it is
        self.count = 0
        # Keeps track of the loss
        self.loss = 0
        # Keeps track of the number of correct predictions
        self.num_correct = 0
        # When to start
        self.start_time = 0

In [19]:
# Define the run manager class
# Source https://github.com/ElisonSherton/Deep_Learning_Using_PyTorch
# Modified to fit existing my_utils function outputs
class RunManager():
    def __init__(self):
        self.e = Epoch()
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None
        self.network = None
        self.loader = None
        self.tb = None
    
    def begin_run(self, run, network, loader):
        self.run_start_time = time.time()
        self.run_params = run
        self.run_count += 1
        self.network = network
        self.loader = loader
        self.tb = SummaryWriter(comment = f"-{run}")
#           self.tb.add_graph(network)
    
    def end_run(self):
        self.tb.close()
        self.e.count = 0
    
    def begin_epoch(self):
        self.e.start_time = time.time()
        self.e.count += 1
#           self.e.loss = 0
#           self.e.num_correct = 0
        
    def end_epoch(self):
        epoch_duration = time.time() - self.e.start_time
        run_duration = time.time() - self.run_start_time
#           loss = self.e.loss / len(self.loader.dataset)
#           accuracy = self.e.num_correct / (len(self.loader.dataset))
        loss = train_loss
        train_accuracy = train_acc
        test_accuracy = test_acc
        
        self.tb.add_scalar('Loss', loss, self.e.count)
        self.tb.add_scalar('Train_Accuracy', train_accuracy, self.e.count)
        self.tb.add_scalar('Test_Accuracy', test_accuracy, self.e.count)

        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name, param, self.e.count)
            self.tb.add_histogram(f"{name}.grad", param.grad, self.e.count)
            
        results = {'Run': self.run_count, 
                   'Epoch': self.e.count, 
                   'Loss': loss,
                   'Train_Accuracy': train_accuracy,
                   'Test_Accuracy': test_accuracy,
                   'Epoch Duration': epoch_duration,
                   'Run Duration': run_duration}
        
        for k, v in self.run_params._asdict().items(): 
            results[k] = v
        
        self.run_data.append(results)

        df = pd.DataFrame.from_dict(self.run_data, orient='columns')
        display.clear_output(wait = True)
        print(df)
        
    def track_loss(self, loss):
        self.e.loss += loss.item() * self.loader.batch_size
    
    def track_num_correct(self, pred, labels):
        self.e.num_correct += pred.argmax(dim = 1).eq(labels).sum().item()
        
    def save(self, fileName):
        pd.DataFrame.from_dict(
            self.run_data, orient='columns'
        ).to_csv(f'{fileName}.csv')

        with open(f'{fileName}.json', 'w', encoding='utf-8') as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)

In [20]:
# Define the parameter builder class
# Source: github.com/ElisonSherton/Deep_Learning_Using_PyTorch 
class RunBuilder():
    @staticmethod
    def get_runs(params):
        
        # Creates a named tuple which we can use to access values in organized way of .notation
        Run = namedtuple('Run', params.keys())
        # Create a container to hold all the combination of params
        runs = []
        
        # Compute the Cartesian product of parameters
        for v in product(*params.values()):
            runs.append(Run(*v))
        
        return runs

## 3. Train the Model

In [21]:
# Create the parameter dictionary 
params = OrderedDict(
    lr = [.01],
    dropout_p = [.1],
    batch_size = [256],
    num_epochs = [60],
    device = ['cpu']
)

In [22]:
m = RunManager()
for run in RunBuilder.get_runs(params):
    # Task 1 - Read dataset and create dataloader
    loader = mu.load_data_fashion_mnist(batch_size=run.batch_size)

    # Task 2 - Create the model
    net = MlpMixer(10, 8, (4,4), 49, 16, 1024, 512, run.dropout_p).to(run.device)
    
    # Task 3 - Create the loss and optimizer
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=run.lr)

    # Task 4 - Write the training script and train the model
    m.begin_run(run, net, loader)
    for epoch in range(run.num_epochs):
        m.begin_epoch()
        train_iter, test_iter = loader
        train_metrics = train_epoch_ch3(net, train_iter, loss, optimizer, run.device)
        test_acc = evaluate_accuracy(net, test_iter, run.device)
        train_loss, train_acc = train_metrics
        m.end_epoch()
    m.end_run()
m.save('results_1')

    Run  Epoch      Loss  Train_Accuracy  Test_Accuracy  Epoch Duration  \
0     1      1  0.709132        0.742000         0.8097       17.769782   
1     1      2  0.447118        0.836700         0.8239       15.865147   
2     1      3  0.405036        0.849417         0.8464       15.940517   
3     1      4  0.377228        0.862250         0.8548       15.856584   
4     1      5  0.354748        0.869200         0.8525       15.919687   
5     1      6  0.345007        0.873533         0.8599       15.758570   
6     1      7  0.332167        0.877633         0.8626       15.906396   
7     1      8  0.324218        0.880933         0.8646       16.005501   
8     1      9  0.313348        0.884283         0.8618       15.955086   
9     1     10  0.307857        0.886900         0.8732       15.820107   
10    1     11  0.299736        0.888700         0.8711       15.811812   
11    1     12  0.296812        0.888567         0.8720       15.887808   
12    1     13  0.287921 

## 5. Display the results

In [27]:
# Display the results
pd.DataFrame.from_dict(m.run_data, orient='columns').sort_values("Test_Accuracy", ascending=False).head()

Unnamed: 0,Run,Epoch,Loss,Train_Accuracy,Test_Accuracy,Epoch Duration,Run Duration,lr,dropout_p,batch_size,num_epochs,device
57,1,58,0.166606,0.9372,0.8883,15.772248,929.865648,0.01,0.1,256,60,cuda
47,1,48,0.184309,0.931067,0.8882,15.939361,770.739119,0.01,0.1,256,60,cuda
54,1,55,0.171472,0.937317,0.8867,15.729063,882.133852,0.01,0.1,256,60,cuda
52,1,53,0.17548,0.935033,0.8865,15.734182,850.29665,0.01,0.1,256,60,cuda
27,1,28,0.230724,0.912767,0.886,15.84763,449.530975,0.01,0.1,256,60,cuda


## 6. Plot the results

In [29]:
%load_ext tensorboard
%tensorboard --logdir runs/