In [20]:
import torch
from torch import nn, optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import random_split
from datetime import datetime
import numpy as np



# Getting the same results with train and train_manual_update
- Write torch.manual_seed(42) at the beginning of your notebook.
- Write torch.set_default_dtype(torch.double) at the beginning of your notebook to alleviate precision errors

In [21]:
# Reproducibility setup, meant to run at the beginning of the notebook
# (before any model is instantiated or any dataset is split).

# Seed PyTorch's global random number generator.
torch.manual_seed(42) 
# Use float64 as the default floating-point dtype to alleviate precision errors.
torch.set_default_dtype(torch.double)

# Tasks
Load, analyse and preprocess the CIFAR-10 dataset. Split it into 3
datasets: training, validation and test. Take a subset of these datasets
by keeping only 2 labels: bird and airplane

In [22]:
def load_cifar(train_val_split=0.9, data_path='../data/', preprocessor=None):
    """Load CIFAR-10 and split it into training, validation and test datasets.

    The official CIFAR-10 training split is divided at random into a training
    part and a validation part; the official test split is returned as is.
    No label filtering is performed here.

    Parameters
    ----------
    train_val_split : float
        Fraction (between 0 and 1) of the official training split that is
        kept for training. The remainder becomes the validation set.
    data_path : str
        Directory in which the dataset is stored (it is downloaded there if
        it is not already present).
    preprocessor : callable, optional
        Transform applied to every image. When ``None`` (the default),
        ``transforms.ToTensor()`` is used.

    Returns
    -------
    tuple
        ``(train_set, val_set, test_set)``.

    Raises
    ------
    ValueError
        If ``train_val_split`` is outside ``[0, 1]``.
    """
    # A fraction outside [0, 1] would produce a negative split length whose
    # sum still matches the dataset size, so fail early and explicitly.
    if not 0.0 <= train_val_split <= 1.0:
        raise ValueError(
            f"train_val_split must be in [0, 1], got {train_val_split!r}"
        )

    # Honour the caller-supplied transform instead of silently ignoring it.
    transform = transforms.ToTensor() if preprocessor is None else preprocessor

    full_train_set = datasets.CIFAR10(
        data_path, train=True, download=True, transform=transform
    )
    test_set = datasets.CIFAR10(
        data_path, train=False, download=True, transform=transform
    )

    n_train = int(train_val_split * len(full_train_set))
    n_val = len(full_train_set) - n_train

    # No generator is passed, so the split follows the seed set with
    # torch.manual_seed.
    train_set, val_set = random_split(full_train_set, [n_train, n_val])

    return train_set, val_set, test_set


def compute_accuracy(model, loader):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    The predicted class of a sample is the index of the largest model output
    along dimension 1. The model is evaluated in eval mode with gradient
    tracking disabled, and its previous train/eval mode is restored afterwards
    so that a later training step is not silently run in eval mode.

    Parameters
    ----------
    model : torch.nn.Module
        Classifier whose output has one score per class along dimension 1.
    loader : iterable
        Yields ``(data, target)`` batches of tensors.

    Returns
    -------
    float
        ``correct / total``, or ``0.0`` when ``loader`` yields no samples.
    """
    # A model without parameters has no device of its own; in that case the
    # batches are left where they are.
    first_param = next(model.parameters(), None)
    device = None if first_param is None else first_param.device

    was_training = model.training
    model.eval()
    correct = 0
    total = 0

    try:
        with torch.no_grad():
            for data, target in loader:
                if device is not None:
                    data = data.to(device)
                    target = target.to(device)

                predicted = torch.argmax(model(data), dim=1)

                total += target.size(0)
                correct += (predicted == target).sum().item()
    finally:
        # Put the model back in the mode the caller left it in.
        model.train(was_training)

    # Avoid a ZeroDivisionError on an empty loader.
    return correct / total if total else 0.0




Write a MyMLP class that implements an MLP in PyTorch (so only fully
connected layers) such that:
    
    - The input dimension is 3072 (= 32 ∗ 32 ∗ 3) and the output dimension is 2 (for the 2 classes).
    - The hidden layers have respectively 512, 128 and 32 hidden units.
    - All activation functions are ReLU. The last layer has no activation function since the cross-entropy loss already includes a softmax activation function.

In [23]:
class MyNet(nn.Module):
    """Fully connected network with tanh activations on every layer.

    Besides the usual forward pass, the network keeps the pre-activation
    values ``z`` and the activations ``a`` of the last forward call, together
    with slots ``dL_dw`` / ``dL_db`` for gradients computed by a hand-written
    backpropagation routine.

    Parameters
    ----------
    n_l : sequence of int
        Number of units per layer, input layer first and output layer last.

    Attributes
    ----------
    L : int
        Number of fully connected layers (``len(n_l) - 1``).
    n_l : list of int
        Private copy of the layer sizes.
    z, a : dict
        ``z[i]`` is the pre-activation and ``a[i]`` the activation of layer
        ``i`` from the last forward pass; ``a[0]`` is the flattened input.
    dL_dw, dL_db : dict
        Per-layer gradient slots, ``None`` until filled from outside.
    f, df : dict
        Per-layer activation function and its derivative.
    fc : nn.ModuleDict
        Linear layers keyed by ``"1"`` ... ``str(L)``.
    """

    def __init__(self, n_l = [2, 3, 2]):
        super().__init__()

        # Copy the sizes so the instance shares no state with the caller's
        # list nor with the (mutable) default argument.
        self.n_l = list(n_l)

        # number of layers in our network (following Andrew's notations)
        self.L = len(self.n_l) - 1
        layer_ids = range(1, self.L + 1)

        # Where we will store our neuron values
        # - z: before activation function
        # - a: after activation function (a=f(z))
        self.z = {i: None for i in layer_ids}
        self.a = {i: None for i in range(self.L + 1)}

        # Where we will store the gradients for our custom backpropagation algo
        self.dL_dw = {i: None for i in layer_ids}
        self.dL_db = {i: None for i in layer_ids}

        # Our activation functions
        self.f = {i: (lambda x: torch.tanh(x)) for i in layer_ids}

        # Derivatives of our activation functions: tanh'(x) = 1 / cosh(x)^2
        self.df = {i: (lambda x: 1 / (torch.cosh(x) ** 2)) for i in layer_ids}

        # fully connected layers
        # nn.ModuleDict with string keys is required so that PyTorch registers
        # the layers' parameters (otherwise, the model does not learn).
        # Layers are created in order 1..L.
        self.fc = nn.ModuleDict({
            str(i): nn.Linear(
                in_features=self.n_l[i - 1], out_features=self.n_l[i]
            )
            for i in layer_ids
        })

    def forward(self, x):
        """Run the forward pass and cache every layer's ``z`` and ``a``.

        ``x`` is flattened from dimension 1 onwards before entering the first
        layer. Returns the activation of the last layer, ``a[L]``.
        """
        # Input layer
        self.a[0] = torch.flatten(x, 1)

        # Hidden layers until output layer
        for i in range(1, self.L + 1):
            # fully connected layer
            self.z[i] = self.fc[str(i)](self.a[i - 1])
            # activation
            self.a[i] = self.f[i](self.z[i])

        # return output
        return self.a[self.L]

Write a train(n_epochs, optimizer, model, loss_fn, train_loader) function that trains model for n_epochs epochs given an optimizer optimizer, a loss function loss_fn and a dataloader train_loader.

In [24]:
def train(n_epochs, optimizer, model, loss_fn, train_loader):
    """Train ``model`` for ``n_epochs`` epochs with the given optimizer.

    Parameters
    ----------
    n_epochs : int
        Number of passes over ``train_loader``.
    optimizer : torch.optim.Optimizer
        Optimizer already bound to ``model``'s parameters.
    model : torch.nn.Module
        Model to train; it is switched to training mode.
    loss_fn : callable
        ``loss_fn(output, target)`` returning a scalar tensor.
    train_loader : iterable
        Yields ``(data, target)`` batches of tensors.

    Returns
    -------
    list of float
        Mean of the batch losses for each epoch (``nan`` for an epoch in
        which the loader yielded no batch), so that runs can be compared.
    """
    # Training must not run in eval mode, which an earlier evaluation may
    # have left active.
    model.train()

    # Send batches to wherever the model lives; a parameterless model has no
    # device of its own, so batches are then left untouched.
    first_param = next(model.parameters(), None)
    device = None if first_param is None else first_param.device

    epoch_losses = []
    for _ in range(n_epochs):
        running_loss = 0.0
        n_batches = 0

        for data, target in train_loader:
            if device is not None:
                data = data.to(device)
                target = target.to(device)

            optimizer.zero_grad()
            loss = loss_fn(model(data), target)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            n_batches += 1

        epoch_losses.append(
            running_loss / n_batches if n_batches else float("nan")
        )

    return epoch_losses



Write a similar function train_manual_update that has no optimizer parameter, but a learning rate lr parameter instead and that manually updates each trainable parameter of model using equation (2). Do not forget to zero out all gradients after each iteration. 

Train 2 instances of MyMLP, one using train and the other using train_manual_update (use the same parameter values for both models). Compare their respective training losses. To get exactly the same results with both functions, see section 3.3

In [None]:
def backpropagation(model, y_true, y_pred):
    """Compute the loss gradients of every layer by hand and store them on ``model``.

    The output-layer gradient used here is ``2 * (y_pred - y_true)``, i.e. the
    gradient of the summed squared error (no averaging factor). Gradients are
    accumulated (summed) over the batch dimension for both weights and biases.

    Parameters
    ----------
    model : object
        Must expose ``L`` (number of layers), ``fc[str(l)].weight``,
        ``df[l]`` (activation derivative), and the cached ``z[l]`` / ``a[l]``
        of the forward pass that produced ``y_pred``. The results are written
        to ``model.dL_dw[l]`` and ``model.dL_db[l]`` for ``l = 1..L``.
    y_true : torch.Tensor
        Target values, broadcastable against ``y_pred``.
    y_pred : torch.Tensor
        Network output, with the batch along dimension 0.

    Returns
    -------
    None
    """
    n_layers = model.L

    with torch.no_grad():
        # Gradient of the loss with respect to the network output.
        dL_da = 2 * (y_pred - y_true)

        # delta[l] is the gradient of the loss w.r.t. the pre-activation z[l].
        delta = {n_layers: dL_da * model.df[n_layers](model.z[n_layers])}

        # Propagate backwards through the layers: from L-1 down to 1.
        for layer in range(n_layers - 1, 0, -1):
            upstream = delta[layer + 1] @ model.fc[str(layer + 1)].weight
            delta[layer] = upstream * model.df[layer](model.z[layer])

        for layer in range(1, n_layers + 1):
            # (out, batch) @ (batch, in): sums the per-sample contributions.
            model.dL_dw[layer] = delta[layer].t() @ model.a[layer - 1]
            # Sum over the batch as well, so the result always has the shape
            # of the bias and stays consistent with dL_dw. For a batch of
            # size 1 this equals the former squeeze(0).
            model.dL_db[layer] = delta[layer].sum(dim=0)

    return None



def train_manual_update(n_epochs, model, loss_fn, train_loader, lr=1e-2, momentum_coeff=0., weight_decay=0.):
    
    model = model 
    for epoch in range(n_epochs):
        for data, target in train_loader:
            

            

SyntaxError: unmatched ')' (1186912599.py, line 34)