In [1]:
import torch
from torch import nn, optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import random_split
from datetime import datetime
import numpy as np



# Getting the same results with train and train_manual_update
- Write torch.manual_seed(42) at the beginning of your notebook.
- Write torch.set_default_dtype(torch.double) at the beginning of your notebook to alleviate precision errors

In [2]:
# Seed PyTorch's global random number generator so that runs are repeatable.
torch.manual_seed(42)
# Make float64 the default floating-point dtype to reduce precision errors
# when comparing train against train_manual_update.
torch.set_default_dtype(torch.double)

# Tasks
Load, analyse and preprocess the CIFAR-10 dataset. Split it into 3
datasets: training, validation and test. Take a subset of these datasets
by keeping only 2 labels: cat and car (CIFAR-10's "automobile" class).

In [26]:
def load_cifar(train_val_split=0.9, data_path='../data/', preprocessor=None):
    """Load CIFAR-10 and return cat-vs-car training, validation and test sets.

    The official CIFAR-10 training set is split randomly into a training and
    a validation part; the official test set is used as the test part. Each
    part is then reduced to the two classes "cat" and "car" and relabelled so
    that cat -> 0 and car -> 1.

    Args:
        train_val_split: Fraction (between 0 and 1) of the official training
            set that goes into the training part; the rest becomes the
            validation part.
        data_path: Directory where CIFAR-10 is stored (it is downloaded there
            if missing).
        preprocessor: Transform applied to every image. Defaults to resizing
            to 16 pixels followed by conversion to a tensor.

    Returns:
        A tuple ``(train, val, test)``. Each element is a list of
        ``(image, label)`` pairs with ``label`` in ``{0, 1}``. The lists can
        be wrapped in a ``torch.utils.data.DataLoader`` for batching.

    Raises:
        ValueError: If ``train_val_split`` is not within ``[0, 1]``.
    """
    # Fail early: a fraction outside [0, 1] would yield a negative split size.
    if not 0.0 <= train_val_split <= 1.0:
        raise ValueError(
            f"train_val_split must be in [0, 1], got {train_val_split!r}"
        )

    # Default preprocessing: shrink the images to 16 pixels and convert them
    # to tensors.
    if preprocessor is None:
        preprocessor = transforms.Compose([
            transforms.Resize(16),
            transforms.ToTensor(),
        ])

    # Load the official training and test sets.
    train_val_data = datasets.CIFAR10(data_path, train=True, download=True, transform=preprocessor)
    test_data = datasets.CIFAR10(data_path, train=False, download=True, transform=preprocessor)

    # Split the official training set into training and validation parts.
    train_size = int(train_val_split * len(train_val_data))
    val_size = len(train_val_data) - train_size
    train_data, val_data = random_split(train_val_data, [train_size, val_size])

    # Original CIFAR-10 label -> new binary label: cat (3) -> 0, car (1) -> 1.
    label_map = {3: 0, 1: 1}

    def _keep_and_relabel(dataset):
        """Keep only samples whose label is in label_map, with the new label."""
        return [(img, label_map[label]) for img, label in dataset if label in label_map]

    train = _keep_and_relabel(train_data)
    val = _keep_and_relabel(val_data)
    test = _keep_and_relabel(test_data)

    return train, val, test


def compute_accuracy(model, loader):
    """Return the classification accuracy of ``model`` on ``loader`` in percent.

    Args:
        model: A ``torch.nn.Module`` whose output has one score per class
            along dimension 1.
        loader: Iterable yielding ``(images, labels)`` batches, where
            ``labels`` is a tensor of class indices.

    Returns:
        The percentage (0.0 to 100.0) of samples whose highest-scoring class
        equals the label. Returns 0.0 when ``loader`` yields no samples.
    """
    # Remember the current mode so that evaluating in the middle of a
    # training run does not leave the model stuck in evaluation mode.
    was_training = model.training
    model.eval()
    correct = 0
    total = 0

    try:
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for images, labels in loader:
                outputs = model(images)
                # The predicted class is the index of the largest score.
                predicted = torch.argmax(outputs, dim=1)
                total += labels.size(0)
                correct += torch.eq(predicted, labels).sum().item()
    finally:
        model.train(was_training)

    # Avoid a ZeroDivisionError on an empty loader.
    if total == 0:
        return 0.0

    return correct / total * 100.0


Write a MyMLP class (named `MyNet` in the cell below) that implements an MLP in PyTorch (so only fully
connected layers) such that:

    - The input dimension is 768 (= 16 × 16 × 3) and the output dimension is 2 (for the 2 classes).
    - The hidden layers have 128 and 32 hidden units, respectively.
    - All activation functions are ReLU. The last layer has no activation function, since the cross-entropy loss already includes a softmax activation function.

In [None]:
class MyNet(nn.Module):
    """Multi-layer perceptron for 16x16 RGB images with two output classes.

    Architecture (fully connected layers only):
    768 -> 128 -> ReLU -> 32 -> ReLU -> 2.
    The last layer has no activation; it returns raw scores (logits), as
    expected by the cross-entropy loss.
    """

    def __init__(self, n_in=16 * 16 * 3, n_hidden1=128, n_hidden2=32, n_out=2):
        """Create the three fully connected layers.

        Args:
            n_in: Number of input features after flattening an image.
            n_hidden1: Number of units in the first hidden layer.
            n_hidden2: Number of units in the second hidden layer.
            n_out: Number of output classes.
        """
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(n_in, n_hidden1)
        self.fc2 = nn.Linear(n_hidden1, n_hidden2)
        self.fc3 = nn.Linear(n_hidden2, n_out)

    def forward(self, x):
        """Return the class logits for a batch of images.

        Args:
            x: Batch whose first dimension is the batch size; all remaining
                dimensions are flattened into the feature vector.

        Returns:
            Tensor of shape ``(batch_size, n_out)`` with one score per class.
        """
        out = self.flatten(x)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        return self.fc3(out)

Write a `train(n_epochs, optimizer, model, loss_fn, train_loader)` function that trains `model` for `n_epochs` epochs given an optimizer `optimizer`, a loss function `loss_fn` and a dataloader `train_loader`.

In [None]:
def train(n_epochs, optimizer, model, loss_fn, train_loader):
    """Train ``model`` for ``n_epochs`` epochs using ``optimizer``.

    Args:
        n_epochs: Number of passes over ``train_loader``.
        optimizer: Optimizer that holds the parameters of ``model``.
        model: The ``torch.nn.Module`` to train.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar
            loss tensor.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        A list with the mean training loss of each epoch.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    epoch_losses = []
    model.train()
    # Start from clean gradients in case some were accumulated earlier.
    optimizer.zero_grad()

    for epoch in range(1, n_epochs + 1):
        running_loss = 0.0
        n_batches = 0

        for inputs, labels in train_loader:
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()

            optimizer.step()
            # Gradients accumulate by default, so reset them after each step.
            optimizer.zero_grad()

            running_loss += loss.item()
            n_batches += 1

        if n_batches == 0:
            raise ValueError("train_loader yielded no batches")

        epoch_loss = running_loss / n_batches
        epoch_losses.append(epoch_loss)
        print(f"Epoch {epoch:3d} | training loss {epoch_loss:.6f}")

    return epoch_losses


Write a similar function `train_manual_update` that has no `optimizer` parameter, but a learning rate `lr` parameter instead, and that manually updates each trainable parameter of `model` using equation (2). Do not forget to zero out all gradients after each iteration.

Train 2 instances of MyMLP, one using train and the other using train_manual_update (use the same parameter values for both models). Compare their respective training losses. To get exactly the same results with both functions, see section 3.3

In [None]:
def train_manual_update(n_epochs, model, loss_fn, train_loader, lr=1e-2, momentum_coeff=0., weight_decay=0.):
    """Train ``model`` with hand-written SGD updates instead of an optimizer.

    For every trainable parameter ``p`` with gradient ``g`` the update is::

        g <- g + weight_decay * p            (only if weight_decay != 0)
        v <- g                               (first step, if momentum is used)
        v <- momentum_coeff * v + g          (later steps, if momentum is used)
        p <- p - lr * v                      (or p - lr * g without momentum)

    NOTE(review): this mirrors the update performed by ``torch.optim.SGD``
    (no dampening, no Nesterov) so that results can be compared with
    ``train``; confirm it agrees with equation (2) of the assignment.

    Args:
        n_epochs: Number of passes over ``train_loader``.
        model: The ``torch.nn.Module`` to train.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar
            loss tensor.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        lr: Learning rate.
        momentum_coeff: Momentum coefficient; 0 disables momentum.
        weight_decay: L2 penalty coefficient; 0 disables weight decay.

    Returns:
        A list with the mean training loss of each epoch.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    trainable = [p for p in model.parameters() if p.requires_grad]
    # One momentum buffer per parameter, created lazily on the first update.
    velocities = [None] * len(trainable)

    epoch_losses = []
    model.train()
    # Start from clean gradients in case some were accumulated earlier.
    model.zero_grad()

    for epoch in range(1, n_epochs + 1):
        running_loss = 0.0
        n_batches = 0

        for inputs, labels in train_loader:
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()

            # The update itself must not be tracked by autograd.
            with torch.no_grad():
                for idx, param in enumerate(trainable):
                    if param.grad is None:
                        continue
                    step = param.grad
                    if weight_decay != 0:
                        # Out-of-place add so param.grad itself is untouched.
                        step = step.add(param, alpha=weight_decay)
                    if momentum_coeff != 0:
                        if velocities[idx] is None:
                            velocities[idx] = step.clone()
                        else:
                            velocities[idx].mul_(momentum_coeff).add_(step)
                        step = velocities[idx]
                    param.add_(step, alpha=-lr)

            # Gradients accumulate by default, so reset them after each step.
            model.zero_grad()

            running_loss += loss.item()
            n_batches += 1

        if n_batches == 0:
            raise ValueError("train_loader yielded no batches")

        epoch_loss = running_loss / n_batches
        epoch_losses.append(epoch_loss)
        print(f"Epoch {epoch:3d} | training loss {epoch_loss:.6f}")

    return epoch_losses