In [None]:
import torch
from torch import nn, optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import random_split
from datetime import datetime
import numpy as np



# Getting the same results with train and train_manual_update
- Write `torch.manual_seed(42)` at the beginning of your notebook.
- Write `torch.set_default_dtype(torch.double)` at the beginning of your notebook to alleviate precision errors.

In [None]:
# Reproducibility setup requested by the instructions above, so that `train`
# and `train_manual_update` can produce the same results.

# Fix the seed of PyTorch's global random number generator.
torch.manual_seed(42) 
# Make float64 the default floating-point dtype to alleviate precision errors.
torch.set_default_dtype(torch.double)

# Tasks
Load, analyse and preprocess the CIFAR-10 dataset. Split it into 3
datasets: training, validation and test. Take a subset of these datasets
by keeping only 2 labels: bird and airplane

In [None]:
from torchvision import datasets, transforms
from torch.utils.data import random_split
from torch.utils.data import random_split, DataLoader


from torchvision import datasets, transforms
from torch.utils.data import random_split

def load_cifar(train_val_split=0.9, data_path="../data/", preprocessor=None):
    """Build airplane-vs-bird training, validation and test sets from CIFAR-10.

    Both CIFAR-10 splits are downloaded to ``data_path`` if necessary, then
    fully iterated once so that only samples labelled 0 (airplane) or 2 (bird)
    are kept. Labels are remapped to 0 (airplane) and 1 (bird).

    Args:
        train_val_split: fraction of the filtered training samples assigned to
            the training set; the remainder becomes the validation set.
        data_path: root directory handed to ``datasets.CIFAR10``.
        preprocessor: transform applied to every image; ``transforms.ToTensor()``
            is used when ``None``.

    Returns:
        ``(train_set, val_set, test_set)``: the first two are the result of
        ``random_split`` over the filtered training samples, the last one is a
        plain list of ``(image, label)`` pairs.
    """
    transform = transforms.ToTensor() if preprocessor is None else preprocessor

    def _airplane_bird_pairs(use_train_split):
        """Load one CIFAR-10 split and keep only classes 0 and 2, relabelled 0/1."""
        split = datasets.CIFAR10(
            root=data_path,
            train=use_train_split,
            download=True,
            transform=transform,
        )
        pairs = []
        for image, label in split:
            if label == 0:
                pairs.append((image, 0))
            elif label == 2:
                pairs.append((image, 1))
        return pairs

    # The training split is filtered and partitioned before the test split is
    # loaded, mirroring the original order of operations.
    train_val_pairs = _airplane_bird_pairs(True)
    train_size = int(train_val_split * len(train_val_pairs))
    val_size = len(train_val_pairs) - train_size
    train_set, val_set = random_split(train_val_pairs, [train_size, val_size])

    test_set = _airplane_bird_pairs(False)

    return train_set, val_set, test_set



def compute_accuracy(model, loader):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    The model is switched to evaluation mode (and left in that mode), and
    gradients are disabled while predicting. Every batch is moved to the
    device holding the model's parameters. The predicted class is the argmax
    of the model output along dimension 1.

    Args:
        model: module whose output has one score per class along dimension 1.
        loader: iterable yielding ``(inputs, labels)`` batches.

    Returns:
        ``correct / total`` as a float, or ``0.0`` when the loader yields no
        samples.
    """
    target_device = next(model.parameters()).device
    model.eval()

    n_correct, n_seen = 0, 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(target_device), labels.to(target_device)

            predicted = torch.argmax(model(inputs), dim=1)
            hits = predicted == labels

            n_correct += hits.sum().item()
            n_seen += labels.size(0)

    if n_seen > 0:
        return n_correct / n_seen
    return 0.0



Write a MyMLP class that implements an MLP in PyTorch (so only fully
connected layers) such that:
    
    - The input dimension is 3072 (= 32 ∗ 32 ∗ 3) and the output dimension is 2 (for the 2 classes).
    - The hidden layers have respectively 512, 128 and 32 hidden units.
    - All activation functions are ReLU. The last layer has no activation function since the cross-entropy loss already includes a softmax activation function.

In [None]:
class MyNet(nn.Module):
    """Fully connected classifier with layer widths 3072 -> 512 -> 128 -> 32 -> 2.

    The input is flattened first, every hidden layer is followed by a ReLU,
    and the output layer has no activation: it returns raw scores for the two
    classes, as expected by a cross-entropy loss.
    """

    def __init__(self):
        super().__init__()

        widths = (3072, 512, 128, 32, 2)
        layers = [nn.Flatten()]
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(n_in, n_out))
            layers.append(nn.ReLU())
        # The loop adds a ReLU after every linear layer; remove the one that
        # follows the output layer so the network emits unactivated scores.
        layers.pop()

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` and return the two output scores for each sample."""
        scores = self.net(x)
        return scores


Write a train(n_epochs, optimizer, model, loss_fn, train_loader) function that trains model for n_epochs epochs given an optimizer optimizer, a loss function loss_fn and a dataloader train_loader.

In [None]:
def train(n_epochs, optimizer, model, loss_fn, train_loader):
    """Train ``model`` in place for ``n_epochs`` passes over ``train_loader``.

    Every mini-batch is moved to the device that holds the model's parameters
    before the forward pass, so training works whether the model lives on the
    CPU or has been moved to a GPU.

    Args:
        n_epochs: number of full passes over ``train_loader``.
        optimizer: optimizer built over ``model``'s parameters; its
            ``zero_grad`` and ``step`` methods are called once per batch.
        model: module to train; switched to training mode at the start of
            every epoch.
        loss_fn: callable ``loss_fn(outputs, targets)`` returning the tensor
            on which ``backward()`` is called.
        train_loader: iterable yielding ``(inputs, targets)`` batches whose
            elements provide a ``.to(device)`` method.

    Returns:
        None. The model parameters are updated in place.
    """
    # Inputs must live on the same device as the parameters; otherwise the
    # forward pass fails as soon as the model has been moved off the CPU.
    device = next(model.parameters()).device

    for _ in range(n_epochs):
        model.train()

        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)

            outputs = model(x)
            loss = loss_fn(outputs, y)

            # Clear gradients left over from the previous step before
            # accumulating the new ones.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()


from torch.utils.data import DataLoader

# Data: airplane-vs-bird subsets of CIFAR-10, 90% of the training samples used
# for training and the rest for validation.
train_set, val_set, test_set = load_cifar(train_val_split=0.9, data_path="../data/")

# Only the training loader is shuffled; evaluation order does not matter.
train_loader = DataLoader(train_set, shuffle=True, batch_size=64)
val_loader = DataLoader(val_set, shuffle=False, batch_size=64)
test_loader = DataLoader(test_set, shuffle=False, batch_size=64)

# Model: built on the CPU, then moved to the GPU when one is available.
model = MyNet()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Optimisation: cross-entropy loss minimised with SGD and momentum.
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

n_epochs = 5
train(n_epochs, optimizer, model, loss_fn, train_loader)

# Evaluation on the held-out validation and test sets.
val_acc = compute_accuracy(model, val_loader)
test_acc = compute_accuracy(model, test_loader)

print("Validation accuracy:", val_acc)
print("Test accuracy:", test_acc)




Validation accuracy: 0.711
Test accuracy: 0.715


Write a similar function `train_manual_update` that has no `optimizer` parameter but a learning rate `lr` parameter instead, and that manually updates each trainable parameter of `model` using equation (2). Do not forget to zero out all gradients after each iteration.

Train 2 instances of MyMLP, one using train and the other using train_manual_update (use the same parameter values for both models). Compare their respective training losses. To get exactly the same results with both functions, see section 3.3

In [None]:
def train_manual_update(n_epochs, model, loss_fn, train_loader, lr=1e-2, momentum_coeff=0., weight_decay=0.):

    device = next(model.parameters()).device
    model.train()

    for epoch in range(n_epochs):
        model.train()

        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)
            outputs = model(x)
            loss = loss_fn(outputs, y)

            model.zero_grad()
            loss.backward() 


            #update 
