In [None]:
from utils import load_dataset
import torch
from datetime import datetime
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
import matplotlib.pyplot as plt





### 3.1.1 ###
Load, analyse and preprocess the CIFAR-10 dataset. Split it into 3 datasets: training, validation and test. Take a subset of these datasets by keeping only 2 labels: bird and plane.

In [None]:
# First pass over the data: no pre-processing argument is given here, so the
# splits are loaded as the helper provides them by default.
cifar10_train, cifar10_val, cifar10_test = load_dataset.load_CIFAR10(
    "CIFAR10"
)

# Reduce each split to the two-class problem described in the task statement
# (bird vs. plane); the selection itself is done by the project helper.
cifar2_train, cifar2_val, cifar2_test = load_dataset.subset_dataset(
    cifar10_train,
    cifar10_val,
    cifar10_test,
)

In [None]:
# Report how many samples each two-class split contains.
for split_name, split in (
    ("training", cifar2_train),
    ("validation", cifar2_val),
    ("test", cifar2_test),
):
    print(f'Size of the {split_name} dataset: ', len(split))


In [None]:
# Turn every training image into a tensor and stack them into a single batch
# so the statistics can be computed in one call.
imgs = torch.stack(
    [transforms.functional.to_tensor(picture) for picture, _label in cifar10_train]
)
print(imgs.shape)

# NOTE: these are scalar statistics taken over every pixel of every channel,
# not per-channel values; the same scalar is then applied to all channels.
mean, std = torch.mean(imgs), torch.std(imgs)
print("Mean: ", mean, "std: ", std)

normalizer = transforms.Normalize(mean, std)

In [None]:
# Pre-processing pipeline: convert to tensor first, then standardise with the
# statistics measured on the training images.
cifar_preprocessor = transforms.Compose(
    [transforms.ToTensor(), normalizer]
)

# Reload the splits with the pipeline attached, then rebuild the two-class
# subsets from the freshly loaded data.
cifar10_train, cifar10_val, cifar10_test = load_dataset.load_CIFAR10(
    "CIFAR10",
    pre_processing=cifar_preprocessor,
)
cifar2_train, cifar2_val, cifar2_test = load_dataset.subset_dataset(
    cifar10_train,
    cifar10_val,
    cifar10_test,
)

In [None]:
# Show one example image per class label (0 and 1), side by side.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8,3))
for i, ax in enumerate(axes.flat):
    # Find the first image in the dataset carrying label i; fail with a clear
    # message (instead of a bare StopIteration) if the label is absent.
    img = next((img for img, label in cifar2_train if label == i), None)
    if img is None:
        raise ValueError(f"No image with label {i} found in cifar2_train")

    # The dataset was loaded with the normalising pre-processor, so pixel
    # values are standardised. imshow expects float RGB data in [0, 1] and
    # would clip anything outside that range, so undo the normalisation with
    # the same `mean`/`std` before plotting. The clamp only guards against
    # floating-point round-off.
    img = (img * std + mean).clamp(0, 1)

    # Channels-first (C, H, W) -> channels-last (H, W, C) for matplotlib.
    # No cmap is passed: colormaps are ignored for RGB input anyway.
    ax.imshow(img.permute(1,2,0))
    # Title each panel with its class label
    ax.set_title(i)
    # Remove ticks
    ax.axis('off')

plt.show()

### 3.1.2 ###
Write a `MyMLP` class that implements an MLP in PyTorch (so only fully connected layers) such
that:

    (a) The input dimension is 3072 (= 32*32*3) and the output dimension is 2 (for the 2
    classes).

    (b) The hidden layers have respectively 512, 128 and 32 hidden units.

    (c) All activation functions are ReLU. The last layer has no activation function since the
    cross-entropy loss already includes a softmax activation function.

In [None]:
class MyMLP(nn.Module):
    """Multi-layer perceptron with layer widths 3072 -> 512 -> 128 -> 32 -> 2.

    Every hidden layer is followed by a ReLU. The output layer returns raw
    scores with no activation applied.
    """

    def __init__(self):
        super().__init__()
        in_features = 32 * 32 * 3
        # Layers are created in forward order so that seeded initialisation
        # is reproducible.
        self.fc1 = nn.Linear(in_features, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 32)
        self.fc4 = nn.Linear(32, 2)

    def forward(self, x):
        """Flatten everything after the batch dimension and run the layers."""
        hidden = x.flatten(start_dim=1)
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = layer(hidden).relu()
        # No activation on the final layer.
        return self.fc4(hidden)


### 3.1.3 ###
Write a `train(n_epochs, optimizer, model, loss_fn, train_loader)` function that trains
`model` for `n_epochs` epochs given an optimizer `optimizer`, a loss function `loss_fn` and a dataloader `train_loader`.

In [None]:
def train(n_epochs, optimizer, model, loss_fn, train_loader, device=None):
    """Train ``model`` for ``n_epochs`` epochs and return the per-epoch losses.

    Args:
        n_epochs: Number of full passes over ``train_loader``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        model: The ``nn.Module`` to train. It is put in training mode and is
            NOT moved between devices by this function.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar
            tensor (e.g. a mean-reduced loss).
        train_loader: Iterable yielding ``(imgs, labels)`` batches. It does
            not need to define ``__len__``.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter, so data and model always agree. If the
            model has no parameters, falls back to CUDA when available and
            CPU otherwise.

    Returns:
        A list with one float per epoch: the mean of the batch losses.

    Raises:
        ValueError: If ``train_loader`` yields no batch during an epoch.
    """
    if device is None:
        # Follow the model rather than guessing: picking CUDA whenever it is
        # available breaks with a device mismatch if the model is on the CPU.
        first_param = next(model.parameters(), None)
        if first_param is not None:
            device = first_param.device
        else:
            device = (
                torch.device('cuda') if torch.cuda.is_available()
                else torch.device('cpu'))
    print(f"Training on device {device}.")

    # We'll store there the training loss for each epoch
    losses_train = []

    # Set the network in training mode
    model.train()

    # Re-initialize gradients, just in case the model has been inappropriately
    # manipulated before the training
    optimizer.zero_grad(set_to_none=True)

    for epoch in range(1, n_epochs + 1):

        # Running sum of batch losses and number of batches seen this epoch.
        # Counting here (instead of calling len(train_loader)) supports
        # loaders without __len__ and gives an exact denominator.
        loss_train = 0.0
        n_batch = 0

        # Loop over our dataset (in batches the data loader creates for us)
        for imgs, labels in train_loader:

            imgs = imgs.to(device=device)
            labels = labels.to(device=device)

            # Feed a batch into our model
            outputs = model(imgs)

            # Compute the loss we wish to minimize
            loss = loss_fn(outputs, labels)

            # Backward step: compute the gradients of all learnable parameters
            loss.backward()

            # Update the model
            optimizer.step()

            # Zero out gradients before the next round (or the end of training)
            optimizer.zero_grad()

            # .item() converts the loss to a plain number so the autograd
            # graph of each batch is not kept alive by the running sum
            loss_train += loss.item()
            n_batch += 1

        if n_batch == 0:
            raise ValueError(
                "train_loader yielded no batches; cannot compute an epoch loss")

        # Store current epoch loss.
        epoch_loss = loss_train / n_batch
        losses_train.append(epoch_loss)

        if epoch == 1 or epoch % 10 == 0:
            print('{}  |  Epoch {}  |  Training loss {:.3f}'.format(
                datetime.now().time(), epoch, epoch_loss))
    return losses_train

### 3.1.5 ###
Train 2 instances of `MyMLP`, one using `train` and the other using `train_manual_update` (use
the same parameter values for both models). Compare their respective training losses. To get
exactly the same results with both functions, see section 3.3.

Note: this was only done for `train`, since `train_manual_update` has not been implemented.

In [None]:
# Train on the GPU when one is available, otherwise on the CPU.
device = (
    torch.device('cuda') if torch.cuda.is_available()
    else torch.device('cpu'))

# Hyper-parameters of this run, kept together so they are easy to tune.
n_epochs = 20
batch_size = 64
lr = 1e-1

# Shuffle so each epoch sees the training samples in a different order.
train_loader = torch.utils.data.DataLoader(cifar2_train, batch_size=batch_size, shuffle=True)

# Bookkeeping lists so further models can be appended and compared later.
models = []
model_names = []
losses_train = []

# Move the model to the target device BEFORE building the optimizer.
model = MyMLP().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=lr)
loss_fn = nn.CrossEntropyLoss()

# Pass the device explicitly so batches and model are guaranteed to agree.
loss_train = train(
    n_epochs = n_epochs,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
    device = device,
)

models.append(model)
# Build the label from the actual learning rate so the two cannot drift apart.
model_names.append(f'MyMLP, lr={lr}')
losses_train.append(loss_train)

### 3.1.10 ###
Evaluate the best model and analyse its performance.

In [None]:
# Loaders used for evaluation only: fixed batch size and no shuffling.
# Note that `train_loader` is rebound here to a non-shuffled loader.
eval_loader_kwargs = {"batch_size": 64, "shuffle": False}
train_loader = torch.utils.data.DataLoader(cifar2_train, **eval_loader_kwargs)
val_loader = torch.utils.data.DataLoader(cifar2_val, **eval_loader_kwargs)

def compute_accuracy(model, loader, device=None):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    The model is switched to evaluation mode (and left in it), gradients are
    disabled, and the predicted class is the index of the largest output
    along dimension 1.

    Args:
        model: The ``nn.Module`` to evaluate.
        loader: Iterable yielding ``(imgs, labels)`` batches.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters), so
            a model living on the GPU can be evaluated with CPU batches.

    Returns:
        The accuracy as a float in ``[0, 1]``; it is also printed.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    correct = 0
    total = 0

    # We do not want gradients here, as we will not want to update the parameters.
    with torch.no_grad():
        for imgs, labels in loader:
            # Batches must live on the same device as the model's weights,
            # otherwise the forward pass raises a device-mismatch error.
            imgs = imgs.to(device=device)
            labels = labels.to(device=device)

            predicted = model(imgs).argmax(dim=1)
            total += labels.shape[0]
            correct += int((predicted == labels).sum())

    if total == 0:
        raise ValueError("loader yielded no samples; accuracy is undefined")

    acc = correct / total
    print("Accuracy: {:.2f}".format(acc))
    return acc

# Report accuracy on the data the model was fitted on ...
print("Training accuracy:")
compute_accuracy(model=model, loader=train_loader)

# ... and on the held-out validation split.
print("Validation accuracy:")
compute_accuracy(model=model, loader=val_loader)