In [1]:
from utils import load_dataset
import torch
from datetime import datetime
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
import matplotlib.pyplot as plt
from torchvision.datasets import CIFAR10

In [2]:
# Seed PyTorch's global random number generator so runs are repeatable.
torch.manual_seed(123)
# Use float64 as the default floating-point dtype for tensors created from here on.
# Kept as the last statement: it returns None, so the cell shows no output.
torch.set_default_dtype(torch.double)

In [3]:
# Prefer the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### 3.1.1 ###
Load, analyse and preprocess the CIFAR-10 dataset. Split it into 3 datasets: training, validation and test. Take a subset of these datasets by keeping only 2 labels: bird and plane.

In [4]:
# load_dataset comes from the local `utils` module (not shown in this notebook);
# its three return values are bound as the training, validation and test splits.
# NOTE(review): presumably the splits are already restricted to the bird/plane
# classes, as the task statement asks -- confirm in utils.load_dataset.
cifar2_train, cifar2_val, cifar2_test = load_dataset(CIFAR10)

Files already downloaded and verified
Files already downloaded and verified
Dataset: <class 'torchvision.datasets.cifar.CIFAR10'>
Size of the train dataset:       , 45000
Size of the validation dataset:  , 5000
Size of the test dataset:        , 10000


### 3.1.2 ###
Write a `MyMLP` class that implements an MLP in PyTorch (i.e. only fully connected layers) such
that:

    (a) The input dimension is 3072 (= 32*32*3) and the output dimension is 2 (for the 2
    classes).

    (b) The hidden layers have respectively 512, 128 and 32 hidden units.

    (c) All activation functions are ReLU. The last layer has no activation function since the
    cross-entropy loss already includes a softmax activation function.

In [5]:
class MyMLP(nn.Module):
    """Fully connected classifier: 3072 -> 512 -> 128 -> 32 -> 2.

    ReLU follows every hidden layer. The output layer returns raw scores
    (no activation), as expected by a cross-entropy loss.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in input-to-output order; keeping this order
        # keeps seeded weight initialisation reproducible.
        self.fc1 = nn.Linear(in_features=3 * 32 * 32, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=128)
        self.fc3 = nn.Linear(in_features=128, out_features=32)
        self.fc4 = nn.Linear(in_features=32, out_features=2)

    def forward(self, x):
        """Flatten every sample of ``x`` and return the two class scores per sample."""
        hidden = x.flatten(start_dim=1)
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = layer(hidden).relu()
        return self.fc4(hidden)


### 3.1.3 ###
Write a `train(n_epochs, optimizer, model, loss_fn, train_loader)` function that trains
`model` for `n_epochs` epochs given an optimizer `optimizer`, a loss function `loss_fn` and a dataloader `train_loader`.

In [6]:
def train(n_epochs, optimizer, model, loss_fn, train_loader):
    """Train ``model`` for ``n_epochs`` epochs with the given ``optimizer``.

    Every batch is moved to the device that holds the model's parameters and
    cast to ``torch.double`` before the forward pass.

    Args:
        n_epochs: Number of full passes over ``train_loader``.
        optimizer: Optimizer providing ``step()`` and ``zero_grad()`` for the
            parameters of ``model``.
        model: The ``nn.Module`` to train. It is switched to training mode.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar tensor.
        train_loader: Sized iterable yielding ``(imgs, labels)`` batches.

    Returns:
        A list holding, for every epoch, the mean of the per-batch losses.
    """
    # Follow the model rather than guessing from CUDA availability: a model
    # kept on the CPU of a CUDA machine must still receive CPU batches.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    print(f"Training on device {device}.")

    n_batch = len(train_loader)

    # Mean training loss of each epoch
    losses_train = []

    # Set the network in training mode
    model.train()

    # Drop any gradients left over from manipulations done before training
    optimizer.zero_grad(set_to_none=True)

    for epoch in range(1, n_epochs + 1):

        # Sum of the batch losses of the current epoch
        loss_train = 0

        for imgs, labels in train_loader:

            imgs = imgs.to(device=device, dtype=torch.double)
            labels = labels.to(device=device)

            # Forward pass
            outputs = model(imgs)

            # Loss to minimise (loss functions such as nn.CrossEntropyLoss
            # average over the batch by default)
            loss = loss_fn(outputs, labels)

            # Backward pass: compute the gradients of the learnable parameters
            loss.backward()

            # Update the model, then clear the gradients for the next batch
            optimizer.step()
            optimizer.zero_grad()

            # .item() extracts a plain Python number, so the running sum does
            # not keep tensors (and their graphs) alive
            loss_train += loss.item()

        epoch_loss = loss_train / n_batch
        losses_train.append(epoch_loss)

        if epoch == 1 or epoch % 5 == 0:
            print('{}  |  Epoch {}  |  Training loss {:.3f}'.format(
                datetime.now().time(), epoch, epoch_loss))
    return losses_train

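### 3.1.4 ###
Write a `train_manual_update(n_epochs, lr, model, loss_fn, train_loader, weight_decay=0, momentum=0)` function that trains `model` like `train` does, but updates the parameters manually (gradient step with optional L2 weight decay and momentum) instead of calling an optimizer.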

In [7]:
def train_manual_update(n_epochs, lr, model, loss_fn, train_loader, weight_decay=0, momentum=0):
    """Train ``model`` with a hand-written gradient step instead of an optimizer.

    For every parameter ``p`` with gradient ``g`` the update is::

        g = g + weight_decay * p          # only if weight_decay is non-zero
        v = g                             # first step, if momentum is non-zero
        v = momentum * v + g              # later steps, if momentum is non-zero
        p = p - lr * (v if momentum else g)

    Args:
        n_epochs: Number of full passes over ``train_loader``.
        lr: Learning rate.
        model: The ``nn.Module`` to train. It is switched to training mode.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar tensor.
        train_loader: Sized iterable yielding ``(imgs, labels)`` batches.
        weight_decay: L2 regularisation factor; ``0`` disables it.
        momentum: Momentum factor; ``0`` disables it.

    Returns:
        A list holding, for every epoch, the mean of the per-batch losses.
    """
    # Use the device the model lives on, falling back to CUDA-if-available
    # only for a model without parameters.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    n_batch = len(train_loader)

    # Mean training loss of each epoch
    losses_train = []

    # Momentum buffers, one per parameter name
    velocities = {}

    # Set the network in training mode
    model.train()

    # Drop any gradients left over from manipulations done before training
    model.zero_grad(set_to_none=True)

    for epoch in range(1, n_epochs + 1):

        # Sum of the batch losses of the current epoch
        loss_train = 0

        for imgs, labels in train_loader:

            # Tensor.to() returns a new tensor, so the result must be
            # re-bound; otherwise the batch is neither moved nor cast.
            imgs = imgs.to(device=device, dtype=torch.double)
            labels = labels.to(device=device)

            # Forward pass, loss, backward pass
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)
            loss.backward()

            # The update itself must not be recorded by autograd
            with torch.no_grad():
                for name, p in model.named_parameters():
                    # Frozen or unused parameters have no gradient to apply
                    if p.grad is None:
                        continue
                    grad = p.grad

                    # L2 regularisation
                    if weight_decay:
                        grad = grad + weight_decay * p

                    # Momentum
                    if momentum:
                        buf = velocities.get(name)
                        if buf is None:
                            # Independent copy: p.grad is reset after each step
                            buf = velocities[name] = grad.detach().clone()
                        else:
                            buf.mul_(momentum).add_(grad)
                        grad = buf

                    # Learning step
                    p -= grad * lr

            # Clear the gradients before the next batch
            model.zero_grad()

            # .item() extracts a plain Python number from the loss tensor
            loss_train += loss.item()

        epoch_loss = loss_train / n_batch
        losses_train.append(epoch_loss)

        if epoch == 1 or epoch % 5 == 0:
            print('{}  |  Epoch {}  |  Training loss {:.3f}'.format(
                datetime.now().time(), epoch, epoch_loss))

    return losses_train

### 3.1.5 ###
Train 2 instances of `MyMLP`, one using `train` and the other using `train_manual_update` (use
the same parameter values for both models). Compare their respective training losses. To get
exactly the same results with both functions, see section 3.3.

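Note: both models are created from the same seed and trained on the same unshuffled batches, one with `train` and one with `train_manual_update`, so their training losses should agree up to floating-point rounding.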

In [8]:
# Hyperparameters shared by the two training routines
n_epochs, batch_size = 10, 128
lr, weight_decay, momentum = 1e-1, 1e-3, 0.9

# Re-seed before each construction so both networks start from identical weights
torch.manual_seed(123)
model_train = MyMLP().to(device)
torch.manual_seed(123)
model_manual_train = MyMLP().to(device)

optimizer = optim.SGD(
    model_train.parameters(),
    lr=lr,
    momentum=momentum,
    weight_decay=weight_decay,
)
loss_fn = nn.CrossEntropyLoss()
# No shuffling: both runs see the batches in the same order
train_loader = torch.utils.data.DataLoader(
    cifar2_train, batch_size=batch_size, shuffle=False
)

In [9]:
# Train the optimizer-driven model; the returned per-epoch losses are the cell output.
train(n_epochs, optimizer, model_train, loss_fn, train_loader)

Training on device cpu.
14:16:38.135411  |  Epoch 1  |  Training loss 0.499
14:16:42.601544  |  Epoch 5  |  Training loss 0.337
14:16:48.483844  |  Epoch 10  |  Training loss 0.277


[0.4991299856039895,
 0.4179808006687227,
 0.38310016322177143,
 0.35417560659043956,
 0.3369651512339421,
 0.31428151765006895,
 0.30157356896140336,
 0.3016805296169667,
 0.27999407095439194,
 0.27655556492489153]

In [10]:
# Train the identically initialised twin model with the hand-written update,
# using the same lr / weight decay / momentum as the optimizer above.
train_manual_update(n_epochs, lr, model_manual_train, loss_fn, train_loader, weight_decay, momentum)

14:16:49.921635  |  Epoch 1  |  Training loss 0.499
14:16:55.285176  |  Epoch 5  |  Training loss 0.337
14:17:02.098553  |  Epoch 10  |  Training loss 0.277


[0.4991299856039894,
 0.4179808006687227,
 0.38310016322177143,
 0.35417560659043956,
 0.33696515123394216,
 0.31428151765006895,
 0.30157356896140325,
 0.30168052961696673,
 0.27999407095439205,
 0.27655556492489247]

### 3.1.10 ###
Evaluate the best model and analyse its performance.

In [11]:
# Unshuffled loaders over the training and validation splits, 64 samples per batch
train_loader, val_loader = (
    torch.utils.data.DataLoader(split, batch_size=64, shuffle=False)
    for split in (cifar2_train, cifar2_val)
)

def compute_accuracy(model, loader):
    """Print and return the fraction of samples ``model`` classifies correctly.

    The model is switched to evaluation mode. Each batch is moved to the
    device (and floating-point dtype) of the model's parameters, so the
    function does not depend on a global ``device`` variable.

    Args:
        model: The ``nn.Module`` to evaluate; its output holds one score per
            class along dimension 1.
        loader: Iterable yielding ``(imgs, labels)`` batches.

    Returns:
        ``correct / total`` as a float in ``[0, 1]``.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()

    first_param = next(model.parameters(), None)
    if first_param is not None:
        target_device = first_param.device
        # Only align the dtype when the model's parameters are floating point
        target_dtype = first_param.dtype if first_param.is_floating_point() else None
    else:
        target_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        target_dtype = None

    correct = 0
    total = 0

    # No gradients are needed: the parameters are not updated here.
    with torch.no_grad():
        for imgs, labels in loader:
            imgs = imgs.to(device=target_device, dtype=target_dtype)
            labels = labels.to(device=target_device)

            outputs = model(imgs)
            # The predicted class is the index of the largest score
            _, predicted = torch.max(outputs, dim=1)
            total += labels.shape[0]
            correct += int((predicted == labels).sum())

    if total == 0:
        raise ValueError("loader yielded no samples; accuracy is undefined")

    acc = correct / total
    print("Accuracy: {:.2f}".format(acc))
    return acc

In [12]:
# Global settings reused for all 4 models

seed = 256
n_epoch = 100
batch_size = 256
loss_fn = nn.CrossEntropyLoss()
# Unshuffled loaders over the training and validation splits
train_loader, val_loader = (
    torch.utils.data.DataLoader(split, batch_size=batch_size, shuffle=False)
    for split in (cifar2_train, cifar2_val)
)

In [13]:
# Model 1: learning rate 0.01, no momentum, no weight decay
lr_1, mom_1, decay_1 = 0.01, 0, 0

# Seed right before construction so the initial weights are reproducible
torch.manual_seed(seed)
model_1 = MyMLP().to(device)

train_manual_update(
    n_epoch, lr_1, model_1, loss_fn, train_loader,
    momentum=mom_1, weight_decay=decay_1,
)
# Accuracy on the training split, then on the validation split (the latter is the cell output)
compute_accuracy(model_1, train_loader)
compute_accuracy(model_1, val_loader)

14:17:03.008989  |  Epoch 1  |  Training loss 0.682
14:17:05.892872  |  Epoch 5  |  Training loss 0.563
14:17:09.355357  |  Epoch 10  |  Training loss 0.474
14:17:12.874920  |  Epoch 15  |  Training loss 0.429
14:17:16.418281  |  Epoch 20  |  Training loss 0.390
14:17:19.878996  |  Epoch 25  |  Training loss 0.356
14:17:23.364676  |  Epoch 30  |  Training loss 0.322
14:17:26.809942  |  Epoch 35  |  Training loss 0.289
14:17:30.662488  |  Epoch 40  |  Training loss 0.265
14:17:34.552870  |  Epoch 45  |  Training loss 0.255
14:17:38.373960  |  Epoch 50  |  Training loss 0.235
14:17:42.170278  |  Epoch 55  |  Training loss 0.249
14:17:46.001764  |  Epoch 60  |  Training loss 0.211
14:17:49.717840  |  Epoch 65  |  Training loss 0.161
14:17:53.452008  |  Epoch 70  |  Training loss 0.132
14:17:57.793272  |  Epoch 75  |  Training loss 0.143
14:18:01.713645  |  Epoch 80  |  Training loss 0.136
14:18:05.582019  |  Epoch 85  |  Training loss 0.078
14:18:09.478264  |  Epoch 90  |  Training loss 0

0.8494404883011191