# Training a CNN 


Modified by: Harold Mouchère + Nicolas Normand / University of Nantes

date : 2023

Questions are at the end of the notebook.


In [None]:
import time
import copy
import torch
import torchvision
import torchvision.transforms as transforms

import os
# Send HTTP and HTTPS requests through the proxy at cache.ha.univ-nantes.fr.
os.environ.update({
    'HTTP_PROXY': 'http://cache.ha.univ-nantes.fr:3128',
    'HTTPS_PROXY': 'http://cache.ha.univ-nantes.fr:3128',
})

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# On a CUDA machine this prints a CUDA device:

print(device)


## Preparation of the dataset
Torchvision datasets yield PIL images; `ToTensor` converts them to tensors with values in the range [0, 1].
We pad the 28×28 MNIST images to 32×32 pixels and normalize them to the range [-1, 1].

In [None]:
# Preprocessing applied to every image: pad each border by 2 pixels,
# convert to a tensor, then normalise with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Pad(2),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# number of samples per mini-batch
minibatchsize = 32

datadir = '../data'
fulltrainset = torchvision.datasets.MNIST(
    root=datadir, train=True, download=True, transform=transform)

# Randomly partition the full training part into 20000 training samples,
# 10000 validation samples and 30000 samples that are left unused.
trainset, validationset, _ignored_part = torch.utils.data.random_split(
    dataset=fulltrainset, lengths=[20000, 10000, 30000])

# Only the training loader shuffles its samples.
trainloader = torch.utils.data.DataLoader(
    dataset=trainset, batch_size=minibatchsize, shuffle=True, num_workers=0)

validationloader = torch.utils.data.DataLoader(
    dataset=validationset, batch_size=minibatchsize, shuffle=False,
    num_workers=0)

testset = torchvision.datasets.MNIST(
    root=datadir, train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    dataset=testset, batch_size=minibatchsize, shuffle=False, num_workers=0)

# the class names are the ten digits 0..9
classes = range(10)
nb_classes = len(classes)

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

# function to show an image
def imshow(img, filename=''):
    """Display an image tensor, or save it to a file.

    Args:
        img: image tensor whose axis 0 is moved last before plotting
            (e.g. the output of ``torchvision.utils.make_grid``). Its
            values are mapped back with ``img / 2 + 0.5``, which undoes a
            normalisation with mean 0.5 and std 0.5.
        filename: when non-empty, the figure is written to this path with
            ``plt.savefig``; otherwise it is shown with ``plt.show``.
    """
    # detach() and cpu() make the conversion to NumPy work for tensors that
    # live on the GPU or are tracked by autograd; both leave a plain CPU
    # tensor unchanged.
    img = img.detach().cpu() / 2 + 0.5     # unnormalize
    numpy_img = img.numpy()
    # matplotlib expects the channel axis last
    plt.imshow(np.transpose(numpy_img, (1, 2, 0)))
    if filename:
        plt.savefig(filename)
    else:
        plt.show()

# get one mini-batch of training images
dataiter = iter(trainloader)
images, labels = next(dataiter)

# show the images as a grid holding `images_per_row` images on each row
images_per_row = 8
imshow(torchvision.utils.make_grid(images, nrow=images_per_row))
# print the label of every image in the batch (not only the first four),
# one text line per grid row so that labels line up with the picture
for row_start in range(0, len(labels), images_per_row):
    row_labels = labels[row_start:row_start + images_per_row]
    print(' '.join('%5s' % classes[int(label)] for label in row_labels))


## Define a CNN

Propose a CNN constructor that takes meta-parameters as arguments (number of convolutional layers, number of kernels per convolutional layer, number of fully connected layers, number of hidden units per fully connected layer).


In [None]:
import torch.nn as nn
import torch.nn.functional as F

# This class defines the CNN architecture.
# The default architecture is close to the LeNet-5 one.
class NetCNN(nn.Module):
    """Small convolutional classifier with two conv layers and three FC layers.

    The layer sizes assume single-channel 32x32 inputs: each 5x5 convolution
    removes 4 pixels per dimension and each 2x2 pooling halves it, so the
    feature maps entering ``fc1`` are 16 x 5 x 5.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)       # 1 input channel -> 6 maps, 5x5 kernels
        self.pool = nn.MaxPool2d(2, 2)        # 2x2 max pooling, shared by both stages
        self.conv2 = nn.Conv2d(6, 16, 5)      # 6 maps -> 16 maps, 5x5 kernels
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)          # one output per class

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images.

        Args:
            x: input batch of shape (batch, 1, 32, 32).

        Returns:
            Tensor of shape (batch, 10) holding unnormalised scores.
            No softmax is applied: ``nn.CrossEntropyLoss`` applies
            log-softmax itself and expects logits. Use
            ``F.softmax(output, dim=1)`` when probabilities are needed;
            the arg-max (predicted class) is the same either way.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything but the batch axis. Unlike view(-1, 16 * 5 * 5),
        # this never regroups samples: a wrongly sized input fails in fc1
        # instead of silently changing the batch size.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)


In [None]:
########################################################################
# Build the network to use and place it on the selected device
# (GPU or CPU); Module.to() returns the module itself.
net = NetCNN().to(device)
# display its structure:
print(net)

# Define a Loss function and optimizer

In [None]:
# Let's use a Classification Cross-Entropy loss and SGD with momentum.

import torch.optim as optim

# Classification loss: nn.CrossEntropyLoss applies log-softmax internally,
# so it expects unnormalised class scores.
criterion = nn.CrossEntropyLoss()
# Stochastic gradient descent with momentum over all the network parameters.
optimizer = optim.SGD(params=net.parameters(), momentum=0.9, lr=0.001)

## Q1: Optimize the architecture

1. Reuse the training function from the previous TP
2. Analyse your network : recognition rate, confusion matrix 
3. Select one meta-parameter (or two, depending on the available time) and draw the curve showing the loss as a function of this parameter
4. Evaluate and analyse the best network, compute the number of free parameters (complexity). See documentation of [nn.Module.parameters()](https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.parameters).

## Q2: Draw the Pareto front collectively
Aggregate all the results from the group to draw a full Pareto front. Do not hesitate to change the architecture to explore the full **complexity/error rate** trade-off.