<center><h1>1-cd: Convolutional Neural Networks (ConvNets)</h1></center>

<center><h2><a href="https://rdfia.github.io/">Course link</a></h2></center>


In [1]:
#!git clone https://github.com/cdancette/deep-learning-polytech-tp6-7.git
! wget https://github.com/rdfia/rdfia.github.io/raw/master/code/2-cd/utils.py

--2023-11-21 15:44:12--  https://github.com/rdfia/rdfia.github.io/raw/master/code/2-cd/utils.py
Resolving github.com (github.com)... 140.82.121.4
Connecting to github.com (github.com)|140.82.121.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/rdfia/rdfia.github.io/master/code/2-cd/utils.py [following]
--2023-11-21 15:44:12--  https://raw.githubusercontent.com/rdfia/rdfia.github.io/master/code/2-cd/utils.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2627 (2.6K) [text/plain]
Saving to: ‘utils.py’


2023-11-21 15:44:12 (59.0 MB/s) - ‘utils.py’ saved [2627/2627]



In [2]:
# Execute the downloaded helper script inside the notebook's interactive namespace.
%run 'utils.py'

In [3]:
import argparse
import os
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets

import torch.optim.lr_scheduler

from utils import *

# Number of batches between two progress reports; in this notebook it is only
# referenced by the disabled per-batch logging block inside epoch().
PRINT_INTERVAL = 200
# Root directory handed to datasets.CIFAR10 (download target and read location).
PATH="datasets"

KeyboardInterrupt: ignored

In [None]:
class ConvNet(nn.Module):
    """
    Baseline network: three convolution stages used as a feature extractor,
    followed by two fully connected layers that output 10 class scores.
    """

    def __init__(self):
        super().__init__()

        def conv_stage(in_channels, out_channels, ceil_mode=False):
            # One stage = 5x5 convolution (padding 2, stride 1), ReLU, 2x2 max-pooling with stride 2
            return [
                nn.Conv2d(in_channels, out_channels, (5, 5), stride=1, padding=2),
                nn.ReLU(),
                nn.MaxPool2d((2, 2), stride=2, padding=0, ceil_mode=ceil_mode),
            ]

        # Feature extractor: conv1 -> conv2 -> conv3 (only the last pooling rounds its output size up)
        self.features = nn.Sequential(
            *conv_stage(3, 32),
            *conv_stage(32, 64),
            *conv_stage(64, 64, ceil_mode=True),
        )

        # Classifier: fc4, relu, fc5 (no dropout in this baseline version)
        self.classifier = nn.Sequential(
            nn.Linear(1024, 1000),  # 1024 = 4*4*64 flattened features
            nn.ReLU(),
            nn.Linear(1000, 10),
            # Reminder: the softmax is part of the loss, so raw scores are returned
        )

    def forward(self, input):
        """Apply the network to a batch and return one row of 10 scores per example."""
        feature_maps = self.features(input)
        # Collapse every example's feature maps into a single vector
        flattened = feature_maps.view(input.size(0), -1)
        return self.classifier(flattened)



def get_dataset(batch_size, cuda=False):
    """
    Build the CIFAR-10 training and validation loaders without any
    preprocessing beyond the conversion of each image to a tensor.

    `batch_size` is used for both loaders and `cuda` is forwarded as
    `pin_memory`. Returns `(train_loader, val_loader)`.
    """
    train_pipeline = transforms.Compose([transforms.ToTensor()])
    val_pipeline = transforms.Compose([transforms.ToTensor()])

    train_dataset = datasets.CIFAR10(PATH, train=True, download=True, transform=train_pipeline)
    val_dataset = datasets.CIFAR10(PATH, train=False, download=True, transform=val_pipeline)

    # The two loaders only differ by shuffling, which is enabled for training data
    shared_options = dict(batch_size=batch_size, pin_memory=cuda, num_workers=2)
    train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **shared_options)
    val_loader = torch.utils.data.DataLoader(val_dataset, shuffle=False, **shared_options)
    return train_loader, val_loader



def epoch(data, model, criterion, optimizer=None, cuda=False):
    """
    Make one pass over the batches of `data` with the model `model`,
    evaluating `criterion` as the loss.

    If `optimizer` is given, a training epoch is performed (backward pass and
    optimizer step on every batch, and the batch loss is pushed to the global
    `loss_plot`). Otherwise an evaluation epoch is performed: the model is put
    in eval mode and gradient tracking is switched off for the forward pass,
    since no backward pass follows.

    Returns the meters `(avg_top1_acc, avg_top5_acc, avg_loss)` accumulated
    over the pass; a summary line is printed at the end.
    """
    training = optimizer is not None

    # some layers behave differently in train and eval mode
    if training:
        model.train()
    else:
        model.eval()

    # objects to store metric averages
    avg_loss = AverageMeter()
    avg_top1_acc = AverageMeter()
    avg_top5_acc = AverageMeter()
    avg_batch_time = AverageMeter()
    global loss_plot

    # we iterate on the batches
    tic = time.time()
    for i, (input, target) in enumerate(data):

        if cuda: # only with GPU, and not with CPU
            input = input.cuda()
            target = target.cuda()

        # forward; the autograd graph is only recorded when a backward pass
        # will consume it, which saves memory and time during evaluation
        with torch.set_grad_enabled(training):
            output = model(input)
            loss = criterion(output, target)

        # backward if we are training
        if training:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # compute metrics
        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        batch_time = time.time() - tic
        tic = time.time()

        # update
        avg_loss.update(loss.item())
        avg_top1_acc.update(prec1.item())
        avg_top5_acc.update(prec5.item())
        avg_batch_time.update(batch_time)
        if training:
            loss_plot.update(avg_loss.val)

        # Per-batch progress report, currently disabled:
        # if i % PRINT_INTERVAL == 0:
        #     print('[{0:s} Batch {1:03d}/{2:03d}]\t'
        #           'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t'
        #           'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
        #           'Prec@1 {top1.val:5.1f} ({top1.avg:5.1f})\t'
        #           'Prec@5 {top5.val:5.1f} ({top5.avg:5.1f})'.format(
        #            "EVAL" if optimizer is None else "TRAIN", i, len(data), batch_time=avg_batch_time, loss=avg_loss,
        #            top1=avg_top1_acc, top5=avg_top5_acc))
        #     if optimizer:
        #         loss_plot.plot()

    # Print summary
    print('\n===============> Total time {batch_time:d}s\t'
          'Avg loss {loss.avg:.4f}\t'
          'Avg Prec@1 {top1.avg:5.2f} %\t'
          'Avg Prec@5 {top5.avg:5.2f} %\n'.format(
           batch_time=int(avg_batch_time.sum), loss=avg_loss,
           top1=avg_top1_acc, top5=avg_top5_acc))

    return avg_top1_acc, avg_top5_acc, avg_loss


def main(batch_size=128, lr=0.1, epochs=5, optim = 'SGD', scheduler = "", cuda=False):
    """
    Train and evaluate a fresh `ConvNet` on the loaders returned by
    `get_dataset`.

    Args:
        batch_size: batch size forwarded to `get_dataset`.
        lr: learning rate given to the optimizer.
        epochs: number of train + evaluation passes.
        optim: 'Adagrad', 'RMSprop' or 'Adam'; any other value selects SGD.
        scheduler: "" (no scheduler), "ExponentialLR", "CosineAWR",
            "ReduceLROnPlateau", "CosineALR" or "Cyclic". The scheduler is
            stepped once per epoch.
        cuda: run the model, the loss and the batches on the GPU.

    Returns:
        `(loss, loss_test)`: the training and evaluation loss meters of the
        last epoch, or `(None, None)` when `epochs` is 0.

    Raises:
        ValueError: if `scheduler` is a non-empty, unknown name.
    """
    model = ConvNet()
    criterion = nn.CrossEntropyLoss()

    # Move the model BEFORE building the optimizer: some optimizers allocate
    # their state from the parameters at construction time, and that state
    # must live on the same device as the parameters.
    if cuda: # only with GPU, and not with CPU
        cudnn.benchmark = True
        model = model.cuda()
        criterion = criterion.cuda()

    # A single if/elif chain: with independent `if` statements the final
    # `else` belonged to the 'Adam' test only, so 'Adagrad' and 'RMSprop'
    # were silently replaced by SGD.
    if optim == 'Adagrad':
        optimizer = torch.optim.Adagrad(model.parameters(), lr)
    elif optim == 'RMSprop':
        optimizer = torch.optim.RMSprop(model.parameters(), lr)
    elif optim == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(), lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(), lr)

    # Create the scheduler (None means the learning rate stays constant)
    if scheduler == "ExponentialLR":
        lr_sched = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
    elif scheduler == "CosineAWR":
        lr_sched = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0 = 1)
    elif scheduler == "ReduceLROnPlateau":
        lr_sched = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)
    elif scheduler == "CosineALR":
        lr_sched = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = 10)
    elif scheduler == "Cyclic":
        # NOTE(review): base_lr / max_lr are fixed here and do not depend on `lr`
        lr_sched = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.1, max_lr = 0.2)
    elif scheduler:
        # Fail now instead of with a NameError after a full epoch of training
        raise ValueError(f"Unknown scheduler: {scheduler!r}")
    else:
        lr_sched = None

    # Get the data
    train, test = get_dataset(batch_size, cuda)

    # init plots
    plot = AccLossPlot()
    global loss_plot
    loss_plot = TrainLossPlot()

    # Defined up front so that the final return also works when epochs == 0
    loss = loss_test = None

    # We iterate on the epochs
    for i in range(epochs):
        print("=================\n=== EPOCH "+str(i+1)+" =====\n=================\n")
        # Train phase
        top1_acc, _, loss = epoch(train, model, criterion, optimizer, cuda)
        # Test phase
        top1_acc_test, top5_acc_test, loss_test = epoch(test, model, criterion, cuda=cuda)
        # plot
        plot.update(loss.avg, loss_test.avg, top1_acc.avg, top1_acc_test.avg)

        # Modify the learning rate after each epoch
        if lr_sched is not None:
            if isinstance(lr_sched, torch.optim.lr_scheduler.ReduceLROnPlateau):
                # This scheduler needs the monitored quantity; use the evaluation loss
                lr_sched.step(loss_test.avg)
            else:
                lr_sched.step()
            # Read the rate from the optimizer so the report works for every scheduler type
            print(f"LR : {[group['lr'] for group in optimizer.param_groups]}")

        # Convergence test, currently disabled (loss_1 would need an initial value):
        # if(np.abs(loss_1-loss.avg) <1e-2):
        #   print(f"Convergence, loss{loss.avg}")
        #   break
        # else:
        #   loss_1 = loss.avg
    return loss, loss_test

In [None]:
# Baseline run: batch size 128, learning rate 0.1, 50 epochs on GPU, with main()'s default optimizer choice and no scheduler.
main(128, 0.1, epochs=50, cuda=True)

# Part 3 -- Results improvements

### 3.1 - Standardization of examples

In [None]:
def get_dataset(batch_size, cuda=False):
    """
    Build the CIFAR-10 training and validation loaders with standardized
    inputs: each image is converted to a tensor and then normalized with
    fixed per-channel means and standard deviations.

    `batch_size` is used for both loaders and `cuda` is forwarded as
    `pin_memory`. Returns `(train_loader, val_loader)`.
    """
    normalize = transforms.Normalize((0.491, 0.482, 0.447), (0.202, 0.199, 0.201))
    train_pipeline = transforms.Compose([transforms.ToTensor(), normalize])
    val_pipeline = transforms.Compose([transforms.ToTensor(), normalize])

    train_dataset = datasets.CIFAR10(PATH, train=True, download=True, transform=train_pipeline)
    val_dataset = datasets.CIFAR10(PATH, train=False, download=True, transform=val_pipeline)

    # The two loaders only differ by shuffling, which is enabled for training data
    shared_options = dict(batch_size=batch_size, pin_memory=cuda, num_workers=2)
    train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **shared_options)
    val_loader = torch.utils.data.DataLoader(val_dataset, shuffle=False, **shared_options)
    return train_loader, val_loader

In [None]:
# Same hyperparameters as the baseline; main() resolves get_dataset at call time, so this run uses
# the most recently executed definition (the standardized one when cells are run top to bottom).
main(128, 0.1, epochs=50, cuda=True)

In [None]:
class PCAWhitening:
    """
    Channel-wise PCA whitening usable as an image transform.

    Every pixel is treated as one sample whose features are the channel
    values: `fit` estimates the channel mean and the C x C channel covariance
    from a `(C, n)` matrix, and `transform` centers the data and projects it
    so that the fitted data has (approximately) identity covariance.

    Calling the object on an image of shape `(C, ...)` whitens it and returns
    a tensor of the same shape. If the object has not been fitted yet, it is
    fitted on the first image it receives and those statistics are reused for
    all later images; call `fit` beforehand to use dataset-level statistics.
    """

    def __init__(self, epsilon=1e-5):
        # epsilon is added to the eigenvalues before inversion to avoid dividing by zero
        self.epsilon = epsilon
        self.mean = None
        self.whitening_matrix = None

    def fit(self, X):
        """Estimate the mean and whitening matrix from `X` of shape (C, n)."""
        self.mean = torch.mean(X, dim=1).unsqueeze(1)
        X_centered = X - self.mean
        covariance_matrix = torch.mm(X_centered, X_centered.t()) / X_centered.size(1)
        eigenvalues, eigenvectors = torch.linalg.eigh(covariance_matrix, UPLO='U')
        # A covariance matrix has no negative eigenvalue; clamp round-off so the
        # square root below can never produce NaN
        eigenvalues = torch.clamp(eigenvalues, min=0.0)
        self.whitening_matrix = torch.mm(torch.diag(1.0 / torch.sqrt(eigenvalues + self.epsilon)), eigenvectors.t())

    def transform(self, X):
        """Center `X` (shape (C, n)) with the fitted mean and apply the whitening matrix."""
        X_centered = X - self.mean
        return torch.mm(self.whitening_matrix, X_centered)

    def __call__(self, img):
        # Remember the incoming shape instead of assuming 3 x 32 x 32, so that
        # cropped or differently sized images are handled as well
        original_shape = img.shape
        flat = img.reshape(original_shape[0], -1)
        if self.whitening_matrix is None:
            self.fit(flat)
        return self.transform(flat).reshape(original_shape)

In [None]:
def get_dataset(batch_size, cuda=False):
    """
    Build the CIFAR-10 training and validation loaders with channel-wise PCA
    whitening applied to images rescaled to the 0-255 range.

    The whitening statistics are fitted once, on training images only, before
    the loaders are created. Leaving the transform unfitted would make every
    DataLoader worker fit its own copy on whichever image it happens to load
    first, so the preprocessing would be arbitrary and inconsistent between
    workers and between the training and validation sets.

    `batch_size` is used for both loaders and `cuda` is forwarded as
    `pin_memory`. Returns `(train_loader, val_loader)`.
    """
    pca_whitening = PCAWhitening()

    train_dataset = datasets.CIFAR10(PATH, train=True, download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Lambda(lambda x: x * 255.0),  # Scale back to 0-255 range, as PCA operates on this range
            pca_whitening,
            # transforms.Normalize((0.491, 0.482, 0.447), (0.202, 0.199, 0.201))
        ]))
    val_dataset = datasets.CIFAR10(PATH, train=False, download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Lambda(lambda x: x * 255.0),  # Scale back to 0-255 range, as PCA operates on this range
            pca_whitening,
            # transforms.Normalize((0.491, 0.482, 0.447), (0.202, 0.199, 0.201))
        ]))

    # Fit on raw training pixels (uint8 array of shape (N, H, W, C), already in
    # the 0-255 range the transform pipeline feeds to the whitening step).
    # Every 10th image is enough to estimate a 3 x 3 channel covariance and
    # keeps the temporary float copies small.
    fit_stride = 10
    fit_images = torch.from_numpy(train_dataset.data[::fit_stride]).float()
    pca_whitening.fit(fit_images.permute(3, 0, 1, 2).reshape(fit_images.shape[-1], -1))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                        batch_size=batch_size, shuffle=True, pin_memory=cuda, num_workers=2)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                        batch_size=batch_size, shuffle=False, pin_memory=cuda, num_workers=2)

    return train_loader, val_loader

In [None]:
# Run with learning rate 0.1; uses the PCA-whitening get_dataset when cells are run top to bottom.
main(128, 0.1, epochs=50, cuda=True)

In [None]:
# Same setup as the previous run with a 100x smaller learning rate (0.001 instead of 0.1).
main(128, 0.001, epochs=50, cuda=True)

### Min-Max scaling

In [None]:
class MinMaxScaling:
    """
    Per-channel min-max scaling usable as an image transform.

    `fit` records the minimum and maximum of every row of a `(C, n)` matrix
    and `transform` maps values so that the fitted minimum goes to 0 and the
    fitted maximum to (almost) 1.

    Calling the object on an image of shape `(C, ...)` scales it and returns
    a tensor of the same shape. If the object has not been fitted yet, it is
    fitted on the first image it receives and those extremes are reused for
    all later images; call `fit` beforehand to use dataset-level extremes.
    """

    def __init__(self):
        self.min = None
        self.max = None

    def fit(self, X):
        """Record the per-row minimum and maximum of `X` (shape (C, n)) as (C, 1) tensors."""
        self.min = torch.min(X, dim=1, keepdim=True)[0]
        self.max = torch.max(X, dim=1, keepdim=True)[0]

    def transform(self, X):
        """Scale `X` (shape (C, n)) with the fitted extremes."""
        return (X - self.min) / (self.max - self.min + 1e-5)  # Adding a small constant to avoid division by zero

    def __call__(self, img):
        # Remember the incoming shape instead of assuming 3 x 32 x 32, so that
        # cropped or differently sized images are handled as well
        original_shape = img.shape
        flat = img.reshape(original_shape[0], -1)
        if self.min is None or self.max is None:
            self.fit(flat)
        return self.transform(flat).reshape(original_shape)

In [None]:
def get_dataset(batch_size, cuda=False):
    """
    Build the CIFAR-10 training and validation loaders with per-channel
    min-max scaling.

    The scaling extremes are computed once, from the training images only,
    before the loaders are created. Leaving the transform unfitted would make
    every DataLoader worker fit its own copy on whichever image it happens to
    load first, so the scaling would be arbitrary and inconsistent between
    workers and between the training and validation sets.

    `batch_size` is used for both loaders and `cuda` is forwarded as
    `pin_memory`. Returns `(train_loader, val_loader)`.
    """
    min_max_scaling = MinMaxScaling()

    train_dataset = datasets.CIFAR10(PATH, train=True, download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            min_max_scaling,
            # transforms.Normalize((0.491, 0.482, 0.447), (0.202, 0.199, 0.201))
        ]))
    val_dataset = datasets.CIFAR10(PATH, train=False, download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            min_max_scaling,
            # transforms.Normalize((0.491, 0.482, 0.447), (0.202, 0.199, 0.201))
        ]))

    # Per-channel extremes over the whole training set, computed on the raw
    # uint8 array (shape (N, H, W, C)) to avoid a large float copy, then
    # expressed in the 0-1 range that ToTensor produces.
    raw_pixels = torch.from_numpy(train_dataset.data)
    channel_min = raw_pixels.amin(dim=(0, 1, 2)).float().div(255.0).unsqueeze(1)
    channel_max = raw_pixels.amax(dim=(0, 1, 2)).float().div(255.0).unsqueeze(1)
    # fit() takes the row-wise min/max of a (C, n) matrix; a two-column matrix
    # holding each channel's extremes yields exactly those extremes
    min_max_scaling.fit(torch.cat([channel_min, channel_max], dim=1))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                        batch_size=batch_size, shuffle=True, pin_memory=cuda, num_workers=2)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                        batch_size=batch_size, shuffle=False, pin_memory=cuda, num_workers=2)

    return train_loader, val_loader

In [None]:
# Baseline hyperparameters; uses the min-max scaling get_dataset when cells are run top to bottom.
main(128, 0.1, epochs=50, cuda=True)

## 3.2 - Increase the number of training examples by data augmentation

In [None]:
class AddGaussianNoise:
    """
    Transform that adds Gaussian noise to a tensor.

    The returned tensor is `tensor + noise * std + mean`, where `noise` is
    drawn from a standard normal distribution with the same shape as
    `tensor`.
    """

    def __init__(self, mean=0., std=1.):
        self.std = std
        self.mean = mean

    def __call__(self, tensor):
        # Draw the noise on the tensor's own device; a CPU-only noise tensor
        # cannot be added to a tensor that lives on another device
        noise = torch.randn(tensor.size(), device=tensor.device)
        return tensor + noise * self.std + self.mean

    def __repr__(self):
        return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)

In [None]:
def get_dataset(batch_size, cuda=False):
    """
    Build the CIFAR-10 loaders for the crop + flip augmentation experiment.

    Training pipeline: ToTensor, RandomCrop(28), RandomHorizontalFlip, then
    channel-wise normalization. Validation pipeline: ToTensor, CenterCrop(28)
    and the same normalization.

    `batch_size` is used for both loaders and `cuda` is forwarded as
    `pin_memory`. Returns `(train_loader, val_loader)`.
    """
    normalize = transforms.Normalize((0.491, 0.482, 0.447), (0.202, 0.199, 0.201))
    train_pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.RandomCrop(28),
        transforms.RandomHorizontalFlip(),
        normalize,
    ])
    val_pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.CenterCrop(28),
        normalize,
    ])

    train_dataset = datasets.CIFAR10(PATH, train=True, download=True, transform=train_pipeline)
    val_dataset = datasets.CIFAR10(PATH, train=False, download=True, transform=val_pipeline)

    # The two loaders only differ by shuffling, which is enabled for training data
    shared_options = dict(batch_size=batch_size, pin_memory=cuda, num_workers=2)
    train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **shared_options)
    val_loader = torch.utils.data.DataLoader(val_dataset, shuffle=False, **shared_options)
    return train_loader, val_loader

In [None]:
# Baseline hyperparameters; uses the random crop + horizontal flip get_dataset when cells are run top to bottom.
main(128, 0.1, epochs=50, cuda=True)

In [None]:
def get_dataset(batch_size, cuda=False):
    """
    Build the CIFAR-10 loaders for the Gaussian-noise augmentation experiment.

    Training pipeline: ToTensor, channel-wise normalization, then
    AddGaussianNoise(0,1) applied to the normalized tensor. Validation
    pipeline: ToTensor and the same normalization, without noise.

    `batch_size` is used for both loaders and `cuda` is forwarded as
    `pin_memory`. Returns `(train_loader, val_loader)`.
    """
    normalize = transforms.Normalize((0.491, 0.482, 0.447), (0.202, 0.199, 0.201))
    train_pipeline = transforms.Compose([
        transforms.ToTensor(),
        normalize,
        AddGaussianNoise(0,1),
    ])
    val_pipeline = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    train_dataset = datasets.CIFAR10(PATH, train=True, download=True, transform=train_pipeline)
    val_dataset = datasets.CIFAR10(PATH, train=False, download=True, transform=val_pipeline)

    # The two loaders only differ by shuffling, which is enabled for training data
    shared_options = dict(batch_size=batch_size, pin_memory=cuda, num_workers=2)
    train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **shared_options)
    val_loader = torch.utils.data.DataLoader(val_dataset, shuffle=False, **shared_options)
    return train_loader, val_loader

In [None]:
# Baseline hyperparameters; uses the Gaussian-noise get_dataset when cells are run top to bottom.
main(128, 0.1, epochs=50, cuda=True)

In [None]:
def get_dataset(batch_size, cuda=False):
    """
    Build the CIFAR-10 loaders for the elastic-deformation augmentation
    experiment.

    Training pipeline: ToTensor, ElasticTransform(alpha=250.0), then
    channel-wise normalization. Validation pipeline: ToTensor and the same
    normalization.

    `batch_size` is used for both loaders and `cuda` is forwarded as
    `pin_memory`. Returns `(train_loader, val_loader)`.
    """
    normalize = transforms.Normalize((0.491, 0.482, 0.447), (0.202, 0.199, 0.201))
    train_pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.ElasticTransform(alpha=250.0),
        normalize,
    ])
    val_pipeline = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    train_dataset = datasets.CIFAR10(PATH, train=True, download=True, transform=train_pipeline)
    val_dataset = datasets.CIFAR10(PATH, train=False, download=True, transform=val_pipeline)

    # The two loaders only differ by shuffling, which is enabled for training data
    shared_options = dict(batch_size=batch_size, pin_memory=cuda, num_workers=2)
    train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **shared_options)
    val_loader = torch.utils.data.DataLoader(val_dataset, shuffle=False, **shared_options)
    return train_loader, val_loader

In [None]:
# Baseline hyperparameters; uses the elastic-transform get_dataset when cells are run top to bottom.
main(128, 0.1, epochs=50, cuda=True)

In [None]:
def get_dataset(batch_size, cuda=False):
    """
    Build the CIFAR-10 loaders for the crop + flip + blur augmentation
    experiment.

    Training pipeline: ToTensor, RandomCrop(28), RandomHorizontalFlip,
    GaussianBlur with a (5, 9) kernel, then channel-wise normalization.
    Validation pipeline: ToTensor, CenterCrop(28) and the same normalization.

    `batch_size` is used for both loaders and `cuda` is forwarded as
    `pin_memory`. Returns `(train_loader, val_loader)`.
    """
    normalize = transforms.Normalize((0.491, 0.482, 0.447), (0.202, 0.199, 0.201))
    train_pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.RandomCrop(28),
        transforms.RandomHorizontalFlip(),
        transforms.GaussianBlur(kernel_size = (5, 9)),
        normalize,
    ])
    val_pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.CenterCrop(28),
        normalize,
    ])

    train_dataset = datasets.CIFAR10(PATH, train=True, download=True, transform=train_pipeline)
    val_dataset = datasets.CIFAR10(PATH, train=False, download=True, transform=val_pipeline)

    # The two loaders only differ by shuffling, which is enabled for training data
    shared_options = dict(batch_size=batch_size, pin_memory=cuda, num_workers=2)
    train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **shared_options)
    val_loader = torch.utils.data.DataLoader(val_dataset, shuffle=False, **shared_options)
    return train_loader, val_loader

In [None]:
# Baseline hyperparameters; uses the crop + flip + Gaussian blur get_dataset when cells are run top to bottom.
main(128, 0.1, epochs=50, cuda=True)

In [None]:
def get_dataset(batch_size, cuda=False):
    """
    Build the CIFAR-10 loaders for the rotation augmentation experiment.

    Training pipeline: ToTensor, RandomRotation(90), then channel-wise
    normalization. Validation pipeline: ToTensor and the same normalization.

    `batch_size` is used for both loaders and `cuda` is forwarded as
    `pin_memory`. Returns `(train_loader, val_loader)`.
    """
    normalize = transforms.Normalize((0.491, 0.482, 0.447), (0.202, 0.199, 0.201))
    train_pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.RandomRotation(90),
        normalize,
    ])
    val_pipeline = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    train_dataset = datasets.CIFAR10(PATH, train=True, download=True, transform=train_pipeline)
    val_dataset = datasets.CIFAR10(PATH, train=False, download=True, transform=val_pipeline)

    # The two loaders only differ by shuffling, which is enabled for training data
    shared_options = dict(batch_size=batch_size, pin_memory=cuda, num_workers=2)
    train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **shared_options)
    val_loader = torch.utils.data.DataLoader(val_dataset, shuffle=False, **shared_options)
    return train_loader, val_loader

In [None]:
# Baseline hyperparameters; uses the random-rotation get_dataset when cells are run top to bottom.
main(128, 0.1, epochs=50, cuda=True)

In [None]:
def get_dataset(batch_size, cuda=False):
    """
    Build the CIFAR-10 loaders for the crop + flip + rotation augmentation
    experiment.

    Training pipeline: ToTensor, RandomCrop(28), RandomHorizontalFlip,
    RandomRotation(90), then channel-wise normalization. Validation pipeline:
    ToTensor, CenterCrop(28) and the same normalization.

    `batch_size` is used for both loaders and `cuda` is forwarded as
    `pin_memory`. Returns `(train_loader, val_loader)`.
    """
    normalize = transforms.Normalize((0.491, 0.482, 0.447), (0.202, 0.199, 0.201))
    train_pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.RandomCrop(28),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(90),
        normalize,
    ])
    val_pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.CenterCrop(28),
        normalize,
    ])

    train_dataset = datasets.CIFAR10(PATH, train=True, download=True, transform=train_pipeline)
    val_dataset = datasets.CIFAR10(PATH, train=False, download=True, transform=val_pipeline)

    # The two loaders only differ by shuffling, which is enabled for training data
    shared_options = dict(batch_size=batch_size, pin_memory=cuda, num_workers=2)
    train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **shared_options)
    val_loader = torch.utils.data.DataLoader(val_dataset, shuffle=False, **shared_options)
    return train_loader, val_loader

In [None]:
# Baseline hyperparameters; uses the crop + flip + rotation get_dataset when cells are run top to bottom.
main(128, 0.1, epochs=50, cuda=True)

## 3.3 - Variants on the optimization algorithm

In [None]:
def get_dataset(batch_size, cuda=False):
    """
    Build the CIFAR-10 loaders used for the optimizer / scheduler
    experiments: crop + flip augmentation with normalized inputs.

    Training pipeline: ToTensor, RandomCrop(28), RandomHorizontalFlip, then
    channel-wise normalization. Validation pipeline: ToTensor, CenterCrop(28)
    and the same normalization.

    `batch_size` is used for both loaders and `cuda` is forwarded as
    `pin_memory`. Returns `(train_loader, val_loader)`.
    """
    normalize = transforms.Normalize((0.491, 0.482, 0.447), (0.202, 0.199, 0.201))
    train_pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.RandomCrop(28),
        transforms.RandomHorizontalFlip(),
        normalize,
    ])
    val_pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.CenterCrop(28),
        normalize,
    ])

    train_dataset = datasets.CIFAR10(PATH, train=True, download=True, transform=train_pipeline)
    val_dataset = datasets.CIFAR10(PATH, train=False, download=True, transform=val_pipeline)

    # The two loaders only differ by shuffling, which is enabled for training data
    shared_options = dict(batch_size=batch_size, pin_memory=cuda, num_workers=2)
    train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **shared_options)
    val_loader = torch.utils.data.DataLoader(val_dataset, shuffle=False, **shared_options)
    return train_loader, val_loader

In [None]:
# Baseline hyperparameters plus the "ExponentialLR" scheduler, which main() builds with gamma=0.95 and steps once per epoch.
main(128, 0.1, epochs=50, cuda=True, scheduler = "ExponentialLR")

### Learning stability

In [None]:
import statistics

# Repeat the same 10-epoch training run ten times and collect the average
# training and test loss of each run's last epoch (main() returns the loss
# meters of its final epoch), to measure run-to-run variability.
# NOTE(review): no random seed is set in the visible cells, so the spread
# presumably comes from weight initialization and batch shuffling - confirm.
losses = []
losses_test = []

for i in range(10):
    print("Step", i)
    loss, loss_test = main(128, 0.1, epochs=10, cuda=True)
    losses.append(loss.avg)
    losses_test.append(loss_test.avg)

# Summary statistics over the ten runs (statistics.stdev is the sample standard deviation)
print("Mean training loss :", sum(losses) / len(losses))
print("Mean test loss :", sum(losses_test) / len(losses_test))
print("Standard deviation training loss :", statistics.stdev(losses))
print("Standard deviation test loss :", statistics.stdev(losses_test))

In [None]:
import scipy.stats as stats
import math  # not used in this cell

# Plot two normal densities centered on the same mean: one with the small
# standard deviation below, one with standard deviation 1, for comparison.
# NOTE(review): mu and sigma are hard-coded; presumably copied from the output
# of a previous run of the stability cell - confirm before reusing.
# NOTE(review): `plt` is not imported in any visible cell; presumably it comes
# from `from utils import *` - confirm.
mu = 0.7306593440756013
sigma = 0.05003281626991144
# The x range spans mu +/- 3 * sigma of the narrow distribution
x = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
plt.plot(x, stats.norm.pdf(x, mu, sigma), label='μ:0.73, σ: 0.05')
plt.plot(x, stats.norm.pdf(x, mu, 1), label='μ:0.73, σ: 1')
plt.legend()

In [None]:
# Adam optimizer with learning rate 0.0005, no scheduler, 50 epochs on GPU.
main(128, 0.0005, epochs=50, optim='Adam', cuda=True)

In [None]:
# Adam optimizer with learning rate 0.0005 combined with the "ExponentialLR" scheduler (gamma=0.95 in main()).
main(128, 0.0005, epochs=50, optim='Adam', scheduler = 'ExponentialLR', cuda=True)

In [None]:
# Default optimizer choice with learning rate 0.1 and the "CosineALR" scheduler (CosineAnnealingLR with T_max=10 in main()).
main(128, 0.1, epochs=50, scheduler = 'CosineALR', cuda=True)

## 3.4 - Regularization of the network by dropout

In [None]:
class ConvNet(nn.Module):
    """
    Baseline convolutional network with a dropout layer inserted between the
    two fully connected layers.

    Args:
        dropout_p: value passed to `nn.Dropout` as `p`, i.e. the probability
            of zeroing each fc4 activation during training. The default 0.9
            reproduces the value that used to be hard-coded; note that it
            drops (rather than keeps) about 90% of the activations.
    """

    def __init__(self, dropout_p=0.9):
        super(ConvNet, self).__init__()
        # We first define the convolution and pooling layers as a features extractor
        self.features = nn.Sequential(
            #conv1, relu, maxpool
            nn.Conv2d(3, 32, (5, 5), stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), stride=2, padding=0),
            #conv2, relu, maxpool
            nn.Conv2d(32, 64, (5, 5), stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), stride=2, padding=0),
            #conv3, relu, maxpool (the output size is rounded up here)
            nn.Conv2d(64, 64, (5, 5), stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), stride=2, padding=0, ceil_mode = True),
        )
        # We then define fully connected layers as a classifier
        self.classifier = nn.Sequential(
            #fc4, relu, dropout, fc5
            nn.Linear(1024, 1000), #4*4*64
            nn.ReLU(),
            nn.Dropout(dropout_p),
            nn.Linear(1000, 10)
            # Reminder: The softmax is included in the loss, do not put it here
        )

    # Method called when we apply the network to an input batch
    def forward(self, input):
        """Return one row of 10 class scores per example of the batch."""
        bsize = input.size(0) # batch size
        output = self.features(input) # output of the conv layers
        output = output.view(bsize, -1) # we flatten the 2D feature maps into one 1D vector for each input
        output = self.classifier(output) # we compute the output of the fc layers
        return output

In [None]:
# main() instantiates ConvNet at call time, so this run trains the dropout variant when cells are run top to bottom.
main(128, 0.1, epochs=50, cuda=True, scheduler = 'ExponentialLR')

## 3.5 - Use of batch normalization

In [None]:
class ConvNet(nn.Module):
    """
    Convolutional network with batch normalization after every convolution
    and a dropout layer between the two fully connected layers.

    Args:
        dropout_p: value passed to `nn.Dropout` as `p`, i.e. the probability
            of zeroing each fc4 activation during training. The default 0.9
            reproduces the value that used to be hard-coded; note that it
            drops (rather than keeps) about 90% of the activations.
    """

    def __init__(self, dropout_p=0.9):
        super(ConvNet, self).__init__()
        # We first define the convolution and pooling layers as a features extractor
        self.features = nn.Sequential(
            #conv1, batchnorm, relu, maxpool
            nn.Conv2d(3, 32, (5, 5), stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), stride=2, padding=0),
            #conv2, batchnorm, relu, maxpool
            nn.Conv2d(32, 64, (5, 5), stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), stride=2, padding=0),
            #conv3, batchnorm, relu, maxpool (the output size is rounded up here)
            nn.Conv2d(64, 64, (5, 5), stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), stride=2, padding=0, ceil_mode = True),
        )
        # We then define fully connected layers as a classifier
        self.classifier = nn.Sequential(
            #fc4, relu, dropout, fc5
            nn.Linear(1024, 1000), #4*4*64
            nn.ReLU(),
            nn.Dropout(dropout_p),
            nn.Linear(1000, 10)
            # Reminder: The softmax is included in the loss, do not put it here
        )

    # Method called when we apply the network to an input batch
    def forward(self, input):
        """Return one row of 10 class scores per example of the batch."""
        bsize = input.size(0) # batch size
        output = self.features(input) # output of the conv layers
        output = output.view(bsize, -1) # we flatten the 2D feature maps into one 1D vector for each input
        output = self.classifier(output) # we compute the output of the fc layers
        return output

In [None]:
# main() instantiates ConvNet at call time, so this run trains the batch-normalization variant when cells are run top to bottom.
main(128, 0.1, epochs=50, cuda=True, scheduler = "ExponentialLR")

## Based on all our trials and tests, we now define an "optimal" model that combines the choices that worked best in the previous experiments.

In [None]:
class ConvNet(nn.Module):
    """
    Final network of the notebook: three conv + batch-norm + ReLU + max-pool
    stages followed by a two-layer classifier with dropout.

    Args:
        dropout_p: value passed to `nn.Dropout` as `p`, i.e. the probability
            of zeroing each fc4 activation during training. The default 0.9
            reproduces the value that used to be hard-coded; note that it
            drops (rather than keeps) about 90% of the activations.
    """

    def __init__(self, dropout_p=0.9):
        super(ConvNet, self).__init__()
        # We first define the convolution and pooling layers as a features extractor
        self.features = nn.Sequential(
            #conv1, batchnorm, relu, maxpool
            nn.Conv2d(3, 32, (5, 5), stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), stride=2, padding=0),
            #conv2, batchnorm, relu, maxpool
            nn.Conv2d(32, 64, (5, 5), stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), stride=2, padding=0),
            #conv3, batchnorm, relu, maxpool (the output size is rounded up here)
            nn.Conv2d(64, 64, (5, 5), stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), stride=2, padding=0, ceil_mode = True),
        )
        # We then define fully connected layers as a classifier
        self.classifier = nn.Sequential(
            #fc4, relu, dropout, fc5
            nn.Linear(1024, 1000), #4*4*64
            nn.ReLU(),
            nn.Dropout(dropout_p),
            nn.Linear(1000, 10)
            # Reminder: The softmax is included in the loss, do not put it here
        )

    # Method called when we apply the network to an input batch
    def forward(self, input):
        """Return one row of 10 class scores per example of the batch."""
        bsize = input.size(0) # batch size
        output = self.features(input) # output of the conv layers
        output = output.view(bsize, -1) # we flatten the 2D feature maps into one 1D vector for each input
        output = self.classifier(output) # we compute the output of the fc layers
        return output

In [None]:
def get_dataset(batch_size, cuda=False):
    """
    Build the CIFAR-10 loaders for the final model: crop + flip + rotation
    augmentation with normalized inputs.

    Training pipeline: ToTensor, RandomCrop(28), RandomHorizontalFlip,
    RandomRotation(90), then channel-wise normalization. Validation pipeline:
    ToTensor, CenterCrop(28) and the same normalization.

    `batch_size` is used for both loaders and `cuda` is forwarded as
    `pin_memory`. Returns `(train_loader, val_loader)`.
    """
    normalize = transforms.Normalize((0.491, 0.482, 0.447), (0.202, 0.199, 0.201))
    train_pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.RandomCrop(28),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(90),
        normalize,
    ])
    val_pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.CenterCrop(28),
        normalize,
    ])

    train_dataset = datasets.CIFAR10(PATH, train=True, download=True, transform=train_pipeline)
    val_dataset = datasets.CIFAR10(PATH, train=False, download=True, transform=val_pipeline)

    # The two loaders only differ by shuffling, which is enabled for training data
    shared_options = dict(batch_size=batch_size, pin_memory=cuda, num_workers=2)
    train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **shared_options)
    val_loader = torch.utils.data.DataLoader(val_dataset, shuffle=False, **shared_options)
    return train_loader, val_loader

In [None]:
# Final run: learning rate 0.5 for 100 epochs with the "CosineALR" scheduler.
# main() builds CosineAnnealingLR with T_max=10 regardless of `epochs`.
main(128, 0.5, epochs=100, cuda=True, scheduler = "CosineALR")