# MLP vs CNN (bonus)

In lab 3 we defined an MLP classifier, and in the previous script we used a CNN classifier. A natural question is: how do they compare, and which one is better? The goal of this exercise is to answer this question in terms of number of parameters, training behavior, and accuracy on the test set.

&nbsp; 

<center><a href="https://towardsdatascience.com/simple-introduction-to-convolutional-neural-networks-cdf8d3077bac">
    <img src="https://miro.medium.com/max/700/1*1Cw9nKcdKV5YQun-e4F8gQ.png"></a></center>

In [3]:
import torch
import torch.nn as nn
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, Subset
import numpy as np
import matplotlib.pyplot as plt
import copy

# Define the data repository: root directory handed to the torchvision
# dataset constructors
data_dir = 'data/'

In [4]:
# Initialization function
def init_weights(m):
    """Initialize the parameters of a linear or 2-D convolutional layer.

    Meant to be passed to ``nn.Module.apply`` so that it is called on every
    submodule of a model. Modules that are neither ``nn.Linear`` nor
    ``nn.Conv2d`` are left untouched.

    Args:
        m: The module to (possibly) initialize. It is modified in place:
            weights get Xavier-uniform values and the bias, when the layer
            has one, is filled with 0.01.

    Returns:
        None.
    """
    if isinstance(m, (nn.Linear, nn.Conv2d)):
        torch.nn.init.xavier_uniform_(m.weight.data)
        # Layers created with bias=False expose ``bias`` as None, so there is
        # nothing to fill in that case.
        if m.bias is not None:
            m.bias.data.fill_(0.01)

In [5]:
# Load the Fashion MNIST dataset (download=True is passed, with data_dir as the root folder);
# transforms.ToTensor() is applied to every image
train_data = datasets.FashionMNIST(data_dir, train=True, download=True, transform=transforms.ToTensor())
test_data = datasets.FashionMNIST(data_dir, train=False, download=True, transform=transforms.ToTensor())
# The number of classes is read from the full dataset object, before
# train_data is rebound to a Subset below
num_classes = len(train_data.classes)

# Keep only the first 500 training samples and the first 50 test samples
# (presumably to keep this bonus exercise fast -- confirm before changing)
train_data = Subset(train_data, torch.arange(500))
test_data = Subset(test_data, torch.arange(50))

# Create dataloaders (only the training loader shuffles its samples)
batch_size = 8
train_dataloader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

In [6]:
# TO DO: write the MLP and CNN (with batch norm) classifier modules (you can reuse your code from the past scripts)
class CNNClassif_bnorm(nn.Module):
    """Two-block convolutional classifier with batch normalization.

    Each block applies a 5x5 convolution (padding 2, so the spatial size is
    kept), a ReLU, a non-affine 2-D batch normalization and a 2x2 max
    pooling. The flattened output of the second block feeds one linear layer
    that produces the class scores.

    The linear layer is sized for single-channel 28x28 inputs (the Fashion
    MNIST images used in this file): the two poolings reduce 28 -> 14 -> 7.

    Args:
        num_channels1: Number of output channels of the first convolution.
        num_channels2: Number of output channels of the second convolution.
        num_classes: Number of output scores (one per class).
    """

    def __init__(self, num_channels1=16, num_channels2=32, num_classes=10):
        super().__init__()
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(1, num_channels1, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.BatchNorm2d(num_channels1, affine=False),
            nn.MaxPool2d(kernel_size=2),
        )

        self.conv_block2 = nn.Sequential(
            nn.Conv2d(num_channels1, num_channels2, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.BatchNorm2d(num_channels2, affine=False),
            nn.MaxPool2d(kernel_size=2),
        )

        # Feature count after the second block: num_channels2 maps of 7x7.
        # This equals the former hard-coded 1568 for the default 32 channels,
        # and stays correct for any other value of num_channels2.
        self.fc = nn.Linear(num_channels2 * 7 * 7, num_classes)

    def forward(self, x):
        """Compute the class scores for a batch of images.

        Args:
            x: Batch of single-channel 28x28 images, shape (batch, 1, 28, 28).

        Returns:
            Tensor of shape (batch, num_classes) holding the raw scores.
        """
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        # Vectorize each sample's feature maps before the linear layer.
        out = self.fc(x.reshape(x.shape[0], -1))
        return out

class MLPClassif(nn.Module):
    """Multilayer perceptron classifier with two hidden layers.

    The network applies ``hidden1``, ``hidden2`` and ``out_layer`` in that
    order, with the activation function after each of the two hidden layers
    and none after the output layer.

    Args:
        input_size: Number of features of one (vectorized) input sample.
        hidden_size: Number of units of each hidden layer.
        output_size: Number of output scores (one per class).
        act_fn: Activation applied after each hidden layer, e.g.
            ``nn.Sigmoid()``.
    """

    def __init__(self, input_size, hidden_size, output_size, act_fn):
        super().__init__()
        self.hidden1 = nn.Linear(input_size, hidden_size)
        self.hidden2 = nn.Linear(hidden_size, hidden_size)
        self.out_layer = nn.Linear(hidden_size, output_size)
        self.act_fn = act_fn

    def forward(self, x):
        """Compute the output scores for a batch of inputs.

        Args:
            x: Either already vectorized samples whose last dimension is
                ``input_size``, or a batch with more than two dimensions
                (e.g. images of shape (batch, 1, 28, 28)), in which case
                each sample is vectorized first.

        Returns:
            Tensor of raw scores whose last dimension is ``output_size``.
        """
        # Image-shaped batches are vectorized here so the model can be fed
        # the same tensors as a CNN. Inputs whose last dimension already
        # matches the first layer are passed through unchanged.
        if x.dim() > 2 and x.shape[-1] != self.hidden1.in_features:
            x = x.reshape(x.shape[0], -1)
        x = self.act_fn(self.hidden1(x))
        x = self.act_fn(self.hidden2(x))
        out = self.out_layer(x)
        return out

In [None]:
# TO DO: write the training function.
# No need to write 2 functions (one for MLP and one for CNN), you can use the same but be careful about image vectorization.


In [None]:
# TO DO: write the evaluation function (again, no need to write 2)


In [None]:
# Hyper-parameters shared by the MLP and the CNN
num_classes, num_epochs = 10, 30
loss_fn = nn.CrossEntropyLoss()
learning_rate = 0.01

# MLP: the input size is the number of pixels (height * width) of one image
first_image = train_data[0][0]
img_height, img_width = first_image[0].shape
input_size_mlp = img_height * img_width
hidden_size_mlp = 50
model_mlp = MLPClassif(input_size_mlp, hidden_size_mlp, num_classes, nn.Sigmoid())
# Train on the training set, then measure the accuracy on the test set;
# is_mlp=True is passed to both helpers for this model
model_mlp, loss_total_mlp = training_classifier(
    model_mlp,
    train_dataloader,
    num_epochs,
    loss_fn,
    learning_rate,
    is_mlp=True,
    verbose=False,
)
accuracy_mlp = eval_classifier(model_mlp, test_dataloader, is_mlp=True, verbose=True)

# CNN: number of channels of the two convolutional layers
num_channels1, num_channels2 = 16, 32

In [None]:
# TO DO: Instantiate, initialize and train the two models. Compute accuracy on the test set


In [None]:
# TO DO: compare the MLP and CNN models
# - print the number of parameters of each model
# - plot the training loss
# - display the test accuracy


<span style="color:red">**Q5**</span> Put these results in your report. Which one is the winner?