# Load data

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor


In [2]:
# Define relevant variables for the ML task
batch_size = 64  # number of samples per mini-batch, shared by both loaders

# Download training data from open datasets.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

# Download test data from open datasets.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

# Only the training loader is shuffled. Evaluation gains nothing from a random
# order, and a fixed order keeps the batch-averaged test loss reproducible
# from one run to the next.
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

100%|██████████| 26.4M/26.4M [00:01<00:00, 13.3MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 210kB/s]
100%|██████████| 4.42M/4.42M [00:01<00:00, 3.92MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 14.6MB/s]


# Define your CNN network

We start by creating a class that inherits from nn.Module, and then we define the layers inside \_\_init\_\_ and their order of execution inside forward.

Some things to notice here:

- nn.Conv2d is used to define the convolutional layers. We define the number of channels they receive and how many they should return, along with the kernel size. We start from 1 channel, as we are using grayscale images
- nn.MaxPool2d is a max-pooling layer that just requires the kernel size and the stride
- nn.Linear is the fully connected layer, and nn.ReLU is the activation function used

In the forward method, we define the sequence, and, before the fully connected layers, we reshape the output to match the input to a fully connected layer

Source: https://blog.paperspace.com/writing-cnns-from-scratch-in-pytorch/


In [13]:
# @title
# Creating a CNN class
class ConvNeuralNet(torch.nn.Module):
    """Baseline CNN classifier: two conv/pool stages (8 then 16 feature maps).

    Each stage is a 3x3 convolution followed by a 2x2 max pooling with
    stride 2. The pooled maps are flattened, passed through a ReLU and fed
    to a single fully connected layer that produces the class logits.

    Args:
        num_classes: Number of logits returned by ``fc1``.
    """

    #  Determine what layers and their order in CNN object
    def __init__(self, num_classes):
        super(ConvNeuralNet, self).__init__()
        self.conv_layer1 = torch.nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3)
        self.max_pool1 = torch.nn.MaxPool2d(kernel_size = 2, stride = 2)

        # in_channels must equal conv_layer1's out_channels (8). The previous
        # value of 4 made forward() fail with a channel-mismatch error.
        self.conv_layer2 = torch.nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3)
        self.max_pool2 = torch.nn.MaxPool2d(kernel_size = 2, stride = 2)

        self.relu1 = torch.nn.ReLU()
        # 400 = 16 channels * 5 * 5, assuming 1x28x28 inputs
        # (spatial size 28 -> 26 -> 13 -> 11 -> 5).
        self.fc1 = torch.nn.Linear(400, num_classes)

    # Progresses data across layers
    def forward(self, x):
        """Return the logits for the batch ``x``, one row per sample."""
        out = self.conv_layer1(x)
        out = self.max_pool1(out)

        out = self.conv_layer2(out)
        out = self.max_pool2(out)

        # Flatten every sample to a vector so it can enter the linear layer.
        out = out.reshape(out.size(0), -1)

        out = self.relu1(out)
        out = self.fc1(out)
        return out

# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device\n")

# Build the 10-class network on the selected device and show its layers.
model = ConvNeuralNet(10).to(device)
print(model)

# Add up the element count of every parameter tensor of the model.
total_params = sum([weights.numel() for weights in model.parameters()])
print(f"Number of Parameters:{total_params}")

Using cuda device

ConvNeuralNet(
  (conv_layer1): Conv2d(1, 4, kernel_size=(3, 3), stride=(1, 1))
  (max_pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv_layer2): Conv2d(4, 8, kernel_size=(3, 3), stride=(1, 1))
  (max_pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu1): ReLU()
  (fc1): Linear(in_features=200, out_features=10, bias=True)
)
Number of Parameters:2346


In [23]:
# @title
# Creating a CNN class
class ConvNeuralNet(torch.nn.Module):
    """Reduced-width CNN: two conv/pool stages (4 then 8 feature maps) and a linear head.

    Args:
        num_classes: Size of the logit vector returned by ``forward``.
    """

    def __init__(self, num_classes):
        super().__init__()
        layers = torch.nn

        # Stage 1: 1 -> 4 channels with a 3x3 kernel, then 2x2 pooling (stride 2).
        self.conv_layer1 = layers.Conv2d(1, 4, 3)
        self.max_pool1 = layers.MaxPool2d(2, 2)

        # Stage 2: 4 -> 8 channels with a 3x3 kernel, then 2x2 pooling (stride 2).
        self.conv_layer2 = layers.Conv2d(4, 8, 3)
        self.max_pool2 = layers.MaxPool2d(2, 2)

        # Head: 200 = 8 channels * 5 * 5, assuming 1x28x28 inputs.
        self.relu1 = layers.ReLU()
        self.fc1 = layers.Linear(200, num_classes)

    def forward(self, x):
        """Run both conv/pool stages, flatten, apply ReLU and return the logits."""
        stage1 = self.max_pool1(self.conv_layer1(x))
        stage2 = self.max_pool2(self.conv_layer2(stage1))
        flat = stage2.reshape(stage2.size(0), -1)
        return self.fc1(self.relu1(flat))

# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
gpu_available = torch.cuda.is_available()
device = "cuda" if gpu_available else "cpu"
print(f"Using {device} device\n")

# Build the 10-class network, move it to the selected device and show its layers.
model = ConvNeuralNet(10)
model = model.to(device)
print(model)

# Add up the element count of every parameter tensor of the model.
total_params = sum([weights.numel() for weights in model.parameters()])
print(f"Number of Parameters:{total_params}")

Using cuda device

ConvNeuralNet(
  (conv_layer1): Conv2d(1, 4, kernel_size=(3, 3), stride=(1, 1))
  (max_pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv_layer2): Conv2d(4, 8, kernel_size=(3, 3), stride=(1, 1))
  (max_pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu1): ReLU()
  (fc1): Linear(in_features=1152, out_features=10, bias=True)
)
Number of Parameters:11866


In [34]:
# @title
# Creating a CNN class
class ConvNeuralNet(torch.nn.Module):
    """CNN variant without the first pooling layer: conv, conv, pool, linear head.

    Args:
        num_classes: Size of the logit vector returned by ``forward``.
    """

    def __init__(self, num_classes):
        super().__init__()
        # Two stacked 3x3 convolutions (1 -> 4 -> 8 channels); only the second
        # one is followed by a 2x2 max pooling with stride 2.
        self.conv_layer1 = torch.nn.Conv2d(1, 4, kernel_size=3)

        self.conv_layer2 = torch.nn.Conv2d(4, 8, kernel_size=3)
        self.max_pool2 = torch.nn.MaxPool2d(2, stride=2)

        # 1152 = 8 channels * 12 * 12, assuming 1x28x28 inputs
        # (spatial size 28 -> 26 -> 24 -> 12).
        self.relu1 = torch.nn.ReLU()
        self.fc1 = torch.nn.Linear(1152, num_classes)

    def forward(self, x):
        """Apply the feature layers in order, flatten, then ReLU and the linear head."""
        out = x
        for layer in (self.conv_layer1, self.conv_layer2, self.max_pool2):
            out = layer(out)

        # One flat feature vector per sample.
        out = out.reshape(out.size(0), -1)
        return self.fc1(self.relu1(out))

# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print(f"Using {device} device\n")

# Build the 10-class network on the selected device and show its layers.
model = ConvNeuralNet(10).to(device)
print(model)

# Add up the element count of every parameter tensor of the model.
total_params = sum([weights.numel() for weights in model.parameters()])
print(f"Number of Parameters:{total_params}")

Using cuda device

ConvNeuralNet(
  (conv_layer1): Conv2d(1, 4, kernel_size=(3, 3), stride=(1, 1))
  (conv_layer2): Conv2d(4, 8, kernel_size=(3, 3), stride=(1, 1))
  (max_pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu1): ReLU()
  (fc1): Linear(in_features=1152, out_features=10, bias=True)
)
Number of Parameters:11866


In [48]:
# Creating a CNN class
class ConvNeuralNet(torch.nn.Module):
    """Three-stage CNN with 2x2 kernels: (conv + pool) x 3 followed by a linear head.

    Args:
        num_classes: Size of the logit vector returned by ``forward``.
    """

    def __init__(self, num_classes):
        super().__init__()
        conv, pool = torch.nn.Conv2d, torch.nn.MaxPool2d

        # Stage 1: 1 -> 8 channels.
        self.conv_layer1 = conv(1, 8, kernel_size=2)
        self.max_pool1 = pool(2, stride=2)

        # Stage 1.5 (the added stage): 8 -> 8 channels.
        self.conv_layer1_5 = conv(8, 8, kernel_size=2)
        self.max_pool1_5 = pool(2, stride=2)

        # Stage 2: 8 -> 16 channels.
        self.conv_layer2 = conv(8, 16, kernel_size=2)
        self.max_pool2 = pool(2, stride=2)

        # Head: 64 = 16 channels * 2 * 2, assuming 1x28x28 inputs.
        self.relu1 = torch.nn.ReLU()
        self.fc1 = torch.nn.Linear(64, num_classes)

    def forward(self, x):
        """Return the logits for the batch ``x``.

        Spatial size per stage, assuming 28x28 inputs:
        28 -> 27 -> 13 -> 12 -> 6 -> 5 -> 2.
        """
        stage1 = self.max_pool1(self.conv_layer1(x))
        stage1_5 = self.max_pool1_5(self.conv_layer1_5(stage1))
        stage2 = self.max_pool2(self.conv_layer2(stage1_5))

        # One flat feature vector per sample, then ReLU and the linear head.
        flat = stage2.reshape(stage2.size(0), -1)
        return self.fc1(self.relu1(flat))

# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device\n")

# Build the 10-class network on the selected device and show its layers.
model = ConvNeuralNet(10).to(device)
print(model)

# Add up the element count of every parameter tensor of the model.
total_params = sum([weights.numel() for weights in model.parameters()])
print(f"Number of Parameters:{total_params}")

Using cuda device

ConvNeuralNet(
  (conv_layer1): Conv2d(1, 8, kernel_size=(2, 2), stride=(1, 1))
  (max_pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv_layer1_5): Conv2d(8, 8, kernel_size=(2, 2), stride=(1, 1))
  (max_pool1_5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv_layer2): Conv2d(8, 16, kernel_size=(2, 2), stride=(1, 1))
  (max_pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu1): ReLU()
  (fc1): Linear(in_features=64, out_features=10, bias=True)
)
Number of Parameters:1482


In [21]:
# @title
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader`` and log the loss every 100 batches.

    Batches are moved to the module-level ``device`` before the forward pass.

    Args:
        dataloader: Iterable of ``(inputs, targets)`` batches exposing ``.dataset``.
        model: Network to optimise; it is switched to training mode.
        loss_fn: Callable taking ``(predictions, targets)`` and returning the loss.
        optimizer: Optimizer used to update the parameters after each batch.
    """
    n_samples = len(dataloader.dataset)
    model.train()
    for step, (inputs, targets) in enumerate(dataloader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Forward pass and loss for this batch.
        batch_loss = loss_fn(model(inputs), targets)

        # Reset gradients, backpropagate, then update the parameters.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Only report on every 100th batch.
        if step % 100 != 0:
            continue
        loss_value = batch_loss.item()
        seen = step * len(inputs)
        print(f"loss: {loss_value:>7f}  [{seen:>5d}/{n_samples:>5d}]")

def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print the accuracy and average loss.

    The loss is averaged over the number of batches and the accuracy over the
    number of samples. Batches are moved to the module-level ``device``.

    Args:
        dataloader: Iterable of ``(inputs, targets)`` batches exposing ``.dataset``.
        model: Network to evaluate; it is switched to evaluation mode.
        loss_fn: Callable taking ``(predictions, targets)`` and returning the loss.
    """
    n_samples = len(dataloader.dataset)
    n_batches = len(dataloader)
    model.eval()

    loss_total = 0
    hits = 0
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            logits = model(inputs)
            loss_total += loss_fn(logits, targets).item()
            # Count the samples whose highest-scoring class matches the target.
            hits += (logits.argmax(1) == targets).type(torch.float).sum().item()

    avg_loss = loss_total / n_batches
    accuracy = hits / n_samples
    print(f"Test Error: \n Accuracy: {(100*accuracy):>0.1f}%, Avg loss: {avg_loss:>8f} \n")

In [49]:
# Define relevant variables for the ML task
num_classes = 10
learning_rate = 0.001
# Single source of truth for the number of passes over the training set.
# Previously `num_epochs = 20` was declared but never used, while the loop ran
# over a separate `epochs = 10`; the value that was actually used is kept.
num_epochs = 10
epochs = num_epochs  # alias kept for any cell that still reads `epochs`

# Set Loss function with criterion
loss_fn = nn.CrossEntropyLoss()
# Set optimizer with optimizer.
# The optimizer holds the parameters of the current `model`: re-run this cell
# after redefining the model, otherwise the new network is never updated.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9)

# One training pass followed by one evaluation pass per epoch.
for t in range(num_epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")



Epoch 1
-------------------------------
loss: 2.310645  [    0/60000]
loss: 2.290847  [ 6400/60000]
loss: 2.299369  [12800/60000]
loss: 2.295385  [19200/60000]
loss: 2.296859  [25600/60000]
loss: 2.280128  [32000/60000]
loss: 2.281440  [38400/60000]
loss: 2.271632  [44800/60000]
loss: 2.245576  [51200/60000]
loss: 2.201687  [57600/60000]
Test Error: 
 Accuracy: 31.3%, Avg loss: 2.173136 

Epoch 2
-------------------------------
loss: 2.178939  [    0/60000]
loss: 2.053109  [ 6400/60000]
loss: 1.724094  [12800/60000]
loss: 1.271051  [19200/60000]
loss: 1.047569  [25600/60000]
loss: 0.913065  [32000/60000]
loss: 0.982864  [38400/60000]
loss: 0.920723  [44800/60000]
loss: 0.910908  [51200/60000]
loss: 0.788044  [57600/60000]
Test Error: 
 Accuracy: 66.7%, Avg loss: 0.934161 

Epoch 3
-------------------------------
loss: 0.749577  [    0/60000]
loss: 0.686194  [ 6400/60000]
loss: 0.806464  [12800/60000]
loss: 0.861688  [19200/60000]
loss: 0.695192  [25600/60000]
loss: 0.767218  [32000/600

<div style="border: solid 3px #fff;">
    <h1 style="text-align: center; color:#fff; font-family:Georgia; font-size:26px;">Exercise :</h1>
    <h1 style="text-align: left; color:#fff; font-family:Courier; font-size:16px;"> &emsp; Change the out_channels of the first conv2d layer to 4 and the out_channels of the second conv2d layer to 8. It is necessary to adapt the rest of the layers to this new size. Check that the training can still be done. </h1>
    <h1 style="text-align: left; color:#fff; font-family:Courier; font-size:16px;"> &emsp; 1. With this modification, by how much would the number of parameters be reduced? </h1>
    <p style='text-align: left;'> </p>
    <h1 style="text-align: left; color:#fff; font-family:Courier; font-size:16px;"> &emsp; 2. How would this modification impact training and convergence? </h1>
    <p style='text-align: left;'> </p>
</div>

In [16]:
# Le nombre de paramètres a diminué de plus de 3000 (il en reste environ 2300).
# Avant la modification, on était à environ 84.3% de précision ; après la modification, on atteint 83.1%. On a donc perdu en précision, mais ce n'est pas
# proportionnel à la diminution du nombre de paramètres.

<div style="border: solid 3px #fff;">
    <h1 style="text-align: center; color:#fff; font-family:Georgia; font-size:26px;">Exercise :</h1>
    <h1 style="text-align: left; color:#fff; font-family:Courier; font-size:16px;"> &emsp; Remove the first MaxPool2d layer and adapt the size of the rest of the layers so that the network is trainable. </h1>
    <h1 style="text-align: left; color:#fff; font-family:Courier; font-size:16px;"> &emsp; 1. With this modification, how would the number of parameters change? Does it mean that the MaxPool layer contains trainable parameters?</h1>
    <p style='text-align: left;'> </p>
</div>

In [26]:
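# Le nombre de paramètres a augmenté pour atteindre 11866. MaxPool2d ne contient lui-même aucun paramètre entraînable :
# il réduit la taille spatiale des cartes de caractéristiques, donc la taille d'entrée de fc1 (200 avec le premier pooling, 1152 sans).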


<div style="border: solid 3px #fff;">
    <h1 style="text-align: center; color:#fff; font-family:Georgia; font-size:26px;">Exercise :</h1>
    <h1 style="text-align: left; color:#fff; font-family:Courier; font-size:16px;"> &emsp; By printing out.shape in the forward function of the Network class, the size of each layer's output can be displayed (ONLY in the training mode.) </h1>
    <h1 style="text-align: left; color:#fff; font-family:Courier; font-size:16px;"> &emsp; 1. Add another Conv2d with stride=2 and MaxPool2d with stride=2 between the first MaxPool2d and the second Conv2d layer.</h1>
    <h1 style="text-align: left; color:#fff; font-family:Courier; font-size:16px;"> &emsp; 2. Modify the kernel size to 2 in the Conv2d layers. Adapt the size of the rest of the layers to have a trainable network.</h1>
    <h1 style="text-align: left; color:#fff; font-family:Courier; font-size:16px;"> &emsp; 3. When another Conv2d layer is added, how does the number of the model's parameters change?</h1>
    <p style='text-align: left;'> </p>
</div>

In [None]:
# your code is missing here
# Le nombre de paramètres a diminué (1482 contre 2346) car on a ajouté une couche de pooling et réduit la taille du kernel des couches de convolution