In [None]:
# Import necessary libraries
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Preprocessing applied to every image: convert to a tensor, then normalize
# with mean 0.5 and std 0.5, i.e. (x - 0.5) / 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean=(0.5,), std=(0.5,))]
)

# MNIST train/test splits, downloaded into ./data when not already present.
train_set, test_set = (
    datasets.MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Mini-batches of 64; only the training data is reshuffled every epoch.
train_loader = DataLoader(dataset=train_set, shuffle=True, batch_size=64)
test_loader = DataLoader(dataset=test_set, shuffle=False, batch_size=64)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:01<00:00, 6049225.70it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 159878.38it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:01<00:00, 1504972.06it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 4032711.42it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






**TODO: Figure out how many images are in `train_set` and `test_set`.**

In [None]:
# Dataset sizes: len() on a dataset gives its number of samples.
num_train_images, num_test_images = len(train_set), len(test_set)

print(f'The number of images in the training set is: {num_train_images}')
print(f'The number of images in the test set is: {num_test_images}')

The number of images in the training set is: 60000
The number of images in the test set is: 10000


# **Building the Neural Network Model**

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

In [None]:


# Fix the RNG seed so the layers below are initialized reproducibly.
torch.manual_seed(42)


## create the NN_classification class
class NN_classification(nn.Module):
    """CNN digit classifier: four conv + ReLU + max-pool stages, then three linear layers.

    Every convolution uses a 3x3 kernel with padding 1 and stride 1, so it keeps
    the spatial size; every 2x2 / stride-2 max-pool halves it (rounding down).
    For a 1x28x28 input the spatial size therefore goes 28 -> 14 -> 7 -> 3 -> 1,
    which leaves exactly the 64 features that ``fc1`` expects.
    """

    def __init__(self):
        super().__init__()
        # initialize layers
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1, stride=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1, stride=1)
        self.conv3 = nn.Conv2d(64, 102, kernel_size=3, padding=1, stride=1)
        self.conv4 = nn.Conv2d(102, 64, kernel_size=3, padding=1, stride=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 1 * 1, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

        # initialize activation functions
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images.

        Args:
            x: image batch of shape (batch, 1, height, width); the layer sizes
               are chosen for 28x28 inputs.

        Returns:
            Tensor of shape (batch, 10). No softmax is applied.

        Raises:
            RuntimeError: if the input size does not reduce to 64 features per
                sample after the four pooling stages.
        """
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        x = self.pool(self.relu(self.conv4(x)))
        # Flatten everything except the batch dimension. Unlike view(-1, 64),
        # this never moves surplus spatial features into the batch dimension,
        # so a wrongly sized input fails loudly in fc1 instead of silently
        # producing more rows than there are samples.
        x = torch.flatten(x, start_dim=1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x


## create an instance of NN_classification
model = NN_classification()
print(model)


NN_classification(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 102, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(102, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=64, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=10, bias=True)
  (relu): ReLU()
)


# **Training the model**

In [None]:


from torch.optim import SGD

num_epochs = 10

# Plain SGD over the parameters of `model`, with cross-entropy on the raw logits.
optimizer = SGD(model.parameters(), lr=0.01)
loss_fn = nn.CrossEntropyLoss()

# Select training mode explicitly so that re-running this cell after a cell
# that called model.eval() still trains in the intended mode.
model.train()

for epoch in range(num_epochs):
    running_loss = 0.0

    for images, labels in train_loader:
        # Clear the gradients first so nothing left over from an earlier
        # backward pass can accumulate into this update.
        optimizer.zero_grad()

        predictions = model(images)
        loss = loss_fn(predictions, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Print the average loss for the epoch
    print(f"Epoch {epoch+1}, Loss {running_loss/len(train_loader)} ")

# Print model's state_dict (built once instead of once per entry)
print("Model's state_dict:")
for param_tensor, values in model.state_dict().items():
    print(param_tensor, "\t", values.size())


Epoch 1, Loss 2.301031039976108 
Epoch 2, Loss 2.2921639566482512 
Epoch 3, Loss 1.8344294530496414 
Epoch 4, Loss 0.4302783104370652 
Epoch 5, Loss 0.18361946780568184 
Epoch 6, Loss 0.12466027325730143 
Epoch 7, Loss 0.09881782047758336 
Epoch 8, Loss 0.08266701888658408 
Epoch 9, Loss 0.06996430915597675 
Epoch 10, Loss 0.06274524622304893 
Model's state_dict:
conv1.weight 	 torch.Size([32, 1, 3, 3])
conv1.bias 	 torch.Size([32])
conv2.weight 	 torch.Size([64, 32, 3, 3])
conv2.bias 	 torch.Size([64])
conv3.weight 	 torch.Size([102, 64, 3, 3])
conv3.bias 	 torch.Size([102])
conv4.weight 	 torch.Size([64, 102, 3, 3])
conv4.bias 	 torch.Size([64])
fc1.weight 	 torch.Size([128, 64])
fc1.bias 	 torch.Size([128])
fc2.weight 	 torch.Size([64, 128])
fc2.bias 	 torch.Size([64])
fc3.weight 	 torch.Size([10, 64])
fc3.bias 	 torch.Size([10])


# **Evaluation on the test_loader**

In [None]:
# Put the model in evaluation mode and accumulate the per-batch test loss.
model.eval()
test_loss = 0.0

# Gradients are not needed for evaluation, so disable tracking for the whole pass.
with torch.no_grad():
    for images, labels in test_loader:
        predictions = model(images)
        loss = loss_fn(predictions, labels)
        test_loss += loss.item()

# Average of the per-batch mean losses.
print(f"Loss: {test_loss/len(test_loader)}")


Loss: 0.06096286200351011


# **Implementing early stopping**

In [None]:
from torch.utils.data import DataLoader, random_split

# Split the training set into training and validation sets
# NOTE: `train_set` is rebound to the 80% subset below, so running this cell a
# second time would split that subset again; re-create `train_set` first.
train_size = int(0.8 * len(train_set))  # 80% training, 20% validation
val_size = len(train_set) - train_size
train_set, val_set = random_split(train_set, [train_size, val_size])

# Rebuild the training loader on the new subset. A loader created before the
# split keeps iterating over the full dataset, which would let the model train
# on the validation images and make the validation loss meaningless.
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
validation_loader = DataLoader(val_set, batch_size=64, shuffle=False)

In [None]:
# Early stopping: stop training once the validation loss has stopped improving.
patience = 5        # number of epochs to wait for an improvement
min_delta = 0.001   # minimum decrease in validation loss that counts as an improvement
best_loss = None    # lowest validation loss seen so far (None until the first epoch)
patience_counter = 0

# Training loop with early stopping
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        # Training pass
        # Forward pass
        predictions = model(images)
        loss = loss_fn(predictions, labels)
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # evaluation phase
    model.eval()
    validation_loss = 0.0
    with torch.no_grad():
        for images, labels in validation_loader:
            output = model(images)
            # Compute the loss on the validation batch itself. Re-using `loss`
            # here would add the last *training* batch loss over and over.
            val_batch_loss = loss_fn(output, labels)
            validation_loss += val_batch_loss.item()

    # Calculate average losses
    training_loss = running_loss / len(train_loader)
    validation_loss /= len(validation_loader)

    print(f"Epoch {epoch+1}, Training Loss: {training_loss}, Validation Loss: {validation_loss}")

    # Early stopping logic: reset the counter on a sufficient improvement,
    # otherwise count the epoch and stop once `patience` epochs have passed.
    if best_loss is None or validation_loss < best_loss - min_delta:
        best_loss = validation_loss
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print("Early stopping triggered!")
            break

print("Training is finished!")


Epoch 1, Training Loss: 0.05681765765306165, Validation Loss: 0.051730137318372726
Epoch 2, Training Loss: 0.05059565552649325, Validation Loss: 0.049796443432569504
Epoch 3, Training Loss: 0.04606383941324948, Validation Loss: 0.0036903556901961565
Epoch 4, Training Loss: 0.0429450196049202, Validation Loss: 0.009525937028229237
Epoch 5, Training Loss: 0.03877751104697994, Validation Loss: 0.005715304519981146
Epoch 6, Training Loss: 0.03599408433188313, Validation Loss: 0.0029411781579256058
Epoch 7, Training Loss: 0.03330974258097218, Validation Loss: 0.008382689207792282
Epoch 8, Training Loss: 0.03114490290428065, Validation Loss: 0.010182598605751991
Early stopping triggered!
Training is finished!


**patience** refers to the number of epochs to wait for improvement.

**min_delta** refers to the minimum change in validation loss to be considered as improvement.

By implementing early stopping, the training process stopped at epoch 8. Before applying early stopping, I had initially chosen 10 epochs arbitrarily. The notable difference observed is that with 10 epochs, the validation loss started increasing while the training loss continued to decrease.


# **Experimenting with Dropout**

**Building the neural network**

In [None]:
import torch
import torch.nn as nn

# Fix the RNG seed so the layers below are initialized reproducibly.
torch.manual_seed(42)


class jk(nn.Module):
    """CNN digit classifier with dropout (p=0.2) on the flattened conv features.

    The layers match the four-stage conv + ReLU + max-pool network followed by
    three linear layers; the only addition is a dropout layer applied right
    before ``fc1``. For a 1x28x28 input the spatial size goes
    28 -> 14 -> 7 -> 3 -> 1, leaving the 64 features that ``fc1`` expects.
    """

    def __init__(self):
        super().__init__()
        # initialize layers
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1, stride=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1, stride=1)
        self.conv3 = nn.Conv2d(64, 102, kernel_size=3, padding=1, stride=1)
        self.conv4 = nn.Conv2d(102, 64, kernel_size=3, padding=1, stride=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 1 * 1, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)
        self.dropout = nn.Dropout(p=0.2)

        # initialize activation functions
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images.

        Args:
            x: image batch of shape (batch, 1, height, width); the layer sizes
               are chosen for 28x28 inputs.

        Returns:
            Tensor of shape (batch, 10). No softmax is applied.

        Raises:
            RuntimeError: if the input size does not reduce to 64 features per
                sample after the four pooling stages.
        """
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        x = self.pool(self.relu(self.conv4(x)))
        # Flatten everything except the batch dimension. Unlike view(-1, 64),
        # this never moves surplus spatial features into the batch dimension,
        # so a wrongly sized input fails loudly in fc1.
        x = torch.flatten(x, start_dim=1)
        # Dropout is only active in training mode (model.train()).
        x = self.dropout(x)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x


model_2 = jk()
print(model_2)


jk(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 102, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(102, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=64, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=10, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (relu): ReLU()
)


**training phase**

In [None]:

import torch.nn as nn
from torch.optim import SGD

num_epochs = 10

# The optimizer must own the parameters of the network that is being trained.
# Building it from `model.parameters()` while the loop runs `model_2` means
# `model_2` never receives an update and its loss stays flat.
optimizer = SGD(model_2.parameters(), lr=0.01)
loss_fn = nn.CrossEntropyLoss()

# Training mode keeps the dropout layer active.
model_2.train()

for epoch in range(num_epochs):
    running_loss = 0.0

    for images, labels in train_loader:
        # Clear the gradients first so nothing left over from an earlier
        # backward pass can accumulate into this update.
        optimizer.zero_grad()

        predictions = model_2(images)
        loss = loss_fn(predictions, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Print the average loss for the epoch
    print(f"Epoch {epoch+1}, Loss {running_loss/len(train_loader)} ")

# Print the state_dict of the network trained in this cell
print("Model's state_dict:")
for param_tensor, values in model_2.state_dict().items():
    print(param_tensor, "\t", values.size())


Epoch 1, Loss 2.3055715418573635 
Epoch 2, Loss 2.3055976936812086 
Epoch 3, Loss 2.305607631008254 
Epoch 4, Loss 2.305592506170781 
Epoch 5, Loss 2.3055811263859143 
Epoch 6, Loss 2.3055870731248023 
Epoch 7, Loss 2.3055969329276826 
Epoch 8, Loss 2.3055889487012364 
Epoch 9, Loss 2.3055902340773073 
Epoch 10, Loss 2.305593231085267 
Model's state_dict:
conv1.weight 	 torch.Size([32, 1, 3, 3])
conv1.bias 	 torch.Size([32])
conv2.weight 	 torch.Size([64, 32, 3, 3])
conv2.bias 	 torch.Size([64])
conv3.weight 	 torch.Size([102, 64, 3, 3])
conv3.bias 	 torch.Size([102])
conv4.weight 	 torch.Size([64, 102, 3, 3])
conv4.bias 	 torch.Size([64])
fc1.weight 	 torch.Size([128, 64])
fc1.bias 	 torch.Size([128])
fc2.weight 	 torch.Size([64, 128])
fc2.bias 	 torch.Size([64])
fc3.weight 	 torch.Size([10, 64])
fc3.bias 	 torch.Size([10])


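In this run, dropout did not appear to improve the model's performance: the training loss stayed at about 2.3056 for all 10 epochs, which is much higher than the loss obtained without dropout. However, a loss that stays flat at roughly ln(10) ≈ 2.30 is what a 10-class classifier produces when it predicts at chance level, which suggests the network was not learning at all rather than that dropout hurt it, so the training setup should be double-checked before drawing a conclusion. One possible explanation I considered is that the model is not complex enough to benefit from dropout.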

> Ajouter une citation

