In [None]:
"""
=========================================================================================
Topic: Deep Learning Project on MNIST Dataset
Author: Benalouache Sassi
Date: 19/11/2023
=========================================================================================
"""

# Part 1 : Shallow network

#### + Using the activation functions sigmoid :

In [1]:
import gzip, numpy, torch

# Define the neural network class
class ShallowNet(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(ShallowNet, self).__init__()
        # Define the hidden layer
        self.hidden = torch.nn.Linear(input_size, hidden_size)
        # Define the output layer
        self.output = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Apply sigmoid activation function to the hidden layer
        x = torch.sigmoid(self.hidden(x))
        # Output layer (without activation as we're using MSE loss)
        x = self.output(x)
        return x

if __name__ == '__main__':
    # Hyperparameters
    batch_size = 5
    nb_epochs = 10
    eta = 0.001
    hidden_size = 128

    # Load the MNIST dataset
    ((data_train, label_train), (data_test, label_test)) = torch.load(gzip.open('mnist.pkl.gz'))

    # Prepare data loaders for training and testing
    train_dataset = torch.utils.data.TensorDataset(data_train, label_train)
    test_dataset = torch.utils.data.TensorDataset(data_test, label_test)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False)

    # Initialize the model
    model = ShallowNet(data_train.shape[1], hidden_size, label_train.shape[1])
    # Initialize weights uniformly for the hidden and output layers
    torch.nn.init.uniform_(model.hidden.weight, -0.001, 0.001)
    torch.nn.init.uniform_(model.output.weight, -0.001, 0.001)

    # Define the loss function and optimizer
    loss_func = torch.nn.MSELoss(reduction='sum')
    optim = torch.optim.SGD(model.parameters(), lr=eta)

    # Training and evaluation loop
    for n in range(nb_epochs):
        # Set model to training mode
        model.train()
        for x, t in train_loader:
            # Forward pass: compute predicted outputs
            y = model(x)
            # Compute loss
            loss = loss_func(t, y)
            # Backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # Perform a single optimization step
            optim.step()
            # Clear the gradients for the next iteration
            optim.zero_grad()

        # Set the model to evaluation mode
        model.eval()
        test_loss = 0.
        correct = 0
        with torch.no_grad(): # Turn off gradients for validation, saving memory and computations
            for x, t in test_loader:
                y = model(x)
                # Accumulate test loss
                test_loss += loss_func(t, y).item()
                # Count correct predictions
                correct += (torch.argmax(y, 1) == torch.argmax(t, 1)).sum().item()

        # Calculate average test loss and accuracy
        test_loss /= len(test_loader.dataset)
        accuracy = 100. * correct / len(test_loader.dataset)
        print(f"Epoch {n+1}/{nb_epochs}, Test Loss: {test_loss:.4f}, Accuracy: {accuracy:.2f}%")

Epoch 1/10, Test Loss: 0.4715, Accuracy: 77.21%
Epoch 2/10, Test Loss: 0.3666, Accuracy: 87.37%
Epoch 3/10, Test Loss: 0.2979, Accuracy: 89.23%
Epoch 4/10, Test Loss: 0.2476, Accuracy: 90.64%
Epoch 5/10, Test Loss: 0.2222, Accuracy: 91.04%
Epoch 6/10, Test Loss: 0.2008, Accuracy: 91.64%
Epoch 7/10, Test Loss: 0.1872, Accuracy: 91.87%
Epoch 8/10, Test Loss: 0.1780, Accuracy: 92.31%
Epoch 9/10, Test Loss: 0.1706, Accuracy: 92.53%
Epoch 10/10, Test Loss: 0.1617, Accuracy: 92.79%


#### + Adding an early stopping mechanism to prevent overfitting during the training of your neural network model :

In [5]:
import gzip, numpy, torch

# Define the neural network class
class ShallowNet(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(ShallowNet, self).__init__()
        # Define the hidden layer
        self.hidden = torch.nn.Linear(input_size, hidden_size)
        # Define the output layer
        self.output = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Apply sigmoid activation function to the hidden layer
        x = torch.sigmoid(self.hidden(x))
        # Output layer (without activation as we're using MSE loss)
        x = self.output(x)
        return x

if __name__ == '__main__':
    # Hyperparameters
    batch_size = 64
    nb_epochs = 10
    eta = 0.001
    hidden_size = 128
    patience = 3  # Early stopping patience

    # Load the MNIST dataset
    ((data_train, label_train), (data_test, label_test)) = torch.load(gzip.open('mnist.pkl.gz'))

    # Prepare data loaders for training and testing
    train_dataset = torch.utils.data.TensorDataset(data_train, label_train)
    test_dataset = torch.utils.data.TensorDataset(data_test, label_test)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False)

    # Initialize the model
    model = ShallowNet(data_train.shape[1], hidden_size, label_train.shape[1])
    # Initialize weights uniformly for the hidden and output layers
    torch.nn.init.uniform_(model.hidden.weight, -0.001, 0.001)
    torch.nn.init.uniform_(model.output.weight, -0.001, 0.001)

    # Define the loss function and optimizer
    loss_func = torch.nn.MSELoss(reduction='sum')
    optim = torch.optim.SGD(model.parameters(), lr=eta)

    # Initialize early stopping parameters
    best_loss = float('inf')
    epochs_no_improve = 0

    # Training and evaluation loop
    for n in range(nb_epochs):
        # Set model to training mode
        model.train()
        for x, t in train_loader:
            # Forward pass: compute predicted outputs
            y = model(x)
            # Compute loss
            loss = loss_func(t, y)
            # Backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # Perform a single optimization step
            optim.step()
            # Clear the gradients for the next iteration
            optim.zero_grad()

        # Set the model to evaluation mode
        model.eval()
        test_loss = 0.
        correct = 0
        with torch.no_grad(): # Turn off gradients for validation, saving memory and computations
            for x, t in test_loader:
                y = model(x)
                # Accumulate test loss
                test_loss += loss_func(t, y).item()
                # Count correct predictions
                correct += (torch.argmax(y, 1) == torch.argmax(t, 1)).sum().item()

        # Calculate average test loss and accuracy
        test_loss /= len(test_loader.dataset)
        accuracy = 100. * correct / len(test_loader.dataset)
        print(f"Epoch {n+1}/{nb_epochs}, Test Loss: {test_loss:.4f}, Accuracy: {accuracy:.2f}%")

        # Early Stopping Check
        if test_loss < best_loss:
            best_loss = test_loss
            epochs_no_improve = 0
            # Save the best model (optional)
            torch.save(model.state_dict(), 'best_model.pth')
        else:
            epochs_no_improve += 1
            if epochs_no_improve == patience:
                print("Early stopping triggered")
                break

Epoch 1/10, Test Loss: 0.8699, Accuracy: 18.14%
Epoch 2/10, Test Loss: 0.8539, Accuracy: 18.23%
Epoch 3/10, Test Loss: 0.7515, Accuracy: 32.74%
Epoch 4/10, Test Loss: 0.6575, Accuracy: 42.10%
Epoch 5/10, Test Loss: 0.6297, Accuracy: 44.04%
Epoch 6/10, Test Loss: 0.4121, Accuracy: 80.01%
Epoch 7/10, Test Loss: 0.3142, Accuracy: 83.14%
Epoch 8/10, Test Loss: 0.2601, Accuracy: 88.30%
Epoch 9/10, Test Loss: 0.2363, Accuracy: 89.01%
Epoch 10/10, Test Loss: 0.2248, Accuracy: 89.23%


### + Training and Evaluation :

In [None]:
import gzip, numpy, torch

# Define the neural network class
class ShallowNet(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(ShallowNet, self).__init__()
        # Define the hidden layer
        self.hidden = torch.nn.Linear(input_size, hidden_size)
        # Define the output layer
        self.output = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Apply sigmoid activation function to the hidden layer
        x = torch.sigmoid(self.hidden(x))
        # Output layer (without activation as we're using MSE loss)
        x = self.output(x)
        return x

if __name__ == '__main__':
    # Hyperparameters
    learning_rates = [0.01, 0.001, 0.0001]
    hidden_sizes = [32, 64, 128]
    batch_sizes = [5, 16, 32, 64]

    ((data_train, label_train), (data_test, label_test)) = torch.load(gzip.open('mnist.pkl.gz'))

    for batch_size in batch_sizes:
        train_loader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(data_train, label_train), 
            batch_size=batch_size, 
            shuffle=True
        )
        test_loader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(data_test, label_test), 
            batch_size=1, 
            shuffle=False
        )

        for eta in learning_rates:
            for hidden_size in hidden_sizes:
                
                
                model = ShallowNet(data_train.shape[1], hidden_size, label_train.shape[1])
                torch.nn.init.uniform_(model.hidden.weight, -0.001, 0.001)
                torch.nn.init.uniform_(model.output.weight, -0.001, 0.001)

                loss_func = torch.nn.MSELoss(reduction='sum')
                optim = torch.optim.SGD(model.parameters(), lr=eta)

                for n in range(10): # Number of epochs
                    model.train()
                    for x, t in train_loader:
                        y = model(x)
                        loss = loss_func(t, y)
                        loss.backward()
                        optim.step()
                        optim.zero_grad()

                # Evaluate after the last epoch
                model.eval()
                test_loss = 0.
                correct = 0
                with torch.no_grad():
                    for x, t in test_loader:
                        y = model(x)
                        test_loss += loss_func(t, y).item()
                        correct += (torch.argmax(y, 1) == torch.argmax(t, 1)).sum().item()

                test_loss /= len(test_loader.dataset)
                accuracy = 100. * correct / len(test_loader.dataset)
                print(f"Final Epoch for Batch Size={batch_size}, η={eta}, Hidden Size={hidden_size}: Test Loss: {test_loss:.4f}, Accuracy: {accuracy:.2f}%")

# Part 2 : Deep network

#### + Function using the activation functions sigmoid :

In [30]:
import gzip, numpy, torch

# Define the deep neural network class
class DeepNet(torch.nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super(DeepNet, self).__init__()
        self.hidden1 = torch.nn.Linear(input_size, hidden_size1)
        self.hidden2 = torch.nn.Linear(hidden_size1, hidden_size2)
        self.output = torch.nn.Linear(hidden_size2, output_size)

    def forward(self, x):
        x = torch.sigmoid(self.hidden1(x))
        x = torch.sigmoid(self.hidden2(x))
        x = self.output(x)
        return x

if __name__ == '__main__':
    # Set specific hyperparameters
    eta = 0.01
    hidden_size1, hidden_size2 = (128, 64)
    batch_size = 5

    # Load data
    ((data_train, label_train), (data_test, label_test)) = torch.load(gzip.open('mnist.pkl.gz'))

    # Prepare data loaders
    train_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(data_train, label_train), 
        batch_size=batch_size, 
        shuffle=True
    )
    test_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(data_test, label_test), 
        batch_size=1, 
        shuffle=False
    )

    # Initialize model
    model = DeepNet(data_train.shape[1], hidden_size1, hidden_size2, label_train.shape[1])
    torch.nn.init.uniform_(model.hidden1.weight, -0.001, 0.001)
    torch.nn.init.uniform_(model.hidden2.weight, -0.001, 0.001)
    torch.nn.init.uniform_(model.output.weight, -0.001, 0.001)

    # Loss function and optimizer
    loss_func = torch.nn.MSELoss(reduction='sum')
    optim = torch.optim.SGD(model.parameters(), lr=eta)

    # Training and evaluation loop
    num_epochs = 10
    for epoch in range(num_epochs):
        # Training
        model.train()
        for x, t in train_loader:
            y = model(x)
            loss = loss_func(t, y)
            loss.backward()
            optim.step()
            optim.zero_grad()

        # Evaluation
        model.eval()
        test_loss = 0.
        correct = 0
        with torch.no_grad():
            for x, t in test_loader:
                y = model(x)
                test_loss += loss_func(t, y).item()
                correct += (torch.argmax(y, 1) == torch.argmax(t, 1)).sum().item()

        test_loss /= len(test_loader.dataset)
        accuracy = 100. * correct / len(test_loader.dataset)
        print(f"Epoch {epoch+1}/{num_epochs}, Test Loss: {test_loss:.4f}, Accuracy: {accuracy:.2f}%")

Epoch 1/10, Test Loss: 0.8110, Accuracy: 19.83%
Epoch 2/10, Test Loss: 0.7431, Accuracy: 29.90%
Epoch 3/10, Test Loss: 0.4130, Accuracy: 67.30%
Epoch 4/10, Test Loss: 0.2674, Accuracy: 87.13%
Epoch 5/10, Test Loss: 0.2249, Accuracy: 83.70%
Epoch 6/10, Test Loss: 0.1452, Accuracy: 92.36%
Epoch 7/10, Test Loss: 0.1246, Accuracy: 93.40%
Epoch 8/10, Test Loss: 0.1242, Accuracy: 93.46%
Epoch 9/10, Test Loss: 0.1015, Accuracy: 94.46%
Epoch 10/10, Test Loss: 0.0972, Accuracy: 94.60%


#### + This part focuses on developing and analyzing a deep neural network for the MNIST dataset, the objective was to determine the impact of various hyperparameters :

In [3]:
import gzip, numpy, torch

# Define the deep neural network class
class DeepNet(torch.nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super(DeepNet, self).__init__()
        # Define the first hidden layer
        self.hidden1 = torch.nn.Linear(input_size, hidden_size1)
        # Define the second hidden layer
        self.hidden2 = torch.nn.Linear(hidden_size1, hidden_size2)
        # Define the output layer
        self.output = torch.nn.Linear(hidden_size2, output_size)

    def forward(self, x):
        # Apply sigmoid activation function to the first hidden layer
        x = torch.sigmoid(self.hidden1(x))
        # Apply sigmoid activation function to the second hidden layer
        x = torch.sigmoid(self.hidden2(x))
        # Output layer (without activation as we're using MSE loss)
        x = self.output(x)
        return x

if __name__ == '__main__':
    # Hyperparameters
    learning_rates = [0.01, 0.001, 0.0001]
    hidden_sizes = [(64, 32), (128, 64), (256, 128)]  # Pairs of sizes for two hidden layers
    batch_sizes = [5, 16, 32, 64]

    ((data_train, label_train), (data_test, label_test)) = torch.load(gzip.open('mnist.pkl.gz'))

    for batch_size in batch_sizes:
        train_loader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(data_train, label_train), 
            batch_size=batch_size, 
            shuffle=True
        )
        test_loader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(data_test, label_test), 
            batch_size=1, 
            shuffle=False
        )

        for eta in learning_rates:
            for hidden_sizes_pair in hidden_sizes:
                hidden_size1, hidden_size2 = hidden_sizes_pair

                model = DeepNet(data_train.shape[1], hidden_size1, hidden_size2, label_train.shape[1])
                torch.nn.init.uniform_(model.hidden1.weight, -0.001, 0.001)
                torch.nn.init.uniform_(model.hidden2.weight, -0.001, 0.001)
                torch.nn.init.uniform_(model.output.weight, -0.001, 0.001)

                loss_func = torch.nn.MSELoss(reduction='sum')
                optim = torch.optim.SGD(model.parameters(), lr=eta)

                for n in range(10): # Number of epochs
                    model.train()
                    for x, t in train_loader:
                        y = model(x)
                        loss = loss_func(t, y)
                        loss.backward()
                        optim.step()
                        optim.zero_grad()

                # Evaluate after the last epoch
                model.eval()
                test_loss = 0.
                correct = 0
                with torch.no_grad():
                    for x, t in test_loader:
                        y = model(x)
                        test_loss += loss_func(t, y).item()
                        correct += (torch.argmax(y, 1) == torch.argmax(t, 1)).sum().item()

                test_loss /= len(test_loader.dataset)
                accuracy = 100. * correct / len(test_loader.dataset)
                print(f"Final Epoch for Batch Size={batch_size}, η={eta}, Hidden Layer Sizes={hidden_sizes_pair}: Test Loss: {test_loss:.4f}, Accuracy: {accuracy:.2f}%")


Training with Batch Size=5, η=0.01, Hidden Layer Sizes=(64, 32)
Final Epoch for Batch Size=5, η=0.01, Hidden Layer Sizes=(64, 32): Test Loss: 0.0933, Accuracy: 95.13%

Training with Batch Size=5, η=0.01, Hidden Layer Sizes=(128, 64)
Final Epoch for Batch Size=5, η=0.01, Hidden Layer Sizes=(128, 64): Test Loss: 0.0930, Accuracy: 94.93%

Training with Batch Size=5, η=0.01, Hidden Layer Sizes=(256, 128)
Final Epoch for Batch Size=5, η=0.01, Hidden Layer Sizes=(256, 128): Test Loss: 0.1037, Accuracy: 94.37%

Training with Batch Size=5, η=0.001, Hidden Layer Sizes=(64, 32)
Final Epoch for Batch Size=5, η=0.001, Hidden Layer Sizes=(64, 32): Test Loss: 0.8205, Accuracy: 22.99%

Training with Batch Size=5, η=0.001, Hidden Layer Sizes=(128, 64)
Final Epoch for Batch Size=5, η=0.001, Hidden Layer Sizes=(128, 64): Test Loss: 0.8214, Accuracy: 24.34%

Training with Batch Size=5, η=0.001, Hidden Layer Sizes=(256, 128)
Final Epoch for Batch Size=5, η=0.001, Hidden Layer Sizes=(256, 128): Test Loss:

# Part 3 : CNN

In [None]:
pip install torchvision

In [19]:
import gzip
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader, TensorDataset

# Define the CNN class
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)  # 1 input channel, 6 output channels, 5x5 kernel
        self.pool = nn.MaxPool2d(2, 2)   # 2x2 max pooling
        self.conv2 = nn.Conv2d(6, 16, 5) # 6 input channels, 16 output channels, 5x5 kernel
        self.fc1 = nn.Linear(16 * 4 * 4, 120) # Fully connected layer
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10) # 10 output classes

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4) # Flatten the tensor for the fully connected layer
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Main function
if __name__ == '__main__':
    # Hyperparameters
    batch_size = 64
    nb_epochs = 5
    eta = 0.001

    # Load the MNIST dataset
    ((data_train, label_train), (data_test, label_test)) = torch.load(gzip.open('mnist.pkl.gz'))

    # Convert one-hot encoded labels to class indices
    label_train = torch.argmax(label_train, dim=1)
    label_test = torch.argmax(label_test, dim=1)

    # Reshape data to include channel dimension (1, 28, 28)
    data_train = data_train.view(-1, 1, 28, 28)
    data_test = data_test.view(-1, 1, 28, 28)

    # Prepare data loaders for training and testing
    train_dataset = TensorDataset(data_train, label_train)
    test_dataset = TensorDataset(data_test, label_test)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

    # Initialize the model
    model = SimpleCNN()

    # Define the loss function and optimizer
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=eta)

    # Training loop
    for epoch in range(nb_epochs):
        model.train()
        for x, t in train_loader:
            optimizer.zero_grad()
            y = model(x)
            loss = loss_func(y, t)
            loss.backward()
            optimizer.step()

        # Evaluation
        model.eval()
        test_loss = 0
        correct = 0
        with torch.no_grad():
            for x, t in test_loader:
                y = model(x)
                test_loss += F.cross_entropy(y, t, reduction='sum').item()
                pred = y.argmax(dim=1, keepdim=True)
                correct += pred.eq(t.view_as(pred)).sum().item()

        test_loss /= len(test_loader.dataset)
        accuracy = 100. * correct / len(test_loader.dataset)
        print(f"Epoch {epoch + 1}/{nb_epochs}, Test Loss: {test_loss:.4f}, Accuracy: {accuracy:.2f}%")

Epoch 1/5, Test Loss: 0.0951, Accuracy: 97.09%
Epoch 2/5, Test Loss: 0.0642, Accuracy: 97.96%
Epoch 3/5, Test Loss: 0.0585, Accuracy: 98.06%
Epoch 4/5, Test Loss: 0.0518, Accuracy: 98.53%
Epoch 5/5, Test Loss: 0.0473, Accuracy: 98.51%


# Part 4 : Push Forward

### For Shallow network :

Note : with the activation functions sigmoid and MSE Loss we have for :

+ Result with Batch Size=64, η=0.01, Hidden Size=64: Test Loss: 0.9135, Accuracy: 9.21%

#### Shallow network with Loss fonction Cross entropy loss, the activation function to ReLU, Xavier initialization and Adam optimizer :

In [23]:
import gzip
import numpy
import torch

# Define the neural network class
class ShallowNet(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(ShallowNet, self).__init__()
        self.hidden = torch.nn.Linear(input_size, hidden_size)
        self.output = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = torch.nn.functional.relu(self.hidden(x))  # Changed to ReLU
        x = self.output(x)
        return x

if __name__ == '__main__':
    batch_size = 64
    nb_epochs = 5
    eta = 0.01
    hidden_size = 64

    # Load the MNIST dataset
    ((data_train, label_train), (data_test, label_test)) = torch.load(gzip.open('mnist.pkl.gz'))

    # Preprocess labels for CrossEntropyLoss
    label_train = torch.argmax(label_train, axis=1)
    label_test = torch.argmax(label_test, axis=1)

    # Flatten the images
    data_train = data_train.reshape(-1, 28*28)
    data_test = data_test.reshape(-1, 28*28)

    train_dataset = torch.utils.data.TensorDataset(data_train, label_train)
    test_dataset = torch.utils.data.TensorDataset(data_test, label_test)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False)

    model = ShallowNet(28*28, hidden_size, 10)

    # Xavier initialization
    torch.nn.init.xavier_uniform_(model.hidden.weight)
    torch.nn.init.xavier_uniform_(model.output.weight)

    # Cross-Entropy Loss
    loss_func = torch.nn.CrossEntropyLoss()

    # Adam Optimizer
    optim = torch.optim.Adam(model.parameters(), lr=eta)

    for n in range(nb_epochs):
        model.train()
        for x, t in train_loader:
            y = model(x)
            loss = loss_func(y, t)
            loss.backward()
            optim.step()
            optim.zero_grad()

        model.eval()
        test_loss = 0.
        correct = 0
        with torch.no_grad():
            for x, t in test_loader:
                y = model(x)
                test_loss += loss_func(y, t).item()
                correct += (torch.argmax(y, 1) == t).sum().item()

        test_loss /= len(test_loader.dataset)
        accuracy = 100. * correct / len(test_loader.dataset)
        print(f"Epoch {n+1}/{nb_epochs}, Test Loss: {test_loss:.4f}, Accuracy: {accuracy:.2f}%")

Epoch 1/5, Test Loss: 0.1783, Accuracy: 94.71%
Epoch 2/5, Test Loss: 0.1255, Accuracy: 96.53%
Epoch 3/5, Test Loss: 0.1357, Accuracy: 96.34%
Epoch 4/5, Test Loss: 0.1539, Accuracy: 96.20%
Epoch 5/5, Test Loss: 0.1313, Accuracy: 96.47%


In [27]:
# Select a test image
test_image, true_label = data_test[8], label_test[8]

# Preprocess the image
test_image = test_image.reshape(-1, 28*28)  # Flatten the image
test_image = torch.tensor(test_image, dtype=torch.float32)  # Convert to tensor

# Make a prediction
model.eval()  # Set the model to evaluation mode
with torch.no_grad():
    prediction = model(test_image)

# Get the predicted class
predicted_class = torch.argmax(prediction).item()

# Print the results
print(f"Predicted Class: {predicted_class}, True Class: {true_label.item()}")

Predicted Class: 9, True Class: 9


  test_image = torch.tensor(test_image, dtype=torch.float32)  # Convert to tensor


### For Deep network :

Note : with the activation functions sigmoid and MSE Loss we have for :

+ Result with Batch Size=32, η=0.001, Hidden Layer Sizes=(64, 32): Test Loss: 0.8211, Accuracy: 19.64%

#### Deep network with Loss fonction Cross entropy loss, the activation function to ReLU, Xavier initialization and Adam optimizer :

In [20]:
import gzip
import numpy
import torch

class DeepNet(torch.nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super(DeepNet, self).__init__()
        self.hidden1 = torch.nn.Linear(input_size, hidden_size1)
        self.hidden2 = torch.nn.Linear(hidden_size1, hidden_size2)
        self.output = torch.nn.Linear(hidden_size2, output_size)
        self.relu = torch.nn.ReLU()  # ReLU activation

    def forward(self, x):
        x = self.relu(self.hidden1(x))
        x = self.relu(self.hidden2(x))
        x = self.output(x)
        return x

if __name__ == '__main__':
    eta = 0.001 
    hidden_size1, hidden_size2 = (64, 32)
    batch_size = 32 

    ((data_train, label_train), (data_test, label_test)) = torch.load(gzip.open('mnist.pkl.gz'))

    label_train = torch.argmax(label_train, axis=1)
    label_test = torch.argmax(label_test, axis=1)

    train_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(data_train, label_train), 
        batch_size=batch_size, 
        shuffle=True
    )
    test_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(data_test, label_test), 
        batch_size=batch_size,  # Adjusted for test loader too
        shuffle=False
    )

    model = DeepNet(data_train.shape[1], hidden_size1, hidden_size2, 10)
    torch.nn.init.xavier_uniform_(model.hidden1.weight)  # Xavier initialization
    torch.nn.init.xavier_uniform_(model.hidden2.weight)
    torch.nn.init.xavier_uniform_(model.output.weight)

    loss_func = torch.nn.CrossEntropyLoss()  # Cross entropy loss
    optim = torch.optim.Adam(model.parameters(), lr=eta)  # Using Adam optimizer

    num_epochs = 5
    for epoch in range(num_epochs):
        model.train()
        for x, t in train_loader:
            y = model(x)
            loss = loss_func(y, t)
            loss.backward()
            optim.step()
            optim.zero_grad()

        model.eval()
        test_loss = 0.
        correct = 0
        with torch.no_grad():
            for x, t in test_loader:
                y = model(x)
                test_loss += loss_func(y, t).item()
                correct += (torch.argmax(y, 1) == t).sum().item()

        test_loss /= len(test_loader.dataset)
        accuracy = 100. * correct / len(test_loader.dataset)
        print(f"Epoch {epoch+1}/{num_epochs}, Test Loss: {test_loss:.4f}, Accuracy: {accuracy:.2f}%")

Epoch 1/5, Test Loss: 0.0046, Accuracy: 95.60%
Epoch 2/5, Test Loss: 0.0033, Accuracy: 96.74%
Epoch 3/5, Test Loss: 0.0030, Accuracy: 97.20%
Epoch 4/5, Test Loss: 0.0030, Accuracy: 97.27%
Epoch 5/5, Test Loss: 0.0027, Accuracy: 97.27%


Test :

In [21]:
# Select a test image
test_image, true_label = data_test[8], label_test[8]

# Preprocess the image
test_image = test_image.reshape(-1, 28*28)  # Flatten the image
test_image = torch.tensor(test_image, dtype=torch.float32)  # Convert to tensor

# Make a prediction
model.eval()  # Set the model to evaluation mode
with torch.no_grad():
    prediction = model(test_image)

# Get the predicted class
predicted_class = torch.argmax(prediction).item()

# Print the results
print(f"Predicted Class: {predicted_class}, True Class: {true_label.item()}")

Predicted Class: 9, True Class: 9


  test_image = torch.tensor(test_image, dtype=torch.float32)  # Convert to tensor


### Pretrained ResNet model

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import models
import torch.nn as nn
import torch.optim as optim

# Transformations
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# MNIST Dataset
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Data Loaders
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

# Load Pretrained ResNet18
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training Loop
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')

# Testing Loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the model on the 10000 test images: {100 * correct // total} %')