# Digit Recognition

Build a deep learning model to classify handwritten digits. You can use convolutional neural networks (CNNs) or other machine learning algorithms for this task.

In [None]:
# Importing necessary libraries for PyTorch and data visualization
import os
import torch
import random
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torchvision import datasets, transforms

# Fix the seed of every random number generator used in this notebook so that
# repeated runs produce the same results.
manualSeed = 999
# To draw a different seed on each run, use instead:
#   manualSeed = random.randint(1, 10000)
print("Random Seed: ", manualSeed)
torch.manual_seed(manualSeed)  # PyTorch generator
random.seed(manualSeed)  # Python's built-in generator
# Ask PyTorch to use deterministic implementations only (needed for reproducible results)
torch.use_deterministic_algorithms(True)

## Preparing the dataset

In [None]:
# Create the folders used for the dataset and for the saved checkpoints.
# os.makedirs with exist_ok=True creates any missing parent directory and does
# nothing when the folder already exists, so no separate existence check is needed.
folders = ["../data", "../results/", "../results/digit-recognition/"]
for f in folders:
    os.makedirs(f, exist_ok=True)

In [None]:
# Number of samples per batch for the training and the test loaders
batch_size_train = 64
batch_size_test = 1000

# Preprocessing applied to every image: convert it to a tensor, then normalize
# with mean 0.1307 and std 0.3081 (presumably the MNIST statistics - TODO confirm).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)

# MNIST training split; downloaded into ../data when it is not there yet
train_dataset = datasets.MNIST(
    root="../data", train=True, transform=transform, download=True
)

# MNIST test split; no download is requested here, so the files must already be in ../data
test_dataset = datasets.MNIST(root="../data", train=False, transform=transform)

# Batch iterators over both splits. Both loaders reshuffle their data.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size_train,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size_test,
    shuffle=True,
)

In [None]:
# Take the first batch delivered by the test loader; it is reused later to
# display the model's predictions.
batch_idx, (example_data, example_targets) = next(enumerate(test_loader))

# Show the dimensions of the image tensor of that batch
print("Shape of example_data:", example_data.shape)

# Draw the first 25 images of the batch in a 5x5 grid, each titled with its label
fig, axes = plt.subplots(nrows=5, ncols=5, figsize=(10, 15))
for i, ax in enumerate(axes.flat):
    # Index 0 selects the first channel of the i-th image
    ax.imshow(example_data[i, 0], cmap="gray", interpolation="none")
    ax.set_title("{}".format(example_targets[i]))
    ax.axis('off')

plt.tight_layout()
plt.show()

## Building the network

In [None]:
class AlexNetMNIST(nn.Module):
    """
    AlexNet-style convolutional classifier for single-channel images.

    The shape comments below assume a 28x28 input (N is the batch size).

    Args:
        num_classes (int): Number of output scores produced per image.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        feature_layers = [
            nn.Conv2d(1, 64, kernel_size=3, stride=2, padding=1),  # -> (N, 64, 14, 14)
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),  # -> (N, 64, 7, 7)
            nn.Conv2d(64, 192, kernel_size=3, padding=1),  # -> (N, 192, 7, 7)
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),  # -> (N, 192, 3, 3)
            nn.Conv2d(192, 384, kernel_size=3, padding=1),  # -> (N, 384, 3, 3)
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),  # -> (N, 256, 3, 3)
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),  # -> (N, 256, 3, 3)
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),  # -> (N, 256, 1, 1)
        ]
        self.features = nn.Sequential(*feature_layers)

        # Forces a 3x3 spatial size so the classifier always receives 256 * 3 * 3
        # values; with 28x28 inputs the incoming map is 1x1 and gets expanded.
        self.avgpool = nn.AdaptiveAvgPool2d((3, 3))

        classifier_layers = [
            nn.Dropout(),
            nn.Linear(256 * 3 * 3, 1024),  # -> (N, 1024)
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(1024, 512),  # -> (N, 512)
            nn.ReLU(inplace=True),
            nn.Linear(512, num_classes),  # -> (N, num_classes)
        ]
        self.classifier = nn.Sequential(*classifier_layers)

        # Replace the default parameter values with the scheme defined below
        self._initialize_weights()

    def _initialize_weights(self):
        """
        Re-initialize the parameters of every convolution, batch-norm and linear layer.

        Convolutions get Kaiming-normal weights (fan_out, ReLU) and zero biases,
        batch-norm layers get weight 1 and bias 0, linear layers get weights
        drawn from N(0, 0.01) and zero biases.
        """
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
                if layer.bias is not None:
                    init.constant_(layer.bias, 0)
                continue
            if isinstance(layer, nn.BatchNorm2d):
                init.constant_(layer.weight, 1)
                init.constant_(layer.bias, 0)
                continue
            if isinstance(layer, nn.Linear):
                init.normal_(layer.weight, 0, 0.01)
                init.constant_(layer.bias, 0)

    def forward(self, x):
        """
        Compute the class scores for a batch of images.

        Args:
            x (torch.Tensor): Batch of images; assumed (N, 1, H, W) since the
                first convolution expects one input channel.

        Returns:
            torch.Tensor: Unnormalized scores of shape (N, num_classes).
        """
        pooled = self.avgpool(self.features(x))
        flat = torch.flatten(pooled, 1)  # keep the batch axis, merge all the others
        return self.classifier(flat)

In [None]:
# Training hyper-parameters
learning_rate = 0.001  # step size actually given to the optimizer below
n_epochs = 3  # number of passes over the training set
train_losses = []  # loss values recorded by train() at each logging step

# Create the model together with its optimizer and its loss function
model = AlexNetMNIST()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()
print(model)

In [None]:
import shutil

from torch.utils.tensorboard import SummaryWriter

# Path to the log directory
log_dir = "./runs/digit-recognition/"

# Start from a clean log directory: remove the previous one if it exists
shutil.rmtree(log_dir, ignore_errors=True)

# Initialize the TensorBoard writer
writer = SummaryWriter(log_dir=log_dir)

# Record the network architecture by running the model on a dummy input
dummy_input = torch.rand(1, 1, 28, 28)  # one single-channel 28x28 image
writer.add_graph(model, dummy_input)

# Write the pending events to disk now so the graph is available in TensorBoard
# right away; the writer stays open for any later logging.
writer.flush()

## Training the model

In [None]:
log_interval = 50  # a progress line is printed every `log_interval` batches
n1 = len(train_loader.dataset)  # number of training samples
n2 = len(train_loader)  # number of batches per epoch


def train(epoch):
    """
    Run one training epoch and save a checkpoint at the end.

    Every `log_interval` batches the current loss is printed and appended to
    `train_losses`. After the last batch, the model and optimizer states are
    written to ../results/digit-recognition/.

    Args:
        epoch (int): Current epoch number, used in the progress lines and in
            the checkpoint file names.
    """
    progress_line = "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}"

    model.train()  # training mode
    for batch_idx, (data, target) in enumerate(train_loader):
        # One optimization step: reset gradients, forward, backward, update
        optimizer.zero_grad()
        loss = criterion(model(data), target)
        loss.backward()
        optimizer.step()

        # Only every `log_interval`-th batch is reported and recorded
        if batch_idx % log_interval != 0:
            continue

        batch_loss = loss.item()
        samples_seen = batch_idx * len(data)
        percent_done = 100.0 * batch_idx / n2
        print(progress_line.format(epoch, samples_seen, n1, percent_done, batch_loss))
        train_losses.append(batch_loss)

    # Checkpoint the model and the optimizer once the epoch is finished
    torch.save(
        model.state_dict(),
        '../results/digit-recognition/model_epoch_{}.pth'.format(epoch),
    )
    torch.save(
        optimizer.state_dict(),
        '../results/digit-recognition/optimizer_epoch_{}.pth'.format(epoch),
    )

def test():
    """
    Evaluate the model on the test set and print the results.

    Runs the model over every batch of `test_loader` without tracking
    gradients, then prints the average cross-entropy loss per sample and the
    accuracy. Nothing is returned.
    """
    model.eval()  # Set the model to evaluation mode
    test_loss = 0
    correct = 0
    n_samples = len(test_loader.dataset)
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)  # Forward pass
            # Sum (not mean) of the per-sample losses, so that dividing by the
            # dataset size below gives the true average. The functional form is
            # needed because nn.CrossEntropyLoss takes `reduction` in its
            # constructor, not when it is called.
            test_loss += F.cross_entropy(output, target, reduction="sum").item()
            pred = output.argmax(dim=1, keepdim=True)  # Get the predicted labels
            # Count correct predictions
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= n_samples
    # Print test set results
    print(
        "\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n".format(
            test_loss,
            correct,
            n_samples,
            100.0 * correct / n_samples,
        )
    )

In [None]:
# Main loop: epochs are numbered from 1 to n_epochs, and each training pass is
# followed by an evaluation on the test set.
for epoch in range(1, 1 + n_epochs):
    train(epoch)
    test()

## Evaluating the Model's Performance

In [None]:
# Create a new figure for plotting
fig = plt.figure()

# Plot the recorded training losses as a blue line
plt.plot(train_losses, color="blue")
plt.grid(True)

# Add legend and labels.
# train_losses holds one value per logging step (every `log_interval` batches),
# so the x axis counts logging steps, not training examples. The loss is the
# one computed by nn.CrossEntropyLoss.
plt.legend(["Train Loss"], loc="upper right")
plt.xlabel("Logging step (one point every {} batches)".format(log_interval))
plt.ylabel("Cross-entropy loss")

# Display the plot
plt.show()

In [None]:
# Switch to evaluation mode explicitly so dropout is disabled, whatever mode
# the previous cells left the model in.
model.eval()
with torch.no_grad():
    # Gradients are not needed for inference
    output = model(example_data)  # Forward pass of the model with example_data
    predictions = output.argmax(dim=1)  # Predicted class of every example, computed once
    fig, axes = plt.subplots(5, 5, figsize=(10, 15))  # Create a 5x5 grid of subplots

    # Show the first 25 examples; the title is green when the prediction is
    # right and red when it is wrong
    for i, ax in enumerate(axes.flat):
        ax.imshow(example_data[i][0], cmap="gray", interpolation="none")
        prediction = predictions[i].item()
        target = example_targets[i].item()
        ax.set_title(f"pred={prediction}, real={target}", color='green' if prediction == target else 'red')
        ax.axis('off')  # Hide axis labels

    plt.tight_layout()  # Adjust layout
    plt.show()