In [None]:
# Sumani
# 29-7-2024

# Fully Connected Neural Network for MNIST Classification using PyTorch

In this notebook, we will walk through the process of building, training, and evaluating a fully connected neural network for classifying handwritten digits from the MNIST dataset using PyTorch. We'll cover data loading and preprocessing, model definition, training, evaluation, and visualization of the results.

In [None]:
# Import the necessary libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import seaborn as sns


# Step 1: Data Loading and Preprocessing

We'll start by defining the transformations to apply to the images and loading the MNIST dataset.

In [None]:
# Seed PyTorch's global RNG so that DataLoader shuffling and the model's weight
# initialisation repeat across "Restart Kernel -> Run All".
# NOTE(review): GPU kernels may still introduce small nondeterminism.
SEED = 42
torch.manual_seed(SEED)

# Per-channel normalisation statistics (MNIST images have a single channel).
# NOTE(review): presumably the MNIST training-set mean/std -- confirm if the data changes.
MNIST_MEAN = (0.1307,)
MNIST_STD = (0.3081,)

# Define the transformation to apply to the images
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert the images to tensors
    transforms.Normalize(MNIST_MEAN, MNIST_STD),  # Standardise pixel values
])

# Load the MNIST dataset (downloaded into the current directory if missing)
train_dataset = datasets.MNIST(root='.', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='.', train=False, download=True, transform=transform)

# Create data loaders.
# Training batches are shuffled; the test loader keeps dataset order
# (shuffle=False), so position k in the concatenated test predictions
# corresponds to test_dataset[k].
batch_size = 64
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


# Step 2: Exploratory Data Analysis (EDA)

Let's visualize some sample images from the MNIST dataset.

In [None]:
# Pull a single batch from the training loader and show its first nine digits
example_data, example_targets = next(iter(train_loader))

fig, axes = plt.subplots(3, 3, figsize=(8, 8))
for i, ax in enumerate(axes.flat):
    # Each image is indexed as [channel][row][col]; channel 0 is the only one used.
    ax.imshow(example_data[i][0], cmap='gray')
    ax.set_title(f'Label: {example_targets[i]}')
    ax.axis('off')
plt.tight_layout()
plt.show()

# Step 3: Define the Neural Network Model

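We will define a simple fully connected neural network with two linear layers: a hidden layer mapping the 784 flattened pixels to 512 units (followed by a ReLU activation) and an output layer mapping those 512 units to the 10 digit classes.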

In [None]:
# Define the neural network model
class FullyConnectedNet(nn.Module):
    """Two-layer fully connected classifier: 784 -> 512 -> ReLU -> 10.

    The forward pass returns the raw output of the last linear layer
    (no softmax is applied inside the model).
    """

    def __init__(self):
        super().__init__()

        # Hidden layer and its non-linearity
        self.layer_1 = nn.Linear(28 * 28, 512)
        self.activation_1 = nn.ReLU()

        # Output layer: one score per digit class
        self.layer_2 = nn.Linear(512, 10)

    def forward(self, x):
        """Flatten ``x`` to rows of 784 values and return a (rows, 10) score tensor."""
        flat = x.view(-1, 28 * 28)
        hidden = self.activation_1(self.layer_1(flat))
        return self.layer_2(hidden)

# Pick the compute device (CUDA when available, CPU otherwise),
# build the network, move it there, and print its layer summary.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = FullyConnectedNet()
model = model.to(device)
print(model)

# Step 4: Define the Loss Function and Optimizer

We'll use the cross-entropy loss and the stochastic gradient descent (SGD) optimizer.

In [None]:
# Cross-entropy loss on the model's output scores, minimised with plain SGD
# (learning rate 0.01) over all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    model.parameters(),
    lr=0.01,
)

# Step 5: Training the Model

Let's define the training loop.

In [None]:
# Define a function to calculate accuracy
def accuracy(outputs, labels):
    """Return the fraction of rows in ``outputs`` whose arg-max equals ``labels``.

    ``outputs`` is indexed as (row, class score); the predicted class of a row
    is the index of its largest score along dimension 1.
    """
    predicted = outputs.argmax(dim=1)
    n_correct = (predicted == labels).sum().item()
    return n_correct / len(labels)

# Define the training loop
def train(model, device, train_loader, criterion, optimizer, epoch, log_interval=200):
    """Run one training epoch and return its average loss and accuracy.

    Args:
        model: network to optimise; switched to training mode here.
        device: device the batches are moved to before the forward pass.
        train_loader: iterable yielding ``(inputs, labels)`` batches.
        criterion: loss callable ``criterion(outputs, labels)``.
            The epoch average assumes it returns the batch *mean* loss
            (the default reduction of ``nn.CrossEntropyLoss``).
        optimizer: optimizer stepping the model parameters.
        epoch: epoch number, used only in the progress messages.
        log_interval: print running metrics every this many batches.

    Returns:
        ``(epoch_loss, epoch_accuracy)``: sample-weighted averages over every
        batch seen in this epoch. Because the weights are updated after each
        batch, these are running training metrics, not a re-evaluation of the
        final weights. Both values are NaN if the loader yields no samples.
    """
    model.train()

    # Whole-epoch accumulators (weighted by batch size so a smaller final
    # batch does not skew the average).
    epoch_loss_sum = 0.0
    epoch_correct = 0
    epoch_samples = 0

    # Short-window accumulators, used only for the periodic progress print.
    # They are local, so nothing leaks from one epoch into the next.
    window_loss = 0.0
    window_acc = 0.0

    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_n = labels.size(0)
        batch_loss = loss.item()
        batch_correct = (outputs.argmax(dim=1) == labels).sum().item()

        epoch_loss_sum += batch_loss * batch_n
        epoch_correct += batch_correct
        epoch_samples += batch_n

        window_loss += batch_loss
        window_acc += batch_correct / batch_n
        if (i + 1) % log_interval == 0:
            print(f'Epoch {epoch}, Batch {i + 1}, Loss: {window_loss / log_interval:.4f}, Accuracy: {window_acc / log_interval:.4f}')
            window_loss = 0.0
            window_acc = 0.0

    if epoch_samples == 0:
        return float('nan'), float('nan')
    return epoch_loss_sum / epoch_samples, epoch_correct / epoch_samples

# Track training loss and accuracy (one entry per epoch)
train_losses = []
train_accuracies = []

# Train the model
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    epoch_loss, epoch_acc = train(model, device, train_loader, criterion, optimizer, epoch)
    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_acc)


# Step 6: Evaluating the Model

We'll define a function to evaluate the model on the test dataset.

In [None]:
# Define the test loop
def test(model, device, test_loader, criterion):
    model.eval()
    test_loss = 0.0
    test_acc = 0.0
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item()
            test_acc += accuracy(outputs, labels)
            all_preds.extend(outputs.argmax(dim=1).cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    print(f'Test Loss: {test_loss / len(test_loader):.4f}, Test Accuracy: {test_acc / len(test_loader):.4f}')
    return all_preds, all_labels

# Evaluate on the held-out test set; keep per-sample predictions and labels
# for the confusion matrix and error inspection below.
all_preds, all_labels = test(
    model=model,
    device=device,
    test_loader=test_loader,
    criterion=criterion,
)


# Step 7: Visualizing Training Progress

Let's plot the training loss and accuracy over the epochs.

In [None]:
# Plot training loss and accuracy, one panel each, against the epoch number
epochs = range(1, num_epochs + 1)

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

panels = (
    (loss_ax, train_losses, 'Training Loss', 'Loss'),
    (acc_ax, train_accuracies, 'Training Accuracy', 'Accuracy'),
)
for ax, series, name, y_label in panels:
    ax.plot(epochs, series, label=name)
    ax.set_xlabel('Epochs')
    ax.set_ylabel(y_label)
    ax.set_title(name)
    ax.legend()

fig.tight_layout()
plt.show()

# Step 8: Confusion Matrix

We'll plot the confusion matrix to see how well the model is performing across different classes.

In [None]:
# Confusion matrix of true labels (rows) against predictions (columns)
cm = confusion_matrix(all_labels, all_preds)
digit_classes = list(range(10))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=digit_classes)
disp.plot(cmap=plt.cm.Blues)
disp.ax_.set_title('Confusion Matrix')
plt.show()

# Step 9: Visualizing Predictions

Finally, let's visualize some sample predictions from the model.

In [None]:
# Visualize some sample images and predictions
samples, labels = next(iter(test_loader))
samples, labels = samples.to(device), labels.to(device)

# Make this cell self-sufficient: set evaluation mode explicitly instead of
# relying on an earlier cell, and skip autograd bookkeeping for inference.
model.eval()
with torch.no_grad():
    outputs = model(samples)
_, preds = torch.max(outputs, 1)

samples = samples.cpu().numpy()
fig, axes = plt.subplots(3, 3, figsize=(8, 8))
for ax in axes.ravel():
    ax.axis('off')
# zip stops at the shorter sequence, so a batch with fewer than nine images
# leaves the remaining panels blank instead of raising IndexError.
for ax, image, label, pred in zip(axes.ravel(), samples, labels.tolist(), preds.tolist()):
    ax.imshow(image.squeeze(), cmap='gray')
    ax.set_title(f'Label: {label}, Prediction: {pred}')
plt.tight_layout()
plt.show()


Let's display some examples where the model made incorrect predictions.

In [None]:
# Visualize incorrect predictions
# Positions in all_preds/all_labels are used as dataset indices below; this is
# valid only because test_loader was created with shuffle=False.
incorrect = [
    i for i, (pred, true) in enumerate(zip(all_preds, all_labels)) if pred != true
]

fig, axes = plt.subplots(3, 3, figsize=(8, 8))
for ax in axes.ravel():
    ax.axis('off')
# Show at most nine mistakes; with fewer than nine, the unused panels stay
# blank instead of raising IndexError.
for ax, idx in zip(axes.ravel(), incorrect):
    image, _ = test_loader.dataset[idx]
    ax.imshow(image[0], cmap='gray')
    ax.set_title(f'True: {all_labels[idx]}, Pred: {all_preds[idx]}')
plt.tight_layout()
plt.show()


# Step 10: Conclusion

In this notebook, we have built a simple fully connected neural network for classifying MNIST digits using PyTorch. We covered data loading, preprocessing, model definition, training, evaluation, and visualization of results. This provides a good foundation for understanding the process of developing neural network models for image classification tasks.