# "Gradient-Based Learning Applied to Document Recognition (1998)" | LeNet-5 Architecture

In [None]:
# Import Libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms
from torchvision.datasets import MNIST

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Pick the compute device: "cuda" when PyTorch reports CUDA as available,
# otherwise "cpu". The choice is printed so it is visible in the cell output.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

# Dataset & DataLoaders

In [None]:
def get_dataloaders(batch_size=64):
    """Build the MNIST training and test DataLoaders.

    Every image is resized to 32x32, converted to a tensor, and normalized
    with mean 0.1307 and standard deviation 0.3081. Both splits are read
    from (and, with ``download=True``, fetched into) ``./data``.

    Args:
        batch_size: Number of samples per batch, used for both loaders.

    Returns:
        A ``(train_loader, test_loader)`` tuple. Only the training loader
        shuffles its samples.
    """
    preprocess = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])

    # Both splits share every setting except the `train` flag.
    train_set, test_set = (
        MNIST(root="./data", train=is_train, download=True, transform=preprocess)
        for is_train in (True, False)
    )

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader

# Util Functions

In [None]:
def train(model, train_loader, epochs, criterion, optimizer, device):
    """Train ``model`` for ``epochs`` passes over ``train_loader``.

    Each batch is moved to ``device``, run through the model, and used for
    one optimizer step. A progress line is printed after every epoch.

    Args:
        model: Module to optimize; it is switched to training mode once,
            before the first epoch.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        epochs: Number of passes over ``train_loader``.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor. NOTE(review): the per-epoch loss weights each batch
            loss by its size, which presumes a batch-mean loss - confirm.
        optimizer: Optimizer bound to the model's parameters.
        device: Device the batches are moved to.

    Returns:
        Dict with ``"train_loss"`` and ``"train_acc"`` lists holding one
        value per epoch: the sample-weighted average loss and the fraction
        of correctly classified samples.
    """
    model.train()

    train_losses, train_accs = [], []

    for epoch in range(epochs):
        loss_sum = 0.0
        n_correct = 0
        n_seen = 0

        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)

            # Standard step: clear old gradients, forward, backward, update.
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            n_batch = labels.size(0)
            n_seen += n_batch
            # Scale by the batch size so a smaller final batch does not skew
            # the epoch average.
            loss_sum += loss.item() * n_batch
            # The predicted class is the index of the largest output.
            n_correct += (outputs.argmax(dim=1) == labels).sum().item()

        loss_per_epoch = loss_sum / n_seen
        accuracy_per_epoch = n_correct / n_seen
        train_losses.append(loss_per_epoch)
        train_accs.append(accuracy_per_epoch)

        print(f"Epoch: {epoch+1} / {epochs} - Train Loss: {loss_per_epoch:.4f}, Train Acc: {accuracy_per_epoch:.4f}")

    return {"train_loss": train_losses, "train_acc": train_accs}

In [None]:
def validate(model, test_loader, criterion, device):
    """Evaluate ``model`` on ``test_loader`` without updating its weights.

    The model is put in evaluation mode and gradients are disabled for the
    whole pass.

    Args:
        model: Module to evaluate; it is left in evaluation mode on return.
        test_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor. NOTE(review): the average below weights each batch
            loss by its size, which presumes a batch-mean loss - confirm.
        device: Device the batches are moved to.

    Returns:
        A ``(val_loss, accuracy)`` tuple: the sample-weighted average loss
        and the fraction of correctly classified samples.
    """
    model.eval()

    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            # Weight by the batch size so the result is a per-sample average
            # (same convention as `train`), even when the last batch is
            # smaller than the others.
            val_loss += loss.item() * batch_size

            # argmax over the class dimension; `dim` must be an int, not the
            # label tensor.
            predicted = torch.argmax(outputs, dim=1)
            total += batch_size
            correct += (predicted == labels).sum().item()

    val_loss /= total
    accuracy = correct / total

    return val_loss, accuracy


In [None]:
def plot_learning_curves(history, save_path='learning_curves.png'):
    """Plot per-epoch training loss and accuracy side by side.

    The figure is written to ``save_path`` and then displayed.

    Args:
        history: Mapping with ``"train_loss"`` and ``"train_acc"`` sequences
            holding one value per epoch (the structure returned by ``train``).
        save_path: Where the figure is saved; defaults to
            ``learning_curves.png`` in the working directory.
    """
    train_losses = history["train_loss"]
    train_accs = history["train_acc"]

    # Epochs are numbered from 1 on the x-axis.
    epochs = range(1, len(train_losses) + 1)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

    # Left panel: loss curve
    ax1.plot(epochs, train_losses, 'b-', label='Training Loss')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Loss')
    ax1.set_title('Training Loss')
    ax1.legend()
    ax1.grid(True)

    # Right panel: accuracy curve. Values are plotted as given; `train`
    # records accuracy as a fraction (correct / total), so the axis label
    # carries no percent unit.
    ax2.plot(epochs, train_accs, 'b-', label='Training Accuracy')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Accuracy')
    ax2.set_title('Training Accuracy')
    ax2.legend()
    ax2.grid(True)

    plt.tight_layout()
    plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.show()