# Module 03: Building Your First CNN

**Hands-On: MNIST Digit Classification**

Time to build! In this module, you'll create a complete CNN from scratch and train it to recognize handwritten digits.

## What You'll Learn
- Design a CNN architecture
- Implement the network in PyTorch
- Train on the MNIST dataset
- Evaluate performance
- Visualize learned filters
- Understand what your network learned

## Time Required
60 minutes

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

## Part 1: Designing the CNN Architecture

### Our CNN Design for MNIST

```
Input: 1×28×28 (grayscale image)
    ↓
Conv2d(1→32, kernel=3×3, padding=1) → ReLU → MaxPool(2×2)
    → Output: 32×14×14
    ↓
Conv2d(32→64, kernel=3×3, padding=1) → ReLU → MaxPool(2×2)
    → Output: 64×7×7
    ↓
Flatten → 64×7×7 = 3136
    ↓
FC(3136→128) → ReLU → Dropout(0.25)
    ↓
FC(128→10)
    ↓
Output: 10 (digits 0-9)
```

In [None]:
class SimpleCNN(nn.Module):
    """Small two-block convolutional classifier for 1x28x28 images.

    Layout: two ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool(2x2)`` blocks,
    followed by ``Linear(64*7*7 -> 128) -> ReLU -> Dropout -> Linear(128 -> num_classes)``.

    Args:
        num_classes: Number of output logits. Defaults to 10.
        dropout: Drop probability used after the first fully connected
            layer. Defaults to 0.25.
    """

    def __init__(self, num_classes: int = 10, dropout: float = 0.25):
        super().__init__()

        # Convolutional layers (padding=1 with a 3x3 kernel keeps height/width)
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)  # 28x28 -> 28x28
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)  # 14x14 -> 14x14

        # Pooling layer, shared by both conv blocks
        self.pool = nn.MaxPool2d(2, 2)  # Divides dimensions by 2

        # Fully connected layers
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, num_classes)

        # Dropout for regularization
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Compute raw class logits for a batch of images.

        Args:
            x: Image tensor whose trailing dimensions are 1x28x28.

        Returns:
            Logits with one row per image and ``num_classes`` columns
            (no softmax applied).

        Raises:
            RuntimeError: If the feature map after the conv blocks is not
                64x7x7, i.e. the input did not have the expected size.
        """
        # Conv Block 1
        x = self.pool(F.relu(self.conv1(x)))  # 1x28x28 -> 32x14x14

        # Conv Block 2
        x = self.pool(F.relu(self.conv2(x)))  # 32x14x14 -> 64x7x7

        # Guard before flattening: reshaping with -1 would otherwise happily
        # re-partition a wrong-sized batch into a different number of rows.
        # RuntimeError matches what PyTorch raises for shape mismatches.
        if x.shape[-3:] != (64, 7, 7):
            raise RuntimeError(
                "SimpleCNN expects 1x28x28 inputs; feature map after the conv "
                f"blocks has shape {tuple(x.shape)} instead of (..., 64, 7, 7)"
            )

        # Flatten; reshape (unlike view) also accepts non-contiguous tensors
        x = x.reshape(-1, 64 * 7 * 7)  # 64x7x7 -> 3136

        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)

        return x


# Instantiate the network and place its weights on the selected device
model = SimpleCNN()
model = model.to(device)
print(model)

# Total element count across all parameter tensors
total_params = sum(param.numel() for param in model.parameters())
print(f"\nTotal parameters: {total_params:,}")

## Part 2: Prepare Data

In [None]:
# Preprocessing: convert each image to a tensor, then normalize it with
# mean 0.1307 and std 0.3081.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# Training split (downloaded on demand) and test split (no download requested)
train_dataset = datasets.MNIST(
    root="../data/datasets",
    train=True,
    transform=transform,
    download=True,
)
test_dataset = datasets.MNIST(
    root="../data/datasets",
    train=False,
    transform=transform,
)

# Shuffled mini-batches of 64 for training; fixed-order batches of 1000 for evaluation
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=1000, shuffle=False)

print(f"Training samples: {len(train_dataset)}")
print(f"Test samples: {len(test_dataset)}")

## Part 3: Training the CNN

In [None]:
# Loss function and optimizer: cross-entropy on the logits, Adam with lr 1e-3
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


def train_epoch(model, train_loader, criterion, optimizer, device):
    """Run one optimization pass over ``train_loader``.

    The model is switched to training mode. Each batch is moved to
    ``device``, pushed through a forward/backward pass, and followed by
    one optimizer step.

    Returns:
        A ``(mean_batch_loss, accuracy_percent)`` tuple: the sum of the
        per-batch loss values divided by the number of batches, and
        ``100 * correct / samples_seen``.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard step: clear old gradients, forward, backward, update
        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # Bookkeeping uses the logits computed before this step's update
        loss_sum += batch_loss.item()
        predictions = logits.argmax(dim=1)
        n_seen += labels.size(0)
        n_correct += (predictions == labels).sum().item()

    mean_loss = loss_sum / len(train_loader)
    accuracy = 100 * n_correct / n_seen
    return mean_loss, accuracy


def test(model, test_loader, criterion, device):
    model.eval()
    test_loss = 0
    correct = 0

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()
            _, predicted = torch.max(output.data, 1)
            correct += (predicted == target).sum().item()

    test_loss /= len(test_loader)
    accuracy = 100 * correct / len(test_loader.dataset)
    return test_loss, accuracy


# Train!
epochs = 10
# One list of per-epoch values for each tracked metric
history = {key: [] for key in ("train_loss", "train_acc", "test_loss", "test_acc")}

print("Training CNN...")
for epoch in range(epochs):
    # One optimization pass, then an evaluation pass on the test loader
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    test_loss, test_acc = test(model, test_loader, criterion, device)

    # Record this epoch's metrics for plotting later
    history["train_loss"] += [train_loss]
    history["train_acc"] += [train_acc]
    history["test_loss"] += [test_loss]
    history["test_acc"] += [test_acc]

    print(f"Epoch {epoch+1}/{epochs}: Train Acc: {train_acc:.2f}% | Test Acc: {test_acc:.2f}%")

print(f"\nFinal Test Accuracy: {history['test_acc'][-1]:.2f}%")

## Part 4: Visualizing Results

In [None]:
# Plot training history: loss on the left, accuracy on the right
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(14, 5))

# Left panel: train vs. test loss for each recorded epoch
ax1.plot(history["train_loss"], "b-o", label="Train")
ax1.plot(history["test_loss"], "r-o", label="Test")
ax1.set(xlabel="Epoch", ylabel="Loss", title="Loss Over Time")
ax1.legend()
ax1.grid(True, alpha=0.3)

# Right panel: train vs. test accuracy for each recorded epoch
ax2.plot(history["train_acc"], "b-o", label="Train")
ax2.plot(history["test_acc"], "r-o", label="Test")
ax2.set(xlabel="Epoch", ylabel="Accuracy (%)", title="Accuracy Over Time")
ax2.legend()
ax2.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

## Summary

Congratulations! You built and trained your first CNN!

### What You Achieved:
- Built a CNN architecture from scratch
- Trained on 60,000 MNIST images
- Achieved ~99% accuracy on the test set
- Much better than a simple neural network!

### Next: Module 04 - Training & Optimization
Learn advanced techniques to train even better models!