# CNN Playground: Exploring Convolutional Neural Networks with MNIST

This notebook is designed for hands-on experimentation with different CNN architectures. You can mix and match various layers to understand their impact on model learning and performance.

## How to Use This Notebook:
1. Run the setup cells (imports and data loading)
2. Browse the **Layer Library** section
3. Copy-paste layers into the `CustomCNN` class
4. Train your model and observe the results
5. Experiment with different combinations!

## 1. Setup and Imports

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

In [None]:
# Fix PyTorch's global RNG seed so repeated runs start from the same random state
SEED = 42
torch.manual_seed(SEED)

In [None]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(f'Using device: {device}')

In [None]:
# ============================================================================
# DATA LOADING
# ============================================================================

def load_mnist_data(batch_size=64):
    """Build the MNIST train/test datasets and their DataLoaders.

    Args:
        batch_size: Number of samples per batch for both loaders.

    Returns:
        Tuple ``(train_loader, test_loader, train_dataset, test_dataset)``.
        The training loader shuffles; the test loader does not.

    Side effects:
        Downloads MNIST into ``./data`` if needed (``download=True``) and
        prints the dataset sizes and the shape of the first training image.
    """
    # Convert images to tensors, then standardise them
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),  # MNIST mean and std
    ])

    # Options shared by both splits; only the `train` flag differs
    shared_options = dict(root='./data', download=True, transform=preprocessing)
    train_dataset = datasets.MNIST(train=True, **shared_options)
    test_dataset = datasets.MNIST(train=False, **shared_options)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    first_image = train_dataset[0][0]
    print(f'Training samples: {len(train_dataset)}')
    print(f'Test samples: {len(test_dataset)}')
    print(f'Image shape: {first_image.shape}\n')

    return train_loader, test_loader, train_dataset, test_dataset


def visualize_samples(dataset, num_samples=10):
    """Show the first ``num_samples`` images of ``dataset`` in a grid.

    Args:
        dataset: Indexable collection with ``len()`` whose items are
            ``(image, label)`` pairs; ``image.squeeze()`` is passed to
            ``imshow``.
        num_samples: How many images to display. Values larger than the
            dataset are clamped to ``len(dataset)``; nothing is drawn when
            the resulting count is zero or negative.

    The grid has at most five columns and as many rows as needed, so the
    default of 10 samples yields the 2x5 layout with a 12x5 inch figure.
    """
    # Never ask for more images than the dataset holds
    num_samples = min(num_samples, len(dataset))
    if num_samples <= 0:
        return

    max_cols = 5
    cols = min(num_samples, max_cols)
    rows = -(-num_samples // cols)  # ceiling division

    # squeeze=False keeps `axes` two-dimensional even for a 1x1 or 1xN grid
    fig, axes = plt.subplots(rows, cols, figsize=(2.4 * cols, 2.5 * rows), squeeze=False)
    for i, ax in enumerate(axes.flat):
        ax.axis('off')
        if i >= num_samples:
            # Leftover cell in the last row: leave it blank
            continue
        img, label = dataset[i]
        ax.imshow(img.squeeze(), cmap='gray')
        ax.set_title(f'Label: {label}')
    plt.tight_layout()
    plt.show()

## 2. Layer Library

Copy and paste any layer into the `CustomCNN` class to experiment.

In [None]:
#######################################
# CONVOLUTIONAL LAYER OPTIONS:
#######################################

# Basic Conv Layer (3x3 kernel, commonly used)
# self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)

# Medium Conv Layer (3x3 kernel)
# self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)

# Deep Conv Layer (3x3 kernel)
# self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)

# Large Kernel Conv (5x5 kernel, captures more spatial info)
# self.conv_large = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, padding=2)

# Small Kernel Conv (1x1 kernel, for channel dimension reduction)
# self.conv_1x1 = nn.Conv2d(in_channels=32, out_channels=16, kernel_size=1)


In [None]:
#######################################
# POOLING LAYER OPTIONS:
#######################################

# Max Pooling (2x2, most common - keeps maximum values)
# self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

# Max Pooling (3x3 window, stride 2 - slightly more aggressive downsampling: 28x28 -> 13x13 instead of 14x14)
# self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2)

# Average Pooling (2x2, smoother than max pooling)
# self.avgpool = nn.AvgPool2d(kernel_size=2, stride=2)

# Average Pooling (3x3)
# self.avgpool3 = nn.AvgPool2d(kernel_size=3, stride=2)

# Global Average Pooling (reduces each feature map to a single value)
# self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1))

# Global Max Pooling
# self.global_maxpool = nn.AdaptiveMaxPool2d((1, 1))

In [None]:
#######################################
# ACTIVATION FUNCTION OPTIONS:
# Note: Can be used as layers or functions (F.relu(), etc.)
#######################################

# ReLU (most common, fast and effective)
# self.relu = nn.ReLU()

# Leaky ReLU (allows small negative values)
# self.leaky_relu = nn.LeakyReLU(negative_slope=0.01)

# ELU (smoother than ReLU)
# self.elu = nn.ELU(alpha=1.0)

# GELU (used in transformers, smooth)
# self.gelu = nn.GELU()

# Sigmoid (outputs between 0 and 1)
# self.sigmoid = nn.Sigmoid()

# Tanh (outputs between -1 and 1)
# self.tanh = nn.Tanh()

# Swish/SiLU (self-gated, smooth)
# self.silu = nn.SiLU()

In [None]:
#######################################
# FULLY CONNECTED (LINEAR) LAYER OPTIONS:
# Note: Input size depends on the output of previous layers!
# For MNIST (28x28), after two 2x2 pooling layers: 7x7 spatial size
# (assuming the conv layers preserve spatial size, e.g. 3x3 kernels with padding=1)
# Example: 32 channels × 7 × 7 = 1568 input features
#######################################

# Large FC Layer (from flattened conv output to 512 units)
# self.fc1 = nn.Linear(in_features=1568, out_features=512)

# Medium FC Layer (512 to 256 units)
# self.fc2 = nn.Linear(in_features=512, out_features=256)

# Small FC Layer (256 to 128 units)
# self.fc3 = nn.Linear(in_features=256, out_features=128)

# Tiny FC Layer (128 to 64 units)
# self.fc4 = nn.Linear(in_features=128, out_features=64)

# Output Layer (to 10 classes for MNIST)
# self.fc_out = nn.Linear(in_features=128, out_features=10)

# Alternative smaller output path
# self.fc_out_small = nn.Linear(in_features=64, out_features=10)

# Direct output (from conv features)
# self.fc_direct = nn.Linear(in_features=1568, out_features=10)


## 3. Modelling Section

In [None]:
class CustomCNN(nn.Module):
    """
    Editable CNN for the playground - swap in your own layers!

    INSTRUCTIONS:
    1. Copy layers from the library above
    2. Paste them in __init__
    3. Define the forward pass using your layers
    4. Remember to match input/output dimensions!

    The example below stacks two conv -> ReLU -> max-pool stages, flattens
    the result, and classifies it with two linear layers (10 outputs).
    """

    def __init__(self):
        super().__init__()

        # ====================================
        # PASTE YOUR CHOSEN LAYERS HERE
        # ====================================

        # Example architecture (feel free to modify!):
        # -- feature extractor --
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)  # reused after both convs
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        # -- classifier head: 32 channels x 7 x 7 after two poolings --
        self.fc1 = nn.Linear(in_features=32 * 7 * 7, out_features=128)
        self.fc_out = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Map a batch of images to one row of 10 class scores per sample."""

        # ====================================
        # DEFINE YOUR FORWARD PASS HERE
        # ====================================

        # Example forward pass (modify to match your layers!):
        x = self.maxpool(F.relu(self.conv1(x)))  # 28x28 -> 14x14
        x = self.maxpool(F.relu(self.conv2(x)))  # 14x14 -> 7x7

        # Flatten everything except the batch dimension
        batch_size = x.shape[0]
        x = x.view(batch_size, -1)

        hidden = F.relu(self.fc1(x))
        return self.fc_out(hidden)

In [None]:
def print_model_summary(model):
    """Print a per-layer parameter table, overall totals and the model size.

    Args:
        model: The ``nn.Module`` to summarise.

    The table has one row per direct child of ``model``. If the model also
    owns parameters registered directly on itself (not inside a child), an
    extra ``(direct parameters)`` row is printed for them.

    Totals are taken from ``model.parameters()`` rather than by summing the
    child rows, so directly registered parameters are included and a
    parameter shared by several children is counted once.

    The reported size adds the bytes of all parameters and buffers,
    expressed in MB (1 MB = 1024**2 bytes).
    """
    print("="*70)
    print("MODEL SUMMARY")
    print("="*70)

    print(f"{'Layer':<30} {'Type':<25} {'Params':<15}")
    print("-"*70)

    for name, module in model.named_children():
        num_params = sum(p.numel() for p in module.parameters())
        module_type = module.__class__.__name__
        print(f"{name:<30} {module_type:<25} {num_params:<15,}")

    # Parameters attached to the model itself would otherwise be invisible in the table
    direct_params = sum(p.numel() for p in model.parameters(recurse=False))
    if direct_params:
        print(f"{'(direct parameters)':<30} {model.__class__.__name__:<25} {direct_params:<15,}")

    # model.parameters() yields every parameter exactly once
    all_params = list(model.parameters())
    total_params = sum(p.numel() for p in all_params)
    trainable_params = sum(p.numel() for p in all_params if p.requires_grad)

    print("="*70)
    print(f"Total parameters: {total_params:,}")
    print(f"Trainable parameters: {trainable_params:,}")
    print(f"Non-trainable parameters: {total_params - trainable_params:,}")
    print("="*70)

    # Calculate model size
    param_size = sum(p.numel() * p.element_size() for p in all_params)
    buffer_size = sum(b.numel() * b.element_size() for b in model.buffers())
    size_mb = (param_size + buffer_size) / 1024**2
    print(f"Model size: {size_mb:.2f} MB")
    print("="*70)

## 4. Training and Evaluation

In [None]:
# ============================================================================
# TRAINING AND EVALUATION
# ============================================================================

def train_epoch(model, loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        loader: Sized iterable of ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer whose ``zero_grad``/``step`` drive the update.
        device: Device that each batch is moved to before the forward pass.

    Returns:
        Tuple ``(mean_loss, accuracy)``: the average of the per-batch loss
        values over ``len(loader)`` batches, and the percentage of samples
        whose arg-max prediction matched the label.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    pbar = tqdm(loader, desc='Training')
    # Count batches from 1 so the progress bar can show a true running mean
    for batches_seen, (inputs, labels) in enumerate(pbar, start=1):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Divide by the batches processed so far, not len(loader); otherwise
        # the displayed loss is scaled down until the final batch.
        pbar.set_postfix({'loss': running_loss / batches_seen, 'acc': 100 * correct / total})

    return running_loss / len(loader), 100 * correct / total


def evaluate(model, loader, criterion, device):
    """Measure loss and accuracy of ``model`` on ``loader`` without training.

    The model is put into eval mode and gradients are disabled for the pass.

    Args:
        model: Network to evaluate.
        loader: Sized iterable of ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device that each batch is moved to.

    Returns:
        Tuple ``(mean_loss, accuracy)``: per-batch losses averaged over
        ``len(loader)``, and the percentage of correctly classified samples.
    """
    model.eval()
    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    with torch.no_grad():
        for batch_inputs, batch_labels in tqdm(loader, desc='Evaluating'):
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_inputs)
            loss_sum += criterion(logits, batch_labels).item()

            # Index of the largest score along the class dimension
            guesses = logits.max(dim=1).indices
            num_seen += batch_labels.size(0)
            num_correct += guesses.eq(batch_labels).sum().item()

    mean_loss = loss_sum / len(loader)
    accuracy = 100 * num_correct / num_seen
    return mean_loss, accuracy


def train_model(model, train_loader, test_loader, criterion, optimizer, scheduler, num_epochs, device):
    """Run the full train/evaluate cycle for ``num_epochs`` epochs.

    Each epoch trains on ``train_loader``, evaluates on ``test_loader``,
    steps the learning-rate scheduler once, and prints that epoch's metrics.

    Returns:
        Tuple ``(train_losses, train_accs, test_losses, test_accs)`` of
        lists holding one entry per epoch.
    """
    train_losses, train_accs, test_losses, test_accs = [], [], [], []
    history = (train_losses, train_accs, test_losses, test_accs)

    print("Starting training...\n")
    for epoch_number in range(1, num_epochs + 1):
        print(f"Epoch {epoch_number}/{num_epochs}")

        # One optimisation pass, then a measurement pass on held-out data
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
        test_loss, test_acc = evaluate(model, test_loader, criterion, device)

        # Record this epoch's four metrics in their matching series
        epoch_metrics = (train_loss, train_acc, test_loss, test_acc)
        for series, value in zip(history, epoch_metrics):
            series.append(value)

        # Update learning rate
        scheduler.step()

        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
        print(f"Test Loss: {test_loss:.4f} | Test Acc: {test_acc:.2f}%\n")

    print("Training completed!")
    return history


## 5. Visualisation

In [None]:
def plot_training_history(train_losses, train_accs, test_losses, test_accs):
    """Draw loss and accuracy curves side by side.

    Args:
        train_losses: Per-epoch training loss values.
        train_accs: Per-epoch training accuracy values (percent).
        test_losses: Per-epoch test loss values.
        test_accs: Per-epoch test accuracy values (percent).

    The left panel shows both loss curves and the right panel both accuracy
    curves, each with a legend and grid.
    """
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # One entry per panel: (train curve, test curve, train label, test label, y label, title)
    panels = (
        (train_losses, test_losses, 'Train Loss', 'Test Loss',
         'Loss', 'Training and Test Loss'),
        (train_accs, test_accs, 'Train Accuracy', 'Test Accuracy',
         'Accuracy (%)', 'Training and Test Accuracy'),
    )

    for ax, panel in zip(axes, panels):
        train_curve, test_curve, train_label, test_label, y_label, title = panel
        ax.plot(train_curve, label=train_label, marker='o')
        ax.plot(test_curve, label=test_label, marker='s')
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.set_title(title)
        ax.legend()
        ax.grid(True)

    plt.tight_layout()
    plt.show()

## 6. Main Execution

In [None]:
def main():
    """Run the whole experiment: load data, build the model, train, and plot.

    Uses the module-level ``device`` for the model and training, a batch
    size of 64, Adam at lr=0.001 with a StepLR schedule (halved every
    5 epochs), and 10 training epochs.
    """
    banner = "="*70

    print(banner)
    print("CNN PLAYGROUND FOR MNIST")
    print(banner)
    print()

    # Load data (the test dataset itself is not needed here, only its loader)
    train_loader, test_loader, train_dataset, _ = load_mnist_data(batch_size=64)

    # Visualize samples
    print("Visualizing sample data...")
    visualize_samples(train_dataset)

    # Create model
    print("\nCreating model...")
    model = CustomCNN().to(device)
    print_model_summary(model)

    # Setup training
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

    # Train model
    num_epochs = 10
    history = train_model(
        model, train_loader, test_loader, criterion, optimizer, scheduler, num_epochs, device
    )

    # Visualize results; history is (train_losses, train_accs, test_losses, test_accs)
    print("\nPlotting training history...")
    plot_training_history(*history)

    print("\n" + banner)
    print("EXPERIMENT COMPLETED!")
    print(banner)
    print("\nEXPERIMENT IDEAS:")
    experiment_ideas = (
        "1. Try different layer combinations from the library",
        "2. Experiment with depth (1-4 conv layers)",
        "3. Compare different pooling strategies",
        "4. Try different activation functions",
    )
    for idea in experiment_ideas:
        print(idea)
    print("\nModify the CustomCNN class and run again!")


# Run the experiment when this code executes as the top-level program
if __name__ == "__main__":
    main()