# MNIST CNN Training Notebook

This notebook implements a CNN model for MNIST digit classification.

In [None]:
# Install required packages
!pip install tqdm



In [None]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm
import os
from datetime import datetime
import torch.nn.functional as F

In [None]:
# Define the CNN model
dropout_value = 0.1


def _conv_unit(in_ch, out_ch, kernel, pad, regularize=True):
    """Build one convolutional stage as an ``nn.Sequential``.

    The stage is always ``Conv2d(bias=False) -> ReLU``; when ``regularize`` is
    true it is followed by ``BatchNorm2d(out_ch)`` and ``Dropout(dropout_value)``.
    """
    stage = [
        nn.Conv2d(in_channels=in_ch, out_channels=out_ch,
                  kernel_size=(kernel, kernel), padding=pad, bias=False),
        nn.ReLU(),
    ]
    if regularize:
        stage.append(nn.BatchNorm2d(out_ch))
        stage.append(nn.Dropout(dropout_value))
    return nn.Sequential(*stage)


class MnistCNN(nn.Module):
    """Small fully-convolutional classifier for 1x28x28 digit images.

    The network ends in a 5x5 average pool followed by a 1x1 convolution that
    produces 10 channels; ``forward`` returns log-probabilities over those 10
    classes. Spatial sizes in the comments below assume a 28x28 input.
    """

    def __init__(self):
        super().__init__()

        # Input block: 28x28 -> 28x28 (padded), then 28x28 -> 26x26.
        self.convblock1 = _conv_unit(1, 12, kernel=3, pad=1)
        self.convblock2 = _conv_unit(12, 24, kernel=3, pad=0)

        # Transition: 1x1 channel squeeze (no norm/dropout), then 26x26 -> 13x13.
        self.convblock3 = _conv_unit(24, 18, kernel=1, pad=0, regularize=False)
        self.pool1 = nn.MaxPool2d(2, 2)

        # Feature blocks: 13 -> 13 (padded) -> 11 -> 9 -> 7 -> 5.
        self.convblock4 = _conv_unit(18, 24, kernel=3, pad=1)
        self.convblock41 = _conv_unit(24, 24, kernel=3, pad=0)
        self.convblock42 = _conv_unit(24, 18, kernel=3, pad=0)
        self.convblock5 = _conv_unit(18, 12, kernel=3, pad=0)
        self.convblock6 = _conv_unit(12, 12, kernel=3, pad=0)

        # Average over the remaining 5x5 map: output_size = 1, RF 34.
        self.gap1 = nn.Sequential(
            nn.AvgPool2d(kernel_size=(5, 5))
        )

        # Output block: bare 1x1 convolution mapping 12 channels to 10 class scores.
        self.convblock7 = nn.Sequential(
            nn.Conv2d(in_channels=12, out_channels=10, kernel_size=(1, 1), padding=0, bias=False),
        )

    def forward(self, x):
        """Run every stage in order and return log-softmax scores reshaped to (-1, 10)."""
        stages = (
            self.convblock1,
            self.convblock2,
            self.convblock3,
            self.pool1,
            self.convblock4,
            self.convblock41,
            self.convblock42,
            self.convblock5,
            self.convblock6,
            self.gap1,
            self.convblock7,
        )
        for stage in stages:
            x = stage(x)

        scores = x.view(-1, 10)
        return F.log_softmax(scores, dim=-1)

In [None]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = MnistCNN().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 12, 28, 28]             108
              ReLU-2           [-1, 12, 28, 28]               0
       BatchNorm2d-3           [-1, 12, 28, 28]              24
           Dropout-4           [-1, 12, 28, 28]               0
            Conv2d-5           [-1, 24, 26, 26]           2,592
              ReLU-6           [-1, 24, 26, 26]               0
       BatchNorm2d-7           [-1, 24, 26, 26]              48
           Dropout-8           [-1, 24, 26, 26]               0
            Conv2d-9           [-1, 18, 26, 26]             432
             ReLU-10           [-1, 18, 26, 26]               0
        MaxPool2d-11           [-1, 18, 13, 13]               0
           Conv2d-12           [-1, 24, 13, 13]           3,888
             ReLU-13           [-1, 24, 13, 13]               0
      BatchNorm2d-14           [-1, 24,

In [None]:
# Utility functions
def save_model(model, accuracy, path="models"):
    """Write ``model``'s state dict to a timestamped ``.pth`` file under ``path``.

    The directory is created if it does not exist. The file name embeds
    ``accuracy`` formatted to two decimals and the current local time.

    Returns:
        The file name only (without the ``path`` prefix).
    """
    os.makedirs(path, exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"mnist_model_{accuracy:.2f}acc_{timestamp}.pth"

    weights = model.state_dict()
    destination = f"{path}/{filename}"
    torch.save(weights, destination)
    return filename

def count_parameters(model):
    """Return the total number of elements across every tensor in ``model.parameters()``."""
    total = 0
    for tensor in model.parameters():
        total += tensor.numel()
    return total

In [None]:
class TransformDataset:
    """Wrap an indexable dataset of ``(data, label)`` pairs and transform ``data`` on access.

    The wrapped dataset is left untouched; the transform (if truthy) is applied
    each time an item is fetched, and the label is passed through unchanged.
    """

    def __init__(self, dataset, transform):
        self.dataset, self.transform = dataset, transform

    def __len__(self):
        """Return the length of the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(data, label)`` for ``idx``, with ``data`` transformed when a transform is set."""
        sample, target = self.dataset[idx]
        if not self.transform:
            return sample, target
        return self.transform(sample), target

def _build_mnist_loaders(batch_size):
    """Download MNIST and return ``(train_loader, val_loader)`` for a random 50K/10K split.

    Augmentation is applied to the training subset only; both subsets are
    converted to tensors and normalised with mean 0.1307 and std 0.3081.
    """
    # Training transforms with data augmentation.
    # NOTE(review): MNIST images are single-channel, so the saturation/hue
    # jitter presumably has no visible effect - confirm before removing it.
    train_transform = transforms.Compose([
        transforms.RandomRotation(10),
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
        transforms.ColorJitter(brightness=0.10, contrast=0.1, saturation=0.10, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    # Validation transforms without augmentation.
    val_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    # Load the raw dataset without a transform so that each subset can get
    # its own transform after the split.
    full_dataset = torchvision.datasets.MNIST(root='./data',
                                              train=True,
                                              transform=None,
                                              download=True)

    # Split into 50K training and 10K validation samples.
    train_size = 50000
    val_size = 10000
    train_subset, val_subset = random_split(full_dataset, [train_size, val_size])

    train_loader = DataLoader(TransformDataset(train_subset, train_transform),
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=0)  # Set to 0 for GitHub Actions

    val_loader = DataLoader(TransformDataset(val_subset, val_transform),
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=0)

    return train_loader, val_loader


def _run_epoch(model, loader, criterion, device, desc, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_batch_loss, accuracy_percent)``.

    With an ``optimizer`` the model is put in training mode and updated after
    every batch; without one it is put in eval mode and gradients are disabled.
    """
    training = optimizer is not None
    model.train(training)

    running_loss = 0
    correct = 0
    seen = 0

    progress = tqdm(loader, desc=desc)
    with torch.set_grad_enabled(training):
        for batch_idx, (data, target) in enumerate(progress):
            data, target = data.to(device), target.to(device)
            if training:
                optimizer.zero_grad()

            output = model(data)
            loss = criterion(output, target)

            if training:
                loss.backward()
                optimizer.step()

            running_loss += loss.item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            seen += target.size(0)

            # Show the running averages for the batches processed so far.
            progress.set_postfix({
                'loss': f'{running_loss/(batch_idx+1):.4f}',
                'acc': f'{100.*correct/seen:.2f}%'
            })

    return running_loss / len(loader), 100. * correct / seen


def train_model(epochs=1, batch_size=8, seed=None):
    """Train ``MnistCNN`` on MNIST and save the weights from the final epoch.

    Args:
        epochs: Number of passes over the training split; must be at least 1.
        batch_size: Mini-batch size used by both the training and validation loaders.
        seed: Optional integer passed to ``torch.manual_seed`` before the data
            split and model construction. ``None`` (the default) leaves
            torch's global RNG untouched.

    Returns:
        A ``(model, val_accuracy, model_path)`` tuple: the trained model, the
        validation accuracy of the last epoch in percent, and the file name
        returned by ``save_model``.

    Raises:
        ValueError: If ``epochs`` is less than 1. There would be no validation
            result to report or to embed in the checkpoint name.
    """
    # Fail fast, before any download or device work, instead of hitting
    # unbound validation counters after the (empty) training loop.
    if epochs < 1:
        raise ValueError(f"epochs must be at least 1, got {epochs}")

    if seed is not None:
        torch.manual_seed(seed)

    # Use CUDA only if available and not in CI environment
    device = torch.device('cuda' if torch.cuda.is_available() and not os.getenv('CI') else 'cpu')
    print(f"Using device: {device}")

    train_loader, val_loader = _build_mnist_loaders(batch_size)

    # Initialize model, loss, and optimizer.
    model = MnistCNN().to(device)
    # MnistCNN.forward already returns log-probabilities (log_softmax), so the
    # matching criterion is NLLLoss; CrossEntropyLoss would apply log-softmax
    # a second time on top of them.
    criterion = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.015, momentum=0.9)

    print(f"Total trainable parameters: {count_parameters(model)}")

    for epoch in range(epochs):
        train_loss, train_accuracy = _run_epoch(
            model, train_loader, criterion, device,
            f'Epoch {epoch+1}/{epochs} [Train]', optimizer)
        val_loss, val_accuracy = _run_epoch(
            model, val_loader, criterion, device,
            f'Epoch {epoch+1}/{epochs} [Val]')

        print(f'\nEpoch {epoch+1}/{epochs}:')
        print(f'Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%')
        print(f'Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%\n')

    # Save the model from the last epoch, tagged with its validation accuracy.
    model_path = save_model(model, val_accuracy)

    return model, val_accuracy, model_path

In [None]:
# Launch the full training run (20 epochs, mini-batches of 128) and report
# the final validation accuracy together with the saved checkpoint's file name.
model, accuracy, model_path = train_model(batch_size=128, epochs=20)
print(
    f"Training completed with validation accuracy: {accuracy:.2f}%",
    f"Model saved as: {model_path}",
    sep="\n",
)

Using device: cuda
Total trainable parameters: 19704


Epoch 1/20 [Train]: 100%|██████████| 391/391 [00:39<00:00,  9.90it/s, loss=0.4761, acc=86.49%]
Epoch 1/20 [Val]: 100%|██████████| 79/79 [00:02<00:00, 32.03it/s, loss=0.0734, acc=97.79%]



Epoch 1/20:
Train Loss: 0.4761, Train Accuracy: 86.49%
Val Loss: 0.0734, Val Accuracy: 97.79%



Epoch 2/20 [Train]: 100%|██████████| 391/391 [00:38<00:00, 10.03it/s, loss=0.1131, acc=96.83%]
Epoch 2/20 [Val]: 100%|██████████| 79/79 [00:02<00:00, 32.91it/s, loss=0.0597, acc=98.25%]



Epoch 2/20:
Train Loss: 0.1131, Train Accuracy: 96.83%
Val Loss: 0.0597, Val Accuracy: 98.25%



Epoch 3/20 [Train]: 100%|██████████| 391/391 [00:38<00:00, 10.07it/s, loss=0.0896, acc=97.31%]
Epoch 3/20 [Val]: 100%|██████████| 79/79 [00:03<00:00, 25.21it/s, loss=0.0477, acc=98.65%]



Epoch 3/20:
Train Loss: 0.0896, Train Accuracy: 97.31%
Val Loss: 0.0477, Val Accuracy: 98.65%



Epoch 4/20 [Train]: 100%|██████████| 391/391 [00:38<00:00, 10.10it/s, loss=0.0764, acc=97.74%]
Epoch 4/20 [Val]: 100%|██████████| 79/79 [00:02<00:00, 31.91it/s, loss=0.0391, acc=98.94%]



Epoch 4/20:
Train Loss: 0.0764, Train Accuracy: 97.74%
Val Loss: 0.0391, Val Accuracy: 98.94%



Epoch 5/20 [Train]: 100%|██████████| 391/391 [00:38<00:00, 10.05it/s, loss=0.0673, acc=97.95%]
Epoch 5/20 [Val]: 100%|██████████| 79/79 [00:02<00:00, 32.30it/s, loss=0.0327, acc=98.99%]



Epoch 5/20:
Train Loss: 0.0673, Train Accuracy: 97.95%
Val Loss: 0.0327, Val Accuracy: 98.99%



Epoch 6/20 [Train]: 100%|██████████| 391/391 [00:39<00:00,  9.91it/s, loss=0.0620, acc=98.15%]
Epoch 6/20 [Val]: 100%|██████████| 79/79 [00:03<00:00, 26.27it/s, loss=0.0324, acc=99.05%]



Epoch 6/20:
Train Loss: 0.0620, Train Accuracy: 98.15%
Val Loss: 0.0324, Val Accuracy: 99.05%



Epoch 7/20 [Train]: 100%|██████████| 391/391 [00:38<00:00, 10.03it/s, loss=0.0582, acc=98.24%]
Epoch 7/20 [Val]: 100%|██████████| 79/79 [00:02<00:00, 32.17it/s, loss=0.0295, acc=99.10%]



Epoch 7/20:
Train Loss: 0.0582, Train Accuracy: 98.24%
Val Loss: 0.0295, Val Accuracy: 99.10%



Epoch 8/20 [Train]: 100%|██████████| 391/391 [00:38<00:00, 10.10it/s, loss=0.0534, acc=98.38%]
Epoch 8/20 [Val]: 100%|██████████| 79/79 [00:02<00:00, 32.35it/s, loss=0.0305, acc=99.13%]



Epoch 8/20:
Train Loss: 0.0534, Train Accuracy: 98.38%
Val Loss: 0.0305, Val Accuracy: 99.13%



Epoch 9/20 [Train]: 100%|██████████| 391/391 [00:39<00:00, 10.02it/s, loss=0.0531, acc=98.36%]
Epoch 9/20 [Val]: 100%|██████████| 79/79 [00:02<00:00, 28.15it/s, loss=0.0302, acc=99.04%]



Epoch 9/20:
Train Loss: 0.0531, Train Accuracy: 98.36%
Val Loss: 0.0302, Val Accuracy: 99.04%



Epoch 10/20 [Train]: 100%|██████████| 391/391 [00:38<00:00, 10.12it/s, loss=0.0500, acc=98.46%]
Epoch 10/20 [Val]: 100%|██████████| 79/79 [00:02<00:00, 31.68it/s, loss=0.0273, acc=99.20%]



Epoch 10/20:
Train Loss: 0.0500, Train Accuracy: 98.46%
Val Loss: 0.0273, Val Accuracy: 99.20%



Epoch 11/20 [Train]: 100%|██████████| 391/391 [00:38<00:00, 10.13it/s, loss=0.0482, acc=98.56%]
Epoch 11/20 [Val]: 100%|██████████| 79/79 [00:02<00:00, 32.02it/s, loss=0.0261, acc=99.19%]



Epoch 11/20:
Train Loss: 0.0482, Train Accuracy: 98.56%
Val Loss: 0.0261, Val Accuracy: 99.19%



Epoch 12/20 [Train]: 100%|██████████| 391/391 [00:39<00:00,  9.99it/s, loss=0.0474, acc=98.65%]
Epoch 12/20 [Val]: 100%|██████████| 79/79 [00:02<00:00, 28.88it/s, loss=0.0246, acc=99.26%]



Epoch 12/20:
Train Loss: 0.0474, Train Accuracy: 98.65%
Val Loss: 0.0246, Val Accuracy: 99.26%



Epoch 13/20 [Train]: 100%|██████████| 391/391 [00:38<00:00, 10.05it/s, loss=0.0463, acc=98.61%]
Epoch 13/20 [Val]: 100%|██████████| 79/79 [00:02<00:00, 32.07it/s, loss=0.0257, acc=99.26%]



Epoch 13/20:
Train Loss: 0.0463, Train Accuracy: 98.61%
Val Loss: 0.0257, Val Accuracy: 99.26%



Epoch 14/20 [Train]: 100%|██████████| 391/391 [00:38<00:00, 10.07it/s, loss=0.0435, acc=98.72%]
Epoch 14/20 [Val]: 100%|██████████| 79/79 [00:02<00:00, 32.04it/s, loss=0.0250, acc=99.34%]



Epoch 14/20:
Train Loss: 0.0435, Train Accuracy: 98.72%
Val Loss: 0.0250, Val Accuracy: 99.34%



Epoch 15/20 [Train]: 100%|██████████| 391/391 [00:39<00:00,  9.96it/s, loss=0.0438, acc=98.65%]
Epoch 15/20 [Val]: 100%|██████████| 79/79 [00:02<00:00, 32.52it/s, loss=0.0233, acc=99.27%]



Epoch 15/20:
Train Loss: 0.0438, Train Accuracy: 98.65%
Val Loss: 0.0233, Val Accuracy: 99.27%



Epoch 16/20 [Train]: 100%|██████████| 391/391 [00:38<00:00, 10.11it/s, loss=0.0406, acc=98.77%]
Epoch 16/20 [Val]: 100%|██████████| 79/79 [00:02<00:00, 32.39it/s, loss=0.0216, acc=99.41%]



Epoch 16/20:
Train Loss: 0.0406, Train Accuracy: 98.77%
Val Loss: 0.0216, Val Accuracy: 99.41%



Epoch 17/20 [Train]: 100%|██████████| 391/391 [00:38<00:00, 10.12it/s, loss=0.0424, acc=98.70%]
Epoch 17/20 [Val]: 100%|██████████| 79/79 [00:02<00:00, 31.07it/s, loss=0.0228, acc=99.38%]



Epoch 17/20:
Train Loss: 0.0424, Train Accuracy: 98.70%
Val Loss: 0.0228, Val Accuracy: 99.38%



Epoch 18/20 [Train]: 100%|██████████| 391/391 [00:39<00:00, 10.02it/s, loss=0.0395, acc=98.78%]
Epoch 18/20 [Val]: 100%|██████████| 79/79 [00:02<00:00, 31.67it/s, loss=0.0226, acc=99.28%]



Epoch 18/20:
Train Loss: 0.0395, Train Accuracy: 98.78%
Val Loss: 0.0226, Val Accuracy: 99.28%



Epoch 19/20 [Train]: 100%|██████████| 391/391 [00:38<00:00, 10.12it/s, loss=0.0416, acc=98.72%]
Epoch 19/20 [Val]: 100%|██████████| 79/79 [00:02<00:00, 32.22it/s, loss=0.0241, acc=99.30%]



Epoch 19/20:
Train Loss: 0.0416, Train Accuracy: 98.72%
Val Loss: 0.0241, Val Accuracy: 99.30%



Epoch 20/20 [Train]: 100%|██████████| 391/391 [00:38<00:00, 10.12it/s, loss=0.0376, acc=98.84%]
Epoch 20/20 [Val]: 100%|██████████| 79/79 [00:02<00:00, 31.73it/s, loss=0.0204, acc=99.41%]


Epoch 20/20:
Train Loss: 0.0376, Train Accuracy: 98.84%
Val Loss: 0.0204, Val Accuracy: 99.41%

Training completed with validation accuracy: 99.41%
Model saved as: mnist_model_99.41acc_20241201_131309.pth





## Test Model on Sample Images

You can add cells below to test the model on specific images or visualize the results.