<a href="https://colab.research.google.com/github/Responze31/CNN-for-MNIST-Dataset/blob/main/MNIST_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [2]:
# for convinience
transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

In [3]:
# to train data set ,a root then train to true/false download true and to tensors true
train_data = datasets.MNIST(root='rnn_data', train=True, download=True, transform=transforms)
test_data = datasets.MNIST(root='rnn_data', train=False, download=True, transform=transforms)

# DataLoader breaks the dataset into batches of 10 images for faster training.
train_loader = DataLoader(train_data, batch_size=128, shuffle=True)
test_loader = DataLoader(test_data, batch_size=128, shuffle=False)

100%|██████████| 9.91M/9.91M [00:10<00:00, 906kB/s] 
100%|██████████| 28.9k/28.9k [00:00<00:00, 135kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 1.28MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 5.54MB/s]


In [4]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        # 1) Convolution block 1
        self.conv1 = nn.Conv2d(1, 6, kernel_size=3, stride=1)
        self.bn1   = nn.BatchNorm2d(6)

        # 2) Convolution block 2
        self.conv2 = nn.Conv2d(6, 18, kernel_size=3, stride=1)
        self.bn2   = nn.BatchNorm2d(18)

        # 3) Fully‑connected layers
        self.fc1 = nn.Linear(18 * 5 * 5, 100)
        self.fc2 = nn.Linear(100, 65)
        self.fc3 = nn.Linear(65, 10)

        # 4) Dropout (25 %)
        self.dropout = nn.Dropout(p=0.25)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.max_pool2d(x, 2)           # 26×26 → 13×13

        x = F.relu(self.bn2(self.conv2(x)))
        x = F.max_pool2d(x, 2)           # 11×11 → 5×5

        x = x.view(-1, 18 * 5 * 5)

        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        return self.fc3(x)


In [5]:
# for reproductibiltiy
torch.manual_seed(42)

# defining CNN model
model = CNN()
# defining loss funtion
criterion = nn.CrossEntropyLoss()
# defininf optimizer
optimizer = torch.optim.Adam(model.parameters(),
                             lr=0.001,
                             weight_decay=1e-5)
print(model)

CNN(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (bn1): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(6, 18, kernel_size=(3, 3), stride=(1, 1))
  (bn2): BatchNorm2d(18, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=450, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=65, bias=True)
  (fc3): Linear(in_features=65, out_features=10, bias=True)
  (dropout): Dropout(p=0.25, inplace=False)
)


```
Input: 28x28 grayscale image (MNIST)
        │
        ▼
Conv2D: 1 input → 6 output channels, 3x3 kernel
    → Output size: 26x26x6
        │
        ▼
MaxPool2D: 2x2 kernel
    → Output size: 13x13x6
        │
        ▼
Conv2D: 6 input → 18 output channels, 3x3 kernel
    → Output size: 11x11x18
        │
        ▼
MaxPool2D: 2x2 kernel
    → Output size: 5x5x18 = 450 features
        │
        ▼
Flatten to vector (shape: 450)
        │
        ▼
Fully Connected Layer: 450 → 100
        ▼
Fully Connected Layer: 100 → 65
        ▼
Fully Connected Layer: 65 → 10
        ▼
Output: Logits for 10 classes (digits 0–9)
```


In [6]:
num_epochs = 10  # total number of epochs to train

for epoch in range(num_epochs):
    model.train() # training mode
    running_loss = 0.0

    # ─── TRAIN over all mini‑batches ────────────────────────────────────────────
    for batch_idx, (images, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        if (batch_idx + 1) % 500 == 0:    # print every 500 batches
            print(f"Epoch [{epoch+1}/{num_epochs}], "
                  f"Step [{batch_idx+1}/{len(train_loader)}], "
                  f"Loss: {running_loss / 500:.4f}")
            running_loss = 0.0

    # ───Calculate TRAIN accuracy ─────────────────────────────────────────
    correct_train = 0
    total_train = 0
    model.eval() # BN/Dropout in eval mode for accuracy
    with torch.no_grad():
        for images, labels in train_loader:
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            total_train += labels.size(0)
            correct_train += (preds == labels).sum().item()
    train_acc = 100 * correct_train / total_train

    # ─── TEST accuracy ─────────────────────────────────────────────────────────
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    test_acc = 100 * correct / total
    # ───────────────────────────────────────────────────────────────────────────

    print(f"Epoch [{epoch+1}/{num_epochs}]  "
          f"Train Accuracy: {train_acc:.2f}%   "
          f"Test Accuracy: {test_acc:.2f}%\n")

    model.train()  # switch back to training mode for next epoch


Epoch [1/10]  Train Accuracy: 97.56%   Test Accuracy: 97.53%

Epoch [2/10]  Train Accuracy: 98.43%   Test Accuracy: 98.35%

Epoch [3/10]  Train Accuracy: 98.86%   Test Accuracy: 98.57%

Epoch [4/10]  Train Accuracy: 98.88%   Test Accuracy: 98.63%

Epoch [5/10]  Train Accuracy: 98.91%   Test Accuracy: 98.61%

Epoch [6/10]  Train Accuracy: 99.28%   Test Accuracy: 98.86%

Epoch [7/10]  Train Accuracy: 99.33%   Test Accuracy: 98.82%

Epoch [8/10]  Train Accuracy: 99.50%   Test Accuracy: 98.99%

Epoch [9/10]  Train Accuracy: 99.40%   Test Accuracy: 98.83%

Epoch [10/10]  Train Accuracy: 99.57%   Test Accuracy: 98.96%

