In [None]:
import struct
import numpy as np
import torch

def load_mnist_images(path):
    """Read an IDX3 (idx3-ubyte) image file into a float32 tensor.

    The file begins with a 16-byte header made of four big-endian unsigned
    32-bit integers (magic number, image count, rows, cols), followed by one
    unsigned byte per pixel.

    Args:
        path: Location of the idx3-ubyte file on disk.

    Returns:
        A float32 ``torch.Tensor`` of shape (num_images, 1, rows, cols) whose
        values are the raw 0-255 pixel bytes divided by 255.

    Raises:
        AssertionError: If the magic number in the header is not 2051.
        ValueError: If the pixel payload cannot be reshaped to the header's
            (num_images, rows, cols).
    """
    header_format = ">IIII"  # ">" = big-endian, "I" = unsigned 32-bit integer

    with open(path, "rb") as stream:
        header = stream.read(struct.calcsize(header_format))
        magic, num_images, rows, cols = struct.unpack(header_format, header)

        # 2051 is the magic number that identifies an idx3-ubyte file.
        assert magic == 2051, "Invalid magic number for image file!"

        payload = stream.read()

    # frombuffer views the bytes as uint8 without copying; reshape only changes
    # how that flat byte stream is indexed, not the bytes themselves.
    pixels = np.frombuffer(payload, dtype=np.uint8).reshape(num_images, rows, cols)

    # Scale to [0, 1], then insert a channel axis -> (N, 1, rows, cols).
    scaled = pixels.astype(np.float32) / 255.0
    return torch.tensor(scaled[:, np.newaxis, :, :])


def load_mnist_labels(path):
    """Read an IDX1 (idx1-ubyte) label file into an int64 tensor.

    The file begins with an 8-byte header made of two big-endian unsigned
    32-bit integers (magic number, label count), followed by one unsigned
    byte per label.

    Args:
        path: Location of the idx1-ubyte file on disk.

    Returns:
        A 1-D ``torch.long`` tensor holding one label per entry.

    Raises:
        AssertionError: If the magic number in the header is not 2049.
        ValueError: If the number of label bytes in the file does not match
            the count declared in the header (truncated or padded file).
    """
    with open(path, "rb") as f:
        # ">II": two big-endian unsigned 32-bit integers.
        magic, num_labels = struct.unpack(">II", f.read(8))
        assert magic == 2049, "Invalid magic number for label file!"

        data = np.frombuffer(f.read(), dtype=np.uint8)

    # The header count was previously ignored; check it so a damaged file is
    # rejected here instead of silently producing the wrong number of labels.
    if data.size != num_labels:
        raise ValueError(
            f"Label file declares {num_labels} labels but contains {data.size}"
        )

    return torch.tensor(data, dtype=torch.long)

In [None]:
# Paths to the raw IDX training files, relative to the notebook's working directory.
# Google Colab / Drive alternative:
# train_images_path = "/content/drive/MyDrive/DCP/MNIST_Dataset/train-images-idx3-ubyte/train-images-idx3-ubyte"
# train_labels_path = "/content/drive/MyDrive/DCP/MNIST_Dataset/train-labels-idx1-ubyte/train-labels-idx1-ubyte"
train_images_path = "MNIST_Dataset/train-images-idx3-ubyte"
train_labels_path = "MNIST_Dataset/train-labels-idx1-ubyte"

train_images = load_mnist_images(train_images_path)
train_labels = load_mnist_labels(train_labels_path)

print(train_images.shape, train_labels.shape)
# Each individual image has shape (1, 28, 28): the leading 1 is the channel
# dimension (grayscale); an RGB image would be (3, 28, 28).
# The full tensor is 4-D because PyTorch conv layers expect input shaped
# (batch, channels, height, width).

torch.Size([60000, 1, 28, 28]) torch.Size([60000])


In [None]:
from torch.utils.data import Dataset
#Dataset is an abstract base class provided by PyTorch.
#It defines the interface your dataset must provide to work with PyTorch tools like DataLoader.

class MNISTCustomDataset(Dataset):
    """Map-style dataset pairing each image with its label by position.

    Args:
        images: Indexable collection of images (e.g. a tensor whose first
            dimension indexes samples).
        labels: Indexable collection of labels with the same length as
            ``images``.

    Raises:
        ValueError: If ``images`` and ``labels`` differ in length.
    """

    def __init__(self, images, labels):
        # Fail at construction rather than with an IndexError deep inside a
        # DataLoader when the two collections are out of step.
        if len(images) != len(labels):
            raise ValueError(
                f"images and labels must have the same length, "
                f"got {len(images)} and {len(labels)}"
            )
        self.images = images
        self.labels = labels

    # A map-style Dataset subclass must provide __len__ and __getitem__.
    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        return self.images[idx], self.labels[idx]

# Pair the training images with their labels so a DataLoader can index them.
train_dataset = MNISTCustomDataset(train_images, train_labels)

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class MNIST_CNN(nn.Module):
    """Two-stage convolutional classifier for 28x28 single-channel images.

    Maps a batch shaped (batch, 1, 28, 28) to raw class scores shaped
    (batch, 10). No softmax is applied; the output is logits.
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor. padding=1 with a 3x3 kernel keeps
        # the spatial size; each 2x2 max-pool with stride 2 halves it.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fully connected classifier head: 64 channels * 7 * 7 -> 128 -> 10.
        self.fc1 = nn.Linear(in_features=64 * 7 * 7, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return logits of shape (batch, 10) for input (batch, 1, 28, 28)."""
        stage1 = self.pool(F.relu(self.conv1(x)))        # (batch, 32, 14, 14)
        stage2 = self.pool(F.relu(self.conv2(stage1)))   # (batch, 64, 7, 7)

        # Collapse channel and spatial axes into a single feature vector.
        flat = stage2.view(stage2.size(0), -1)           # (batch, 64*7*7)

        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)


In [None]:
# Seed PyTorch's global RNG before any weights are created so that weight
# initialisation (and the DataLoader shuffling that draws from the same RNG)
# is repeatable on a fresh kernel run.
torch.manual_seed(42)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MNIST_CNN().to(device)
# .to(device) moves all model parameters and buffers to the specified device.

print(model)

MNIST_CNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=3136, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)


In [None]:
from torch.utils.data import DataLoader
# DataLoader takes a Dataset and turns it into batches
# Batches are pulled lazily during training

# Serve the training set in mini-batches of 64, reshuffled by the loader.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
criterion = nn.CrossEntropyLoss()
# Cross-entropy is the loss function that scores how far the predicted class
# scores are from the true labels; it is the standard loss for classification.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Adam updates every parameter returned by model.parameters(), with learning rate 0.001.

In [None]:
# File the training loop writes the best model.state_dict() checkpoint to.
# Google Colab / Drive alternative:
# best_model_path="/content/drive/MyDrive/DCP/best_model.pth"
best_model_path="best_model.pth"

In [None]:
from tqdm import tqdm

def train(model, loader, optimizer, criterion, device, epochs=5, save_path=None):
    """Train ``model`` and checkpoint the weights of the best epoch so far.

    Args:
        model: The ``nn.Module`` to optimise (already moved to ``device``).
        loader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer bound to ``model``'s parameters.
        criterion: Loss callable. The running loss shown in the progress bar
            assumes it returns the mean over the batch, which is the default
            for ``nn.CrossEntropyLoss``.
        device: Device each batch is moved to before the forward pass.
        epochs: Number of passes over ``loader``.
        save_path: Where to write the best ``state_dict``. Defaults to the
            notebook-level ``best_model_path`` when left as ``None``.

    Returns:
        None. Progress is printed, and the checkpoint file is written whenever
        an epoch's accuracy beats the best seen so far.

    Note:
        "Accuracy" here is the running accuracy on the training batches,
        measured while the weights are still being updated; it is not a
        validation score.
    """
    model.train()
    best_acc = 0.0

    for epoch in range(epochs):
        total_loss = 0.0  # sum of per-sample losses seen this epoch
        correct = 0
        total = 0

        # tqdm wraps the loader to show batch progress plus live loss/accuracy.
        progress = tqdm(loader, desc=f"Epoch {epoch+1}/{epochs}", unit="batch")
        for images, labels in progress:
            # Inputs and targets must live on the same device as the model.
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)            # (batch_size, num_classes)
            loss = criterion(outputs, labels)  # scalar: mean loss over the batch

            # Backward pass
            optimizer.zero_grad()  # clear gradients left over from the previous batch
            loss.backward()        # autograd fills .grad for every parameter
            optimizer.step()       # optimizer applies the update

            # Weight the batch-mean loss by the batch size so that
            # total_loss / total is a true per-sample average. Dividing a sum
            # of batch means by the sample count under-reports the loss by
            # roughly a factor of batch_size.
            batch_size = labels.size(0)
            total_loss += loss.item() * batch_size

            # Accuracy calculation
            _, predicted = torch.max(outputs, 1)
            total += batch_size
            correct += (predicted == labels).sum().item()

            acc = 100 * correct / total
            progress.set_postfix(loss=total_loss / total, accuracy=acc)

        # An empty loader yields no samples; report 0% instead of dividing by zero.
        epoch_acc = 100 * correct / total if total else 0.0
        print(f"\nEpoch {epoch+1} completed | Accuracy: {epoch_acc:.2f}%")

        # Save best model
        if epoch_acc > best_acc:
            best_acc = epoch_acc
            # Resolve the fallback only when saving, as the original code did.
            target_path = best_model_path if save_path is None else save_path
            torch.save(model.state_dict(), target_path)
            print(f"✔ Saved best model (Accuracy: {best_acc:.2f}%)")

    print(f"\nTraining finished. Best accuracy: {best_acc:.2f}%")


In [None]:
# Train for 5 epochs; per-epoch progress and accuracy are printed as the loop runs.
train(model, train_loader, optimizer, criterion, device, epochs=5)

Epoch 1/5: 100%|██████████| 938/938 [00:05<00:00, 156.69batch/s, accuracy=94.5, loss=0.00281]



Epoch 1 completed | Accuracy: 94.53%
✔ Saved best model (Accuracy: 94.53%)


Epoch 2/5: 100%|██████████| 938/938 [00:04<00:00, 227.16batch/s, accuracy=98.4, loss=0.000806]



Epoch 2 completed | Accuracy: 98.41%
✔ Saved best model (Accuracy: 98.41%)


Epoch 3/5: 100%|██████████| 938/938 [00:04<00:00, 224.28batch/s, accuracy=98.9, loss=0.000564]



Epoch 3 completed | Accuracy: 98.89%
✔ Saved best model (Accuracy: 98.89%)


Epoch 4/5: 100%|██████████| 938/938 [00:04<00:00, 204.38batch/s, accuracy=99.1, loss=0.00042]



Epoch 4 completed | Accuracy: 99.13%
✔ Saved best model (Accuracy: 99.13%)


Epoch 5/5: 100%|██████████| 938/938 [00:04<00:00, 223.06batch/s, accuracy=99.3, loss=0.000323]



Epoch 5 completed | Accuracy: 99.31%
✔ Saved best model (Accuracy: 99.31%)

Training finished. Best accuracy: 99.31%


The following cells evaluate the trained model on the MNIST test set.

In [None]:
# Paths to the raw IDX test files (the "t10k" files), relative to the notebook's working directory.
# Google Colab / Drive alternative:
# test_images_path = "/content/drive/MyDrive/DCP/MNIST_Dataset/t10k-images-idx3-ubyte/t10k-images-idx3-ubyte"
# test_labels_path = "/content/drive/MyDrive/DCP/MNIST_Dataset/t10k-labels-idx1-ubyte/t10k-labels-idx1-ubyte"
test_images_path = "MNIST_Dataset/t10k-images-idx3-ubyte"
test_labels_path = "MNIST_Dataset/t10k-labels-idx1-ubyte"

# Same loaders as for the training split, so the test tensors get the same
# scaling and channel dimension.
test_images = load_mnist_images(test_images_path)
test_labels = load_mnist_labels(test_labels_path)


In [None]:
# Wrap the test tensors in the same Dataset class used for training.
test_dataset = MNISTCustomDataset(test_images, test_labels)
# shuffle=False: accuracy does not depend on batch order, so keep it fixed.
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [None]:
def evaluate(model, loader, device):
    """Return the top-1 accuracy (as a percentage) of ``model`` over ``loader``.

    Args:
        model: The ``nn.Module`` to score; it is switched to eval mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``100 * correct / total`` as a float.
    """
    model.eval()
    num_correct, num_seen = 0, 0

    # Gradients are not needed for scoring, so skip building the autograd graph.
    with torch.no_grad():
        for batch_images, batch_labels in loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            # The predicted class is the index of the largest logit per row.
            predictions = model(batch_images).max(dim=1).indices

            num_seen += batch_labels.size(0)
            num_correct += predictions.eq(batch_labels).sum().item()

    return 100 * num_correct / num_seen

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build a fresh network and restore the best checkpoint written during training.
model = MNIST_CNN().to(device)
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all a state_dict holds, so the checkpoint file cannot run arbitrary code.
# map_location remaps the saved tensors onto the device chosen above.
model.load_state_dict(
    torch.load(best_model_path, map_location=device, weights_only=True)
)

test_acc = evaluate(model, test_loader, device)
print("Test Accuracy:", test_acc)


Test Accuracy: 98.81
