<a href="https://colab.research.google.com/github/HeshamAhmedd/CNN-Model-on-the-Fashion-MNIST-dataset/blob/main/cnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import numpy as np
import time
from tqdm import tqdm

In [2]:
# Seed the global PyTorch and NumPy random number generators with a fixed value.
# Keep np.random.seed(...) as the last statement: it returns None, so the cell
# shows no output (torch.manual_seed returns a Generator object).
torch.manual_seed(42)
np.random.seed(42)

In [3]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print("Using device:", device)


Using device: cpu


In [4]:
# 1) Transforms and datasets
# Preprocessing applied to every image: tensor conversion followed by
# single-channel normalisation with mean 0.5 and std 0.5.
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
]
transform = transforms.Compose(preprocessing_steps)

# Download datasets (both splits share the same root directory and transform)
dataset_kwargs = dict(root="./data", download=True, transform=transform)
train_full = datasets.FashionMNIST(train=True, **dataset_kwargs)
test_set = datasets.FashionMNIST(train=False, **dataset_kwargs)


100%|██████████| 26.4M/26.4M [00:01<00:00, 18.3MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 297kB/s]
100%|██████████| 4.42M/4.42M [00:00<00:00, 5.45MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 14.3MB/s]


In [5]:
# split train -> train/val
val_size = 10000
train_size = len(train_full) - val_size

# Use a dedicated, explicitly seeded generator for the split. Without it the
# split depends on how much global RNG state was consumed before this cell ran,
# so re-running the cell alone would silently move samples between the training
# and validation subsets.
split_generator = torch.Generator().manual_seed(42)
train_set, val_set = random_split(
    train_full, [train_size, val_size], generator=split_generator
)

batch_size = 128
# Pinned host memory only speeds up host -> GPU copies; on a CPU-only runtime
# it is not used and PyTorch emits a warning, so enable it only with CUDA.
pin_memory = torch.cuda.is_available()
loader_kwargs = dict(batch_size=batch_size, num_workers=2, pin_memory=pin_memory)

# Only the training loader shuffles; validation/test keep a fixed order.
train_loader = DataLoader(train_set, shuffle=True, **loader_kwargs)
val_loader   = DataLoader(val_set, shuffle=False, **loader_kwargs)
test_loader  = DataLoader(test_set, shuffle=False, **loader_kwargs)


In [6]:
# 2) Model
class FashionCNN(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Layout: two 3x3 conv + batch-norm + ReLU stages, a 2x2 max-pool, a third
    3x3 conv + ReLU, a second 2x2 max-pool, then two fully connected layers.
    One shared dropout module (p=0.25) is applied after each pooling step and
    after the first fully connected layer.

    Args:
        num_classes: number of output logits (default 10).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Every convolution uses a 3x3 kernel with padding=1, so it preserves
        # the spatial size; only the pooling layer halves it (28 -> 14 -> 7).
        # Attribute names and registration order are kept stable because they
        # define the state_dict keys used by saved checkpoints.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(num_features=32)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(num_features=64)
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.dropout = nn.Dropout(p=0.25)
        # After two poolings a 28x28 input is a 128 x 7 x 7 feature map.
        self.fc1 = nn.Linear(in_features=128 * 7 * 7, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=num_classes)

    def forward(self, x):
        """Map a batch of images (N, 1, 28, 28) to class logits (N, num_classes)."""
        # Stage 1: conv -> batch-norm -> ReLU, twice, at full resolution.
        feats = self.bn1(self.conv1(x))
        feats = F.relu(feats)
        feats = self.bn2(self.conv2(feats))
        feats = F.relu(feats)
        feats = self.dropout(self.pool(feats))  # 28x28 -> 14x14

        # Stage 2: third convolution (no batch-norm), then pool down to 7x7.
        feats = F.relu(self.conv3(feats))
        feats = self.dropout(self.pool(feats))

        # Classifier head on the flattened feature map.
        flat = feats.flatten(start_dim=1)
        hidden = self.dropout(F.relu(self.fc1(flat)))
        return self.fc2(hidden)

# Build the network with its default number of classes, then move its
# parameters and buffers to the selected device.
model = FashionCNN()
model = model.to(device)
print(model)


FashionCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (dropout): Dropout(p=0.25, inplace=False)
  (fc1): Linear(in_features=6272, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)


In [8]:
# Loss function and optimiser for the classifier.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Plateau-based LR schedule: mode="min" means the monitored value is expected
# to decrease; the LR is multiplied by `factor` when it stops improving for
# longer than `patience` scheduler steps.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=3)


In [9]:
# utility: evaluate
def evaluate(loader):
    """Return ``(mean_loss, accuracy)`` of the notebook-level ``model`` on ``loader``.

    Switches ``model`` to eval mode and iterates over ``loader`` without
    gradient tracking. Each batch loss is weighted by the batch size so the
    returned loss is a per-sample mean; accuracy is the fraction of samples
    whose arg-max logit equals the label.

    Relies on the globals ``model``, ``criterion`` and ``device``.
    """
    model.eval()
    n_seen = 0
    n_correct = 0
    weighted_loss = 0.0
    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model(inputs)
            batch_loss = criterion(outputs, targets).item()
            weighted_loss += batch_loss * inputs.size(0)
            hits = outputs.argmax(dim=1) == targets
            n_correct += hits.sum().item()
            n_seen += targets.size(0)
    mean_loss = weighted_loss / n_seen
    accuracy = n_correct / n_seen
    return mean_loss, accuracy


In [13]:
import time
from tqdm import tqdm

# Performance settings
torch.backends.cudnn.benchmark = True   # good for fixed input sizes

# Mixed precision is only used when CUDA is available; on CPU `scaler` stays
# None and the loop below takes the plain full-precision path.
scaler = torch.cuda.amp.GradScaler() if torch.cuda.is_available() else None

epochs = 12                # reduce epochs to start
best_val_acc = 0.0
# NOTE: the batch size is fixed by the DataLoaders built earlier. To change it
# (or num_workers), recreate train_loader / val_loader where they are defined;
# assigning a new `batch_size` here would have no effect on training.

for epoch in range(1, epochs + 1):
    epoch_start = time.time()
    model.train()
    running_loss = 0.0
    num_samples = 0

    # training loop with AMP
    loop = tqdm(train_loader, desc=f"Epoch {epoch}/{epochs}", leave=False)
    for X, y in loop:
        X = X.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)

        optimizer.zero_grad()
        if scaler is not None:
            # Forward pass under autocast; the scaler scales the loss before
            # backward and drives the optimizer step.
            with torch.cuda.amp.autocast():
                logits = model(X)
                loss = criterion(logits, y)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            logits = model(X)
            loss = criterion(logits, y)
            loss.backward()
            optimizer.step()

        # Weight the batch loss by its size so the running value is a
        # per-sample mean even when the last batch is smaller.
        batch_size_curr = X.size(0)
        running_loss += loss.item() * batch_size_curr
        num_samples += batch_size_curr

        loop.set_postfix(loss=(running_loss / num_samples))

    train_loss = running_loss / num_samples

    # Validation: reuse evaluate() instead of a hand-copied loop, so the metric
    # used for LR scheduling and checkpoint selection is computed exactly like
    # the final test metric (eval mode, no grad, full precision).
    val_loss, val_acc = evaluate(val_loader)

    # LR scheduler step (ReduceLROnPlateau is driven by the validation loss)
    old_lr = optimizer.param_groups[0]['lr']
    scheduler.step(val_loss)
    new_lr = optimizer.param_groups[0]['lr']

    if new_lr != old_lr:
        print(f"LR reduced from {old_lr:.2e} to {new_lr:.2e}")

    epoch_time = time.time() - epoch_start
    print(f"Epoch {epoch:02d} | Train loss: {train_loss:.4f} | Val loss: {val_loss:.4f} | Val acc: {val_acc:.4f} | {epoch_time:.1f}s")

    # Save best (checkpoint selection is by validation accuracy, not loss)
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "fashion_cnn_pytorch_best.pth")
        print(" Saved best model.")




Epoch 01 | Train loss: 0.2459 | Val loss: 0.2237 | Val acc: 0.9182 | 277.0s
 Saved best model.




Epoch 02 | Train loss: 0.2208 | Val loss: 0.2204 | Val acc: 0.9217 | 272.0s
 Saved best model.




Epoch 03 | Train loss: 0.2030 | Val loss: 0.1983 | Val acc: 0.9270 | 275.4s
 Saved best model.




Epoch 04 | Train loss: 0.1935 | Val loss: 0.1940 | Val acc: 0.9305 | 277.1s
 Saved best model.




Epoch 05 | Train loss: 0.1767 | Val loss: 0.1940 | Val acc: 0.9302 | 271.8s




Epoch 06 | Train loss: 0.1637 | Val loss: 0.1995 | Val acc: 0.9296 | 270.0s




Epoch 07 | Train loss: 0.1557 | Val loss: 0.1913 | Val acc: 0.9324 | 270.7s
 Saved best model.




Epoch 08 | Train loss: 0.1466 | Val loss: 0.1866 | Val acc: 0.9323 | 270.5s




Epoch 09 | Train loss: 0.1370 | Val loss: 0.1944 | Val acc: 0.9325 | 272.3s
 Saved best model.




Epoch 10 | Train loss: 0.1300 | Val loss: 0.2009 | Val acc: 0.9310 | 274.1s




Epoch 11 | Train loss: 0.1254 | Val loss: 0.1884 | Val acc: 0.9355 | 272.8s
 Saved best model.




LR reduced from 1.00e-03 to 5.00e-04
Epoch 12 | Train loss: 0.1202 | Val loss: 0.1954 | Val acc: 0.9341 | 279.4s


In [14]:
# 5) Final test evaluation
# Restore the checkpoint written by the training loop (best validation accuracy).
# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a GPU runtime can still be loaded on a CPU-only one.
best_state = torch.load("fashion_cnn_pytorch_best.pth", map_location=device)
model.load_state_dict(best_state)
test_loss, test_acc = evaluate(test_loader)
print(f"PyTorch test loss: {test_loss:.4f}, test accuracy: {test_acc:.4f}")



PyTorch test loss: 0.2122, test accuracy: 0.9310


In [15]:
# 6) Sample predictions
# Human-readable label for each class index (list position == class id).
classes = ['T-shirt/top','Trouser','Pullover','Dress','Coat',
           'Sandal','Shirt','Sneaker','Bag','Ankle boot']

model.eval()
# Take the first nine images and labels of the first test batch.
batch = next(iter(test_loader))
images, labels = batch
X_sample = images[:9].to(device)
y_sample = labels[:9].to(device)
with torch.no_grad():
    logits = model(X_sample)
preds = logits.argmax(dim=1)

# Print predicted vs. true class (index and name) for each sample.
for i, (pred_idx, true_idx) in enumerate(zip(preds.tolist(), y_sample.tolist())):
    print(i, "pred:", pred_idx, classes[pred_idx], "true:", true_idx, classes[true_idx])

0 pred: 9 Ankle boot true: 9 Ankle boot
1 pred: 2 Pullover true: 2 Pullover
2 pred: 1 Trouser true: 1 Trouser
3 pred: 1 Trouser true: 1 Trouser
4 pred: 6 Shirt true: 6 Shirt
5 pred: 1 Trouser true: 1 Trouser
6 pred: 4 Coat true: 4 Coat
7 pred: 6 Shirt true: 6 Shirt
8 pred: 5 Sandal true: 5 Sandal
