<a href="https://colab.research.google.com/github/simecek/dspracticum2025/blob/main/lesson02/FashionMNIST_SimpleCNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Let's build a simple convolutional neural network (CNN) to classify images from the FashionMNIST dataset.

**1. Import Libraries**

In [11]:
# Cell 1 — imports & config
import torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.optim import Adam
from torch.optim.lr_scheduler import StepLR
from time import time
import numpy as np

In [12]:
# Reproducibility: seed the torch and NumPy global RNGs, and ask cuDNN for
# deterministic algorithms with its autotuner switched off.
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cpu')

In [13]:
# Cell 2 — data loaders
# Normalize with mean 0.5 / std 0.5, i.e. (x - 0.5) / 0.5 on the single channel.
mean, std = (0.5,), (0.5,)

# Training pipeline: light augmentation (random flip, rotation of up to
# 10 degrees) followed by tensor conversion and normalization.
train_tf = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Evaluation pipeline: no augmentation, same tensor conversion and normalization.
test_tf = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

train_ds = datasets.FashionMNIST(root="./data", train=True,  download=True, transform=train_tf)
test_ds  = datasets.FashionMNIST(root="./data", train=False, download=True, transform=test_tf)

batch_size = 128
# Pinned host memory only speeds up host-to-GPU copies. On a CPU-only machine
# requesting it brings no benefit and makes DataLoader emit a warning, so it is
# enabled only when CUDA is actually available.
pin_memory = torch.cuda.is_available()
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True,  num_workers=2, pin_memory=pin_memory)
test_loader  = DataLoader(test_ds,  batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=pin_memory)

len(train_ds), len(test_ds)


(60000, 10000)

In [14]:
# Cell 3 — model
class BetterCNN(nn.Module):
    """Small CNN: two 3x3 convolutions, two 2x2 max-pools, two linear layers.

    The first linear layer expects 64 * 7 * 7 features, which corresponds to
    single-channel 28x28 inputs (28 -> 14 -> 7 after the two pooling steps).
    The network returns raw logits for 10 classes.
    """

    def __init__(self):
        super().__init__()
        # Padding of 1 with a 3x3 kernel keeps the spatial size unchanged.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        # One pooling module, applied twice in forward(); each use halves H and W.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.drop1 = nn.Dropout(p=0.25)
        self.fc1 = nn.Linear(in_features=64 * 7 * 7, out_features=128)
        self.drop2 = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Map a batch of images to a batch of 10 class logits."""
        feats = F.relu(self.conv1(x))
        feats = F.relu(self.conv2(feats))
        feats = self.pool(feats)        # first downsampling: 28 -> 14
        feats = self.drop1(feats)
        feats = self.pool(feats)        # second downsampling: 14 -> 7
        flat = feats.flatten(start_dim=1)   # keep the batch dimension
        hidden = F.relu(self.fc1(flat))
        hidden = self.drop2(hidden)
        return self.fc2(hidden)

# Build the network, then move its parameters to the selected device.
model = BetterCNN()
model = model.to(device)
model


BetterCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (drop1): Dropout(p=0.25, inplace=False)
  (fc1): Linear(in_features=3136, out_features=128, bias=True)
  (drop2): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [15]:
# Cell 4 — loss, optimizer, LR schedule
criterion = nn.CrossEntropyLoss()

lr = 1e-3
optimizer = Adam(params=model.parameters(), lr=lr)
# Multiply the learning rate by 0.5 after every 5 calls to scheduler.step().
scheduler = StepLR(optimizer=optimizer, step_size=5, gamma=0.5)


In [16]:
# Cell 5 — helpers
def train_one_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader`` and report running metrics.

    The model is switched to training mode, and for every ``(inputs, targets)``
    batch the gradients are reset, the loss is back-propagated and the
    optimizer takes one step.

    Returns:
        A ``(loss, accuracy)`` tuple. ``loss`` is the batch losses weighted by
        batch size and divided by the number of samples seen (a per-sample
        mean, assuming ``criterion`` returns a batch mean). ``accuracy`` is the
        fraction of samples whose arg-max logit equals the target.
    """
    model.train()
    loss_sum = 0.0
    hits = 0
    seen = 0
    for inputs, targets in loader:
        inputs = inputs.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)
        outputs = model(inputs)
        batch_loss = criterion(outputs, targets)
        batch_loss.backward()
        optimizer.step()

        # Metrics use the logits computed before this step's weight update.
        batch_n = targets.size(0)
        loss_sum += batch_loss.item() * batch_n
        hits += outputs.argmax(dim=1).eq(targets).sum().item()
        seen += batch_n
    return loss_sum / seen, hits / seen

def evaluate(model, loader, criterion, device):
    """Score ``model`` on ``loader`` without tracking gradients.

    The model is switched to evaluation mode and left in that mode.

    Returns:
        A ``(loss, accuracy)`` tuple. ``loss`` is the batch losses weighted by
        batch size and divided by the number of samples seen. ``accuracy`` is
        the fraction of samples whose arg-max logit equals the target.
    """
    with torch.no_grad():
        model.eval()
        loss_sum = 0.0
        hits = 0
        seen = 0
        for inputs, targets in loader:
            inputs = inputs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)
            outputs = model(inputs)
            batch_n = targets.size(0)
            loss_sum += criterion(outputs, targets).item() * batch_n
            hits += outputs.argmax(dim=1).eq(targets).sum().item()
            seen += batch_n
        return loss_sum / seen, hits / seen


In [None]:
# Cell 6 — training loop
# NOTE: checkpoint selection and early stopping below are driven by accuracy on
# the test split, so the reported "best" figure is optimistically biased; a
# held-out validation split would give a cleaner estimate.
epochs = 3
best_acc = 0.0
patience, wait = 2, 0  # stop after `patience` consecutive epochs without improvement
best_state = None
start = time()

for epoch in range(1, epochs + 1):
    tr_loss, tr_acc = train_one_epoch(model, train_loader, optimizer, criterion, device)
    va_loss, va_acc = evaluate(model, test_loader, criterion, device)
    scheduler.step()  # advance the LR schedule once per epoch

    if va_acc > best_acc:
        best_acc, wait = va_acc, 0
        # Snapshot an independent copy of the weights. `.cpu()` returns the very
        # same tensor when the model already lives on the CPU, so without
        # `.clone()` the snapshot would alias the live parameters and be
        # silently overwritten by later optimizer steps.
        best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
    else:
        wait += 1

    print(f"Epoch {epoch:02d} | train loss: {tr_loss:.4f}, accuracy: {tr_acc * 100:.2f}% "
          f"| test loss: {va_loss:.4f}, accuracy: {va_acc * 100:.2f}%")

    if wait >= patience:
        print("Early stopping.")
        break

print(f"Done in {(time()-start):.1f}s. Best test acc: {best_acc:.3f}")
# Restore the best-scoring weights so later cells evaluate that checkpoint
# rather than the weights from the last epoch that happened to run.
if best_state is not None:
    model.load_state_dict({k: v.to(device) for k, v in best_state.items()})


Epoch 01 | train loss: 0.3697, accuracy: 86.82% | test loss: 0.2808, accuracy: 89.98%
Epoch 02 | train loss: 0.3501, accuracy: 87.59% | test loss: 0.2706, accuracy: 90.04%
Epoch 03 | train loss: 0.3197, accuracy: 88.63% | test loss: 0.2577, accuracy: 90.69%
Epoch 04 | train loss: 0.3124, accuracy: 88.86% | test loss: 0.2533, accuracy: 90.45%
Epoch 05 | train loss: 0.3071, accuracy: 89.07% | test loss: 0.2494, accuracy: 90.57%
Early stopping.
Done in 551.5s. Best test acc: 0.907


In [None]:
# Cell 7 — final metrics
# Score the weights currently held in `model` on the test loader.
test_loss, test_acc = evaluate(model, test_loader, criterion, device)
final_report = (
    f"Final test:\n"
    f"loss: {test_loss:.4f}\n"
    f"accuracy: {test_acc * 100:.2f}%"
)
print(final_report)




Final test
loss: 0.3133
accuracy: 88.63%
