# Deep Learning Lab — Mustafa
Minimal notebook that trains a feed-forward network (FFNN) and a CNN on MNIST, runs a learning-rate (LR) sweep, and checks convolution output-size arithmetic.

In [None]:
import os
import math
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

def set_seed(seed: int = 1337):
    """Seed the RNGs used in this notebook and configure cuDNN flags.

    Calls ``random.seed``, ``np.random.seed``, ``torch.manual_seed`` and
    ``torch.cuda.manual_seed_all`` with the same value, then sets
    ``cudnn.deterministic`` to True and ``cudnn.benchmark`` to False.

    Args:
        seed: Value handed to every seeding function.

    Returns:
        The seed that was applied.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    return seed

def ensure_dir(path: str):
    """Create directory *path*, including missing parents, if it is absent.

    A directory that already exists is left as-is and is not an error.
    """
    os.makedirs(name=path, exist_ok=True)

def save_plot(fig, outpath: str, title: str = ""):
    """Finalize a matplotlib figure, write it to disk, and close it.

    Args:
        fig: Figure to save. It is closed before this function returns,
            whether or not saving succeeded.
        outpath: Destination image path. Its parent directory is created
            when it does not exist yet.
        title: Optional figure-level title; skipped when empty.
    """
    try:
        if title:
            fig.suptitle(title)
        fig.tight_layout()
        parent = os.path.dirname(outpath)
        if parent:
            os.makedirs(parent, exist_ok=True)
        fig.savefig(outpath, dpi=150)
    finally:
        # Close even when layout/saving fails so the figure does not stay
        # registered with pyplot.
        plt.close(fig)

# Directory that receives the figures written by this notebook.
outputs_dir = "./outputs"
ensure_dir(outputs_dir)

# Use CUDA when it is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Seed all RNGs with set_seed's default seed.
set_seed()


In [None]:
class FFNN(nn.Module):
    """Fully connected classifier: flatten -> 784 -> 128 (ReLU) -> 10."""

    def __init__(self):
        super().__init__()
        # Layer order fixes the state_dict keys (net.1.*, net.3.*).
        layers = [
            nn.Flatten(),
            nn.Linear(in_features=28 * 28, out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=10),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the 10 output scores of the final linear layer per sample."""
        logits = self.net(x)
        return logits

class SimpleCNN(nn.Module):
    """Small CNN: conv 5x5 -> max-pool 2x2 -> conv 3x3 -> fc 64 -> fc 10."""

    def __init__(self):
        super().__init__()
        # Both convolutions use stride 1 with padding that keeps the spatial
        # size; only the 2x2 max-pool halves it (28 -> 14 for 28x28 inputs).
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=5, stride=1, padding=2)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1)
        # fc1 expects 16 channels of 14x14 feature maps, i.e. 28x28 inputs.
        self.fc1 = nn.Linear(in_features=16 * 14 * 14, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=10)

    def forward(self, x):
        """Map a batch of single-channel images to 10 scores per sample."""
        features = self.pool(F.relu(self.conv1(x)))
        features = F.relu(self.conv2(features))
        hidden = F.relu(self.fc1(features.flatten(start_dim=1)))
        return self.fc2(hidden)


In [None]:
def get_mnist_loaders(batch_size=64):
    """Build the MNIST train and test ``DataLoader`` objects.

    Both splits are stored under ``./data`` (``download=True``) and use a
    ``ToTensor``-only transform. The training loader shuffles; the test
    loader does not.

    Args:
        batch_size: Batch size used by both loaders.

    Returns:
        ``(train_loader, test_loader)``.
    """
    to_tensor = transforms.Compose([transforms.ToTensor()])
    train_split, test_split = (
        datasets.MNIST(root="./data", train=is_train, download=True, transform=to_tensor)
        for is_train in (True, False)
    )
    train_loader = DataLoader(train_split, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_split, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader

# Loaders shared by the training cells, with 128 samples per batch.
train_loader, test_loader = get_mnist_loaders(batch_size=128)


In [None]:
def evaluate(model, loader):
    """Return the classification accuracy of *model* over *loader*.

    The model is switched to eval mode (and left there), and gradients are
    disabled while iterating. Each batch is moved to the module-level
    ``device`` before the forward pass.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        loader: Iterable of ``(inputs, labels)`` batches.

    Returns:
        Fraction of labels predicted correctly; ``0.0`` when the loader
        yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            pred = model(x).argmax(dim=1)
            correct += (pred == y).sum().item()
            total += y.numel()
    # Guard the division so an empty loader reports 0.0 instead of raising
    # ZeroDivisionError (same convention as the max(1, steps) loss average
    # in train_model).
    return correct / max(1, total)

def train_model(model, train_loader, test_loader, lr=0.01, epochs=3, prefix="model"):
    """Train *model* with SGD, log per-epoch metrics, and save their curves.

    After every epoch the mean training loss and the accuracy on
    *test_loader* are printed. When training ends, two figures are written
    to ``outputs_dir``: ``<prefix>_loss.png`` and ``<prefix>_acc.png``.

    Args:
        model: Module to train; it is moved to the module-level ``device``.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        test_loader: Loader passed to ``evaluate`` after each epoch.
        lr: Learning rate for ``torch.optim.SGD``.
        epochs: Number of passes over *train_loader*.
        prefix: Tag used in log lines, plot titles, and output file names.

    Returns:
        List with the mean training loss of each epoch (length ``epochs``),
        so callers can compare runs, e.g. in a learning-rate sweep.
    """
    model.to(device)
    opt = torch.optim.SGD(model.parameters(), lr=lr)
    losses, accs = [], []
    for ep in range(1, epochs + 1):
        model.train()  # evaluate() leaves the model in eval mode
        total_loss = 0.0
        steps = 0
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            opt.zero_grad()
            loss = F.cross_entropy(model(x), y)
            loss.backward()
            opt.step()
            total_loss += loss.item()
            steps += 1
        # max(1, steps) keeps an empty train_loader from dividing by zero.
        loss_avg = total_loss / max(1, steps)
        acc = evaluate(model, test_loader)
        losses.append(loss_avg)
        accs.append(acc)
        print(f"[{prefix}] epoch={ep} loss={loss_avg:.4f} acc={acc:.4f}")

    # One figure per metric: (values, y-label, title word, file suffix).
    epoch_axis = range(1, epochs + 1)
    curves = (
        (losses, 'Loss', 'loss', 'loss'),
        (accs, 'Accuracy', 'accuracy', 'acc'),
    )
    for values, ylabel, title_word, suffix in curves:
        fig = plt.figure()
        plt.plot(epoch_axis, values, marker='o')
        plt.xlabel('Epoch')
        plt.ylabel(ylabel)
        plt.title(f'{prefix} {title_word}')
        save_plot(fig, os.path.join(outputs_dir, f"{prefix}_{suffix}.png"))

    return losses


## FFNN Training

In [None]:
# Train the fully connected network for 3 epochs at lr=0.05.
ffnn = FFNN()
train_model(
    model=ffnn,
    train_loader=train_loader,
    test_loader=test_loader,
    lr=0.05,
    epochs=3,
    prefix="ffnn",
)

## CNN Training

In [None]:
# Train the convolutional network for 3 epochs at lr=0.01.
cnn = SimpleCNN()
train_model(
    model=cnn,
    train_loader=train_loader,
    test_loader=test_loader,
    lr=0.01,
    epochs=3,
    prefix="cnn",
)

In [None]:
# Plot the conv1 kernels of the trained CNN, min-max normalized one
# (filter, channel) slice at a time.
with torch.no_grad():
    w = cnn.conv1.weight.cpu()
    # Reduce over the two trailing (kernel height/width) axes; keepdim keeps
    # the result broadcastable against w.
    fmin = w.min(dim=-1, keepdim=True).values.min(dim=-2, keepdim=True).values
    fmax = w.max(dim=-1, keepdim=True).values.max(dim=-2, keepdim=True).values
    # The epsilon keeps a constant kernel from producing 0/0.
    w_norm = (w - fmin) / (fmax - fmin + 1e-8)

    cols = w_norm.shape[0]
    fig, axes = plt.subplots(1, cols, figsize=(2*cols, 2))
    # With a single subplot plt.subplots returns a bare Axes, not an array.
    if cols == 1:
        axes = [axes]
    for i, ax in enumerate(axes):
        ax.imshow(w_norm[i, 0].numpy(), cmap="gray")
        ax.set_title(f"F{i}")
        ax.axis("off")
    save_plot(
        fig,
        os.path.join(outputs_dir, "cnn_filters_conv1.png"),
        title="First-layer conv filters",
    )

## Learning Rate Sweep

In [None]:
# Compare the training-loss curves of the FFNN across learning rates.
lrs = [0.005, 0.05, 0.5]
sweep_epochs = 3  # single source of truth for training length and plot x-axis
logs = {}
for lr in lrs:
    # Re-seed before building each model so every run starts from the same
    # initial weights and RNG state; the curves then differ only through lr.
    set_seed()
    model = FFNN()
    tag = f"sweep_lr_{str(lr).replace('.','_')}"
    logs[lr] = train_model(model, train_loader, test_loader, lr=lr, epochs=sweep_epochs, prefix=tag)

fig = plt.figure()
for lr in lrs:
    # NOTE(review): this plot expects train_model to return the per-epoch
    # loss list (one value per epoch) — confirm against train_model.
    plt.plot(range(1, sweep_epochs + 1), logs[lr], label=f"lr={lr}")
plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Learning rate sweep')
save_plot(fig, os.path.join(outputs_dir, "sweep_loss_compare.png"))

## Convolution Arithmetic Checks

In [None]:
def conv_out_size(H, W, k, s, p, d=1):
    """Compute the output height and width of a 2-D convolution.

    Applies, per spatial dimension, the output-size formula::

        out = floor((n + 2*p - d*(k - 1) - 1) / s) + 1

    Args:
        H: Input height.
        W: Input width.
        k: Kernel size, an int or a ``(height, width)`` pair.
        s: Stride, an int or a ``(height, width)`` pair.
        p: Padding applied to each side, an int or a ``(height, width)`` pair.
        d: Dilation, an int or a ``(height, width)`` pair.

    Returns:
        ``(H_out, W_out)`` as ints.

    Raises:
        ValueError: If a tuple/list argument does not have exactly 2 items.
    """
    def as_pair(value):
        # A scalar applies to both spatial dimensions.
        if isinstance(value, (tuple, list)):
            if len(value) != 2:
                raise ValueError(f"expected an int or a pair, got {value!r}")
            return tuple(value)
        return value, value

    def one_dim(n, kernel, stride, pad, dilation):
        # Floor division stays exact for ints of any size, unlike float
        # division followed by math.floor.
        return int((n + 2 * pad - dilation * (kernel - 1) - 1) // stride) + 1

    (kh, kw), (sh, sw), (ph, pw), (dh, dw) = (as_pair(v) for v in (k, s, p, d))
    return one_dim(H, kh, sh, ph, dh), one_dim(W, kw, sw, pw, dw)

# Each case is (H, W, kernel, stride, padding, dilation).
cases = [
    (28, 28, 5, 1, 2, 1),
    (28, 28, 3, 2, 1, 1),
    (32, 32, 3, 1, 0, 1),
    (64, 64, 7, 2, 3, 1),
    (64, 64, 3, 1, 1, 2),
]

# Pair the formula's prediction with the shape an actual nn.Conv2d produces.
rows = []
for H, W, k, s, p, d in cases:
    h_out, w_out = conv_out_size(H, W, k, s, p, d)
    x = torch.randn(1, 1, H, W)
    conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=k, stride=s, padding=p, dilation=d)
    with torch.no_grad():
        y = conv(x)
    rows.append((H, W, k, s, p, d, h_out, w_out, y.shape[-2], y.shape[-1]))

# Render the comparison as a monospace text table inside an axis-less figure.
fig = plt.figure(figsize=(8,4))
plt.axis("off")
header = "H  W | k s p d | formula(H,W) | torch(H,W)\n" + "-"*48 + "\n"
body = "".join(
    f"{r[0]:>2} {r[1]:>2} | {r[2]} {r[3]} {r[4]} {r[5]} | ({r[6]:>2},{r[7]:>2}) | ({r[8]:>2},{r[9]:>2})\n"
    for r in rows
)
text = header + body
plt.text(0.02, 0.98, text, va="top", family="monospace")
save_plot(fig, os.path.join(outputs_dir, "conv_arith_checks.png"))