In [1]:
!pip install -q numpy pillow tqdm scikit-learn matplotlib streamlit
!pip install -q --index-url https://download.pytorch.org/whl/cpu torch torchvision torchaudio

In [2]:
import os, random
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

import torchvision
import torchvision.transforms as T
from torchvision.models import resnet18, ResNet18_Weights

from torch.utils.data import DataLoader, Subset

def set_seed(seed=42):
    """Seed Python's ``random``, NumPy's global RNG and PyTorch with ``seed``.

    Args:
        seed: Integer handed unchanged to each library's seeding function.
    """
    # Same order as always: stdlib, NumPy, then PyTorch.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

# Seed every RNG once, before any model or DataLoader is created.
set_seed(42)

# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device  # bare last expression so the notebook displays the selected device

device(type='cpu')

In [3]:
import torch
import torchvision
import torchvision.transforms as T
from torch.utils.data import DataLoader, Subset

# --- Data-pipeline configuration shared by the cells below ---
DATA_DIR = "./data"  # root folder passed to every CIFAR10 dataset object
BATCH_SIZE = 64
NUM_WORKERS = 0  # load batches in the main process (Windows/Jupyter safe)
PIN_MEMORY = torch.cuda.is_available()  # True only if CUDA

# Per-channel mean/std handed to T.Normalize for the baseline CNN.
# NOTE(review): presumably statistics of the CIFAR-10 training images —
# confirm where these values come from.
CIFAR_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR_STD  = (0.2023, 0.1994, 0.2010)

# Training pipeline: random crop (after 4px padding) and random horizontal
# flip as augmentation, then tensor conversion and normalisation.
train_tfms = T.Compose([
    T.RandomCrop(32, padding=4),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Normalize(CIFAR_MEAN, CIFAR_STD),
])

# Evaluation pipeline: no random augmentation, same normalisation as training.
eval_tfms = T.Compose([
    T.ToTensor(),
    T.Normalize(CIFAR_MEAN, CIFAR_STD),
])

# Two dataset objects over the same training split so that validation does
# NOT go through the random augmentations: train_full applies train_tfms,
# val_full applies eval_tfms.
train_full = torchvision.datasets.CIFAR10(root=DATA_DIR, train=True,  download=True,  transform=train_tfms)
val_full   = torchvision.datasets.CIFAR10(root=DATA_DIR, train=True,  download=False, transform=eval_tfms)
test_ds    = torchvision.datasets.CIFAR10(root=DATA_DIR, train=False, download=True,  transform=eval_tfms)

# Class-name list, also stored in the ResNet checkpoint later on.
CLASS_NAMES = train_full.classes

# Reproducible split: a dedicated generator seeded with 42 yields the same
# permutation on every run, independent of the global torch RNG state.
val_size = 5000
n = len(train_full)
g = torch.Generator().manual_seed(42)
perm = torch.randperm(n, generator=g).tolist()

# First val_size shuffled indices form the validation set, the rest train.
val_idx = perm[:val_size]
train_idx = perm[val_size:]

# The index lists are disjoint, so no validation image is seen in training.
train_ds = Subset(train_full, train_idx)
val_ds   = Subset(val_full, val_idx)

# Keyword arguments common to the train / val / test loaders.
dl_kwargs = {
    "batch_size": BATCH_SIZE,
    "num_workers": NUM_WORKERS,
    "pin_memory": PIN_MEMORY,
}

# persistent_workers may only be enabled when worker processes exist,
# so it is added solely for a positive NUM_WORKERS.
if NUM_WORKERS > 0:
    dl_kwargs.update(persistent_workers=True)

# Only the training loader shuffles; evaluation order stays fixed.
train_loader, val_loader, test_loader = (
    DataLoader(dataset, shuffle=do_shuffle, **dl_kwargs)
    for dataset, do_shuffle in (
        (train_ds, True),
        (val_ds, False),
        (test_ds, False),
    )
)

print("Classes:", CLASS_NAMES)
print("Sizes  : train =", len(train_ds), "val =", len(val_ds), "test =", len(test_ds))
print("Batches:", *(len(loader) for loader in (train_loader, val_loader, test_loader)))

Classes: ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
Sizes  : train = 45000 val = 5000 test = 10000
Batches: 704 79 157


In [4]:
class SimpleCNN(nn.Module):
    """Small three-stage convolutional classifier.

    Every stage is Conv3x3 -> BatchNorm -> ReLU -> 2x2 max-pool, taking the
    channel count 3 -> 32 -> 64 -> 128. The classifier head expects a
    128x4x4 feature map, i.e. a 32x32 input after the three 2x poolings.

    Args:
        num_classes: Number of output logits.
    """

    # Output channels of the successive convolution stages.
    _STAGE_CHANNELS = (32, 64, 128)

    def __init__(self, num_classes=10):
        super().__init__()

        stage_layers = []
        in_channels = 3
        for out_channels in self._STAGE_CHANNELS:
            stage_layers += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2),
            ]
            in_channels = out_channels
        # A flat Sequential keeps the layer indices (and state_dict keys) 0..11.
        self.features = nn.Sequential(*stage_layers)

        flat_features = in_channels * 4 * 4
        head_layers = [
            nn.Flatten(),
            nn.Linear(flat_features, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.3),
            nn.Linear(256, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return class logits for the image batch ``x``."""
        feature_map = self.features(x)
        return self.classifier(feature_map)

In [5]:
from tqdm.auto import tqdm
import torch

@torch.no_grad()
def eval_accuracy(model, loader, device):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    The model is switched to eval mode and left there. The predicted class is
    the argmax over dimension 1 of the model output.

    Args:
        model: Module mapping an input batch to per-class scores.
        loader: Iterable of ``(inputs, targets)`` batches.
        device: Device both tensors are moved to before the forward pass.

    Returns:
        Accuracy as a float in [0, 1]; 0.0 when the loader yields no samples.
    """
    model.eval()

    n_hit = 0
    n_seen = 0
    for inputs, targets in loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        predicted = model(inputs).argmax(dim=1)
        n_hit += predicted.eq(targets).sum().item()
        n_seen += targets.size(0)

    # max(..., 1) guards the division for an empty loader.
    return n_hit / max(n_seen, 1)

def train_one_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader`` and return the mean loss.

    Args:
        model: Module to train; it is switched to training mode.
        loader: Iterable of ``(inputs, targets)`` batches.
        optimizer: Optimizer that updates the model's parameters.
        criterion: Callable ``criterion(logits, targets)`` returning a scalar
            loss tensor.
        device: Device both tensors are moved to before the forward pass.

    Returns:
        The per-batch loss values averaged with batch-size weights
        (0.0 when the loader yields no samples).
    """
    model.train()
    total_loss, total = 0.0, 0

    pbar = tqdm(loader, desc="train", leave=False)
    for x, y in pbar:
        x, y = x.to(device), y.to(device)

        optimizer.zero_grad(set_to_none=True)
        logits = model(x)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()

        # Read the scalar once: .item() already returns a Python float, and
        # every call is a separate device-to-host copy.
        batch_loss = loss.item()
        bs = y.size(0)
        total_loss += batch_loss * bs
        total += bs

        pbar.set_postfix(loss=batch_loss)

    # max(..., 1) guards the division for an empty loader.
    return total_loss / max(total, 1)

In [6]:
# Checkpoints are written under ./models; create the folder up front.
Path("models").mkdir(exist_ok=True)
BASELINE_CKPT = "models/baseline_best.pt"

baseline = SimpleCNN(num_classes=10).to(device)

criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = optim.AdamW(baseline.parameters(), lr=1e-3, weight_decay=5e-4)

EPOCHS_BASELINE = 5
# Start below any attainable accuracy so the first epoch always writes a
# checkpoint. With 0.0 and a strict ">" a run that never beat 0% would save
# nothing while the final message still announced a saved file.
best_val = float("-inf")

for epoch in range(1, EPOCHS_BASELINE + 1):
    loss = train_one_epoch(baseline, train_loader, optimizer, criterion, device)
    val_acc = eval_accuracy(baseline, val_loader, device)

    # Keep only the weights of the best validation epoch seen so far.
    if val_acc > best_val:
        best_val = val_acc
        torch.save({"model_state": baseline.state_dict()}, BASELINE_CKPT)

    print(f"[Baseline] Epoch {epoch:02d} | loss={loss:.4f} | val_acc={val_acc:.4f} | best={best_val:.4f}")

print(f"Saved: {BASELINE_CKPT}")

train:   0%|          | 0/704 [00:00<?, ?it/s]

[Baseline] Epoch 01 | loss=1.6887 | val_acc=0.6008 | best=0.6008


train:   0%|          | 0/704 [00:00<?, ?it/s]

[Baseline] Epoch 02 | loss=1.4377 | val_acc=0.6606 | best=0.6606


train:   0%|          | 0/704 [00:00<?, ?it/s]

[Baseline] Epoch 03 | loss=1.3500 | val_acc=0.6876 | best=0.6876


train:   0%|          | 0/704 [00:00<?, ?it/s]

[Baseline] Epoch 04 | loss=1.2871 | val_acc=0.7152 | best=0.7152


train:   0%|          | 0/704 [00:00<?, ?it/s]

[Baseline] Epoch 05 | loss=1.2430 | val_acc=0.7260 | best=0.7260
Saved: models/baseline_best.pt


In [7]:
# --- Input settings for the ResNet-18 pipeline ---
# Images are resized to INPUT_SIZE and normalised with these mean/std values;
# all three are also stored in the ResNet checkpoint.
# NOTE(review): presumably the ImageNet statistics the pretrained weights
# expect — confirm against the torchvision weights documentation.
INPUT_SIZE = 224
IMNET_MEAN = (0.485, 0.456, 0.406)
IMNET_STD  = (0.229, 0.224, 0.225)

# Training pipeline: resize, random crop covering 80-100% of the image
# (resized back to INPUT_SIZE), random flip, then tensor + normalise.
train_tfms_rn = T.Compose([
    T.Resize(INPUT_SIZE),
    T.RandomResizedCrop(INPUT_SIZE, scale=(0.8, 1.0)),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Normalize(IMNET_MEAN, IMNET_STD),
])

# Evaluation pipeline: no random augmentation.
# NOTE(review): for square inputs the CenterCrop after Resize presumably
# leaves the image unchanged — confirm.
eval_tfms_rn = T.Compose([
    T.Resize(INPUT_SIZE),
    T.CenterCrop(INPUT_SIZE),
    T.ToTensor(),
    T.Normalize(IMNET_MEAN, IMNET_STD),
])

# Same CIFAR-10 data as the baseline, wrapped again with the ResNet
# transforms: augmented for training, deterministic for val/test.
train_full_rn = torchvision.datasets.CIFAR10(root=DATA_DIR, train=True, download=True, transform=train_tfms_rn)
val_full_rn   = torchvision.datasets.CIFAR10(root=DATA_DIR, train=True, download=False, transform=eval_tfms_rn)
test_ds_rn    = torchvision.datasets.CIFAR10(root=DATA_DIR, train=False, download=True, transform=eval_tfms_rn)

# Rebuild the permutation with the same seed (42) and the same val_size as
# the baseline cell, so both models train and validate on identical indices.
# This reassigns the globals n, perm, val_idx and train_idx.
n = len(train_full_rn)
perm = torch.randperm(n, generator=torch.Generator().manual_seed(42)).tolist()
val_idx = perm[:val_size]
train_idx = perm[val_size:]

train_ds_rn = Subset(train_full_rn, train_idx)
val_ds_rn   = Subset(val_full_rn, val_idx)

# Reuse dl_kwargs from the baseline loader cell instead of repeating the
# settings per call, so both pipelines share batch size, worker count,
# pin_memory and (when NUM_WORKERS > 0) persistent_workers.
# Only the training loader shuffles.
train_loader_rn = DataLoader(train_ds_rn, shuffle=True,  **dl_kwargs)
val_loader_rn   = DataLoader(val_ds_rn,   shuffle=False, **dl_kwargs)
test_loader_rn  = DataLoader(test_ds_rn,  shuffle=False, **dl_kwargs)

print("ResNet loaders ready:", len(train_loader_rn), len(val_loader_rn), len(test_loader_rn))

ResNet loaders ready: 704 79 157


In [8]:
# Pretrained ResNet-18 whose final layer is replaced by a new 10-way head.
model = resnet18(weights=ResNet18_Weights.DEFAULT)
model.fc = nn.Linear(model.fc.in_features, 10)

# Feature-extraction setup (keeps CPU training fast): only parameters of the
# new head, whose names start with "fc.", remain trainable.
# NOTE(review): train_one_epoch calls model.train(), so any BatchNorm layers
# in the frozen backbone presumably still update their running statistics —
# confirm this is intended.
for name, param in model.named_parameters():
    param.requires_grad = name.startswith("fc.")

model = model.to(device)

criterion_rn = nn.CrossEntropyLoss(label_smoothing=0.1)

# Hand the optimizer only the parameters that are still trainable.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer_rn = optim.AdamW(trainable_params, lr=3e-4, weight_decay=5e-4)

In [9]:
# Checkpoints are written under ./models; create the folder up front.
Path("models").mkdir(exist_ok=True)
ckpt_path = "models/cifar10_resnet18_best.pt"

EPOCHS_RESNET = 5
# Start below any attainable accuracy so the first epoch always writes a
# checkpoint. With 0.0 and a strict ">" a run that never beat 0% would save
# nothing while the final message still announced a saved checkpoint.
best_val = float("-inf")

for epoch in range(1, EPOCHS_RESNET + 1):
    loss = train_one_epoch(model, train_loader_rn, optimizer_rn, criterion_rn, device)
    val_acc = eval_accuracy(model, val_loader_rn, device)

    # Keep only the best validation epoch seen so far.
    if val_acc > best_val:
        best_val = val_acc
        # Store the preprocessing settings and class names next to the
        # weights so the checkpoint carries everything recorded here.
        ckpt = {
            "model_name": "resnet18",
            "num_classes": 10,
            "class_names": CLASS_NAMES,
            "input_size": INPUT_SIZE,
            "norm_mean": IMNET_MEAN,
            "norm_std": IMNET_STD,
            "model_state": model.state_dict(),
        }
        torch.save(ckpt, ckpt_path)

    print(f"[ResNet18-Frozen] Epoch {epoch:02d} | loss={loss:.4f} | val_acc={val_acc:.4f} | best={best_val:.4f}")

print(f"Saved checkpoint: {ckpt_path}")

train:   0%|          | 0/704 [00:00<?, ?it/s]

[ResNet18-Frozen] Epoch 01 | loss=1.3995 | val_acc=0.7688 | best=0.7688


train:   0%|          | 0/704 [00:00<?, ?it/s]

[ResNet18-Frozen] Epoch 02 | loss=1.1229 | val_acc=0.7834 | best=0.7834


train:   0%|          | 0/704 [00:00<?, ?it/s]

[ResNet18-Frozen] Epoch 03 | loss=1.0909 | val_acc=0.7968 | best=0.7968


train:   0%|          | 0/704 [00:00<?, ?it/s]

[ResNet18-Frozen] Epoch 04 | loss=1.0713 | val_acc=0.7944 | best=0.7968


train:   0%|          | 0/704 [00:00<?, ?it/s]

[ResNet18-Frozen] Epoch 05 | loss=1.0615 | val_acc=0.8056 | best=0.8056
Saved checkpoint: models/cifar10_resnet18_best.pt


In [10]:
# Export a few test images as PNG files into ./sample_images.
Path("sample_images").mkdir(exist_ok=True)

# No transform is passed here; each item is unpacked as (image, label) and
# the image is written with its own .save() method.
raw_test = torchvision.datasets.CIFAR10(root=DATA_DIR, train=False, download=True)
for i in range(20):
    img, label = raw_test[i]
    name = raw_test.classes[label]
    img.save(f"sample_images/{i:02d}_{name}.png")  # file name: "<index>_<class name>.png"

print("Saved sample images to sample_images/")

Saved sample images to sample_images/
