In [1]:
import os
import time
import random
import csv

import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models

# keep PyTorch from using too many CPU threads
torch.set_num_threads(1)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", DEVICE)

##########################################
##### Dataset class for CUB-200-2011 #####
##########################################

class CUBDataset(Dataset):
    """Image/label dataset built from the CUB-200-2011 index files.

    Three whitespace-separated text files are read from ``root_dir``:
    ``images.txt`` (image id -> relative path), ``image_class_labels.txt``
    (image id -> 1-based class label) and ``train_test_split.txt``
    (image id -> 1 for a training image, anything else for a test image).
    Images themselves are opened lazily in ``__getitem__`` from
    ``root_dir/images``.

    Args:
        root_dir: Directory containing the index files and ``images/``.
        train: Keep the samples flagged as train (True) or the others (False).
        transform: Optional callable applied to each loaded RGB PIL image.
        subset_size: If not None, keep only the first ``subset_size``
            samples, in ``images.txt`` order.

    Raises:
        OSError: If an index file cannot be opened.
        ValueError: If a non-blank index line is malformed.
        KeyError: If an image id has no label or split entry.
    """

    def __init__(self, root_dir, train=True, transform=None, subset_size=None):
        self.root_dir = root_dir
        self.img_dir = os.path.join(root_dir, "images")
        self.transform = transform

        # id -> relative path
        id2img = self._read_index(os.path.join(root_dir, "images.txt"))

        # id -> label, shifted from the file's 1-based value to 0-based
        id2label = {
            img_id: int(label) - 1
            for img_id, label in self._read_index(
                os.path.join(root_dir, "image_class_labels.txt")
            ).items()
        }

        # id -> train/test flag
        id2is_train = {
            img_id: int(flag)
            for img_id, flag in self._read_index(
                os.path.join(root_dir, "train_test_split.txt")
            ).items()
        }

        # (absolute image path, label) pairs for the requested split
        self.samples = [
            (os.path.join(self.img_dir, rel_path), id2label[img_id])
            for img_id, rel_path in id2img.items()
            if (id2is_train[img_id] == 1) == train
        ]

        # NOTE(review): the subset is a plain prefix in index-file order; if
        # that order is grouped by class (presumably it is -- confirm), a
        # small subset only covers the first few classes.
        if subset_size is not None:
            self.samples = self.samples[:subset_size]

    @staticmethod
    def _read_index(path):
        """Parse an ``<int id> <value>`` index file into ``{id: value}``.

        Blank lines are skipped so a trailing newline at the end of the file
        does not break the two-field unpacking. The value is everything
        after the first run of whitespace and is returned as a string.
        """
        mapping = {}
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                img_id, value = line.split(maxsplit=1)
                mapping[int(img_id)] = value
        return mapping

    def __len__(self):
        """Return the number of selected samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is converted to RGB and passed through ``self.transform``
        when one was given.
        """
        img_path, label = self.samples[idx]
        # The context manager closes the file handle as soon as the pixel
        # data has been copied into the converted image.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        return image, label

########################################
##### Hyperparameters & Transforms #####
########################################

# Location of the extracted dataset and the size of the classifier output.
DATA_ROOT = "data/CUB_200_2011"
NUM_CLASSES = 200

BATCH_SIZE = 8  # keep these small for CPU
NUM_WORKERS = 0  # important on macOS

# A full pass takes too long, so train and evaluate on a subset instead.
DEBUG_TRAIN_SUBSET = 256
DEBUG_TEST_SUBSET = 256

EPOCHS = 10
INIT_LR = 1e-4
WEIGHT_DECAY = 1e-4

# ImageNet normalization (shared by the train and test pipelines)
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Training pipeline: resize to 192, take a random 160-pixel crop covering
# 70-100% of the image, flip horizontally at random, then normalize.
train_transform = transforms.Compose([
    transforms.Resize(192),
    transforms.RandomResizedCrop(160, scale=(0.7, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Evaluation pipeline: same resize, but a fixed center crop and no
# random augmentation.
test_transform = transforms.Compose([
    transforms.Resize(192),
    transforms.CenterCrop(160),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

###################################
##### Datasets & DataLoaders #####
##################################

# Train split with augmentation, truncated to the debug subset size.
train_dataset = CUBDataset(
    DATA_ROOT,
    train=True,
    transform=train_transform,
    subset_size=DEBUG_TRAIN_SUBSET,
)

# Test split with the deterministic transform, truncated the same way.
test_dataset = CUBDataset(
    DATA_ROOT,
    train=False,
    transform=test_transform,
    subset_size=DEBUG_TEST_SUBSET,
)

# Pinned memory only matters when batches are copied to a CUDA device;
# requesting it on a CPU-only machine makes DataLoader emit a warning,
# so it is enabled only when CUDA is available.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=torch.cuda.is_available(),
)

test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=torch.cuda.is_available(),
)

print("Train samples:", len(train_dataset))
print("Test samples:", len(test_dataset))

################################
##### VGG16 Backbone Model #####
################################
class VGG16CUB(nn.Module):
    """VGG16 classifier whose last fully connected layer is resized.

    Args:
        num_classes: Output size of the replacement final layer.
        pretrained: Load the ``IMAGENET1K_V1`` weights when True, otherwise
            build the network without pre-trained weights.
        freeze_features: When True, turn off gradients for every parameter
            of the ``features`` sub-module.
    """

    def __init__(self, num_classes=NUM_CLASSES, pretrained=True, freeze_features=True):
        super().__init__()

        # Pick the weight set first, then build the torchvision model.
        weights = models.VGG16_Weights.IMAGENET1K_V1 if pretrained else None
        backbone = models.vgg16(weights=weights)

        # Frozen convolutional layers need no gradient computation
        # (faster on CPU).
        if freeze_features:
            backbone.features.requires_grad_(False)

        # Swap the last classifier layer for one with `num_classes` outputs,
        # keeping the input width of the layer it replaces.
        old_head = backbone.classifier[6]
        backbone.classifier[6] = nn.Linear(old_head.in_features, num_classes)

        self.model = backbone

    def forward(self, x):
        """Return the wrapped network's output for input batch ``x``."""
        return self.model(x)


# Build the network with pre-trained weights and frozen feature layers,
# then move it to the selected device.
model = VGG16CUB(
    num_classes=NUM_CLASSES,
    pretrained=True,
    freeze_features=True,
)
model = model.to(DEVICE)
print("Model defined (VGG16 backbone).")

###########################################
##### Training / Evaluation Functions #####
###########################################

def train_one_epoch(model, loader, optimizer, criterion, device):
    """Run one optimization pass over ``loader`` and return the mean loss.

    Args:
        model: Module to train; it is switched to training mode.
        loader: Iterable yielding ``(images, labels)`` tensor batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``;
            its result is treated as the mean loss over the batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The per-sample average loss over the samples actually processed,
        or ``0.0`` when the loader yields no batches.
    """
    model.train()
    running_loss = 0.0
    num_samples = 0

    for batch_idx, (images, labels) in enumerate(loader):
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so that a smaller final
        # batch does not skew the epoch average.
        batch_size = images.size(0)
        running_loss += loss.item() * batch_size
        num_samples += batch_size

        # Progress print
        if (batch_idx + 1) % 10 == 0:
            print(f"[Batch {batch_idx+1}/{len(loader)}] loss={loss.item():.4f}")

    # Divide by the samples seen rather than len(loader.dataset): the two
    # differ when the loader drops the last batch or uses a sampler, and the
    # dataset length would be a zero divisor for an empty dataset.
    if num_samples == 0:
        return 0.0
    return running_loss / num_samples


@torch.no_grad()
def evaluate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        loader: Iterable yielding ``(images, labels)`` tensor batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``;
            its result is treated as the mean loss over the batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(accuracy, loss)``: top-1 accuracy in percent and the per-sample
        average loss, both over the samples actually processed. Returns
        ``(0.0, 0.0)`` when the loader yields no batches.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    num_samples = 0

    for images, labels in loader:
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        outputs = model(images)
        loss = criterion(outputs, labels)

        batch_size = images.size(0)
        running_loss += loss.item() * batch_size
        num_samples += batch_size

        # The predicted class is the index of the largest output per row.
        preds = outputs.argmax(dim=1)
        correct += (preds == labels).sum().item()

    # Normalize by the samples seen rather than len(loader.dataset): the two
    # differ when the loader drops the last batch or uses a sampler, and the
    # dataset length would be a zero divisor for an empty dataset.
    if num_samples == 0:
        return 0.0, 0.0

    epoch_loss = running_loss / num_samples
    accuracy = 100.0 * correct / num_samples
    return accuracy, epoch_loss

########################################
##### Loss, Optimizer, CSV Logging #####
########################################

# Seed the Python, NumPy and PyTorch generators with one shared value.
# NOTE: `model` is constructed earlier in the script, before these seeds are
# set, so the random initialization of its replaced classifier layer is not
# governed by SEED.
SEED = 542
for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Best accuracy seen so far and the file that collects per-epoch metrics.
best_acc = 0.0
log_path = "vgg16_cub_log.csv"

criterion = nn.CrossEntropyLoss()

# Hand the optimizer only the parameters that still require gradients.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.AdamW(
    trainable_params,
    lr=INIT_LR,
    weight_decay=WEIGHT_DECAY,
)

# Start a fresh CSV log (overwriting any previous one) with its header row.
with open(log_path, "w", newline="") as f:
    csv.writer(f).writerow(["epoch", "train_loss", "val_loss", "val_acc"])

print("Starting training... Logging to:", log_path)

#########################
##### Training Loop #####
#########################

# Each epoch: train, evaluate on the test loader, log the metrics to the
# CSV file, and checkpoint the weights whenever accuracy improves.
for epoch in range(EPOCHS):
    print(f"\n=== Epoch {epoch+1}/{EPOCHS} ===")

    # Time the training pass and the evaluation pass together.
    start = time.time()
    train_loss = train_one_epoch(model, train_loader, optimizer, criterion, DEVICE)
    val_acc, val_loss = evaluate(model, test_loader, criterion, DEVICE)
    elapsed = time.time() - start

    summary = (
        f"Epoch {epoch+1:02d}/{EPOCHS} | "
        f"Train Loss: {train_loss:.4f} | "
        f"Val Loss: {val_loss:.4f} | "
        f"Val Acc: {val_acc:.2f}% | "
        f"Time: {elapsed:.1f}s"
    )
    print(summary)

    # One CSV row per epoch, appended so earlier rows are kept.
    row = [epoch + 1, train_loss, val_loss, val_acc]
    with open(log_path, "a", newline="") as f:
        csv.writer(f).writerow(row)

    # Nothing more to do unless this epoch beat the best accuracy so far.
    if not (val_acc > best_acc):
        continue

    best_acc = val_acc
    torch.save(model.state_dict(), "vgg16_cub_best.pth")
    print(f"New best val acc: {best_acc:.2f}%, model saved.")

print(f"\nBest validation accuracy: {best_acc:.2f}%")


Using device: cpu
Train samples: 256
Test samples: 256
Model defined (VGG16 backbone).
Starting training... Logging to: vgg16_cub_log.csv

=== Epoch 1/10 ===
[Batch 10/32] loss=3.1529
[Batch 20/32] loss=1.8922
[Batch 30/32] loss=1.4090
Epoch 01/10 | Train Loss: 2.5646 | Val Loss: 2.7350 | Val Acc: 55.47% | Time: 190.4s
New best val acc: 55.47%, model saved.

=== Epoch 2/10 ===
[Batch 10/32] loss=0.8227
[Batch 20/32] loss=1.5285
[Batch 30/32] loss=0.7787
Epoch 02/10 | Train Loss: 0.6187 | Val Loss: 2.7678 | Val Acc: 59.77% | Time: 197.6s
New best val acc: 59.77%, model saved.

=== Epoch 3/10 ===
[Batch 10/32] loss=0.1567
[Batch 20/32] loss=0.3537
[Batch 30/32] loss=0.3909
Epoch 03/10 | Train Loss: 0.3765 | Val Loss: 2.9909 | Val Acc: 64.06% | Time: 201.3s
New best val acc: 64.06%, model saved.

=== Epoch 4/10 ===
[Batch 10/32] loss=0.2683
[Batch 20/32] loss=0.2071
[Batch 30/32] loss=0.5927
Epoch 04/10 | Train Loss: 0.3467 | Val Loss: 2.8854 | Val Acc: 66.80% | Time: 190.8s
New best val 