# Import

In [None]:
import torch, gc
gc.collect()
torch.cuda.empty_cache()
import os, time, random, numpy as np
import torch, torch.nn as nn, torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm

# Configuration

In [None]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DATA_ROOT = "data/CUB_200_2011"
NUM_CLASSES = 200
INIT_LR = 1e-4
NEW_LR = 1e-6
WEIGHT_DECAY = 1e-4
BATCH_SIZE = 32
STEP_SIZE = 7
GAMMA = 0.1
EPOCHS_STAGE1 = 7
EPOCHS_STAGE2 = 150
SEED = 87

torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Datset Setup

In [2]:
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.5, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
    transforms.RandomRotation(degrees=15),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

test_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])


class CUBDataset(Dataset):
    def __init__(self, root, train=True, transform=None):
        self.root = root
        self.transform = transform

        img_txt = os.path.join(root, "images.txt")
        label_txt = os.path.join(root, "image_class_labels.txt")
        split_txt = os.path.join(root, "train_test_split.txt")

        with open(img_txt) as f:
            imgs = [x.strip().split(" ") for x in f.readlines()]
        with open(label_txt) as f:
            labels = [int(x.strip().split(" ")[1]) - 1 for x in f.readlines()]
        with open(split_txt) as f:
            split = [int(x.strip().split(" ")[1]) for x in f.readlines()]

        self.samples = []
        for (img_id, img_path), label, is_train in zip(imgs, labels, split):
            if (train and is_train == 1) or (not train and is_train == 0):
                self.samples.append((os.path.join(root, "images", img_path), label))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path, label = self.samples[idx]
        img = Image.open(img_path).convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, label


train_set = CUBDataset(DATA_ROOT, train=True, transform=train_transforms)
test_set = CUBDataset(DATA_ROOT, train=False, transform=test_transforms)

train_loader = DataLoader(
    train_set,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=10,
    pin_memory=True,
    persistent_workers=True,
    prefetch_factor=4
)

test_loader = DataLoader(
    test_set,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=10,
    pin_memory=True,
    persistent_workers=True,
    prefetch_factor=4
)

print(f"Loaded {len(train_set)} training and {len(test_set)} testing images.")


Loaded 5994 training and 5794 testing images.


# Utils

In [None]:
# MixUp Augmentation

def mixup_data(x, y, alpha=0.2, device='cuda'):
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1
    batch_size = x.size()[0]
    index = torch.randperm(batch_size).to(device)
    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)


# Evaluation
@torch.no_grad()
def evaluate_model(model, loader, criterion, device):
    model.eval()
    total_loss, correct, total = 0.0, 0, 0
    for images, labels in loader:
        images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        outputs = model(images)
        loss = criterion(outputs, labels)
        batch_size = labels.size(0)
        total_loss += loss.item() * batch_size
        correct += (outputs.argmax(1) == labels).sum().item()
        total += labels.size(0)
    return 100.0 * correct / total, total_loss / total

# Train
def train_model(model, loader, optimizer, criterion, device):
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    for images, labels in loader:
        images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        mixed_imgs, y_a, y_b, lam = mixup_data(images, labels, alpha=0.2, device=device)

        optimizer.zero_grad()
        outputs = model(mixed_imgs)
        loss = mixup_criterion(criterion, outputs, y_a, y_b, lam)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        with torch.no_grad():
            raw_outputs = model(images)
            preds = raw_outputs.argmax(1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    return running_loss / total, 100 * correct / total

# Main loop

In [23]:
# Stage 1
print("=== Stage 1 ===")
model = models.resnet152(weights=models.ResNet152_Weights.IMAGENET1K_V1)
for param in model.parameters():
    param.requires_grad = False

num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, NUM_CLASSES)
model = model.to(DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.fc.parameters(), lr=INIT_LR, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

train_losses, test_losses, train_accuracies, test_accuracies = [], [], [], []
best_acc_stage1 = 0.0

for epoch in range(EPOCHS_STAGE1):
    start = time.time()
    tr_loss, tr_acc = train_model(model, train_loader, optimizer, criterion, DEVICE)
    te_acc, te_loss = evaluate_model(model, test_loader, criterion, DEVICE)
    scheduler.step()

    train_losses.append(tr_loss)
    test_losses.append(te_loss)
    train_accuracies.append(tr_acc)
    test_accuracies.append(te_acc)

    print(f"[S1-Epoch {epoch+1:02d}] Train Loss={tr_loss:.4f} | Train Acc={tr_acc:.2f}% | "
          f"Test Loss={te_loss:.4f} | Test Acc={te_acc:.2f}% | "
          f"LR={optimizer.param_groups[0]['lr']:.2e} | Time={time.time()-start:.2f}s")

    if te_acc > best_acc_stage1:
        best_acc_stage1 = te_acc
        torch.save(model.state_dict(), "best_resnet_stage1.pt")

# Stage 2
print("=== Stage 2===")
for param in model.parameters():
    param.requires_grad = True

optimizer = optim.AdamW(model.parameters(), lr=NEW_LR, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=EPOCHS_STAGE2, gamma=0.1)

best_acc_stage2 = 0.0

for epoch in range(EPOCHS_STAGE2):
    start = time.time()
    tr_loss, tr_acc = train_model(model, train_loader, optimizer, criterion, DEVICE)
    te_acc, te_loss = evaluate_model(model, test_loader, criterion, DEVICE)
    scheduler.step()

    train_losses.append(tr_loss)
    test_losses.append(te_loss)
    train_accuracies.append(tr_acc)
    test_accuracies.append(te_acc)

    print(f"[S2-Epoch {epoch+1:03d}] Train Loss={tr_loss:.4f} | Train Acc={tr_acc:.2f}% | "
          f"Test Loss={te_loss:.4f} | Test Acc={te_acc:.2f}% | "
          f"LR={optimizer.param_groups[0]['lr']:.2e} | Time={time.time()-start:.2f}s")

    if te_acc > best_acc_stage2:
        best_acc_stage2 = te_acc
        torch.save(model.state_dict(), "best_resnet_finetuned.pt")

print(f"Final Best Acc = {best_acc_stage2:.2f}%")

=== Stage 1 ===
[S1-Epoch 01] Train Loss=0.1640 | Train Acc=2.49% | Test Loss=4.9199 | Test Acc=4.87% | LR=1.00e-04 | Time=11.06s
[S1-Epoch 02] Train Loss=0.1525 | Train Acc=11.71% | Test Loss=4.5216 | Test Acc=21.14% | LR=1.00e-04 | Time=10.84s
[S1-Epoch 03] Train Loss=0.1433 | Train Acc=22.86% | Test Loss=4.2164 | Test Acc=28.96% | LR=1.00e-04 | Time=10.79s
[S1-Epoch 04] Train Loss=0.1359 | Train Acc=30.10% | Test Loss=3.9412 | Test Acc=32.41% | LR=1.00e-04 | Time=10.94s
[S1-Epoch 05] Train Loss=0.1284 | Train Acc=36.17% | Test Loss=3.6771 | Test Acc=38.61% | LR=1.00e-04 | Time=11.01s
[S1-Epoch 06] Train Loss=0.1241 | Train Acc=39.97% | Test Loss=3.4888 | Test Acc=40.87% | LR=1.00e-04 | Time=10.98s
[S1-Epoch 07] Train Loss=0.1174 | Train Acc=43.29% | Test Loss=3.3133 | Test Acc=42.51% | LR=1.00e-05 | Time=10.89s
=== Stage 2===
[S2-Epoch 001] Train Loss=0.1097 | Train Acc=49.13% | Test Loss=3.0465 | Test Acc=47.74% | LR=1.00e-06 | Time=20.22s
[S2-Epoch 002] Train Loss=0.1043 | Train A