In [1]:
import os
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision.datasets import OxfordIIITPet

import copy
import numpy as np
import re



## Load Dataset

In [27]:
class NeuralNetwork():
    def __init__(self, basemodel, output, train_loader, val_loader, test_loader, unfreeze=0, parameters=None):
        # Init model
        self.model = copy.deepcopy(basemodel)
        # changing the fully connected layer
        self.model.fc = nn.Linear(self.model.fc.in_features, output)
        # Unfreeze the L last layers and fully connected last layer
        self.unfreeze_layers(unfreeze)
        self.model = self.model.to(device)

        self.optimizer = optim.Adam([{"params": self.model.fc.parameters(),"lr": 1e-3, "weight_decay": 1e-4},])
        self.criterion = nn.CrossEntropyLoss()

        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader

        self.parameters = parameters if parameters is not None else {}
        

    def unfreeze_layers(self, L):
      # freezing all layers
      for p in self.model.parameters():
          p.requires_grad = False
    
      layer = 0
      # unfreeze last L layers and fully connected layer
      for name, p in reversed(list(self.model.named_parameters())):
        if layer < L+1:
          if not name.endswith("bias"):
            layer += 1
          p.requires_grad = True

    def check_trainable_layers(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                print(f"{name} requires grad")
            else:
                print(f"{name} does NOT require grad")

                
    def run_epoch(self, loader, train=True):
        self.model.train() if train else self.model.eval()
        total_loss, correct, total = 0.0, 0, 0
        for imgs, labs in loader:
            imgs, labs = imgs.to(device), labs.to(device)
            if train:
                self.optimizer.zero_grad()
            logits = self.model(imgs)
            loss   = self.criterion(logits, labs)
            if train:
                loss.backward()
                self.optimizer.step()
            total_loss += loss.item() * imgs.size(0)
            preds      = logits.argmax(dim=1)
            correct   += (preds == labs).sum().item()
            total     += imgs.size(0)
        return total_loss/total, correct/total*100


    def test_model(self):
      test_loss, test_acc = self.run_epoch(self.test_loader, train=False)
      print(f"Test Accuracy: {test_acc:.2f}%")
      return test_acc
        
    def train(self, epochs, path="model.pth"):
      best_val_acc = 0.0
      for epoch in range(1,epochs+1):
    
        # gradually unfreeze layers for strategy 2 according to rate parameter
        if 'gradual_unfreezing' in self.parameters:
            uf_rate = 1 if not 'unfreezing_rate' in self.parameters else parameters['unfreezing_rate']
            L = np.floor(uf_rate*(1+epoch)) # allow for rates < 1
            if L < 1:
                L = 1
            self.unfreeze_layers(L)
    
        tr_loss, tr_acc = self.run_epoch(self.train_loader, train=True)
        val_loss, val_acc = self.run_epoch(self.val_loader, train=False)
        print(f"Epoch {epoch:2d} | Train: {tr_acc:.2f}% | Val: {val_acc:.2f}%")
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(self.model.state_dict(), path)
    
      test_acc = self.test_model()
      return test_acc

    def train2(self, epochs, path="model.pth"):
        best_val_acc = 0.0
        for epoch in range(1, epochs+1):
            # Gradual unfreezing
            if self.parameters.get('gradual_unfreezing', False):
                uf_rate = self.parameters.get('unfreezing_rate', 1)
                L = int(np.floor(uf_rate * (epoch + 1)))
                if L < 1:
                    L = 1
                self.unfreeze_layers(L)
    
            tr_loss, tr_acc = self.run_epoch(self.train_loader, train=True)
            val_loss, val_acc = self.run_epoch(self.val_loader, train=False)
            print(f"Epoch {epoch:2d} | Train: {tr_acc:.2f}% | Val: {val_acc:.2f}%")
    
            # Save best
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(self.model.state_dict(), path)
    
            # Step scheduler if present
            if hasattr(self, 'scheduler') and self.scheduler is not None:
                self.scheduler.step()
    
        return self.test_model()



In [3]:
OxfordIIITPet(root="data", download=True)

Dataset OxfordIIITPet
    Number of datapoints: 3680
    Root location: data

In [4]:
annnotations_dir = os.path.join("data", "oxford-iiit-pet", "annotations")
lines_for_files = open(os.path.join(annnotations_dir, "list.txt")).read().splitlines()[6:]
species_map = {l.split()[0]: int(l.split()[2]) for l in lines_for_files}

In [5]:
with open(os.path.join(annnotations_dir, "trainval.txt")) as f:
    trainval_names = [l.split()[0] for l in f if l.strip()]
with open(os.path.join(annnotations_dir, "test.txt")) as f:
    test_names = [l.split()[0] for l in f if l.strip()]

In [35]:
class BinaryPet(Dataset):
    def __init__(self, root, names, species_map, transform=None):
        self.root = root
        self.names = names
        self.species_map = species_map
        self.transform = transform

    def __len__(self):
        return len(self.names)

    def __getitem__(self, idx):
        name = self.names[idx]
        img_path = os.path.join(
            self.root, "oxford-iiit-pet", "images", name + ".jpg"
        )
        img = Image.open(img_path).convert("RGB")
        if self.transform:
            img = self.transform(img)
        label = self.species_map[name] - 1  # 1→cat→0, 2→dog→1
        return img, label

In [10]:
IMG_SIZE = 224

In [11]:
transformIMG = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])

In [38]:
full = BinaryPet("data", trainval_names, species_map, transform=transformIMG)
n_val = int(0.1 * len(full))
train_ds, val_ds = random_split(full, [len(full)-n_val, n_val])

In [39]:
test_ds = BinaryPet("data", test_names, species_map, transform=transformIMG)

In [40]:
batch_size = 32
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True,  num_workers=4, pin_memory=True)
val_loader   = DataLoader(val_ds,   batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

# Binary Classification

## Model Set Up

In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.resnet34(weights=models.ResNet34_Weights.DEFAULT)
# freezing all
for p in model.parameters():
    p.requires_grad = False
# changing the fully connected layer
model.fc = nn.Linear(model.fc.in_features, 2)
# Unfreeze the fully connected last layer
for name, p in model.named_parameters():
    if name.startswith("fc"):
        p.requires_grad = True
model = model.to(device)

In [13]:
criterion = nn.CrossEntropyLoss()

In [14]:
optimizer = optim.Adam([
    {"params": model.fc.parameters(),     "lr": 1e-3, "weight_decay": 1e-4},
])

In [18]:
print(torch.cuda.is_available())

True


## Training

In [16]:
def run_epoch(model, loader, train=True, criterion=criterion, optimizer=optimizer): # TODO: Include criterion and optimizer in a neater way
    model.train() if train else model.eval()
    total_loss, correct, total = 0.0, 0, 0
    for imgs, labs in loader:
        imgs, labs = imgs.to(device), labs.to(device)
        if train:
            optimizer.zero_grad()
        logits = model(imgs)
        loss   = criterion(logits, labs)
        if train:
            loss.backward()
            optimizer.step()
        total_loss += loss.item() * imgs.size(0)
        preds      = logits.argmax(dim=1)
        correct   += (preds == labs).sum().item()
        total     += imgs.size(0)
    return total_loss/total, correct/total*100


In [19]:
best_val_acc = 0.0
for epoch in range(1, 25):
    tr_loss, tr_acc = run_epoch(model, train_loader, train=True)
    val_loss, val_acc = run_epoch(model, val_loader, train=False)
    print(f"Epoch {epoch:2d} | Train: {tr_acc:.2f}% | Val: {val_acc:.2f}%")
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_binary_resnet34.pth")

Epoch  1 | Train: 98.85% | Val: 99.18%
Epoch  2 | Train: 98.97% | Val: 99.46%
Epoch  3 | Train: 99.18% | Val: 99.73%
Epoch  4 | Train: 98.76% | Val: 99.46%
Epoch  5 | Train: 99.40% | Val: 99.18%
Epoch  6 | Train: 98.91% | Val: 99.73%
Epoch  7 | Train: 99.58% | Val: 99.73%
Epoch  8 | Train: 99.43% | Val: 99.46%
Epoch  9 | Train: 98.82% | Val: 99.18%
Epoch 10 | Train: 98.91% | Val: 99.73%
Epoch 11 | Train: 99.18% | Val: 99.46%
Epoch 12 | Train: 99.52% | Val: 99.46%
Epoch 13 | Train: 99.12% | Val: 99.46%
Epoch 14 | Train: 99.00% | Val: 99.46%
Epoch 15 | Train: 99.43% | Val: 99.18%
Epoch 16 | Train: 98.85% | Val: 99.46%
Epoch 17 | Train: 99.40% | Val: 99.18%
Epoch 18 | Train: 99.06% | Val: 99.46%
Epoch 19 | Train: 98.94% | Val: 99.46%
Epoch 20 | Train: 99.52% | Val: 99.46%
Epoch 21 | Train: 99.52% | Val: 99.18%
Epoch 22 | Train: 99.79% | Val: 99.46%
Epoch 23 | Train: 99.25% | Val: 99.46%
Epoch 24 | Train: 99.64% | Val: 99.73%


In [20]:
model.load_state_dict(torch.load("best_binary_resnet34.pth"))
test_loss, test_acc = run_epoch(model, test_loader, train=False)
print(f"Test Accuracy: {test_acc:.2f}%")

Test Accuracy: 99.18%


# Multi Class Classification

TODO:
* different learning rates/learning rate schedulers for different layers
* data augmentation
* Effect of fine-tuning batch-norm parameters
* Train with Imbalanced Dataset
* Testing and logging of the above
* gradual_unfreezing testing
* unfreezing l layers at a time testing
  
DONE:
* gradual_unfreezing
* unfreezing l layers at a time

## Load Dataset

In [6]:
annnotations_dir = os.path.join("data", "oxford-iiit-pet", "annotations")
lines_for_files = open(os.path.join(annnotations_dir, "list.txt")).read().splitlines()[6:]
breeds_map = {l.split()[0]:re.split(r'_\d+', l.split()[0])[0] for l in lines_for_files}

In [7]:
breed_to_id = {val: i for i, val in enumerate(sorted(set(breeds_map.values())))}
id_to_breed = {v: k for k, v in breed_to_id.items()}

In [8]:
class MultiClassPet(Dataset):
    def __init__(self, root, names, breeds_map, transform=None):
        self.root = root
        self.names = names
        self.breeds_map = breeds_map
        self.transform = transform

    def __len__(self):
        return len(self.names)

    def __getitem__(self, idx):
        name = self.names[idx]
        img_path = os.path.join(
            self.root, "oxford-iiit-pet", "images", name + ".jpg"
        )
        img = Image.open(img_path).convert("RGB")
        if self.transform:
            img = self.transform(img)
        label = breed_to_id[self.breeds_map[name]]
        return img, label

In [12]:
full = MultiClassPet("data", trainval_names, breeds_map, transform=transformIMG)
n_val = int(0.1 * len(full))
train_ds, val_ds = random_split(full, [len(full)-n_val, n_val])
test_ds = MultiClassPet("data", test_names, breeds_map, transform=transformIMG)

In [13]:
batch_size = 32
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True,  num_workers=4, pin_memory=True)
val_loader   = DataLoader(val_ds,   batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

## Model Set Up

In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
basemodel = models.resnet34(weights=models.ResNet34_Weights.DEFAULT)

-> Strategy 1

In [31]:
# Strategy 1: train with l layers unfrozen simultaneously
accs = []
for L in range(1, 10):
  print(f"Start training model with {L} last layers trainable")
  modelmulti = NeuralNetwork(basemodel, 37, train_loader, val_loader, test_loader, unfreeze=L)  
  accs.append(modelmulti.train(10))

Start training model with 1 last layers trainable
Epoch  1 | Train: 59.81% | Val: 84.51%
Epoch  2 | Train: 88.04% | Val: 87.23%
Epoch  3 | Train: 91.43% | Val: 90.22%
Epoch  4 | Train: 93.39% | Val: 91.03%
Epoch  5 | Train: 93.99% | Val: 92.12%
Epoch  6 | Train: 94.69% | Val: 89.95%
Epoch  7 | Train: 95.20% | Val: 91.03%
Epoch  8 | Train: 96.07% | Val: 90.76%
Epoch  9 | Train: 96.68% | Val: 91.58%
Epoch 10 | Train: 96.92% | Val: 92.39%
Test Accuracy: 89.45%
Start training model with 2 last layers trainable
Epoch  1 | Train: 60.90% | Val: 84.51%
Epoch  2 | Train: 88.41% | Val: 88.32%
Epoch  3 | Train: 91.52% | Val: 90.22%
Epoch  4 | Train: 91.94% | Val: 89.13%
Epoch  5 | Train: 94.41% | Val: 90.49%
Epoch  6 | Train: 94.63% | Val: 91.30%
Epoch  7 | Train: 95.95% | Val: 90.22%
Epoch  8 | Train: 95.83% | Val: 91.85%
Epoch  9 | Train: 96.53% | Val: 89.95%
Epoch 10 | Train: 97.31% | Val: 89.40%
Test Accuracy: 89.32%
Start training model with 3 last layers trainable
Epoch  1 | Train: 60.75% |

In [25]:


# Transforms
base_tf = transforms.Compose([
    transforms.Resize(256), transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])
aug_tf = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomResizedCrop(224, scale=(0.8,1.0)),
    transforms.RandomHorizontalFlip(), transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])

# Create datasets and split 90/10
full_ds = MultiClassPet("data", trainval_names, breeds_map, transform=base_tf)
n_val   = int(0.1 * len(full_ds))
train_ds, val_ds = random_split(full_ds, [len(full_ds)-n_val, n_val])
test_ds = MultiClassPet("data", test_names, breeds_map, transform=base_tf)

# DataLoaders
batch_size = 32
val_loader  = DataLoader(val_ds,  batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

# Strategy 1 grid
results = []
basemodel = models.resnet34(weights=models.ResNet34_Weights.DEFAULT)

for use_aug in [False, True]:
    for freeze_bn in [False, True]:
        for use_l2 in [False, True]:
            for use_sched in [False, True]:
                print(f"\n>>> aug={use_aug}, freeze_bn={freeze_bn}, L2={use_l2}, sched={use_sched}")

                # a) Set train transform
                train_ds.dataset.transform = aug_tf if use_aug else base_tf

                # b) Train loader
                train_loader = DataLoader(
                    train_ds, batch_size=batch_size, shuffle=True,
                    num_workers=4, pin_memory=True
                )

                # c) Init model, fine-tune last 6 layers
                model = NeuralNetwork(
                    basemodel=basemodel,
                    output=len(breed_names),
                    train_loader=train_loader,
                    val_loader=val_loader,
                    test_loader=test_loader,
                    unfreeze=6,
                    parameters={}
                )
                model.model.to(device)

                # d) Freeze batch-norm if requested
                if freeze_bn:
                    for m in model.model.modules():
                        if isinstance(m, nn.BatchNorm2d):
                            m.eval()
                            for p in m.parameters():
                                p.requires_grad = False

                # e) Optimizer with two LR groups, optional L2 weight decay
                wd = 1e-2 if use_l2 else 0.0
                head_p = list(model.model.fc.parameters())
                back_p = [p for n,p in model.model.named_parameters()
                          if p.requires_grad and not n.startswith("fc")]
                model.optimizer = optim.AdamW([
                    {"params": head_p, "lr":1e-3, "weight_decay":wd},
                    {"params": back_p, "lr":1e-4, "weight_decay":wd}
                ])

                # f) Scheduler if requested
                if use_sched:
                    model.scheduler = optim.lr_scheduler.StepLR(model.optimizer, step_size=5, gamma=0.1)
                else:
                    model.scheduler = None

                # g) Train and record test accuracy
                acc = model.train(epochs=10)
                results.append(((use_aug, freeze_bn, use_l2, use_sched), acc))

# Summary for comparison
print("\n=== Strategy 1 Extended Grid Results ===")
for (aug, bn, l2, sched), acc in results:
    print(f"aug={aug}, freeze_bn={bn}, L2={l2}, sched={sched} → Test Acc: {acc:.2f}%")



>>> aug=False, freeze_bn=False, L2=False, sched=False
Epoch  1 | Train: 74.61% | Val: 86.14%
Epoch  2 | Train: 94.11% | Val: 89.95%
Epoch  3 | Train: 97.49% | Val: 89.40%
Epoch  4 | Train: 99.49% | Val: 89.67%
Epoch  5 | Train: 99.61% | Val: 89.95%
Epoch  6 | Train: 99.64% | Val: 90.49%
Epoch  7 | Train: 99.88% | Val: 90.22%
Epoch  8 | Train: 99.94% | Val: 89.95%
Epoch  9 | Train: 99.82% | Val: 89.13%
Epoch 10 | Train: 99.91% | Val: 89.40%
Test Accuracy: 89.62%

>>> aug=False, freeze_bn=False, L2=False, sched=True
Epoch  1 | Train: 75.91% | Val: 88.86%
Epoch  2 | Train: 94.75% | Val: 88.59%
Epoch  3 | Train: 97.83% | Val: 88.32%
Epoch  4 | Train: 99.12% | Val: 89.67%
Epoch  5 | Train: 99.55% | Val: 89.67%
Epoch  6 | Train: 99.85% | Val: 90.22%
Epoch  7 | Train: 99.91% | Val: 89.40%
Epoch  8 | Train: 99.97% | Val: 89.95%
Epoch  9 | Train: 99.97% | Val: 89.95%
Epoch 10 | Train: 99.97% | Val: 90.22%
Test Accuracy: 90.87%

>>> aug=False, freeze_bn=False, L2=True, sched=False
Epoch  1 | Tr

-> Strategy 2

In [47]:
# Strategy 2: train with gradual unfreezing
parameters = {}
parameters['gradual_unfreezing'] = True
accs = []

# test rates in steps of two
for rate in [0.3, 0.5, 0.8, 1, 2, 3]:
  print(f"Start training model with uf_rate {rate}")
  parameters['unfreezing_rate'] = rate
  modelmulti = NeuralNetwork(basemodel, 37, train_loader, val_loader, test_loader, unfreeze=1, parameters=parameters)   # start with only one unfrozen layer
  accs.append(modelmulti.train(10))

Start training model with uf_rate 0.3
Epoch  1 | Train: 61.08% | Val: 83.97%
Epoch  2 | Train: 88.68% | Val: 89.13%
Epoch  3 | Train: 91.46% | Val: 90.76%
Epoch  4 | Train: 92.39% | Val: 90.22%
Epoch  5 | Train: 93.75% | Val: 91.30%
Epoch  6 | Train: 94.93% | Val: 90.22%
Epoch  7 | Train: 95.77% | Val: 91.03%
Epoch  8 | Train: 95.92% | Val: 90.22%
Epoch  9 | Train: 96.71% | Val: 89.95%
Epoch 10 | Train: 97.04% | Val: 90.22%
Test Accuracy: 90.27%
Start training model with uf_rate 0.5
Epoch  1 | Train: 60.60% | Val: 83.15%
Epoch  2 | Train: 88.07% | Val: 87.23%
Epoch  3 | Train: 91.70% | Val: 89.67%
Epoch  4 | Train: 93.48% | Val: 89.95%
Epoch  5 | Train: 93.72% | Val: 90.49%
Epoch  6 | Train: 94.96% | Val: 89.95%
Epoch  7 | Train: 95.59% | Val: 90.49%
Epoch  8 | Train: 95.59% | Val: 89.40%
Epoch  9 | Train: 95.98% | Val: 89.13%
Epoch 10 | Train: 96.68% | Val: 89.40%
Test Accuracy: 89.07%
Start training model with uf_rate 0.8
Epoch  1 | Train: 59.48% | Val: 85.60%
Epoch  2 | Train: 88.77

In [28]:


# transforms
base_tf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],
                         [0.229,0.224,0.225]),
])
aug_tf = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomResizedCrop(224, scale=(0.8,1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],
                         [0.229,0.224,0.225]),
])

# model
basemodel = models.resnet34(weights=models.ResNet34_Weights.DEFAULT)

# fixed unfreeze rate
fixed_rate = 2.0

# grid definitions
augmentations = [False, True]
freeze_bns    = [False, True]
use_l2s       = [False, True]
schedulers    = ['none', 'steplr', 'onecycle']

results = []

# running the grid
for use_aug in augmentations:
    # update train transform
    train_ds.dataset.transform = aug_tf if use_aug else base_tf
    train_loader = DataLoader(
        train_ds,
        batch_size=32,
        shuffle=True,
        num_workers=4,
        pin_memory=True
    )

    for freeze_bn in freeze_bns:
        for use_l2 in use_l2s:
            for sched in schedulers:
                print(f"\n>>> aug={use_aug}, freeze_bn={freeze_bn}, L2={use_l2}, sched={sched}")

                # a) Instantiate with gradual unfreezing
                params = {'gradual_unfreezing': True, 'unfreezing_rate': fixed_rate}
                model = NeuralNetwork(
                    basemodel,
                    len(breed_names),
                    train_loader,
                    val_loader,
                    test_loader,
                    unfreeze=1,
                    parameters=params
                )
                model.model.to(device)

                # b) Optionally freeze BatchNorm layers
                if freeze_bn:
                    for m in model.model.modules():
                        if isinstance(m, nn.BatchNorm2d):
                            m.eval()
                            for p in m.parameters():
                                p.requires_grad = False

                # c) Build optimizer (head @1e-3, backbone @1e-4), optional L2
                wd = 1e-2 if use_l2 else 0.0
                head_p = list(model.model.fc.parameters())
                back_p = [
                    p for n,p in model.model.named_parameters()
                    if p.requires_grad and not n.startswith("fc")
                ]
                model.optimizer = optim.AdamW([
                    {'params': head_p, 'lr':1e-3, 'weight_decay':wd},
                    {'params': back_p, 'lr':1e-4, 'weight_decay':wd},
                ])

                # d) Attach scheduler if desired
                if sched == 'steplr':
                    model.scheduler = optim.lr_scheduler.StepLR(
                        model.optimizer,
                        step_size=5,
                        gamma=0.1
                    )
                elif sched == 'onecycle':
                    total_steps = len(train_loader) * 10
                    model.scheduler = optim.lr_scheduler.OneCycleLR(
                        model.optimizer,
                        max_lr=[1e-3,1e-4],
                        total_steps=total_steps,
                        pct_start=0.3,
                        anneal_strategy='cos'
                    )
                else:
                    model.scheduler = None

                # e) Train & record
                acc = model.train2(epochs=10)
                results.append((use_aug, freeze_bn, use_l2, sched, acc))

# Summary for comparison
print("\n=== Strategy 2 Grid Search Results ===")
print("aug | freeze_bn | L2   | scheduler | TestAcc")
print("----|-----------|------|-----------|--------")
for aug, fb, l2, sch, acc in results:
    print(f"{str(aug):<3} | {str(fb):<9} | {str(l2):<4} | {sch:<9} | {acc:6.2f}%")



>>> aug=False, freeze_bn=False, L2=False, sched=none
Epoch  1 | Train: 60.63% | Val: 84.51%
Epoch  2 | Train: 88.74% | Val: 86.41%
Epoch  3 | Train: 91.24% | Val: 84.78%
Epoch  4 | Train: 93.15% | Val: 88.59%
Epoch  5 | Train: 94.50% | Val: 88.32%
Epoch  6 | Train: 95.59% | Val: 89.40%
Epoch  7 | Train: 95.44% | Val: 88.04%
Epoch  8 | Train: 96.29% | Val: 90.22%
Epoch  9 | Train: 97.16% | Val: 88.04%
Epoch 10 | Train: 97.31% | Val: 88.59%
Test Accuracy: 89.34%

>>> aug=False, freeze_bn=False, L2=False, sched=steplr
Epoch  1 | Train: 59.51% | Val: 82.34%
Epoch  2 | Train: 88.13% | Val: 86.68%
Epoch  3 | Train: 91.82% | Val: 89.13%
Epoch  4 | Train: 92.84% | Val: 89.13%
Epoch  5 | Train: 93.90% | Val: 89.95%
Epoch  6 | Train: 95.89% | Val: 91.30%
Epoch  7 | Train: 96.01% | Val: 91.03%
Epoch  8 | Train: 96.38% | Val: 91.30%
Epoch  9 | Train: 96.35% | Val: 91.58%
Epoch 10 | Train: 96.26% | Val: 91.03%
Test Accuracy: 89.70%

>>> aug=False, freeze_bn=False, L2=False, sched=onecycle
Epoch  1

## Imbalanced classes