In [2]:
import os
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision.datasets import OxfordIIITPet
from torchsummary import summary
import copy

## Load Dataset

In [3]:
OxfordIIITPet(root="data", download=True)

100%|██████████| 792M/792M [02:32<00:00, 5.20MB/s] 
100%|██████████| 19.2M/19.2M [00:03<00:00, 5.77MB/s]


Dataset OxfordIIITPet
    Number of datapoints: 3680
    Root location: data

In [17]:
annnotations_dir = os.path.join("data", "oxford-iiit-pet", "annotations")
lines_for_files = open(os.path.join(annnotations_dir, "list.txt")).read().splitlines()[6:]
species_map = {l.split()[0]: int(l.split()[2]) for l in lines_for_files}

In [18]:
with open(os.path.join(annnotations_dir, "trainval.txt")) as f:
    trainval_names = [l.split()[0] for l in f if l.strip()]
with open(os.path.join(annnotations_dir, "test.txt")) as f:
    test_names = [l.split()[0] for l in f if l.strip()]

In [19]:
class BinaryPet(Dataset):
    def __init__(self, root, names, species_map, transform=None):
        self.root = root
        self.names = names
        self.species_map = species_map
        self.transform = transform

    def __len__(self):
        return len(self.names)

    def __getitem__(self, idx):
        name = self.names[idx]
        img_path = os.path.join(
            self.root, "oxford-iiit-pet", "images", name + ".jpg"
        )
        img = Image.open(img_path).convert("RGB")
        if self.transform:
            img = self.transform(img)
        label = self.species_map[name] - 1  # 1→cat→0, 2→dog→1
        return img, label

In [20]:
IMG_SIZE = 224

In [21]:
transformIMG = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])

In [22]:
full = BinaryPet("data", trainval_names, species_map, transform=transformIMG)
n_val = int(0.1 * len(full))
train_ds, val_ds = random_split(full, [len(full)-n_val, n_val])

In [23]:
test_ds = BinaryPet("data", test_names, species_map, transform=transformIMG)

In [24]:
batch_size = 32
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True,  num_workers=4, pin_memory=True)
val_loader   = DataLoader(val_ds,   batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)



# Binary Classification

## Model Set Up

In [35]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.resnet34(weights=models.ResNet34_Weights.DEFAULT)
# freezing all
for p in model.parameters():
    p.requires_grad = False
# changing the fully connected layer
model.fc = nn.Linear(model.fc.in_features, 2)
# Uunfreeze the fully connected last layer
for name, p in model.named_parameters():
    if name.startswith("fc"):
        p.requires_grad = True
model = model.to(device)

In [36]:
criterion = nn.CrossEntropyLoss()

In [37]:
optimizer = optim.Adam([
    {"params": model.fc.parameters(),     "lr": 1e-3, "weight_decay": 1e-4},
])


## Training

In [38]:
def run_epoch(model, loader, train=True):
    model.train() if train else model.eval()
    total_loss, correct, total = 0.0, 0, 0
    for imgs, labs in loader:
        imgs, labs = imgs.to(device), labs.to(device)
        if train:
            optimizer.zero_grad()
        logits = model(imgs)
        loss   = criterion(logits, labs)
        if train:
            loss.backward()
            optimizer.step()
        total_loss += loss.item() * imgs.size(0)
        preds      = logits.argmax(dim=1)
        correct   += (preds == labs).sum().item()
        total     += imgs.size(0)
    return total_loss/total, correct/total*100


In [39]:
best_val_acc = 0.0
for epoch in range(1, 20):
    tr_loss, tr_acc = run_epoch(model, train_loader, train=True)
    val_loss, val_acc = run_epoch(model, val_loader, train=False)
    print(f"Epoch {epoch:2d} | Train: {tr_acc:.2f}% | Val: {val_acc:.2f}%")
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_binary_resnet34.pth")

Epoch  1 | Train: 93.69% | Val: 96.74%
Epoch  2 | Train: 98.04% | Val: 98.37%
Epoch  3 | Train: 98.13% | Val: 98.10%
Epoch  4 | Train: 98.82% | Val: 97.01%
Epoch  5 | Train: 98.31% | Val: 97.83%
Epoch  6 | Train: 98.46% | Val: 99.18%
Epoch  7 | Train: 98.85% | Val: 98.37%
Epoch  8 | Train: 99.03% | Val: 97.83%
Epoch  9 | Train: 99.06% | Val: 98.37%
Epoch 10 | Train: 98.85% | Val: 98.64%
Epoch 11 | Train: 99.15% | Val: 99.18%
Epoch 12 | Train: 98.88% | Val: 97.55%
Epoch 13 | Train: 99.37% | Val: 98.37%
Epoch 14 | Train: 99.03% | Val: 98.91%
Epoch 15 | Train: 98.97% | Val: 99.46%
Epoch 16 | Train: 99.21% | Val: 98.91%
Epoch 17 | Train: 99.25% | Val: 98.64%
Epoch 18 | Train: 99.58% | Val: 98.64%
Epoch 19 | Train: 99.43% | Val: 98.91%


In [72]:
model.load_state_dict(torch.load("best_binary_resnet34.pth"))
test_loss, test_acc = run_epoch(model, test_loader, train=False)
print(f"Test Accuracy: {test_acc:.2f}%")



Test Accuracy: 99.21%


# Multi Class Classification

## Model Set Up

### Strategy 1: Fine-tune l layers simultaneously

In [None]:
def train_model(model, epochs, path="model.pth"):
  best_val_acc = 0.0
  for epoch in range(1,epochs+1):
    tr_loss, tr_acc = run_epoch(train_loader, train=True)
    val_loss, val_acc = run_epoch(val_loader, train=False)
    print(f"Epoch {epoch:2d} | Train: {tr_acc:.2f}% | Val: {val_acc:.2f}%")
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), path)

  test_acc = test_model(model=model)
  return test_acc


In [74]:
def test_model(model=None, path=None):
  if not model:
    if not path:
      print("error: plaese provide model or path to model")
      return
    else:
      model.load_state_dict(torch.load(path))

  test_loss, test_acc = run_epoch(test_loader, train=False)
  print(f"Test Accuracy: {test_acc:.2f}%")
  return test_acc

In [63]:
def init_model(basemodel, device, output, unfreeze):
  model = copy.deepcopy(basemodel)
  # changing the fully connected layer
  model.fc = nn.Linear(model.fc.in_features, output)

  # Unfreeze the L last layers and fully connected last layer
  model = unfreeze_layers(model, unfreeze)

  model = model.to(device)

  return model

In [67]:
def check_trainable_layers(model):
  for name, param in model.named_parameters():
    if param.requires_grad:
        print(f"{name} requires grad")
    else:
        print(f"{name} does NOT require grad")

In [44]:
def unfreeze_layers(model, L):
  # freezing all layers
  for p in model.parameters():
      p.requires_grad = False

  layer = 0
  # unfreeze last L layers and fully connected layer
  for name, p in reversed(list(model.named_parameters())):
    if layer < L+1:
      if not name.endswith("bias"):
        layer += 1
      p.requires_grad = True

  return model

In [70]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
basemodel = models.resnet34(weights=models.ResNet34_Weights.DEFAULT)

In [None]:
accs = []
for L in range(1, 10):
  print(f"Start training model with {L} last layers trainable")
  modelmulti = init_model(basemodel, device, 37, L)
  accs.append(train_model(modelmulti, 10))

Start training model with 1 last layers trainable




Epoch  1 | Train: 99.31% | Val: 98.91%
Epoch  2 | Train: 98.70% | Val: 97.83%
Epoch  3 | Train: 99.28% | Val: 98.64%
Epoch  4 | Train: 99.31% | Val: 98.91%
Epoch  5 | Train: 99.15% | Val: 98.91%
Epoch  6 | Train: 99.40% | Val: 98.91%
Epoch  7 | Train: 99.34% | Val: 98.37%
Epoch  8 | Train: 99.31% | Val: 99.46%
Epoch  9 | Train: 99.31% | Val: 98.37%
Epoch 10 | Train: 99.31% | Val: 98.10%
Test Accuracy: 98.99%
Start training model with 2 last layers trainable
Epoch  1 | Train: 99.31% | Val: 98.91%
Epoch  2 | Train: 99.28% | Val: 98.91%
Epoch  3 | Train: 99.55% | Val: 98.10%
Epoch  4 | Train: 99.31% | Val: 98.64%
Epoch  5 | Train: 98.64% | Val: 98.64%
Epoch  6 | Train: 99.18% | Val: 97.83%
Epoch  7 | Train: 99.58% | Val: 99.18%
Epoch  8 | Train: 99.25% | Val: 98.64%
Epoch  9 | Train: 99.28% | Val: 99.46%
Epoch 10 | Train: 99.55% | Val: 98.10%
Test Accuracy: 98.66%
Start training model with 3 last layers trainable
Epoch  1 | Train: 99.34% | Val: 98.10%
Epoch  2 | Train: 98.79% | Val: 98.10