In [38]:
import os
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision.datasets import OxfordIIITPet

In [39]:
# Instantiate the torchvision dataset once with download=True so the Oxford-IIIT Pet
# files are available under ./data (later cells read data/oxford-iiit-pet/... directly).
# The object itself is not kept; as the cell's last expression it only displays its summary.
OxfordIIITPet(root="data", download=True)

Dataset OxfordIIITPet
    Number of datapoints: 3680
    Root location: data

In [40]:
# Directory holding the dataset's annotation text files.
annnotations_dir = os.path.join("data", "oxford-iiit-pet", "annotations")
# Read list.txt inside a context manager so the file handle is closed promptly
# instead of being left for the garbage collector.
with open(os.path.join(annnotations_dir, "list.txt")) as list_file:
    # The first 6 lines are skipped as a header
    # (presumably "#" comment lines — verify against the file).
    lines_for_files = list_file.read().splitlines()[6:]
# Map image base name (1st column) -> integer species id (3rd column).
# Each line is split once; lines with no fields (stray blanks) are ignored.
species_map = {
    fields[0]: int(fields[2])
    for fields in map(str.split, lines_for_files)
    if fields
}

In [41]:
def _read_split_names(split_file):
    """Return the first whitespace-separated token of each non-blank line of a split file.

    ``split_file`` is a file name inside ``annnotations_dir``.
    """
    with open(os.path.join(annnotations_dir, split_file)) as handle:
        return [row.split()[0] for row in handle if row.strip()]


# Image base names for the train+validation split and for the held-out test split.
trainval_names = _read_split_names("trainval.txt")
test_names = _read_split_names("test.txt")

In [42]:
class BinaryPet(Dataset):
    """Oxford-IIIT Pet images labelled by species only (cat vs dog).

    Args:
        root: Directory that contains the ``oxford-iiit-pet`` folder.
        names: Sequence of image base names (no extension), one per sample.
        species_map: Mapping from image base name to species id (1 or 2).
        transform: Optional callable applied to the RGB PIL image.
    """

    def __init__(self, root, names, species_map, transform=None):
        self.root, self.names = root, names
        self.species_map, self.transform = species_map, transform

    def __len__(self):
        """Number of samples, i.e. the number of image names."""
        return len(self.names)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is opened from ``<root>/oxford-iiit-pet/images/<name>.jpg``,
        converted to RGB and passed through ``transform`` when one is set.
        The label is the species id shifted to start at zero.
        """
        stem = self.names[idx]
        image_file = os.path.join(
            self.root, "oxford-iiit-pet", "images", stem + ".jpg"
        )
        picture = Image.open(image_file).convert("RGB")
        picture = self.transform(picture) if self.transform else picture
        return picture, self.species_map[stem] - 1  # 1→cat→0, 2→dog→1

In [43]:
# Side length of the square crop fed to the network. IMG_SIZE was referenced below
# but never defined in this notebook, so a fresh "Restart & Run All" raised NameError.
# 224 is the standard ResNet input size that pairs with Resize(256).
IMG_SIZE = 224

# Deterministic preprocessing shared by the train/val/test datasets:
# resize the shorter side to 256, centre-crop to IMG_SIZE, convert to a tensor,
# then normalise with the ImageNet channel mean/std used by torchvision's pretrained weights.
transformIMG = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

In [44]:
# Full train+validation dataset; 10% of it is held out for validation.
full = BinaryPet("data", trainval_names, species_map, transform=transformIMG)
n_val = int(0.1 * len(full))
# Seed the split so the same images land in the validation set on every run;
# without a generator the split (and therefore the selected checkpoint) changes per run.
split_generator = torch.Generator().manual_seed(42)
train_ds, val_ds = random_split(
    full, [len(full) - n_val, n_val], generator=split_generator
)

In [45]:
# Held-out test set: same preprocessing, names taken from the test split.
test_ds = BinaryPet(
    root="data",
    names=test_names,
    species_map=species_map,
    transform=transformIMG,
)

In [46]:
batch_size = 32
# Options common to all three loaders; only the training loader shuffles.
_loader_opts = dict(batch_size=batch_size, num_workers=4, pin_memory=True)
train_loader = DataLoader(train_ds, shuffle=True, **_loader_opts)
val_loader = DataLoader(val_ds, shuffle=False, **_loader_opts)
test_loader = DataLoader(test_ds, shuffle=False, **_loader_opts)

In [65]:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.resnet34(weights=models.ResNet34_Weights.DEFAULT)
# Replace the fully connected head with a fresh 2-output linear layer.
model.fc = nn.Linear(model.fc.in_features, 2)
# Train only the last residual stage (layer4) and the new head; every other
# parameter is frozen. One pass sets the flag for all parameters.
trainable_prefixes = ("layer4", "fc")
for name, p in model.named_parameters():
    p.requires_grad = name.startswith(trainable_prefixes)
model = model.to(device)

In [66]:
# Loss for the 2-way classifier: cross-entropy between the model's logits
# and the integer class labels produced by the dataset.
criterion = nn.CrossEntropyLoss()

In [67]:
# (module, learning rate) pairs: the pretrained layer4 gets a smaller step
# than the freshly initialised head. Both groups share the same weight decay.
group_lrs = [(model.layer4, 1e-4), (model.fc, 1e-3)]
optimizer = optim.Adam([
    {"params": module.parameters(), "lr": lr, "weight_decay": 1e-4}
    for module, lr in group_lrs
])


In [68]:
def run_epoch(loader, train=True):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    Uses the notebook-level ``model``, ``criterion``, ``optimizer`` and ``device``.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches.
        train: If truthy, put the model in training mode and take one optimizer
            step per batch; otherwise put it in eval mode and only measure.

    Returns:
        Tuple of the sample-weighted average loss and the accuracy in percent.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    if train:
        model.train()
    else:
        model.eval()
    total_loss, correct, total = 0.0, 0, 0
    # Autograd is only needed when training. Disabling it during evaluation avoids
    # building a graph for every validation/test batch (less memory, faster).
    with torch.set_grad_enabled(bool(train)):
        for imgs, labs in loader:
            imgs, labs = imgs.to(device), labs.to(device)
            if train:
                optimizer.zero_grad()
            logits = model(imgs)
            loss = criterion(logits, labs)
            if train:
                loss.backward()
                optimizer.step()
            # criterion returns a batch mean; weight it by batch size so the
            # final average is per sample even if the last batch is smaller.
            total_loss += loss.item() * imgs.size(0)
            preds = logits.argmax(dim=1)
            correct += (preds == labs).sum().item()
            total += imgs.size(0)
    return total_loss / total, correct / total * 100


In [69]:
NUM_EPOCHS = 9
CHECKPOINT_PATH = "best_binary_resnet34.pth"

# Train for NUM_EPOCHS epochs and keep the weights of the epoch with the
# highest validation accuracy seen so far (ties keep the earlier checkpoint).
best_val_acc = 0.0
for epoch in range(1, NUM_EPOCHS + 1):
    tr_loss, tr_acc = run_epoch(train_loader, train=True)
    val_loss, val_acc = run_epoch(val_loader, train=False)
    print(f"Epoch {epoch:2d} | Train: {tr_acc:.2f}% | Val: {val_acc:.2f}%")
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), CHECKPOINT_PATH)

Epoch  1 | Train: 98.22% | Val: 99.46%
Epoch  2 | Train: 99.67% | Val: 99.73%
Epoch  3 | Train: 99.52% | Val: 100.00%
Epoch  4 | Train: 99.91% | Val: 98.37%
Epoch  5 | Train: 99.61% | Val: 100.00%
Epoch  6 | Train: 99.97% | Val: 100.00%
Epoch  7 | Train: 99.94% | Val: 100.00%
Epoch  8 | Train: 99.97% | Val: 99.73%
Epoch  9 | Train: 100.00% | Val: 100.00%


In [70]:
# Restore the checkpoint saved by the training loop. map_location remaps the saved
# tensors onto the current device, so a checkpoint written on a GPU also loads
# on a CPU-only kernel.
best_state = torch.load("best_binary_resnet34.pth", map_location=device)
model.load_state_dict(best_state)
# Single evaluation pass over the held-out test split.
test_loss, test_acc = run_epoch(test_loader, train=False)
print(f"Test Accuracy: {test_acc:.2f}%")

Test Accuracy: 99.59%
