<a target="_blank" href="https://colab.research.google.com/github/Sebelino/DD2424-project/blob/main/e1_sebastian.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

In [4]:
import os
import socket

# Directory name of the repository checkout and the helper script that the
# next cell imports from.
repo_name = "DD2424-project"
dep_filename = "download_dataset.py"
cwd = os.getcwd()
hostname = socket.gethostname()

# Report which machine and working directory this notebook is running in.
print(f"Host: {hostname}")
print(f"You are here: {cwd}")

# NOTE(review): this first check is informational only. The clone/pull logic
# below still runs whether or not the helper script is already present.
if os.path.exists(dep_filename):
    print(f"{dep_filename} is in your current directory. Assuming you are in the root of the repository.")
# Clone the repository if there is no checkout in the working directory,
# otherwise update the existing checkout. In both cases the helper script is
# (re)copied next to the notebook so it can be imported.
if not os.path.exists(repo_name):
    print(f"Cloning repo and fetching {dep_filename}...")
    !git clone https://github.com/Sebelino/DD2424-project
    !cp DD2424-project/download_dataset.py .
else:
    !git -C DD2424-project pull --rebase
    !cp DD2424-project/download_dataset.py .

Host: deeplearning-vm
You are here: /tmp/pycharm_project_153
download_dataset.py is in your current directory. Assuming you are in the root of the repository.
Already up to date.


# Download and extract the data

In [5]:
from download_dataset import maybe_download_and_extract

# Run the helper imported from download_dataset.py. Its implementation is not
# part of this notebook; going by its name it downloads and extracts the
# dataset archives only when needed (TODO confirm against the helper).
maybe_download_and_extract()

data/oxford-iiit-pet/images.tar.gz already exists. Skipping download.
data/oxford-iiit-pet/annotations.tar.gz already exists. Skipping download.
data/oxford-iiit-pet/images already extracted. Skipping.
data/oxford-iiit-pet/annotations already extracted. Skipping.


# Partition the data

In [6]:
import shutil
import random
from pathlib import Path


def copy_files(files_dir, filenames, dst_dir):
    """Copy labelled ``.jpg`` images into per-class sub-directories.

    Args:
        files_dir: ``Path`` of the directory holding the source ``<name>.jpg`` files.
        filenames: Iterable of ``(filename, label)`` pairs, where ``filename``
            has no extension and ``label`` names the sub-directory of
            ``dst_dir`` the image belongs in.
        dst_dir: ``Path`` of the destination root. The ``dst_dir / label``
            directories must already exist.

    Files whose destination already exists are left untouched, and entries
    whose source image is missing are skipped.
    """
    for filename, label in filenames:
        src = files_dir / f"{filename}.jpg"
        dst = dst_dir / label / f"{filename}.jpg"
        if dst.exists():
            # Skip only this file. A `return` here would abort the whole loop
            # and silently leave every remaining image uncopied.
            continue
        if src.exists():
            shutil.copy(src, dst)


def make_partitioned_dataset(dataset_dir: str, output_dir, seed=None):
    """Split the pet images into train/val/test folders, one sub-folder per class.

    Reads ``<dataset_dir>/annotations/list.txt``, labels each image as
    ``'cat'`` or ``'dog'`` from its species column, shuffles the entries and
    copies the images from ``<dataset_dir>/images`` into
    ``<output_dir>/{train,val,test}/{cat,dog}`` using an 80/10/10 split.

    If ``output_dir`` already exists when the function is called, nothing is
    copied or reshuffled; only the split sizes computed from the annotation
    file are returned.

    Args:
        dataset_dir: Root of the extracted dataset (``str`` or ``Path``).
        output_dir: Destination root (``str`` or ``Path``).
        seed: Optional seed for the shuffle. With the default ``None`` the
            global ``random`` state is used, as before. Pass an integer to
            get the same partition on every fresh run.

    Returns:
        Tuple ``(num_train, num_val, num_test)``.
    """
    train_ratio = 0.8
    val_ratio = 0.1  # whatever is left after train and val becomes the test split

    dataset_dir = Path(dataset_dir)
    output_dir = Path(output_dir)  # accept plain strings as well as Path objects
    images_dir = dataset_dir / "images"
    annotations_file = dataset_dir / "annotations" / "list.txt"

    # Must be captured before the mkdir calls below create the directory.
    output_dir_already_exists = output_dir.exists()

    # Ensure output directories exist
    for split in ['train', 'val', 'test']:
        for cls in ['cat', 'dog']:
            (output_dir / split / cls).mkdir(parents=True, exist_ok=True)

    # Parse annotations to get filenames and binary labels
    with open(annotations_file) as f:
        lines = f.readlines()[6:]  # Skip header

    file_info = []
    for line in lines:
        parts = line.strip().split()
        if not parts:
            continue  # tolerate blank lines (e.g. a trailing newline)
        filename, _, species_num_str, _ = parts
        species_num = int(species_num_str)  # 1 = Cat, 2 = Dog
        label = 'cat' if species_num == 1 else 'dog'
        file_info.append((filename, label))

    num_total = len(file_info)
    num_train = int(train_ratio * num_total)
    num_val = int(val_ratio * num_total)

    if output_dir_already_exists:
        print(f"Output directory {output_dir} already exists.")
        num_test = num_total - num_train - num_val
        return num_train, num_val, num_test

    # Shuffle and split data. A dedicated generator is used when a seed is
    # given so the global random state is left alone.
    rng = random.Random(seed) if seed is not None else random
    rng.shuffle(file_info)

    train_files = file_info[:num_train]
    val_files = file_info[num_train:num_train + num_val]
    test_files = file_info[num_train + num_val:]

    # Copy to respective folders
    copy_files(images_dir, train_files, output_dir / "train")
    copy_files(images_dir, val_files, output_dir / "val")
    copy_files(images_dir, test_files, output_dir / "test")

    print(f"Dataset partitioned into: {len(train_files)} train, {len(val_files)} val, {len(test_files)} test images.")
    print(f"Output directory: {output_dir.resolve()}")

    return len(train_files), len(val_files), len(test_files)


# Source dataset root and the destination root for the partitioned copy.
dataset_dir = Path("data/oxford-iiit-pet")
output_dir = Path("data/dataset_partitioned")
# Build the train/val/test folders and keep the split sizes for reporting.
n_train, n_val, n_test = make_partitioned_dataset(dataset_dir, output_dir)
print("Dataset prepared in ImageFolder format at:", output_dir)
print(f"Dataset partitioned into: {n_train} train, {n_val} val, {n_test} test images in {output_dir}")

Output directory data/dataset_partitioned already exists.
Dataset prepared in ImageFolder format at: data/dataset_partitioned
Dataset partitioned into: 5879 train, 734 val, 736 test images in data/dataset_partitioned


# Load the data

In [7]:
from torchvision import datasets
from torchvision.models import ResNet18_Weights
from torch.utils.data import DataLoader

# Use ResNet's default preprocessing pipeline
# (the transform bundled with the pretrained weights is applied to every split).
weights = ResNet18_Weights.DEFAULT
preprocess = weights.transforms()

# One ImageFolder dataset per split, rooted at the partitioned output directory.
train_dataset = datasets.ImageFolder(root=f"{output_dir}/train", transform=preprocess)
val_dataset = datasets.ImageFolder(root=f"{output_dir}/val", transform=preprocess)
test_dataset = datasets.ImageFolder(root=f"{output_dir}/test", transform=preprocess)

# Optional: uncomment to train on only the first 1000 training samples.
# NOTE(review): these lines reference `torch`, which this cell does not import.
#small_train_dataset = torch.utils.data.Subset(train_dataset, range(1000))
#small_train_loader = DataLoader(small_train_dataset, batch_size=32, shuffle=True)
#train_dataset = small_train_dataset

# DataLoaders
# Only the training loader shuffles and uses worker processes / pinned memory;
# the validation and test loaders iterate in a fixed order with default settings.
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size, shuffle=False)

print(f"Batch size: {train_loader.batch_size}, Dataset size: {len(train_loader.dataset)}, Iterations per epoch: {len(train_loader)}")


Batch size: 32, Dataset size: 5879, Iterations per epoch: 184


# Set up model

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from tqdm.notebook import tqdm

# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load ResNet18 with the pretrained weights selected in the data-loading cell.
model = models.resnet18(weights=weights)

# Freeze the pretrained backbone. The optimizer below only updates the new
# classification head, so without this autograd would still compute and store
# gradients for every backbone parameter on each step and then discard them.
for param in model.parameters():
    param.requires_grad = False

# Replace the classification head. A freshly created layer has
# requires_grad=True, so it remains trainable.
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 2)  # Binary classification (cat vs dog)
model = model.to(device)

# Optimizer
eta = 0.001  # learning rate
optimizer = optim.Adam(model.fc.parameters(), lr=eta)  # Fine-tune final layer only

print(f"GPU acceleration enabled: {'Yes 🚀' if device.type == 'cuda' else 'No 🐌'}")

GPU acceleration enabled: Yes 🚀


# Train model

In [12]:
from torch import GradScaler, autocast

# Loss function applied to the model's two-class outputs.
criterion = nn.CrossEntropyLoss()

# Number of full passes over the training loader.
num_epochs = 5
# Make sure the model is in training mode before the loop starts.
model.train()


def evaluate(model, loader, device):
    """Return the classification accuracy of ``model`` on ``loader`` in percent.

    The model is put in eval mode and run without gradient tracking. When the
    function exits (normally or through an exception) the model is put back
    into whichever mode it was in on entry, so evaluating an eval-mode model
    no longer flips it to training mode as a side effect.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        loader: Iterable of ``(inputs, labels)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``100 * correct / total`` as a float.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for inputs, labels in tqdm(loader, desc="Evaluating"):
                inputs, labels = inputs.to(device), labels.to(device)
                # Predicted class = index of the highest score in each row.
                predicted = model(inputs).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Restore the caller's mode instead of forcing training mode.
        model.train(was_training)
    return 100 * correct / total

def backward_pass(scaler=None):
    """Run forward pass, backward pass and optimizer step for the current batch.

    Operates on the notebook-level names ``model``, ``inputs``, ``labels``,
    ``criterion`` and ``optimizer``; these must be set before calling.
    Gradients are not zeroed here, so the caller has to do that beforehand.

    Args:
        scaler: Optional gradient scaler. When given, the forward pass runs
            under CUDA autocast and the backward pass and optimizer step go
            through the scaler. When ``None``, everything runs in the
            default precision.

    Returns:
        Tuple ``(outputs, loss)`` for the current batch.
    """
    if scaler is None:
        # Plain full-precision step.
        logits = model(inputs)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()
        return logits, batch_loss

    # Mixed-precision step: only the forward pass and loss run under autocast.
    with autocast(device_type='cuda'):
        logits = model(inputs)
        batch_loss = criterion(logits, labels)

    scaler.scale(batch_loss).backward()
    scaler.step(optimizer)
    scaler.update()
    return logits, batch_loss


# Toggle for mixed-precision training; a GradScaler is only created when enabled.
# NOTE(review): "precion" is a typo for "precision"; left unchanged because it
# is a notebook-level name.
enable_mixed_precion = True
scaler = GradScaler() if enable_mixed_precion else None

for epoch in tqdm(range(num_epochs), desc="Epoch"):
    # Per-epoch accumulators for the mean loss and training accuracy.
    running_loss = 0.0
    correct = 0
    total = 0
    # `inputs` and `labels` are notebook-level names here; backward_pass()
    # reads them from global scope rather than taking them as arguments.
    for inputs, labels in tqdm(train_loader, desc="Iteration", leave=False):
        inputs, labels = inputs.to(device), labels.to(device)

        # Clear gradients left over from the previous iteration.
        optimizer.zero_grad()

        # Forward pass, backward pass and optimizer step for this batch.
        outputs, loss = backward_pass(scaler)

        running_loss += loss.item()
        # Predicted class = index of the largest output in each row.
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Training accuracy is accumulated batch by batch while the weights are
    # still changing; validation accuracy is measured after the epoch.
    train_acc = 100 * correct / total
    val_acc = evaluate(model, val_loader, device)
    print(
        f"Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}, Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%")


Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Iteration:   0%|          | 0/184 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/23 [00:00<?, ?it/s]

Epoch [1/5], Loss: 0.0997, Train Acc: 96.97%, Val Acc: 98.23%


Iteration:   0%|          | 0/184 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/23 [00:00<?, ?it/s]

Epoch [2/5], Loss: 0.0647, Train Acc: 97.91%, Val Acc: 98.64%


Iteration:   0%|          | 0/184 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/23 [00:00<?, ?it/s]

Epoch [3/5], Loss: 0.0595, Train Acc: 97.91%, Val Acc: 98.77%


Iteration:   0%|          | 0/184 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/23 [00:00<?, ?it/s]

Epoch [4/5], Loss: 0.0444, Train Acc: 98.37%, Val Acc: 98.64%


Iteration:   0%|          | 0/184 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/23 [00:00<?, ?it/s]

Epoch [5/5], Loss: 0.0469, Train Acc: 98.42%, Val Acc: 98.64%


# Benchmarking
This is how long training takes for different combinations of optimizations (in Colab):
```
1:13 mixed_precision=no  num_workers=1 pin_memory=False data=1000 batch_size=32
1:11 mixed_precision=no  num_workers=1 pin_memory=False data=1000 batch_size=32
1:07 mixed_precision=yes num_workers=1 pin_memory=False data=1000 batch_size=32
1:07 mixed_precision=yes num_workers=1 pin_memory=False data=1000 batch_size=32
1:01 mixed_precision=no  num_workers=2 pin_memory=True  data=1000 batch_size=32
3:42 mixed_precision=no  num_workers=2 pin_memory=True  data=all  batch_size=32
3:34 mixed_precision=yes num_workers=2 pin_memory=True  data=all  batch_size=32
3:45 mixed_precision=yes num_workers=2 pin_memory=True  data=all  batch_size=64
3:46 mixed_precision=yes num_workers=2 pin_memory=False data=all  batch_size=32
4:16 mixed_precision=yes num_workers=1 pin_memory=False data=all  batch_size=32
```

# Test model

In [13]:
# Measure accuracy on the held-out test split, which the training loop never uses.
final_test_acc = evaluate(model, test_loader, device)
print(f"Final Test Accuracy: {final_test_acc:.2f}%")

Evaluating:   0%|          | 0/23 [00:00<?, ?it/s]

Final Test Accuracy: 97.96%
