In [None]:
# =========================
# Cell 1: Imports & Config
# =========================

import os
import sys

# Add project root (one level up from /notebooks)
sys.path.append("../")

import json
from pathlib import Path
from time import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from tqdm import tqdm

from sklearn.metrics import accuracy_score, f1_score
from models.model_v2 import CustomCNN

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", DEVICE)

PROJECT_ROOT = Path("../").resolve()

TRAIN_ROOT = PROJECT_ROOT / "data" / "train"

MODELS_DIR  = PROJECT_ROOT / "models"
RESULTS_DIR = PROJECT_ROOT / "results"

MODELS_DIR.mkdir(parents=True, exist_ok=True)
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

MODEL_WEIGHTS_PATH = MODELS_DIR / "model_v2.pth"
METRICS_PATH       = RESULTS_DIR / "metrics_v2.json"

BATCH_SIZE = 32
NUM_EPOCHS = 30
LEARNING_RATE = 1e-3
IMAGE_SIZE = 256
VAL_SPLIT = 0.2  # 20% of train used as validation


Using device: cuda


In [None]:
# =========================
# Cell 2: Data Transforms & Loaders (train/val split from train/)
# =========================

train_transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3),
    transforms.RandomResizedCrop(IMAGE_SIZE, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])


val_transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Load full labeled dataset from data/train with class subfolders cat/, dog/
full_dataset = datasets.ImageFolder(root=str(TRAIN_ROOT), transform=train_transform)

class_names = full_dataset.classes
print("Classes:", class_names)
print("Total labeled samples:", len(full_dataset))

# Split into train and validation subsets
val_size = int(VAL_SPLIT * len(full_dataset))
train_size = len(full_dataset) - val_size

train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

train_dataset.dataset.transform = train_transform
val_dataset.dataset.transform = val_transform

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader   = DataLoader(val_dataset,   batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

print("Train samples:", len(train_dataset))
print("Val samples:", len(val_dataset))


Classes: ['cat', 'dog']
Total labeled samples: 25000
Train samples: 20000
Val samples: 5000


In [None]:
# =========================
# Cell 3: Initialize Model, Loss, Optimizer
# =========================

num_classes = len(class_names)

model = CustomCNN(num_classes=num_classes).to(DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)


print(model)


CustomCNN(
  (features): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (global_pool): AdaptiveAvgPool2d(output_size=(1, 1))
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
   

In [14]:
# =========================
# Cell 4: Training & Evaluation Functions
# =========================

def train_one_epoch(model, dataloader, optimizer, criterion, device):
    model.train()
    running_loss = 0.0
    all_preds = []
    all_labels = []

    pbar = tqdm(dataloader, desc="Training", leave=False)

    for images, labels in pbar:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)

        preds = torch.argmax(outputs, dim=1)
        all_preds.extend(preds.detach().cpu().tolist())
        all_labels.extend(labels.detach().cpu().tolist())

        pbar.set_postfix({"loss": round(loss.item(), 4)})

    epoch_loss = running_loss / len(dataloader.dataset)
    epoch_acc = accuracy_score(all_labels, all_preds)
    epoch_f1 = f1_score(all_labels, all_preds, average="macro")

    return epoch_loss, epoch_acc, epoch_f1


def evaluate(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0.0
    all_preds = []
    all_labels = []

    pbar = tqdm(dataloader, desc="Validating", leave=False)

    with torch.no_grad():
        for images, labels in pbar:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)

            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.detach().cpu().tolist())
            all_labels.extend(labels.detach().cpu().tolist())

            pbar.set_postfix({"loss": round(loss.item(), 4)})

    epoch_loss = running_loss / len(dataloader.dataset)
    epoch_acc = accuracy_score(all_labels, all_preds)
    epoch_f1 = f1_score(all_labels, all_preds, average="macro")

    return epoch_loss, epoch_acc, epoch_f1



In [None]:
# =========================
# Cell 5: Run Training
# =========================

best_val_f1 = 0.0
history = {
    "train_loss": [],
    "train_acc": [],
    "train_f1": [],
    "val_loss": [],
    "val_acc": [],
    "val_f1": [],
}

start_time = time()


for epoch in range(1, NUM_EPOCHS + 1):

    train_loss, train_acc, train_f1 = train_one_epoch(
        model, train_loader, optimizer, criterion, DEVICE
    )
    val_loss, val_acc, val_f1 = evaluate(
        model, val_loader, criterion, DEVICE
    )

    # Save stats
    history["train_loss"].append(train_loss)
    history["train_acc"].append(train_acc)
    history["train_f1"].append(train_f1)
    history["val_loss"].append(val_loss)
    history["val_acc"].append(val_acc)
    history["val_f1"].append(val_f1)

    print(
        f"Epoch [{epoch}/{NUM_EPOCHS}] "
        f"Train Loss: {train_loss:.4f} | Acc: {train_acc:.4f} | F1: {train_f1:.4f} "
        f"|| Val Loss: {val_loss:.4f} | Acc: {val_acc:.4f} | F1: {val_f1:.4f}"
    )

    # Save best model
    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        torch.save(model.state_dict(), MODEL_WEIGHTS_PATH)
        print(f"✅ Saved best model to {MODEL_WEIGHTS_PATH}")

    scheduler.step()

total_time = time() - start_time
print(f"Training completed in {total_time/60:.2f} minutes.")




Epoch [1/30] Train Loss: 0.6336 | Acc: 0.6288 | F1: 0.6260 || Val Loss: 0.6983 | Acc: 0.5740 | F1: 0.5096
✅ Saved best model to /media/veer/Data/Projects/collaborative_cnn_team08/models/model_v2.pth




Epoch [2/30] Train Loss: 0.5957 | Acc: 0.6779 | F1: 0.6770 || Val Loss: 0.5736 | Acc: 0.7010 | F1: 0.6942
✅ Saved best model to /media/veer/Data/Projects/collaborative_cnn_team08/models/model_v2.pth




Epoch [3/30] Train Loss: 0.5703 | Acc: 0.7016 | F1: 0.7009 || Val Loss: 0.6006 | Acc: 0.6850 | F1: 0.6748




Epoch [4/30] Train Loss: 0.5538 | Acc: 0.7180 | F1: 0.7175 || Val Loss: 0.5253 | Acc: 0.7458 | F1: 0.7457
✅ Saved best model to /media/veer/Data/Projects/collaborative_cnn_team08/models/model_v2.pth




Epoch [5/30] Train Loss: 0.5367 | Acc: 0.7307 | F1: 0.7302 || Val Loss: 0.5189 | Acc: 0.7414 | F1: 0.7414




Epoch [6/30] Train Loss: 0.5119 | Acc: 0.7496 | F1: 0.7492 || Val Loss: 0.4813 | Acc: 0.7730 | F1: 0.7726
✅ Saved best model to /media/veer/Data/Projects/collaborative_cnn_team08/models/model_v2.pth




Epoch [7/30] Train Loss: 0.4989 | Acc: 0.7603 | F1: 0.7600 || Val Loss: 0.4616 | Acc: 0.7868 | F1: 0.7868
✅ Saved best model to /media/veer/Data/Projects/collaborative_cnn_team08/models/model_v2.pth




Epoch [8/30] Train Loss: 0.4888 | Acc: 0.7662 | F1: 0.7660 || Val Loss: 0.4719 | Acc: 0.7838 | F1: 0.7826




Epoch [9/30] Train Loss: 0.4804 | Acc: 0.7716 | F1: 0.7714 || Val Loss: 0.4917 | Acc: 0.7608 | F1: 0.7580




Epoch [10/30] Train Loss: 0.4738 | Acc: 0.7778 | F1: 0.7776 || Val Loss: 0.5832 | Acc: 0.7046 | F1: 0.6879




Epoch [11/30] Train Loss: 0.4542 | Acc: 0.7886 | F1: 0.7884 || Val Loss: 0.4570 | Acc: 0.7874 | F1: 0.7858




Epoch [12/30] Train Loss: 0.4419 | Acc: 0.7963 | F1: 0.7962 || Val Loss: 0.4310 | Acc: 0.8082 | F1: 0.8078
✅ Saved best model to /media/veer/Data/Projects/collaborative_cnn_team08/models/model_v2.pth




Epoch [13/30] Train Loss: 0.4385 | Acc: 0.7971 | F1: 0.7969 || Val Loss: 0.4393 | Acc: 0.7976 | F1: 0.7967




Epoch [14/30] Train Loss: 0.4313 | Acc: 0.8032 | F1: 0.8031 || Val Loss: 0.4176 | Acc: 0.8120 | F1: 0.8118
✅ Saved best model to /media/veer/Data/Projects/collaborative_cnn_team08/models/model_v2.pth




Epoch [15/30] Train Loss: 0.4223 | Acc: 0.8073 | F1: 0.8072 || Val Loss: 0.4267 | Acc: 0.8026 | F1: 0.8016




Epoch [16/30] Train Loss: 0.4067 | Acc: 0.8194 | F1: 0.8193 || Val Loss: 0.4023 | Acc: 0.8194 | F1: 0.8189
✅ Saved best model to /media/veer/Data/Projects/collaborative_cnn_team08/models/model_v2.pth




Epoch [17/30] Train Loss: 0.3991 | Acc: 0.8207 | F1: 0.8206 || Val Loss: 0.3936 | Acc: 0.8256 | F1: 0.8256
✅ Saved best model to /media/veer/Data/Projects/collaborative_cnn_team08/models/model_v2.pth




Epoch [18/30] Train Loss: 0.3953 | Acc: 0.8236 | F1: 0.8235 || Val Loss: 0.3919 | Acc: 0.8280 | F1: 0.8276
✅ Saved best model to /media/veer/Data/Projects/collaborative_cnn_team08/models/model_v2.pth




Epoch [19/30] Train Loss: 0.3919 | Acc: 0.8270 | F1: 0.8270 || Val Loss: 0.3840 | Acc: 0.8298 | F1: 0.8296
✅ Saved best model to /media/veer/Data/Projects/collaborative_cnn_team08/models/model_v2.pth




Epoch [20/30] Train Loss: 0.3870 | Acc: 0.8279 | F1: 0.8279 || Val Loss: 0.4165 | Acc: 0.8064 | F1: 0.8033




Epoch [21/30] Train Loss: 0.3758 | Acc: 0.8339 | F1: 0.8339 || Val Loss: 0.3721 | Acc: 0.8374 | F1: 0.8373
✅ Saved best model to /media/veer/Data/Projects/collaborative_cnn_team08/models/model_v2.pth




Epoch [22/30] Train Loss: 0.3746 | Acc: 0.8363 | F1: 0.8362 || Val Loss: 0.3667 | Acc: 0.8428 | F1: 0.8428
✅ Saved best model to /media/veer/Data/Projects/collaborative_cnn_team08/models/model_v2.pth




Epoch [23/30] Train Loss: 0.3703 | Acc: 0.8390 | F1: 0.8390 || Val Loss: 0.3737 | Acc: 0.8374 | F1: 0.8370




Epoch [24/30] Train Loss: 0.3698 | Acc: 0.8375 | F1: 0.8374 || Val Loss: 0.3782 | Acc: 0.8318 | F1: 0.8310




Epoch [25/30] Train Loss: 0.3665 | Acc: 0.8414 | F1: 0.8413 || Val Loss: 0.3893 | Acc: 0.8222 | F1: 0.8213




Epoch [26/30] Train Loss: 0.3609 | Acc: 0.8430 | F1: 0.8430 || Val Loss: 0.3757 | Acc: 0.8304 | F1: 0.8300




Epoch [27/30] Train Loss: 0.3593 | Acc: 0.8427 | F1: 0.8426 || Val Loss: 0.3622 | Acc: 0.8390 | F1: 0.8390




Epoch [28/30] Train Loss: 0.3565 | Acc: 0.8458 | F1: 0.8457 || Val Loss: 0.3597 | Acc: 0.8400 | F1: 0.8399




Epoch [29/30] Train Loss: 0.3565 | Acc: 0.8433 | F1: 0.8432 || Val Loss: 0.3612 | Acc: 0.8396 | F1: 0.8396




Epoch [30/30] Train Loss: 0.3581 | Acc: 0.8445 | F1: 0.8445 || Val Loss: 0.3565 | Acc: 0.8426 | F1: 0.8426
Training completed in 47.12 minutes.


In [16]:
# =========================
# Cell 6: Save Metrics to results/metrics_v2.json
# =========================

metrics = {
    "num_epochs": NUM_EPOCHS,
    "batch_size": BATCH_SIZE,
    "learning_rate": LEARNING_RATE,
    "best_val_f1": best_val_f1,
    "history": history,
    "classes": class_names,
}

with open(METRICS_PATH, "w") as f:
    json.dump(metrics, f, indent=4)

print(f"Metrics saved to {METRICS_PATH}")
print("Done ✅")


Metrics saved to /media/veer/Data/Projects/collaborative_cnn_team08/results/metrics_v2.json
Done ✅
