# RESNET-152 TESTING BASE CODE

In [4]:
# Code Cell 1 Installing torch 12.9
%pip install --upgrade pip
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu129
%pip install onnx onnxruntime-gpu
%pip install matplotlib pillow tqdm

Note: you may need to restart the kernel to use updated packages.
Looking in indexes: https://download.pytorch.org/whl/cu129
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [5]:
# Code Cell 1.2 â€” Simulated 8 GB VRAM Cap (Allocator-Level)

import torch

assert torch.cuda.is_available(), "CUDA is not available."

# Explicit CUDA device index (required)
device_index = 0

# RTX 5060 Ti = 16 GB physical VRAM
TOTAL_VRAM_GB = 16
TARGET_VRAM_GB = 8

memory_fraction = TARGET_VRAM_GB / TOTAL_VRAM_GB

# Limit how much VRAM PyTorch is allowed to reserve
torch.cuda.set_per_process_memory_fraction(memory_fraction, device=device_index)

# Clear any cached allocations and reset stats
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()

print(
    f"PyTorch VRAM usage capped at ~{TARGET_VRAM_GB} GB "
    f"({memory_fraction:.2f} of total device memory) on cuda:{device_index}"
)


PyTorch VRAM usage capped at ~8 GB (0.50 of total device memory) on cuda:0


In [6]:

import os
from pathlib import Path

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

import torchvision
from torchvision import datasets, transforms as T, models

import matplotlib.pyplot as plt
from tqdm.auto import tqdm

%matplotlib inline

BASE_DIR = Path().resolve()
DATA_DIR = BASE_DIR / "data"
DATA_DIR.mkdir(exist_ok=True)

print("Base directory:", BASE_DIR)
print("Data directory:", DATA_DIR)

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))
    print("CUDA capability:", torch.cuda.get_device_capability(0))
    print("Torch version:", torch.__version__)
    print("Torchvision version:", torchvision.__version__)


Base directory: C:\Programming\INDONESIA CONFERENCE PAPER\5060Ti\RESNET152
Data directory: C:\Programming\INDONESIA CONFERENCE PAPER\5060Ti\RESNET152\data
Using device: cuda
GPU: NVIDIA GeForce RTX 5060 Ti
CUDA capability: (12, 0)
Torch version: 2.8.0+cu129
Torchvision version: 0.23.0+cu129


  from .autonotebook import tqdm as notebook_tqdm


### Download and Extract Dataset

In [7]:
%pip install requests

import os
from pathlib import Path
import requests
from tqdm.auto import tqdm
import tarfile

DATASET_DIR = Path("oxford-iiit-pet")
DATASET_DIR.mkdir(exist_ok=True)

# URLs from the official Oxford-IIIT Pet dataset page
URLS = {
    "images": "https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz",
    "annotations": "https://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz"
}

def download_file(url, output_path):
    response = requests.get(url, stream=True)
    total = int(response.headers.get("content-length", 0))

    with open(output_path, "wb") as file, tqdm(
        desc=f"Downloading {output_path.name}",
        total=total,
        unit="B", unit_scale=True, unit_divisor=1024
    ) as bar:
        for data in response.iter_content(chunk_size=1024):
            size = file.write(data)
            bar.update(size)

def extract_tar(file_path, extract_to):
    print(f"Extracting {file_path.name} ...")
    with tarfile.open(file_path) as tar:
        tar.extractall(path=extract_to)
    print("Done.\n")

for name, url in URLS.items():
    tar_path = DATASET_DIR / f"{name}.tar.gz"

    if not tar_path.exists():
        download_file(url, tar_path)
    else:
        print(f"{tar_path.name} already exists, skipping download.")

    extract_tar(tar_path, DATASET_DIR)

print("Dataset download + extraction complete.")
print("Images folder:", DATASET_DIR / "images")
print("Annotations folder:", DATASET_DIR / "annotations")


Note: you may need to restart the kernel to use updated packages.
images.tar.gz already exists, skipping download.
Extracting images.tar.gz ...


  tar.extractall(path=extract_to)


Done.

annotations.tar.gz already exists, skipping download.
Extracting annotations.tar.gz ...
Done.

Dataset download + extraction complete.
Images folder: oxford-iiit-pet\images
Annotations folder: oxford-iiit-pet\annotations


In [8]:
from pathlib import Path
from torchvision.datasets import OxfordIIITPet
import torchvision.transforms as T
import torchvision.transforms.functional as F
import torch

DATASET_ROOT = Path(".")   # project root
BASE_FOLDER = "oxford-iiit-pet"
DATASET_DIR = DATASET_ROOT / BASE_FOLDER

DATASET_DIR = Path("dataset_oxford_pet")

class LetterboxToSquare420:
    def __init__(self, size=420, fill=0):
        self.size = size
        self.fill = fill

    def __call__(self, img):
        # img is a PIL Image (W, H)
        w, h = img.size
        max_side = max(w, h)
        scale = self.size / max_side

        new_w = int(round(w * scale))
        new_h = int(round(h * scale))

        img = F.resize(img, (new_h, new_w))

        pad_left = (self.size - new_w) // 2
        pad_right = self.size - new_w - pad_left
        pad_top = (self.size - new_h) // 2
        pad_bottom = self.size - new_h - pad_top

        img = F.pad(img, [pad_left, pad_top, pad_right, pad_bottom], fill=self.fill)
        return img

imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std  = [0.229, 0.224, 0.225]

common_transform = T.Compose([
    LetterboxToSquare420(size=420, fill=0),
    T.ToTensor(),
    T.Normalize(mean=imagenet_mean, std=imagenet_std),
])

# Datasets: train/val
train_dataset = OxfordIIITPet(
    root=DATASET_ROOT,
    split="trainval",
    target_types="category",
    transform=common_transform,
    download=False,  # already downloaded
)

val_dataset = OxfordIIITPet(
    root=DATASET_ROOT,
    split="test",
    target_types="category",
    transform=common_transform,
    download=False,
)

num_classes = len(train_dataset.classes)
print(f"Number of classes: {num_classes}")
print(f"Train samples: {len(train_dataset)}")
print(f"Val samples:   {len(val_dataset)}")

x0, y0 = train_dataset[0]
print("Sample tensor shape:", x0.shape)  # expect: [3, 420, 420]
print("Sample label:", y0, "| class name:", train_dataset.classes[y0])


Number of classes: 37
Train samples: 3680
Val samples:   3669
Sample tensor shape: torch.Size([3, 420, 420])
Sample label: 0 | class name: Abyssinian


### Data Loaders

In [9]:
from torch.utils.data import DataLoader
BATCH_SIZE = 8
NUM_WORKERS = 0      
PIN_MEMORY = (device.type == "cuda")

print(f"Batch size: {BATCH_SIZE}")
print(f"Num workers: {NUM_WORKERS}")
print(f"Pin memory: {PIN_MEMORY}")

train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
    persistent_workers=False,
)

val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
    persistent_workers=False,
)

batch_imgs, batch_labels = next(iter(train_loader))
print("Train batch shape:", batch_imgs.shape)
print("Train batch labels shape:", batch_labels.shape)
print("Unique labels in this batch:", batch_labels.unique())


Batch size: 8
Num workers: 0
Pin memory: True
Train batch shape: torch.Size([8, 3, 420, 420])
Train batch labels shape: torch.Size([8])
Unique labels in this batch: tensor([ 2,  3,  4,  5, 15, 23, 36])


In [10]:
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet152

print("Number of classes (from dataset):", num_classes)

model = resnet152(weights=None)

in_features = model.fc.in_features
model.fc = nn.Linear(in_features, num_classes)

model = model.to(device)

criterion = nn.CrossEntropyLoss()

LEARNING_RATE = 0.01
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-4
EPOCHS = 30

optimizer = optim.SGD(
    model.parameters(),
    lr=LEARNING_RATE,
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY,
)

scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=50,
    gamma=0.1,
)

USE_AMP = (device.type == "cuda")

from torch.cuda.amp import GradScaler
scaler = GradScaler(enabled=USE_AMP)

total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print("Model: ResNet-152")
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print("Device:", device)

if device.type == "cuda":
    print("CUDA device:", torch.cuda.get_device_name(0))
    print("AMP enabled:", USE_AMP)


Number of classes (from dataset): 37
Model: ResNet-152
Total parameters: 58,219,621
Trainable parameters: 58,219,621
Device: cuda
CUDA device: NVIDIA GeForce RTX 5060 Ti
AMP enabled: True


  scaler = GradScaler(enabled=USE_AMP)


In [11]:
import time
from tqdm.auto import tqdm

history = {
    "train_loss": [],
    "val_loss": [],
    "val_acc": [],
    "epoch_throughput_imgs_per_sec": [],
}

def train_one_epoch(model, loader, optimizer, device, criterion, epoch_idx: int,
                    scaler, use_amp: bool):
    """
    Train for one epoch.
    Returns:
        avg_loss (float)
        avg_throughput (float) - images per second across this epoch
    """
    model.train()
    running_loss = 0.0
    total_samples = 0

    total_batches = 0
    throughput_sum = 0.0  # accumulate per-batch throughput

    loop = tqdm(loader, desc=f"Epoch {epoch_idx + 1} - Train", leave=False)

    for inputs, targets in loop:
        batch_size = inputs.size(0)
        total_samples += batch_size
        total_batches += 1

        t0 = time.time()

        inputs = inputs.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)

        if use_amp:
            with torch.cuda.amp.autocast():
                outputs = model(inputs)
                loss = criterion(outputs, targets)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

        if device.type == "cuda":
            torch.cuda.synchronize()

        t1 = time.time()
        batch_time = max(t1 - t0, 1e-6)

        throughput = batch_size / batch_time
        throughput_sum += throughput

        running_loss += loss.item() * batch_size

        loop.set_postfix({
            "loss": loss.item(),
            "thrpt": f"{throughput:.1f} img/s"
        })

    avg_loss = running_loss / max(total_samples, 1)
    avg_throughput = throughput_sum / max(total_batches, 1)

    return avg_loss, avg_throughput


@torch.no_grad()
def validate(model, loader, device, criterion, epoch_idx: int):
    """
    Validation loop.
    Returns:
        avg_val_loss (float)
        val_accuracy (float in [0, 1])
    """
    model.eval()
    running_loss = 0.0
    total_samples = 0
    correct = 0

    loop = tqdm(loader, desc=f"Epoch {epoch_idx + 1} - Val", leave=False)

    for inputs, targets in loop:
        batch_size = inputs.size(0)
        total_samples += batch_size

        inputs = inputs.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        running_loss += loss.item() * batch_size

        preds = outputs.argmax(dim=1)
        correct += (preds == targets).sum().item()

        loop.set_postfix({
            "val_loss": loss.item()
        })

    avg_val_loss = running_loss / max(total_samples, 1)
    val_acc = correct / max(total_samples, 1)

    return avg_val_loss, val_acc

print("Training and validation functions defined (AMP-ready).")


Training and validation functions defined (AMP-ready).


In [12]:
import time

print("Starting training...")
print(f"Device: {device}")
print(f"Epochs: {EPOCHS}")
print(f"Batch size: {BATCH_SIZE}")
print(f"AMP enabled: {USE_AMP}")

if device.type == "cuda":
    torch.cuda.reset_peak_memory_stats()

total_train_start = time.time()

best_val_acc = 0.0

for epoch in range(EPOCHS):
    print(f"\n===== Epoch {epoch + 1}/{EPOCHS} =====")

    train_loss, avg_throughput = train_one_epoch(
        model=model,
        loader=train_loader,
        optimizer=optimizer,
        device=device,
        criterion=criterion,
        epoch_idx=epoch,
        scaler=scaler,
        use_amp=USE_AMP,
    )

    val_loss, val_acc = validate(
        model=model,
        loader=val_loader,
        device=device,
        criterion=criterion,
        epoch_idx=epoch,
    )

    scheduler.step()

    history["train_loss"].append(train_loss)
    history["val_loss"].append(val_loss)
    history["val_acc"].append(val_acc)
    history["epoch_throughput_imgs_per_sec"].append(avg_throughput)

    if val_acc > best_val_acc:
        best_val_acc = val_acc

    print(f"Epoch {epoch + 1}/{EPOCHS} summary:")
    print(f"  Train loss:       {train_loss:.4f}")
    print(f"  Val loss:         {val_loss:.4f}")
    print(f"  Val accuracy:     {val_acc * 100:.2f}%")
    print(f"  Avg throughput:   {avg_throughput:.1f} images/sec")

total_train_end = time.time()
total_training_time_sec = total_train_end - total_train_start

peak_vram_bytes = None
peak_vram_mb = None
peak_vram_gb = None

if device.type == "cuda":
    peak_vram_bytes = torch.cuda.max_memory_allocated()
    peak_vram_mb = peak_vram_bytes / (1024 ** 2)
    peak_vram_gb = peak_vram_bytes / (1024 ** 3)

history["total_training_time_sec"] = total_training_time_sec
history["peak_vram_bytes"] = peak_vram_bytes
history["peak_vram_mb"] = peak_vram_mb
history["peak_vram_gb"] = peak_vram_gb
history["best_val_acc"] = best_val_acc
history["final_val_acc"] = history["val_acc"][-1] if history["val_acc"] else None

print("\n===== Training complete =====")
print(f"Total training time: {total_training_time_sec / 60:.2f} minutes")

if peak_vram_mb is not None:
    print(f"Peak VRAM usage:    {peak_vram_mb:.1f} MB ({peak_vram_gb:.3f} GB)")

print(f"Best val accuracy:  {best_val_acc * 100:.2f}%")
print(f"Final val accuracy: {history['final_val_acc'] * 100:.2f}%")

print("\nNOTE:")
print("- Record avg GPU Util, avg VRAM, and avg power from HWiNFO at mid-training.")
print("- Later, we can compute performance per watt = best_epoch_throughput / avg_power_W.")


Starting training...
Device: cuda
Epochs: 30
Batch size: 8
AMP enabled: True

===== Epoch 1/30 =====


  with torch.cuda.amp.autocast():
                                                                                               

Epoch 1/30 summary:
  Train loss:       3.9233
  Val loss:         5.6734
  Val accuracy:     3.19%
  Avg throughput:   55.9 images/sec

===== Epoch 2/30 =====


                                                                                               

Epoch 2/30 summary:
  Train loss:       3.6377
  Val loss:         6.0314
  Val accuracy:     2.78%
  Avg throughput:   55.2 images/sec

===== Epoch 3/30 =====


                                                                                               

Epoch 3/30 summary:
  Train loss:       3.6225
  Val loss:         5.7666
  Val accuracy:     3.11%
  Avg throughput:   54.8 images/sec

===== Epoch 4/30 =====


                                                                                               

Epoch 4/30 summary:
  Train loss:       3.5996
  Val loss:         12.9039
  Val accuracy:     3.22%
  Avg throughput:   55.0 images/sec

===== Epoch 5/30 =====


                                                                                               

Epoch 5/30 summary:
  Train loss:       3.5740
  Val loss:         5.8171
  Val accuracy:     3.76%
  Avg throughput:   55.1 images/sec

===== Epoch 6/30 =====


                                                                                               

Epoch 6/30 summary:
  Train loss:       3.5453
  Val loss:         8.7478
  Val accuracy:     4.77%
  Avg throughput:   54.4 images/sec

===== Epoch 7/30 =====


                                                                                               

Epoch 7/30 summary:
  Train loss:       3.5198
  Val loss:         4.0953
  Val accuracy:     4.72%
  Avg throughput:   55.3 images/sec

===== Epoch 8/30 =====


                                                                                               

Epoch 8/30 summary:
  Train loss:       3.4901
  Val loss:         8.4362
  Val accuracy:     5.15%
  Avg throughput:   55.3 images/sec

===== Epoch 9/30 =====


                                                                                               

Epoch 9/30 summary:
  Train loss:       3.4628
  Val loss:         4.4435
  Val accuracy:     5.31%
  Avg throughput:   54.8 images/sec

===== Epoch 10/30 =====


                                                                                                

Epoch 10/30 summary:
  Train loss:       3.4535
  Val loss:         4.4518
  Val accuracy:     5.29%
  Avg throughput:   54.6 images/sec

===== Epoch 11/30 =====


                                                                                                

Epoch 11/30 summary:
  Train loss:       3.4196
  Val loss:         3.9756
  Val accuracy:     7.44%
  Avg throughput:   53.2 images/sec

===== Epoch 12/30 =====


                                                                                                

Epoch 12/30 summary:
  Train loss:       3.3925
  Val loss:         3.6187
  Val accuracy:     4.72%
  Avg throughput:   52.9 images/sec

===== Epoch 13/30 =====


                                                                                                

Epoch 13/30 summary:
  Train loss:       3.3738
  Val loss:         3.5959
  Val accuracy:     6.98%
  Avg throughput:   52.6 images/sec

===== Epoch 14/30 =====


                                                                                                

Epoch 14/30 summary:
  Train loss:       3.3410
  Val loss:         5.4761
  Val accuracy:     7.20%
  Avg throughput:   52.8 images/sec

===== Epoch 15/30 =====


                                                                                                

Epoch 15/30 summary:
  Train loss:       3.3407
  Val loss:         3.5848
  Val accuracy:     7.20%
  Avg throughput:   52.6 images/sec

===== Epoch 16/30 =====


                                                                                                

Epoch 16/30 summary:
  Train loss:       3.3035
  Val loss:         3.6815
  Val accuracy:     7.74%
  Avg throughput:   53.3 images/sec

===== Epoch 17/30 =====


                                                                                                

Epoch 17/30 summary:
  Train loss:       3.2717
  Val loss:         3.7806
  Val accuracy:     8.97%
  Avg throughput:   52.9 images/sec

===== Epoch 18/30 =====


                                                                                                

Epoch 18/30 summary:
  Train loss:       3.2349
  Val loss:         3.5436
  Val accuracy:     8.91%
  Avg throughput:   53.3 images/sec

===== Epoch 19/30 =====


                                                                                                

Epoch 19/30 summary:
  Train loss:       3.1872
  Val loss:         3.6878
  Val accuracy:     10.38%
  Avg throughput:   52.6 images/sec

===== Epoch 20/30 =====


                                                                                                

Epoch 20/30 summary:
  Train loss:       3.1327
  Val loss:         3.1870
  Val accuracy:     11.69%
  Avg throughput:   52.9 images/sec

===== Epoch 21/30 =====


                                                                                                

Epoch 21/30 summary:
  Train loss:       3.0720
  Val loss:         3.7563
  Val accuracy:     11.47%
  Avg throughput:   56.8 images/sec

===== Epoch 22/30 =====


                                                                                                

Epoch 22/30 summary:
  Train loss:       3.0130
  Val loss:         3.3162
  Val accuracy:     13.90%
  Avg throughput:   56.5 images/sec

===== Epoch 23/30 =====


                                                                                                

Epoch 23/30 summary:
  Train loss:       2.9137
  Val loss:         3.7923
  Val accuracy:     14.34%
  Avg throughput:   56.6 images/sec

===== Epoch 24/30 =====


                                                                                                

Epoch 24/30 summary:
  Train loss:       2.8509
  Val loss:         3.2880
  Val accuracy:     14.80%
  Avg throughput:   56.7 images/sec

===== Epoch 25/30 =====


                                                                                                

Epoch 25/30 summary:
  Train loss:       2.7882
  Val loss:         3.2213
  Val accuracy:     16.49%
  Avg throughput:   56.7 images/sec

===== Epoch 26/30 =====


                                                                                                

Epoch 26/30 summary:
  Train loss:       2.7048
  Val loss:         3.0020
  Val accuracy:     18.86%
  Avg throughput:   56.6 images/sec

===== Epoch 27/30 =====


                                                                                                

Epoch 27/30 summary:
  Train loss:       2.6517
  Val loss:         4.4681
  Val accuracy:     15.54%
  Avg throughput:   56.7 images/sec

===== Epoch 28/30 =====


                                                                                                

Epoch 28/30 summary:
  Train loss:       2.5524
  Val loss:         2.9512
  Val accuracy:     20.71%
  Avg throughput:   56.8 images/sec

===== Epoch 29/30 =====


                                                                                                

Epoch 29/30 summary:
  Train loss:       2.4631
  Val loss:         3.6162
  Val accuracy:     16.52%
  Avg throughput:   55.8 images/sec

===== Epoch 30/30 =====


                                                                                                

Epoch 30/30 summary:
  Train loss:       2.3545
  Val loss:         3.4144
  Val accuracy:     19.38%
  Avg throughput:   56.7 images/sec

===== Training complete =====
Total training time: 67.03 minutes
Peak VRAM usage:    3162.1 MB (3.088 GB)
Best val accuracy:  20.71%
Final val accuracy: 19.38%

NOTE:
- Record avg GPU Util, avg VRAM, and avg power from HWiNFO at mid-training.
- Later, we can compute performance per watt = best_epoch_throughput / avg_power_W.


