# YOLOv8n Testing Base Code

In [None]:
# Install PyTorch (CUDA 12.9) + deps

# Upgrade pip first
%pip install --upgrade pip

# Install PyTorch with CUDA 12.9 (adjust if your setup uses a different CUDA build)
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu129
%pip install ultralytics pandas psutil matplotlib tqdm


Note: you may need to restart the kernel to use updated packages.
Looking in indexes: https://download.pytorch.org/whl/cu129
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [None]:
import torch
import os
from pathlib import Path
import random
import numpy as np
from datetime import datetime

# Report the installed PyTorch build and pick the compute device that the
# rest of the notebook refers to as `device`.
print("=== PyTorch & GPU Status ===")
print(f"Torch Version : {torch.__version__}")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if device.type == "cuda":
    # Details are queried for GPU index 0 only.
    status_lines = (
        "CUDA available: YES",
        f"GPU Name      : {torch.cuda.get_device_name(0)}",
        f"Compute Cap   : {torch.cuda.get_device_capability(0)}",
        f"CUDA Version  : {torch.version.cuda}",
    )
else:
    status_lines = (
        "CUDA available: NO (running on CPU)",
        " This benchmark is intended to run on GPU.",
    )

for status_line in status_lines:
    print(status_line)

# Project root. Defaults to the original workstation path; set the
# YOLO_BENCH_ROOT environment variable to run the notebook on another machine
# without editing the code.
ROOT = Path(
    os.environ.get("YOLO_BENCH_ROOT", r"C:\Programming\CONFERENCE PAPER\NVIDIA\YOLOv8n")
).resolve()
RUNS_DIR = ROOT / "runs"
DATA_ROOT = ROOT / "data" / "coco5k"   # where we'll store the 5k COCO subset
YAML_PATH = DATA_ROOT / "coco5k.yaml"  # written by the COCO -> YOLO conversion cell

# Create the output folders up front so later cells can write into them.
RUNS_DIR.mkdir(parents=True, exist_ok=True)
DATA_ROOT.mkdir(parents=True, exist_ok=True)

EXPERIMENT_NAME = "yolov8n_coco5k_pilot"
MODEL_WEIGHTS = "yolov8n.pt"   # base Ultralytics model

# Pilot-run hyperparameters (echoed in the configuration printout and passed to training).
EPOCHS_PILOT = 30
IMG_SIZE = 512
BATCH_SIZE = 16
NUM_WORKERS = 4
SEED = 55

def set_seed(seed: int = 55):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch's
    generator; when CUDA is available, all CUDA devices are seeded as well.

    Args:
        seed: Integer seed applied to each generator.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

# Seed all RNGs before anything stochastic runs.
set_seed(SEED)

# Echo the effective configuration so it is recorded with the notebook output.
print("\n=== Benchmark Configuration ===")
print(f"ROOT Dir         : {ROOT}")
print(f"Runs Dir         : {RUNS_DIR}")
print(f"Data Root        : {DATA_ROOT}")
# Fixed: a stray ")" inside the f-string used to be printed after the path.
print(f"Planned YAML     : {YAML_PATH}")
print(f"Experiment Name  : {EXPERIMENT_NAME}")
print(f"Epochs (Pilot)   : {EPOCHS_PILOT}")
print(f"Image Size       : {IMG_SIZE}")
print(f"Batch Size       : {BATCH_SIZE}")
print(f"Workers          : {NUM_WORKERS}")
print(f"Seed             : {SEED}")
print(f"Device           : {device}")
print(f"Start Time       : {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")


=== PyTorch & GPU Status ===
Torch Version : 2.8.0+cu129
CUDA available: YES
GPU Name      : NVIDIA GeForce RTX 5060 Ti
Compute Cap   : (12, 0)
CUDA Version  : 12.9

=== Benchmark Configuration ===
ROOT Dir         : C:\Programming\CONFERENCE PAPER\NVIDIA\YOLOV8n
Runs Dir         : C:\Programming\CONFERENCE PAPER\NVIDIA\YOLOV8n\runs
Data Root        : C:\Programming\CONFERENCE PAPER\NVIDIA\YOLOV8n\data\coco5k
Planned YAML     : C:\Programming\CONFERENCE PAPER\NVIDIA\YOLOV8n\data\coco5k\coco5k.yaml)
Experiment Name  : yolov8n_coco5k_pilot
Epochs (Pilot)   : 30
Image Size       : 512
Batch Size       : 16
Workers          : 4
Seed             : 55
Device           : cuda
Start Time       : 2025-12-03 16:59:13


In [None]:
# Uses official COCO 2017 val set (~5,000 images)

import os
from pathlib import Path
from urllib.request import urlretrieve
import zipfile

print("=== COCO 2017 val (5k) – Download & Extract ===")
print(f"DATA_ROOT : {DATA_ROOT}")

images_zip_path = DATA_ROOT / "val2017.zip"
ann_zip_path    = DATA_ROOT / "annotations_trainval2017.zip"

images_dir      = DATA_ROOT / "val2017"        # will contain 5000 images
ann_dir         = DATA_ROOT / "annotations"    # will contain instances_val2017.json

images_url = "http://images.cocodataset.org/zips/val2017.zip"
ann_url    = "http://images.cocodataset.org/annotations/annotations_trainval2017.zip"

DATA_ROOT.mkdir(parents=True, exist_ok=True)


def _download_if_missing(url, dest, step, description):
    """Download ``url`` to ``dest`` unless ``dest`` already exists.

    The data is first written to ``<dest>.part`` and only renamed to ``dest``
    once the transfer has finished. An interrupted download therefore never
    leaves a truncated archive that a later run would mistake for a complete one.

    Args:
        url: Source URL.
        dest: Final path of the downloaded file.
        step: Step number shown in the ``[step/4]`` progress prefix.
        description: Human-readable name of the download for the log line.
    """
    if dest.exists():
        print(f"\n[{step}/4] {dest.name} already exists, skipping download.")
        return

    print(f"\n[{step}/4] Downloading {description}...")
    print(f"From: {url}")
    print(f"To  : {dest}")
    partial = dest.with_name(dest.name + ".part")
    try:
        urlretrieve(url, partial)
        partial.replace(dest)
    finally:
        # No-op after a successful rename; removes the leftover on failure or interrupt.
        partial.unlink(missing_ok=True)
    print("Download complete.")


def _extract_if_missing(zip_path, out_dir, step, label):
    """Extract ``zip_path`` next to ``out_dir`` unless ``out_dir`` already exists.

    The archive is expected to contain a top-level folder named like
    ``out_dir`` (e.g. ``val2017`` or ``annotations``), so it is unpacked into
    ``out_dir.parent``.

    NOTE(review): an extraction that is interrupted half-way still leaves
    ``out_dir`` behind, and it will be skipped on the next run; delete the
    folder manually in that case.

    Args:
        zip_path: Archive to unpack.
        out_dir: Directory the archive is expected to create.
        step: Step number shown in the ``[step/4]`` progress prefix.
        label: Name used in the completion message (e.g. ``"Images"``).
    """
    if out_dir.exists():
        print(f"\n[{step}/4] {out_dir.name} directory already exists, skipping extraction.")
        return

    print(f"\n[{step}/4] Extracting {zip_path.name}...")
    with zipfile.ZipFile(zip_path, 'r') as zf:
        zf.extractall(out_dir.parent)
    print(f"Extraction complete. {label} in: {out_dir}")


_download_if_missing(images_url, images_zip_path, 1, "COCO val2017 images (~1 GB)")
_download_if_missing(ann_url, ann_zip_path, 2, "COCO annotations (~250 MB)")
_extract_if_missing(images_zip_path, images_dir, 3, "Images")
_extract_if_missing(ann_zip_path, ann_dir, 4, "Annotations")

print("\nDone. You now have:")
print(f"- Images     : {images_dir}")
print(f"- Annotations: {ann_dir}")


=== COCO 2017 val (5k) – Download & Extract ===
DATA_ROOT : C:\Programming\CONFERENCE PAPER\NVIDIA\YOLOV8n\data\coco5k

[1/4] val2017.zip already exists, skipping download.

[2/4] annotations_trainval2017.zip already exists, skipping download.

[3/4] val2017 directory already exists, skipping extraction.

[4/4] annotations directory already exists, skipping extraction.

Done. You now have:
- Images     : C:\Programming\CONFERENCE PAPER\NVIDIA\YOLOV8n\data\coco5k\val2017
- Annotations: C:\Programming\CONFERENCE PAPER\NVIDIA\YOLOV8n\data\coco5k\annotations


In [None]:

import json
import shutil
from pathlib import Path

print("=== COCO 5k → YOLO-format Conversion ===")
print(f"DATA_ROOT : {DATA_ROOT}")
print(f"YAML_PATH : {YAML_PATH}")

# Inputs: the extracted COCO val2017 images and the instances annotation file.
images_dir = DATA_ROOT / "val2017"
ann_dir = DATA_ROOT / "annotations"
ann_file = ann_dir / "instances_val2017.json"

# Outputs: YOLO-style images/ and labels/ trees, each with a train and a val split.
images_root = DATA_ROOT / "images"
labels_root = DATA_ROOT / "labels"

images_train_dir, images_val_dir = images_root / "train2017", images_root / "val2017"
labels_train_dir, labels_val_dir = labels_root / "train2017", labels_root / "val2017"

# A previous run is considered complete when the YAML exists and the train
# label folder already holds at least one .txt file.
already_converted = (
    YAML_PATH.exists()
    and labels_train_dir.exists()
    and any(labels_train_dir.glob("*.txt"))
)

if already_converted:
    print("Detected existing YOLO-style dataset and YAML. Skipping conversion.")
else:
    # Create directories
    for out_dir in (images_train_dir, images_val_dir, labels_train_dir, labels_val_dir):
        out_dir.mkdir(parents=True, exist_ok=True)

    if not ann_file.exists():
        raise FileNotFoundError(f"Annotation file not found: {ann_file}")

    print("\n[1/5] Loading COCO annotations...")
    coco = json.loads(ann_file.read_text(encoding="utf-8"))

    images_info = coco["images"]
    annotations = coco["annotations"]
    categories  = coco["categories"]

    print(f"Total images in val2017: {len(images_info)}")
    print(f"Total annotations      : {len(annotations)}")
    print(f"Total categories       : {len(categories)}")

    # Lookup: image id -> image record.
    imgid_to_info = dict((img["id"], img) for img in images_info)

    # Lookup: image id -> list of its annotations (in file order).
    imgid_to_anns = {}
    for ann in annotations:
        img_id = ann["image_id"]
        if img_id in imgid_to_anns:
            imgid_to_anns[img_id].append(ann)
        else:
            imgid_to_anns[img_id] = [ann]

    # COCO category ids are mapped to contiguous class indices, ordered by category id.
    cats_by_id = sorted(categories, key=lambda c: c["id"])
    catid_to_idx = {cat["id"]: idx for idx, cat in enumerate(cats_by_id)}
    cat_names = [cat["name"] for cat in cats_by_id]

    print("\n[2/5] Creating 80/20 train/val split...")
    # Sort images by file_name for deterministic behavior
    sorted_imgs = list(images_info)
    sorted_imgs.sort(key=lambda rec: rec["file_name"])
    n_total = len(sorted_imgs)
    n_train = int(0.8 * n_total)
    n_val = n_total - n_train

    # First 80% of the sorted list is train, the remainder is val.
    train_imgs, val_imgs = sorted_imgs[:n_train], sorted_imgs[n_train:]

    print(f"Train images: {len(train_imgs)}")
    print(f"Val images  : {len(val_imgs)}")

    def write_yolo_labels(img, anns, target_label_dir: Path):
        """Write the YOLO-format label file for one image.

        Each kept annotation becomes one line
        ``<class_idx> <x_center> <y_center> <width> <height>`` with all four
        box values normalized by the image size. The file is always created;
        it is empty when no annotation is kept.

        Annotations are dropped when:
          * their ``category_id`` is not in ``catid_to_idx``;
          * they are flagged ``iscrowd`` (the box encloses a group of objects,
            not a single instance);
          * the box has no area once clipped to the image.

        Args:
            img: COCO image record with ``file_name``, ``width`` and ``height``.
            anns: COCO annotation records for this image (may be empty or None).
            target_label_dir: Directory that receives ``<image stem>.txt``.
        """
        img_w = img["width"]
        img_h = img["height"]
        stem = Path(img["file_name"]).stem
        label_path = target_label_dir / f"{stem}.txt"

        lines = []
        for ann in anns or []:
            if ann.get("iscrowd", 0):
                continue

            cls_idx = catid_to_idx.get(ann["category_id"])
            if cls_idx is None:
                continue

            # COCO boxes are [x_min, y_min, width, height] in pixels.
            x_min, y_min, w, h = ann["bbox"]

            # Clip the box corners to the image. Clamping the centre and the
            # size independently (as before) does not keep the box inside it.
            x0 = min(max(x_min, 0.0), img_w)
            y0 = min(max(y_min, 0.0), img_h)
            x1 = min(max(x_min + w, 0.0), img_w)
            y1 = min(max(y_min + h, 0.0), img_h)

            box_w = x1 - x0
            box_h = y1 - y0
            if box_w <= 0 or box_h <= 0:
                continue  # degenerate box, nothing to learn from

            x_rel = (x0 + x1) / 2.0 / img_w
            y_rel = (y0 + y1) / 2.0 / img_h
            w_rel = box_w / img_w
            h_rel = box_h / img_h

            lines.append(f"{cls_idx} {x_rel:.6f} {y_rel:.6f} {w_rel:.6f} {h_rel:.6f}")

        with open(label_path, "w", encoding="utf-8") as f:
            f.write("\n".join(lines))

    # Images are copied (not moved): the extracted val2017 folder stays intact.
    print("\n[3/5] Copying images and generating YOLO labels (this may take a bit)...")

    def process_split(split_name, img_list, images_src_dir, images_dst_dir, labels_dst_dir):
        """Copy one split's images into place and write their YOLO label files.

        Args:
            split_name: Name shown in the progress message (e.g. "train").
            img_list: COCO image records belonging to this split.
            images_src_dir: Directory holding the original images.
            images_dst_dir: Directory that receives the copied images.
            labels_dst_dir: Directory that receives one ``.txt`` label file per image.
        """
        print(f"\n   Processing {split_name} split...")
        for img in img_list:
            file_name = img["file_name"]
            src_img = images_src_dir / file_name
            dst_img = images_dst_dir / file_name

            # Skip the copy when a previous run already placed the image.
            if not dst_img.exists():
                shutil.copy2(src_img, dst_img)

            # Images without annotations still get a (blank) label file.
            img_anns = imgid_to_anns.get(img["id"], [])
            write_yolo_labels(img, img_anns, labels_dst_dir)

    process_split("train", train_imgs, images_dir, images_train_dir, labels_train_dir)
    process_split("val",   val_imgs,   images_dir, images_val_dir,   labels_val_dir)

    print("\n[4/5] Finished creating YOLO images/labels for train and val splits.")
    print("\n[5/5] Creating coco5k.yaml...")

    yaml_text = f"""# COCO 5k subset (val2017) converted to YOLO format
# path is the root that contains 'images' and 'labels' directories
path: {DATA_ROOT.as_posix()}

train: images/train2017
val: images/val2017

nc: {len(cat_names)}
names:
"""

    for idx, name in enumerate(cat_names):
        yaml_text += f"  {idx}: {name}\n"

    with open(YAML_PATH, "w", encoding="utf-8") as f:
        f.write(yaml_text)

    print(f"YAML created at: {YAML_PATH}")

print("\nConversion step complete.")
print(f"- Images/train : {images_train_dir}")
print(f"- Images/val   : {images_val_dir}")
print(f"- Labels/train : {labels_train_dir}")
print(f"- Labels/val   : {labels_val_dir}")
print(f"- YAML         : {YAML_PATH}")


=== COCO 5k → YOLO-format Conversion ===
DATA_ROOT : C:\Programming\CONFERENCE PAPER\NVIDIA\YOLOV8n\data\coco5k
YAML_PATH : C:\Programming\CONFERENCE PAPER\NVIDIA\YOLOV8n\data\coco5k\coco5k.yaml
Detected existing YOLO-style dataset and YAML. Skipping conversion.

Conversion step complete.
- Images/train : C:\Programming\CONFERENCE PAPER\NVIDIA\YOLOV8n\data\coco5k\images\train2017
- Images/val   : C:\Programming\CONFERENCE PAPER\NVIDIA\YOLOV8n\data\coco5k\images\val2017
- Labels/train : C:\Programming\CONFERENCE PAPER\NVIDIA\YOLOV8n\data\coco5k\labels\train2017
- Labels/val   : C:\Programming\CONFERENCE PAPER\NVIDIA\YOLOV8n\data\coco5k\labels\val2017
- YAML         : C:\Programming\CONFERENCE PAPER\NVIDIA\YOLOV8n\data\coco5k\coco5k.yaml


In [None]:
from ultralytics import YOLO
import time

print("=== YOLOv8n – COCO 5k Pilot Training ===")

if not YAML_PATH.exists():
    raise FileNotFoundError(f"YAML file not found: {YAML_PATH}")

# Train on the device selected in the configuration cell instead of assuming
# that GPU 0 exists: a CUDA index when CUDA is available, otherwise "cpu".
train_device = 0 if device.type == "cuda" else "cpu"

print(f"Using YAML       : {YAML_PATH}")
print(f"Experiment Name  : {EXPERIMENT_NAME}")
print(f"Epochs (Pilot)   : {EPOCHS_PILOT}")
print(f"Image Size       : {IMG_SIZE}")
print(f"Batch Size       : {BATCH_SIZE}")
print(f"Workers          : {NUM_WORKERS}")
print(f"Seed             : {SEED}")
print(f"Device           : {device}")

train_images_dir = DATA_ROOT / "images" / "train2017"
if not train_images_dir.exists():
    raise FileNotFoundError(f"Train images dir not found: {train_images_dir}")

# Count the training images so an approximate throughput can be reported.
train_image_count = sum(
    1 for p in train_images_dir.glob("*.*")
    if p.suffix.lower() in [".jpg", ".jpeg", ".png"]
)
print(f"Detected train images: {train_image_count}")

model = YOLO(MODEL_WEIGHTS)

start_time = time.time()

results = model.train(
    data=str(YAML_PATH),
    epochs=EPOCHS_PILOT,
    imgsz=IMG_SIZE,
    batch=BATCH_SIZE,
    workers=NUM_WORKERS,
    # The trainer seeds itself from its own `seed` argument, so the notebook's
    # SEED must be passed explicitly to take effect.
    seed=SEED,
    mosaic=0.0,  # mosaic augmentation disabled
    project=str(RUNS_DIR),
    name=EXPERIMENT_NAME,
    device=train_device,
    verbose=True
)

end_time = time.time()
# Wall-clock time of the whole train() call (includes setup and validation),
# so the per-epoch and throughput figures below are approximations.
total_time_sec = end_time - start_time
avg_epoch_time = total_time_sec / EPOCHS_PILOT

print("\n=== Pilot Training Finished ===")
print(f"Total time (s)     : {total_time_sec:.2f}")
print(f"Avg time / epoch   : {avg_epoch_time:.3f} s")

if train_image_count > 0:
    total_images_seen = train_image_count * EPOCHS_PILOT
    throughput = total_images_seen / total_time_sec
    print(f"Approx. throughput : {throughput:.2f} images/s")
else:
    print("Approx. throughput : N/A (train_image_count = 0)")

if hasattr(results, "save_dir"):
    print(f"\nRun directory      : {results.save_dir}")
else:
    print("\nCheck runs folder  :", RUNS_DIR)


=== YOLOv8n – COCO 5k Pilot Training ===
Using YAML       : C:\Programming\CONFERENCE PAPER\NVIDIA\YOLOV8n\data\coco5k\coco5k.yaml
Experiment Name  : yolov8n_coco5k_pilot
Epochs (Pilot)   : 30
Image Size       : 512
Batch Size       : 16
Workers          : 4
Device           : cuda
Detected train images: 4000
New https://pypi.org/project/ultralytics/8.3.234 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.233  Python-3.10.11 torch-2.8.0+cu129 CUDA:0 (NVIDIA GeForce RTX 5060 Ti, 16311MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=C:\Programming\CONFERENCE PAPER\NVIDIA\YOLOV8n\data\coco5k\coco5k.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=30, erasing=0.4, exi