In [6]:
import os, sys, shutil, pathlib, json, random, time
from tqdm import tqdm
from pathlib import Path

print("Python:", sys.version)

Python: 3.13.5 (tags/v3.13.5:6cb20a2, Jun 11 2025, 16:15:46) [MSC v.1943 64 bit (AMD64)]


In [2]:
import torch
from ultralytics import YOLO

print("Torch:", torch.__version__, "| CUDA available:", torch.cuda.is_available())
device = 0 if torch.cuda.is_available() else "cpu"
device

Torch: 2.8.0+cu128 | CUDA available: True


0

In [3]:
data_dir = Path("../datasets/Roboflow")  # <-- change this
assert (data_dir / "data.yaml").exists(), "data.yaml not found—check your path."
print((data_dir / "data.yaml").read_text()[:500])

train: ../train/images
val: ../valid/images
test: ../test/images

nc: 1
names: ['license-plate']

roboflow:
  workspace: car-plate-fcnrs
  project: sg-license-plate-yqedo
  version: 2
  license: CC BY 4.0
  url: https://universe.roboflow.com/car-plate-fcnrs/sg-license-plate-yqedo/dataset/2


## Training

In [10]:
# Paths
DATA_YAML = (data_dir / "data.yaml")      # <- update to your dataset path
PROJECT   = "runs_sg"                    # custom project folder (avoids clobbering default runs/)
RUN_NAME  = "yolo11_sg_plate"            # run name; resume uses this

# Model & training hyperparameters
BASE_MODEL   = "yolo11s.pt"              # yolo11n/s/m/l; pick 'm' or 'l' if you have more VRAM
TOTAL_EPOCHS = 60
IMGSZ        = 960                        # 1280 if you have headroom, 640 if OOM
BATCH        = 8                          # lower to 4/2 if OOM
ACCUMULATE   = 2                          # simulates larger effective batch (BATCH * ACCUMULATE)
DEVICE       = 0 if torch.cuda.is_available() else "cpu"

# Common augment & scheduler settings (good recall)
TRAIN_KW = dict(
    data=DATA_YAML,
    imgsz=IMGSZ,
    batch=BATCH,
    nbs=256,
    device=DEVICE,
    epochs=1,                 # <- we run one epoch per loop
    cos_lr=True,
    lr0=0.01,
    warmup_epochs=3,
    patience=20,
    mosaic=1.0, mixup=0.2, copy_paste=0.2, close_mosaic=10,
    hsv_h=0.015, hsv_s=0.6, hsv_v=0.6,
    translate=0.10, scale=0.70,
    degrees=0.0, shear=0.0, perspective=0.0,
    workers=2,
    cache=False,
    project=PROJECT,
    name=RUN_NAME,
)

In [11]:
# First epoch: start from a base checkpoint (auto-downloads)
model = YOLO(BASE_MODEL)

t0 = time.time()
for ep in tqdm(range(1, TOTAL_EPOCHS + 1), desc="Training (epochs)", unit="epoch"):
    if ep == 1:
        results = model.train(**TRAIN_KW)               # start fresh
    else:
        # Resume from last checkpoint saved in the same project/run
        last_ckpt = Path(PROJECT) / "detect" / RUN_NAME / "weights" / "last.pt"
        model = YOLO(str(last_ckpt))
        results = model.train(resume=True, **TRAIN_KW)  # resume keeps prior hyperparameters

elapsed = time.time() - t0
print(f"\nDone: {TOTAL_EPOCHS} epochs in {elapsed/60:.1f} min")

best_pt = Path(PROJECT) / "detect" / RUN_NAME / "weights" / "best.pt"
print("Best weights:", best_pt if best_pt.exists() else "(not found yet)")


Training (epochs):   0%|          | 0/60 [00:00<?, ?epoch/s]

Ultralytics 8.3.189  Python-3.13.5 torch-2.8.0+cu128 CUDA:0 (NVIDIA GeForce RTX 5060 Laptop GPU, 8151MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.2, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=..\datasets\Roboflow\data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=1, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.6, hsv_v=0.6, imgsz=960, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.2, mode=train, model=yolo11s.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=yolo11_sg_plate, nbs=256, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=20, perspect

Training (epochs):   0%|          | 0/60 [02:03<?, ?epoch/s]



KeyboardInterrupt: 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# Load training results
results_csv = Path(PROJECT) / "detect" / RUN_NAME / "results.csv"

if results_csv.exists():
    # Read the results CSV
    df = pd.read_csv(results_csv)
    df.columns = df.columns.str.strip()  # Remove any whitespace from column names
    
    # Create subplots for different metrics
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    fig.suptitle('YOLO Training Metrics', fontsize=16)
    
    # Box Loss
    if 'train/box_loss' in df.columns:
        axes[0, 0].plot(df.index, df['train/box_loss'], 'b-', label='Train Box Loss', alpha=0.7)
        if 'val/box_loss' in df.columns:
            axes[0, 0].plot(df.index, df['val/box_loss'], 'r-', label='Val Box Loss', alpha=0.7)
        axes[0, 0].set_title('Box Loss')
        axes[0, 0].set_xlabel('Epoch')
        axes[0, 0].set_ylabel('Loss')
        axes[0, 0].legend()
        axes[0, 0].grid(True, alpha=0.3)
    
    # Class Loss
    if 'train/cls_loss' in df.columns:
        axes[0, 1].plot(df.index, df['train/cls_loss'], 'b-', label='Train Class Loss', alpha=0.7)
        if 'val/cls_loss' in df.columns:
            axes[0, 1].plot(df.index, df['val/cls_loss'], 'r-', label='Val Class Loss', alpha=0.7)
        axes[0, 1].set_title('Classification Loss')
        axes[0, 1].set_xlabel('Epoch')
        axes[0, 1].set_ylabel('Loss')
        axes[0, 1].legend()
        axes[0, 1].grid(True, alpha=0.3)
    
    # mAP metrics
    if 'metrics/mAP50(B)' in df.columns:
        axes[1, 0].plot(df.index, df['metrics/mAP50(B)'], 'g-', label='mAP@0.5', alpha=0.7)
        if 'metrics/mAP50-95(B)' in df.columns:
            axes[1, 0].plot(df.index, df['metrics/mAP50-95(B)'], 'orange', label='mAP@0.5:0.95', alpha=0.7)
        axes[1, 0].set_title('Mean Average Precision')
        axes[1, 0].set_xlabel('Epoch')
        axes[1, 0].set_ylabel('mAP')
        axes[1, 0].legend()
        axes[1, 0].grid(True, alpha=0.3)
    
    # Learning Rate and other metrics
    if 'lr/pg0' in df.columns:
        axes[1, 1].plot(df.index, df['lr/pg0'], 'purple', label='Learning Rate', alpha=0.7)
        axes[1, 1].set_title('Learning Rate')
        axes[1, 1].set_xlabel('Epoch')
        axes[1, 1].set_ylabel('LR')
        axes[1, 1].legend()
        axes[1, 1].grid(True, alpha=0.3)
    elif 'train/dfl_loss' in df.columns:
        axes[1, 1].plot(df.index, df['train/dfl_loss'], 'purple', label='Train DFL Loss', alpha=0.7)
        if 'val/dfl_loss' in df.columns:
            axes[1, 1].plot(df.index, df['val/dfl_loss'], 'orange', label='Val DFL Loss', alpha=0.7)
        axes[1, 1].set_title('Distribution Focal Loss')
        axes[1, 1].set_xlabel('Epoch')
        axes[1, 1].set_ylabel('Loss')
        axes[1, 1].legend()
        axes[1, 1].grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()
    
    # Print summary statistics
    print(f"\nTraining Summary:")
    print(f"Total epochs completed: {len(df)}")
    if 'metrics/mAP50(B)' in df.columns:
        print(f"Best mAP@0.5: {df['metrics/mAP50(B)'].max():.4f} at epoch {df['metrics/mAP50(B)'].idxmax() + 1}")
    if 'metrics/mAP50-95(B)' in df.columns:
        print(f"Best mAP@0.5:0.95: {df['metrics/mAP50-95(B)'].max():.4f} at epoch {df['metrics/mAP50-95(B)'].idxmax() + 1}")
    
    # Display available columns for reference
    print(f"\nAvailable metrics columns:")
    for col in sorted(df.columns):
        if '/' in col:  # Only show metric columns
            print(f"  - {col}")
            
else:
    print(f"Results file not found: {results_csv}")
    print("Train the model first to generate metrics.")

In [None]:
from IPython.display import display, Image
import glob, os

# Load the best weights just to be explicit
detector = YOLO(str(best_pt))

sample_images = sorted(glob.glob(str(data_dir / "valid" / "images" / "*")))[:8]
out_dir = Path("pred_samples"); out_dir.mkdir(exist_ok=True)

for im in sample_images:
    res = detector.predict(source=im, imgsz=IMGSZ, conf=0.2, iou=0.5, device=DEVICE, verbose=False)
    # Save a rendered image with boxes
    for r in res:
        r.save(filename=str(out_dir / f"pred_{Path(im).name}"))

# Display a couple
for im in sorted(glob.glob(str(out_dir / "pred_*")))[:4]:
    display(Image(filename=im))

In [6]:
# If this cell still OOMs later, restart the runtime once to clear residual VRAM.
import os, torch, gc
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"  # mitigate fragmentation
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print("GPU:", torch.cuda.get_device_name(0))
else:
    print("CUDA not available; training will run on CPU.")


AcceleratorError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
