In [1]:
!pip install -U ultralytics opencv-python tqdm

[0m

In [2]:
from pathlib import Path

# üü¢ EDIT ONLY IF YOUR PATHS DIFFER
MIO_TCD_TRAIN_DIR = Path("/workspace/MIO-TCD-Localization/images/train")  # change if you keep it on Drive
O365_TRAIN_ROOT   = Path("/workspace/O365_vehicle13_subset_2/images/train")

# Where to create the 2‚Äëclass folder dataset for YOLO‚Äëcls.
# Using /content for fast I/O; it's just symlinks (or copies) pointing to your originals.
SCENE_DATASET_ROOT = Path("/workspace/scene_gate_cls")

print("MIO_TCD_TRAIN_DIR:", MIO_TCD_TRAIN_DIR)
print("O365_TRAIN_ROOT:", O365_TRAIN_ROOT)
print("SCENE_DATASET_ROOT:", SCENE_DATASET_ROOT)


MIO_TCD_TRAIN_DIR: /workspace/MIO-TCD-Localization/images/train
O365_TRAIN_ROOT: /workspace/O365_vehicle13_subset_2/images/train
SCENE_DATASET_ROOT: /workspace/scene_gate_cls


In [1]:
from pathlib import Path
import os, random, shutil, hashlib
from pathlib import Path
from tqdm import tqdm
from ultralytics import YOLO

# üü¢ EDIT ONLY IF YOUR PATHS DIFFER
MIO_TCD_TRAIN_DIR = Path("/workspace/MIO-TCD-Localization/images/train")
O365_TRAIN_ROOT   = Path("/workspace/O365_vehicle13_subset_2/images/train")
SCENE_DATASET_ROOT = Path("/workspace/scene_gate_cls")

print("MIO_TCD_TRAIN_DIR:", MIO_TCD_TRAIN_DIR)
print("O365_TRAIN_ROOT:", O365_TRAIN_ROOT)
print("SCENE_DATASET_ROOT:", SCENE_DATASET_ROOT)

# Sampling caps
MAX_PER_CLASS = 8000   
TRAIN_PCT, VAL_PCT, TEST_PCT = 0.8, 0.1, 0.1
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

def rglob_jpgs(root: Path):
    return [p for p in root.rglob("*.jpg")]

def collect_o365_jpgs(o365_root: Path):
    jpgs = []
    # Reduced range to 20 to save time scanning if you have many patches
    for i in range(51): 
        patch = o365_root / f"patch{i}"
        if patch.exists():
            jpgs.extend(rglob_jpgs(patch))
    return jpgs

def split_list(items, train_pct, val_pct, test_pct):
    # ‚úÖ FIXED: Items are already shuffled before entering this function now, 
    # but shuffling again here doesn't hurt.
    items = list(items)
    random.shuffle(items)
    n = len(items)
    n_train = int(n * train_pct)
    n_val   = int(n * val_pct)
    return items[:n_train], items[n_train:n_train+n_val], items[n_train+n_val:]

def link_or_copy(src: Path, dst: Path):
    dst.parent.mkdir(parents=True, exist_ok=True)
    try:
        os.symlink(src, dst)
    except OSError:
        shutil.copy2(src, dst)

def hashed_jpg_name(path: Path):
    return f"{hashlib.md5(str(path).encode()).hexdigest()}.jpg"

# --- Collect ---
print("Scanning for images...")
mio_jpgs  = rglob_jpgs(MIO_TCD_TRAIN_DIR)
o365_jpgs = collect_o365_jpgs(O365_TRAIN_ROOT)

if not mio_jpgs: raise RuntimeError(f"No JPGs found under {MIO_TCD_TRAIN_DIR}")
if not o365_jpgs: raise RuntimeError(f"No JPGs found under {O365_TRAIN_ROOT}")

# --- ‚úÖ FIXED: SHUFFLE BEFORE SLICING ---
# This ensures we get a random mix of cameras/scenes, not just the first folder's contents.
print("Shuffling data to ensure diversity...")
random.shuffle(mio_jpgs)
random.shuffle(o365_jpgs)

# --- Cap & balance ---
mio_jpgs  = mio_jpgs[:MAX_PER_CLASS]
o365_jpgs = o365_jpgs[:MAX_PER_CLASS]
print(f"Using {len(mio_jpgs)} traffic_cam (MIO-TCD), {len(o365_jpgs)} normal_view (Objects365).")

# --- Split each class ---
mio_tr, mio_va, mio_te      = split_list(mio_jpgs,  TRAIN_PCT, VAL_PCT, TEST_PCT)
o365_tr, o365_va, o365_te   = split_list(o365_jpgs, TRAIN_PCT, VAL_PCT, TEST_PCT)

splits = {
    "train": {"traffic_cam": mio_tr, "normal_view": o365_tr},
    "val":   {"traffic_cam": mio_va, "normal_view": o365_va},
    "test":  {"traffic_cam": mio_te, "normal_view": o365_te},
}

# --- Materialize layout ---
# Clean up old runs to prevent mixing data
if SCENE_DATASET_ROOT.exists():
    print(f"Cleaning up old dataset at {SCENE_DATASET_ROOT}...")
    shutil.rmtree(SCENE_DATASET_ROOT)

for split, classes in splits.items():
    for cls_name, items in classes.items():
        dst_dir = SCENE_DATASET_ROOT / split / cls_name
        dst_dir.mkdir(parents=True, exist_ok=True)
        # print(f"Creating {dst_dir} ({len(items)} files)")
        for src in tqdm(items, desc=f"{split}/{cls_name}", leave=False):
            dst = dst_dir / hashed_jpg_name(src)
            link_or_copy(src, dst)

print("\n‚úÖ Dataset ready at:", SCENE_DATASET_ROOT)

# --- Train ---
gate = YOLO("yolov8n-cls.pt")

results = gate.train(
    data=str(SCENE_DATASET_ROOT),
    epochs=30,              
    imgsz=224,             # ‚úÖ FIXED: Changed from 640 to 224 to match inference size
    batch=64,              # Explicit batch size often helps stability on Colab
    workers=8,
    patience=5,
    verbose=True,
    project="/content/gate_run",
    name="gate_model"
)

# Evaluate
gate.val(data=str(SCENE_DATASET_ROOT), imgsz=224, split="test")

MIO_TCD_TRAIN_DIR: /workspace/MIO-TCD-Localization/images/train
O365_TRAIN_ROOT: /workspace/O365_vehicle13_subset_2/images/train
SCENE_DATASET_ROOT: /workspace/scene_gate_cls
Scanning for images...
Shuffling data to ensure diversity...
Using 8000 traffic_cam (MIO-TCD), 8000 normal_view (Objects365).
Cleaning up old dataset at /workspace/scene_gate_cls...


                                                                         


‚úÖ Dataset ready at: /workspace/scene_gate_cls
New https://pypi.org/project/ultralytics/8.3.229 available üòÉ Update with 'pip install -U ultralytics'
Ultralytics 8.3.228 üöÄ Python-3.12.11 torch-2.9.1+cu128 CUDA:0 (NVIDIA H200 NVL, 143156MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=64, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/workspace/scene_gate_cls, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=30, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=224, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n-cls.pt, momentum=0.

ultralytics.utils.metrics.ClassifyMetrics object with attributes:

confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7139a1ff54c0>
curves: []
curves_results: []
fitness: 1.0
keys: ['metrics/accuracy_top1', 'metrics/accuracy_top5']
results_dict: {'metrics/accuracy_top1': 1.0, 'metrics/accuracy_top5': 1.0, 'fitness': 1.0}
save_dir: PosixPath('/workspace/runs/classify/val4')
speed: {'preprocess': 0.03485475142952055, 'inference': 0.2199457329697907, 'loss': 0.0032153096981346607, 'postprocess': 0.0031859762384556234}
task: 'classify'
top1: 1.0
top5: 1.0

In [8]:
!ls /workspace/MIO-TCD-Localization/images/train | wc -l

104500


In [2]:
# Evaluate on the held-out test split
gate.val(data=str(SCENE_DATASET_ROOT), imgsz=640, split="test")

Ultralytics 8.3.228 üöÄ Python-3.12.11 torch-2.9.1+cu128 CUDA:0 (NVIDIA H200 NVL, 143156MiB)
[34m[1mtrain:[0m /workspace/scene_gate_cls/train... found 12800 images in 2 classes ‚úÖ 
[34m[1mval:[0m /workspace/scene_gate_cls/val... found 1600 images in 2 classes ‚úÖ 
[34m[1mtest:[0m /workspace/scene_gate_cls/test... found 1600 images in 2 classes ‚úÖ 
[34m[1mtest: [0mFast image access ‚úÖ (ping: 0.0¬±0.0 ms, read: 2079.9¬±795.4 MB/s, size: 230.0 KB)
[K[34m[1mtest: [0mScanning /workspace/scene_gate_cls/test... 1600 images, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 1600/1600 4.6Mit/s 0.0s0s
[K               classes   top1_acc   top5_acc: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 100/100 29.0it/s 3.4s.0s
                   all      0.993          1
Speed: 0.2ms preprocess, 0.3ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1m/workspace/runs/classify/val5[0m


ultralytics.utils.metrics.ClassifyMetrics object with attributes:

confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x71359211cf50>
curves: []
curves_results: []
fitness: 0.9965625107288361
keys: ['metrics/accuracy_top1', 'metrics/accuracy_top5']
results_dict: {'metrics/accuracy_top1': 0.9931250214576721, 'metrics/accuracy_top5': 1.0, 'fitness': 0.9965625107288361}
save_dir: PosixPath('/workspace/runs/classify/val5')
speed: {'preprocess': 0.207974185468629, 'inference': 0.3416968247620389, 'loss': 0.0004610934411175549, 'postprocess': 0.0005796097684651613}
task: 'classify'
top1: 0.9931250214576721
top5: 1.0