# Data Loading and Inspection

In [None]:
# ==========================================
# Cell 1) Detection datasets discovery & inspection (FINAL)
#   - How many datasets are found
#   - train/val image count per dataset
#   - Whether label cases (original/scale/side) exist
#   - Output estimated class count/names (multiclass-based)
#
# [UPDATED]
#   ✅ Apply per-dataset split-structure rules (keyed by dataset name)
#     * BCCD, brain-tumor, Custom_Blood, homeobjects-3K, kitti, medical-pills, signature
#         images/labels -> train, val
#     * coco
#         images/labels -> train2017, val2017, test2017
#     * construction-ppe, african-wildlife
#         images/labels -> train, val, test
#     * lvis
#         images/labels -> train2017, val2017, test2017
#     * SKU-110K
#         images/labels -> no subfolders
#         -> seed-based 8:2 virtual split
#     * VOC
#         images/labels -> test2007, train2007, train2012, val2007, val2012
#         -> use train2012 / val2012 only
# ==========================================

from __future__ import annotations

import os, sys, random, shutil
from pathlib import Path
from typing import List, Tuple, Optional, Dict

# -------------------------------------------------------------------------
# 0) Register PROJECT_MODULE_DIR
#    Put the project-local module directory at the front of sys.path so the
#    imports below resolve; the membership check keeps re-runs of this cell
#    from inserting the same entry twice.
# -------------------------------------------------------------------------
PROJECT_MODULE_DIR = Path("/home/ISW/project/Project_Module")
if str(PROJECT_MODULE_DIR) not in sys.path:
    sys.path.insert(0, str(PROJECT_MODULE_DIR))

# -------------------------------------------------------------------------
# 1) ultra_det_loader
#    Project-local module; discover_det_datasets() is called later with
#    LOAD_DIR and each returned item is read through its `.root` attribute.
# -------------------------------------------------------------------------
from ultra_det_loader import discover_det_datasets

# -------------------------------------------------------------------------
# 2) noisy_insection (only the scale / boundary-jitter case lists are used)
#    If the import fails for any reason, fall back to the small built-in
#    grids below.
#    NOTE(review): `except Exception` also hides errors raised *inside* the
#    module, not just a missing module -- confirm this is intended.
# -------------------------------------------------------------------------
try:
    from noisy_insection import UNIFORM_SCALING_FACTORS, JITTER_PATTERNS
except Exception:
    UNIFORM_SCALING_FACTORS = [0.6, 0.8, 1.2, 1.4]
    JITTER_PATTERNS = [3, 5, 7]

# -------------------------------------------------------------------------
# User config
# -------------------------------------------------------------------------
# Directory handed to discover_det_datasets().
LOAD_DIR = "/home/ISW/project/datasets"
# Seed for set_seed() and for the SKU-110K virtual split.
SEED = 42

# Image extensions (compared against the lower-cased file suffix)
_IMG_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp"}

def set_seed(seed: int = 42):
    """Seed Python's global ``random`` generator so later draws are repeatable."""
    random.seed(a=seed)

def list_images(dir_path: Optional[Path]) -> List[Path]:
    """Recursively collect image files under ``dir_path``.

    Args:
        dir_path: Directory to scan; ``None`` or a non-existent path is allowed.

    Returns:
        Sorted list of files whose lower-cased suffix is in ``_IMG_EXTS``;
        an empty list when ``dir_path`` is ``None`` or does not exist.
    """
    if dir_path is None:
        return []
    root = Path(dir_path)
    if not root.exists():
        return []
    return sorted(
        entry
        for entry in root.rglob("*")
        if entry.is_file() and entry.suffix.lower() in _IMG_EXTS
    )

def normalize_name(name: str) -> str:
    """Return a comparison key for a dataset name.

    The name is stripped and lower-cased, then underscores and spaces are
    replaced with hyphens so that e.g. ``Custom_Blood`` and ``custom-blood``
    compare equal.
    """
    return name.strip().lower().replace("_", "-").replace(" ", "-")

# -------------------------------------------------------------------------
# Legacy heuristic (fallback)
# -------------------------------------------------------------------------
def _fallback_train_dir(images_root: Path) -> Path:
    if (images_root / "train").is_dir():
        return images_root / "train"
    return images_root

def _fallback_val_dir(images_root: Path) -> Optional[Path]:
    if (images_root / "val").is_dir():
        return images_root / "val"
    if (images_root / "valid").is_dir():
        return images_root / "valid"
    return None

# -------------------------------------------------------------------------
# ✅ Dataset-specific split rules
# -------------------------------------------------------------------------
# Datasets whose images/ folder holds exactly "train" and "val" sub-folders.
# Entries are compared against normalize_name() output, so they must be
# lower-case and hyphen-separated.
_SIMPLE_TRAIN_VAL = {
    "bccd",
    "brain-tumor",
    "custom-blood",
    "homeobjects-3k",
    "kitti",
    "medical-pills",
    "signature",
}

# Datasets that additionally ship a "test" sub-folder next to train/val.
_TRAIN_TEST_VAL = {
    "construction-ppe",
    "african-wildlife",
}

def detect_split_dirs(ds_root: Path) -> Dict[str, Optional[Path]]:
    """
    Map a dataset root to its train/val/test image folders.

    The dataset is identified by the normalized name of ``ds_root``; rules
    are tried in this order: VOC, COCO/LVIS (substring match), plain
    train/val, train/val/test, SKU-110K virtual split, generic fallback.
    Folders named by an explicit rule are not checked for existence.

    Returns:
        {
          "train_img_dir": Path|None,
          "val_img_dir": Path|None,
          "test_img_dir": Path|None,
          "split_mode": str,  # "explicit" | "sku_virtual_8_2" | "fallback"
          "train_tag": str,
          "val_tag": str,
        }
    """
    ds_name = normalize_name(ds_root.name)
    images_root = ds_root / "images"

    def _explicit(train_tag, val_tag, test_tag=None):
        # All explicit rules share one shape; only the sub-folder names differ.
        return {
            "train_img_dir": images_root / train_tag,
            "val_img_dir": images_root / val_tag,
            "test_img_dir": (images_root / test_tag) if test_tag else None,
            "split_mode": "explicit",
            "train_tag": train_tag,
            "val_tag": val_tag,
        }

    # 1) VOC: only the 2012 train/val folders are used.
    if ds_name == "voc":
        return _explicit("train2012", "val2012")

    # 2) COCO / LVIS: any name containing either token uses the *2017 layout.
    if "coco" in ds_name or "lvis" in ds_name:
        return _explicit("train2017", "val2017", "test2017")

    # 3) Plain train/val layout.
    if ds_name in _SIMPLE_TRAIN_VAL:
        return _explicit("train", "val")

    # 4) train/val/test layout (test is reported, train/val drive the summary).
    if ds_name in _TRAIN_TEST_VAL:
        return _explicit("train", "val", "test")

    # 5) SKU-110K: flat images folder, so train and val both point at it and
    #    the caller performs a seeded 8:2 virtual split.
    if "sku" in ds_name and "110k" in ds_name:
        return {
            "train_img_dir": images_root,
            "val_img_dir": images_root,
            "test_img_dir": None,
            "split_mode": "sku_virtual_8_2",
            "train_tag": "virtual_8_2",
            "val_tag": "virtual_8_2",
        }

    # 6) Unknown dataset: probe for conventional folder names.
    tr = _fallback_train_dir(images_root)
    va = _fallback_val_dir(images_root)
    return {
        "train_img_dir": tr,
        "val_img_dir": va,
        "test_img_dir": None,
        "split_mode": "fallback",
        "train_tag": tr.name if tr else "unknown",
        "val_tag": va.name if va else "missing",
    }

# -------------------------------------------------------------------------
# Class name estimation
# -------------------------------------------------------------------------
def infer_class_names_from_labels(label_root: Path, max_files: int = 2000) -> List[str]:
    """Estimate placeholder class names from YOLO-style ``*.txt`` label files.

    Scans up to ``max_files`` label files under ``label_root`` (recursively),
    reads the first field of every line with at least five fields as the class
    id, and returns ``class_0 .. class_<max id>``.

    Args:
        label_root: Root folder of the label files; ``None`` or a missing
            path is accepted.
        max_files: Upper bound on the number of label files inspected.

    Returns:
        ``["class_0"]`` when no usable class id is found, otherwise one
        generated name per id from 0 up to the largest id seen.
    """
    default = ["class_0"]
    if label_root is None or not label_root.exists():
        return default

    # Sort before truncating so the inspected subset is the same on every run
    # (rglob order depends on the filesystem).
    label_files = sorted(label_root.rglob("*.txt"))[:max_files]

    cls_ids = set()
    for label_file in label_files:
        try:
            with open(label_file, "r", encoding="utf-8") as f:
                for line in f:
                    parts = line.split()
                    if len(parts) < 5:
                        continue
                    # Parse per line: one malformed row must not discard the
                    # remaining rows of the same file.
                    try:
                        cid = int(float(parts[0]))
                    except (ValueError, OverflowError):
                        continue
                    if cid >= 0:  # negative ids cannot name a class
                        cls_ids.add(cid)
        except (OSError, UnicodeDecodeError):
            # Unreadable file: best effort, skip it.
            continue

    if not cls_ids:
        return default
    return [f"class_{i}" for i in range(max(cls_ids) + 1)]

# -------------------------------------------------------------------------
# Label case detection
# -------------------------------------------------------------------------
def list_label_cases_for_dataset(ds_root: Path) -> List[Tuple[str, str]]:
    """List the label variants that exist on disk for one dataset.

    Candidates are, in order: the original ``labels`` folder, one
    ``labels_uniform_scaling_<s>`` folder per entry of
    ``UNIFORM_SCALING_FACTORS`` and one ``labels_boundary_jitter_<k>`` folder
    per entry of ``JITTER_PATTERNS``.

    Returns:
        ``(case_tag, folder_name)`` pairs for every candidate folder that is a
        directory under ``ds_root``; tags are ``original``, ``scale_<s>`` and
        ``side_<k>``.
    """
    candidates: List[Tuple[str, str]] = [("original", "labels")]
    candidates += [
        (f"scale_{s}", f"labels_uniform_scaling_{s}") for s in UNIFORM_SCALING_FACTORS
    ]
    candidates += [
        (f"side_{k}", f"labels_boundary_jitter_{k}") for k in JITTER_PATTERNS
    ]
    return [
        (tag, dirname)
        for tag, dirname in candidates
        if (ds_root / dirname).is_dir()
    ]

# -------------------------------------------------------------------------
# SKU-110K virtual split count
# -------------------------------------------------------------------------
def compute_sku_virtual_counts(images_root: Path, seed: int = 42, ratio: float = 0.8) -> Tuple[int, int]:
    """Return the ``(n_train, n_val)`` sizes of the SKU-110K virtual split.

    Args:
        images_root: Flat image folder that is split virtually.
        seed: Accepted for interface compatibility only; the split sizes do
            not depend on which images land on which side, so no shuffle is
            performed here.
        ratio: Fraction of images assigned to the training side.

    Returns:
        ``(int(n * ratio), n - int(n * ratio))`` where ``n`` is the number of
        images found by ``list_images``; ``(0, 0)`` for an empty folder.
    """
    n = len(list_images(images_root))
    cut = int(n * ratio)
    return cut, n - cut

# -------------------------------------------------------------------------
# Discover dataset roots
# -------------------------------------------------------------------------
# Seed the global RNG before anything else in this cell draws from it.
set_seed(SEED)

# discover_det_datasets() may report the same root more than once, so keep
# only the first occurrence of each root while preserving discovery order.
specs = discover_det_datasets(LOAD_DIR)
roots: List[Path] = []
for s in specs:
    r = Path(s.root)
    if r not in roots:
        roots.append(r)

print("=" * 80)
print(f"[DISCOVERY] Found {len(roots)} unique dataset roots under: {Path(LOAD_DIR).resolve()}")
print("=" * 80)

# -------------------------------------------------------------------------
# Per-dataset summary
# -------------------------------------------------------------------------
# One dict per usable dataset; Cell 2 looks these up by normalized dataset name.
dataset_summaries: List[Dict] = []

for ds_root in roots:
    ds_root = Path(ds_root)
    images_root = ds_root / "images"
    labels_root = ds_root / "labels"

    # A dataset is only summarized when both images/ and labels/ exist.
    if not images_root.is_dir() or not labels_root.is_dir():
        print(f"‚è≠Ô∏è  Skip (missing images/labels): {ds_root}")
        continue

    # Resolve which sub-folders act as train/val for this dataset.
    split_info = detect_split_dirs(ds_root)
    train_dir = split_info["train_img_dir"]
    val_dir   = split_info["val_img_dir"]
    split_mode = split_info["split_mode"]
    train_tag  = split_info.get("train_tag", "train")
    val_tag    = split_info.get("val_tag", "val")

    # --- Calculate image count ---
    # The virtual split shares one physical folder, so its sizes are derived
    # from the total count instead of counting two directories.
    if split_mode == "sku_virtual_8_2":
        n_train, n_val = compute_sku_virtual_counts(images_root, seed=SEED, ratio=0.8)
    else:
        n_train = len(list_images(train_dir))
        n_val   = len(list_images(val_dir)) if val_dir else 0

    # Label variants present on disk, and class names inferred from the
    # original labels only.
    cases = list_label_cases_for_dataset(ds_root)
    class_names = infer_class_names_from_labels(labels_root)
    nc = len(class_names)

    # Paths are stored as strings (or None); only the case tags are kept
    # from `cases`, not the folder names.
    info = {
        "dataset": ds_root.name,
        "root": str(ds_root),
        "images_root": str(images_root),
        "labels_root": str(labels_root),
        "train_dir": str(train_dir) if train_dir else None,
        "val_dir": str(val_dir) if val_dir else None,
        "n_train": n_train,
        "n_val": n_val,
        "split_mode": split_mode,
        "train_tag": train_tag,
        "val_tag": val_tag,
        "label_cases": [c[0] for c in cases],
        "nc_inferred": nc,
        "class_names_inferred": class_names,
    }
    dataset_summaries.append(info)

    print("\n" + "-" * 80)
    print(f"[Dataset] {ds_root.name}")
    print(f" - root        : {ds_root}")
    print(f" - split_mode  : {split_mode}")
    print(f" - train_dir   : {train_dir if train_dir else '(missing)'} | tag={train_tag} | n_train={n_train}")
    print(f" - val_dir     : {val_dir if val_dir else '(missing)'} | tag={val_tag} | n_val={n_val}")

    # If test structure exists, just report existence (summary counts focus on train/val)
    test_dir = split_info.get("test_img_dir", None)
    if test_dir and test_dir.is_dir():
        n_test = len(list_images(test_dir))
        print(f" - test_dir    : {test_dir} | n_test={n_test}")

    print(f" - label_cases : {[c[0] for c in cases] if cases else '(none)'}")
    print(f" - inferred classes (multiclass-based): nc={nc}, names={class_names}")
    print("-" * 80)

print("\n‚úÖ Cell 1 done.")
print(f"   -> dataset_summaries length = {len(dataset_summaries)}")
print("   -> roots variable is ready for Cell 2.")

[DISCOVERY] Found 13 unique dataset roots under: /home/ISW/project/datasets

--------------------------------------------------------------------------------
[Dataset] SKU-110K
 - root        : /home/ISW/project/datasets/SKU-110K
 - split_mode  : sku_virtual_8_2
 - train_dir   : /home/ISW/project/datasets/SKU-110K/images | tag=virtual_8_2 | n_train=9394
 - val_dir     : /home/ISW/project/datasets/SKU-110K/images | tag=virtual_8_2 | n_val=2349
 - label_cases : ['original', 'scale_0.6', 'scale_0.7', 'scale_0.8', 'scale_0.9', 'scale_1.1', 'scale_1.2', 'scale_1.3', 'scale_1.4', 'side_1', 'side_3', 'side_5', 'side_7', 'side_9']
 - inferred classes (multiclass-based): nc=1, names=['class_0']
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
[Dataset] kitti
 - root        : /home/ISW/project/datasets/kitti
 - split_mode  : explicit
 - train_dir   : /home/ISW/project/datasets/kitti/

# YOLO Model Training

In [2]:
# ==========================================
# Cell 2) Train & Validate (FAST & ROBUST) — FINAL OPTIMIZED + OBJECT-ONLY
#   - Supports both multi-class and object-only modes
#   - Select only required modes from CLASS_MODES
#   - Select datasets to use via TARGET_DATASETS
#   - ✅ NEW: Control training with original / uniform_scaling_noise / boundary_jitter_noise flags
# ==========================================

from __future__ import annotations

import os, json, shutil, random, csv, sys, time
from pathlib import Path
from typing import List, Dict, Tuple, Optional
from contextlib import contextmanager

# ✅ Libraries for image integrity check
try:
    import cv2
    from PIL import Image
    from tqdm import tqdm
except ImportError:
    print("‚ö†Ô∏è Required libraries are missing. Please install: pip install opencv-python pillow tqdm")
    sys.exit(1)

# ✅ OOM fragmentation mitigation
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")

import torch
from ultralytics import YOLO
import logging
logging.getLogger("ultralytics").setLevel(logging.ERROR)

# -------------------------------------------------------------------------
# ✅ FIXED grids (Speed Optimized)
#   Note: These values are the candidate labels_* folders for training.
#         They must match the actual generated labels_boundary_jitter_K /
#         labels_uniform_scaling_S folder names for a case to be detected.
# -------------------------------------------------------------------------
# Suffixes of the labels_uniform_scaling_<S> / labels_boundary_jitter_<K>
# folders that are probed as training cases.
UNIFORM_SCALING_FACTORS = [0.6, 0.7, 0.8, 0.9, 1.1, 1.2, 1.3, 1.4]
JITTER_PATTERNS = [1, 3, 5, 7, 9]

# -------------------------------------------------------------------------
# ✅ NEW: Train-case control flags
#   - Control whether the original labels and each noise-label family are
#     included in training
# -------------------------------------------------------------------------
TRAIN_USE_ORIGINAL    = False
TRAIN_USE_UNIFORM_SCALING_NOISE = False
TRAIN_USE_BOUNDARY_JITTER_NOISE  = True
# e.g. original only:        TRAIN_USE_ORIGINAL=True,  TRAIN_USE_UNIFORM_SCALING_NOISE=False, TRAIN_USE_BOUNDARY_JITTER_NOISE=False
# e.g. boundary jitter only: TRAIN_USE_ORIGINAL=False, TRAIN_USE_UNIFORM_SCALING_NOISE=False, TRAIN_USE_BOUNDARY_JITTER_NOISE=True

# -------------------------------------------------------------------------
# ✅ Speed control
# -------------------------------------------------------------------------
# Fraction of the training images to use; values >= 1.0 disable sub-sampling.
TRAIN_FRACTION = 1.0
# Lower bound on the sub-sample size (still capped by the images available).
TRAIN_MIN_IMAGES = 50
# At most 8 workers; falls back to 4 when os.cpu_count() returns None.
NUM_WORKERS = min(8, os.cpu_count() or 4)

# -------------------------------------------------------------------------
# User config
# -------------------------------------------------------------------------
IMG_SIZE = 640
EPOCHS = 10
BATCH = 32
DEVICE = "0"
SEED = 42

# The three working directories below are created as soon as this cell runs.
OUT_ROOT = Path("/home/ISW/project/object_detect")
OUT_ROOT.mkdir(parents=True, exist_ok=True)

# Root under which per-case runtime dataset views are built.
RUNTIME_VROOT_BASE = Path("/home/ISW/project/_runtime_dataset_views")
RUNTIME_VROOT_BASE.mkdir(parents=True, exist_ok=True)

# Quarantine folder for images that fail the integrity scan.
CORRUPT_BACKUP_DIR = Path("/home/ISW/project/_corrupt_files_backup")
CORRUPT_BACKUP_DIR.mkdir(parents=True, exist_ok=True)

CLEANUP_RUNTIME_VROOT = True
SILENCE_ULTRA_OUTPUT = True

# ✅ Select which class modes to run
CLASS_MODES = ["multiclass", "object_only"]

# ✅ Select datasets to use (by folder name, case-insensitive)
#    Commented-out entries are excluded; None means no filter set is built.
TARGET_DATASETS: Optional[List[str]] = [
    # "SKU-110K",
    "kitti",
    "homeobjects-3K",
    "african-wildlife",
    "construction-ppe",
    "Custom_Blood",
    "brain-tumor",
    "BCCD",
    "signature",
    "medical-pills",
    # "coco",
    # "lvis",
    "VOC",
]
# Stripped, lower-cased copy for case-insensitive matching. Note that "_" is
# NOT folded to "-" here, unlike normalize_name().
TARGET_DATASETS_LOWER = (
    {name.strip().lower() for name in TARGET_DATASETS}
    if TARGET_DATASETS is not None
    else None
)

@contextmanager
def suppress_output(enabled: bool = True):
    """Temporarily route ``sys.stdout`` and ``sys.stderr`` to ``os.devnull``.

    Args:
        enabled: When False the context manager does nothing.

    The previous streams are restored and the devnull handle is closed on
    exit, including when the wrapped block raises.
    """
    if enabled:
        saved_streams = (sys.stdout, sys.stderr)
        sink = open(os.devnull, "w")
        try:
            sys.stdout = sys.stderr = sink
            yield
        finally:
            sys.stdout, sys.stderr = saved_streams
            sink.close()
    else:
        yield

# -------------------------------------------------------------------------
# Model specs
# -------------------------------------------------------------------------
# Candidate checkpoint filenames per model family.
# NOTE(review): presumably tried in list order via choose_model() -- confirm
# against the training loop, which is outside this section.
YOLOV8N_CKPT_CANDIDATES = ["yolov8n.pt"]
YOLO11N_CKPT_CANDIDATES = ["yolo11n.pt", "yolov11n.pt"]
DETR_CKPT_CANDIDATES   = ["rtdetr-s.pt", "rtdetr-l.pt"]

# (model name, candidate checkpoint list) pairs.
MODEL_SPECS = [
    ("yolov8n", YOLOV8N_CKPT_CANDIDATES),
    ("yolo11n", YOLO11N_CKPT_CANDIDATES),
    ("detr",    DETR_CKPT_CANDIDATES),
]

# -------------------------------------------------------------------------
# Utils
# -------------------------------------------------------------------------
# Image extensions (compared against the lower-cased file suffix).
_IMG_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp"}

def set_seed(seed: int = 42):
    """Seed Python's ``random`` and PyTorch (CPU, plus all CUDA devices if present)."""
    random.seed(seed)
    torch.manual_seed(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

def list_images(dir_path: Optional[Path]) -> List[Path]:
    """Recursively collect image files under ``dir_path``.

    Args:
        dir_path: Directory to scan; ``None`` or a non-existent path is allowed.

    Returns:
        Sorted list of files whose lower-cased suffix is in ``_IMG_EXTS``;
        an empty list when ``dir_path`` is ``None`` or does not exist.
    """
    if dir_path is None:
        return []
    root = Path(dir_path)
    if not root.exists():
        return []
    found = [
        entry
        for entry in root.rglob("*")
        if entry.is_file() and entry.suffix.lower() in _IMG_EXTS
    ]
    found.sort()
    return found

def _safe_symlink(src: Path, dst: Path):
    dst.parent.mkdir(parents=True, exist_ok=True)
    if dst.exists() or dst.is_symlink():
        return
    os.symlink(str(src), str(dst))

def _safe_copytree(src: Path, dst: Path):
    if not src.exists():
        return
    dst.parent.mkdir(parents=True, exist_ok=True)
    shutil.copytree(src, dst, dirs_exist_ok=True)

def _link_or_copy(src: Path, dst: Path, prefer_symlink: bool = True):
    if not src.exists():
        return
    try:
        if src.is_dir():
            _safe_copytree(src, dst)
            return
        if prefer_symlink:
            _safe_symlink(src, dst)
            return
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dst)
    except Exception:
        try:
            dst.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(src, dst)
        except Exception:
            pass

def infer_class_names_from_labels(label_root: Path, max_files: int = 2000) -> List[str]:
    """Estimate placeholder class names from YOLO-style ``*.txt`` label files.

    Scans up to ``max_files`` label files under ``label_root`` (recursively),
    reads the first field of every line with at least five fields as the class
    id, and returns ``class_0 .. class_<max id>``.

    Args:
        label_root: Root folder of the label files; ``None`` or a missing
            path is accepted.
        max_files: Upper bound on the number of label files inspected.

    Returns:
        ``["class_0"]`` when no usable class id is found, otherwise one
        generated name per id from 0 up to the largest id seen.
    """
    default = ["class_0"]
    if label_root is None or not label_root.exists():
        return default

    # Sort before truncating so the inspected subset is the same on every run
    # (rglob order depends on the filesystem).
    label_files = sorted(label_root.rglob("*.txt"))[:max_files]

    cls_ids = set()
    for label_file in label_files:
        try:
            with open(label_file, "r", encoding="utf-8") as f:
                for line in f:
                    parts = line.split()
                    if len(parts) < 5:
                        continue
                    # Parse per line: one malformed row must not discard the
                    # remaining rows of the same file.
                    try:
                        cid = int(float(parts[0]))
                    except (ValueError, OverflowError):
                        continue
                    if cid >= 0:  # negative ids cannot name a class
                        cls_ids.add(cid)
        except (OSError, UnicodeDecodeError):
            # Unreadable file: best effort, skip it.
            continue

    if not cls_ids:
        return default
    return [f"class_{i}" for i in range(max(cls_ids) + 1)]

def choose_model(ckpt_candidates: List[str]) -> YOLO:
    """Load the first checkpoint in ``ckpt_candidates`` that ``YOLO`` accepts.

    Args:
        ckpt_candidates: Checkpoint names/paths, tried in order.

    Returns:
        The model built from the first candidate that loads.

    Raises:
        RuntimeError: When no candidate loads (also for an empty list); the
            last loading error, if any, is chained as the cause.
    """
    failure = None
    for weights in ckpt_candidates:
        try:
            model = YOLO(weights)
        except Exception as exc:
            failure = exc
        else:
            return model
    raise RuntimeError(f"Failed to load model weights: {ckpt_candidates}") from failure

def extract_metrics_dict(val_result) -> Dict:
    """Turn a validation result object into a plain metrics dict.

    Returns ``{}`` for ``None``. If the object exposes a dict-valued
    ``results_dict`` attribute, a copy of it is returned. Otherwise the
    object's ``__dict__`` is copied without the ``plots`` and ``speed``
    entries; any failure while doing so yields ``{}``.
    """
    if val_result is None:
        return {}

    results = getattr(val_result, "results_dict", None)
    if isinstance(results, dict):
        return dict(results)

    try:
        return {
            key: value
            for key, value in val_result.__dict__.items()
            if key not in ("plots", "speed")
        }
    except Exception:
        return {}

# -------------------------------------------------------------------------
# ✅ Label rewrite utility for object-only mode
# -------------------------------------------------------------------------
def rewrite_label_file_to_object_only(src: Path, dst: Path):
    """Copy a label file to ``dst`` with every class id replaced by ``0``.

    Lines with fewer than five whitespace-separated fields are dropped; kept
    lines are re-joined with single spaces. When ``src`` is missing, or when
    reading/writing fails, an empty ``dst`` is written instead (best effort:
    a failure to write even that is ignored).
    """
    dst.parent.mkdir(parents=True, exist_ok=True)

    def _write_empty():
        try:
            dst.write_text("", encoding="utf-8")
        except Exception:
            pass

    if not src.exists():
        _write_empty()
        return

    try:
        with open(src, "r", encoding="utf-8") as fin:
            rows = [raw.strip().split() for raw in fin]
        # Keep full annotation rows only, forcing the class column to "0".
        kept = [" ".join(["0"] + fields[1:]) for fields in rows if len(fields) >= 5]
        with open(dst, "w", encoding="utf-8") as fout:
            for row in kept:
                fout.write(row + "\n")
    except Exception:
        _write_empty()

# -------------------------------------------------------------------------
# ✅ Corrupt Image Cleaner
# -------------------------------------------------------------------------
def _image_is_corrupt(img_path: Path) -> bool:
    """Return True when either Pillow or OpenCV cannot read ``img_path``."""
    try:
        with Image.open(img_path) as im:
            im.verify()
    except Exception:
        return True
    try:
        return cv2.imread(str(img_path)) is None
    except Exception:
        return True


def _find_label_for_image(img_path: Path) -> Optional[Path]:
    """Return the first existing label file paired with ``img_path``, if any."""
    txt_name = img_path.with_suffix(".txt").name

    # Legacy location: <grandparent>/labels/<parent name>/<stem>.txt
    candidates = [img_path.parent.parent / "labels" / img_path.parent.name / txt_name]

    # Mirrored layout: swap the last "images" directory component for
    # "labels" (…/images/train/x.jpg -> …/labels/train/x.txt).
    parts = img_path.parts
    dir_parts = parts[:-1]
    image_dirs = [i for i, part in enumerate(dir_parts) if part == "images"]
    if image_dirs:
        idx = image_dirs[-1]
        candidates.append(Path(*parts[:idx], "labels", *dir_parts[idx + 1:], txt_name))

    # Sidecar label stored next to the image.
    candidates.append(img_path.with_suffix(".txt"))

    for candidate in candidates:
        if candidate.exists():
            return candidate
    return None


def scan_and_clean_images(dir_path: Path):
    """Move unreadable images (and their label file) out of ``dir_path``.

    Every image found by ``list_images(dir_path)`` is verified with Pillow
    and then decoded with OpenCV. Images failing either check are moved to
    ``CORRUPT_BACKUP_DIR/<dir_path.name>/`` together with the paired label
    file when one is found. Nothing happens when ``dir_path`` is missing or
    holds no images. Move failures are reported but never raised.

    Args:
        dir_path: Image directory to scan recursively.
    """
    if not dir_path.exists():
        return

    images = list_images(dir_path)
    if not images:
        return

    print(f"   üîç Scanning integrity of {len(images)} images in {dir_path.name}...")
    corrupt_count = 0
    for img_path in tqdm(images, desc="Checking", leave=False):
        if not _image_is_corrupt(img_path):
            continue

        corrupt_count += 1
        dest = CORRUPT_BACKUP_DIR / dir_path.name / img_path.name
        dest.parent.mkdir(parents=True, exist_ok=True)

        # Resolve the label before the image is moved away.
        label_path = _find_label_for_image(img_path)

        try:
            shutil.move(str(img_path), str(dest))
            if label_path is not None:
                shutil.move(str(label_path), str(dest.with_suffix(".txt")))
        except Exception as exc:
            # Best effort: keep scanning, but make the failure visible.
            print(f"   [WARN] Could not quarantine {img_path}: {exc}")

    if corrupt_count > 0:
        print(f"   ‚ö†Ô∏è  Moved {corrupt_count} corrupt images to {CORRUPT_BACKUP_DIR}")
    else:
        print(f"   ‚úÖ  No corrupt images found.")

# -------------------------------------------------------------------------
# Split/Case helpers
# -------------------------------------------------------------------------
def normalize_name(name: str) -> str:
    """Return a comparison key: stripped, lower-cased, with ``_``/space as ``-``."""
    folded = name.strip().lower()
    for separator in ("_", " "):
        folded = folded.replace(separator, "-")
    return folded

# Index the Cell 1 summaries by normalized dataset name.
# The broad except also covers `dataset_summaries` not being defined (Cell 1
# not run); in that case the map stays empty and the helpers below use their
# own fallback logic.
_ds_map: Dict[str, Dict] = {}
try:
    for info in dataset_summaries:
        key = normalize_name(info.get("dataset", ""))
        if key:
            _ds_map[key] = info
except Exception:
    _ds_map = {}

def get_split_info(ds_root: Path) -> Dict[str, Optional[Path]]:
    """Return the train/val image folders and split metadata for a dataset.

    When the dataset has an entry in ``_ds_map`` its recorded values are
    reused. Otherwise the folders are probed directly: ``images/train`` (else
    ``images``) for training and ``images/val`` or ``images/valid`` (else
    ``None``) for validation, with ``split_mode`` set to ``"fallback"``.

    Returns:
        Dict with keys ``train_img_dir``, ``val_img_dir``, ``split_mode``,
        ``train_tag`` and ``val_tag``.
    """
    key = normalize_name(ds_root.name)

    if key not in _ds_map:
        images_root = ds_root / "images"

        tr = images_root / "train"
        if not tr.is_dir():
            tr = images_root

        va = None
        for sub in ("val", "valid"):
            if (images_root / sub).is_dir():
                va = images_root / sub
                break

        return {
            "train_img_dir": tr,
            "val_img_dir": va,
            "split_mode": "fallback",
            "train_tag": tr.name if tr else "unknown",
            "val_tag": va.name if va else "missing",
        }

    info = _ds_map[key]
    recorded_train = info.get("train_dir")
    recorded_val = info.get("val_dir")
    return {
        # Summaries store paths as strings (or None); convert them back.
        "train_img_dir": Path(info["train_dir"]) if recorded_train else None,
        "val_img_dir": Path(info["val_dir"]) if recorded_val else None,
        "split_mode": info.get("split_mode", "fallback"),
        "train_tag": info.get("train_tag", "train"),
        "val_tag": info.get("val_tag", "val"),
    }

# -------------------------------------------------------------------------
# ✅ NEW: label case listing (flags reflected)
# -------------------------------------------------------------------------
def list_label_cases_for_dataset(ds_root: Path) -> List[Tuple[str, str]]:
    """List the label cases to train on for one dataset.

    A case is included when (a) its family is enabled by the matching
    ``TRAIN_USE_*`` flag, (b) its label folder exists under ``ds_root`` and
    (c) its tag appears in the ``label_cases`` list recorded in ``_ds_map``
    for this dataset, if such a list exists.

    Returns:
        ``(case_tag, folder_name)`` pairs ordered original, scaling, jitter.
    """
    key = normalize_name(ds_root.name)

    # Tags recorded earlier for this dataset; None means "no restriction".
    known_tags = None
    if key in _ds_map:
        recorded = _ds_map[key].get("label_cases")
        if isinstance(recorded, list):
            known_tags = set(recorded)

    candidates: List[Tuple[str, str]] = []
    if TRAIN_USE_ORIGINAL:
        candidates.append(("original", "labels"))
    if TRAIN_USE_UNIFORM_SCALING_NOISE:
        candidates.extend(
            (f"scale_{s}", f"labels_uniform_scaling_{s}") for s in UNIFORM_SCALING_FACTORS
        )
    if TRAIN_USE_BOUNDARY_JITTER_NOISE:
        candidates.extend(
            (f"side_{k}", f"labels_boundary_jitter_{k}") for k in JITTER_PATTERNS
        )

    def _usable(tag: str, dirname: str) -> bool:
        return (ds_root / dirname).is_dir() and (known_tags is None or tag in known_tags)

    return [(tag, dirname) for tag, dirname in candidates if _usable(tag, dirname)]

def resolve_label_base(label_root: Path, split_tag: str, is_train: bool) -> Path:
    """Pick the label sub-folder that corresponds to one split.

    Args:
        label_root: Root of a label folder; ``None`` is returned unchanged.
        split_tag: Preferred sub-folder name (e.g. ``train2017``).
        is_train: Selects the conventional names tried when the tagged folder
            is absent: ``train`` for training, ``val`` then ``valid`` otherwise.

    Returns:
        ``label_root/<split_tag>`` if the tag is non-empty and the folder
        exists, else the first existing conventional folder, else
        ``label_root`` itself.
    """
    if label_root is None:
        return None

    tagged = label_root / split_tag
    if split_tag and tagged.is_dir():
        return tagged

    conventional = ("train",) if is_train else ("val", "valid")
    for sub in conventional:
        candidate = label_root / sub
        if candidate.is_dir():
            return candidate
    return label_root

# -------------------------------------------------------------------------
# SKU virtual split & Runtime View (unchanged)
# -------------------------------------------------------------------------
def sku_virtual_split_images(images_root: Path, seed: int = 42, ratio: float = 0.8) -> Tuple[List[Path], List[Path]]:
    """Split the images under ``images_root`` into seeded train/val lists.

    The sorted image list is indexed through a permutation shuffled with a
    private ``random.Random(seed)``; the first ``int(n * ratio)`` indices form
    the training list and the remainder the validation list.

    Returns:
        ``(train_images, val_images)``; two empty lists when no image exists.
    """
    imgs = list_images(images_root)
    if not imgs:
        return [], []

    order = list(range(len(imgs)))
    random.Random(seed).shuffle(order)

    cut = int(len(imgs) * ratio)
    train_part = [imgs[i] for i in order[:cut]]
    val_part = [imgs[i] for i in order[cut:]]
    return train_part, val_part

def sample_train_images(base_train_imgs: List[Path], fraction: float, seed: int) -> Tuple[List[Path], int]:
    """Sub-sample the training images.

    Args:
        base_train_imgs: Full list of training images.
        fraction: Share of images to keep; ``>= 1.0`` keeps the list as is.
        seed: Seed for the private RNG used for sampling.

    Returns:
        ``(chosen_images, total_count)``. The sample size is
        ``int(total * fraction)``, raised to ``TRAIN_MIN_IMAGES`` and capped
        at ``total``.
    """
    n_total = len(base_train_imgs)
    if not n_total:
        return [], 0
    if fraction >= 1.0:
        return base_train_imgs, n_total

    n_pick = min(n_total, max(TRAIN_MIN_IMAGES, int(n_total * fraction)))
    picked = random.Random(seed).sample(base_train_imgs, k=n_pick)
    return picked, n_total

def _link_with_copy_fallback(src: Path, dst: Path) -> None:
    """Symlink ``src`` at ``dst``; on any failure retry via ``_link_or_copy`` without symlinks."""
    try:
        _safe_symlink(src, dst)
    except Exception:
        _link_or_copy(src, dst, prefer_symlink=False)


def _materialize_view_label(src_lbl: Path, dst_lbl: Path, class_mode: str) -> None:
    """Create one label file of the runtime view for the given class mode."""
    if class_mode != "multiclass":
        rewrite_label_file_to_object_only(src_lbl, dst_lbl)
        return

    dst_lbl.parent.mkdir(parents=True, exist_ok=True)
    if src_lbl.exists():
        _link_with_copy_fallback(src_lbl, dst_lbl)
        return

    # No source label for this image: leave an empty label file in the view.
    # Still best-effort, but a failure is now reported instead of being hidden.
    try:
        dst_lbl.write_text("", encoding="utf-8")
    except OSError as e:
        print(f"  [WARN] Could not create empty label file {dst_lbl}: {e}")


def _populate_view_split(
    images: List[Path],
    images_base,
    label_base: Path,
    v_images_dir: Path,
    v_labels_dir: Path,
    class_mode: str,
) -> None:
    """Link each image and its ``.txt`` label into one split of the view, keeping relative paths."""
    for img in images:
        rel = img.relative_to(images_base)
        rel_lbl = rel.with_suffix(".txt")
        _link_with_copy_fallback(img, v_images_dir / rel)
        _materialize_view_label(label_base / rel_lbl, v_labels_dir / rel_lbl, class_mode)


def _write_view_data_yaml(vroot: Path, orig_label_root: Path, class_mode: str) -> Path:
    """Write ``data.yaml`` for the view and return its path."""
    if class_mode == "multiclass":
        names = infer_class_names_from_labels(orig_label_root)
    else:
        names = ["object"]
    nc = len(names)

    data_yaml = vroot / "data.yaml"
    data_yaml.write_text(
        f"path: {str(vroot)}\n"
        f"train: images/train\n"
        f"val: images/val\n"
        f"nc: {nc}\n"
        f"names: {names}\n",
        encoding="utf-8",
    )
    return data_yaml


def build_runtime_view_root(
    ds_root: Path,
    case_labels_dirname: str,
    train_fraction: float,
    seed: int,
    class_mode: str = "multiclass",
) -> Tuple[Path, Path, int, int, str]:
    """Build a throw-away dataset view (images, labels, data.yaml) for one run.

    The view lives under ``RUNTIME_VROOT_BASE / <dataset> / case__<labels>__<mode>``
    and is rebuilt from scratch on every call. Train labels come from
    ``ds_root / case_labels_dirname``; validation labels always come from
    ``ds_root / "labels"``. In ``object_only`` mode every label file is passed
    through ``rewrite_label_file_to_object_only``.

    Args:
        ds_root: Dataset root containing ``images/`` and the label folders.
        case_labels_dirname: Name of the label folder used for training.
        train_fraction: Fraction of training images to sample.
        seed: Seed for the SKU virtual split and for the training subsample.
        class_mode: ``"multiclass"`` or ``"object_only"``.

    Returns:
        ``(vroot, data_yaml, n_train_used, n_train_total, split_mode)``.

    Raises:
        ValueError: If ``class_mode`` is not one of the supported values.
        RuntimeError: If a split directory cannot be resolved or the resulting
            view contains no train (or, for standard splits, val) images.
    """
    # Explicit raise instead of `assert`, which is stripped under `python -O`.
    if class_mode not in ("multiclass", "object_only"):
        raise ValueError(f"Unsupported class_mode: {class_mode!r}")

    images_root     = ds_root / "images"
    orig_label_root = ds_root / "labels"
    case_label_root = ds_root / case_labels_dirname

    split_info = get_split_info(ds_root)
    train_img_dir = split_info["train_img_dir"]
    val_img_dir   = split_info["val_img_dir"]
    split_mode    = split_info["split_mode"]
    train_tag     = split_info.get("train_tag", "train")
    val_tag       = split_info.get("val_tag", "val")

    vroot    = RUNTIME_VROOT_BASE / ds_root.name / f"case__{case_labels_dirname}__{class_mode}"
    v_images = vroot / "images"
    v_labels = vroot / "labels"

    if vroot.exists():
        try:
            shutil.rmtree(vroot)
        except OSError as e:
            # Existing destinations are never overwritten by the link helper, so
            # leftovers from an earlier build would be reused; make that visible.
            print(f"  [WARN] Could not remove stale runtime view {vroot}: {e}")

    for split_dir in (v_images / "train", v_images / "val", v_labels / "train", v_labels / "val"):
        split_dir.mkdir(parents=True, exist_ok=True)

    # A) SKU virtual split: images/labels have no split sub-folders.
    if split_mode == "sku_virtual_8_2":
        base_train_imgs, base_val_imgs = sku_virtual_split_images(images_root, seed=seed, ratio=0.8)
        chosen_train_imgs, _ = sample_train_images(base_train_imgs, train_fraction, seed)

        _populate_view_split(
            chosen_train_imgs, images_root, case_label_root,
            v_images / "train", v_labels / "train", class_mode,
        )
        _populate_view_split(
            base_val_imgs, images_root, orig_label_root,
            v_images / "val", v_labels / "val", class_mode,
        )

        data_yaml = _write_view_data_yaml(vroot, orig_label_root, class_mode)

        if not list_images(v_images / "train"):
            raise RuntimeError(f"Runtime train images empty (SKU mode): {v_images/'train'}")

        return vroot, data_yaml, len(chosen_train_imgs), len(base_train_imgs), split_mode

    # B) Standard split: dedicated train/val image directories.
    if train_img_dir is None or not Path(train_img_dir).is_dir():
        raise RuntimeError(f"No train images dir resolved for {ds_root.name}")
    if val_img_dir is None or not Path(val_img_dir).is_dir():
        raise RuntimeError(f"No val images dir resolved for {ds_root.name}")

    all_train_imgs = list_images(train_img_dir)
    chosen_train_imgs, n_total_train = sample_train_images(all_train_imgs, train_fraction, seed)
    if n_total_train == 0 or len(chosen_train_imgs) == 0:
        raise RuntimeError(f"No train images for {ds_root.name}")

    case_train_lbl_base = resolve_label_base(case_label_root, train_tag, is_train=True)
    _populate_view_split(
        chosen_train_imgs, train_img_dir, case_train_lbl_base,
        v_images / "train", v_labels / "train", class_mode,
    )

    # Validation images and labels are taken over as a whole tree.
    _safe_copytree(Path(val_img_dir), v_images / "val")

    orig_val_lbl_base = resolve_label_base(orig_label_root, val_tag, is_train=False)
    if class_mode == "multiclass":
        _safe_copytree(orig_val_lbl_base, v_labels / "val")
    else:
        for src_lbl in orig_val_lbl_base.rglob("*.txt"):
            rel = src_lbl.relative_to(orig_val_lbl_base)
            rewrite_label_file_to_object_only(src_lbl, v_labels / "val" / rel)

    data_yaml = _write_view_data_yaml(vroot, orig_label_root, class_mode)

    if not list_images(v_images / "train"):
        raise RuntimeError(f"Runtime train images empty: {v_images/'train'}")
    if not list_images(v_images / "val"):
        raise RuntimeError(f"Runtime val images empty: {v_images/'val'}")

    return vroot, data_yaml, len(chosen_train_imgs), n_total_train, split_mode

# -------------------------------------------------------------------------
# OOM-safe train wrapper
# -------------------------------------------------------------------------
def train_with_auto_oom(model: YOLO, data_yaml: Path, project_dir: Path, name_dir: str, model_tag: str):
    """Run ``model.train`` and step down through (batch, imgsz) pairs on out-of-memory errors.

    For ``model_tag == "detr"`` a fixed ladder of progressively smaller
    settings is tried; every other model gets the single ``(BATCH, IMG_SIZE)``
    attempt. Only a ``RuntimeError`` whose message mentions "out of memory"
    moves on to the next pair; any other error stops immediately.

    Returns:
        ``(True, batch, imgsz, None)`` on success, otherwise
        ``(False, None, None, last_exception)``.
    """
    attempts = (
        [(4, 640), (2, 640), (2, 512), (1, 512)]
        if model_tag == "detr"
        else [(BATCH, IMG_SIZE)]
    )

    failure = None
    for batch_size, img_size in attempts:
        try:
            with suppress_output(SILENCE_ULTRA_OUTPUT):
                model.train(
                    data=str(data_yaml),
                    epochs=EPOCHS,
                    imgsz=img_size,
                    batch=batch_size,
                    device=DEVICE,
                    project=str(project_dir),
                    name=name_dir,
                    exist_ok=True,
                    verbose=False,
                    workers=NUM_WORKERS,
                    amp=True,
                )
        except RuntimeError as exc:
            failure = exc
            # "cuda out of memory" also contains this substring, so one check covers both.
            if "out of memory" not in str(exc).lower():
                break
            # Free cached GPU memory before retrying with the next, smaller setting.
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.ipc_collect()
        except Exception as exc:
            failure = exc
            break
        else:
            return True, batch_size, img_size, None

    return False, None, None, failure

# -------------------------------------------------------------------------
# Train & validate loop
# -------------------------------------------------------------------------
set_seed(SEED)

# Print the effective configuration once, framed by two rules.
_rule = "=" * 80
print(_rule)
print("[TRAIN/EVAL] Start (Cell 1 summaries-aware + runtime path safe + object-only)")
for _label, _value in (
    (" - OUT_ROOT             : ", OUT_ROOT),
    (" - RUNTIME_VROOT_BASE   : ", RUNTIME_VROOT_BASE),
    (" - TRAIN_FRACTION       : ", TRAIN_FRACTION),
    (" - NUM_WORKERS          : ", NUM_WORKERS),
    (" - CLASS_MODES          : ", CLASS_MODES),
    (" - CLEANUP_RUNTIME_VROOT: ", CLEANUP_RUNTIME_VROOT),
    (" - TRAIN_USE_ORIGINAL   : ", TRAIN_USE_ORIGINAL),
    (" - TRAIN_USE_UNIFORM_SCALING_NOISE: ", TRAIN_USE_UNIFORM_SCALING_NOISE),
    (" - TRAIN_USE_BOUNDARY_JITTER_NOISE : ", TRAIN_USE_BOUNDARY_JITTER_NOISE),
):
    print(f"{_label}{_value}")
_targets_desc = (
    "ALL (no filter)"
    if TARGET_DATASETS_LOWER is None
    else f"{sorted(TARGET_DATASETS_LOWER)}"
)
print(" - TARGET_DATASETS      : " + _targets_desc)
print(_rule)

# One dict per completed (dataset, case, class_mode, model) run.
summary_rows: List[Dict] = []

# `roots` is produced by Cell 1; fail with a readable message if it is missing.
try:
    _ = roots
except NameError:
    raise RuntimeError("Please run Cell 1 first to prepare the roots variable.")

# Percentage tag used in run directory names. round() rather than int(), so
# binary float noise (0.29 * 100 == 28.999...) cannot truncate the tag to "tr28"
# and make two different fractions share or mislabel an output directory.
frac_tag = f"tr{round(TRAIN_FRACTION * 100)}"

# Nesting: dataset -> label case -> class mode -> model.
# One runtime view is built per (case, class mode) and shared by all models.
for ds_root in roots:
    ds_root = Path(ds_root)

    # Optional dataset filter (case-insensitive match on the folder name).
    if TARGET_DATASETS_LOWER is not None:
        ds_name_lower = ds_root.name.strip().lower()
        if ds_name_lower not in TARGET_DATASETS_LOWER:
            print(f"‚è≠Ô∏è  Skip (not in TARGET_DATASETS): {ds_root.name}")
            continue

    images_root = ds_root / "images"
    labels_root = ds_root / "labels"

    if not images_root.is_dir() or not labels_root.is_dir():
        print(f"‚è≠Ô∏è  Skip (missing images/labels): {ds_root}")
        continue

    print(f"\n[Integrity Check] {ds_root.name}")
    scan_and_clean_images(images_root)

    cases = list_label_cases_for_dataset(ds_root)
    if not cases:
        print(f"‚è≠Ô∏è  Skip (no target label cases after flags/grid filter): {ds_root.name}")
        continue

    sp = get_split_info(ds_root)
    print("\n" + "-" * 80)
    print(f"[Dataset] {ds_root.name}")
    print(f" - split_mode : {sp.get('split_mode')}")
    print(f" - Cases      : {[c[0] for c in cases]}")
    print(f" - CLASS_MODES: {CLASS_MODES}")
    print("-" * 80)

    for case_tag, labels_dirname in cases:
        for class_mode in CLASS_MODES:
            try:
                vroot, data_yaml, n_used, n_total, split_mode = build_runtime_view_root(
                    ds_root=ds_root,
                    case_labels_dirname=labels_dirname,
                    train_fraction=TRAIN_FRACTION,
                    seed=SEED,
                    class_mode=class_mode,
                )
                pct = (n_used / max(1, n_total)) * 100.0
                print(f"  [Subset] case={case_tag} | class_mode={class_mode} | split_mode={split_mode} | train_used={n_used}/{n_total} ({pct:.1f}%)")
            except Exception as e:
                print(f"  ‚è≠Ô∏è  Skip build failed: case={case_tag} | class_mode={class_mode} | err={e}")
                continue

            for model_tag, ckpt_candidates in MODEL_SPECS:
                print(f"\n  [Train] case={case_tag} | class_mode={class_mode} | model={model_tag}")

                try:
                    model = choose_model(ckpt_candidates)
                except Exception as e:
                    print(f"    ‚ùå Model load failed: {ckpt_candidates} | err={e}")
                    continue

                project_dir = OUT_ROOT / ds_root.name
                name_dir = f"{model_tag}__{case_tag}__{class_mode}__{frac_tag}"
                project_dir.mkdir(parents=True, exist_ok=True)

                ok, used_b, used_sz, err = train_with_auto_oom(model, data_yaml, project_dir, name_dir, model_tag)
                if not ok:
                    print(f"    ‚ùå Train failed: {err}")
                    # Drop the failed model and release cached GPU memory before the next one.
                    del model
                    if torch.cuda.is_available():
                        torch.cuda.empty_cache()
                        torch.cuda.ipc_collect()
                    continue

                # Validate with the image size that training actually ended up using.
                try:
                    with suppress_output(SILENCE_ULTRA_OUTPUT):
                        val_res = model.val(
                            data=str(data_yaml),
                            imgsz=used_sz if used_sz else IMG_SIZE,
                            device=DEVICE,
                            split="val",
                            verbose=False,
                            workers=NUM_WORKERS,
                        )
                except Exception as e:
                    print(f"    ‚ùå Val failed: {e}")
                    val_res = None

                metrics = extract_metrics_dict(val_res)

                metrics_out = project_dir / name_dir / "metrics_eval.json"
                try:
                    metrics_out.parent.mkdir(parents=True, exist_ok=True)
                    with open(metrics_out, "w", encoding="utf-8") as f:
                        json.dump(
                            {
                                "dataset": ds_root.name,
                                "root": str(ds_root),
                                "case_tag": case_tag,
                                "labels_dirname": labels_dirname,
                                "class_mode": class_mode,
                                "model_tag": model_tag,
                                "ckpt_candidates": ckpt_candidates,
                                "train_fraction": TRAIN_FRACTION,
                                "train_used": n_used,
                                "train_total": n_total,
                                "data_yaml": str(data_yaml),
                                "runtime_view_root": str(vroot),
                                "split_mode": split_mode,
                                "effective_batch": used_b,
                                "effective_imgsz": used_sz,
                                "metrics": metrics,
                            },
                            f,
                            ensure_ascii=False,
                            indent=2,
                        )
                    print(f"    ‚úÖ Saved metrics: {metrics_out}")
                except Exception as e:
                    # Still non-fatal (the run is kept in the summary), but no longer silent.
                    print(f"    [WARN] Metrics save failed: {metrics_out} | err={e}")

                row = {
                    "dataset": ds_root.name,
                    "model": model_tag,
                    "case": case_tag,
                    "labels_dir": labels_dirname,
                    "class_mode": class_mode,
                    "split_mode": split_mode,
                    "train_fraction": TRAIN_FRACTION,
                    "train_used": n_used,
                    "train_total": n_total,
                    "effective_batch": used_b,
                    "effective_imgsz": used_sz,
                }
                # Only the headline metrics go into the summary row, when present.
                for k in ["metrics/mAP50(B)", "metrics/mAP50-95(B)", "metrics/precision(B)", "metrics/recall(B)"]:
                    if k in metrics:
                        row[k] = metrics[k]

                summary_rows.append(row)

                del model
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                    torch.cuda.ipc_collect()

            # The view is only needed while the models of this (case, class mode) run.
            if CLEANUP_RUNTIME_VROOT:
                shutil.rmtree(vroot, ignore_errors=True)

# -------------------------------------------------------------------------
# Save summary CSV
# -------------------------------------------------------------------------
out_csv = OUT_ROOT / "summary_final_optimized.csv"
try:
    # Fixed leading columns; any other key seen in a row follows in sorted order.
    base_cols = [
        "dataset", "model", "case", "labels_dir",
        "class_mode",
        "split_mode",
        "train_fraction", "train_used", "train_total",
        "effective_batch", "effective_imgsz",
    ]
    seen_keys = set()
    for r in summary_rows:
        seen_keys.update(r.keys())
    extra_cols = sorted(seen_keys.difference(base_cols))
    cols = base_cols + extra_cols

    with open(out_csv, "w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=cols)
        w.writeheader()
        w.writerows(summary_rows)

    rule = "=" * 80
    print("\n" + rule)
    print(f"‚úÖ Saved summary CSV: {out_csv}")
    print(f"‚úÖ Total runs: {len(summary_rows)}")
    print(rule)

except Exception as e:
    print(f"‚ö†Ô∏è  Summary CSV save failed: {e}")

print("\n‚úÖ Cell 2 done.")


[TRAIN/EVAL] Start (Cell 1 summaries-aware + runtime path safe + object-only)
 - OUT_ROOT             : /home/ISW/project/object_detect
 - RUNTIME_VROOT_BASE   : /home/ISW/project/_runtime_dataset_views
 - TRAIN_FRACTION       : 1.0
 - NUM_WORKERS          : 8
 - CLASS_MODES          : ['multiclass', 'object_only']
 - CLEANUP_RUNTIME_VROOT: True
 - TRAIN_USE_ORIGINAL   : False
 - TRAIN_USE_UNIFORM_SCALING_NOISE: False
 - TRAIN_USE_BOUNDARY_JITTER_NOISE : True
 - TARGET_DATASETS      : ['african-wildlife', 'bccd', 'brain-tumor', 'construction-ppe', 'custom_blood', 'homeobjects-3k', 'kitti', 'medical-pills', 'signature', 'voc']
‚è≠Ô∏è  Skip (not in TARGET_DATASETS): SKU-110K

[Integrity Check] kitti
   üîç Scanning integrity of 7481 images in images...


                                                              

   ‚úÖ  No corrupt images found.

--------------------------------------------------------------------------------
[Dataset] kitti
 - split_mode : explicit
 - Cases      : ['side_1', 'side_3', 'side_5', 'side_7', 'side_9']
 - CLASS_MODES: ['multiclass', 'object_only']
--------------------------------------------------------------------------------
  [Subset] case=side_1 | class_mode=multiclass | split_mode=explicit | train_used=5985/5985 (100.0%)

  [Train] case=side_1 | class_mode=multiclass | model=yolov8n
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/kitti/yolov8n__side_1__multiclass__tr100/metrics_eval.json

  [Train] case=side_1 | class_mode=multiclass | model=yolo11n
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/kitti/yolo11n__side_1__multiclass__tr100/metrics_eval.json

  [Train] case=side_1 | class_mode=multiclass | model=detr
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/kitti/detr__side_1__multiclass__tr100/metrics_eval.json
  [Subset] case=side_1 |

                                                              

   ‚úÖ  No corrupt images found.

--------------------------------------------------------------------------------
[Dataset] homeobjects-3K
 - split_mode : explicit
 - Cases      : ['side_1', 'side_3', 'side_5', 'side_7', 'side_9']
 - CLASS_MODES: ['multiclass', 'object_only']
--------------------------------------------------------------------------------
  [Subset] case=side_1 | class_mode=multiclass | split_mode=explicit | train_used=2285/2285 (100.0%)

  [Train] case=side_1 | class_mode=multiclass | model=yolov8n
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/homeobjects-3K/yolov8n__side_1__multiclass__tr100/metrics_eval.json

  [Train] case=side_1 | class_mode=multiclass | model=yolo11n
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/homeobjects-3K/yolo11n__side_1__multiclass__tr100/metrics_eval.json

  [Train] case=side_1 | class_mode=multiclass | model=detr
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/homeobjects-3K/detr__side_1__multiclass__tr100/metric

                                                              

   ‚úÖ  No corrupt images found.

--------------------------------------------------------------------------------
[Dataset] african-wildlife
 - split_mode : explicit
 - Cases      : ['side_1', 'side_3', 'side_5', 'side_7', 'side_9']
 - CLASS_MODES: ['multiclass', 'object_only']
--------------------------------------------------------------------------------
  [Subset] case=side_1 | class_mode=multiclass | split_mode=explicit | train_used=1052/1052 (100.0%)

  [Train] case=side_1 | class_mode=multiclass | model=yolov8n
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/african-wildlife/yolov8n__side_1__multiclass__tr100/metrics_eval.json

  [Train] case=side_1 | class_mode=multiclass | model=yolo11n
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/african-wildlife/yolo11n__side_1__multiclass__tr100/metrics_eval.json

  [Train] case=side_1 | class_mode=multiclass | model=detr
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/african-wildlife/detr__side_1__multiclass__tr10

                                                              

   ‚úÖ  No corrupt images found.

--------------------------------------------------------------------------------
[Dataset] construction-ppe
 - split_mode : explicit
 - Cases      : ['side_1', 'side_3', 'side_5', 'side_7', 'side_9']
 - CLASS_MODES: ['multiclass', 'object_only']
--------------------------------------------------------------------------------
  [Subset] case=side_1 | class_mode=multiclass | split_mode=explicit | train_used=1132/1132 (100.0%)

  [Train] case=side_1 | class_mode=multiclass | model=yolov8n
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/construction-ppe/yolov8n__side_1__multiclass__tr100/metrics_eval.json

  [Train] case=side_1 | class_mode=multiclass | model=yolo11n
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/construction-ppe/yolo11n__side_1__multiclass__tr100/metrics_eval.json

  [Train] case=side_1 | class_mode=multiclass | model=detr
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/construction-ppe/detr__side_1__multiclass__tr10

                                                             

   ‚úÖ  No corrupt images found.

--------------------------------------------------------------------------------
[Dataset] Custom_Blood
 - split_mode : explicit
 - Cases      : ['side_1', 'side_3', 'side_5', 'side_7', 'side_9']
 - CLASS_MODES: ['multiclass', 'object_only']
--------------------------------------------------------------------------------
  [Subset] case=side_1 | class_mode=multiclass | split_mode=explicit | train_used=1105/1105 (100.0%)

  [Train] case=side_1 | class_mode=multiclass | model=yolov8n
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/Custom_Blood/yolov8n__side_1__multiclass__tr100/metrics_eval.json

  [Train] case=side_1 | class_mode=multiclass | model=yolo11n
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/Custom_Blood/yolo11n__side_1__multiclass__tr100/metrics_eval.json

  [Train] case=side_1 | class_mode=multiclass | model=detr
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/Custom_Blood/detr__side_1__multiclass__tr100/metrics_eval.j

                                                               

   ‚úÖ  No corrupt images found.

--------------------------------------------------------------------------------
[Dataset] brain-tumor
 - split_mode : explicit
 - Cases      : ['side_1', 'side_3', 'side_5', 'side_7', 'side_9']
 - CLASS_MODES: ['multiclass', 'object_only']
--------------------------------------------------------------------------------
  [Subset] case=side_1 | class_mode=multiclass | split_mode=explicit | train_used=893/893 (100.0%)

  [Train] case=side_1 | class_mode=multiclass | model=yolov8n
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/brain-tumor/yolov8n__side_1__multiclass__tr100/metrics_eval.json

  [Train] case=side_1 | class_mode=multiclass | model=yolo11n
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/brain-tumor/yolo11n__side_1__multiclass__tr100/metrics_eval.json

  [Train] case=side_1 | class_mode=multiclass | model=detr
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/brain-tumor/detr__side_1__multiclass__tr100/metrics_eval.json
  

                                                            

   ‚úÖ  No corrupt images found.

--------------------------------------------------------------------------------
[Dataset] BCCD
 - split_mode : explicit
 - Cases      : ['side_1', 'side_3', 'side_5', 'side_7', 'side_9']
 - CLASS_MODES: ['multiclass', 'object_only']
--------------------------------------------------------------------------------
  [Subset] case=side_1 | class_mode=multiclass | split_mode=explicit | train_used=310/310 (100.0%)

  [Train] case=side_1 | class_mode=multiclass | model=yolov8n
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/BCCD/yolov8n__side_1__multiclass__tr100/metrics_eval.json

  [Train] case=side_1 | class_mode=multiclass | model=yolo11n
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/BCCD/yolo11n__side_1__multiclass__tr100/metrics_eval.json

  [Train] case=side_1 | class_mode=multiclass | model=detr
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/BCCD/detr__side_1__multiclass__tr100/metrics_eval.json
  [Subset] case=side_1 | class

                                                            

   ‚úÖ  No corrupt images found.

--------------------------------------------------------------------------------
[Dataset] signature
 - split_mode : explicit
 - Cases      : ['side_1', 'side_3', 'side_5', 'side_7', 'side_9']
 - CLASS_MODES: ['multiclass', 'object_only']
--------------------------------------------------------------------------------
  [Subset] case=side_1 | class_mode=multiclass | split_mode=explicit | train_used=143/143 (100.0%)

  [Train] case=side_1 | class_mode=multiclass | model=yolov8n
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/signature/yolov8n__side_1__multiclass__tr100/metrics_eval.json

  [Train] case=side_1 | class_mode=multiclass | model=yolo11n
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/signature/yolo11n__side_1__multiclass__tr100/metrics_eval.json

  [Train] case=side_1 | class_mode=multiclass | model=detr
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/signature/detr__side_1__multiclass__tr100/metrics_eval.json
  [Subset]

                                                           

   ‚úÖ  No corrupt images found.

--------------------------------------------------------------------------------
[Dataset] medical-pills
 - split_mode : explicit
 - Cases      : ['side_1', 'side_3', 'side_5', 'side_7', 'side_9']
 - CLASS_MODES: ['multiclass', 'object_only']
--------------------------------------------------------------------------------
  [Subset] case=side_1 | class_mode=multiclass | split_mode=explicit | train_used=92/92 (100.0%)

  [Train] case=side_1 | class_mode=multiclass | model=yolov8n
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/medical-pills/yolov8n__side_1__multiclass__tr100/metrics_eval.json

  [Train] case=side_1 | class_mode=multiclass | model=yolo11n
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/medical-pills/yolo11n__side_1__multiclass__tr100/metrics_eval.json

  [Train] case=side_1 | class_mode=multiclass | model=detr
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/medical-pills/detr__side_1__multiclass__tr100/metrics_eval.j

                                                                 

   ‚úÖ  No corrupt images found.

--------------------------------------------------------------------------------
[Dataset] VOC
 - split_mode : explicit
 - Cases      : ['side_1', 'side_3', 'side_5', 'side_7', 'side_9']
 - CLASS_MODES: ['multiclass', 'object_only']
--------------------------------------------------------------------------------
  [Subset] case=side_1 | class_mode=multiclass | split_mode=explicit | train_used=5717/5717 (100.0%)

  [Train] case=side_1 | class_mode=multiclass | model=yolov8n
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/VOC/yolov8n__side_1__multiclass__tr100/metrics_eval.json

  [Train] case=side_1 | class_mode=multiclass | model=yolo11n
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/VOC/yolo11n__side_1__multiclass__tr100/metrics_eval.json

  [Train] case=side_1 | class_mode=multiclass | model=detr
    ‚úÖ Saved metrics: /home/ISW/project/object_detect/VOC/detr__side_1__multiclass__tr100/metrics_eval.json
  [Subset] case=side_1 | class_m

In [None]:
# ==========================================
# Cell 2) Train & Validate (FAST & ROBUST) — FINAL OPTIMIZED + OBJECT-ONLY
#   - Supports both multi-class and object-only modes
#   - Select only required modes from CLASS_MODES
#   - Select datasets to use via TARGET_DATASETS
#   - ✅ NEW: Control training with original / uniform_scaling_noise / boundary_jitter_noise flags
# ==========================================

from __future__ import annotations

import os, json, shutil, random, csv, sys, time
from pathlib import Path
from typing import List, Dict, Tuple, Optional
from contextlib import contextmanager

# ✅ Libraries for image integrity check
try:
    import cv2
    from PIL import Image
    from tqdm import tqdm
except ImportError:
    print("‚ö†Ô∏è Required libraries are missing. Please install: pip install opencv-python pillow tqdm")
    sys.exit(1)

# ✅ OOM fragmentation mitigation
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")

import torch
from ultralytics import YOLO
import logging
logging.getLogger("ultralytics").setLevel(logging.ERROR)

# -------------------------------------------------------------------------
# ✅ FIXED grids (speed optimized)
#   Note: these values are the candidate labels_* folders for training.
#         A case is only detected when the value matches an actually generated
#         labels_boundary_jitter_K / labels_uniform_scaling_S folder name.
# -------------------------------------------------------------------------
UNIFORM_SCALING_FACTORS = [0.6, 0.7, 0.8, 0.9, 1.1, 1.2, 1.3, 1.4]
JITTER_PATTERNS = [1, 3, 5, 7, 9]

# -------------------------------------------------------------------------
# ✅ NEW: Train-case control flags
#   - Each flag includes or excludes one family of label folders from training
# -------------------------------------------------------------------------
TRAIN_USE_ORIGINAL    = False
TRAIN_USE_UNIFORM_SCALING_NOISE = False
TRAIN_USE_BOUNDARY_JITTER_NOISE  = True
# e.g.) Original only: TRAIN_USE_ORIGINAL=True, TRAIN_USE_UNIFORM_SCALING_NOISE=False, TRAIN_USE_BOUNDARY_JITTER_NOISE=False
# e.g.) Side only: TRAIN_USE_ORIGINAL=False, TRAIN_USE_UNIFORM_SCALING_NOISE=False, TRAIN_USE_BOUNDARY_JITTER_NOISE=True

# -------------------------------------------------------------------------
# ✅ Speed control
#   - TRAIN_FRACTION: share of the training images sampled for each run
#   - TRAIN_MIN_IMAGES: lower bound on the sample size when TRAIN_FRACTION < 1.0
#   - NUM_WORKERS: worker count handed to train/val, at most 8
#     (falls back to 4 when the CPU count cannot be determined)
# -------------------------------------------------------------------------
TRAIN_FRACTION = 1.0
TRAIN_MIN_IMAGES = 50
NUM_WORKERS = min(8, os.cpu_count() or 4)

# -------------------------------------------------------------------------
# User config
#   - IMG_SIZE / BATCH: the single training setting used for non-"detr" models
#   - DEVICE: passed as `device=` to train/val
#   - SEED: used for global seeding, the SKU virtual split and train subsampling
# -------------------------------------------------------------------------
IMG_SIZE = 640
EPOCHS = 10
BATCH = 32
DEVICE = "0"
SEED = 42

# Training outputs (one sub-folder per dataset) and the summary CSV go here.
# Note: the directories below are created as soon as this cell runs.
OUT_ROOT = Path("/home/ISW/project/object_detect")
OUT_ROOT.mkdir(parents=True, exist_ok=True)

# Parent folder of the temporary per-run dataset views.
RUNTIME_VROOT_BASE = Path("/home/ISW/project/_runtime_dataset_views")
RUNTIME_VROOT_BASE.mkdir(parents=True, exist_ok=True)

# NOTE(review): presumably where the image integrity scan moves corrupt files —
# the consumer is not visible in this part of the file; confirm.
CORRUPT_BACKUP_DIR = Path("/home/ISW/project/_corrupt_files_backup")
CORRUPT_BACKUP_DIR.mkdir(parents=True, exist_ok=True)

# CLEANUP_RUNTIME_VROOT: delete each runtime view once its models have run.
# SILENCE_ULTRA_OUTPUT: wrap train/val calls in suppress_output().
CLEANUP_RUNTIME_VROOT = True
SILENCE_ULTRA_OUTPUT = True

# ✅ Select which class modes to run
CLASS_MODES = ["multiclass", "object_only"]

# ✅ Select datasets to use (by folder name, case-insensitive)
#    None disables the filter, so every discovered dataset is used.
TARGET_DATASETS: Optional[List[str]] = [
    # "SKU-110K",
    # "coco",
    # "lvis",
    "kitti",
    "homeobjects-3K",
    "african-wildlife",
    "construction-ppe",
    "Custom_Blood",
    "brain-tumor",
    "BCCD",
    "signature",
    "medical-pills",
    "VOC",
]
# Normalised (stripped, lower-cased) name set used for the actual membership test.
TARGET_DATASETS_LOWER = (
    {name.strip().lower() for name in TARGET_DATASETS}
    if TARGET_DATASETS is not None
    else None
)

@contextmanager
def suppress_output(enabled: bool = True):
    """Temporarily point ``sys.stdout`` and ``sys.stderr`` at ``os.devnull``.

    Args:
        enabled: When False the context manager is a no-op.

    Only the Python-level stream objects are swapped; the previous objects are
    put back on exit, including when the wrapped code raises.
    """
    if enabled:
        saved_streams = (sys.stdout, sys.stderr)
        with open(os.devnull, "w") as sink:
            try:
                sys.stdout = sys.stderr = sink
                yield
            finally:
                # Restore first; the `with` block closes the sink afterwards.
                sys.stdout, sys.stderr = saved_streams
    else:
        yield

# -------------------------------------------------------------------------
# Model specs
#   Each candidate list is tried in order by choose_model(); the first
#   checkpoint that loads wins.
#   NOTE(review): confirm that every candidate file name (e.g. "rtdetr-s.pt")
#   is actually available for the installed Ultralytics release; a name that
#   cannot be loaded simply falls through to the next candidate.
# -------------------------------------------------------------------------
YOLOV8N_CKPT_CANDIDATES = ["yolov8n.pt"]
YOLO11N_CKPT_CANDIDATES = ["yolo11n.pt", "yolov11n.pt"]
DETR_CKPT_CANDIDATES   = ["rtdetr-s.pt", "rtdetr-l.pt"]

# (model_tag, checkpoint candidates). The tag is used in output folder names,
# and the "detr" tag selects the smaller batch/imgsz candidates in
# train_with_auto_oom().
MODEL_SPECS = [
    ("yolov8n", YOLOV8N_CKPT_CANDIDATES),
    ("yolo11n", YOLO11N_CKPT_CANDIDATES),
    ("detr",    DETR_CKPT_CANDIDATES),
]

# -------------------------------------------------------------------------
# Utils
# -------------------------------------------------------------------------
# Lower-case file suffixes that list_images() treats as images.
_IMG_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp"}

def set_seed(seed: int = 42):
    """Seed Python's ``random`` module and PyTorch with the same value.

    ``torch.cuda.manual_seed_all`` is only called when CUDA is available.
    """
    random.seed(seed)
    torch.manual_seed(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

def list_images(dir_path: Optional[Path]) -> List[Path]:
    """Return every image file below ``dir_path`` (recursive), sorted.

    A file counts as an image when its lower-cased suffix is in ``_IMG_EXTS``.
    ``None`` or a non-existent path yields an empty list.
    """
    if dir_path is None:
        return []
    root = Path(dir_path)
    if not root.exists():
        return []
    return sorted(
        entry
        for entry in root.rglob("*")
        if entry.is_file() and entry.suffix.lower() in _IMG_EXTS
    )

def _safe_symlink(src: Path, dst: Path):
    """Create the symlink ``dst -> src`` unless something already sits at ``dst``.

    The parent directory of ``dst`` is created on demand. An existing file,
    directory or symlink (even a dangling one) at ``dst`` is left untouched.
    """
    dst.parent.mkdir(parents=True, exist_ok=True)
    occupied = dst.is_symlink() or dst.exists()
    if not occupied:
        os.symlink(str(src), str(dst))

def _safe_copytree(src: Path, dst: Path):
    """Recursively copy ``src`` into ``dst``, merging into an existing ``dst``.

    Does nothing when ``src`` does not exist.
    """
    if src.exists():
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copytree(src, dst, dirs_exist_ok=True)

def _link_or_copy(src: Path, dst: Path, prefer_symlink: bool = True):
    """Best-effort: make ``src`` available at ``dst``; never raises.

    * directory -> merged copy via ``_safe_copytree``
    * file      -> symlink when ``prefer_symlink`` is true, else ``copy2``

    If the first attempt raises, one plain ``copy2`` is tried; a failure of
    that fallback is ignored as well. A missing ``src`` is a no-op.
    """
    if not src.exists():
        return

    def _copy_file():
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dst)

    try:
        if src.is_dir():
            _safe_copytree(src, dst)
        elif prefer_symlink:
            _safe_symlink(src, dst)
        else:
            _copy_file()
    except Exception:
        # Deliberate best-effort fallback: retry as a plain file copy.
        try:
            _copy_file()
        except Exception:
            pass

def infer_class_names_from_labels(label_root: Optional[Path], max_files: int = 2000) -> List[str]:
    """Derive placeholder class names from YOLO-format label files.

    Scans up to ``max_files`` ``*.txt`` files below ``label_root`` (recursive),
    reads the first field of every row that has at least five fields as the
    class id, and returns ``["class_0", ..., "class_<max id>"]``.

    Args:
        label_root: Root folder of the label files; ``None`` is accepted.
        max_files: Upper bound on the number of label files inspected. Class
            ids that only occur in files beyond this bound are not seen.

    Returns:
        The generated names, or ``["class_0"]`` when the folder is missing,
        holds no label files, or no usable class id was found.
    """
    fallback = ["class_0"]
    if label_root is None or not label_root.exists():
        return fallback

    # Sorted so the inspected sample does not depend on filesystem order.
    label_files = sorted(label_root.rglob("*.txt"))[:max_files]

    class_ids = set()
    for label_file in label_files:
        try:
            with open(label_file, "r", encoding="utf-8") as fh:
                for raw in fh:
                    fields = raw.split()
                    if len(fields) < 5:
                        continue
                    # Parse per row: one malformed row must not discard the
                    # remaining rows of the same file.
                    try:
                        cid = int(float(fields[0]))
                    except (ValueError, OverflowError):
                        continue
                    # Negative ids cannot index a names list; ignore them.
                    if cid >= 0:
                        class_ids.add(cid)
        except (OSError, UnicodeError):
            # Unreadable / undecodable file: keep whatever was parsed so far.
            continue

    if not class_ids:
        return fallback
    return [f"class_{i}" for i in range(max(class_ids) + 1)]

def choose_model(ckpt_candidates: List[str]) -> YOLO:
    """Return a ``YOLO`` built from the first checkpoint candidate that loads.

    Raises:
        RuntimeError: when no candidate could be loaded; the last load error
            (if any) is attached as the cause.
    """
    failure = None
    for weights in ckpt_candidates:
        try:
            loaded = YOLO(weights)
        except Exception as exc:
            failure = exc
        else:
            return loaded
    raise RuntimeError(f"Failed to load model weights: {ckpt_candidates}") from failure

def extract_metrics_dict(val_result) -> Dict:
    """Turn a validation result object into a plain dict of metrics.

    Preference order:
      1. a copy of ``val_result.results_dict`` when that attribute is a dict;
      2. a copy of ``val_result.__dict__`` without the "plots" and "speed"
         entries;
      3. ``{}`` for ``None`` or when neither of the above is available.
    """
    if val_result is None:
        return {}

    results = getattr(val_result, "results_dict", None)
    if isinstance(results, dict):
        return dict(results)

    try:
        attrs = dict(val_result.__dict__)
    except Exception:
        return {}
    for dropped in ("plots", "speed"):
        attrs.pop(dropped, None)
    return attrs

# -------------------------------------------------------------------------
# ‚úÖ Label rewrite utility for object-only mode
# -------------------------------------------------------------------------
def rewrite_label_file_to_object_only(src: Path, dst: Path):
    """Write ``src`` to ``dst`` with the class id of every row replaced by 0.

    Rows with fewer than five whitespace-separated fields are dropped. When
    ``src`` is missing, or reading/writing fails, ``dst`` is written as an
    empty file instead (best effort; that write may itself fail silently).
    """

    def _write_empty():
        try:
            dst.write_text("", encoding="utf-8")
        except Exception:
            pass

    dst.parent.mkdir(parents=True, exist_ok=True)
    if not src.exists():
        _write_empty()
        return

    try:
        with open(src, "r", encoding="utf-8") as reader:
            rewritten = [
                " ".join(["0"] + fields[1:])
                for fields in (raw.strip().split() for raw in reader)
                if len(fields) >= 5
            ]
        with open(dst, "w", encoding="utf-8") as writer:
            for row in rewritten:
                writer.write(row + "\n")
    except Exception:
        _write_empty()

# -------------------------------------------------------------------------
# ‚úÖ Corrupt Image Cleaner
# -------------------------------------------------------------------------
def _image_is_corrupt(img_path: Path) -> bool:
    """Return True when Pillow's ``verify()`` or OpenCV's ``imread()`` rejects the file."""
    try:
        with Image.open(img_path) as im:
            im.verify()
    except Exception:
        return True
    try:
        return cv2.imread(str(img_path)) is None
    except Exception:
        return True


def scan_and_clean_images(dir_path: Path):
    """Move unreadable images (and their label files) out of ``dir_path``.

    Every image found by ``list_images(dir_path)`` is checked with Pillow and
    OpenCV. A corrupt image is moved to
    ``CORRUPT_BACKUP_DIR / <dir_path.parent.name> / <dir_path.name> / <relative path>``
    so that files with the same name from different datasets or splits do not
    overwrite each other in the backup.

    The matching label is looked up at
    ``<dir_path.parent>/labels/<relative path>.txt`` (this assumes ``dir_path``
    is a dataset's ``images`` folder with a sibling ``labels`` folder), then
    next to the image as a fallback, and is moved alongside the image.

    Args:
        dir_path: Root folder of the images to check; a missing or empty
            folder is a no-op.
    """
    if not dir_path.exists():
        return

    images = list_images(dir_path)
    if not images:
        return

    print(f"   üîç Scanning integrity of {len(images)} images in {dir_path.name}...")
    backup_root = CORRUPT_BACKUP_DIR / dir_path.parent.name / dir_path.name
    labels_root = dir_path.parent / "labels"

    corrupt_count = 0
    moved_count = 0
    for img_path in tqdm(images, desc="Checking", leave=False):
        if not _image_is_corrupt(img_path):
            continue
        corrupt_count += 1

        # Keep the split sub-folders in both the label lookup and the backup.
        rel = img_path.relative_to(dir_path)
        dest = backup_root / rel

        label_path = labels_root / rel.with_suffix(".txt")
        if not label_path.exists():
            label_path = img_path.with_suffix(".txt")

        try:
            dest.parent.mkdir(parents=True, exist_ok=True)
            shutil.move(str(img_path), str(dest))
            if label_path.exists():
                shutil.move(str(label_path), str(dest.with_suffix(".txt")))
            moved_count += 1
        except Exception as exc:
            # Best effort: keep scanning, but make the failure visible.
            print(f"   Could not move corrupt file {img_path}: {exc}")

    if corrupt_count > 0:
        print(f"   ‚ö†Ô∏è  Moved {moved_count} corrupt images to {CORRUPT_BACKUP_DIR}")
    else:
        print(f"   ‚úÖ  No corrupt images found.")

# -------------------------------------------------------------------------
# Split/Case helpers
# -------------------------------------------------------------------------
def normalize_name(name: str) -> str:
    """Canonical dataset key: stripped, lower-cased, "_" and " " turned into "-"."""
    normalized = name.strip().lower()
    for separator in ("_", " "):
        normalized = normalized.replace(separator, "-")
    return normalized

# Lookup of Cell 1's per-dataset summaries, keyed by normalize_name(dataset).
# `dataset_summaries` is not defined in this cell (presumably produced by
# Cell 1 — confirm). If it is missing, or any entry cannot be processed, the
# map is reset to empty and the helpers below use their directory-based
# fallbacks.
_ds_map: Dict[str, Dict] = {}
try:
    for info in dataset_summaries:
        key = normalize_name(info.get("dataset", ""))
        # Entries without a dataset name are skipped.
        if key:
            _ds_map[key] = info
except Exception:
    _ds_map = {}

def get_split_info(ds_root: Path) -> Dict[str, Optional[Path]]:
    """Resolve the train/val image folders and split metadata of a dataset.

    When ``_ds_map`` has an entry for the dataset, its values are used
    ("train_dir", "val_dir", "split_mode", "train_tag", "val_tag"). Otherwise
    the folders are guessed from ``<ds_root>/images``: ``train`` (or the
    images root itself) for training and ``val`` / ``valid`` for validation.

    Returns:
        Dict with keys ``train_img_dir``, ``val_img_dir``, ``split_mode``,
        ``train_tag`` and ``val_tag``.
    """
    key = normalize_name(ds_root.name)
    if key in _ds_map:
        summary = _ds_map[key]
        train_dir = summary.get("train_dir")
        val_dir = summary.get("val_dir")
        return {
            "train_img_dir": Path(train_dir) if train_dir else None,
            "val_img_dir": Path(val_dir) if val_dir else None,
            "split_mode": summary.get("split_mode", "fallback"),
            "train_tag": summary.get("train_tag", "train"),
            "val_tag": summary.get("val_tag", "val"),
        }

    # No summary available: fall back to the conventional folder names.
    images_root = ds_root / "images"

    train_guess = images_root / "train"
    if not train_guess.is_dir():
        train_guess = images_root

    val_guess = None
    for folder in ("val", "valid"):
        if (images_root / folder).is_dir():
            val_guess = images_root / folder
            break

    return {
        "train_img_dir": train_guess,
        "val_img_dir": val_guess,
        "split_mode": "fallback",
        "train_tag": train_guess.name if train_guess else "unknown",
        "val_tag": val_guess.name if val_guess else "missing",
    }

# -------------------------------------------------------------------------
# ‚úÖ NEW: label case listing (flags reflected)
# -------------------------------------------------------------------------
def list_label_cases_for_dataset(ds_root: Path) -> List[Tuple[str, str]]:
    """List the ``(case_tag, labels_dirname)`` pairs to train on for a dataset.

    A case is included when its TRAIN_USE_* flag is on, its label folder
    exists under ``ds_root`` and, if ``_ds_map`` records a "label_cases" list
    for the dataset, the tag appears in that list.

    Order: "original", then "scale_<S>" for each UNIFORM_SCALING_FACTORS entry,
    then "side_<K>" for each JITTER_PATTERNS entry.
    """
    key = normalize_name(ds_root.name)
    recorded_tags = None
    if key in _ds_map:
        recorded = _ds_map[key].get("label_cases")
        if isinstance(recorded, list):
            recorded_tags = set(recorded)

    def _usable(tag: str, dirname: str) -> bool:
        if not (ds_root / dirname).is_dir():
            return False
        return recorded_tags is None or tag in recorded_tags

    candidates: List[Tuple[str, str]] = []
    if TRAIN_USE_ORIGINAL:
        candidates.append(("original", "labels"))
    if TRAIN_USE_UNIFORM_SCALING_NOISE:
        candidates.extend(
            (f"scale_{s}", f"labels_uniform_scaling_{s}") for s in UNIFORM_SCALING_FACTORS
        )
    if TRAIN_USE_BOUNDARY_JITTER_NOISE:
        candidates.extend(
            (f"side_{k}", f"labels_boundary_jitter_{k}") for k in JITTER_PATTERNS
        )

    return [(tag, dirname) for tag, dirname in candidates if _usable(tag, dirname)]

def resolve_label_base(label_root: Optional[Path], split_tag: Optional[str], is_train: bool) -> Optional[Path]:
    """Pick the label sub-folder that belongs to one split.

    Lookup order:
      1. ``label_root / split_tag`` when the tag is non-empty and that folder
         exists;
      2. ``train`` for the training split, ``val`` then ``valid`` otherwise;
      3. ``label_root`` itself.

    Args:
        label_root: Root folder of one label case; ``None`` is passed through.
        split_tag: Split folder name from the dataset summary. ``None`` or an
            empty string skips step 1.
        is_train: Selects which conventional folder names are tried in step 2.

    Returns:
        The resolved folder, or ``None`` when ``label_root`` is ``None``.
    """
    if label_root is None:
        return None

    # Join only after checking the tag: joining a Path with None raises.
    if split_tag:
        tagged = label_root / split_tag
        if tagged.is_dir():
            return tagged

    conventional = ("train",) if is_train else ("val", "valid")
    for folder in conventional:
        candidate = label_root / folder
        if candidate.is_dir():
            return candidate
    return label_root

# -------------------------------------------------------------------------
# SKU virtual split & Runtime View (unchanged)
# -------------------------------------------------------------------------
def sku_virtual_split_images(images_root: Path, seed: int = 42, ratio: float = 0.8) -> Tuple[List[Path], List[Path]]:
    """Split the images below ``images_root`` into seeded train/val lists.

    The sorted image list is permuted with ``random.Random(seed)``; the first
    ``int(n * ratio)`` images become the train part, the rest the val part.
    An empty folder yields two empty lists.
    """
    imgs = list_images(images_root)
    if len(imgs) == 0:
        return [], []

    order = list(range(len(imgs)))
    random.Random(seed).shuffle(order)

    cut = int(len(imgs) * ratio)
    train_part = [imgs[pos] for pos in order[:cut]]
    val_part = [imgs[pos] for pos in order[cut:]]
    return train_part, val_part

def sample_train_images(base_train_imgs: List[Path], fraction: float, seed: int) -> Tuple[List[Path], int]:
    """Pick a seeded subset of the training images.

    Returns:
        ``(chosen, n_total)`` where ``n_total`` is ``len(base_train_imgs)``.
        With ``fraction >= 1.0`` the input list itself is returned. Otherwise
        ``int(n_total * fraction)`` images are sampled, raised to at least
        ``TRAIN_MIN_IMAGES`` and capped at ``n_total``.
    """
    n_total = len(base_train_imgs)
    if not n_total:
        return [], 0
    if fraction >= 1.0:
        return base_train_imgs, n_total

    wanted = max(TRAIN_MIN_IMAGES, int(n_total * fraction))
    n_pick = min(wanted, n_total)
    picker = random.Random(seed)
    return picker.sample(base_train_imgs, k=n_pick), n_total

def _rv_link_file(src: Path, dst: Path) -> None:
    """Symlink ``src`` at ``dst``; fall back to a plain copy when symlinking raises."""
    try:
        _safe_symlink(src, dst)
    except Exception:
        _link_or_copy(src, dst, prefer_symlink=False)


def _rv_place_label(src_lbl: Path, dst_lbl: Path, class_mode: str) -> None:
    """Materialise one label file in the runtime view.

    multiclass  -> link/copy the source label, or write an empty label file
                   when the source is missing.
    object_only -> rewrite the label so that every row uses class id 0.
    """
    if class_mode != "multiclass":
        rewrite_label_file_to_object_only(src_lbl, dst_lbl)
        return

    dst_lbl.parent.mkdir(parents=True, exist_ok=True)
    if src_lbl.exists():
        _rv_link_file(src_lbl, dst_lbl)
        return
    try:
        dst_lbl.write_text("", encoding="utf-8")
    except Exception:
        pass


def _rv_write_data_yaml(vroot: Path, orig_label_root: Path, class_mode: str) -> Path:
    """Write ``<vroot>/data.yaml`` (path/train/val/nc/names) and return its path."""
    if class_mode == "multiclass":
        names = infer_class_names_from_labels(orig_label_root)
    else:
        names = ["object"]
    nc = len(names)

    data_yaml = vroot / "data.yaml"
    with open(data_yaml, "w", encoding="utf-8") as f:
        f.write(
            f"path: {str(vroot)}\n"
            f"train: images/train\n"
            f"val: images/val\n"
            f"nc: {nc}\n"
            f"names: {names}\n"
        )
    return data_yaml


def build_runtime_view_root(
    ds_root: Path,
    case_labels_dirname: str,
    train_fraction: float,
    seed: int,
    class_mode: str = "multiclass",
) -> Tuple[Path, Path, int, int, str]:
    """Build a throw-away dataset view for one (dataset, label case, class mode).

    The view lives at
    ``RUNTIME_VROOT_BASE/<dataset>/case__<case_labels_dirname>__<class_mode>``
    and contains ``images/{train,val}``, ``labels/{train,val}`` and a
    ``data.yaml``. Training labels come from the case folder
    (``ds_root / case_labels_dirname``); validation labels always come from
    the original ``ds_root / "labels"`` folder.

    Args:
        ds_root: Dataset root containing ``images`` and the label folders.
        case_labels_dirname: Name of the label folder used for training.
        train_fraction: Share of training images to use (see
            ``sample_train_images``).
        seed: Seed for the SKU virtual split and for subsampling.
        class_mode: ``"multiclass"`` or ``"object_only"``.

    Returns:
        ``(vroot, data_yaml, n_train_used, n_train_total, split_mode)``.

    Raises:
        ValueError: for an unknown ``class_mode``.
        RuntimeError: when a split folder cannot be resolved or the resulting
            view has no train or val images.
    """
    if class_mode not in ("multiclass", "object_only"):
        raise ValueError(f"Unsupported class_mode: {class_mode!r}")

    images_root     = ds_root / "images"
    orig_label_root = ds_root / "labels"
    case_label_root = ds_root / case_labels_dirname

    split_info = get_split_info(ds_root)
    train_img_dir = split_info["train_img_dir"]
    val_img_dir   = split_info["val_img_dir"]
    split_mode    = split_info["split_mode"]
    train_tag     = split_info.get("train_tag", "train")
    val_tag       = split_info.get("val_tag", "val")

    vroot    = RUNTIME_VROOT_BASE / ds_root.name / f"case__{case_labels_dirname}__{class_mode}"
    v_images = vroot / "images"
    v_labels = vroot / "labels"

    # Start from a clean view; removal is best effort.
    if vroot.exists():
        try:
            shutil.rmtree(vroot)
        except Exception:
            pass

    for sub in (v_images / "train", v_images / "val", v_labels / "train", v_labels / "val"):
        sub.mkdir(parents=True, exist_ok=True)

    # A) SKU virtual split: images/labels have no split sub-folders, so the
    #    split is drawn from the flat image list.
    if split_mode == "sku_virtual_8_2":
        base_train_imgs, base_val_imgs = sku_virtual_split_images(images_root, seed=seed, ratio=0.8)
        chosen_train_imgs, _ = sample_train_images(base_train_imgs, train_fraction, seed)

        sku_parts = (
            ("train", chosen_train_imgs, case_label_root),
            ("val", base_val_imgs, orig_label_root),
        )
        for split_name, split_imgs, label_src_root in sku_parts:
            for img in split_imgs:
                rel = img.relative_to(images_root)
                rel_lbl = rel.with_suffix(".txt")
                _rv_link_file(img, v_images / split_name / rel)
                _rv_place_label(label_src_root / rel_lbl, v_labels / split_name / rel_lbl, class_mode)

        data_yaml = _rv_write_data_yaml(vroot, orig_label_root, class_mode)

        if len(list_images(v_images / "train")) == 0:
            raise RuntimeError(f"Runtime train images empty (SKU mode): {v_images/'train'}")
        # Same sanity check as the standard branch.
        if len(list_images(v_images / "val")) == 0:
            raise RuntimeError(f"Runtime val images empty (SKU mode): {v_images/'val'}")

        return vroot, data_yaml, len(chosen_train_imgs), len(base_train_imgs), split_mode

    # B) Standard split
    if train_img_dir is None or not Path(train_img_dir).is_dir():
        raise RuntimeError(f"No train images dir resolved for {ds_root.name}")
    if val_img_dir is None or not Path(val_img_dir).is_dir():
        raise RuntimeError(f"No val images dir resolved for {ds_root.name}")

    all_train_imgs = list_images(train_img_dir)
    chosen_train_imgs, n_total_train = sample_train_images(all_train_imgs, train_fraction, seed)
    if n_total_train == 0 or len(chosen_train_imgs) == 0:
        raise RuntimeError(f"No train images for {ds_root.name}")

    case_train_lbl_base = resolve_label_base(case_label_root, train_tag, is_train=True)
    for img in chosen_train_imgs:
        rel = img.relative_to(train_img_dir)
        rel_lbl = rel.with_suffix(".txt")
        _rv_link_file(img, v_images / "train" / rel)
        _rv_place_label(case_train_lbl_base / rel_lbl, v_labels / "train" / rel_lbl, class_mode)

    # NOTE(review): validation images are copied, not symlinked like the
    # training images; kept as-is.
    _safe_copytree(Path(val_img_dir), v_images / "val")

    orig_val_lbl_base = resolve_label_base(orig_label_root, val_tag, is_train=False)
    if class_mode == "multiclass":
        _safe_copytree(orig_val_lbl_base, v_labels / "val")
    else:
        for src_lbl in orig_val_lbl_base.rglob("*.txt"):
            rel = src_lbl.relative_to(orig_val_lbl_base)
            rewrite_label_file_to_object_only(src_lbl, v_labels / "val" / rel)

    data_yaml = _rv_write_data_yaml(vroot, orig_label_root, class_mode)

    if len(list_images(v_images / "train")) == 0:
        raise RuntimeError(f"Runtime train images empty: {v_images/'train'}")
    if len(list_images(v_images / "val")) == 0:
        raise RuntimeError(f"Runtime val images empty: {v_images/'val'}")

    return vroot, data_yaml, len(chosen_train_imgs), n_total_train, split_mode

# -------------------------------------------------------------------------
# OOM-safe train wrapper
# -------------------------------------------------------------------------
def train_with_auto_oom(model: YOLO, data_yaml: Path, project_dir: Path, name_dir: str, model_tag: str):
    """Train ``model``, retrying with the next (batch, imgsz) pair on CUDA OOM.

    The "detr" tag walks through a list of progressively smaller settings;
    every other tag has the single candidate ``(BATCH, IMG_SIZE)``.

    Returns:
        ``(True, batch, imgsz, None)`` on success, otherwise
        ``(False, None, None, last_error)``. Any error that is not an
        out-of-memory ``RuntimeError`` stops the retries immediately.
    """
    detr_schedule = [(4, 640), (2, 640), (2, 512), (1, 512)]
    attempts = detr_schedule if model_tag == "detr" else [(BATCH, IMG_SIZE)]

    failure = None
    for batch_size, img_size in attempts:
        try:
            with suppress_output(SILENCE_ULTRA_OUTPUT):
                train_kwargs = dict(
                    data=str(data_yaml),
                    epochs=EPOCHS,
                    imgsz=img_size,
                    batch=batch_size,
                    device=DEVICE,
                    project=str(project_dir),
                    name=name_dir,
                    exist_ok=True,
                    verbose=False,
                    workers=NUM_WORKERS,
                    amp=True,
                )
                model.train(**train_kwargs)
        except RuntimeError as exc:
            failure = exc
            text = str(exc).lower()
            if "out of memory" not in text and "cuda out of memory" not in text:
                break
            # OOM: release cached GPU memory before the next, smaller attempt.
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.ipc_collect()
        except Exception as exc:
            failure = exc
            break
        else:
            return True, batch_size, img_size, None
    return False, None, None, failure

# -------------------------------------------------------------------------
# Train & validate loop
#   For every dataset root -> label case -> class mode, one runtime view is
#   built and every model in MODEL_SPECS is trained and validated on it.
#   One row per completed (train succeeded) run is appended to summary_rows.
# -------------------------------------------------------------------------
set_seed(SEED)

print("=" * 80)
print("[TRAIN/EVAL] Start (Cell 1 summaries-aware + runtime path safe + object-only)")
print(f" - OUT_ROOT             : {OUT_ROOT}")
print(f" - RUNTIME_VROOT_BASE   : {RUNTIME_VROOT_BASE}")
print(f" - TRAIN_FRACTION       : {TRAIN_FRACTION}")
print(f" - NUM_WORKERS          : {NUM_WORKERS}")
print(f" - CLASS_MODES          : {CLASS_MODES}")
print(f" - CLEANUP_RUNTIME_VROOT: {CLEANUP_RUNTIME_VROOT}")
print(f" - TRAIN_USE_ORIGINAL   : {TRAIN_USE_ORIGINAL}")
print(f" - TRAIN_USE_UNIFORM_SCALING_NOISE: {TRAIN_USE_UNIFORM_SCALING_NOISE}")
print(f" - TRAIN_USE_BOUNDARY_JITTER_NOISE : {TRAIN_USE_BOUNDARY_JITTER_NOISE}")
if TARGET_DATASETS_LOWER is None:
    print(f" - TARGET_DATASETS      : ALL (no filter)")
else:
    print(f" - TARGET_DATASETS      : {sorted(TARGET_DATASETS_LOWER)}")
print("=" * 80)

# One dict per finished run; written to the summary CSV after the loop.
summary_rows: List[Dict] = []

# `roots` is not defined in this cell; fail early with a readable message
# when it is missing.
try:
    _ = roots
except NameError:
    raise RuntimeError("Please run Cell 1 first to prepare the roots variable.")

for ds_root in roots:
    ds_root = Path(ds_root)

    # Optional dataset filter (case-insensitive folder-name match).
    if TARGET_DATASETS_LOWER is not None:
        ds_name_lower = ds_root.name.strip().lower()
        if ds_name_lower not in TARGET_DATASETS_LOWER:
            print(f"‚è≠Ô∏è  Skip (not in TARGET_DATASETS): {ds_root.name}")
            continue

    images_root = ds_root / "images"
    labels_root = ds_root / "labels"

    if not images_root.is_dir() or not labels_root.is_dir():
        print(f"‚è≠Ô∏è  Skip (missing images/labels): {ds_root}")
        continue

    # NOTE: this moves corrupt images out of the source dataset itself.
    print(f"\n[Integrity Check] {ds_root.name}")
    scan_and_clean_images(images_root)

    cases = list_label_cases_for_dataset(ds_root)
    if not cases:
        print(f"‚è≠Ô∏è  Skip (no target label cases after flags/grid filter): {ds_root.name}")
        continue

    sp = get_split_info(ds_root)
    print("\n" + "-" * 80)
    print(f"[Dataset] {ds_root.name}")
    print(f" - split_mode : {sp.get('split_mode')}")
    print(f" - Cases      : {[c[0] for c in cases]}")
    print(f" - CLASS_MODES: {CLASS_MODES}")
    print("-" * 80)

    for case_tag, labels_dirname in cases:
        for class_mode in CLASS_MODES:
            # Build the runtime view once; all models below share it.
            try:
                vroot, data_yaml, n_used, n_total, split_mode = build_runtime_view_root(
                    ds_root=ds_root,
                    case_labels_dirname=labels_dirname,
                    train_fraction=TRAIN_FRACTION,
                    seed=SEED,
                    class_mode=class_mode,
                )
                pct = (n_used / max(1, n_total)) * 100.0
                print(f"  [Subset] case={case_tag} | class_mode={class_mode} | split_mode={split_mode} | train_used={n_used}/{n_total} ({pct:.1f}%)")
            except Exception as e:
                print(f"  ‚è≠Ô∏è  Skip build failed: case={case_tag} | class_mode={class_mode} | err={e}")
                continue

            for model_tag, ckpt_candidates in MODEL_SPECS:
                print(f"\n  [Train] case={case_tag} | class_mode={class_mode} | model={model_tag}")

                try:
                    model = choose_model(ckpt_candidates)
                except Exception as e:
                    print(f"    ‚ùå Model load failed: {ckpt_candidates} | err={e}")
                    continue

                # Output folder: OUT_ROOT/<dataset>/<model>__<case>__<class_mode>__tr<percent>
                project_dir = OUT_ROOT / ds_root.name
                frac_tag = f"tr{int(TRAIN_FRACTION*100)}"
                name_dir = f"{model_tag}__{case_tag}__{class_mode}__{frac_tag}"
                project_dir.mkdir(parents=True, exist_ok=True)

                ok, used_b, used_sz, err = train_with_auto_oom(model, data_yaml, project_dir, name_dir, model_tag)
                if not ok:
                    # A failed training run leaves no summary row.
                    print(f"    ‚ùå Train failed: {err}")
                    try:
                        del model
                    except Exception:
                        pass
                    if torch.cuda.is_available():
                        torch.cuda.empty_cache()
                        torch.cuda.ipc_collect()
                    continue

                # Validate at the image size that training actually used.
                try:
                    with suppress_output(SILENCE_ULTRA_OUTPUT):
                        val_res = model.val(
                            data=str(data_yaml),
                            imgsz=used_sz if used_sz else IMG_SIZE,
                            device=DEVICE,
                            split="val",
                            verbose=False,
                            workers=NUM_WORKERS,
                        )
                except Exception as e:
                    print(f"    ‚ùå Val failed: {e}")
                    val_res = None

                # Empty dict when validation failed.
                metrics = extract_metrics_dict(val_res)

                # Per-run JSON next to the training outputs; a write failure
                # is ignored (best effort).
                metrics_out = project_dir / name_dir / "metrics_eval.json"
                try:
                    metrics_out.parent.mkdir(parents=True, exist_ok=True)
                    with open(metrics_out, "w", encoding="utf-8") as f:
                        json.dump(
                            {
                                "dataset": ds_root.name,
                                "root": str(ds_root),
                                "case_tag": case_tag,
                                "labels_dirname": labels_dirname,
                                "class_mode": class_mode,
                                "model_tag": model_tag,
                                "ckpt_candidates": ckpt_candidates,
                                "train_fraction": TRAIN_FRACTION,
                                "train_used": n_used,
                                "train_total": n_total,
                                "data_yaml": str(data_yaml),
                                "runtime_view_root": str(vroot),
                                "split_mode": split_mode,
                                "effective_batch": used_b,
                                "effective_imgsz": used_sz,
                                "metrics": metrics,
                            },
                            f,
                            ensure_ascii=False,
                            indent=2,
                        )
                    print(f"    ‚úÖ Saved metrics: {metrics_out}")
                except Exception:
                    pass

                row = {
                    "dataset": ds_root.name,
                    "model": model_tag,
                    "case": case_tag,
                    "labels_dir": labels_dirname,
                    "class_mode": class_mode,
                    "split_mode": split_mode,
                    "train_fraction": TRAIN_FRACTION,
                    "train_used": n_used,
                    "train_total": n_total,
                    "effective_batch": used_b,
                    "effective_imgsz": used_sz,
                }
                # Copy only the headline metrics that are present in the result.
                for k in ["metrics/mAP50(B)", "metrics/mAP50-95(B)", "metrics/precision(B)", "metrics/recall(B)"]:
                    if k in metrics:
                        row[k] = metrics[k]

                summary_rows.append(row)

                # Drop the model and release cached GPU memory before the next run.
                try:
                    del model
                except Exception:
                    pass
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                    torch.cuda.ipc_collect()

            # The view is only needed while the models above train/validate.
            if CLEANUP_RUNTIME_VROOT:
                try:
                    shutil.rmtree(vroot)
                except Exception:
                    pass

# -------------------------------------------------------------------------
# Save summary CSV
#   Fixed columns first, then every other key seen in any row (the metric
#   columns), sorted by name. Rows without a given metric get an empty cell.
# -------------------------------------------------------------------------
out_csv = OUT_ROOT / "summary_final_optimized.csv"
try:
    base_cols = [
        "dataset", "model", "case", "labels_dir",
        "class_mode",
        "split_mode",
        "train_fraction", "train_used", "train_total",
        "effective_batch", "effective_imgsz",
    ]

    seen_cols = set()
    for r in summary_rows:
        seen_cols.update(r.keys())
    extra_cols = sorted(seen_cols.difference(base_cols))
    cols = base_cols + extra_cols

    with open(out_csv, "w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=cols)
        w.writeheader()
        w.writerows(summary_rows)

    banner = "=" * 80
    print("\n" + banner)
    print(f"‚úÖ Saved summary CSV: {out_csv}")
    print(f"‚úÖ Total runs: {len(summary_rows)}")
    print(banner)

except Exception as e:
    print(f"‚ö†Ô∏è  Summary CSV save failed: {e}")

print("\n‚úÖ Cell 2 done.")


In [None]:
# # ==========================================
# # Cell 2) Train & Validate (FAST & ROBUST) — FINAL OPTIMIZED + OBJECT-ONLY
# #   - Supports both multi-class and object-only modes
# #   - Select only required modes from CLASS_MODES
# #   - Select datasets to use via TARGET_DATASETS
# #   - ✅ NEW: Control training with original / uniform_scaling_noise / boundary_jitter_noise flags
# # ==========================================

# from __future__ import annotations

# import os, json, shutil, random, csv, sys, time
# from pathlib import Path
# from typing import List, Dict, Tuple, Optional
# from contextlib import contextmanager

# # ‚úÖ Libraries for image integrity check
# try:
#     import cv2
#     from PIL import Image
#     from tqdm import tqdm
# except ImportError:
#     print("‚ö†Ô∏è Required libraries are missing. Please install: pip install opencv-python pillow tqdm")
#     sys.exit(1)

# # ‚úÖ OOM fragmentation mitigation
# os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")

# import torch
# from ultralytics import YOLO
# import logging
# logging.getLogger("ultralytics").setLevel(logging.ERROR)

# # -------------------------------------------------------------------------
# # ‚úÖ FIXED grids (Speed Optimized)
# #   Note: These values are "candidate labels_* folders for training".
# #           Must match actual generated labels_boundary_jitter_K / labels_uniform_scaling_S folder names for cases to be detected.
# # -------------------------------------------------------------------------
# UNIFORM_SCALING_FACTORS = [0.6, 0.7, 0.8, 0.9, 1.1, 1.2, 1.3, 1.4]
# JITTER_PATTERNS = [1, 3, 5, 7, 9]

# # -------------------------------------------------------------------------
# # ‚úÖ NEW: Train-case control flags
# #   - Can control inclusion/exclusion of noise label folders in training
# # -------------------------------------------------------------------------
# TRAIN_USE_ORIGINAL    = True
# TRAIN_USE_UNIFORM_SCALING_NOISE = False
# TRAIN_USE_BOUNDARY_JITTER_NOISE  = False
# # e.g.) Original only: TRAIN_USE_ORIGINAL=True, TRAIN_USE_UNIFORM_SCALING_NOISE=False, TRAIN_USE_BOUNDARY_JITTER_NOISE=False
# # e.g.) Side only: TRAIN_USE_ORIGINAL=False, TRAIN_USE_UNIFORM_SCALING_NOISE=False, TRAIN_USE_BOUNDARY_JITTER_NOISE=True

# # -------------------------------------------------------------------------
# # ‚úÖ Speed control
# # -------------------------------------------------------------------------
# TRAIN_FRACTION = 1.0
# TRAIN_MIN_IMAGES = 50
# NUM_WORKERS = min(8, os.cpu_count() or 4)

# # -------------------------------------------------------------------------
# # User config
# # -------------------------------------------------------------------------
# IMG_SIZE = 640
# EPOCHS = 10
# BATCH = 32
# DEVICE = "0"
# SEED = 42

# OUT_ROOT = Path("/home/ISW/project/object_detect")
# OUT_ROOT.mkdir(parents=True, exist_ok=True)

# RUNTIME_VROOT_BASE = Path("/home/ISW/project/_runtime_dataset_views")
# RUNTIME_VROOT_BASE.mkdir(parents=True, exist_ok=True)

# CORRUPT_BACKUP_DIR = Path("/home/ISW/project/_corrupt_files_backup")
# CORRUPT_BACKUP_DIR.mkdir(parents=True, exist_ok=True)

# CLEANUP_RUNTIME_VROOT = True
# SILENCE_ULTRA_OUTPUT = True

# # ‚úÖ Select which class modes to run
# CLASS_MODES = ["object_only"]

# # ✅ Select datasets to use (by folder name, case-insensitive)
# TARGET_DATASETS: Optional[List[str]] = [
#     # "SKU-110K",
#     # "kitti",
#     # "homeobjects-3K",
#     # "african-wildlife",
#     # "construction-ppe",
#     # "Custom_Blood",
#     # "brain-tumor",
#     # "BCCD",
#     # "signature",
#     # "medical-pills",
#     # "coco",
#     # "lvis",
#     "VOC",
# ]
# TARGET_DATASETS_LOWER = (
#     {name.strip().lower() for name in TARGET_DATASETS}
#     if TARGET_DATASETS is not None
#     else None
# )

# @contextmanager
# def suppress_output(enabled: bool = True):
#     if not enabled:
#         yield
#         return
#     devnull = open(os.devnull, "w")
#     old_out, old_err = sys.stdout, sys.stderr
#     try:
#         sys.stdout, sys.stderr = devnull, devnull
#         yield
#     finally:
#         sys.stdout, sys.stderr = old_out, old_err
#         devnull.close()

# # -------------------------------------------------------------------------
# # Model specs
# # -------------------------------------------------------------------------
# YOLOV8N_CKPT_CANDIDATES = ["yolov8n.pt"]
# YOLO11N_CKPT_CANDIDATES = ["yolo11n.pt", "yolov11n.pt"]
# DETR_CKPT_CANDIDATES   = ["rtdetr-s.pt", "rtdetr-l.pt"]

# MODEL_SPECS = [
#     ("yolov8n", YOLOV8N_CKPT_CANDIDATES),
#     ("yolo11n", YOLO11N_CKPT_CANDIDATES),
#     ("detr",    DETR_CKPT_CANDIDATES),
# ]

# # -------------------------------------------------------------------------
# # Utils
# # -------------------------------------------------------------------------
# _IMG_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp"}

# def set_seed(seed: int = 42):
#     random.seed(seed)
#     torch.manual_seed(seed)
#     if torch.cuda.is_available():
#         torch.cuda.manual_seed_all(seed)

# def list_images(dir_path: Optional[Path]) -> List[Path]:
#     if dir_path is None or not Path(dir_path).exists():
#         return []
#     dir_path = Path(dir_path)
#     imgs = []
#     for p in dir_path.rglob("*"):
#         if p.is_file() and p.suffix.lower() in _IMG_EXTS:
#             imgs.append(p)
#     return sorted(imgs)

# def _safe_symlink(src: Path, dst: Path):
#     dst.parent.mkdir(parents=True, exist_ok=True)
#     if dst.exists() or dst.is_symlink():
#         return
#     os.symlink(str(src), str(dst))

# def _safe_copytree(src: Path, dst: Path):
#     if not src.exists():
#         return
#     dst.parent.mkdir(parents=True, exist_ok=True)
#     shutil.copytree(src, dst, dirs_exist_ok=True)

# def _link_or_copy(src: Path, dst: Path, prefer_symlink: bool = True):
#     if not src.exists():
#         return
#     try:
#         if src.is_dir():
#             _safe_copytree(src, dst)
#             return
#         if prefer_symlink:
#             _safe_symlink(src, dst)
#             return
#         dst.parent.mkdir(parents=True, exist_ok=True)
#         shutil.copy2(src, dst)
#     except Exception:
#         try:
#             dst.parent.mkdir(parents=True, exist_ok=True)
#             shutil.copy2(src, dst)
#         except Exception:
#             pass

# def infer_class_names_from_labels(label_root: Path, max_files: int = 2000) -> List[str]:
#     if label_root is None or not label_root.exists():
#         return ["class_0"]
#     txts = list(label_root.rglob("*.txt"))
#     if not txts:
#         return ["class_0"]

#     txts = txts[:max_files]
#     cls_ids = set()
#     for t in txts:
#         try:
#             with open(t, "r", encoding="utf-8") as f:
#                 for line in f:
#                     parts = line.strip().split()
#                     if len(parts) < 5:
#                         continue
#                     cid = int(float(parts[0]))
#                     cls_ids.add(cid)
#         except Exception:
#             continue

#     if not cls_ids:
#         return ["class_0"]
#     max_id = max(cls_ids)
#     return [f"class_{i}" for i in range(max_id + 1)]

# def choose_model(ckpt_candidates: List[str]) -> YOLO:
#     last_err = None
#     for ckpt in ckpt_candidates:
#         try:
#             return YOLO(ckpt)
#         except Exception as e:
#             last_err = e
#     raise RuntimeError(f"Failed to load model weights: {ckpt_candidates}") from last_err

# def extract_metrics_dict(val_result) -> Dict:
#     if val_result is None:
#         return {}
#     if hasattr(val_result, "results_dict") and isinstance(val_result.results_dict, dict):
#         return dict(val_result.results_dict)
#     try:
#         d = dict(val_result.__dict__)
#         d.pop("plots", None)
#         d.pop("speed", None)
#         return d
#     except Exception:
#         return {}

# # -------------------------------------------------------------------------
# # ✅ Label rewrite utility for object-only mode
# # -------------------------------------------------------------------------
# def rewrite_label_file_to_object_only(src: Path, dst: Path):
#     dst.parent.mkdir(parents=True, exist_ok=True)
#     if not src.exists():
#         try:
#             dst.write_text("", encoding="utf-8")
#         except Exception:
#             pass
#         return

#     try:
#         lines_out = []
#         with open(src, "r", encoding="utf-8") as f:
#             for line in f:
#                 parts = line.strip().split()
#                 if len(parts) < 5:
#                     continue
#                 parts[0] = "0"
#                 lines_out.append(" ".join(parts))
#         with open(dst, "w", encoding="utf-8") as f:
#             for ln in lines_out:
#                 f.write(ln + "\n")
#     except Exception:
#         try:
#             dst.write_text("", encoding="utf-8")
#         except Exception:
#             pass

# # -------------------------------------------------------------------------
# # ✅ Corrupt Image Cleaner
# # -------------------------------------------------------------------------
# def scan_and_clean_images(dir_path: Path):
#     if not dir_path.exists():
#         return

#     images = list_images(dir_path)
#     if not images:
#         return

#     print(f"   üîç Scanning integrity of {len(images)} images in {dir_path.name}...")
#     corrupt_count = 0
#     for img_path in tqdm(images, desc="Checking", leave=False):
#         is_corrupt = False

#         try:
#             with Image.open(img_path) as im:
#                 im.verify()
#         except Exception:
#             is_corrupt = True

#         if not is_corrupt:
#             try:
#                 img = cv2.imread(str(img_path))
#                 if img is None:
#                     is_corrupt = True
#                 else:
#                     _ = img.shape
#             except Exception:
#                 is_corrupt = True

#         if is_corrupt:
#             corrupt_count += 1
#             dest = CORRUPT_BACKUP_DIR / dir_path.name / img_path.name
#             dest.parent.mkdir(parents=True, exist_ok=True)

#             label_path = img_path.parent.parent / "labels" / img_path.parent.name / img_path.with_suffix(".txt").name
#             if not label_path.exists():
#                 label_path = img_path.with_suffix(".txt")

#             try:
#                 shutil.move(str(img_path), str(dest))
#                 if label_path.exists():
#                     shutil.move(str(label_path), str(dest.with_suffix(".txt")))
#             except Exception:
#                 pass

#     if corrupt_count > 0:
#         print(f"   ‚ö†Ô∏è  Moved {corrupt_count} corrupt images to {CORRUPT_BACKUP_DIR}")
#     else:
#         print(f"   ‚úÖ  No corrupt images found.")

# # -------------------------------------------------------------------------
# # Split/Case helpers
# # -------------------------------------------------------------------------
# def normalize_name(name: str) -> str:
#     return name.strip().lower().replace("_", "-").replace(" ", "-")

# _ds_map: Dict[str, Dict] = {}
# try:
#     for info in dataset_summaries:
#         key = normalize_name(info.get("dataset", ""))
#         if key:
#             _ds_map[key] = info
# except Exception:
#     _ds_map = {}

# def get_split_info(ds_root: Path) -> Dict[str, Optional[Path]]:
#     key = normalize_name(ds_root.name)
#     if key in _ds_map:
#         info = _ds_map[key]
#         return dict(
#             train_img_dir=Path(info["train_dir"]) if info.get("train_dir") else None,
#             val_img_dir=Path(info["val_dir"]) if info.get("val_dir") else None,
#             split_mode=info.get("split_mode", "fallback"),
#             train_tag=info.get("train_tag", "train"),
#             val_tag=info.get("val_tag", "val"),
#         )

#     images_root = ds_root / "images"
#     tr = images_root / "train" if (images_root / "train").is_dir() else images_root
#     va = images_root / "val" if (images_root / "val").is_dir() else (images_root / "valid" if (images_root / "valid").is_dir() else None)
#     return dict(
#         train_img_dir=tr,
#         val_img_dir=va,
#         split_mode="fallback",
#         train_tag=tr.name if tr else "unknown",
#         val_tag=va.name if va else "missing",
#     )

# # -------------------------------------------------------------------------
# # ✅ NEW: label case listing (flags reflected)
# # -------------------------------------------------------------------------
# def list_label_cases_for_dataset(ds_root: Path) -> List[Tuple[str, str]]:
#     key = normalize_name(ds_root.name)
#     existing = None
#     if key in _ds_map and isinstance(_ds_map[key].get("label_cases"), list):
#         existing = set(_ds_map[key]["label_cases"])

#     cases: List[Tuple[str, str]] = []

#     # (1) original
#     if TRAIN_USE_ORIGINAL:
#         if (ds_root / "labels").is_dir() and (existing is None or "original" in existing):
#             cases.append(("original", "labels"))

#     # (2) uniform scaling noise
#     if TRAIN_USE_UNIFORM_SCALING_NOISE:
#         for s in UNIFORM_SCALING_FACTORS:
#             tag = f"scale_{s}"
#             dirname = f"labels_uniform_scaling_{s}"
#             if (ds_root / dirname).is_dir() and (existing is None or tag in existing):
#                 cases.append((tag, dirname))

#     # (3) boundary jitter noise
#     if TRAIN_USE_BOUNDARY_JITTER_NOISE:
#         for k in JITTER_PATTERNS:
#             tag = f"side_{k}"
#             dirname = f"labels_boundary_jitter_{k}"
#             if (ds_root / dirname).is_dir() and (existing is None or tag in existing):
#                 cases.append((tag, dirname))

#     return cases

# def resolve_label_base(label_root: Path, split_tag: str, is_train: bool) -> Path:
#     if label_root is None:
#         return label_root
#     tagged = label_root / split_tag
#     if split_tag and tagged.is_dir():
#         return tagged
#     if is_train:
#         tdir = label_root / "train"
#         if tdir.is_dir():
#             return tdir
#         return label_root
#     else:
#         vdir = label_root / "val"
#         if vdir.is_dir():
#             return vdir
#         vdir2 = label_root / "valid"
#         if vdir2.is_dir():
#             return vdir2
#         return label_root

# # -------------------------------------------------------------------------
# # SKU virtual split & Runtime View (unchanged)
# # -------------------------------------------------------------------------
# def sku_virtual_split_images(images_root: Path, seed: int = 42, ratio: float = 0.8) -> Tuple[List[Path], List[Path]]:
#     imgs = list_images(images_root)
#     n = len(imgs)
#     if n == 0:
#         return [], []
#     rnd = random.Random(seed)
#     idxs = list(range(n))
#     rnd.shuffle(idxs)
#     cut = int(n * ratio)
#     train_imgs = [imgs[i] for i in idxs[:cut]]
#     val_imgs   = [imgs[i] for i in idxs[cut:]]
#     return train_imgs, val_imgs

# def sample_train_images(base_train_imgs: List[Path], fraction: float, seed: int) -> Tuple[List[Path], int]:
#     n_total = len(base_train_imgs)
#     if n_total == 0:
#         return [], 0
#     if fraction >= 1.0:
#         return base_train_imgs, n_total
#     n_pick = int(n_total * fraction)
#     n_pick = max(TRAIN_MIN_IMAGES, n_pick)
#     n_pick = min(n_pick, n_total)
#     rnd = random.Random(seed)
#     chosen = rnd.sample(base_train_imgs, k=n_pick)
#     return chosen, n_total

# def build_runtime_view_root(
#     ds_root: Path,
#     case_labels_dirname: str,
#     train_fraction: float,
#     seed: int,
#     class_mode: str = "multiclass",
# ) -> Tuple[Path, Path, int, int, str]:

#     assert class_mode in ("multiclass", "object_only")
#     images_root     = ds_root / "images"
#     orig_label_root = ds_root / "labels"
#     case_label_root = ds_root / case_labels_dirname

#     split_info = get_split_info(ds_root)
#     train_img_dir = split_info["train_img_dir"]
#     val_img_dir   = split_info["val_img_dir"]
#     split_mode    = split_info["split_mode"]
#     train_tag     = split_info.get("train_tag", "train")
#     val_tag       = split_info.get("val_tag", "val")

#     vroot    = RUNTIME_VROOT_BASE / ds_root.name / f"case__{case_labels_dirname}__{class_mode}"
#     v_images = vroot / "images"
#     v_labels = vroot / "labels"

#     if vroot.exists():
#         try:
#             shutil.rmtree(vroot)
#         except Exception:
#             pass

#     (v_images / "train").mkdir(parents=True, exist_ok=True)
#     (v_images / "val").mkdir(parents=True, exist_ok=True)
#     (v_labels / "train").mkdir(parents=True, exist_ok=True)
#     (v_labels / "val").mkdir(parents=True, exist_ok=True)

#     # A) SKU virtual split
#     if split_mode == "sku_virtual_8_2":
#         base_train_imgs, base_val_imgs = sku_virtual_split_images(images_root, seed=seed, ratio=0.8)
#         chosen_train_imgs, n_total_train = sample_train_images(base_train_imgs, train_fraction, seed)

#         for img in chosen_train_imgs:
#             rel = img.relative_to(images_root)
#             dst = v_images / "train" / rel
#             try:
#                 _safe_symlink(img, dst)
#             except Exception:
#                 _link_or_copy(img, dst, prefer_symlink=False)

#         for img in base_val_imgs:
#             rel = img.relative_to(images_root)
#             dst = v_images / "val" / rel
#             try:
#                 _safe_symlink(img, dst)
#             except Exception:
#                 _link_or_copy(img, dst, prefer_symlink=False)

#         for img in chosen_train_imgs:
#             rel = img.relative_to(images_root)
#             src_lbl = case_label_root / rel.with_suffix(".txt")
#             dst_lbl = v_labels / "train" / rel.with_suffix(".txt")
#             if class_mode == "multiclass":
#                 dst_lbl.parent.mkdir(parents=True, exist_ok=True)
#                 if src_lbl.exists():
#                     try:
#                         _safe_symlink(src_lbl, dst_lbl)
#                     except Exception:
#                         _link_or_copy(src_lbl, dst_lbl, prefer_symlink=False)
#                 else:
#                     try:
#                         dst_lbl.write_text("", encoding="utf-8")
#                     except Exception:
#                         pass
#             else:
#                 rewrite_label_file_to_object_only(src_lbl, dst_lbl)

#         for img in base_val_imgs:
#             rel = img.relative_to(images_root)
#             src_lbl = orig_label_root / rel.with_suffix(".txt")
#             dst_lbl = v_labels / "val" / rel.with_suffix(".txt")
#             if class_mode == "multiclass":
#                 dst_lbl.parent.mkdir(parents=True, exist_ok=True)
#                 if src_lbl.exists():
#                     try:
#                         _safe_symlink(src_lbl, dst_lbl)
#                     except Exception:
#                         _link_or_copy(src_lbl, dst_lbl, prefer_symlink=False)
#                 else:
#                     try:
#                         dst_lbl.write_text("", encoding="utf-8")
#                     except Exception:
#                         pass
#             else:
#                 rewrite_label_file_to_object_only(src_lbl, dst_lbl)

#         if class_mode == "multiclass":
#             names = infer_class_names_from_labels(orig_label_root)
#             nc = len(names)
#         else:
#             names = ["object"]
#             nc = 1

#         data_yaml = vroot / "data.yaml"
#         with open(data_yaml, "w", encoding="utf-8") as f:
#             f.write(
#                 f"path: {str(vroot)}\n"
#                 f"train: images/train\n"
#                 f"val: images/val\n"
#                 f"nc: {nc}\n"
#                 f"names: {names}\n"
#             )

#         rt_n_train = len(list_images(v_images / "train"))
#         if rt_n_train == 0:
#             raise RuntimeError(f"Runtime train images empty (SKU mode): {v_images/'train'}")

#         return vroot, data_yaml, len(chosen_train_imgs), len(base_train_imgs), split_mode

#     # B) Standard split
#     if train_img_dir is None or not Path(train_img_dir).is_dir():
#         raise RuntimeError(f"No train images dir resolved for {ds_root.name}")
#     if val_img_dir is None or not Path(val_img_dir).is_dir():
#         raise RuntimeError(f"No val images dir resolved for {ds_root.name}")

#     all_train_imgs = list_images(train_img_dir)
#     chosen_train_imgs, n_total_train = sample_train_images(all_train_imgs, train_fraction, seed)
#     if n_total_train == 0 or len(chosen_train_imgs) == 0:
#         raise RuntimeError(f"No train images for {ds_root.name}")

#     for img in chosen_train_imgs:
#         rel = img.relative_to(train_img_dir)
#         dst = v_images / "train" / rel
#         try:
#             _safe_symlink(img, dst)
#         except Exception:
#             _link_or_copy(img, dst, prefer_symlink=False)

#     _safe_copytree(Path(val_img_dir), v_images / "val")

#     case_train_lbl_base = resolve_label_base(case_label_root, train_tag, is_train=True)
#     for img in chosen_train_imgs:
#         rel = img.relative_to(train_img_dir)
#         src_lbl = case_train_lbl_base / rel.with_suffix(".txt")
#         dst_lbl = v_labels / "train" / rel.with_suffix(".txt")
#         if class_mode == "multiclass":
#             dst_lbl.parent.mkdir(parents=True, exist_ok=True)
#             if src_lbl.exists():
#                 try:
#                     _safe_symlink(src_lbl, dst_lbl)
#                 except Exception:
#                     _link_or_copy(src_lbl, dst_lbl, prefer_symlink=False)
#             else:
#                 try:
#                     dst_lbl.write_text("", encoding="utf-8")
#                 except Exception:
#                     pass
#         else:
#             rewrite_label_file_to_object_only(src_lbl, dst_lbl)

#     orig_val_lbl_base = resolve_label_base(orig_label_root, val_tag, is_train=False)
#     if class_mode == "multiclass":
#         _safe_copytree(orig_val_lbl_base, v_labels / "val")
#     else:
#         for src_lbl in orig_val_lbl_base.rglob("*.txt"):
#             rel = src_lbl.relative_to(orig_val_lbl_base)
#             dst_lbl = v_labels / "val" / rel
#             rewrite_label_file_to_object_only(src_lbl, dst_lbl)

#     if class_mode == "multiclass":
#         names = infer_class_names_from_labels(orig_label_root)
#         nc = len(names)
#     else:
#         names = ["object"]
#         nc = 1

#     data_yaml = vroot / "data.yaml"
#     with open(data_yaml, "w", encoding="utf-8") as f:
#         f.write(
#             f"path: {str(vroot)}\n"
#             f"train: images/train\n"
#             f"val: images/val\n"
#             f"nc: {nc}\n"
#             f"names: {names}\n"
#         )

#     rt_n_train = len(list_images(v_images / "train"))
#     rt_n_val   = len(list_images(v_images / "val"))
#     if rt_n_train == 0:
#         raise RuntimeError(f"Runtime train images empty: {v_images/'train'}")
#     if rt_n_val == 0:
#         raise RuntimeError(f"Runtime val images empty: {v_images/'val'}")

#     return vroot, data_yaml, len(chosen_train_imgs), n_total_train, split_mode

# # -------------------------------------------------------------------------
# # OOM-safe train wrapper
# # -------------------------------------------------------------------------
# def train_with_auto_oom(model: YOLO, data_yaml: Path, project_dir: Path, name_dir: str, model_tag: str):
#     if model_tag == "detr":
#         candidates = [(4, 640), (2, 640), (2, 512), (1, 512)]
#     else:
#         candidates = [(BATCH, IMG_SIZE)]

#     last_err = None
#     for b, sz in candidates:
#         try:
#             with suppress_output(SILENCE_ULTRA_OUTPUT):
#                 model.train(
#                     data=str(data_yaml),
#                     epochs=EPOCHS,
#                     imgsz=sz,
#                     batch=b,
#                     device=DEVICE,
#                     project=str(project_dir),
#                     name=name_dir,
#                     exist_ok=True,
#                     verbose=False,
#                     workers=NUM_WORKERS,
#                     amp=True,
#                 )
#             return True, b, sz, None
#         except RuntimeError as e:
#             msg = str(e).lower()
#             last_err = e
#             if "out of memory" in msg or "cuda out of memory" in msg:
#                 if torch.cuda.is_available():
#                     torch.cuda.empty_cache()
#                     torch.cuda.ipc_collect()
#                 continue
#             break
#         except Exception as e:
#             last_err = e
#             break
#     return False, None, None, last_err

# # -------------------------------------------------------------------------
# # Train & validate loop
# # -------------------------------------------------------------------------
# set_seed(SEED)

# print("=" * 80)
# print("[TRAIN/EVAL] Start (Cell 1 summaries-aware + runtime path safe + object-only)")
# print(f" - OUT_ROOT             : {OUT_ROOT}")
# print(f" - RUNTIME_VROOT_BASE   : {RUNTIME_VROOT_BASE}")
# print(f" - TRAIN_FRACTION       : {TRAIN_FRACTION}")
# print(f" - NUM_WORKERS          : {NUM_WORKERS}")
# print(f" - CLASS_MODES          : {CLASS_MODES}")
# print(f" - CLEANUP_RUNTIME_VROOT: {CLEANUP_RUNTIME_VROOT}")
# print(f" - TRAIN_USE_ORIGINAL   : {TRAIN_USE_ORIGINAL}")
# print(f" - TRAIN_USE_UNIFORM_SCALING_NOISE: {TRAIN_USE_UNIFORM_SCALING_NOISE}")
# print(f" - TRAIN_USE_BOUNDARY_JITTER_NOISE : {TRAIN_USE_BOUNDARY_JITTER_NOISE}")
# if TARGET_DATASETS_LOWER is None:
#     print(f" - TARGET_DATASETS      : ALL (no filter)")
# else:
#     print(f" - TARGET_DATASETS      : {sorted(TARGET_DATASETS_LOWER)}")
# print("=" * 80)

# summary_rows: List[Dict] = []

# try:
#     _ = roots
# except NameError:
#     raise RuntimeError("Please run Cell 1 first to prepare the roots variable.")

# for ds_root in roots:
#     ds_root = Path(ds_root)

#     if TARGET_DATASETS_LOWER is not None:
#         ds_name_lower = ds_root.name.strip().lower()
#         if ds_name_lower not in TARGET_DATASETS_LOWER:
#             print(f"‚è≠Ô∏è  Skip (not in TARGET_DATASETS): {ds_root.name}")
#             continue

#     images_root = ds_root / "images"
#     labels_root = ds_root / "labels"

#     if not images_root.is_dir() or not labels_root.is_dir():
#         print(f"‚è≠Ô∏è  Skip (missing images/labels): {ds_root}")
#         continue

#     print(f"\n[Integrity Check] {ds_root.name}")
#     scan_and_clean_images(images_root)

#     cases = list_label_cases_for_dataset(ds_root)
#     if not cases:
#         print(f"‚è≠Ô∏è  Skip (no target label cases after flags/grid filter): {ds_root.name}")
#         continue

#     sp = get_split_info(ds_root)
#     print("\n" + "-" * 80)
#     print(f"[Dataset] {ds_root.name}")
#     print(f" - split_mode : {sp.get('split_mode')}")
#     print(f" - Cases      : {[c[0] for c in cases]}")
#     print(f" - CLASS_MODES: {CLASS_MODES}")
#     print("-" * 80)

#     for case_tag, labels_dirname in cases:
#         for class_mode in CLASS_MODES:
#             try:
#                 vroot, data_yaml, n_used, n_total, split_mode = build_runtime_view_root(
#                     ds_root=ds_root,
#                     case_labels_dirname=labels_dirname,
#                     train_fraction=TRAIN_FRACTION,
#                     seed=SEED,
#                     class_mode=class_mode,
#                 )
#                 pct = (n_used / max(1, n_total)) * 100.0
#                 print(f"  [Subset] case={case_tag} | class_mode={class_mode} | split_mode={split_mode} | train_used={n_used}/{n_total} ({pct:.1f}%)")
#             except Exception as e:
#                 print(f"  ‚è≠Ô∏è  Skip build failed: case={case_tag} | class_mode={class_mode} | err={e}")
#                 continue

#             for model_tag, ckpt_candidates in MODEL_SPECS:
#                 print(f"\n  [Train] case={case_tag} | class_mode={class_mode} | model={model_tag}")

#                 try:
#                     model = choose_model(ckpt_candidates)
#                 except Exception as e:
#                     print(f"    ‚ùå Model load failed: {ckpt_candidates} | err={e}")
#                     continue

#                 project_dir = OUT_ROOT / ds_root.name
#                 frac_tag = f"tr{int(TRAIN_FRACTION*100)}"
#                 name_dir = f"{model_tag}__{case_tag}__{class_mode}__{frac_tag}"
#                 project_dir.mkdir(parents=True, exist_ok=True)

#                 ok, used_b, used_sz, err = train_with_auto_oom(model, data_yaml, project_dir, name_dir, model_tag)
#                 if not ok:
#                     print(f"    ‚ùå Train failed: {err}")
#                     try:
#                         del model
#                     except Exception:
#                         pass
#                     if torch.cuda.is_available():
#                         torch.cuda.empty_cache()
#                         torch.cuda.ipc_collect()
#                     continue

#                 try:
#                     with suppress_output(SILENCE_ULTRA_OUTPUT):
#                         val_res = model.val(
#                             data=str(data_yaml),
#                             imgsz=used_sz if used_sz else IMG_SIZE,
#                             device=DEVICE,
#                             split="val",
#                             verbose=False,
#                             workers=NUM_WORKERS,
#                         )
#                 except Exception as e:
#                     print(f"    ‚ùå Val failed: {e}")
#                     val_res = None

#                 metrics = extract_metrics_dict(val_res)

#                 metrics_out = project_dir / name_dir / "metrics_eval.json"
#                 try:
#                     metrics_out.parent.mkdir(parents=True, exist_ok=True)
#                     with open(metrics_out, "w", encoding="utf-8") as f:
#                         json.dump(
#                             {
#                                 "dataset": ds_root.name,
#                                 "root": str(ds_root),
#                                 "case_tag": case_tag,
#                                 "labels_dirname": labels_dirname,
#                                 "class_mode": class_mode,
#                                 "model_tag": model_tag,
#                                 "ckpt_candidates": ckpt_candidates,
#                                 "train_fraction": TRAIN_FRACTION,
#                                 "train_used": n_used,
#                                 "train_total": n_total,
#                                 "data_yaml": str(data_yaml),
#                                 "runtime_view_root": str(vroot),
#                                 "split_mode": split_mode,
#                                 "effective_batch": used_b,
#                                 "effective_imgsz": used_sz,
#                                 "metrics": metrics,
#                             },
#                             f,
#                             ensure_ascii=False,
#                             indent=2,
#                         )
#                     print(f"    ‚úÖ Saved metrics: {metrics_out}")
#                 except Exception:
#                     pass

#                 row = {
#                     "dataset": ds_root.name,
#                     "model": model_tag,
#                     "case": case_tag,
#                     "labels_dir": labels_dirname,
#                     "class_mode": class_mode,
#                     "split_mode": split_mode,
#                     "train_fraction": TRAIN_FRACTION,
#                     "train_used": n_used,
#                     "train_total": n_total,
#                     "effective_batch": used_b,
#                     "effective_imgsz": used_sz,
#                 }
#                 for k in ["metrics/mAP50(B)", "metrics/mAP50-95(B)", "metrics/precision(B)", "metrics/recall(B)"]:
#                     if k in metrics:
#                         row[k] = metrics[k]

#                 summary_rows.append(row)

#                 try:
#                     del model
#                 except Exception:
#                     pass
#                 if torch.cuda.is_available():
#                     torch.cuda.empty_cache()
#                     torch.cuda.ipc_collect()

#             if CLEANUP_RUNTIME_VROOT:
#                 try:
#                     shutil.rmtree(vroot)
#                 except Exception:
#                     pass

# # -------------------------------------------------------------------------
# # Save summary CSV
# # -------------------------------------------------------------------------
# out_csv = OUT_ROOT / "summary_final_optimized_VOC_only.csv"
# try:
#     cols = set()
#     for r in summary_rows:
#         cols.update(r.keys())

#     base_cols = [
#         "dataset", "model", "case", "labels_dir",
#         "class_mode",
#         "split_mode",
#         "train_fraction", "train_used", "train_total",
#         "effective_batch", "effective_imgsz",
#     ]
#     extra_cols = sorted([c for c in cols if c not in set(base_cols)])
#     cols = base_cols + extra_cols

#     with open(out_csv, "w", newline="", encoding="utf-8") as f:
#         w = csv.DictWriter(f, fieldnames=cols)
#         w.writeheader()
#         for r in summary_rows:
#             w.writerow(r)

#     print("\n" + "=" * 80)
#     print(f"‚úÖ Saved summary CSV: {out_csv}")
#     print(f"‚úÖ Total runs: {len(summary_rows)}")
#     print("=" * 80)

# except Exception as e:
#     print(f"‚ö†Ô∏è  Summary CSV save failed: {e}")

# print("\n‚úÖ Cell 2 done.")


[TRAIN/EVAL] Start (Cell 1 summaries-aware + runtime path safe + object-only)
 - OUT_ROOT             : /home/ISW/project/object_detect
 - RUNTIME_VROOT_BASE   : /home/ISW/project/_runtime_dataset_views
 - TRAIN_FRACTION       : 1.0
 - NUM_WORKERS          : 8
 - CLASS_MODES          : ['object_only']
 - CLEANUP_RUNTIME_VROOT: True
 - TRAIN_USE_ORIGINAL   : True
 - TRAIN_USE_UNIFORM_SCALING_NOISE: False
 - TRAIN_USE_BOUNDARY_JITTER_NOISE : False
 - TARGET_DATASETS      : ['voc']
⏭️  Skip (not in TARGET_DATASETS): SKU-110K
⏭️  Skip (not in TARGET_DATASETS): kitti
⏭️  Skip (not in TARGET_DATASETS): homeobjects-3K
⏭️  Skip (not in TARGET_DATASETS): african-wildlife
⏭️  Skip (not in TARGET_DATASETS): construction-ppe
⏭️  Skip (not in TARGET_DATASETS): Custom_Blood
⏭️  Skip (not in TARGET_DATASETS): brain-tumor
⏭️  Skip (not in TARGET_DATASETS): BCCD
⏭️  Skip (not in TARGET_DATASETS): signature
⏭️  Skip (not in TARGET_DATASETS): medical-pills
⏭️  

                                                                 

   ✅  No corrupt images found.

--------------------------------------------------------------------------------
[Dataset] VOC
 - split_mode : explicit
 - Cases      : ['original']
 - CLASS_MODES: ['object_only']
--------------------------------------------------------------------------------
  [Subset] case=original | class_mode=object_only | split_mode=explicit | train_used=5717/5717 (100.0%)

  [Train] case=original | class_mode=object_only | model=yolov8n
    ✅ Saved metrics: /home/ISW/project/object_detect/VOC/yolov8n__original__object_only__tr100/metrics_eval.json

  [Train] case=original | class_mode=object_only | model=yolo11n
    ✅ Saved metrics: /home/ISW/project/object_detect/VOC/yolo11n__original__object_only__tr100/metrics_eval.json

  [Train] case=original | class_mode=object_only | model=detr
    ✅ Saved metrics: /home/ISW/project/object_detect/VOC/detr__original__object_only__tr100/metrics_eval.json

‚úÖ Saved summary CSV: /home/ISW/project/object_detect/summary