In [None]:
# Install the pinned Ultralytics release (8.3.165); the later cells import YOLO from it.
!pip install --upgrade "ultralytics==8.3.165"

In [None]:
# Mount Google Drive at /content/drive; the dataset zip and the result folders
# used by the later cells all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Path to dataset zip in Drive
ZIP_PATH = '/content/drive/MyDrive/Data_Frames.zip'

# Extract quietly (-q) into /content; the archive is expected to unpack to /content/Data_Frames.
# NOTE(review): without -o / -n, unzip may stop to ask before overwriting files if this cell is re-run — confirm.
!unzip -q "$ZIP_PATH" -d /content
# Dataset root as a plain string; the training cells rebind DATASET_ROOT to a pathlib.Path.
DATASET_ROOT = '/content/Data_Frames'

In [None]:
#  Baseline multi-lighting YOLO-v9 training & evaluation script
#  — Colby Bowie, July 2025
#
#  * Trains the seven Fixed_Num models (no augmentation)
#  * Evaluates each on day / shop / led test sets
#  * Writes the run folders to /content/drive/MyDrive/Baseline_fixed_results

from pathlib import Path
import yaml, gc, os, signal, time
from ultralytics import YOLO

# ---------- PATHS ----------------------------------------------------------
DATASET_ROOT = Path("/content/Data_Frames")
OUTPUT_ROOT  = Path("/content/drive/MyDrive/Baseline_fixed_results")

# ---------- TRAINING HYPERPARAMS (minimal-aug recipe) ----------------------
MODEL_WEIGHTS = "yolov9s.pt"
IMG_SIZE      = 640          # captures thin strands better, fits in 12 GB
BATCH         = -1           # AutoBatch => max per-GPU imgs
EPOCHS        = 200
PATIENCE      = 20
SEED          = 2025         # training RNG seed (passed to model.train)
DEVICE        = 0            # 0 = first GPU; "cpu" = CPU (slow); -1 auto-selects an idle GPU
SAVE_PERIOD   = 25           # keep every 25th epoch checkpoint

# Turn off all strong augs: every gain / probability below is 0.0.
# `scale` is a +/- gain around 1.0 in Ultralytics, so 0.0 (not 1.0) is the
# value that disables random rescaling; 1.0 would be the strongest setting.
# NOTE(review): `translate` is not listed here and keeps the Ultralytics
# default (0.1 per the docs) — add translate=0.0 for strictly no geometric aug.
AUG_KWARGS = dict(
    mosaic=0.0, mixup=0.0,
    scale=0.0,
    hsv_h=0.0, hsv_s=0.0, hsv_v=0.0,
    flipud=0.0, fliplr=0.0,
)

# ---------- DATA SPLIT CONFIGS --------------------------------------------
# Training configurations, in the order they are trained:
# single-lighting sets first, then the pairwise mixes, then all three together.
CFG_LIST = [f"{light}_only" for light in ("day", "shop", "led")]
CFG_LIST.extend(["day_shop", "day_led", "shop_led"])
CFG_LIST.append("all_lighting")

# Test-set tag -> image directory, relative to DATASET_ROOT.
TEST_SPLITS = {
    f"{light}_test": f"Testing/{light}_test/images"
    for light in ("day", "shop", "led")
}

# ---------- helper : create tiny YAML files on-the-fly ---------------------
def make_yaml(train_dir, val_dir, test_dir, out_path):
    """Write a single-class dataset YAML and return its path.

    Args:
        train_dir: Training image directory (str or Path); stored as a string.
        val_dir: Validation image directory (str or Path); stored as a string.
        test_dir: Test image directory (str or Path); stored as a string.
        out_path: Destination ``Path`` of the YAML file. Missing parent
            directories are created; an existing file is overwritten.

    Returns:
        ``out_path``, so the call can be used inline.
    """
    data = dict(
        train=str(train_dir),
        val=str(val_dir),
        test=str(test_dir),
        nc=1,
        names=["spaghetti"],
    )
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # Close the handle explicitly so the YAML is fully flushed to disk before
    # anything else reads it (the original left the open file to the GC).
    with out_path.open("w", encoding="utf-8") as fh:
        yaml.safe_dump(data, fh)
    return out_path

# ---------- main loop ------------------------------------------------------
# For every lighting configuration: write its data YAML, train (or resume)
# a model, then evaluate the best checkpoint on each lighting test set.
for cfg_name in CFG_LIST:
    print(f"\n Training model: {cfg_name}")
    run_dir   = OUTPUT_ROOT / cfg_name
    train_dir = DATASET_ROOT / "Fixed_Num" / cfg_name / "train/images"
    val_dir   = DATASET_ROOT / "Fixed_Num" / cfg_name / "val/images"

    # 1. create data YAML (Ultralytics wants *image* dirs in YAML)
    #    test=val here; the per-test-set eval YAMLs are written in step 4.
    yaml_path = make_yaml(train_dir, val_dir, val_dir,
                          run_dir / f"data_{cfg_name}.yaml")

    # 2. build / resume model
    ckpt = run_dir / "weights" / "last.pt"
    resuming = ckpt.exists()
    model = YOLO(str(ckpt) if resuming else MODEL_WEIGHTS)

    # 3. train
    try:
        model.train(
            data=str(yaml_path),
            epochs=EPOCHS,
            patience=PATIENCE,
            imgsz=IMG_SIZE,
            batch=BATCH,
            project=str(OUTPUT_ROOT),
            name=cfg_name,
            seed=SEED,
            device=DEVICE,
            save_period=SAVE_PERIOD,
            exist_ok=True,
            resume=resuming,
            **AUG_KWARGS,
        )
    except AssertionError as err:
        # Ultralytics asserts ("... nothing to resume") when last.pt belongs to
        # a run that already completed. On a re-run of this cell that must not
        # abort the remaining configs, so treat it as "already trained".
        # Any other assertion is re-raised untouched.
        if not (resuming and "nothing to resume" in str(err)):
            raise
        print(f" {cfg_name}: training already finished — skipping to evaluation")

    # reload best weights for evaluation
    best_pt = run_dir / "weights" / "best.pt"
    model = YOLO(str(best_pt))

    # 4. cross-lighting evaluation loop
    for tst_tag, tst_rel in TEST_SPLITS.items():
        eval_name = f"eval_{cfg_name}_{tst_tag}"
        eval_yaml = make_yaml(train_dir, val_dir,
                              DATASET_ROOT / tst_rel,
                              run_dir / f"{eval_name}.yaml")
        metrics = model.val(
            data=str(eval_yaml),
            split="test",
            imgsz=IMG_SIZE,
            device=DEVICE,
            save_json=True,  # exports COCO json for later stats if needed
            # Keep the eval artefacts next to the run on Drive; without
            # project/name they go to the default (ephemeral) runs/ folder.
            project=str(run_dir),
            name=eval_name,
            exist_ok=True,   # re-runs overwrite instead of creating eval_*2, eval_*3, ...
            verbose=False,
        )
        print(f"{cfg_name} on {tst_tag:9s}  "
              f"mAP50={metrics.box.map50:.3%}  mAP95={metrics.box.map:.3%}")

    # 5. cleanup: drop the model before the next config is built
    del model; gc.collect()

print("\n All baseline models trained & evaluated.")

In [None]:
#  re-evaluate all models
#  — Colby Bowie, July 2025
#
#  • Finds each best.pt in Baseline_fixed_results
#  • Re-runs YOLOv9 evaluation on every YAML that has a test split
#  • Saves new results under reval_<run>_<tag>/
#  • Builds a summary CSV (model × test_split × mAP50/95)

from pathlib import Path
import yaml
import pandas as pd
from ultralytics import YOLO
import gc, re

# ---------- CONFIG ---------------------------------------------------------
RUNS_ROOT   = Path("/content/drive/MyDrive/Baseline_fixed_results")   # << change if needed
DEVICE      = 0     # GPU index; pass "cpu" for CPU (Ultralytics treats -1 as "auto-select an idle GPU")
IMG_SIZE    = 640   # use same resolution as training
SAVE_JSON   = True  # save COCO json for each eval

# ----------- RUN -----------------------------------------------------------
# One dict per (model, YAML) evaluation; turned into the summary table below.
rows = []

# Compiled once: picks the test-set tag out of an eval YAML's file name.
TAG_RE = re.compile(r"(day_test|shop_test|led_test)")

for run_dir in sorted(p for p in RUNS_ROOT.iterdir() if p.is_dir()):
    best_pt = run_dir / "weights" / "best.pt"
    if not best_pt.exists():
        print(f" No best.pt in {run_dir.name} — skipping")
        continue

    # grab all YAML files in run folder (direct children or eval_* sub-dirs)
    yaml_paths = sorted([*run_dir.glob("*.yaml"), *run_dir.glob("eval_*/*.yaml")])
    if not yaml_paths:
        print(f" No YAMLs found in {run_dir.name}")
        continue

    model = YOLO(str(best_pt))
    print(f"\n Evaluating {run_dir.name}  ({len(yaml_paths)} YAMLs found)")

    for yml in yaml_paths:
        with open(yml, "r") as f:
            data_cfg = yaml.safe_load(f)

        # Skip anything that is not a mapping with a non-empty test path.
        # An empty YAML file loads as None, which the old `in` check crashed on.
        if not isinstance(data_cfg, dict) or not data_cfg.get("test"):
            continue

        # derive a tag from YAML filename, e.g. eval_day_only_shop_test.yaml ➜ shop_test
        tag_match = TAG_RE.search(yml.stem)
        tag = tag_match.group(1) if tag_match else yml.stem

        out_name = f"reval_{run_dir.name}_{tag}"

        # The previous "results.csv exists -> skip" guard never fired, since
        # validation does not write results.csv, so every re-run piled up
        # reval_*2, reval_*3, ... folders. exist_ok=True reuses one folder per
        # (run, tag) and keeps the summary complete on every run.
        metrics = model.val(
            data=str(yml),
            split="test",
            imgsz=IMG_SIZE,
            device=DEVICE,
            save_json=SAVE_JSON,
            project=str(run_dir),
            name=out_name,
            exist_ok=True,
            verbose=False,
        )

        # collect for summary (fractions converted to percentages)
        rows.append({
            "model": run_dir.name,
            "test_split": tag,
            "mAP50":  metrics.box.map50 * 100,
            "mAP95":  metrics.box.map * 100,
            "precision": metrics.box.mp * 100,   # mean precision  (mp)
            "recall":    metrics.box.mr * 100    # mean recall     (mr)
        })

    # drop the model before the next run's weights are loaded
    del model; gc.collect()

# ---------- build summary table -------------------------------------------
# Persist the collected rows as a CSV under RUNS_ROOT and echo them with
# one-decimal metric columns; report when nothing was evaluated.
if not rows:
    print(" No evaluations were run — check YAML/test paths.")
else:
    df = pd.DataFrame(rows).sort_values(["model", "test_split"])
    df.to_csv(RUNS_ROOT / "cross_light_summary.csv", index=False)
    print("\n Summary saved to cross_light_summary.csv\n")

    # every metric column shares the same one-decimal formatter
    one_decimal = "{:.1f}".format
    metric_formatters = dict.fromkeys(("mAP50", "mAP95", "precision", "recall"), one_decimal)
    print(df.to_string(index=False, formatters=metric_formatters))

In [None]:
#  Baseline multi-lighting YOLO-v9 training & evaluation script (Best_case splits)
#  — Colby Bowie, July 2025
#
#  * Trains the four multi-lighting Best_case models (no augmentation)
#  * Evaluates each on day / shop / led test sets
#  * Writes the run folders to /content/drive/MyDrive/Baseline_best_results

from pathlib import Path
import yaml, gc, os, signal, time
from ultralytics import YOLO

# ---------- PATHS ----------------------------------------------------------
DATASET_ROOT = Path("/content/Data_Frames")
OUTPUT_ROOT  = Path("/content/drive/MyDrive/Baseline_best_results")

# ---------- TRAINING HYPERPARAMS (minimal-aug recipe) ----------------------
MODEL_WEIGHTS = "yolov9s.pt"
IMG_SIZE      = 640          # captures thin strands better, fits in 12 GB
BATCH         = -1           # AutoBatch => max per-GPU imgs
EPOCHS        = 200
PATIENCE      = 20
SEED          = 2025         # training RNG seed (passed to model.train)
DEVICE        = 0            # 0 = first GPU; "cpu" = CPU (slow); -1 auto-selects an idle GPU
SAVE_PERIOD   = 25           # keep every 25th epoch checkpoint

# Turn off all strong augs: every gain / probability below is 0.0.
# `scale` is a +/- gain around 1.0 in Ultralytics, so 0.0 (not 1.0) is the
# value that disables random rescaling; 1.0 would be the strongest setting.
# NOTE(review): `translate` is not listed here and keeps the Ultralytics
# default (0.1 per the docs) — add translate=0.0 for strictly no geometric aug.
AUG_KWARGS = dict(
    mosaic=0.0, mixup=0.0,
    scale=0.0,
    hsv_h=0.0, hsv_s=0.0, hsv_v=0.0,
    flipud=0.0, fliplr=0.0,
)

# ---------- DATA SPLIT CONFIGS --------------------------------------------
# Training configurations, in the order they are trained:
# the three pairwise lighting mixes, then all three together.
CFG_LIST = ["day_shop", "day_led", "shop_led"]
CFG_LIST.append("all_lighting")

# Test-set tag -> image directory, relative to DATASET_ROOT.
TEST_SPLITS = {
    f"{light}_test": f"Testing/{light}_test/images"
    for light in ("day", "shop", "led")
}

# ---------- helper : create tiny YAML files on-the-fly ---------------------
def make_yaml(train_dir, val_dir, test_dir, out_path):
    """Write a single-class dataset YAML and return its path.

    Args:
        train_dir: Training image directory (str or Path); stored as a string.
        val_dir: Validation image directory (str or Path); stored as a string.
        test_dir: Test image directory (str or Path); stored as a string.
        out_path: Destination ``Path`` of the YAML file. Missing parent
            directories are created; an existing file is overwritten.

    Returns:
        ``out_path``, so the call can be used inline.
    """
    data = dict(
        train=str(train_dir),
        val=str(val_dir),
        test=str(test_dir),
        nc=1,
        names=["spaghetti"],
    )
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # Close the handle explicitly so the YAML is fully flushed to disk before
    # anything else reads it (the original left the open file to the GC).
    with out_path.open("w", encoding="utf-8") as fh:
        yaml.safe_dump(data, fh)
    return out_path

# ---------- main loop ------------------------------------------------------
# For every lighting configuration: write its data YAML, train (or resume)
# a model, then evaluate the best checkpoint on each lighting test set.
for cfg_name in CFG_LIST:
    print(f"\n Training model: {cfg_name}")
    run_dir   = OUTPUT_ROOT / cfg_name
    train_dir = DATASET_ROOT / "Best_case" / cfg_name / "train/images"
    val_dir   = DATASET_ROOT / "Best_case" / cfg_name / "val/images"

    # 1. create data YAML (Ultralytics wants *image* dirs in YAML)
    #    test=val here; the per-test-set eval YAMLs are written in step 4.
    yaml_path = make_yaml(train_dir, val_dir, val_dir,
                          run_dir / f"data_{cfg_name}.yaml")

    # 2. build / resume model
    ckpt = run_dir / "weights" / "last.pt"
    resuming = ckpt.exists()
    model = YOLO(str(ckpt) if resuming else MODEL_WEIGHTS)

    # 3. train
    try:
        model.train(
            data=str(yaml_path),
            epochs=EPOCHS,
            patience=PATIENCE,
            imgsz=IMG_SIZE,
            batch=BATCH,
            project=str(OUTPUT_ROOT),
            name=cfg_name,
            seed=SEED,
            device=DEVICE,
            save_period=SAVE_PERIOD,
            exist_ok=True,
            resume=resuming,
            **AUG_KWARGS,
        )
    except AssertionError as err:
        # Ultralytics asserts ("... nothing to resume") when last.pt belongs to
        # a run that already completed. On a re-run of this cell that must not
        # abort the remaining configs, so treat it as "already trained".
        # Any other assertion is re-raised untouched.
        if not (resuming and "nothing to resume" in str(err)):
            raise
        print(f" {cfg_name}: training already finished — skipping to evaluation")

    # reload best weights for evaluation
    best_pt = run_dir / "weights" / "best.pt"
    model = YOLO(str(best_pt))

    # 4. cross-lighting evaluation loop
    for tst_tag, tst_rel in TEST_SPLITS.items():
        eval_name = f"eval_{cfg_name}_{tst_tag}"
        eval_yaml = make_yaml(train_dir, val_dir,
                              DATASET_ROOT / tst_rel,
                              run_dir / f"{eval_name}.yaml")
        metrics = model.val(
            data=str(eval_yaml),
            split="test",
            imgsz=IMG_SIZE,
            device=DEVICE,
            save_json=True,  # exports COCO json for later stats if needed
            # Keep the eval artefacts next to the run on Drive; without
            # project/name they go to the default (ephemeral) runs/ folder.
            project=str(run_dir),
            name=eval_name,
            exist_ok=True,   # re-runs overwrite instead of creating eval_*2, eval_*3, ...
            verbose=False,
        )
        print(f"{cfg_name} on {tst_tag:9s}  "
              f"mAP50={metrics.box.map50:.3%}  mAP95={metrics.box.map:.3%}")

    # 5. cleanup: drop the model before the next config is built
    del model; gc.collect()

print("\n All baseline models trained & evaluated.")

In [None]:
#  re-evaluate all models
#  — Colby Bowie, July 2025
#
#  • Finds each best.pt in Baseline_best_results
#  • Re-runs YOLOv9 evaluation on every YAML that has a test split
#  • Saves new results under reval_<run>_<tag>/
#  • Builds a summary CSV (model × test_split × mAP50/95)

from pathlib import Path
import yaml
import pandas as pd
from ultralytics import YOLO
import gc, re

# ---------- CONFIG ---------------------------------------------------------
RUNS_ROOT   = Path("/content/drive/MyDrive/Baseline_best_results")   # << change if needed
DEVICE      = 0     # GPU index; pass "cpu" for CPU (Ultralytics treats -1 as "auto-select an idle GPU")
IMG_SIZE    = 640   # use same resolution as training
SAVE_JSON   = True  # save COCO json for each eval

# ----------- RUN -----------------------------------------------------------
# One dict per (model, YAML) evaluation; turned into the summary table below.
rows = []

# Compiled once: picks the test-set tag out of an eval YAML's file name.
TAG_RE = re.compile(r"(day_test|shop_test|led_test)")

for run_dir in sorted(p for p in RUNS_ROOT.iterdir() if p.is_dir()):
    best_pt = run_dir / "weights" / "best.pt"
    if not best_pt.exists():
        print(f" No best.pt in {run_dir.name} — skipping")
        continue

    # grab all YAML files in run folder (direct children or eval_* sub-dirs)
    yaml_paths = sorted([*run_dir.glob("*.yaml"), *run_dir.glob("eval_*/*.yaml")])
    if not yaml_paths:
        print(f" No YAMLs found in {run_dir.name}")
        continue

    model = YOLO(str(best_pt))
    print(f"\n Evaluating {run_dir.name}  ({len(yaml_paths)} YAMLs found)")

    for yml in yaml_paths:
        with open(yml, "r") as f:
            data_cfg = yaml.safe_load(f)

        # Skip anything that is not a mapping with a non-empty test path.
        # An empty YAML file loads as None, which the old `in` check crashed on.
        if not isinstance(data_cfg, dict) or not data_cfg.get("test"):
            continue

        # derive a tag from YAML filename, e.g. eval_day_only_shop_test.yaml ➜ shop_test
        tag_match = TAG_RE.search(yml.stem)
        tag = tag_match.group(1) if tag_match else yml.stem

        out_name = f"reval_{run_dir.name}_{tag}"

        # The previous "results.csv exists -> skip" guard never fired, since
        # validation does not write results.csv, so every re-run piled up
        # reval_*2, reval_*3, ... folders. exist_ok=True reuses one folder per
        # (run, tag) and keeps the summary complete on every run.
        metrics = model.val(
            data=str(yml),
            split="test",
            imgsz=IMG_SIZE,
            device=DEVICE,
            save_json=SAVE_JSON,
            project=str(run_dir),
            name=out_name,
            exist_ok=True,
            verbose=False,
        )

        # collect for summary (fractions converted to percentages)
        rows.append({
            "model": run_dir.name,
            "test_split": tag,
            "mAP50":  metrics.box.map50 * 100,
            "mAP95":  metrics.box.map * 100,
            "precision": metrics.box.mp * 100,   # mean precision  (mp)
            "recall":    metrics.box.mr * 100    # mean recall     (mr)
        })

    # drop the model before the next run's weights are loaded
    del model; gc.collect()

# ---------- build summary table -------------------------------------------
# Persist the collected rows as a CSV under RUNS_ROOT and echo them with
# one-decimal metric columns; report when nothing was evaluated.
if not rows:
    print(" No evaluations were run — check YAML/test paths.")
else:
    df = pd.DataFrame(rows).sort_values(["model", "test_split"])
    df.to_csv(RUNS_ROOT / "cross_light_summary.csv", index=False)
    print("\n Summary saved to cross_light_summary.csv\n")

    # every metric column shares the same one-decimal formatter
    one_decimal = "{:.1f}".format
    metric_formatters = dict.fromkeys(("mAP50", "mAP95", "precision", "recall"), one_decimal)
    print(df.to_string(index=False, formatters=metric_formatters))