# FPUS23 — YOLO Baseline vs Optimized (Balanced) on Colab

- Purpose: compare YOLO baselines against optimized training on a balanced train split (no denoising).
- Toggles: RUN_BASELINES (n/s/m), RUN_OPTIMIZED (s/m; optional n).
- Optimized = balanced train split only (YOLO11 is anchor‑free, so custom anchors are ignored).
- Pipeline: cleanup → prepare → verify → (if optimized) balance → COCO → YOLO → train → save to Drive.
- Outputs: /content/drive/MyDrive/FPUS23_runs/<run_name>/

In [None]:
# Clone repo and install pinned requirements
# The repository is checked out to /content/fpus23; re-running this cell on an
# existing checkout makes `git clone` report an error, but the cell continues.
!git clone https://github.com/Srinivas-Raghav-VC/MultiFetalOrgan-Detection.git /content/fpus23
# Work from the repo root: later cells call scripts by the relative path
# 'New folder/scripts/...'.
%cd /content/fpus23
!pip -q install -r requirements_colab.txt
# gdown is used by the dataset cell to download the dataset zip from Google Drive.
!pip -q install gdown

In [None]:
# Pick a batch size from the detected GPU and publish it through the
# FPUS23_BATCH environment variable, which the training cells read.
import os
import torch

gpu = 'CPU'
if torch.cuda.is_available():
    gpu = torch.cuda.get_device_name(0)
print('GPU:', gpu)

# (substring of the device name, batch size); the first match wins and
# anything else (including CPU) falls back to 12.
_BATCH_BY_GPU = (('T4', '16'), ('A100', '24'))
os.environ['FPUS23_BATCH'] = next(
    (batch for tag, batch in _BATCH_BY_GPU if tag in gpu),
    '12',
)
print('Recommended batch:', os.environ['FPUS23_BATCH'])


In [None]:
# Optional: save results to Google Drive
# DRIVE_RESULTS is the directory handed to the training cells as --project.
# With SAVE_TO_DRIVE = False it is redirected to the local runs directory, so
# the "saving to local" message is actually true and nothing is written to an
# unmounted /content/drive path.
import pathlib as p

SAVE_TO_DRIVE = True
DRIVE_RESULTS = '/content/drive/MyDrive/FPUS23_runs'
if SAVE_TO_DRIVE:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)
else:
    # Same location the preview/compare cells read results.csv from.
    # NOTE(review): assumes the training script writes to <project>/<name> — confirm.
    DRIVE_RESULTS = '/content/fpus23_project/runs/detect'

# Create the target directory in both modes so training never starts with a
# missing --project directory.
p.Path(DRIVE_RESULTS).mkdir(parents=True, exist_ok=True)
print('Results will be saved to' if SAVE_TO_DRIVE else 'Saving to local', DRIVE_RESULTS)


In [None]:
# Section switches: the baseline and optimized training cells check these
# flags before launching anything.
RUN_BASELINES = False   # flip to True to run the baseline cells
RUN_OPTIMIZED = True    # leave True to run the optimized cells
print(f'RUN_BASELINES = {RUN_BASELINES} | RUN_OPTIMIZED = {RUN_OPTIMIZED}')


In [None]:
# Dataset: download & extract (or mount Drive and copy)
# Primary path: fetch the dataset zip by Google Drive file id with gdown and
# unpack it into /content/FPUS23_Dataset. `|| true` keeps the shell exit
# status zero when unzip/ls fail (e.g. the download did not produce a zip).
!gdown https://drive.google.com/uc?id=1LL-r2hNiP6C190UBSE4v1FFCF3OQT9N3 -O /content/FPUS23_Dataset.zip
!unzip -q /content/FPUS23_Dataset.zip -d /content/FPUS23_Dataset || true
!ls -lh /content/FPUS23_Dataset || true
# Fallback path: mount Drive, copy the zip from MyDrive only when no zip is
# present locally (`test -f`), and unzip only when the dataset directory is
# still missing (`test -d`).
from google.colab import drive; drive.mount('/content/drive', force_remount=True)
!test -f /content/FPUS23_Dataset.zip || cp '/content/drive/MyDrive/FPUS23_Dataset.zip' /content/ || true
!test -d /content/FPUS23_Dataset || unzip -q /content/FPUS23_Dataset.zip -d /content/FPUS23_Dataset || true

In [None]:
# Clean previous YOLO splits to avoid stale files on rerun
# Removes only the images/ and labels/ trees under fpus23_yolo plus the
# /content/tmp/vis_val directory; other files in fpus23_yolo are left alone.
# `|| true` keeps the shell exit status zero.
!rm -rf /content/fpus23_project/dataset/fpus23_yolo/images /content/fpus23_project/dataset/fpus23_yolo/labels /content/tmp/vis_val || true


In [None]:
# Prepare dataset (XML -> YOLO -> COCO)
# The script path is relative, so this cell must run from the repo root
# (/content/fpus23). --dataset-root points at the extracted dataset and
# --project-root at the working tree used by every later cell.
!python 'New folder/scripts/prepare_fpus23.py' \
  --dataset-root /content/FPUS23_Dataset/Dataset \
  --project-root /content/fpus23_project
# Quick check: list the generated YOLO dataset directory.
!ls -lh /content/fpus23_project/dataset/fpus23_yolo

In [None]:
# Sanity checks (train/val/test)
# Runs the verification script once per split against the prepared data.yaml.
# NOTE(review): --limit 16 presumably caps how many samples are checked per
# split — confirm in verify_yolo_dataset.py.
!python 'New folder/scripts/tools/verify_yolo_dataset.py' --data /content/fpus23_project/dataset/fpus23_yolo/data.yaml --split train --limit 16
!python 'New folder/scripts/tools/verify_yolo_dataset.py' --data /content/fpus23_project/dataset/fpus23_yolo/data.yaml --split val --limit 16
!python 'New folder/scripts/tools/verify_yolo_dataset.py' --data /content/fpus23_project/dataset/fpus23_yolo/data.yaml --split test --limit 16

## Baseline training

In [None]:
# Baseline training with yolo11s on the original (unbalanced) dataset.
# Runs only when RUN_BASELINES is True; raises CalledProcessError if the
# training script exits with a non-zero status.
import os
import subprocess
import sys

if RUN_BASELINES:
    cmd = [
        sys.executable, 'New folder/scripts/train_yolo_fpus23_phase1.py',
        '--data', '/content/fpus23_project/dataset/fpus23_yolo/data.yaml',
        '--model', 'yolo11s.pt',
        '--epochs', '100',
        # Batch size recommended by the GPU-detection cell; 16 if it was skipped.
        '--batch', os.environ.get('FPUS23_BATCH', '16'),
        '--imgsz', '768',
        # DRIVE_RESULTS is a Python variable; the `$NAME` shell syntax is not
        # valid inside a Python list.
        '--project', str(DRIVE_RESULTS),
        '--name', 'fpus23_baseline_s',
    ]
    print('Running baseline:', ' '.join(cmd))
    subprocess.run(cmd, check=True)


### Baseline with yolo11n (nano)

In [None]:
# Baseline training with yolo11n (nano) on the original (unbalanced) dataset.
# Runs only when RUN_BASELINES is True; raises CalledProcessError if the
# training script exits with a non-zero status.
import os
import subprocess
import sys

if RUN_BASELINES:
    cmd = [
        sys.executable, 'New folder/scripts/train_yolo_fpus23_phase1.py',
        '--data', '/content/fpus23_project/dataset/fpus23_yolo/data.yaml',
        '--model', 'yolo11n.pt',
        '--epochs', '100',
        # Batch size recommended by the GPU-detection cell; 16 if it was skipped.
        '--batch', os.environ.get('FPUS23_BATCH', '16'),
        '--imgsz', '768',
        # DRIVE_RESULTS is a Python variable; the `$NAME` shell syntax is not
        # valid inside a Python list.
        '--project', str(DRIVE_RESULTS),
        '--name', 'fpus23_baseline_n',
    ]
    print('Running baseline:', ' '.join(cmd))
    subprocess.run(cmd, check=True)


## Optimized training (anchors + balancing + optional despeckle)

In [None]:
# 1) Custom anchors
!python 'New folder/scripts/calculate_fpus23_anchors.py' --data /content/fpus23_project/dataset/fpus23_yolo/data.yaml --num-clusters 9
  --out /content/fpus23_project/outputs/fpus23_anchors.yaml
!cat /content/fpus23_project/outputs/fpus23_anchors.yaml || true

In [None]:
# 2) Balance dataset in COCO, then convert to YOLO for train split
# Step 1: run the balancing script on the COCO copy of the dataset. The next
# command consumes train_balanced.json and images_balanced/train from that root.
!python 'New folder/scripts/balance_fpus23_dataset.py' --coco-root /content/fpus23_project/dataset/fpus23_coco
# Step 2: convert the balanced COCO train split into a separate YOLO dataset
# (fpus23_yolo_balanced).
# NOTE(review): --orig-data-yaml presumably lets the converter reuse the class
# names and val/test entries of the original data.yaml — confirm in coco_to_yolo.py.
!python 'New folder/scripts/tools/coco_to_yolo.py' \
  --coco-json /content/fpus23_project/dataset/fpus23_coco/train_balanced.json \
  --images-dir /content/fpus23_project/dataset/fpus23_coco/images_balanced/train \
  --out-yolo-root /content/fpus23_project/dataset/fpus23_yolo_balanced \
  --orig-data-yaml /content/fpus23_project/dataset/fpus23_yolo/data.yaml
# Quick check: first few generated train label files.
!ls -lh /content/fpus23_project/dataset/fpus23_yolo_balanced/labels/train | head

In [None]:
# 3) (Optional) Offline despeckle and create a new yaml pointing to despeckled train images
# Disabled by default: DESPECKLED_YAML stays None, so the YAML-selection cell
# keeps the balanced dataset. When enabled, the despeckled YAML replaces the
# balanced one there.
# NOTE(review): the commented command reads the ORIGINAL train images
# (fpus23_yolo/images/train), not the balanced ones — confirm that is intended.
# Uncomment to enable
# !python 'New folder/scripts/preprocess_fpus23_despeckle.py' \
#   --input /content/fpus23_project/dataset/fpus23_yolo/images/train \
#   --output /content/fpus23_project/dataset/fpus23_yolo_despeckled/images/train \
#   --data-yaml /content/fpus23_project/dataset/fpus23_yolo/data.yaml
# DESPECKLED_YAML = '/content/fpus23_project/dataset/fpus23_yolo_despeckled/data.yaml'
DESPECKLED_YAML = None

In [None]:
# Pick optimized train yaml (balanced YOLO); fallback to original
# Priority: despeckled YAML (when set) > balanced YAML > original YAML.
# The fallback to the original dataset is applied when the preferred file does
# not exist, e.g. because the balancing cell was skipped or failed.
import itertools
import os

BALANCED_YAML = '/content/fpus23_project/dataset/fpus23_yolo_balanced/data.yaml'
ORIGINAL_YAML = '/content/fpus23_project/dataset/fpus23_yolo/data.yaml'

OPT_TRAIN_YAML = DESPECKLED_YAML or BALANCED_YAML
if not os.path.isfile(OPT_TRAIN_YAML):
    print('WARNING:', OPT_TRAIN_YAML, 'not found; falling back to', ORIGINAL_YAML)
    OPT_TRAIN_YAML = ORIGINAL_YAML
print('Optimized train YAML:', OPT_TRAIN_YAML)

# Preview the first 120 lines of the YAML that will actually be used (not
# always the balanced one).
if os.path.isfile(OPT_TRAIN_YAML):
    with open(OPT_TRAIN_YAML, encoding='utf-8') as yaml_file:
        print(''.join(itertools.islice(yaml_file, 120)), end='')

In [None]:
# 4) Optimized training (yolo11s, balanced train split + custom anchors)
# The command is built as an argument list so the Python variables
# OPT_TRAIN_YAML and DRIVE_RESULTS are passed explicitly; inside an os.system
# shell string `$OPT_TRAIN_YAML` / `$DRIVE_RESULTS` expand to nothing because
# they are not environment variables.
# Runs only when RUN_OPTIMIZED is True; raises CalledProcessError if the
# training script exits with a non-zero status.
import itertools
import os
import subprocess
import sys

if RUN_OPTIMIZED:
    run_name = 'fpus23_optimized'
    cmd = [
        sys.executable, 'New folder/scripts/train_yolo_fpus23_phase1.py',
        '--data', str(OPT_TRAIN_YAML),
        '--model', 'yolo11s.pt',
        '--custom-anchors', '/content/fpus23_project/outputs/fpus23_anchors.yaml',
        '--epochs', '100',
        # Same batch source as the yolo11s baseline so the comparison is fair;
        # 16 (the previous hard-coded value) if the GPU cell was skipped.
        '--batch', os.environ.get('FPUS23_BATCH', '16'),
        '--project', str(DRIVE_RESULTS),
        '--imgsz', '768',
        '--name', run_name,
    ]
    print('Running optimized:', ' '.join(cmd))
    subprocess.run(cmd, check=True)

    # Preview the first 10 lines of results.csv. Look under --project first,
    # then under the local runs directory the notebook used previously.
    candidates = (
        os.path.join(str(DRIVE_RESULTS), run_name, 'results.csv'),
        '/content/fpus23_project/runs/detect/' + run_name + '/results.csv',
    )
    for results_csv in candidates:
        if os.path.isfile(results_csv):
            with open(results_csv, encoding='utf-8') as csv_file:
                print(''.join(itertools.islice(csv_file, 10)), end='')
            break
    else:
        print('results.csv not found for', run_name)


### Optimized with yolo11n (anchors + balanced train)

In [None]:
# Optimized training (yolo11n, balanced train split + custom anchors)
# The command is built as an argument list so the Python variables
# OPT_TRAIN_YAML and DRIVE_RESULTS are passed explicitly; inside an os.system
# shell string `$OPT_TRAIN_YAML` / `$DRIVE_RESULTS` expand to nothing because
# they are not environment variables.
# Runs only when RUN_OPTIMIZED is True; raises CalledProcessError if the
# training script exits with a non-zero status.
import itertools
import os
import subprocess
import sys

if RUN_OPTIMIZED:
    run_name = 'fpus23_optimized_n'
    cmd = [
        sys.executable, 'New folder/scripts/train_yolo_fpus23_phase1.py',
        '--data', str(OPT_TRAIN_YAML),
        '--model', 'yolo11n.pt',
        '--custom-anchors', '/content/fpus23_project/outputs/fpus23_anchors.yaml',
        '--epochs', '100',
        # Batch size recommended by the GPU-detection cell; 16 if it was skipped.
        '--batch', os.environ.get('FPUS23_BATCH', '16'),
        '--imgsz', '768',
        '--project', str(DRIVE_RESULTS),
        '--name', run_name,
    ]
    print('Running optimized:', ' '.join(cmd))
    subprocess.run(cmd, check=True)

    # Preview the first 10 lines of results.csv. Look under --project first,
    # then under the local runs directory the notebook used previously.
    candidates = (
        os.path.join(str(DRIVE_RESULTS), run_name, 'results.csv'),
        '/content/fpus23_project/runs/detect/' + run_name + '/results.csv',
    )
    for results_csv in candidates:
        if os.path.isfile(results_csv):
            with open(results_csv, encoding='utf-8') as csv_file:
                print(''.join(itertools.islice(csv_file, 10)), end='')
            break
    else:
        print('results.csv not found for', run_name)


## Compare results

In [None]:
# Compare the final epoch of the yolo11s baseline and the yolo11s optimized run.
import pandas as pd
import pathlib as p


def first_existing_csv(candidates):
    """Return the first path in *candidates* that is an existing file, else None."""
    return next((path for path in candidates if path.is_file()), None)


def results_candidates(*run_names):
    """Return possible results.csv paths for the given run names, in lookup order.

    Each run name is tried under DRIVE_RESULTS (the --project directory of the
    training cells, when that variable is defined) and then under the local
    runs/detect directory.
    """
    roots = []
    drive_root = globals().get('DRIVE_RESULTS')
    if drive_root:
        roots.append(p.Path(drive_root))
    roots.append(p.Path('/content/fpus23_project/runs/detect'))
    return [root / name / 'results.csv' for name in run_names for root in roots]


# The baseline cells name their runs fpus23_baseline_{s,n,m}; the plain
# 'fpus23_baseline' name is kept as a last-resort lookup for older runs.
base_csv = first_existing_csv(results_candidates('fpus23_baseline_s', 'fpus23_baseline'))
opt_csv = first_existing_csv(results_candidates('fpus23_optimized'))
b = pd.read_csv(base_csv) if base_csv is not None else None
o = pd.read_csv(opt_csv) if opt_csv is not None else None
if b is not None and o is not None:
    print('Baseline last row:')
    print(b.tail(1).T)
    print('\nOptimized last row:')
    print(o.tail(1).T)
else:
    print('Missing results.csv files')

### Optional: Baseline with yolo11m (may improve mAP)

In [None]:
# Optional baseline training with yolo11m on the original (unbalanced) dataset.
# Runs only when RUN_BASELINES is True; raises CalledProcessError if the
# training script exits with a non-zero status.
import os
import subprocess
import sys

if RUN_BASELINES:
    cmd = [
        sys.executable, 'New folder/scripts/train_yolo_fpus23_phase1.py',
        '--data', '/content/fpus23_project/dataset/fpus23_yolo/data.yaml',
        '--model', 'yolo11m.pt',
        '--epochs', '100',
        # Fixed batch of 8 for this model. It must be a string: an int breaks
        # both ' '.join(cmd) and subprocess.run.
        '--batch', '8',
        '--imgsz', '768',
        # DRIVE_RESULTS is a Python variable; the `$NAME` shell syntax is not
        # valid inside a Python list.
        '--project', str(DRIVE_RESULTS),
        '--name', 'fpus23_baseline_m',
    ]
    print('Running baseline:', ' '.join(cmd))
    subprocess.run(cmd, check=True)


### Optional: Optimized with yolo11m (anchors + balancing)

In [None]:
# Optional optimized training (yolo11m, balanced train split + custom anchors)
# The command is built as an argument list so the Python variables
# OPT_TRAIN_YAML and DRIVE_RESULTS are passed explicitly; inside an os.system
# shell string `$OPT_TRAIN_YAML` / `$DRIVE_RESULTS` expand to nothing because
# they are not environment variables.
# Runs only when RUN_OPTIMIZED is True; raises CalledProcessError if the
# training script exits with a non-zero status.
import itertools
import os
import subprocess
import sys

if RUN_OPTIMIZED:
    run_name = 'fpus23_optimized_m'
    cmd = [
        sys.executable, 'New folder/scripts/train_yolo_fpus23_phase1.py',
        '--data', str(OPT_TRAIN_YAML),
        '--model', 'yolo11m.pt',
        '--custom-anchors', '/content/fpus23_project/outputs/fpus23_anchors.yaml',
        '--epochs', '100',
        # Fixed batch of 8, matching the yolo11m baseline cell.
        '--batch', '8',
        '--imgsz', '768',
        '--project', str(DRIVE_RESULTS),
        '--name', run_name,
    ]
    print('Running optimized:', ' '.join(cmd))
    subprocess.run(cmd, check=True)

    # Preview the first 10 lines of results.csv. Look under --project first,
    # then under the local runs directory the notebook used previously.
    candidates = (
        os.path.join(str(DRIVE_RESULTS), run_name, 'results.csv'),
        '/content/fpus23_project/runs/detect/' + run_name + '/results.csv',
    )
    for results_csv in candidates:
        if os.path.isfile(results_csv):
            with open(results_csv, encoding='utf-8') as csv_file:
                print(''.join(itertools.islice(csv_file, 10)), end='')
            break
    else:
        print('results.csv not found for', run_name)
