# Elite Dangerous Core Mining - Segmentation Detection Pipeline

Trains YOLOv8 **instance segmentation** models to identify and outline core
asteroids in Elite Dangerous screenshots in real time. Segmentation models draw
a polygon mask over each detected core rather than just a rectangle - a better
fit for the irregular "popcorn" asteroid shapes.

## Labeling in Roboflow

Use Roboflow's **Smart Select (SAM2/SAM3) polygon tool**: click the asteroid,
then confirm the auto-traced outline. The SAM model is only Roboflow's internal
annotation aid - it is unrelated to the YOLOv8 model trained in this notebook.

When exporting, choose: **YOLOv8 -> Segmentation** format.
The label files will contain polygon point coordinates instead of just a bbox.

## Dataset structure

```
datasets/
  ice/
    images/    <- all .png or .jpg screenshots, flat - no subfolders
    labels/    <- matching YOLO segmentation .txt files
  metallic/
    images/
    labels/
  rocky/
    images/
    labels/
  unified/     <- optional: all ring types combined
    images/
    labels/
```

YOLO segmentation label format (one line per core):
`0 x1 y1 x2 y2 ... xn yn`
Class index (0 = core) followed by normalized polygon point pairs (all 0.0-1.0).
Roboflow exports this automatically when you pick the segmentation export format.

## Models trained

We train and compare four YOLOv8-seg model sizes per ring type:
- yolov8n-seg  fastest, smallest (~3.4M params)
- yolov8s-seg  good balance (~11.8M params)
- yolov8m-seg  higher accuracy (~27.3M params)
- yolov8l-seg  best accuracy, slower inference (~46.0M params)

All four fit on RTX 3070 8GB at batch=16. K-fold cross-validation gives reliable
metric estimates without requiring manual train/val splits.


## 1. Install dependencies

Run once. Restart the kernel after.

In [None]:
import subprocess
import sys

# Minimum versions of every third-party package this notebook relies on.
packages = [
    "ultralytics>=8.2.0",
    "opencv-python>=4.9.0",
    "pyyaml>=6.0.1",
    "matplotlib>=3.9.0",
    "scikit-learn>=1.5.0",
    "pandas>=2.2.0",
    "Pillow>=10.3.0",
    "onnx>=1.16.0",
    "onnxruntime-gpu>=1.18.0",
]

# Invoke pip through the running interpreter so the packages are installed
# into the same environment as the kernel. check=True raises on failure.
pip_command = [sys.executable, "-m", "pip", "install", "--quiet", *packages]
subprocess.run(pip_command, check=True)
print("All packages installed.")


## 2. Imports and GPU check

In [None]:
import os
import sys
import json
import math
import shutil
import csv
import yaml
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.patches import Polygon as MplPolygon
from matplotlib.collections import PatchCollection

from pathlib import Path
from datetime import datetime
from collections import defaultdict

import torch
from ultralytics import YOLO
from sklearn.model_selection import KFold

# Report interpreter / framework versions up front to make bug reports useful.
print(f"Python:  {sys.version}")
print(f"PyTorch: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

if not torch.cuda.is_available():
    # Training still runs on CPU, just slowly - show how to get a CUDA build.
    for message in (
        "WARNING: no GPU found. Training on CPU will be very slow.",
        "Reinstall PyTorch with CUDA support:",
        "  pip install torch torchvision --index-url https://download.pytorch.org/whl/cu121",
    ):
        print(message)
else:
    gpu = torch.cuda.get_device_properties(0)
    vram_gb = gpu.total_memory / 1024 ** 3  # bytes -> GiB
    print(f"GPU: {gpu.name}")
    print(f"VRAM: {vram_gb:.1f} GB")
    if vram_gb < 6:
        print("WARNING: less than 6 GB VRAM - reduce BATCH_SIZE to 8 if training crashes")


## 3. Configuration

In [None]:
# ---- MAIN CONFIG - edit this before running ----
# Every later cell reads these module-level constants, so re-run this cell
# after changing anything here.

# Ring types to train. Each name is a sub-folder of DATASET_ROOT.
# Remove any you don't have data for yet.
RING_TYPES = ["ice", "metallic", "rocky", "unified"]

# Segmentation model sizes to compare per ring type. Each name is also the
# pretrained checkpoint that gets loaded ("<name>.pt").
# All four fit on RTX 3070 8GB at batch=16.
# n=fastest, s=balanced, m=good accuracy, l=best accuracy
MODEL_SIZES = ["yolov8n-seg", "yolov8s-seg", "yolov8m-seg", "yolov8l-seg"]

# Number of cross-validation folds.
# 5 is the default. Use 3 if you have fewer than ~30 images per ring type.
# Validation and training both require at least K_FOLDS * 2 labeled images.
K_FOLDS = 5

# Training hyperparameters (passed straight to model.train)
IMG_SIZE    = 640   # YOLO standard input size
EPOCHS      = 100   # max epochs per fold (early stopping usually fires earlier)
BATCH_SIZE  = 16    # reduce to 8 if you get CUDA out-of-memory errors
PATIENCE    = 20    # stop early if val mAP doesn't improve for this many epochs

# Confidence threshold for inference (0.0 - 1.0)
# 0.4 is a good starting point. Tune based on real-game performance.
CONF_THRESHOLD = 0.4

# Mask overlay alpha for visualization (0=transparent, 1=opaque)
MASK_ALPHA = 0.45

# Core highlight color in BGR for OpenCV visualization.
# Inference reverses this tuple before drawing on the RGB copy of the image.
MASK_COLOR_BGR = (0, 255, 128)   # bright green

# We only detect one class. The list index is the YOLO class id (0 = core).
CLASS_NAMES = ["core"]

# Folder layout (all relative to the notebook's working directory)
DATASET_ROOT = Path("datasets")   # datasets/<ring_type>/{images,labels}
RUNS_ROOT    = Path("runs")       # training runs and all_results.json
EXPORTS_ROOT = Path("exports")    # created up front; usage is outside this section
CV_TMP_ROOT  = Path("cv_tmp")     # temporary per-fold datasets, deleted after each fold

# Echo the effective configuration so the cell output documents the run.
print("Config loaded.")
print(f"Ring types: {RING_TYPES}")
print(f"Model sizes: {MODEL_SIZES}")
print(f"K-folds: {K_FOLDS}")
device_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU"
print(f"Device: {device_name}")


## 4. Create dataset folder structure

In [None]:
def create_dataset_structure():
    """
    Make sure every folder the notebook works with exists.

    Creates flat images/ and labels/ folders under DATASET_ROOT for each entry
    in RING_TYPES, plus the runs, exports and temporary cross-validation
    folders, then prints where the user should drop their files.
    Existing folders are left untouched.
    """
    for ring_type in RING_TYPES:
        for leaf in ("images", "labels"):
            (DATASET_ROOT / ring_type / leaf).mkdir(parents=True, exist_ok=True)

    for work_dir in (RUNS_ROOT, EXPORTS_ROOT, CV_TMP_ROOT):
        work_dir.mkdir(exist_ok=True)

    print("Folder structure created. Drop your files here:\n")
    for ring_type in RING_TYPES:
        print(f"  datasets/{ring_type}/images/  <- all screenshots (.png or .jpg)")
        print(f"  datasets/{ring_type}/labels/  <- YOLO segmentation .txt files from Roboflow")
        print()
    print("No train/val split needed - k-fold handles it automatically.")

# Create the folders right away so files can be dropped in before the next cells run.
create_dataset_structure()


## 5. COCO JSON to YOLO segmentation format converter

Only needed if you exported COCO JSON from Roboflow instead of YOLOv8 format.
Prefer the direct YOLOv8 segmentation export from Roboflow and skip this.

COCO segmentation format stores polygon points in the `segmentation` field.
YOLO segmentation format: `class x1 y1 x2 y2 ... xn yn` (normalized 0-1).


In [None]:
def convert_coco_seg_to_yolo(coco_json_path, output_labels_dir):
    """
    Convert a COCO-format segmentation JSON to per-image YOLO .txt label files.

    COCO segmentation stores polygon points as flat pixel-coordinate lists in
    the annotation's "segmentation" field, e.g. [x1,y1,x2,y2,...].
    YOLO segmentation format: one line per object = class followed by normalized
    polygon points: 0 x1 y1 x2 y2 ... xn yn  (all values 0.0-1.0).

    Behaviour worth knowing:
      - Only categories whose name appears in CLASS_NAMES are converted.
      - When an annotation lists several polygons (COCO uses one list per
        object part), only the first polygon is kept, because a YOLO label
        line holds a single polygon.
      - If an annotation has no polygon (missing, empty, or RLE-encoded) but
        has a bbox, the bbox is converted into a 4-point rectangle polygon so
        the annotation is not silently dropped.
      - Normalized coordinates are clamped to 0.0-1.0. Polygons traced slightly
        past the image edge would otherwise be rejected by validate_dataset.
      - Annotations that cannot be converted (unknown image id, image without
        usable width/height, fewer than 3 points, no polygon and no bbox) are
        skipped and counted in the summary instead of raising.

    Args:
        coco_json_path: path to _annotations.coco.json
        output_labels_dir: folder where .txt files will be written

    Returns:
        Number of images for which a label file was written (0 if no COCO
        category matches CLASS_NAMES).
    """
    output_labels_dir = Path(output_labels_dir)
    output_labels_dir.mkdir(parents=True, exist_ok=True)

    with open(coco_json_path, encoding="utf-8") as f:
        coco = json.load(f)

    # COCO category id -> YOLO class index
    cat_map = {
        cat["id"]: CLASS_NAMES.index(cat["name"])
        for cat in coco["categories"]
        if cat["name"] in CLASS_NAMES
    }

    if not cat_map:
        print(f"WARNING: no COCO categories match CLASS_NAMES {CLASS_NAMES}")
        print(f"Categories in file: {[c['name'] for c in coco['categories']]}")
        return 0

    images = {img["id"]: img for img in coco["images"]}

    anns_by_image = defaultdict(list)
    for ann in coco["annotations"]:
        if ann["category_id"] in cat_map:
            anns_by_image[ann["image_id"]].append(ann)

    converted = 0
    fallback_bbox = 0
    multi_part = 0
    skipped_anns = 0

    for img_id, anns in anns_by_image.items():
        img_info = images.get(img_id) or {}
        W = img_info.get("width")
        H = img_info.get("height")
        if not img_info or not W or not H:
            # dangling image reference or unusable dimensions - cannot normalize
            skipped_anns += len(anns)
            continue

        img_name = Path(img_info["file_name"]).stem
        lines = []

        for ann in anns:
            cls = cat_map[ann["category_id"]]
            seg = ann.get("segmentation")

            if isinstance(seg, list) and seg and isinstance(seg[0], list):
                # standard COCO polygon list: one flat list per object part
                if len(seg) > 1:
                    multi_part += 1
                points = seg[0]
            elif isinstance(seg, list) and seg and isinstance(seg[0], (int, float)):
                # flat list directly
                points = seg
            else:
                # no polygon - fall back to bbox as a 4-corner rectangle
                bbox = ann.get("bbox")
                if not bbox or len(bbox) != 4:
                    skipped_anns += 1
                    continue
                x, y, w, h = bbox
                points = [x, y, x + w, y, x + w, y + h, x, y + h]
                fallback_bbox += 1

            if len(points) < 6:
                # need at least 3 points (6 values) for a valid polygon
                skipped_anns += 1
                continue

            # normalize all complete x,y pairs (a dangling trailing value is
            # ignored) and clamp them into the valid 0-1 range
            norm = []
            for px, py in zip(points[0::2], points[1::2]):
                norm.append(f"{min(max(px / W, 0.0), 1.0):.6f}")
                norm.append(f"{min(max(py / H, 0.0), 1.0):.6f}")

            lines.append(f"{cls} " + " ".join(norm))

        if lines:
            (output_labels_dir / f"{img_name}.txt").write_text("\n".join(lines))
            converted += 1

    print(f"Converted {converted} images -> {output_labels_dir}")
    if fallback_bbox > 0:
        print(f"  {fallback_bbox} annotation(s) had no polygon and were converted from bbox instead.")
        print("  Re-label those in Roboflow using the polygon tool for best results.")
    if multi_part > 0:
        print(f"  {multi_part} annotation(s) had several polygons; only the first one was kept.")
    if skipped_anns > 0:
        print(f"  {skipped_anns} annotation(s) could not be converted and were skipped.")
    return converted


# Example:
# convert_coco_seg_to_yolo(
#     coco_json_path="my_export/_annotations.coco.json",
#     output_labels_dir="datasets/ice/labels",
# )


## 6. Validate datasets

Checks that every image has a label, every label has valid segmentation polygon format, and the dataset is large enough for K_FOLDS.

In [None]:
def validate_dataset(ring_type):
    """
    Check a ring type dataset for problems before training.
    Returns (stats dict, list of issue strings). Empty issues = all clear.

    Checks performed:
      - every image (.png/.jpg) has a label file and every label has an image
      - every non-empty label line is a segmentation polygon: a class index
        followed by at least 3 complete x,y pairs (7+ values, odd count),
        all coordinates within 0.0-1.0
      - the class index is a valid index into CLASS_NAMES
      - lines with exactly 5 values are reported separately as detection
        format, so you know to re-export from Roboflow with the segmentation
        option
      - there are at least K_FOLDS * 2 paired images

    Args:
        ring_type: name of the sub-folder of DATASET_ROOT to check

    Returns:
        (stats, issues) where stats has the keys "images", "paired", "cores"
        and "avg_cores_per_image". "cores" counts valid class-0 polygons.
    """
    img_dir = DATASET_ROOT / ring_type / "images"
    lbl_dir = DATASET_ROOT / ring_type / "labels"

    images = list(img_dir.glob("*.png")) + list(img_dir.glob("*.jpg"))
    labels = list(lbl_dir.glob("*.txt"))

    img_stems = {p.stem for p in images}
    lbl_stems = {p.stem for p in labels}

    issues = []

    missing = img_stems - lbl_stems
    orphans = lbl_stems - img_stems
    if missing:
        issues.append(f"{len(missing)} image(s) have no label: {sorted(missing)[:5]}")
    if orphans:
        issues.append(f"{len(orphans)} label(s) have no image: {sorted(orphans)[:5]}")

    total_cores    = 0
    bad_lines      = 0
    detection_lbls = 0   # lines that look like detection format (5 values only)

    for lbl in labels:
        for line in lbl.read_text().strip().splitlines():
            parts = line.split()
            if not parts:
                continue

            if len(parts) < 7:
                # less than 3 polygon points (class + 3*2=6 values minimum)
                if len(parts) == 5:
                    detection_lbls += 1
                else:
                    bad_lines += 1
                continue

            if len(parts) % 2 == 0:
                # class index + an odd number of coordinates: one x has no y
                bad_lines += 1
                continue

            try:
                cls = int(parts[0])
                coords = [float(v) for v in parts[1:]]
            except ValueError:
                bad_lines += 1
                continue

            # "not 0 <= v <= 1" also rejects NaN, which a plain < / > test lets through
            if not 0 <= cls < len(CLASS_NAMES) or any(not 0.0 <= v <= 1.0 for v in coords):
                bad_lines += 1
            elif cls == 0:
                total_cores += 1

    if bad_lines:
        issues.append(f"{bad_lines} malformed label line(s)")
    if detection_lbls > 0:
        issues.append(
            f"{detection_lbls} label line(s) look like detection format (5 values). "
            "Re-export from Roboflow using YOLOv8 Segmentation format."
        )

    paired     = len(img_stems & lbl_stems)
    min_needed = K_FOLDS * 2
    if paired < min_needed:
        issues.append(
            f"only {paired} labeled images but K_FOLDS={K_FOLDS} needs >= {min_needed}. "
            "Add more data or reduce K_FOLDS in config."
        )

    stats = {
        "images":  len(images),
        "paired":  paired,
        "cores":   total_cores,
        "avg_cores_per_image": round(total_cores / paired, 2) if paired else 0,
    }

    print(f"\n{'=' * 44}")
    print(f"  {ring_type.upper()} dataset")
    print(f"{'=' * 44}")
    print(f"  Images:              {stats['images']}")
    print(f"  Paired (img+label):  {stats['paired']}")
    print(f"  Total core polygons: {stats['cores']}")
    print(f"  Avg cores/image:     {stats['avg_cores_per_image']}")

    if issues:
        print("  ISSUES:")
        for iss in issues:
            print(f"    - {iss}")
    else:
        print("  All checks passed.")

    return stats, issues


# Print a validation report for every configured ring type.
# The returned (stats, issues) are ignored here; the training cell re-validates.
for ring_type in RING_TYPES:
    validate_dataset(ring_type)


## 7. Preview labeled images

Draws the segmentation polygons from your label files over the screenshots. Use this to confirm Roboflow exported correctly before wasting time training on bad labels.

In [None]:
def preview_labels(ring_type, n=4):
    """
    Show the first n labeled images with segmentation polygons drawn on them.
    Each core gets a filled semi-transparent polygon plus its outline.

    Images are taken in sorted filename order from the ring type's images/
    folder and laid out on a grid of up to 4 columns. Images that cannot be
    decoded are shown as an empty panel titled "(unreadable)" instead of
    aborting the preview. Label lines with fewer than 3 points or with
    non-numeric values are skipped.

    Args:
        ring_type: name of the sub-folder of DATASET_ROOT to preview
        n:         maximum number of images to show
    """
    img_dir = DATASET_ROOT / ring_type / "images"
    lbl_dir = DATASET_ROOT / ring_type / "labels"

    images = sorted(list(img_dir.glob("*.png")) + list(img_dir.glob("*.jpg")))[:n]
    if not images:
        print(f"No images found in {img_dir}")
        return

    cols = min(len(images), 4)
    rows = math.ceil(len(images) / cols)
    # squeeze=False always returns a 2-D array of axes, whatever the grid shape
    _, axes = plt.subplots(rows, cols, figsize=(6 * cols, 5 * rows), squeeze=False)

    for i, ax in enumerate(axes.flat):
        ax.axis("off")
        if i >= len(images):
            # unused cell in the last grid row
            continue

        img_path = images[i]
        img = cv2.imread(str(img_path))
        if img is None:
            # cv2.imread signals a corrupt/unsupported file by returning None
            ax.set_title(f"{img_path.name} (unreadable)", fontsize=8)
            continue

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        H, W = img.shape[:2]
        ax.imshow(img)

        lbl_path = lbl_dir / (img_path.stem + ".txt")
        if lbl_path.exists():
            patches = []
            for line in lbl_path.read_text().strip().splitlines():
                parts = line.strip().split()
                if len(parts) < 7:
                    continue
                try:
                    coords = [float(v) for v in parts[1:]]
                except ValueError:
                    # malformed line - validate_dataset reports these
                    continue
                # convert normalized pairs to pixel coordinates
                pts = np.array([
                    [coords[j] * W, coords[j + 1] * H]
                    for j in range(0, len(coords) - 1, 2)
                ])
                patches.append(MplPolygon(pts, closed=True))
                # draw outline (repeat the first point to close the loop)
                ax.plot(
                    np.append(pts[:, 0], pts[0, 0]),
                    np.append(pts[:, 1], pts[0, 1]),
                    color="lime", linewidth=1.5
                )
                # label at the mean of the polygon vertices
                cx, cy = pts[:, 0].mean(), pts[:, 1].mean()
                ax.text(cx, cy, "core", color="white", fontsize=7,
                        ha="center", va="center",
                        bbox=dict(boxstyle="round,pad=0.2", fc="green", alpha=0.6))

            if patches:
                pc = PatchCollection(patches, alpha=0.25, facecolor="lime", edgecolor="none")
                ax.add_collection(pc)

        ax.set_title(img_path.name, fontsize=8)

    plt.suptitle(f"{ring_type} - segmentation label preview", fontsize=12)
    plt.tight_layout()
    plt.show()


# Preview the first four ice-ring images; change the ring type to inspect another dataset.
preview_labels("ice", n=4)


## 8. K-fold cross-validation helpers

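Each fold gets a temporary dataset built from hard links to the original files
(falling back to copies where the filesystem does not support hard links) plus
its own data.yaml. The temporary dataset is deleted once the fold has trained,
so hard-linked folds use no extra disk space.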


In [None]:
def get_paired_samples(ring_type):
    """
    Collect the usable training samples of one ring type.

    Returns (image_path, label_path) tuples, ordered by image path, for every
    .png/.jpg image whose same-stem .txt label file exists. Images without a
    label are left out.
    """
    ring_dir = DATASET_ROOT / ring_type
    img_dir = ring_dir / "images"
    lbl_dir = ring_dir / "labels"

    candidates = sorted([*img_dir.glob("*.png"), *img_dir.glob("*.jpg")])
    with_labels = ((img, lbl_dir / (img.stem + ".txt")) for img in candidates)
    return [(img, lbl) for img, lbl in with_labels if lbl.exists()]


def build_fold_dataset(pairs_train, pairs_val, fold_dir):
    """
    Build a temporary YOLO dataset for one fold using hard links.
    Falls back to file copy if the filesystem doesn't support hard links.

    The result has the layout
        fold_dir/images/{train,val}/   and   fold_dir/labels/{train,val}/

    Any images/ and labels/ folders already present in fold_dir are removed
    first. A previous run that was interrupted before cleanup would otherwise
    leave files from an older split behind, and those stale files could leak
    validation images into training.

    Args:
        pairs_train: iterable of (image_path, label_path) for the train split
        pairs_val:   iterable of (image_path, label_path) for the val split
        fold_dir:    folder to build the fold dataset in (created if missing)
    """
    fold_dir = Path(fold_dir)

    # start from a clean slate so only this fold's files are present
    for sub in ("images", "labels"):
        stale = fold_dir / sub
        if stale.exists():
            shutil.rmtree(stale)

    def link_or_copy(src, dst):
        dst.parent.mkdir(parents=True, exist_ok=True)
        # the same destination can come up twice (e.g. a.png and a.jpg share
        # a.txt); remove it first so neither link nor copy trips over it
        if dst.exists():
            dst.unlink()
        try:
            os.link(src, dst)
        except OSError:
            shutil.copy2(src, dst)

    for split, pairs in [("train", pairs_train), ("val", pairs_val)]:
        for img_path, lbl_path in pairs:
            link_or_copy(img_path, fold_dir / "images" / split / img_path.name)
            link_or_copy(lbl_path, fold_dir / "labels" / split / lbl_path.name)


def write_fold_yaml(fold_dir):
    """
    Create the data.yaml that tells YOLO where this fold's data lives.

    The file records the fold folder as an absolute path, the train/val image
    sub-folders relative to it, and the class count and names taken from
    CLASS_NAMES. Returns the path of the written file.
    """
    dataset_config = {
        "path":  str(fold_dir.resolve()),
        "train": "images/train",
        "val":   "images/val",
        "nc":    len(CLASS_NAMES),
        "names": CLASS_NAMES,
    }
    yaml_path = fold_dir / "data.yaml"
    with open(yaml_path, "w") as handle:
        yaml.dump(dataset_config, handle, default_flow_style=False)
    return yaml_path


def cleanup_fold(fold_dir):
    """Remove a fold's temporary dataset folder; do nothing if it is already gone."""
    if not fold_dir.exists():
        return
    shutil.rmtree(fold_dir)


In [None]:
def train_one_fold(yaml_path, run_name, model_size, n_train, n_val):
    """
    Train one YOLOv8-seg model on a prepared fold dataset.

    Segmentation models output both a bounding box head and a mask head.
    We track both box-mAP (mAP50(B)) and mask-mAP (mAP50(M)) in results.

    Args:
        yaml_path:  data.yaml describing the fold's train/val folders
        run_name:   name of the run folder created under RUNS_ROOT
        model_size: pretrained checkpoint name without ".pt", e.g. "yolov8s-seg"
        n_train:    number of training images (only used for the log line)
        n_val:      number of validation images (only used for the log line)

    Returns:
        (weights_path str, metrics dict) - metrics has the keys box_mAP50,
        box_mAP50_95, precision, recall, mask_mAP50 and mask_mAP50_95.
        A metric missing from the training results is reported as 0.
    """
    print(f"  Training {run_name}  (train={n_train}, val={n_val})")

    model = YOLO(f"{model_size}.pt")

    results = model.train(
        data=str(yaml_path),
        epochs=EPOCHS,
        imgsz=IMG_SIZE,
        batch=BATCH_SIZE,
        patience=PATIENCE,
        device=0 if torch.cuda.is_available() else "cpu",
        project=str(RUNS_ROOT),
        name=run_name,

        # -- Augmentation tuned for Elite Dangerous screenshots --
        # Asteroids rotate at all angles - heavy rotation is always valid
        degrees=45,
        # Horizontal/vertical flips are valid in zero-g environments
        fliplr=0.5,
        flipud=0.3,
        # Cores appear at different distances = different apparent sizes on screen
        scale=0.5,
        # Mosaic helps the model handle cluttered asteroid fields
        mosaic=1.0,
        close_mosaic=10,
        # Brightness and saturation shift for star-lit vs shadow-side-of-planet lighting
        hsv_v=0.4,
        hsv_s=0.5,
        # Small hue shift - ring types differ somewhat but not wildly
        hsv_h=0.015,
        # NOTE: do not pass blur / median_blur here. They are not Ultralytics
        # train arguments, and unknown keys are rejected when the trainer
        # validates its configuration, which aborts training before it starts.
        # NOTE(review): blur-style augmentation presumably only comes from the
        # optional Albumentations integration - confirm it is not installed.

        save_period=0,   # skip intermediate checkpoints to save disk space
        verbose=False,
    )

    rd = getattr(results, "results_dict", None) or {}

    # Prefer the folder the trainer actually wrote to: Ultralytics may adjust
    # the run folder (e.g. add a numeric suffix if the name already exists).
    # Fall back to the expected location if the attribute is unavailable.
    trainer = getattr(model, "trainer", None)
    save_dir = Path(getattr(trainer, "save_dir", None) or RUNS_ROOT / run_name)
    weights = save_dir / "weights" / "best.pt"

    metrics = {
        # box metrics (bounding box head)
        "box_mAP50":    float(rd.get("metrics/mAP50(B)",     0)),
        "box_mAP50_95": float(rd.get("metrics/mAP50-95(B)",  0)),
        "precision":    float(rd.get("metrics/precision(B)", 0)),
        "recall":       float(rd.get("metrics/recall(B)",    0)),
        # mask metrics (segmentation head) - the primary metric for seg models
        "mask_mAP50":    float(rd.get("metrics/mAP50(M)",    0)),
        "mask_mAP50_95": float(rd.get("metrics/mAP50-95(M)", 0)),
    }

    print(
        f"    -> box mAP50={metrics['box_mAP50']:.3f}  "
        f"mask mAP50={metrics['mask_mAP50']:.3f}  "
        f"P={metrics['precision']:.3f}  R={metrics['recall']:.3f}"
    )
    return str(weights), metrics


## 9. Train segmentation models with k-fold cross-validation

For each ring type x model size combination:
1. Split all images into K folds
2. Train K models (each validated on a different held-out chunk)
3. Average both box-mAP and mask-mAP across folds
4. Keep the best fold's weights as the deployed model

**Mask mAP50** is the primary metric for segmentation - it measures how well the
predicted polygon overlaps with your labeled polygon. Box mAP50 is the secondary
metric from the detection head (still useful for knowing if the model finds the
asteroid at all, even if the mask shape isn't perfect).

Rough timing on RTX 3070:
- yolov8n-seg: ~5 min/fold
- yolov8s-seg: ~10 min/fold
- yolov8m-seg: ~20 min/fold
- yolov8l-seg: ~35 min/fold


In [None]:
def train_kfold(ring_type, model_size):
    """
    Run K-fold cross-validation for one ring type + model size combination.
    Best fold is selected by mask_mAP50 (the primary seg metric).
    Returns a result dict or None if not enough data.

    For every fold a temporary dataset is built under CV_TMP_ROOT, a model is
    trained on it, and the temporary dataset is removed again - also when
    training raises, so a failed run does not leave fold data behind.

    Args:
        ring_type:  name of the dataset sub-folder to train on
        model_size: pretrained checkpoint name without ".pt"

    Returns:
        None if there are fewer than K_FOLDS * 2 paired samples, otherwise a
        dict with the best fold's weights path and fold number, the per-fold
        results, the mean of every metric across folds, and the (population)
        standard deviation of box and mask mAP50.
    """
    pairs = get_paired_samples(ring_type)

    if len(pairs) < K_FOLDS * 2:
        print(f"Skipping {ring_type}/{model_size}: only {len(pairs)} paired samples "
              f"(need >= {K_FOLDS * 2} for {K_FOLDS} folds).")
        return None

    print(f"\n{'=' * 56}")
    print(f"  {ring_type.upper()} / {model_size}  ({len(pairs)} images, {K_FOLDS} folds)")
    print(f"{'=' * 56}")

    # fixed seed: every model size is evaluated on the same splits
    kf        = KFold(n_splits=K_FOLDS, shuffle=True, random_state=42)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    fold_results = []

    # KFold only needs the sample count, so split plain indices
    for fold_num, (train_idx, val_idx) in enumerate(kf.split(np.arange(len(pairs))), start=1):
        pairs_train = [pairs[i] for i in train_idx]
        pairs_val   = [pairs[i] for i in val_idx]

        fold_dir = CV_TMP_ROOT / f"{ring_type}_{model_size}_fold{fold_num}"
        run_name = f"{ring_type}_{model_size}_fold{fold_num}_{timestamp}"

        print(f"\nFold {fold_num}/{K_FOLDS}")
        try:
            build_fold_dataset(pairs_train, pairs_val, fold_dir)
            yaml_path = write_fold_yaml(fold_dir)
            weights, metrics = train_one_fold(
                yaml_path, run_name, model_size, len(pairs_train), len(pairs_val)
            )
        finally:
            # remove the temporary dataset even if training crashed
            cleanup_fold(fold_dir)

        fold_results.append({"fold": fold_num, "weights": weights, **metrics})

    def avg(key):
        return float(np.mean([r[key] for r in fold_results]))

    def std(key):
        return float(np.std([r[key] for r in fold_results]))

    # best fold selected by mask mAP50 (primary seg metric)
    best_fold = max(fold_results, key=lambda r: r["mask_mAP50"])

    summary = {
        "best_weights":    best_fold["weights"],
        "best_fold":       best_fold["fold"],
        "fold_results":    fold_results,
        "box_mAP50":       avg("box_mAP50"),
        "box_mAP50_95":    avg("box_mAP50_95"),
        "mask_mAP50":      avg("mask_mAP50"),
        "mask_mAP50_95":   avg("mask_mAP50_95"),
        "precision":       avg("precision"),
        "recall":          avg("recall"),
        "mask_mAP50_std":  std("mask_mAP50"),
        "box_mAP50_std":   std("box_mAP50"),
    }

    print(f"\nAverage across {K_FOLDS} folds:")
    print(
        f"  box  mAP50={summary['box_mAP50']:.3f} (+/-{summary['box_mAP50_std']:.3f})"
    )
    print(
        f"  mask mAP50={summary['mask_mAP50']:.3f} (+/-{summary['mask_mAP50_std']:.3f})  "
        f"<-- primary metric"
    )
    print(
        f"  P={summary['precision']:.3f}  R={summary['recall']:.3f}  "
        f"best fold: {best_fold['fold']}"
    )
    return summary


In [None]:
# Run all ring type x model size combinations.
# Skips any ring type whose dataset fails validation (this includes having
# too few labeled images for K_FOLDS).

all_results = {}

results_path = RUNS_ROOT / "all_results.json"
RUNS_ROOT.mkdir(exist_ok=True)


def _save_all_results():
    """Write everything trained so far to all_results.json."""
    with open(results_path, "w") as f:
        json.dump(all_results, f, indent=2)


for ring_type in RING_TYPES:
    stats, issues = validate_dataset(ring_type)
    if issues:
        print(f"Skipping {ring_type} - fix dataset issues first:")
        for iss in issues:
            print(f"  {iss}")
        continue

    all_results[ring_type] = {}
    for model_size in MODEL_SIZES:
        result = train_kfold(ring_type, model_size)
        if result is not None:
            all_results[ring_type][model_size] = result
            # Save after every finished combination: a full sweep takes hours,
            # and a later crash must not throw away completed runs.
            _save_all_results()

# Final write so the file also exists (and is current) when nothing was trained.
_save_all_results()

print("\nAll training runs complete. Results saved to:", results_path)


## 10. Compare model results

Both box mAP50 and mask mAP50 are shown. Mask mAP50 is what matters for segmentation quality - it measures polygon overlap with your ground truth labels.

In [None]:
def show_comparison_table():
    """
    Print a formatted table of all trained model metrics.
    Shows both box and mask mAP50 with standard deviation across folds.
    Best model per ring type is selected by mask_mAP50; exactly one row per
    ring type carries the "<-- best" marker (the first one on a tie).

    Returns:
        dict mapping ring type -> name of its best model size. Empty if
        all_results.json does not exist yet; ring types without any trained
        model are left out.
    """
    results_path = RUNS_ROOT / "all_results.json"
    if not results_path.exists():
        print("No results yet - run training first.")
        return {}

    with open(results_path) as f:
        results = json.load(f)

    header = (
        f"{'Ring':<12} {'Model':<16} "
        f"{'boxmAP50':>9} {'  +/-':>6} "
        f"{'mskMAP50':>9} {'  +/-':>6} "
        f"{'Prec':>7} {'Recall':>7}"
    )
    print(header)
    print("-" * len(header))

    best_per_ring = {}
    for ring_type, models in results.items():
        # Decide the winner before printing. Marking rows while still scanning
        # would flag every row that beats the ones above it.
        if models:
            best_per_ring[ring_type] = max(
                models, key=lambda name: models[name]["mask_mAP50"]
            )

        for model_size, m in models.items():
            marker = "  <-- best" if best_per_ring.get(ring_type) == model_size else ""
            print(
                f"{ring_type:<12} {model_size:<16} "
                f"{m['box_mAP50']:>9.3f} {m.get('box_mAP50_std', 0):>6.3f} "
                f"{m['mask_mAP50']:>9.3f} {m.get('mask_mAP50_std', 0):>6.3f} "
                f"{m['precision']:>7.3f} {m['recall']:>7.3f}"
                + marker
            )
        print()

    return best_per_ring


# Print the table and keep the ring type -> best model size mapping it returns.
best_models = show_comparison_table()
print("Best model per ring type:", best_models)


## 11. Plot per-fold metrics

High variance between folds = model is sensitive to which images land in val, which usually means you need more labeled data.

In [None]:
def plot_fold_metrics(ring_type, model_size):
    """
    Draw a grouped bar chart of the per-fold metrics of one trained model.

    Each fold gets four bars (box mAP50, mask mAP50, precision, recall);
    dashed horizontal lines mark the mean box and mask mAP50 across folds.
    Reads all_results.json and prints a message instead of plotting when the
    file, the ring type / model entry, or its per-fold results are missing.
    """
    results_path = RUNS_ROOT / "all_results.json"
    if not results_path.exists():
        print("No results found.")
        return

    with open(results_path) as f:
        stored = json.load(f)

    model_data = stored.get(ring_type, {}).get(model_size)
    if not model_data:
        print(f"No data for {ring_type}/{model_size}")
        return

    fold_results = model_data.get("fold_results", [])
    if not fold_results:
        print("No per-fold results stored.")
        return

    # (legend label, key in the fold result, bar color) in left-to-right order
    series = [
        ("box mAP50",  "box_mAP50",  "steelblue"),
        ("mask mAP50", "mask_mAP50", "darkorange"),
        ("Precision",  "precision",  "seagreen"),
        ("Recall",     "recall",     "tomato"),
    ]
    values = {key: [r[key] for r in fold_results] for _, key, _ in series}
    tick_labels = [f"Fold {r['fold']}" for r in fold_results]

    x = np.arange(len(tick_labels))
    w = 0.2

    fig, ax = plt.subplots(figsize=(11, 5))
    # center the four bars of a group around the fold's tick position
    for slot, (label, key, color) in enumerate(series):
        ax.bar(x + (slot - 1.5) * w, values[key], w, label=label, color=color)

    for short_name, key, color in (
        ("box", "box_mAP50", "steelblue"),
        ("mask", "mask_mAP50", "darkorange"),
    ):
        mean_value = np.mean(values[key])
        ax.axhline(mean_value, color=color, linestyle="--", linewidth=1.1,
                   label=f"mean {short_name}={mean_value:.3f}")

    ax.set_xticks(x)
    ax.set_xticklabels(tick_labels)
    ax.set_ylim(0, 1.05)
    ax.set_ylabel("Score")
    ax.set_title(f"{ring_type} / {model_size} - metrics per fold")
    ax.legend(fontsize=8)
    ax.grid(True, axis="y", alpha=0.3)
    plt.tight_layout()
    plt.show()


# Example: per-fold chart for the small model on ice rings; edit the arguments to inspect others.
plot_fold_metrics("ice", "yolov8s-seg")


## 12. Plot training curves for a specific fold

In [None]:
def plot_training_curves(ring_type, model_size, fold=None):
    """
    Plot loss, box mAP50, and mask mAP50 curves from a run's results.csv.
    Defaults to the best fold if fold is not specified.

    The run folder is derived from the fold's stored weights path
    (<run>/weights/best.pt -> <run>/results.csv). Missing or empty CSV cells
    are plotted as 0, so a truncated file (e.g. from an interrupted run) still
    plots instead of raising.

    Args:
        ring_type:  ring type the model was trained on
        model_size: model size name, e.g. "yolov8s-seg"
        fold:       fold number to plot, or None for the stored best fold
    """
    results_path = RUNS_ROOT / "all_results.json"
    if not results_path.exists():
        print("No results found.")
        return

    with open(results_path) as f:
        all_r = json.load(f)

    model_data = all_r.get(ring_type, {}).get(model_size)
    if not model_data:
        print(f"No data for {ring_type}/{model_size}")
        return

    if fold is None:
        fold = model_data["best_fold"]
        print(f"Plotting best fold: {fold}")

    weights_path = next(
        (r["weights"] for r in model_data["fold_results"] if r["fold"] == fold), None
    )
    if not weights_path:
        print(f"Fold {fold} not found.")
        return

    csv_path = Path(weights_path).parent.parent / "results.csv"
    if not csv_path.exists():
        print(f"results.csv not found at {csv_path}")
        return

    def read_float(row, *keys):
        """Return the first non-empty column among keys as a float, else 0.0."""
        for key in keys:
            text = row.get(key)
            if text:
                return float(text)
        return 0.0

    epochs, seg_loss, box_loss, cls_loss, box_map50, mask_map50 = [], [], [], [], [], []
    # newline="" is what the csv module expects for files it parses itself
    with open(csv_path, newline="") as f:
        for raw in csv.DictReader(f):
            # Header names and cells may be padded with spaces. DictReader
            # yields None values for short rows and a None key for surplus
            # cells, so guard both before stripping.
            row = {
                k.strip(): (v or "").strip()
                for k, v in raw.items()
                if k is not None
            }
            epochs.append(int(read_float(row, "epoch")))
            # seg loss key varies slightly by ultralytics version
            seg_loss.append(read_float(row, "train/seg_loss", "train/dfl_loss"))
            box_loss.append(read_float(row, "train/box_loss"))
            cls_loss.append(read_float(row, "train/cls_loss"))
            box_map50.append(read_float(row, "metrics/mAP50(B)"))
            mask_map50.append(read_float(row, "metrics/mAP50(M)"))

    if not epochs:
        print(f"results.csv at {csv_path} contains no epochs to plot.")
        return

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(13, 4))

    ax1.plot(epochs, seg_loss,  label="seg loss")
    ax1.plot(epochs, box_loss,  label="box loss")
    ax1.plot(epochs, cls_loss,  label="cls loss")
    ax1.set_xlabel("Epoch")
    ax1.set_ylabel("Loss")
    ax1.set_title(f"{ring_type} / {model_size} fold {fold} - training losses")
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    ax2.plot(epochs, box_map50,  label="box mAP50",  color="steelblue")
    ax2.plot(epochs, mask_map50, label="mask mAP50", color="darkorange", linewidth=2)
    ax2.set_xlabel("Epoch")
    ax2.set_ylabel("mAP50")
    ax2.set_title(f"{ring_type} / {model_size} fold {fold} - validation mAP50")
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()


# Example: curves of the best fold for the small model on ice rings (fold is left at None).
plot_training_curves("ice", "yolov8s-seg")


## 13. Test inference on a screenshot

Runs the segmentation model and draws filled polygon masks over detected cores with confidence scores.

In [None]:
def detect_cores(image_path, ring_type, model_size=None, conf=CONF_THRESHOLD):
    """
    Run the trained segmentation model on a single screenshot.
    Draws filled polygon masks over each detected core and shows the
    annotated image with matplotlib.

    Args:
        image_path: path to a .png or .jpg screenshot
        ring_type:  which ring type model to load
        model_size: e.g. "yolov8s-seg", or None to auto-pick best mask mAP50
        conf:       confidence threshold

    Returns:
        The prediction result for the image, or None when no results file,
        no model for the ring type, no weights file, or no readable image
        was found (a message is printed in each of those cases).
    """
    results_path = RUNS_ROOT / "all_results.json"
    if not results_path.exists():
        print("No trained models found. Run training first.")
        return

    with open(results_path) as f:
        all_r = json.load(f)

    ring_models = all_r.get(ring_type, {})
    if not ring_models:
        print(f"No trained model for ring type '{ring_type}'.")
        return

    if model_size is None:
        model_size = max(ring_models, key=lambda m: ring_models[m]["mask_mAP50"])
        m = ring_models[model_size]
        print(
            f"Auto-selected: {model_size} "
            f"(mask mAP50={m['mask_mAP50']:.3f}, box mAP50={m['box_mAP50']:.3f})"
        )

    weights = ring_models.get(model_size, {}).get("best_weights", "")
    if not weights or not Path(weights).exists():
        print(f"Weights file not found: {weights}")
        return

    model = YOLO(weights)
    img = cv2.imread(str(image_path))
    if img is None:
        print(f"Could not load image: {image_path}")
        return

    preds = model.predict(img, conf=conf, verbose=False)[0]

    # draw masks manually so we control colors and alpha
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    if preds.masks is not None:
        # Pair every polygon with its box once. A detection can come back with
        # an empty polygon (no contour found); drawing it or averaging its
        # points would fail, so those are skipped here.
        polygons = [
            (mask_xy.astype(np.int32), box)
            for mask_xy, box in zip(preds.masks.xy, preds.boxes)
            if len(mask_xy) > 0
        ]

        # Fill all polygons on a copy, then blend once so overlapping masks
        # are not darkened by repeated blending.
        overlay = img_rgb.copy()
        for pts, _ in polygons:
            cv2.fillPoly(overlay, [pts], color=MASK_COLOR_BGR[::-1])  # BGR->RGB
        cv2.addWeighted(overlay, MASK_ALPHA, img_rgb, 1 - MASK_ALPHA, 0, img_rgb)

        # draw polygon outlines and confidence labels on top
        for pts, box in polygons:
            conf_val = float(box.conf)
            cv2.polylines(img_rgb, [pts], isClosed=True, color=(0, 255, 80), thickness=2)
            # label is anchored at the mean of the polygon's vertices
            cx = int(pts[:, 0].mean())
            cy = int(pts[:, 1].mean())
            label = f"core {conf_val:.2f}"
            cv2.putText(img_rgb, label, (cx - 30, cy),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

    n = len(preds.boxes)
    plt.figure(figsize=(14, 8))
    plt.imshow(img_rgb)
    plt.title(f"{ring_type} | {model_size} | {n} core(s) detected | conf>={conf}")
    plt.axis("off")
    plt.tight_layout()
    plt.show()

    print(f"Detections: {n}")
    for box in preds.boxes:
        xyxy = [round(v, 1) for v in box.xyxy[0].tolist()]
        print(f"  core | conf={float(box.conf):.3f} | bbox={xyxy}")

    return preds


# Usage:
# detect_cores("screenshot.png", ring_type="ice")
# detect_cores("screenshot.png", ring_type="metallic", model_size="yolov8m-seg")


## 14. One-button retrain

Drop new screenshots and their matching label files into `datasets/<ring_type>/images/` and `datasets/<ring_type>/labels/`, then call `retrain()`.

In [None]:
def retrain(ring_type="all", model_size="best"):
    """
    Retrain after adding new labeled screenshots.
    Drop new images + label files into datasets/<ring_type>/images/ and labels/,
    then call this.

    Ring types whose dataset fails validation, or that have fewer than
    K_FOLDS * 2 paired images, are reported and skipped. Results are merged
    into all_results.json after every successful training run, so an
    interruption during a later ring type does not discard earlier results.

    Args:
        ring_type:  "all" retrains every type, or pass one e.g. "ice"
        model_size: "best" reuses the size that scored highest last time by
                    mask_mAP50, or pass a specific size like "yolov8m-seg"
    """
    targets = RING_TYPES if ring_type == "all" else [ring_type]

    results_path = RUNS_ROOT / "all_results.json"
    prev_results = {}
    if results_path.exists():
        with open(results_path) as f:
            prev_results = json.load(f)

    def _save_results():
        # The runs folder may not exist yet if nothing has been trained.
        results_path.parent.mkdir(parents=True, exist_ok=True)
        with open(results_path, "w") as f:
            json.dump(prev_results, f, indent=2)

    for rt in targets:
        stats, issues = validate_dataset(rt)
        if issues:
            print(f"Dataset issues for '{rt}':")
            for iss in issues:
                print(f"  {iss}")
            continue

        if stats["paired"] < K_FOLDS * 2:
            print(f"Skipping '{rt}' - not enough images ({stats['paired']}). "
                  f"Need >= {K_FOLDS * 2}.")
            continue

        if model_size == "best":
            ring_prev = prev_results.get(rt, {})
            if ring_prev:
                size = max(ring_prev, key=lambda m: ring_prev[m]["mask_mAP50"])
                print(f"Using best model size from previous run: {size}")
            else:
                # Nothing to compare against yet, so fall back to the default size.
                size = "yolov8s-seg"
                print(f"No previous results for '{rt}'; defaulting to {size}")
        else:
            size = model_size

        result = train_kfold(rt, size)
        if result is None:
            continue

        prev_results.setdefault(rt, {})[size] = result
        # Persist right away: training is slow and a later failure should
        # not throw away what has already finished.
        _save_results()

    # Final write keeps the results file present even when nothing was trained.
    _save_results()

    print("\nRetrain complete!")
    show_comparison_table()


# Examples:
# retrain()                          # all ring types, best model size each
# retrain("ice")                     # only ice, auto-pick best size
# retrain("ice", "yolov8l-seg")      # force a specific model size


## 15. Export best models to ONNX

In [None]:
def export_best_models():
    """
    Export the best fold's weights for each ring type to ONNX format.
    Best is chosen by mask_mAP50. Output goes to exports/<ring_type>_best.onnx

    Ring types with no recorded models, or whose recorded weights path is
    empty or missing on disk, are skipped with a message. Returns None.
    """
    results_path = RUNS_ROOT / "all_results.json"
    if not results_path.exists():
        print("No trained models to export.")
        return

    with open(results_path) as f:
        all_r = json.load(f)

    EXPORTS_ROOT.mkdir(parents=True, exist_ok=True)

    for ring_type, models in all_r.items():
        if not models:
            continue

        best_size = max(models, key=lambda m: models[m]["mask_mAP50"])
        best = models[best_size]
        best_weights = best.get("best_weights", "")

        # An empty path must be rejected explicitly: Path("") is the current
        # directory, which exists, so the exists() check alone would pass.
        if not best_weights or not Path(best_weights).exists():
            print(f"Weights missing for {ring_type}/{best_size}: {best_weights}")
            continue

        print(
            f"Exporting {ring_type} ({best_size}) "
            f"mask mAP50={best['mask_mAP50']:.3f} box mAP50={best['box_mAP50']:.3f}..."
        )
        model = YOLO(best_weights)
        exported = model.export(
            format="onnx",
            imgsz=IMG_SIZE,
            simplify=True,
            opset=17,
            dynamic=False,
        )

        # Copy the exported file to a stable, ring-type-based name under
        # EXPORTS_ROOT.
        dest = EXPORTS_ROOT / f"{ring_type}_best.onnx"
        shutil.copy(exported, dest)
        print(f"  -> {dest}  ({dest.stat().st_size / 1024 / 1024:.1f} MB)")

    print("\nExport complete. Files are in:", EXPORTS_ROOT)


# Export the best-scoring model of every ring type recorded in all_results.json.
export_best_models()


## 16. Batch inference on a folder of screenshots

In [None]:
def batch_detect(screenshots_folder, ring_type, output_folder=None, conf=CONF_THRESHOLD):
    """
    Run segmentation detection on every image in a folder and save annotated copies.
    Annotated copies have filled polygon masks drawn over detected cores.

    The model used is the one with the highest mask_mAP50 recorded for
    ring_type in all_results.json. Only *.png and *.jpg files directly inside
    screenshots_folder are processed; files that cannot be read as images are
    reported and skipped. Returns None.

    Args:
        screenshots_folder: folder with .png/.jpg screenshots
        ring_type:          which ring type model to use
        output_folder:      where to save annotated images (default: screenshots_folder/detected)
        conf:               confidence threshold
    """
    screenshots_folder = Path(screenshots_folder)
    output_folder = Path(output_folder or screenshots_folder / "detected")

    results_path = RUNS_ROOT / "all_results.json"
    if not results_path.exists():
        print("No trained models. Run training first.")
        return

    with open(results_path) as f:
        all_r = json.load(f)

    ring_models = all_r.get(ring_type, {})
    if not ring_models:
        print(f"No model for ring type '{ring_type}'")
        return

    best_size = max(ring_models, key=lambda m: ring_models[m]["mask_mAP50"])
    best_weights = ring_models[best_size].get("best_weights", "")
    if not best_weights or not Path(best_weights).exists():
        print(f"Weights file not found: {best_weights}")
        return
    model = YOLO(best_weights)

    # Create the output folder only once we know there is a model to run.
    output_folder.mkdir(parents=True, exist_ok=True)

    images = sorted(
        list(screenshots_folder.glob("*.png"))
        + list(screenshots_folder.glob("*.jpg"))
    )
    print(f"Running {best_size} ({ring_type}) on {len(images)} images...")

    total_cores = 0
    processed = 0
    for img_path in images:
        img = cv2.imread(str(img_path))
        if img is None:
            # cv2.imread returns None for unreadable/corrupt files; never
            # hand that to predict().
            print(f"Could not load image: {img_path}")
            continue

        preds = model.predict(img, conf=conf, verbose=False)[0]

        # preds.plot() renders masks + boxes + labels into one annotated image
        annotated = preds.plot()
        cv2.imwrite(str(output_folder / img_path.name), annotated)
        total_cores += len(preds.boxes)
        processed += 1

    print(f"Done. {total_cores} total core detections across {processed} screenshots.")
    skipped = len(images) - processed
    if skipped:
        print(f"Skipped {skipped} unreadable file(s).")
    print(f"Annotated images saved to: {output_folder}")


# Usage:
# batch_detect(
#     screenshots_folder=r"C:/Users/YourName/Pictures/Frontier Developments/Elite Dangerous",
#     ring_type="ice",
# )
