# 04b - Evaluation of variants
Loop over checkpoints produced by 03b/03c, save confusion matrices and reports.

In [None]:
from pathlib import Path
import sys
from subprocess import run
import json

# --- ROOT ---
# Locate the project root: start at the current working directory and climb
# towards the filesystem root until a directory containing "Data" is found.
# At most 10 directories are inspected (the start plus 9 ancestors).
ROOT = Path.cwd().resolve()
_climbed = 0
while not (ROOT / "Data").exists():
    ROOT = ROOT.parent
    _climbed += 1
    if _climbed == 10:
        raise FileNotFoundError("Project root not found (Data folder missing).")

# --- Python (prefer venv) ---
# Pick the interpreter used to launch the evaluation subprocesses.
# The project's virtual environment is preferred. Both on-disk layouts are
# probed: Windows puts the interpreter in ".venv/Scripts/python.exe", POSIX
# systems in ".venv/bin/python". When neither exists, the interpreter running
# this notebook is used instead.
_VENV_CANDIDATES = (
    ROOT / ".venv" / "Scripts" / "python.exe",  # Windows layout
    ROOT / ".venv" / "bin" / "python",          # POSIX layout
)
# VENV_PY keeps its historical value (the Windows path) when no venv is found.
VENV_PY = next(
    (candidate for candidate in _VENV_CANDIDATES if candidate.exists()),
    _VENV_CANDIDATES[0],
)
PY = str(VENV_PY if VENV_PY.exists() else Path(sys.executable))

# --- Paths ---
# Output locations (figures and monitoring reports) and the checkpoint folder.
VIS_DIR = ROOT / "Visualisation"
MON_DIR = ROOT / "Monitoring" / "output"
WEIGHTS_DIR = ROOT / "Model" / "weights"

# Inputs handed to the evaluation script.
TEST_DIR = ROOT.joinpath("Data", "raw", "test")
CURVES = ROOT.joinpath("Visualisation", "training_curves.png")

# Make sure both output folders exist before anything is written into them.
for _out_dir in (VIS_DIR, MON_DIR):
    _out_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Fail early when the test split is absent: every evaluation run needs it.
_test_dir_present = TEST_DIR.exists()
if not _test_dir_present:
    raise FileNotFoundError(f"TEST_DIR missing: {TEST_DIR}")

# Collect the "best_*.pt" checkpoints in a stable (sorted) order.
weight_paths = list(WEIGHTS_DIR.glob("best_*.pt"))
weight_paths.sort()
print("Checkpoints found:", len(weight_paths))

In [None]:
def evaluate(weights_path: Path) -> dict:
    """Run the evaluation module on one checkpoint and return its artifact paths.

    The variant name is the checkpoint's file stem with the leading ``best_``
    prefix removed; it is used to name the per-variant confusion matrix and
    report so that different checkpoints do not share output files.

    Args:
        weights_path: Path to a checkpoint file (e.g. ``best_<variant>.pt``).

    Returns:
        A dict of string paths with the keys ``"weights"``, ``"confusion"``,
        ``"report"`` and ``"metrics_global"``.

    Raises:
        subprocess.CalledProcessError: If the evaluation subprocess exits with
            a non-zero status (``check=True``).
    """
    prefix = "best_"
    stem = weights_path.stem
    # Strip only the leading prefix. A blanket replace() would also delete
    # later occurrences, so "best_a_best_b" and "best_a_b" would collide on
    # the same suffix and overwrite each other's outputs.
    suffix = stem[len(prefix):] if stem.startswith(prefix) else stem

    confusion_path = VIS_DIR / f"confusion_{suffix}.png"
    report_path = MON_DIR / f"metrics_{suffix}.json"
    # NOTE(review): every variant is handed the same metrics.json path, so
    # presumably later runs overwrite earlier ones -- confirm this is intended
    # against Model.training.evaluate.
    metrics_global = MON_DIR / "metrics.json"

    cmd = [
        PY, "-m", "Model.training.evaluate",
        "--data-dir", str(TEST_DIR),
        "--weights", str(weights_path),
        "--confusion-path", str(confusion_path),
        "--report-path", str(report_path),
        "--metrics-json", str(metrics_global),
        "--training-curves", str(CURVES),
    ]

    # Echo the command so each run is traceable in the notebook output.
    print("===", " ".join(cmd))
    # Run from the project root so "-m Model.training.evaluate" is importable.
    run(cmd, check=True, cwd=str(ROOT))

    return {
        "weights": str(weights_path),
        "confusion": str(confusion_path),
        "report": str(report_path),
        "metrics_global": str(metrics_global),
    }

# Evaluate every discovered checkpoint in order, then print the artifact index.
artifacts = list(map(evaluate, weight_paths))
_artifact_index = json.dumps(artifacts, ensure_ascii=False, indent=2)
print(_artifact_index)