# Fullrun-Notebook (Etappe 01–03)

Dieses Notebook ist eine optionale „Bedienoberfläche“ für `fullrun.py`: Parameter auswählen, Lauf starten, danach Ergebnisse anschauen und bei Bedarf Exporte für die Arbeit kopieren. Alles geht auch ohne Notebook direkt über die CLI.

Kurz zur Einordnung:
- Das Notebook startet `fullrun.py` als Subprozess; als Interpreter dient standardmäßig `sys.executable` (also der Python des Notebooks), optional stattdessen `conda run -n <env> python`.
- Für die Nachvollziehbarkeit sind die erzeugten Ausgabeordner entscheidend (z.B. `fullrun_out/...`) inkl. `metadata.json`/`runs.jsonl` und den Exporten unter `stage3_evaluation/`.
- Begriffe und Zielgrößen sind wie im schriftlichen Teil; hier geht es wirklich nur um Ausführung und Export.

## Grober Ablauf
1. Umgebung prüfen
2. Parameter wählen und als `plan.json` speichern
3. Lauf starten (Live-Ausgabe + `notebook_run.log`)
4. Auswertung aus `stage3_evaluation/` ansehen
5. Gewünschte Tabellen/Abbildungen/CSVs nach `thesis_assets/<run_name>/` kopieren


## 2) Umgebung prüfen

Gedacht ist das Notebook für die Conda-Umgebung aus `environment.yml`. Standardmäßig wird `sys.executable` als Interpreter verwendet (also der Python, mit dem das Notebook läuft).

Das Projektwurzelverzeichnis wird automatisch gesucht (Start bei `Path.cwd()`, dann nach oben, bis `fullrun.py` gefunden wird).


In [None]:
from __future__ import annotations

from pathlib import Path
import os
import sys
import json
import subprocess
import datetime
import time
import re
import shutil

import ipywidgets as widgets
from IPython.display import display, HTML, IFrame, clear_output


# Name of the conda environment that CONDA_DEFAULT_ENV is compared against below.
EXPECTED_CONDA_ENV = "ba_pipeline"


def _html_box(text: str, *, kind: str = "info") -> HTML:
    """Wrap *text* in a coloured, HTML-escaped message box.

    ``kind`` picks the (border, background) colour pair; an unknown kind falls
    back to the "info" colours. Newlines in *text* are turned into ``<br>``.
    """
    palette = {
        "info": ("#0b5394", "#d9e8ff"),
        "warn": ("#7f6000", "#fff2cc"),
        "error": ("#990000", "#f4cccc"),
        "ok": ("#274e13", "#d9ead3"),
    }
    border, bg = palette[kind] if kind in palette else palette["info"]

    # "&" has to be escaped first so the entities created for "<" and ">" stay intact.
    safe = str(text)
    for needle, replacement in (
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
        ("\n", "<br>"),
    ):
        safe = safe.replace(needle, replacement)

    markup = f"""
        <div style="border:1px solid {border}; background:{bg}; padding:10px; border-radius:6px;">
          <div style="font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial; font-size: 13px; line-height:1.35;">{safe}</div>
        </div>
        """
    return HTML(markup)


def find_repo_root(start: Path | None = None) -> Path:
    """Locate the project root by walking upwards until ``fullrun.py`` is found.

    The search begins at *start* (default: the current working directory) and
    checks that directory and each of its parents in turn.

    Raises:
        FileNotFoundError: if no visited directory contains a ``fullrun.py`` file.
    """
    start_path = (start or Path.cwd()).resolve()
    candidates = (start_path, *start_path.parents)
    root = next((c for c in candidates if (c / "fullrun.py").is_file()), None)
    if root is not None:
        return root
    raise FileNotFoundError(
        "Projekt-Root konnte nicht gefunden werden (fullrun.py nicht entdeckt).\n"
        f"Start: {start_path}\n"
        "Tipp: Starte Jupyter im Projekt-Root oder in einem Unterordner davon."
    )


# Basic information about the interpreter and environment running this notebook
print("sys.executable:", sys.executable)
print("sys.version:", sys.version.replace("\n", " "))
print("CONDA_DEFAULT_ENV:", os.environ.get("CONDA_DEFAULT_ENV"))
print("os.name:", os.name)
print("sys.platform:", sys.platform)
print("CWD:", Path.cwd())


# Conda environment warning. The texts are derived from EXPECTED_CONDA_ENV so
# that the constant and the displayed environment name cannot drift apart.
if os.environ.get("CONDA_DEFAULT_ENV") != EXPECTED_CONDA_ENV:
    display(
        _html_box(
            f"WARNUNG: CONDA_DEFAULT_ENV ist nicht '{EXPECTED_CONDA_ENV}'.\n"
            "Empfohlen: Terminal öffnen und ausführen:\n"
            f"  conda activate {EXPECTED_CONDA_ENV}\n"
            "Dann Jupyter aus genau diesem Terminal starten.",
            kind="warn",
        )
    )
else:
    display(_html_box(f"OK: {EXPECTED_CONDA_ENV} ist aktiv.", kind="ok"))


# Project-root detection (abort the cell if it cannot be found)
try:
    repo_root = find_repo_root()
except Exception as e:
    raise SystemExit(str(e)) from e

print("Projekt-Root:", repo_root)


# Kleine Utilities (für spätere Zellen)
def _utc_now_iso() -> str:
    return datetime.datetime.now(datetime.timezone.utc).isoformat()


def _read_json(path: Path) -> dict | None:
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except Exception:
        return None


def _get_selected_run_root() -> Path | None:
    """Return the path stored in the global ``SELECTED_RUN_ROOT`` if it exists on disk.

    Returns ``None`` when the global is unset/empty or the path does not exist.
    """
    selected = globals().get("SELECTED_RUN_ROOT")
    if selected:
        candidate = Path(str(selected))
        if candidate.exists():
            return candidate
    return None


# Import check: a button plus a bordered output area for its results
btn_import_check = widgets.Button(description="Imports prüfen", button_style="info")
out_import_check = widgets.Output(layout=widgets.Layout(border="1px solid #ddd", padding="8px"))


def _try_import(name: str):
    try:
        mod = __import__(name)
        ver = getattr(mod, "__version__", "?")
        return True, ver, None
    except Exception as e:
        return False, None, f"{type(e).__name__}: {e}"


def _on_import_check(_):
    """Button callback: print which packages can be imported in this kernel."""
    required = ("numpy", "pandas", "matplotlib", "ipywidgets")
    with out_import_check:
        clear_output()
        print("Import-Check (offline):")
        for pkg in required:
            ok, ver, err = _try_import(pkg)
            status = f"  [OK]   {pkg} (version: {ver})" if ok else f"  [FAIL] {pkg} -> {err}"
            print(status)

        # fitz is reported as INFO rather than FAIL when it cannot be imported.
        fitz_ok, fitz_ver, _fitz_err = _try_import("fitz")
        if not fitz_ok:
            print("  [INFO] fitz (PyMuPDF) nicht verfügbar -> PDF-Preview via IFrame/Link.")
        else:
            print(f"  [OK]   fitz (PyMuPDF) (version: {fitz_ver})")


# Register the click handler and show the button above its output area.
btn_import_check.on_click(_on_import_check)
display(widgets.VBox([btn_import_check, out_import_check]))


## 3) PLANUNG (Widgets)

Hier stelle ich die Parameter für einen Lauf zusammen, ohne fachliche Logik zu duplizieren: Aus den Eingaben baut das Notebook den Aufruf von `fullrun.py` (Interpreter wie oben beschrieben). `Validieren` prüft kurz, ob `fullrun.py --help` mit dem aktuell gewählten Interpreter läuft. `Plan schreiben` legt den Ausgabeordner an und speichert eine Momentaufnahme als `plan.json` (inkl. Commit + `git_dirty`, falls Git verfügbar ist); eine bereits vorhandene `plan.json` wird dabei nicht überschrieben.


In [None]:
# PLAN UI

def _sanitize_label(raw: str) -> str:
    t = str(raw or "").strip()
    if not t:
        return "run"
    t = re.sub(r"[\s/\\:]+", "_", t)
    t = re.sub(r"[^A-Za-z0-9._-]+", "_", t)
    t = t.strip("._-")
    return t or "run"


def _utc_now_iso() -> str:
    return datetime.datetime.now(datetime.timezone.utc).isoformat()


def _git_snapshot(cwd: Path):
    commit_hash = None
    git_dirty = None
    note = None

    try:
        r1 = subprocess.run(
            ["git", "rev-parse", "HEAD"],
            cwd=str(cwd),
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=5,
        )
        if r1.returncode == 0:
            commit_hash = (r1.stdout or "").strip() or None
        else:
            note = (r1.stderr or r1.stdout or "").strip() or "git rev-parse fehlgeschlagen"

        r2 = subprocess.run(
            ["git", "status", "--porcelain"],
            cwd=str(cwd),
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=5,
        )
        if r2.returncode == 0:
            git_dirty = bool((r2.stdout or "").strip())
        else:
            if note is None:
                note = (r2.stderr or r2.stdout or "").strip() or "git status fehlgeschlagen"
    except FileNotFoundError:
        note = "git unavailable"
    except Exception as e:
        note = f"git error: {type(e).__name__}: {e}"

    return commit_hash, git_dirty, note


# Input widgets for the run plan.
w_base_out = widgets.Text(value="runs", description="Out-Basis", layout=widgets.Layout(width="420px"))
w_timestamped = widgets.Checkbox(value=True, description="Timestamped Out-Ordner")
w_label = widgets.Text(value="playground", description="Label", layout=widgets.Layout(width="420px"))
w_resume = widgets.Checkbox(value=True, description="Resume")
w_dry_run = widgets.Checkbox(value=False, description="Dry-run")
w_methods = widgets.SelectMultiple(
    options=["V1", "V2", "V3", "V4"],
    value=("V1", "V2", "V3", "V4"),
    description="Verfahren",
    layout=widgets.Layout(width="220px", height="110px"),
)
w_conda_run = widgets.Checkbox(value=False, description="Conda run")
# Default taken from EXPECTED_CONDA_ENV so the pre-filled name matches the
# fallback used by _build_python_launcher() instead of a second literal.
w_conda_env = widgets.Text(value=EXPECTED_CONDA_ENV, description="Env", layout=widgets.Layout(width="320px"))

# Action buttons and the shared output area for preview/validation messages.
btn_validate = widgets.Button(description="Validieren", button_style="warning")
btn_write_plan = widgets.Button(description="Plan schreiben", button_style="success")
out_plan = widgets.Output(layout=widgets.Layout(border="1px solid #ddd", padding="8px"))


def _build_out_dir_rel(now: datetime.datetime | None = None) -> Path:
    """Build the output directory path from the current widget values.

    The result is ``<base>/<label>`` or, when the timestamp checkbox is set,
    ``<base>/<YYYY-MM-DD_HHMMSS>_<label>``. *now* overrides the timestamp
    source (default: local ``datetime.now()``). An empty base falls back to
    ``"runs"``.
    """
    base = Path((w_base_out.value or "runs").strip() or "runs")
    lbl = _sanitize_label(w_label.value)
    if not w_timestamped.value:
        return base / lbl
    moment = now or datetime.datetime.now()
    return base / f"{moment:%Y-%m-%d_%H%M%S}_{lbl}"


def _build_python_launcher() -> list[str]:
    """Return the command prefix used to start Python for ``fullrun.py``.

    Without "Conda run" this is just ``[sys.executable]``. With "Conda run"
    the interpreter is started through ``conda run -n <env> python``; an empty
    environment field falls back to ``EXPECTED_CONDA_ENV``.
    """
    if not w_conda_run.value:
        return [sys.executable]
    env_name = (w_conda_env.value or EXPECTED_CONDA_ENV).strip() or EXPECTED_CONDA_ENV
    # `conda run` captures the child's stdout/stderr by default and only prints
    # it after the process has finished, which defeats the live output of the
    # run cell. `--no-capture-output` passes the streams through instead.
    # NOTE(review): the flag exists in recent conda releases (4.9+, to my
    # knowledge) - confirm for the conda version in use.
    return ["conda", "run", "--no-capture-output", "-n", env_name, "python"]


def build_plan_spec(now: datetime.datetime | None = None) -> dict:
    """Assemble the run plan (command line plus environment snapshot) from the widgets.

    *now* is forwarded to ``_build_out_dir_rel`` for the timestamped folder name.

    Raises:
        ValueError: if no method is selected.
    """
    methods = [str(m).strip() for m in w_methods.value]
    if not methods:
        raise ValueError("methods ist leer (mind. ein Verfahren auswählen).")

    out_dir_rel = _build_out_dir_rel(now=now)
    out_dir_abs = (repo_root / out_dir_rel).resolve()
    launcher = _build_python_launcher()
    resume = bool(w_resume.value)
    dry_run = bool(w_dry_run.value)

    # fullrun.py is addressed relative to the repo root (see "repo_root" below);
    # the flag order is: --out, [--resume], --methods, [--dry-run].
    cmd = [*launcher, "fullrun.py", "--out", str(out_dir_rel)]
    if resume:
        cmd.append("--resume")
    cmd.extend(["--methods", ",".join(methods)])
    if dry_run:
        cmd.append("--dry-run")

    platform_info = {
        "os_name": os.name,
        "sys_platform": sys.platform,
    }
    spec = {
        "timestamp_utc": _utc_now_iso(),
        "repo_root": str(repo_root),
        "out_dir_rel": str(out_dir_rel),
        "out_dir": str(out_dir_abs),
        "command_list": cmd,
        "command_string": subprocess.list2cmdline([str(x) for x in cmd]),
        "methods": methods,
        "resume": resume,
        "dry_run": dry_run,
        "sys_executable": sys.executable,
        "python_version": sys.version.split()[0],
        "conda_default_env": os.environ.get("CONDA_DEFAULT_ENV"),
        "platform": platform_info,
        "launcher": launcher,
    }
    return spec


def _render_plan_preview():
    """Redraw the plan preview (output folder and command) in ``out_plan``."""
    with out_plan:
        clear_output()
        try:
            spec = build_plan_spec()
        except Exception as exc:
            display(_html_box(f"Plan ist aktuell nicht valide: {type(exc).__name__}: {exc}", kind="error"))
            return

        print("Out-Ordner (rel):", spec["out_dir_rel"])
        print("Out-Ordner (abs):", spec["out_dir"])
        for heading, key in (
            ("\nBefehl (Liste):", "command_list"),
            ("\nBefehl (String):", "command_string"),
        ):
            print(heading)
            print(spec[key])

        display(_html_box("OK: Plan ist valide.", kind="ok"))


def _on_validate(_):
    """Button callback: check that ``fullrun.py --help`` runs with the chosen launcher.

    Output is appended to ``out_plan``; on success the first 25 lines of the
    help text are printed.
    """
    with out_plan:
        print("\n[Validieren] fullrun.py --help ...")
        try:
            launcher = _build_python_launcher()
            uses_conda = launcher[:1] == ["conda"]
            if uses_conda and shutil.which("conda") is None:
                raise FileNotFoundError("'conda' nicht auf PATH (conda-run ist aktiv).")

            result = subprocess.run(
                [*launcher, "fullrun.py", "--help"],
                cwd=str(repo_root),
                capture_output=True,
                text=True,
                encoding="utf-8",
                errors="replace",
                timeout=10,
            )
            if result.returncode != 0:
                display(_html_box(f"Validieren fehlgeschlagen: Exitcode {result.returncode}", kind="error"))
                print((result.stdout or "").strip())
                print((result.stderr or "").strip())
            else:
                display(_html_box("Validieren OK: fullrun.py --help erfolgreich.", kind="ok"))
                head = (result.stdout or "").splitlines()[:25]
                print("\n".join(head))
        except Exception as exc:
            display(_html_box(f"Validieren: {type(exc).__name__}: {exc}", kind="error"))


def write_plan(spec: dict, *, overwrite: bool = False) -> Path:
    """Create the output folder and store *spec* (plus Git info) as ``plan.json``.

    An existing ``plan.json`` is left untouched unless *overwrite* is true.
    The path of ``plan.json`` is returned in either case.
    """
    target_dir = Path(spec["out_dir"])  # absolute path
    target_dir.mkdir(parents=True, exist_ok=True)
    plan_path = target_dir / "plan.json"

    # Guard: never overwrite an existing plan silently.
    may_write = overwrite or not plan_path.exists()
    if may_write:
        commit_hash, git_dirty, note = _git_snapshot(repo_root)
        enriched = {
            **spec,
            "commit_hash": commit_hash,
            "git_dirty": git_dirty,
            "git_note": note,
        }
        payload = json.dumps(enriched, indent=2, ensure_ascii=False)
        plan_path.write_text(payload, encoding="utf-8")
    return plan_path


def _on_write_plan(_):
    """Button callback: write ``plan.json`` for the current widget state.

    ``write_plan(..., overwrite=False)`` keeps an existing ``plan.json``, so the
    message distinguishes "written" from "already existed, left untouched"
    instead of always claiming the plan was written. The plan path and output
    folder are stored in the globals ``LAST_PLAN_PATH`` / ``LAST_OUT_DIR``.
    """
    with out_plan:
        print("\n[Plan schreiben] ...")
        try:
            now = datetime.datetime.now()
            spec = build_plan_spec(now=now)

            # Check before the call: afterwards a kept file and a freshly
            # written one are indistinguishable.
            already_there = (Path(spec["out_dir"]) / "plan.json").exists()
            plan_path = write_plan(spec, overwrite=False)

            if not plan_path.exists():
                display(_html_box("Plan konnte nicht geschrieben werden (unerwartet).", kind="error"))
            elif already_there:
                display(
                    _html_box(
                        f"plan.json existiert bereits und wurde nicht überschrieben: {plan_path}",
                        kind="warn",
                    )
                )
            else:
                display(_html_box(f"Plan geschrieben: {plan_path}", kind="ok"))

            # Global snapshot for later cells
            globals()["LAST_PLAN_PATH"] = str(plan_path)
            globals()["LAST_OUT_DIR"] = str(Path(spec["out_dir"]))
        except Exception as e:
            display(_html_box(f"Plan schreiben: {type(e).__name__}: {e}", kind="error"))


# Connect the buttons to their callbacks.
btn_validate.on_click(_on_validate)
btn_write_plan.on_click(_on_write_plan)

# Layout: two columns of inputs, a row of buttons, then the shared output area.
ui_left = widgets.VBox([w_base_out, w_label, w_methods])
ui_right = widgets.VBox([w_timestamped, w_resume, w_dry_run, w_conda_run, w_conda_env])
ui_buttons = widgets.HBox([btn_validate, btn_write_plan])

display(widgets.VBox([widgets.HBox([ui_left, ui_right]), ui_buttons, out_plan]))


# Refresh the preview automatically whenever one of the inputs changes
for w in [w_base_out, w_timestamped, w_label, w_resume, w_dry_run, w_methods, w_conda_run, w_conda_env]:
    w.observe(lambda _chg: _render_plan_preview(), names="value")

# Draw the initial preview once.
_render_plan_preview()


## 4) AUSFÜHRUNG (Live-Ausgabe)

Der Startknopf startet den Subprozess (ohne `shell=True`) und streamt stdout/stderr live. Zusätzlich wird `notebook_run.log` in den Ausgabeordner geschrieben.


In [None]:
# Start button and a scrollable output area (max. 380px high) for the live output.
btn_run_now = widgets.Button(description="Run starten", button_style="danger")
out_run = widgets.Output(layout=widgets.Layout(border="1px solid #ddd", padding="8px", max_height="380px", overflow="auto"))


def _ensure_plan_for_run(spec: dict) -> Path:
    out_dir_abs = Path(spec["out_dir"])  # abs
    plan_path = out_dir_abs / "plan.json"
    if plan_path.exists():
        # Prüfe grob, ob der Plan zur aktuellen Spec passt (falls nicht: lieber abbrechen)
        try:
            old = json.loads(plan_path.read_text(encoding="utf-8"))
            if old.get("command_list") != spec.get("command_list"):
                raise RuntimeError(
                    "plan.json existiert bereits, aber passt nicht zur aktuellen Planung.\n"
                    "Tipp: Nutze ein neues Label oder aktiviere 'Timestamped Out-Ordner'."
                )
        except Exception as e:
            raise
        return plan_path
    return write_plan(spec, overwrite=False)


def _on_run_now(_):
    """Button callback: run ``fullrun.py`` as a subprocess and stream its output.

    Steps: build the plan from the widgets, make sure ``plan.json`` exists and
    matches, start the subprocess in the repo root (no shell), and copy every
    output line both to ``notebook_run.log`` in the output folder and to
    ``out_run``. The output folder is stored in the global ``LAST_RUN_DIR``.
    The button is disabled while the callback runs.

    If streaming is interrupted (e.g. by a kernel interrupt) or fails, the
    child process is terminated so that it is not left running unattended.
    """
    btn_run_now.disabled = True
    try:
        with out_run:
            clear_output()
            print("[Ausführung] Starte Subprozess ...")

        now = datetime.datetime.now()
        spec = build_plan_spec(now=now)
        out_dir_abs = Path(spec["out_dir"])  # absolute path
        cmd = [str(x) for x in spec["command_list"]]

        plan_path = _ensure_plan_for_run(spec)

        log_path = out_dir_abs / "notebook_run.log"
        start_t = time.monotonic()

        # Unbuffered UTF-8 output so lines arrive as they are produced.
        env = os.environ.copy()
        env["PYTHONUNBUFFERED"] = "1"
        env["PYTHONIOENCODING"] = "utf-8"

        with log_path.open("w", encoding="utf-8", errors="replace") as log_f:
            with out_run:
                print("Out-Ordner:", out_dir_abs)
                print("Plan:", plan_path)
                print("Befehl:")
                print(spec["command_string"])
                print("\n--- Live-Output ---\n")

            proc = subprocess.Popen(
                cmd,
                cwd=str(repo_root),
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                encoding="utf-8",
                errors="replace",
                bufsize=1,
                shell=False,
                env=env,
            )
            try:
                assert proc.stdout is not None
                for line in proc.stdout:
                    log_f.write(line)
                    log_f.flush()
                    out_run.append_stdout(line)

                rc = int(proc.wait())
            finally:
                # Reached with a still-running child only when the loop above
                # was aborted (KeyboardInterrupt, I/O error, ...): stop the
                # child, politely first, then by force.
                if proc.poll() is None:
                    proc.terminate()
                    try:
                        proc.wait(timeout=5)
                    except subprocess.TimeoutExpired:
                        proc.kill()
                        proc.wait()
                if proc.stdout is not None:
                    proc.stdout.close()
            dur = time.monotonic() - start_t

        with out_run:
            print("\n--- Fertig ---")
            print("Exitcode:", rc)
            print("Laufzeit (s):", round(dur, 2))
            print("Log:", log_path)

        globals()["LAST_RUN_DIR"] = str(out_dir_abs)

        # Optional: point the report dropdown (if that cell has been run) at this run.
        if rc == 0 and "refresh_run_options" in globals() and "w_run_select" in globals():
            try:
                refresh_run_options()
                w_run_select.value = str(out_dir_abs)
            except Exception:
                # Best effort only: the run itself has already finished.
                pass

    except Exception as e:
        with out_run:
            display(_html_box(f"RUN ERROR: {type(e).__name__}: {e}", kind="error"))
    finally:
        btn_run_now.disabled = False


# Register the click handler and show the button above the live-output area.
btn_run_now.on_click(_on_run_now)
display(widgets.VBox([btn_run_now, out_run]))


## 5) Bericht (Lauf auswählen + Überblick)

Hier werden die Ausgaben aus `stage3_evaluation/` eines ausgewählten Laufs geladen (CSV + Metadaten). Die zentrale Datei ist `stage3_evaluation/aggregated_metrics.csv`; wenn sie fehlt, ist Etappe 03 meistens noch nicht durchgelaufen.

Je nach Run können zusätzlich z. B. `per_instance_metrics.csv`, `coverage_matrix_aggregated.csv` oder `winrate_heatmap.csv` auftauchen.


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# Output area for the report and a button to rescan the run folders.
out_report = widgets.Output(layout=widgets.Layout(border="1px solid #ddd", padding="8px"))
btn_refresh_runs = widgets.Button(description="Runs aktualisieren", button_style="info")


def _read_json(path: Path) -> dict | None:
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except Exception:
        return None


def _find_run_roots() -> list[Path]:
    """Collect run folders that contain ``stage3_evaluation/aggregated_metrics.csv``.

    Searched locations: ``<repo_root>/runs`` (recursively), ``<repo_root>/fullrun_out``
    itself, and - if the "Out-Basis" widget names a different folder - that
    folder (recursively). The result is de-duplicated and sorted by folder
    name in descending order, which puts the newest first for timestamped
    folder names.
    """

    def _scan(base: Path) -> list[Path]:
        # A run root is the parent of a "stage3_evaluation" folder holding the CSV.
        if not base.exists():
            return []
        return [
            p.parent.parent
            for p in base.rglob("aggregated_metrics.csv")
            if p.parent.name == "stage3_evaluation"
        ]

    # Fixed locations required by chapter 5 (methodology)
    roots = _scan((repo_root / "runs").resolve())

    fullrun_out_root = (repo_root / "fullrun_out").resolve()
    if (fullrun_out_root / "stage3_evaluation" / "aggregated_metrics.csv").exists():
        roots.append(fullrun_out_root)

    # Optional: a custom output base from the planning cell.
    try:
        raw_base = str(w_base_out.value or "").strip()
        # Test the raw text, not str(Path(raw)): Path("") renders as "." and an
        # empty field would otherwise trigger a scan of the entire repo root.
        # An empty field means "runs" (as in _build_out_dir_rel), which is
        # already covered above.
        if raw_base and Path(raw_base) != Path("runs"):
            roots.extend(_scan((repo_root / raw_base).resolve()))
    except Exception:
        # Best effort: the planning cell may not have been run (w_base_out
        # undefined) or the folder may be unreadable; the fixed locations
        # above still apply.
        pass

    # De-duplicate by string form (first occurrence wins), then sort by folder name.
    uniq = list({str(r): r for r in roots}.values())
    uniq.sort(key=lambda p: p.name, reverse=True)
    return uniq


def refresh_run_options():
    """Rescan the run folders and update the options of ``w_run_select``.

    Labels are paths relative to the repo root where possible. If the global
    ``LAST_RUN_DIR`` is among the options, it becomes the selected value.
    """

    def _label(root: Path) -> str:
        try:
            return str(root.relative_to(repo_root))
        except Exception:
            return str(root)

    options = [(_label(root), str(root)) for root in _find_run_roots()]
    if not options:
        options = [("(keine Runs gefunden)", "")]

    w_run_select.options = options

    # Auto-select: the most recent run started from the run cell
    last = globals().get("LAST_RUN_DIR")
    if last and last in [value for _lbl, value in options]:
        w_run_select.value = last


# Dropdown for choosing a run; starts with a placeholder until refresh_run_options() fills it.
w_run_select = widgets.Dropdown(options=[("(lade...)", "")], description="Run auswählen", layout=widgets.Layout(width="720px"))


def _report_for_run(run_root: Path):
    """Display the report for the run stored in *run_root*.

    Shows, in this order: a reproducibility snapshot (from ``plan.json`` and
    ``metadata.json``), a table from ``stage3_evaluation/aggregated_metrics.csv``
    (required), and - if the respective optional CSV exists - box plots from
    ``per_instance_metrics.csv``, a coverage heatmap from
    ``coverage_matrix_aggregated.csv`` and a win-rate heatmap from
    ``winrate_heatmap.csv``.

    Side effect: on completion the global ``SELECTED_RUN_ROOT`` is set to
    *run_root*. NOTE(review): that assignment is the last statement, so it is
    skipped when ``aggregated_metrics.csv`` is missing (early return) or when
    any step raises - confirm this is intended.
    """
    stage3_dir = run_root / "stage3_evaluation"
    agg_path = stage3_dir / "aggregated_metrics.csv"
    if not agg_path.exists():
        display(_html_box(f"Fehlt: {agg_path}", kind="error"))
        return

    plan = _read_json(run_root / "plan.json")
    # metadata.json is looked up in the run root first, then in stage3_evaluation/.
    meta = _read_json(run_root / "metadata.json") or _read_json(stage3_dir / "metadata.json")

    # Repro snapshot
    commit_hash = None
    git_dirty = None
    methods = None
    ts = None
    cmd_str = None

    # Values from plan.json take precedence ...
    if isinstance(plan, dict):
        commit_hash = plan.get("commit_hash")
        git_dirty = plan.get("git_dirty")
        methods = plan.get("methods")
        ts = plan.get("timestamp_utc")
        cmd_str = plan.get("command_string")

    # ... and metadata.json only fills in what is still missing.
    if isinstance(meta, dict):
        commit_hash = commit_hash or meta.get("git_commit")
        ts = ts or meta.get("timestamp_utc")
        cli_args = meta.get("cli_args") if isinstance(meta.get("cli_args"), dict) else None
        if cli_args:
            methods = methods if methods is not None else cli_args.get("methods")
        if cmd_str is None and isinstance(meta.get("argv"), list):
            cmd_str = " ".join(str(x) for x in meta.get("argv"))

    snapshot_lines = [
        f"Run: {run_root}",
        f"timestamp: {ts}",
        f"commit_hash: {commit_hash}",
        f"git_dirty: {git_dirty}",
        f"methods: {methods}",
    ]
    if cmd_str:
        snapshot_lines.append(f"command: {cmd_str}")
    display(_html_box("Repro Snapshot\n" + "\n".join(snapshot_lines), kind="info"))

    # aggregated_metrics.csv (required)
    df_agg = pd.read_csv(agg_path)

    # If a column has rows marked "all" (case-insensitive), keep only those rows.
    df_view = df_agg
    if "size_class" in df_view.columns and (df_view["size_class"].astype(str).str.lower() == "all").any():
        df_view = df_view[df_view["size_class"].astype(str).str.lower() == "all"]
    if "severity" in df_view.columns and (df_view["severity"].astype(str).str.lower() == "all").any():
        df_view = df_view[df_view["severity"].astype(str).str.lower() == "all"]

    display(HTML("<h4>Aggregated Metrics (Global)</h4>"))
    if df_view.empty:
        display(df_agg.head(20))
    else:
        # Compact view: only the listed columns that are actually present
        cols = [c for c in [
            "size_class",
            "severity",
            "method",
            "n_is",
            "nd_size_median",
            "contrib_unique_median",
            "runtime_median_seconds_median",
            "ttff_median_seconds_median",
            "feasibility_rate_median",
        ] if c in df_view.columns]
        display(df_view[cols].reset_index(drop=True))

    # per_instance_metrics.csv (optional)
    per_path = stage3_dir / "per_instance_metrics.csv"
    if per_path.exists():
        df_per = pd.read_csv(per_path)
        if {"method", "nd_size", "runtime_median_seconds"}.issubset(df_per.columns):
            display(HTML("<h4>Per-Instance Boxplots</h4>"))
            fig, axes = plt.subplots(1, 2, figsize=(10, 4))
            df_per.boxplot(column="nd_size", by="method", ax=axes[0])
            axes[0].set_title("ND size")
            axes[0].set_xlabel("Verfahren")
            axes[0].set_ylabel("nd_size")

            df_per.boxplot(column="runtime_median_seconds", by="method", ax=axes[1])
            axes[1].set_title("Runtime median (s)")
            axes[1].set_xlabel("Verfahren")
            axes[1].set_ylabel("seconds")

            # Blank out the figure-level title.
            fig.suptitle("")
            fig.tight_layout()
            plt.show()
        else:
            display(_html_box(f"Spalten in per_instance_metrics unerwartet: {list(df_per.columns)}", kind="warn"))
    else:
        display(_html_box("Optional fehlt: per_instance_metrics.csv", kind="info"))

    # coverage_matrix_aggregated.csv (optional)
    cov_path = stage3_dir / "coverage_matrix_aggregated.csv"
    if cov_path.exists():
        df_cov = pd.read_csv(cov_path)
        needed = {"method_a", "method_b", "coverage_median"}
        if needed.issubset(df_cov.columns):
            # Same "all"-row filter as for the aggregated metrics above.
            df_cov_view = df_cov
            if "size_class" in df_cov_view.columns and (df_cov_view["size_class"].astype(str).str.lower() == "all").any():
                df_cov_view = df_cov_view[df_cov_view["size_class"].astype(str).str.lower() == "all"]
            if "severity" in df_cov_view.columns and (df_cov_view["severity"].astype(str).str.lower() == "all").any():
                df_cov_view = df_cov_view[df_cov_view["severity"].astype(str).str.lower() == "all"]

            pivot = df_cov_view.pivot(index="method_a", columns="method_b", values="coverage_median")
            pivot = pivot.reindex(index=sorted(pivot.index), columns=sorted(pivot.columns))

            display(HTML("<h4>Coverage Heatmap (Median)</h4>"))
            plt.figure(figsize=(5.5, 4.5))
            plt.imshow(pivot.values, cmap="viridis", vmin=0, vmax=1)
            plt.xticks(range(len(pivot.columns)), pivot.columns)
            plt.yticks(range(len(pivot.index)), pivot.index)
            plt.colorbar(label="C(A,B)")
            plt.title("Coverage (Median)")
            plt.tight_layout()
            plt.show()
        else:
            display(_html_box(f"coverage_matrix_aggregated Spalten unerwartet: {list(df_cov.columns)}", kind="warn"))
    else:
        display(_html_box("Optional fehlt: coverage_matrix_aggregated.csv", kind="info"))

    # winrate_heatmap.csv (optional)
    win_path = stage3_dir / "winrate_heatmap.csv"
    if win_path.exists():
        df_win = pd.read_csv(win_path)
        needed = {"omega", "method", "win_rate"}
        if needed.issubset(df_win.columns):
            pivot = df_win.pivot(index="omega", columns="method", values="win_rate")
            pivot = pivot.reindex(columns=sorted(pivot.columns))
            # Stable order of the omegas: sorted by index value
            pivot = pivot.sort_index()

            display(HTML("<h4>Win-Rate Heatmap</h4>"))
            # Figure height grows with the number of rows, limited to 4..10 inches.
            h = max(4.0, min(10.0, 0.25 * len(pivot.index)))
            plt.figure(figsize=(6.5, h))
            plt.imshow(pivot.values, cmap="magma", vmin=0, vmax=1, aspect="auto")
            plt.xticks(range(len(pivot.columns)), pivot.columns)
            plt.yticks(range(len(pivot.index)), pivot.index, fontsize=7)
            plt.colorbar(label="win_rate")
            plt.title("Win-Rate je ω")
            plt.tight_layout()
            plt.show()
        else:
            display(_html_box(f"winrate_heatmap Spalten unerwartet: {list(df_win.columns)}", kind="warn"))
    else:
        display(_html_box("Optional fehlt: winrate_heatmap.csv", kind="info"))

    # Remember the selection for the later cells (figures, sanity checks).
    globals()["SELECTED_RUN_ROOT"] = str(run_root)


def _on_run_select_change(_):
    """Observer callback: render the report for the run selected in ``w_run_select``."""
    with out_report:
        clear_output()
        selected = str(w_run_select.value or "").strip()
        if selected:
            _report_for_run(Path(selected))
        else:
            # The placeholder entries carry an empty value.
            display(_html_box("Kein Run ausgewählt.", kind="warn"))


def _on_refresh(_):
    """Button callback: rescan the run folders and refresh the dropdown options."""
    refresh_run_options()


# Connect the button and the dropdown to their callbacks.
btn_refresh_runs.on_click(_on_refresh)
w_run_select.observe(_on_run_select_change, names="value")

# Show the controls above the report area and fill the dropdown once.
display(widgets.VBox([widgets.HBox([btn_refresh_runs, w_run_select]), out_report]))
refresh_run_options()


## 6) Abbildungen (Browser)

Anzeige von PDFs aus `stage3_evaluation/figures/`. Wenn `fitz` (PyMuPDF) installiert ist, rendert das Notebook die gewählte Seite als Bild (mit Vor/Zurück). Andernfalls wird das PDF als Fallback per IFrame eingebettet (plus Link).


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Output area, refresh button and PDF dropdown for the figure browser.
out_fig = widgets.Output(layout=widgets.Layout(border="1px solid #ddd", padding="8px"))
btn_refresh_fig = widgets.Button(description="Abbildungen aktualisieren", button_style="info")
w_pdf_select = widgets.Dropdown(options=[("(kein Run geladen)", "")], description="PDF", layout=widgets.Layout(width="720px"))

# Page navigation; w_page holds a zero-based page index (it is passed on as page_index).
btn_prev = widgets.Button(description="Vorherige Seite")
btn_next = widgets.Button(description="Nächste Seite")
w_page = widgets.IntText(value=0, description="Seite", layout=widgets.Layout(width="200px"))


def _get_selected_run_root() -> Path | None:
    """Return the path from the global ``SELECTED_RUN_ROOT`` if set and present on disk, else ``None``."""
    stored = globals().get("SELECTED_RUN_ROOT")
    if not stored:
        return None
    run_root = Path(str(stored))
    if run_root.exists():
        return run_root
    return None


def _list_pdfs(run_root: Path) -> list[Path]:
    fig_dir = run_root / "stage3_evaluation" / "figures"
    if not fig_dir.exists():
        return []
    return sorted(fig_dir.glob("*.pdf"))


def refresh_figures():
    """Fill ``w_pdf_select`` with the PDFs of the currently selected run.

    Without a selected run, or without PDFs, a single placeholder entry with
    an empty value is shown instead.
    """
    run_root = _get_selected_run_root()
    if run_root is None:
        pdfs = []
        placeholder = "(kein Run geladen)"
    else:
        pdfs = _list_pdfs(run_root)
        placeholder = "(keine PDFs gefunden)"

    if not pdfs:
        w_pdf_select.options = [(placeholder, "")]
        w_pdf_select.value = ""
        return

    options = [(pdf.name, str(pdf)) for pdf in pdfs]
    w_pdf_select.options = options
    if not w_pdf_select.value:
        w_pdf_select.value = options[0][1]


def _render_pdf_preview(pdf_path: Path, page_index: int = 0):
    """Show a preview of *pdf_path* in the current output context.

    If ``fitz`` (PyMuPDF) can be imported, page *page_index* (zero-based,
    clamped to the valid range) is rendered as an image. Otherwise, or if
    rendering fails, the PDF is embedded via IFrame plus a link; the info box
    then states the actual reason for the fallback.
    """
    from html import escape  # local import: only needed for the fallback link

    fallback_reason = None
    try:
        import fitz  # type: ignore
    except Exception:
        # Covers a missing package as well as a broken installation.
        fallback_reason = "fitz nicht verfügbar -> IFrame/Link-Fallback."
    else:
        try:
            doc = fitz.open(str(pdf_path))
            try:
                if doc.page_count <= 0:
                    display(_html_box("PDF hat keine Seiten.", kind="warn"))
                    return
                page_index = max(0, min(int(page_index), doc.page_count - 1))
                page = doc.load_page(page_index)
                pix = page.get_pixmap(matrix=fitz.Matrix(2, 2), alpha=False)
                img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)

                plt.figure(figsize=(10, 7))
                plt.imshow(img)
                plt.axis("off")
                plt.title(f"{pdf_path.name} (page {page_index+1}/{doc.page_count})")
                plt.tight_layout()
                plt.show()
                return
            finally:
                # Release the file handle on every exit path.
                doc.close()
        except Exception as e:
            # Rendering failed although fitz is installed: say so instead of
            # claiming that fitz is unavailable.
            fallback_reason = (
                f"fitz-Rendering fehlgeschlagen ({type(e).__name__}: {e}) -> IFrame/Link-Fallback."
            )

    # Fallback: IFrame + link
    try:
        try:
            rel = pdf_path.relative_to(repo_root)
        except ValueError:
            # Not located below the repo root: use the path as given.
            rel = pdf_path

        display(_html_box(fallback_reason, kind="info"))
        src = rel.as_posix()
        display(IFrame(src=src, width=950, height=600))
        # File names come from disk; escape them so quotes or angle brackets
        # cannot break the generated markup.
        display(
            HTML(
                f"<a href='{escape(src, quote=True)}' target='_blank'>"
                f"PDF öffnen: {escape(pdf_path.name)}</a>"
            )
        )
    except Exception as e:
        display(_html_box(f"PDF Anzeige fehlgeschlagen: {type(e).__name__}: {e}", kind="error"))


def _on_pdf_change(_):
    """Callback: re-render the preview for the PDF selected in ``w_pdf_select``."""
    with out_fig:
        clear_output()
        selected = str(w_pdf_select.value or "").strip()
        if not selected:
            display(_html_box("Kein PDF gewählt.", kind="warn"))
            return

        pdf_path = Path(selected)
        if pdf_path.exists():
            _render_pdf_preview(pdf_path, page_index=int(w_page.value))
        else:
            display(_html_box(f"PDF existiert nicht: {pdf_path}", kind="error"))


def _on_prev(_):
    """Button callback: go back one page (never below 0) and re-render."""
    current = int(w_page.value)
    w_page.value = current - 1 if current > 0 else 0
    _on_pdf_change(None)


def _on_next(_):
    """Button callback: advance the page counter by one and re-render the selected PDF."""
    # NOTE(review): no upper bound is applied here, so w_page can run past the
    # last page of the PDF - confirm whether that is intended.
    w_page.value = int(w_page.value) + 1
    _on_pdf_change(None)


# Connect buttons and dropdown to their callbacks.
btn_refresh_fig.on_click(lambda _: refresh_figures())
w_pdf_select.observe(_on_pdf_change, names="value")
btn_prev.on_click(_on_prev)
btn_next.on_click(_on_next)

# Show the controls above the preview area and fill the PDF list once.
display(widgets.VBox([widgets.HBox([btn_refresh_fig, w_pdf_select]), widgets.HBox([btn_prev, btn_next, w_page]), out_fig]))
refresh_figures()


## 7) Kurzchecks

Hier werden `stage2_runs/**/runs.jsonl` und `stage2_runs/**/solutions.jsonl` grob geprüft (z. B. Zeilenzahl in `solutions.jsonl` als `n_solutions`, Warnungen bei leeren/fehlenden Dateien). Wenn `stage3_evaluation/milp_subruns.csv` existiert, zeigt das Notebook zusätzlich die Häufigkeiten der `termination_reason`.


In [None]:
import pandas as pd

# Button that triggers the sanity checks, plus the bordered output area they render into.
btn_sanity = widgets.Button(description="Sanity-Checks ausführen", button_style="warning")
out_sanity = widgets.Output(layout=widgets.Layout(border="1px solid #ddd", padding="8px"))


def _count_lines(path: Path) -> int:
    n = 0
    with path.open("r", encoding="utf-8", errors="replace") as f:
        for _ in f:
            n += 1
    return n


def _on_sanity(_):
    """Run the quick checks for the selected run and render them into ``out_sanity``.

    Steps:
      1. Collect every ``solutions.jsonl`` below ``<run_root>/stage2_runs`` and
         count its lines (``n_solutions``) and those of the sibling
         ``runs.jsonl`` (``n_runs``; ``None`` when that file is missing).
      2. Show the table, warn about empty ``solutions.jsonl`` files and about
         missing or empty ``runs.jsonl`` files, and print a per-method summary.
      3. If ``stage3_evaluation/milp_subruns.csv`` exists, show the frequencies
         of ``termination_reason``; an unreadable CSV is reported as an error
         box instead of a raw traceback.

    The argument (the clicked button) is ignored.
    """
    with out_sanity:
        clear_output()
        run_root = _get_selected_run_root()
        if run_root is None:
            display(_html_box("Kein Run ausgewählt (Report zuerst laden).", kind="warn"))
            return

        stage2_dir = run_root / "stage2_runs"
        if not stage2_dir.exists():
            display(_html_box(f"Fehlt: {stage2_dir}", kind="error"))
            return

        rows = []
        for sol_path in stage2_dir.rglob("solutions.jsonl"):
            # The first three path components below stage2_runs are used as
            # dataset / case / method labels; shorter paths fall back to "?".
            try:
                rel = sol_path.relative_to(stage2_dir)
                parts = rel.parts
                dataset_id = parts[0] if len(parts) >= 1 else "?"
                case_id = parts[1] if len(parts) >= 2 else "?"
                method = parts[2] if len(parts) >= 3 else "?"
            except Exception:
                dataset_id, case_id, method = "?", "?", "?"

            run_path = sol_path.with_name("runs.jsonl")
            n_sol = _count_lines(sol_path)
            n_run = _count_lines(run_path) if run_path.exists() else None

            rows.append(
                {
                    "dataset_id": dataset_id,
                    "case_id": case_id,
                    "method": method,
                    "n_solutions": n_sol,
                    "n_runs": n_run,
                    "solutions_path": str(sol_path),
                }
            )

        if not rows:
            display(_html_box("Keine solutions.jsonl gefunden.", kind="warn"))
        else:
            df = pd.DataFrame(rows)
            display(HTML("<h4>Stage 2 Artefakte</h4>"))
            display(df.sort_values(["dataset_id", "case_id", "method"]).reset_index(drop=True))

            display(HTML("<h4>Warnungen</h4>"))
            bad = df[df["n_solutions"] <= 0]
            if bad.empty:
                display(_html_box("OK: Keine leeren solutions.jsonl.", kind="ok"))
            else:
                display(_html_box(f"WARN: {len(bad)} Dateien haben 0 Lösungen.", kind="warn"))
                display(bad[["dataset_id", "case_id", "method", "n_solutions", "solutions_path"]])

            # runs.jsonl is checked as well: a missing value means the file is
            # absent, 0 means it exists but contains no lines.
            runs_bad = df[df["n_runs"].isna() | (df["n_runs"] == 0)]
            if not runs_bad.empty:
                display(_html_box(f"WARN: {len(runs_bad)} runs.jsonl fehlen oder sind leer.", kind="warn"))
                display(runs_bad[["dataset_id", "case_id", "method", "n_runs", "solutions_path"]])

            display(HTML("<h4>Überblick pro Verfahren</h4>"))
            display(df.groupby("method")["n_solutions"].agg(["count", "min", "median", "max", "sum"]))

        # Optional: MILP subruns termination reasons
        milp_path = run_root / "stage3_evaluation" / "milp_subruns.csv"
        if milp_path.exists():
            display(HTML("<h4>MILP Subruns</h4>"))
            try:
                df_m = pd.read_csv(milp_path)
            except (OSError, ValueError) as e:
                # pandas' EmptyDataError and ParserError derive from ValueError;
                # report them in a box like the other errors in this notebook.
                display(_html_box(f"milp_subruns.csv nicht lesbar: {type(e).__name__}: {e}", kind="error"))
            else:
                if "termination_reason" in df_m.columns:
                    display(df_m["termination_reason"].value_counts(dropna=False).to_frame("count"))
                else:
                    display(_html_box(f"Spalte termination_reason fehlt. Spalten: {list(df_m.columns)}", kind="warn"))
        else:
            display(_html_box("Optional fehlt: stage3_evaluation/milp_subruns.csv", kind="info"))


# Connect the button to the check routine and show button + output area stacked.
btn_sanity.on_click(_on_sanity)
display(widgets.VBox([btn_sanity, out_sanity]))


## 8) EXPORT (LaTeX-Assets)

Zielordner ist `thesis_assets/<run_name>/`. Von dort aus kann ich die Tabellen/Abbildungen später im LaTeX-Projekt referenzieren, ohne auf Zwischenstände in `fullrun_out/...` zu zeigen.

Je nach Auswahl kopiert das Notebook:
- `stage3_evaluation/tables/*.tex` nach `thesis_assets/.../tables/`
- ausgewählte PDFs nach `thesis_assets/.../figures/`
- ausgewählte CSVs nach `thesis_assets/.../csv/`

Zur Nachvollziehbarkeit wird zusätzlich eine `EXPORT_MANIFEST.json` geschrieben (Quelle, Timestamp, Commit/git_dirty, kopierte Dateien).


In [None]:
# One button per export category; all of them report into the shared output area.
btn_export_tables = widgets.Button(description="LaTeX-Tabellen exportieren", button_style="success")
btn_export_figs = widgets.Button(description="Schlüsselabbildungen exportieren", button_style="success")
btn_export_csv = widgets.Button(description="CSVs exportieren", button_style="success")
out_export = widgets.Output(layout=widgets.Layout(border="1px solid #ddd", padding="8px"))

# Instance selector used by the figure export; the empty value is the "(auto)" placeholder.
w_is_select = widgets.Dropdown(options=[("(auto)", "")], description="Instanz", layout=widgets.Layout(width="720px"))
btn_refresh_is = widgets.Button(description="Instanzen aktualisieren", button_style="info")


def _git_snapshot_now(cwd: Path) -> dict:
    """Git-Snapshot zum Export-Zeitpunkt (Fallback, falls plan.json fehlt)."""
    commit_hash = None
    git_dirty = None
    note = None

    try:
        r1 = subprocess.run(
            ["git", "rev-parse", "HEAD"],
            cwd=str(cwd),
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=5,
        )
        if r1.returncode == 0:
            commit_hash = (r1.stdout or "").strip() or None
        else:
            note = (r1.stderr or r1.stdout or "").strip() or "git rev-parse fehlgeschlagen"

        r2 = subprocess.run(
            ["git", "status", "--porcelain"],
            cwd=str(cwd),
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=5,
        )
        if r2.returncode == 0:
            git_dirty = bool((r2.stdout or "").strip())
        else:
            if note is None:
                note = (r2.stderr or r2.stdout or "").strip() or "git status fehlgeschlagen"
    except FileNotFoundError:
        note = "git unavailable"
    except Exception as e:
        note = f"git error: {type(e).__name__}: {e}"

    return {
        "commit_hash": commit_hash,
        "git_dirty": git_dirty,
        "git_note": note,
    }


def _load_repro_from_run(run_root: Path) -> dict:
    """Collect reproducibility info (commit, dirty flag, methods) for a run.

    ``plan.json`` is the primary source. ``metadata.json`` (run root first,
    then ``stage3_evaluation/``) only fills in a missing commit hash
    (``git_commit``) and missing methods (``cli_args.methods``); the dirty
    flag is taken from ``plan.json`` only.
    """
    plan = _read_json(run_root / "plan.json")
    meta = _read_json(run_root / "metadata.json") or _read_json(run_root / "stage3_evaluation" / "metadata.json")

    repro = {"commit_hash": None, "git_dirty": None, "methods": None}

    if isinstance(plan, dict):
        for key in ("commit_hash", "git_dirty", "methods"):
            repro[key] = plan.get(key)

    if isinstance(meta, dict):
        repro["commit_hash"] = repro["commit_hash"] or meta.get("git_commit")
        cli_args = meta.get("cli_args")
        # Only a non-empty dict of CLI arguments can supply the methods.
        if isinstance(cli_args, dict) and cli_args and repro["methods"] is None:
            repro["methods"] = cli_args.get("methods")

    return repro


def _export_root_for_run(run_root: Path) -> Path:
    """Return the resolved export folder ``<repo_root>/thesis_assets/<run name>``."""
    assets_dir = repo_root / "thesis_assets"
    return (assets_dir / run_root.name).resolve()


def _write_manifest(export_root: Path, run_root: Path):
    """Write ``EXPORT_MANIFEST.json`` into *export_root*.

    The manifest records the source run, the commit hash and dirty flag
    (preferring what the run recorded, otherwise the git state at export
    time, each with a ``*_source`` label), the methods, and every file
    currently present below *export_root* except the manifest itself.
    """
    repro = _load_repro_from_run(run_root)
    git_now = _git_snapshot_now(repo_root)

    # Commit: the value recorded with the run wins over the current checkout.
    run_commit = repro.get("commit_hash")
    if run_commit:
        commit_hash, commit_hash_source = run_commit, "run_metadata_or_plan"
    else:
        commit_hash, commit_hash_source = git_now.get("commit_hash"), "git_at_export"

    # Dirty flag: same precedence, but an explicit False from the run counts.
    run_dirty = repro.get("git_dirty")
    export_dirty = git_now.get("git_dirty")
    if run_dirty is not None:
        git_dirty, git_dirty_source = run_dirty, "plan.json"
    elif export_dirty is not None:
        git_dirty, git_dirty_source = export_dirty, "git_status_at_export"
    else:
        git_dirty, git_dirty_source = None, "unavailable"

    plan_json_present = (run_root / "plan.json").exists()

    exported = sorted(
        entry.relative_to(export_root).as_posix()
        for entry in export_root.rglob("*")
        if entry.is_file() and entry.name != "EXPORT_MANIFEST.json"
    )

    payload = {
        "timestamp_utc": _utc_now_iso(),
        "source_run": str(run_root),
        "repo_root": str(repo_root),
        "plan_json_present": plan_json_present,
        "commit_hash": commit_hash,
        "commit_hash_source": commit_hash_source,
        "git_dirty": git_dirty,
        "git_dirty_source": git_dirty_source,
        "export_time_git": git_now,
        "methods": repro.get("methods"),
        "copied_files": exported,
    }
    manifest_path = export_root / "EXPORT_MANIFEST.json"
    manifest_path.write_text(json.dumps(payload, indent=2, ensure_ascii=False), encoding="utf-8")


def _copy_files(src_paths: list[Path], dst_dir: Path) -> list[Path]:
    dst_dir.mkdir(parents=True, exist_ok=True)
    copied = []
    for src in src_paths:
        try:
            if src.exists() and src.is_file():
                dst = dst_dir / src.name
                shutil.copy2(src, dst)
                copied.append(dst)
        except Exception:
            continue
    return copied


def _discover_instances(run_root: Path) -> list[str]:
    fig_dir = run_root / "stage3_evaluation" / "figures"
    if not fig_dir.exists():
        return []
    is_ids = set()
    for p in fig_dir.glob("parallel_coords__*.pdf"):
        stem = p.stem
        if stem.startswith("parallel_coords__"):
            is_ids.add(stem[len("parallel_coords__"):])
    for p in fig_dir.glob("pareto_scatter__*.pdf"):
        stem = p.stem
        if stem.startswith("pareto_scatter__"):
            # pareto_scatter__{is_id}__f_x__f_y
            rest = stem[len("pareto_scatter__"):]
            parts = rest.split("__")
            if len(parts) >= 3:
                is_ids.add("__".join(parts[:-2]))
    return sorted(is_ids)


def refresh_instances():
    """Repopulate the instance dropdown from the figures of the selected run.

    Shows a placeholder entry when no run is loaded or no instances were
    found. Otherwise the dropdown lists the discovered instance ids and the
    previous selection is kept if it is still available; if not, the first
    instance is selected.
    """
    run_root = _get_selected_run_root()
    if run_root is None:
        w_is_select.options = [("(kein Run geladen)", "")]
        w_is_select.value = ""
        return
    is_ids = _discover_instances(run_root)
    if not is_ids:
        w_is_select.options = [("(keine Instanzen gefunden)", "")]
        w_is_select.value = ""
        return
    # Assigning new options makes the dropdown reselect its first entry, so the
    # current choice has to be remembered before the options are replaced.
    previous = w_is_select.value
    w_is_select.options = [(x, x) for x in is_ids]
    w_is_select.value = previous if previous in is_ids else is_ids[0]


def _on_export_tables(_):
    """Copy all ``*.tex`` tables of the selected run into the export folder.

    Source is ``<run_root>/stage3_evaluation/tables``, target is
    ``<export_root>/tables``. The manifest is rewritten afterwards. Messages
    go to ``out_export``; the argument is ignored.
    """
    with out_export:
        clear_output()
        run_root = _get_selected_run_root()
        if run_root is None:
            display(_html_box("Kein Run ausgewählt (Report zuerst laden).", kind="warn"))
            return

        export_root = _export_root_for_run(run_root)
        tables_src = run_root / "stage3_evaluation" / "tables"
        if tables_src.exists():
            tex_files = sorted(tables_src.glob("*.tex"))
            n_copied = len(_copy_files(tex_files, export_root / "tables"))
            display(_html_box(f"Exportiert {n_copied} LaTeX-Tabellen nach: {export_root}", kind="ok"))
            _write_manifest(export_root, run_root)
        else:
            display(_html_box(f"Fehlt: {tables_src}", kind="error"))


def _on_export_figs(_):
    """Copy the key figures of the selected run into ``<export_root>/figures``.

    Always tries the fixed overview PDFs (both spellings of the coverage
    boxplot are attempted). If an instance is selected, up to three
    instance-specific PDFs are added: the parallel-coordinates plot plus the
    first pareto scatter plots in sorted order. The manifest is rewritten
    afterwards. Messages go to ``out_export``; the argument is ignored.
    """
    with out_export:
        clear_output()
        run_root = _get_selected_run_root()
        if run_root is None:
            display(_html_box("Kein Run ausgewählt (Report zuerst laden).", kind="warn"))
            return
        export_root = _export_root_for_run(run_root)
        fig_dir = run_root / "stage3_evaluation" / "figures"
        if not fig_dir.exists():
            display(_html_box(f"Fehlt: {fig_dir}", kind="error"))
            return

        figures_dst = export_root / "figures"

        overview_names = (
            "boxplots.pdf",
            "winrate_heatmap.pdf",
            "coverage_boxplots.pdf",  # alternative spelling follows
            "boxplots_coverage.pdf",
        )
        overview = [fig_dir / name for name in overview_names if (fig_dir / name).exists()]
        copied = _copy_files(overview, figures_dst)

        # Selected instance: at most 3 extra PDFs (parallel_coords + 2x pareto_scatter,
        # or 3x pareto_scatter when there is no parallel_coords plot).
        per_instance = []
        is_id = str(w_is_select.value or "").strip()
        if is_id:
            parallel = fig_dir / f"parallel_coords__{is_id}.pdf"
            if parallel.exists():
                per_instance.append(parallel)

            scatter = sorted(fig_dir.glob(f"pareto_scatter__{is_id}__*.pdf"))
            remaining = 3 - len(per_instance)
            per_instance.extend(scatter[:remaining])
        copied += _copy_files(per_instance, figures_dst)

        display(_html_box(f"Exportiert {len(copied)} Abbildungen nach: {export_root}", kind="ok"))
        _write_manifest(export_root, run_root)


def _on_export_csv(_):
    """Copy the known stage-3 CSV files of the selected run into ``<export_root>/csv``.

    Only files from the fixed list below that actually exist in
    ``<run_root>/stage3_evaluation`` are copied. The manifest is rewritten
    afterwards. Messages go to ``out_export``; the argument is ignored.
    """
    with out_export:
        clear_output()
        run_root = _get_selected_run_root()
        if run_root is None:
            display(_html_box("Kein Run ausgewählt (Report zuerst laden).", kind="warn"))
            return
        export_root = _export_root_for_run(run_root)
        stage3_dir = run_root / "stage3_evaluation"
        if not stage3_dir.exists():
            display(_html_box(f"Fehlt: {stage3_dir}", kind="error"))
            return

        wanted = (
            "aggregated_metrics.csv",
            "per_instance_metrics.csv",
            "coverage_matrix.csv",
            "coverage_matrix_aggregated.csv",
            "winrate_heatmap.csv",
            "winrate_per_is.csv",
            "milp_subruns.csv",
        )
        candidates = [stage3_dir / name for name in wanted]
        present = [path for path in candidates if path.exists()]
        copied = _copy_files(present, export_root / "csv")
        display(_html_box(f"Exportiert {len(copied)} CSVs nach: {export_root}", kind="ok"))
        _write_manifest(export_root, run_root)


# Connect the export and refresh buttons to their handlers.
btn_export_tables.on_click(_on_export_tables)
btn_export_figs.on_click(_on_export_figs)
btn_export_csv.on_click(_on_export_csv)
btn_refresh_is.on_click(lambda _: refresh_instances())

# Layout: instance selection on top, export buttons below, output area last.
display(widgets.VBox([
    widgets.HBox([btn_refresh_is, w_is_select]),
    widgets.HBox([btn_export_tables, btn_export_figs, btn_export_csv]),
    out_export,
]))

# Fill the instance dropdown once when the cell executes.
refresh_instances()


## 9) Kleiner Hinweis für die Arbeit

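Für den Text/LaTeX nutze ich in der Regel nur die Dateien aus `thesis_assets/<run_name>/`. So sind die Pfade stabil, und im Export-Manifest steht immer dabei, aus welchem Lauf (Quellordner, Commit/git_dirty, Verfahren) eine Tabelle oder Abbildung stammt. Weitere Laufparameter wie Seeds oder Budget stehen nicht im Manifest selbst; sie lassen sich über den dort vermerkten Quell-Lauf (`source_run`) nachschlagen.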
