In [12]:
# 07_report.py  —  Konsolidierter Abschlussreport (auto RUN_DIR)
# - wählt automatisch den neuesten passenden *_lstm Run (Lookback/H/eps)
# - liest evaluation.json (+ optional cost_sensitivity.csv von Block 6)
# - fasst Metriken & Backtests zusammen
# - schreibt: REPORT_block7.md, REPORT_block7_kpis.json (in den gewählten RUN_DIR)
# -----------------------------------------------------------------------------

In [13]:
import json
import os
import re
from datetime import datetime, timezone
from pathlib import Path

import pandas as pd
import yaml

In [14]:
# ---------- kleine Helfer ----------
def jread(p: Path):
    """Load the UTF-8 encoded JSON file at `p` and return the parsed object."""
    with open(p, mode="r", encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

def latest_lstm_run(results_dir: Path,
                    lookback: int = None,
                    horizon: int = None,
                    eps_mode: str = None,
                    epsilon: float = None,
                    strict: bool = False) -> Path | None:
    """
    Suche in results_dir nach *_lstm Runs. Wenn lookback/horizon/eps angegeben sind,
    versuche passende Runs zu priorisieren. Fallback: neuester Run per mtime.
    """
    runs = sorted(results_dir.glob("*_lstm"), key=lambda p: p.stat().st_mtime, reverse=True)
    if not runs:
        return None

    def matches(run: Path) -> bool:
        try:
            cfg = jread(run / "config.json")
        except Exception:
            return False
        ok_lb = (lookback is None) or (int(cfg.get("lookback", -1)) == lookback)
        # H/EPS auch über train_csv-Namen tolerieren
        tc = str(cfg.get("train_csv", ""))
        mH = re.search(r"_cls_h(\d+)_", tc)
        mE = re.search(r"_(abs|rel)([\dp]+)\.csv$", tc)
        ok_h = True if horizon is None else (int(cfg.get("horizon", -1)) == horizon or (mH and int(mH.group(1)) == horizon))
        ok_m = True if eps_mode is None else ((mE and mE.group(1) == eps_mode))
        ok_e = True
        if epsilon is not None:
            if mE:
                ok_e = float(mE.group(2).replace("p",".")) == float(epsilon)
            else:
                ok_e = float(cfg.get("epsilon", 1e9)) == float(epsilon)
        return ok_lb and ok_h and ok_m and ok_e

    matches_list = [r for r in runs if matches(r)]
    if matches_list:
        return matches_list[0]  # neuester passender
    return None if strict else runs[0]  # neuester überhaupt

In [15]:
# ---------- Resolve root config & label ----------
# Project root is the parent of the current working directory.
ROOT = Path("..").resolve()
# Shared helper: reads as UTF-8 and closes the file.
C = jread(ROOT / "config.json")

RESULTS_DIR = Path(C.get("results_dir", "../results")).resolve()
LOOKBACK    = int(C["lookback"])
FEATURESET  = C.get("featureset", "v2")

# Label settings come primarily from the feature YAML (Block 2 is the source of truth).
HORIZON = MODE = EPS = None
yml = ROOT / f"data/features_{FEATURESET}.yml"
if yml.exists():
    # `with` closes the handle; the original bare open() leaked it.
    with open(yml, "r", encoding="utf-8") as yml_file:
        meta = yaml.safe_load(yml_file) or {}
    # `label: null` in the YAML yields None, hence `or {}` instead of a default.
    lab = meta.get("label") or {}
    # Missing, null, 0 or empty values all mean "not set" -> None.
    # `x or <zero>` keeps a YAML null from crashing int()/float() and from
    # becoming the literal string "None" for the mode.
    HORIZON = int(lab.get("horizon") or 0) or None
    MODE    = str(lab.get("mode") or "") or None
    EPS     = float(lab.get("epsilon") or 0.0) or None
# Fallback (YAML missing or incomplete): filled in later from the run's evaluation.json

In [16]:
# ---------- Determine RUN_DIR ----------
# A manual override through the RUN_DIR environment variable takes precedence
# (e.g. in a notebook cell: os.environ["RUN_DIR"]=".../results/..._lstm").
# Without it, the newest run matching the resolved label settings is used.
run_override = os.getenv("RUN_DIR", "").strip() or None

RUN_DIR = (
    Path(run_override).resolve()
    if run_override
    else latest_lstm_run(
        RESULTS_DIR,
        lookback=LOOKBACK,
        horizon=HORIZON,
        eps_mode=MODE,
        epsilon=EPS,
        strict=False,
    )
)

# Stop when nothing was found or the chosen path does not exist.
if not (RUN_DIR is not None and RUN_DIR.exists()):
    raise SystemExit("Kein *_lstm Run gefunden. Bitte Block 3/4/6 vorher einmal ausführen.")

print("RUN_DIR ->", RUN_DIR)

RUN_DIR -> C:\Users\jacin\finance-lstm\results\2025-10-19_16-48-09_lstm


In [17]:
# ---------- Load artifacts ----------
ev_path = RUN_DIR / "evaluation.json"
if not ev_path.exists():
    raise SystemExit(f"evaluation.json fehlt in {RUN_DIR} (Block 4/6).")

ev    = jread(ev_path)
# `or {}` also covers sections stored as JSON null, not only missing keys,
# so the `.get(...)` lookups further down never hit None.
cfg   = ev.get("config") or {}
metrics = (ev.get("metrics") or {}).get("test") or {}
thr_sel = ev.get("threshold_selection") or {}
calib   = ev.get("calibration") or {}
backtest_gross = ev.get("backtest") or {}

# If the YAML did not provide the label settings, take them from the
# evaluation: first `label_resolved_from`, then the run config.
_label_ev = ev.get("label_resolved_from") or {}
if HORIZON is None:
    _h = _label_ev.get("horizon") or cfg.get("horizon")
    # Stay None when neither source has a value instead of raising on int(None).
    HORIZON = int(_h) if _h is not None else None
if MODE is None:
    MODE = _label_ev.get("mode") or cfg.get("epsilon_mode")
if EPS is None:
    _e = _label_ev.get("epsilon") or cfg.get("epsilon")
    EPS = float(_e) if _e is not None else None

# Cost sensitivity table (optional, written by Block 6)
sens_path = RUN_DIR / "cost_sensitivity.csv"
sens_df = pd.read_csv(sens_path) if sens_path.exists() else None

# Main cost assumption used in the report
MAIN_RT = 15.0      # round trip in bps
MAIN_SLIP_PER_LEG = 2.0  # annotation for plot/legend only

cost_pick = {}
if sens_df is not None and len(sens_df):
    # "Entry@t" is a prefix of "Entry@t+1", so a plain substring test keeps
    # both variants. The negative lookahead keeps only real Entry@t rows;
    # na=False treats missing model names as non-matching.
    is_entry_t = sens_df["model"].str.contains(r"Entry@t(?![+\d])", regex=True, na=False)
    df_t = sens_df[is_entry_t].copy()
    if df_t.empty:  # fallback: consider every model
        df_t = sens_df.copy()
    # Pick the scenario whose round-trip cost is closest to MAIN_RT
    # (ties: the cheaper round trip first).
    df_t["rt_diff"] = (df_t["roundtrip_bps"] - MAIN_RT).abs()
    row = df_t.sort_values(["rt_diff", "roundtrip_bps"]).iloc[0].to_dict()
    cost_pick = dict(
        model=row["model"], roundtrip_bps=float(row["roundtrip_bps"]),
        trades=int(row["trades"]), CAGR=float(row["CAGR"]),
        Sharpe=float(row["Sharpe"]), MaxDD=float(row["MaxDD"]),
        final_equity=float(row["final_equity"]),
    )

In [18]:
# ---------- Assemble KPIs ----------
# Everything collected here is written out as JSON and used for the markdown report.
kpis = {
    "run_dir": str(RUN_DIR),
    # Aware UTC timestamp (datetime.utcnow() is deprecated since Python 3.12);
    # output format is unchanged: YYYY-MM-DDTHH:MM:SSZ.
    "generated_utc": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
    "data": {
        "ticker": cfg.get("ticker"),
        "interval": cfg.get("interval"),
        "period": [cfg.get("start"), cfg.get("end")],
        "horizon": cfg.get("horizon"),
        "lookback": cfg.get("lookback"),
        "featureset": cfg.get("featureset"),
        "features_used": ev.get("features_used"),
    },
    "label": ev.get("label_resolved_from"),
    "calibration": {
        "chosen": calib.get("chosen"),
        "val_brier": calib.get("val_brier"),
        "test_brier": calib.get("test_brier"),
    },
    "threshold": {
        "strategy": thr_sel.get("strategy"),
        "threshold": thr_sel.get("threshold"),
        "val_mcc": thr_sel.get("val_mcc"),
        "test_pred_pos_rate": thr_sel.get("test_pred_pos_rate"),
    },
    "classification_test": {
        "roc_auc": metrics.get("roc_auc"),
        "auprc": metrics.get("auprc"),
        "brier": metrics.get("brier"),
        "balanced_accuracy": metrics.get("balanced_accuracy"),
        "mcc": metrics.get("mcc"),
        "confusion_matrix": metrics.get("confusion_matrix"),
    },
    "backtest_gross": backtest_gross,   # before costs (Block 4)
    "backtest_cost_pick": cost_pick,    # net-of-cost scenario picked from Block 6
}

In [19]:
# ---------- Write markdown report ----------
# Figure files the report links to, keyed by a short name; all of them are
# looked up in the run's "figures" folder.
fig_dir = RUN_DIR / "figures"
_fig_files = (
    ("roc", "roc_test.png"),
    ("pr", "pr_test.png"),
    ("calib", "calibration_test.png"),
    ("cm", "cm_test.png"),
    ("proba", "proba_hist_raw_vs_used.png"),
    ("equity_gross", "equity_curves_t_vs_t1.png"),
    ("equity_cost", "equity_costed.png"),  # Block 6
)
figs = {key: fig_dir / file_name for key, file_name in _fig_files}
def _rel(p: Path) -> str:
    return str(p.relative_to(RUN_DIR)) if p.exists() else str(p)

report_md = RUN_DIR / "REPORT_block7.md"


def _fmt(value, spec: str = ".3f") -> str:
    """Format a number with `spec`; missing or non-numeric values render as 'n/a'."""
    try:
        return format(float(value), spec)
    except (TypeError, ValueError):
        return "n/a"


# Header with run metadata
lines = []
lines.append("# Block 7 – Abschluss-Report\n")
lines.append(f"- **Run-Ordner:** `{RUN_DIR.name}`")
lines.append(f"- **Erstellt (UTC):** {kpis['generated_utc']}")
lines.append(f"- **Ticker/Intervall:** {kpis['data']['ticker']} / {kpis['data']['interval']}")
lines.append(f"- **Zeitraum:** {kpis['data']['period'][0]} → {kpis['data']['period'][1]}")
lines.append(f"- **Horizon/Lookback:** H={kpis['data']['horizon']} / LB={kpis['data']['lookback']}")
# features_used may be absent from evaluation.json; join an empty list then.
features_used = kpis['data']['features_used'] or []
lines.append(f"- **Featureset:** {kpis['data']['featureset']} → {', '.join(map(str, features_used))}\n")

# Classification metrics on the test split; missing metrics render as "n/a"
# instead of raising on the float format spec.
m = kpis["classification_test"]
lines.append("## Test-Metriken")
lines.append(f"- AUROC: **{_fmt(m['roc_auc'])}**, AUPRC: **{_fmt(m['auprc'])}**, Brier: **{_fmt(m['brier'])}**")
lines.append(f"- Balanced Acc: **{_fmt(m['balanced_accuracy'])}**, MCC: **{_fmt(m['mcc'])}**\n")
lines.append(f"![ROC]({_rel(figs['roc'])})  \n![PR]({_rel(figs['pr'])})\n")
lines.append(f"![Calibration]({_rel(figs['calib'])})  \n![Confusion]({_rel(figs['cm'])})  \n![Probas]({_rel(figs['proba'])})\n")

# Backtests
lines.append("## Backtests")
bg = kpis["backtest_gross"]
if bg:
    # `or {}` covers strategies that are missing or stored as null.
    t  = bg.get("strategy_t") or {}
    t1 = bg.get("strategy_t1") or {}
    bh = bg.get("buy_hold") or {}
    lines.append(f"- **Ohne Kosten** – Entry@t: CAGR {_fmt(t.get('CAGR'))}, Sharpe {_fmt(t.get('Sharpe'))}, "
                 f"Equity {_fmt(t.get('final_equity'))};  Entry@t+1: CAGR {_fmt(t1.get('CAGR'))}, "
                 f"Sharpe {_fmt(t1.get('Sharpe'))}, Equity {_fmt(t1.get('final_equity'))};  "
                 f"Buy&Hold: CAGR {_fmt(bh.get('CAGR'))}, Equity {_fmt(bh.get('final_equity'))}.")
    lines.append(f"![Equity gross]({_rel(figs['equity_gross'])})\n")

# Net-of-cost scenario; cost_pick stays empty when no scenario was selected.
cp = kpis["backtest_cost_pick"]
if cp:
    lines.append("## Kosten-Szenario (Netto)")
    lines.append(f"- Gewählt: **{cp['model']}** bei **{cp['roundtrip_bps']:.1f} bps** (≈ Main {MAIN_RT:.0f} bps); "
                 f"Trades={cp['trades']}, CAGR={cp['CAGR']:.3f}, Sharpe={cp['Sharpe']:.3f}, "
                 f"MaxDD={cp['MaxDD']:.3f}, Equity={cp['final_equity']:.3f}.")
    lines.append(f"- Plot: Slippage-Annahme {MAIN_SLIP_PER_LEG:.1f} bps pro Leg (nur Annotation).")
    lines.append(f"![Equity net]({_rel(figs['equity_cost'])})\n")
else:
    lines.append("**Hinweis:** Kein `cost_sensitivity.csv` gefunden – Block 6 noch nicht gelaufen.\n")

report_md.write_text("\n".join(lines), encoding="utf-8")
print("✓ REPORT geschrieben →", report_md)

# Also store the KPIs as JSON next to the report
(RUN_DIR / "REPORT_block7_kpis.json").write_text(json.dumps(kpis, indent=2), encoding="utf-8")
print("✓ KPIs JSON →", RUN_DIR / "REPORT_block7_kpis.json")

✓ REPORT geschrieben → C:\Users\jacin\finance-lstm\results\2025-10-19_16-48-09_lstm\REPORT_block7.md
✓ KPIs JSON → C:\Users\jacin\finance-lstm\results\2025-10-19_16-48-09_lstm\REPORT_block7_kpis.json
