In [1]:
# 07_report.py  —  Konsolidierter Abschlussreport (auto RUN_DIR)
# - wählt automatisch den neuesten passenden *_lstm Run (Lookback/H/eps)
# - liest evaluation.json (+ optional cost_sensitivity.csv von Block 6)
# - fasst Metriken & Backtests zusammen
# - schreibt: REPORT_block7.md, REPORT_block7_kpis.json, kpis_block7.csv (optional)
# ------------------------------------------------------------------------------

In [2]:
import json
import math
import os
import re
from datetime import datetime, timezone
from pathlib import Path

import numpy as np
import pandas as pd
import yaml

In [3]:
# ---------- kleine Helfer ----------
def jread(p: Path):
    """Read the UTF-8 encoded file at ``p`` and return its parsed JSON content."""
    with open(p, mode="r", encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

def latest_lstm_run(results_dir: Path,
                    lookback: int = None,
                    horizon: int = None,
                    eps_mode: str = None,
                    epsilon: float = None,
                    strict: bool = False) -> Path | None:
    runs = sorted(results_dir.glob("*_lstm"), key=lambda p: p.stat().st_mtime, reverse=True)
    if not runs:
        return None

    def matches(run: Path) -> bool:
        try:
            cfg = jread(run / "config.json")
        except Exception:
            return False
        ok_lb = (lookback is None) or (int(cfg.get("lookback", -1)) == lookback)
        tc = str(cfg.get("train_csv", ""))
        mH = re.search(r"_cls_h(\d+)_", tc)
        mE = re.search(r"_(abs|rel)([\dp]+)\.csv$", tc)
        ok_h = True if horizon is None else (int(cfg.get("horizon", -1)) == horizon or (mH and int(mH.group(1)) == horizon))
        ok_m = True if eps_mode is None else ((mE and mE.group(1) == eps_mode))
        ok_e = True
        if epsilon is not None:
            if mE:
                ok_e = float(mE.group(2).replace("p",".")) == float(epsilon)
            else:
                ok_e = float(cfg.get("epsilon", 1e9)) == float(epsilon)
        return ok_lb and ok_h and ok_m and ok_e

    matches_list = [r for r in runs if matches(r)]
    if matches_list:
        return matches_list[0]
    return None if strict else runs[0]

In [4]:
# ---------- Resolve root config & label ----------
# The project root is the parent of the current working directory.
ROOT = Path("..").resolve()
with open(ROOT / "config.json", "r", encoding="utf-8") as f:
    C = json.load(f)

RESULTS_DIR = Path(C.get("results_dir", "../results")).resolve()
LOOKBACK    = int(C["lookback"])
FEATURESET  = C.get("featureset", "v2")

# Read the label definition primarily from the feature YAML (Block 2 writes it there).
# Each value stays None when the YAML is absent or the entry is missing/empty/zero.
HORIZON = MODE = EPS = None
yml = ROOT / f"data/features_{FEATURESET}.yml"
if yml.exists():
    # Context manager so the file handle is closed deterministically.
    with open(yml, "r", encoding="utf-8") as yf:
        meta = yaml.safe_load(yf) or {}
    # `or {}` / `or 0` also cover explicit nulls in the YAML, which would
    # otherwise crash int()/float() or turn MODE into the string "None".
    lab = meta.get("label") or {}
    HORIZON = int(lab.get("horizon") or 0) or None
    MODE    = str(lab.get("mode") or "") or None
    EPS     = float(lab.get("epsilon") or 0.0) or None

In [5]:
# ---------- Determine RUN_DIR ----------
# An explicit RUN_DIR environment variable wins; otherwise the newest matching
# *_lstm run is picked (non-strict: falls back to the newest run overall).
run_override = os.getenv("RUN_DIR", "").strip() or None
RUN_DIR = (
    Path(run_override).resolve()
    if run_override
    else latest_lstm_run(
        RESULTS_DIR,
        lookback=LOOKBACK,
        horizon=HORIZON,
        eps_mode=MODE,
        epsilon=EPS,
        strict=False,
    )
)

# Abort when nothing was selected or the selected path is not on disk.
if not (RUN_DIR is not None and RUN_DIR.exists()):
    raise SystemExit("Kein *_lstm Run gefunden. Bitte Block 3/4/6 vorher einmal ausführen.")

print("RUN_DIR ->", RUN_DIR)

RUN_DIR -> C:\Users\jacin\finance-lstm\results\2025-10-22_12-14-18_lstm


In [6]:
# ---------- Load artifacts ----------
ev_path = RUN_DIR / "evaluation.json"
if not ev_path.exists():
    raise SystemExit(f"evaluation.json fehlt in {RUN_DIR} (Block 4/6).")

ev    = jread(ev_path)
# `or {}` also covers sections that are present but null, so the later
# `.get(...)` calls on these dicts cannot fail.
cfg   = ev.get("config") or {}
metrics = (ev.get("metrics") or {}).get("test") or {}
thr_sel = ev.get("threshold_selection") or {}
calib   = ev.get("calibration") or {}
backtest_gross = ev.get("backtest", {})

# If the YAML did not provide the label, take it from the evaluation:
# first the resolved label block, then the run config. A value missing in
# both sources stays None instead of raising on int(None) / float(None).
label_src = ev.get("label_resolved_from") or {}
if HORIZON is None:
    HORIZON = label_src.get("horizon") or cfg.get("horizon")
    if HORIZON is not None:
        HORIZON = int(HORIZON)
if MODE is None:
    MODE = label_src.get("mode") or cfg.get("epsilon_mode")
if EPS is None:
    EPS = label_src.get("epsilon") or cfg.get("epsilon")
    if EPS is not None:
        EPS = float(EPS)

In [7]:
# ---------- Load cost sensitivity ----------
sens_path = RUN_DIR / "cost_sensitivity.csv"
sens_df = pd.read_csv(sens_path) if sens_path.exists() else None

MAIN_RT = 15.0            # reference round-trip cost (bps) used to pick the KPI row
MAIN_SLIP_PER_LEG = 2.0   # not referenced in the visible cells

cost_pick = {}
if sens_df is not None and len(sens_df):
    # Mandatory: pick the KPI only from the Entry@t+1 rows.
    df_t1_exact = sens_df[sens_df["model"] == "Entry@t+1"]                           # exact name
    df_t1_prefix = sens_df[sens_df["model"].astype(str).str.startswith("Entry@t+1")] # fallback (e.g. "Entry@t+1 (No-Overlap)")
    df_t1 = df_t1_exact if len(df_t1_exact) else df_t1_prefix

    if not len(df_t1):
        # Last resort: use the whole sensitivity table (should not be needed).
        df_t1 = sens_df

    # assign() builds a new frame; writing a column into a filtered slice of
    # sens_df would trigger pandas' SettingWithCopyWarning.
    df_t1 = df_t1.assign(rt_diff=(df_t1["roundtrip_bps"] - MAIN_RT).abs())
    # Closest round-trip cost to MAIN_RT; ties go to the cheaper setting.
    row = df_t1.sort_values(["rt_diff", "roundtrip_bps"]).iloc[0].to_dict()

    # An empty trades cell is read as NaN, which int() cannot convert.
    trades_raw = row.get("trades", 0)
    cost_pick = dict(
        model=row["model"], roundtrip_bps=float(row["roundtrip_bps"]),
        trades=int(trades_raw) if pd.notna(trades_raw) else 0,
        exposure=float(row.get("exposure", np.nan)),
        turnover=float(row.get("turnover", np.nan)),
        CAGR=float(row["CAGR"]), Sharpe=float(row["Sharpe"]), MaxDD=float(row["MaxDD"]),
        final_equity=float(row["final_equity"]),
    )

In [8]:
# ---------- Baselines (recommended): Always-Up, LogReg, MACD -------------------
# The chronological splits are rebuilt here so that LogReg & MACD are scored on
# the same test rows.
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import average_precision_score

TRAIN_CSV = Path(cfg.get("train_csv", ""))
features_list = cfg.get("features", None)
if not features_list:
    # Fallback: feature list from the YAML
    yml = ROOT / f"data/features_{FEATURESET}.yml"
    if yml.exists():
        with open(yml, "r", encoding="utf-8") as yf:
            meta = yaml.safe_load(yf) or {}
        features_list = meta.get("features", [])

# Fail with a clear message instead of an obscure pandas/sklearn error later on.
if not features_list:
    raise SystemExit("Keine Feature-Liste gefunden (evaluation.json config bzw. features-YAML).")
if not TRAIN_CSV.is_file():
    raise SystemExit(f"train_csv nicht gefunden: {TRAIN_CSV}")

df_all = pd.read_csv(TRAIN_CSV, index_col=0, parse_dates=True).sort_index()
X_all = df_all[features_list].copy()
y_all = df_all["target"].astype(int).copy()

# Chronological 70 / 15 / 15 split by row position.
n = len(df_all)
n_train = int(n * 0.70)
n_val   = int(n * 0.15)
n_test  = n - n_train - n_val

X_train, y_train = X_all.iloc[:n_train],              y_all.iloc[:n_train]
X_val,   y_val   = X_all.iloc[n_train:n_train+n_val], y_all.iloc[n_train:n_train+n_val]
X_test,  y_test  = X_all.iloc[n_train+n_val:],        y_all.iloc[n_train+n_val:]

# Window-end alignment (as in the network)
LB = int(cfg.get("lookback", LOOKBACK))

# The scaler is fitted on the training split only and applied to all splits.
scaler = StandardScaler().fit(X_train)
Xtr_s = pd.DataFrame(scaler.transform(X_train), index=X_train.index, columns=X_train.columns)
Xva_s = pd.DataFrame(scaler.transform(X_val),   index=X_val.index,   columns=X_val.columns)
Xte_s = pd.DataFrame(scaler.transform(X_test),  index=X_test.index,  columns=X_test.columns)

# Drop the first LB-1 rows of every split.
tail = slice(LB-1, None)
ytr_tail, yva_tail, yte_tail = y_train.iloc[tail], y_val.iloc[tail], y_test.iloc[tail]
Xtr_tail, Xva_tail, Xte_tail = Xtr_s.iloc[tail],   Xva_s.iloc[tail],   Xte_s.iloc[tail]

# Always-Up baseline
pos_rate_test = float(yte_tail.mean())
auprc_always_up = pos_rate_test   # PR-AUC of the trivial all-positive baseline

# Refit the LogReg baseline (as in Block 3, for the report only)
logit = LogisticRegression(max_iter=200)
logit.fit(Xtr_tail, ytr_tail)
proba_lr = logit.predict_proba(Xte_tail)[:,1]
auprc_lr = float(average_precision_score(yte_tail, proba_lr))

# Simple MACD rule as score (raw macd_diff on the test tail). The rows are taken
# by position - the same slices as for X_test / tail - so duplicate timestamps
# in the index cannot misalign score and target.
if "macd_diff" in df_all.columns:
    macd_diff = df_all["macd_diff"].iloc[n_train+n_val:].iloc[tail].astype(float)
    # missing values -> zero
    macd_diff = macd_diff.fillna(0.0)
else:
    # Column absent: use a constant zero score instead of raising KeyError.
    print("[WARN] Spalte 'macd_diff' fehlt – MACD-Baseline verwendet konstanten Score 0.")
    macd_diff = pd.Series(0.0, index=Xte_tail.index)
auprc_macd = float(average_precision_score(yte_tail, macd_diff.values))

# One row per baseline; the last column relates PR-AUC to the test positive rate.
baselines_tbl = pd.DataFrame([
    {"baseline": name, "auprc": score, "pos_rate": pos_rate_test,
     "auprc_over_posrate": (score / max(pos_rate_test, 1e-12))}
    for name, score in [
        ("Always-Up", auprc_always_up),
        ("Logistic Regression", auprc_lr),
        ("Simple MACD (macd_diff score)", auprc_macd),
    ]
])

In [9]:
# ---------- Assemble KPIs ----------
# Single nested dict that feeds both the Markdown report and the KPI JSON.
kpis = {
    "run_dir": str(RUN_DIR),
    # datetime.utcnow() is deprecated since Python 3.12; this yields the same
    # "YYYY-MM-DDTHH:MM:SSZ" text from a timezone-aware UTC timestamp.
    "generated_utc": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
    "data": {
        "ticker": cfg.get("ticker"),
        "interval": cfg.get("interval"),
        "period": [cfg.get("start"), cfg.get("end")],
        "horizon": cfg.get("horizon"),
        "lookback": cfg.get("lookback"),
        "featureset": cfg.get("featureset"),
        "features_used": ev.get("features_used"),
    },
    "label": ev.get("label_resolved_from"),
    "calibration": {
        "chosen": calib.get("chosen"),
        "val_brier": calib.get("val_brier"),
        "test_brier": calib.get("test_brier"),
        # Explicit text note about how the calibration decision was made.
        "note": "Kalibrationsentscheidung ausschließlich auf Validation; Test nur zur Berichterstattung."
    },
    "threshold": {
        "strategy": thr_sel.get("strategy"),
        "threshold": thr_sel.get("threshold"),
        "val_mcc": thr_sel.get("val_mcc"),
        "test_pred_pos_rate": thr_sel.get("test_pred_pos_rate"),
    },
    "classification_test": {
        "roc_auc": metrics.get("roc_auc"),
        "auprc": metrics.get("auprc"),
        "brier": metrics.get("brier"),
        "balanced_accuracy": metrics.get("balanced_accuracy"),
        "mcc": metrics.get("mcc"),
        "confusion_matrix": metrics.get("confusion_matrix"),
    },
    "backtest_gross": backtest_gross,     # gross, without costs (Block 4)
    "backtest_cost_pick": cost_pick,      # net pick from Block 6 (T+1 only)
    "baselines": baselines_tbl.to_dict(orient="records")
}

In [10]:
# ---------- Write Markdown report ----------
fig_dir = RUN_DIR / "figures"
figs = {
    "roc": fig_dir / "roc_test.png",
    "pr":  fig_dir / "pr_test.png",
    "calib": fig_dir / "calibration_test.png",
    "cm":   fig_dir / "cm_test.png",
    "proba": fig_dir / "proba_hist_raw_vs_used.png",
    "equity_gross": fig_dir / "equity_curves_t_vs_t1.png",
    "equity_cost":  fig_dir / "equity_costed.png",
}

def _rel(p: Path) -> str:
    """Return ``p`` relative to RUN_DIR with forward slashes, for Markdown links.

    The report is written into RUN_DIR, so a relative POSIX-style path keeps the
    image links portable: no Windows backslashes and no absolute local paths,
    whether or not the figure file exists yet.
    """
    try:
        return p.relative_to(RUN_DIR).as_posix()
    except ValueError:
        # Not located below RUN_DIR: keep the path, only normalise separators.
        return p.as_posix()

def _fmt(value, spec: str = ".3f") -> str:
    """Format a numeric KPI with ``spec``; return "n/a" if it is missing or non-numeric."""
    try:
        return format(float(value), spec)
    except (TypeError, ValueError):
        return "n/a"

report_md = RUN_DIR / "REPORT_block7.md"
lines = []
lines.append("# Block 7 – Abschluss-Report\n")
lines.append(f"- **Run-Ordner:** `{RUN_DIR.name}`")
lines.append(f"- **Erstellt (UTC):** {kpis['generated_utc']}")
lines.append(f"- **Ticker/Intervall:** {kpis['data']['ticker']} / {kpis['data']['interval']}")
lines.append(f"- **Zeitraum:** {kpis['data']['period'][0]} → {kpis['data']['period'][1]}")
lines.append(f"- **Horizon/Lookback:** H={kpis['data']['horizon']} / LB={kpis['data']['lookback']}")
# features_used may be absent from evaluation.json; join an empty list then.
features_used = kpis['data']['features_used'] or []
lines.append(f"- **Featureset:** {kpis['data']['featureset']} → {', '.join(map(str, features_used))}\n")

# Classification metrics (_fmt tolerates metrics missing from evaluation.json)
m = kpis["classification_test"]
lines.append("## Test-Metriken")
lines.append(f"- AUROC: **{_fmt(m['roc_auc'])}**, AUPRC: **{_fmt(m['auprc'])}** (Random-Baseline = Positivrate), Brier: **{_fmt(m['brier'])}**")
lines.append(f"- Balanced Acc: **{_fmt(m['balanced_accuracy'])}**, MCC: **{_fmt(m['mcc'])}**\n")
lines.append(f"![ROC]({_rel(figs['roc'])})  \n![PR]({_rel(figs['pr'])})\n")
lines.append(f"![Calibration]({_rel(figs['calib'])})  \n![Confusion]({_rel(figs['cm'])})  \n![Probas]({_rel(figs['proba'])})\n")

# Calibration: explicit statement (mandatory)
lines.append("> **Kalibration:** Entscheidung ausschließlich auf der Validation; der Test-Split dient nur der Berichterstattung.\n")

# Backtests
lines.append("## Backtests")
bg = kpis["backtest_gross"]
if bg:
    # `or {}` covers sub-sections that are present but null.
    t  = bg.get("strategy_t") or {}
    t1 = bg.get("strategy_t1") or {}
    bh = bg.get("buy_hold") or {}
    lines.append(f"- **Ohne Kosten** – Entry@t: CAGR {_fmt(t.get('CAGR'))}, Sharpe {_fmt(t.get('Sharpe'))}, "
                 f"Equity {_fmt(t.get('final_equity'))} *(Referenz/Upper bound, nicht handelbar)*;  "
                 f"Entry@t+1: CAGR {_fmt(t1.get('CAGR'))}, Sharpe {_fmt(t1.get('Sharpe'))}, Equity {_fmt(t1.get('final_equity'))};  "
                 f"Buy&Hold: CAGR {_fmt(bh.get('CAGR'))}, Equity {_fmt(bh.get('final_equity'))}.")
    lines.append(f"![Equity gross]({_rel(figs['equity_gross'])})\n")

cp = kpis["backtest_cost_pick"]
lines.append("## Kosten-KPI (realistisch, T+1)")
if cp:
    lines.append(f"- Gewählt: **{cp['model']}** bei **{cp['roundtrip_bps']:.1f} bps** (≈ Main {MAIN_RT:.0f} bps); "
                 f"Trades={cp.get('trades','?')}, Exposure={cp.get('exposure',float('nan')):.3f}, "
                 f"Turnover={cp.get('turnover',float('nan')):.1f}, CAGR={cp['CAGR']:.3f}, "
                 f"Sharpe={cp['Sharpe']:.3f}, MaxDD={cp['MaxDD']:.3f}, Equity={cp['final_equity']:.3f}.")
    lines.append(f"![Equity net]({_rel(figs['equity_cost'])})\n")
else:
    lines.append("- **Hinweis:** Keine `cost_sensitivity.csv` gefunden – Block 6 (Kosten-Backtest) noch nicht gelaufen.\n")

# Baselines table (recommended)
lines.append("## Baselines (PR-AUC relativ zur Positivrate)")
lines.append("")
lines.append("| Baseline | PR-AUC | Positivrate | PR-AUC / Positivrate |")
lines.append("|---|---:|---:|---:|")
for r in kpis["baselines"]:
    lines.append(f"| {r['baseline']} | {r['auprc']:.3f} | {r['pos_rate']:.3f} | {r['auprc_over_posrate']:.2f} |")
lines.append("")

# Limitations & negative results (recommended)
# The ticker comes from the run config instead of a hard-coded symbol, so the
# statement stays correct for runs on other assets.
ticker_label = kpis['data']['ticker'] or "n/a"
lines.append("## Limitations & Negativresultate")
lines.append("- **ε-Sensitivität:** Ergebnisse hängen vom Label-Threshold ε ab; Sweeps zeigen teils deutliche Variation.")
lines.append("- **Schwacher MCC in WFCV:** Die walk-forward Cross-Validation ergab niedrige bis instabile MCC-Werte; Ranking-Fähigkeit ist begrenzt.")
lines.append("- **Regimewechsel:** Einmodell-Training über lange Zeiträume ist anfällig für Regimewechsel (Volatilität, Makro, Marktstruktur).")
lines.append(f"- **Single-Asset-Bias:** Ergebnisse basieren auf einem Asset/Ticker ({ticker_label}); Generalisierbarkeit ist nicht gezeigt.\n")

report_md.write_text("\n".join(lines), encoding="utf-8")
print("✓ REPORT geschrieben →", report_md)

# Additionally dump the KPIs as JSON
(RUN_DIR / "REPORT_block7_kpis.json").write_text(json.dumps(kpis, indent=2), encoding="utf-8")
print("✓ KPIs JSON →", RUN_DIR / "REPORT_block7_kpis.json")

# Optional: one-row CSV with the core KPIs (best effort - a failure only warns)
try:
    kpi_rows = {
        "roc_auc": metrics.get("roc_auc"), "auprc": metrics.get("auprc"), "brier": metrics.get("brier"),
        "bal_acc": metrics.get("balanced_accuracy"), "mcc": metrics.get("mcc"),
        "cost_model": cp.get("model") if cp else None,
        "cost_rt_bps": cp.get("roundtrip_bps") if cp else None,
        "cost_CAGR": cp.get("CAGR") if cp else None,
        "cost_Sharpe": cp.get("Sharpe") if cp else None,
        "cost_MaxDD": cp.get("MaxDD") if cp else None,
        "cost_Equity": cp.get("final_equity") if cp else None
    }
    pd.DataFrame([kpi_rows]).to_csv(RUN_DIR / "kpis_block7.csv", index=False)
    print("✓ KPIs CSV →", RUN_DIR / "kpis_block7.csv")
except Exception as e:
    print("[WARN] KPIs CSV nicht geschrieben:", e)

✓ REPORT geschrieben → C:\Users\jacin\finance-lstm\results\2025-10-22_12-14-18_lstm\REPORT_block7.md
✓ KPIs JSON → C:\Users\jacin\finance-lstm\results\2025-10-22_12-14-18_lstm\REPORT_block7_kpis.json
✓ KPIs CSV → C:\Users\jacin\finance-lstm\results\2025-10-22_12-14-18_lstm\kpis_block7.csv
