In [68]:
# --- Imports & Setup ---
import os, sys, json, time
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras

from sklearn.metrics import (
    roc_curve, auc, precision_recall_curve, average_precision_score,
    classification_report, confusion_matrix, brier_score_loss,
    balanced_accuracy_score, matthews_corrcoef
)
from sklearn.calibration import calibration_curve, IsotonicRegression
from sklearn.linear_model import LogisticRegression
import joblib, yaml

In [69]:
# === Label-Definition robust aus Daten bestimmen (Block 2) ===
import re, glob, os, yaml

def label_from_yaml(featureset: str):
    """Read the label definition from ``../data/features_<featureset>.yml``.

    Args:
        featureset: Name inserted into the YAML file name.

    Returns:
        ``(horizon, mode, epsilon)`` as ``(int, str, float)`` when the file
        exists and its ``label`` mapping provides all three values;
        ``None`` otherwise.
    """
    yaml_path = f"../data/features_{featureset}.yml"
    if not os.path.exists(yaml_path):
        return None

    with open(yaml_path, "r") as fh:
        document = yaml.safe_load(fh) or {}

    # A missing or empty "label" section is treated like an empty mapping.
    label_cfg = document.get("label") or {}
    raw_values = [label_cfg.get(key) for key in ("horizon", "mode", "epsilon")]
    if any(value is None for value in raw_values):
        return None

    horizon, mode, epsilon = raw_values
    return int(horizon), str(mode), float(epsilon)

def parse_h_eps_from_path(path: str):
    """Extract ``(horizon, mode, epsilon)`` from a labelled-dataset path.

    The horizon is read from a ``_cls_h<digits>_`` fragment; mode and epsilon
    from a ``_abs<d>p<d>`` / ``_rel<d>p<d>`` fragment, where ``p`` stands in
    for the decimal point. Parts that cannot be found are returned as ``None``.
    """
    horizon_match = re.search(r"_cls_h(\d+)_", path)
    horizon = None if horizon_match is None else int(horizon_match.group(1))

    eps_match = re.search(r"_(abs|rel)(\d+p\d+)", path)
    if eps_match is None:
        return horizon, None, None

    mode, encoded_eps = eps_match.group(1), eps_match.group(2)
    return horizon, mode, float(encoded_eps.replace("p", "."))

def infer_label_from_files(ticker, interval, start, end, H_hint=None, mode_hint=None, eps_hint=None):
    """Infer the label definition from labelled CSV files in ``../data``.

    Files named ``<ticker>_<interval>_<start>_<end>_cls_h*_*.csv`` are
    collected, optionally narrowed by the hints, and the most recently
    modified survivor is parsed with ``parse_h_eps_from_path``.

    Args:
        ticker, interval, start, end: Components of the file name prefix.
        H_hint: If given, keep only files containing ``_cls_h<H_hint>_``.
        mode_hint, eps_hint: If both are given, keep only files ending in
            ``_<mode><eps with '.' replaced by 'p'>.csv``.

    Returns:
        The tuple produced by ``parse_h_eps_from_path`` for the newest
        matching file, or ``None`` when no file matches.
    """
    # Fix: the pattern used to be built as "..._cls_h*_* .csv".replace(" * ", "").
    # The substring " * " never occurs in it, so the stray space before ".csv"
    # survived and the glob could not match any regular file.
    pat = f"../data/{ticker}_{interval}_{start}_{end}_cls_h*_*.csv"
    cands = sorted(glob.glob(pat), key=os.path.getmtime)  # oldest -> newest
    if H_hint is not None:
        cands = [c for c in cands if f"_cls_h{H_hint}_" in c]
    if mode_hint and eps_hint is not None:
        tag = f"{mode_hint}{str(eps_hint).replace('.','p')}"
        cands = [c for c in cands if c.endswith(f"_{tag}.csv")]
    if not cands:
        return None
    return parse_h_eps_from_path(cands[-1])

# === Load the core config (as before) ===
ROOT = os.path.abspath("..")
if ROOT not in sys.path:
    sys.path.insert(0, ROOT)
with open(os.path.join(ROOT, "config.json"), "r") as f:
    C = json.load(f)

TICKER, START, END, INTERVAL = C["ticker"], C["start"], C["end"], C["interval"]
LOOKBACK = int(C["lookback"])
SEED = int(C.get("seed", 42))
FEATURESET = C.get("featureset", "v2")

# H, mode, epsilon: take them from the YAML first, otherwise derive them from
# the labelled CSV files that already exist.
lbl = label_from_yaml(FEATURESET)
if lbl is not None:
    HORIZON, EPS_MODE, EPSILON = lbl
else:
    # infer_label_from_files returns None when nothing matches. Unpacking that
    # None directly raised a TypeError and hid the intended RuntimeError below.
    inferred = infer_label_from_files(TICKER, INTERVAL, START, END)
    HORIZON, EPS_MODE, EPSILON = inferred if inferred is not None else (None, None, None)
    if HORIZON is None or EPS_MODE is None or EPSILON is None:
        raise RuntimeError("Label-Definition (H/mode/epsilon) konnte nicht bestimmt werden. Block 2 nötig.")

print(f"[Block4] Labels: H={HORIZON}, mode={EPS_MODE}, epsilon={EPSILON}")
# Seed both NumPy's legacy global RNG and TensorFlow.
np.random.seed(SEED)
tf.random.set_seed(SEED)
RESULTS_DIR = Path(C.get("results_dir", "../results"))


[Block4] Labels: H=5, mode=abs, epsilon=0.0005


In [70]:
def _latest_run_dir_matching(results_dir: Path, H: int, eps_mode: str, eps: float) -> Path:
    tag = f"{eps_mode}{str(eps).replace('.','p')}"
    runs = sorted(results_dir.glob("*_lstm"), key=lambda p: p.stat().st_mtime, reverse=True)
    for r in runs:
        cfgp = r / "config.json"
        if not cfgp.exists(): 
            continue
        try:
            with open(cfgp, "r") as f:
                rcfg = json.load(f)
            # Akzeptiere H aus Run-Config ODER aus train_csv-Name
            ok_lb = int(rcfg.get("lookback", LOOKBACK)) == LOOKBACK
            ok_h  = (int(rcfg.get("horizon", H)) == H) or \
                    (("_cls_h"+str(H)+"_") in str(rcfg.get("train_csv","")))
            ok_eps= (tag in str(rcfg.get("train_csv","")))  # robust nach Dateiname
            if ok_lb and ok_h and ok_eps:
                return r
        except Exception:
            pass
    # Fallback: allerneuester, wenn kein passender gefunden wurde
    if runs:
        return runs[0]
    raise FileNotFoundError("Kein RUN_DIR gefunden – bitte Block 3 trainieren.")

# Resolve the run directory for the label definition determined above.
RUN_DIR = _latest_run_dir_matching(
    results_dir=RESULTS_DIR,
    H=HORIZON,
    eps_mode=EPS_MODE,
    eps=EPSILON,
)
print("RUN_DIR:", RUN_DIR)

RUN_DIR: ..\results\2025-10-19_16-48-09_lstm


In [71]:
# --- Load artefacts (robust) ---
ENV_INFO = RUN_DIR / "env_info.json"
MODEL_PATH = RUN_DIR / "model.keras"
BEST_PATH  = RUN_DIR / "best.keras"
SCALER_PATH = RUN_DIR / "scaler.joblib"
CFG_PATH    = RUN_DIR / "config.json"

# Optional override: env_info.json may name a checkpoint under one of three keys.
# A relative hint is resolved against RUN_DIR and only used if the file exists.
if ENV_INFO.exists():
    try:
        with open(ENV_INFO, "r") as f:
            ei = json.load(f)
        ckpt_hint = (
            ei.get("best_checkpoint")
            or ei.get("best_checkpoint_path")
            or ei.get("best_checkpoint_file")
        )
        if ckpt_hint:
            cp = Path(ckpt_hint)
            cp = cp if cp.is_absolute() else RUN_DIR / ckpt_hint
            if cp.exists():
                MODEL_PATH = cp
    except Exception as e:
        print("[WARN] env_info.json konnte nicht ausgewertet werden:", e)

# best.keras, when present, wins over both the default and the env_info hint.
if BEST_PATH.exists():
    MODEL_PATH = BEST_PATH

assert MODEL_PATH.exists(), f"Model-File fehlt: {MODEL_PATH}"
assert SCALER_PATH.exists(), f"Scaler-File fehlt: {SCALER_PATH}"
assert CFG_PATH.exists(),    f"Run-Config fehlt: {CFG_PATH}"

# Run configuration written alongside the model.
with open(CFG_PATH, "r") as f:
    RCFG = json.load(f)

In [72]:
# --- Konsistenz prüfen (weich) ---
def _parse_h_mode_eps_from_train_csv(path: str):
    import re
    mH = re.search(r"_cls_h(\d+)_", path)
    me = re.search(r"_(abs|rel)([\dp]+)\.csv$", path)
    H = int(mH.group(1)) if mH else None
    mode = me.group(1) if me else None
    eps = float(me.group(2).replace("p",".")) if me else None
    return H, mode, eps

# Values recorded in the run config; absent keys fall back to the core values.
run_h_cfg = int(RCFG.get("horizon", HORIZON))
run_lb    = int(RCFG.get("lookback", LOOKBACK))
train_csv_in_cfg = str(RCFG.get("train_csv", ""))

(h_from_name,
 mode_from_name,
 eps_from_name) = _parse_h_mode_eps_from_train_csv(train_csv_in_cfg)

# Lookback is a hard requirement: the window length must be identical.
assert run_lb == LOOKBACK, f"Inkompatibler Lookback: run={run_lb} vs. core={LOOKBACK}"

# Horizon / mode / eps are checked softly. The horizon passes if either the
# run config or the train_csv name agrees. Mode and eps pass when the name
# carries no such information.
ok_h  = (run_h_cfg == HORIZON) or (h_from_name == HORIZON)
ok_m  = True if mode_from_name is None else (mode_from_name == EPS_MODE)
ok_e  = True if eps_from_name is None else np.isclose(eps_from_name, EPSILON)

if not ok_h or not ok_m or not ok_e:
    print("[WARN] Run-Config uneindeutig zu Label-Definition:",
          f"RCFG.horizon={run_h_cfg}, parsed_from_name={h_from_name}, target={HORIZON},",
          f"mode_in_name={mode_from_name}, target_mode={EPS_MODE},",
          f"eps_in_name={eps_from_name}, target_eps={EPSILON} — fahre mit H/M/E aus Daten fort.")

# Continue with the HORIZON/EPS_MODE/EPSILON resolved from data/YAML/CSV.
model  = keras.models.load_model(MODEL_PATH, compile=False)
scaler = joblib.load(SCALER_PATH)

In [73]:
# --- Load data (consistent with HORIZON) ---
eps_tag = f"{EPS_MODE}{str(EPSILON).replace('.','p')}"
train_exact = f"../data/{TICKER}_{INTERVAL}_{START}_{END}_cls_h{HORIZON}_{eps_tag}.csv"

# No guessing: exactly this H and eps_tag are required.
# The previous "fallback" globbed the same literal path that os.path.exists had
# just reported missing. It could never find anything, so it was dead code and
# has been removed. The error path is unchanged.
if not os.path.exists(train_exact):
    raise FileNotFoundError(
        f"Train CSV nicht gefunden (H={HORIZON}, tag={eps_tag}): {train_exact}\n"
        "Bitte Block 2 mit dieser Label-Definition laufen lassen."
    )
TRAIN_CSV = train_exact

print("TRAIN_CSV:", TRAIN_CSV)
# The first column becomes the index and is parsed as dates; rows are sorted by it.
df = pd.read_csv(TRAIN_CSV, index_col=0, parse_dates=True).sort_index()

TRAIN_CSV: ../data/AAPL_1d_2012-01-01_2025-09-01_cls_h5_abs0p0005.csv


In [74]:
# Re-read the run config and repeat the compatibility checks.
with (RUN_DIR / "config.json").open("r") as f:
    RCFG = json.load(f)

# A lookback mismatch is fatal.
if not (int(RCFG.get("lookback", LOOKBACK)) == LOOKBACK):
    raise AssertionError(f"Inkompatibler Lookback: run={RCFG.get('lookback')} vs. core={LOOKBACK}")

# A horizon mismatch only warns; evaluation continues with HORIZON from the data.
run_H = int(RCFG.get("horizon", HORIZON))
if HORIZON != run_H:
    print(f"[WARN] Inkompatibler HORIZON: run={run_H} vs. daten={HORIZON}. "
          "Nutze H aus Daten/YAML/CSV für Evaluation & Backtest.")


In [75]:
# --- Determine features: 1) from the run config, 2) otherwise from the YAML ---
if "features" in RCFG and RCFG["features"]:
    requested_features = list(RCFG["features"])
else:
    with open(f"../data/features_{FEATURESET}.yml","r") as f:
        meta = yaml.safe_load(f) or {}
    # `or []` guards against a "features:" key that is present but empty (None).
    requested_features = list(meta.get("features") or [])

FEATURES = [c for c in requested_features if c in df.columns]

# Configured features that are absent from the CSV used to be dropped silently.
# The scaler and the model later receive df[FEATURES], so a dropped column
# changes their input width. Report it here, where the cause is still obvious.
missing_features = [c for c in requested_features if c not in df.columns]
if missing_features:
    print(f"[WARN] {len(missing_features)} configured feature(s) not found in the data "
          f"and skipped: {missing_features}")
assert len(FEATURES) > 0, "Keine Features gefunden."

X = df[FEATURES].copy()
y = df["target"].astype(int).copy()

In [76]:
# Chronological split (70/15/15): train first, then validation, then test.
n = len(df)
n_train = int(n*0.70)
n_val = int(n*0.15)
n_test = n - n_train - n_val  # remainder goes to the test split

train_idx = slice(0, n_train)
val_idx   = slice(train_idx.stop, train_idx.stop + n_val)
test_idx  = slice(val_idx.stop, n)

# Apply the same positional slice to features and target of each split.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    (X.iloc[part], y.iloc[part]) for part in (train_idx, val_idx, test_idx)
)

In [77]:
# --- Scaling ---
# The scaler loaded from the run directory is only applied (transform), never
# refitted. Each result is wrapped back into a DataFrame with the split's index.
X_train_s, X_val_s, X_test_s = (
    pd.DataFrame(scaler.transform(part), index=part.index, columns=FEATURES)
    for part in (X_train, X_val, X_test)
)

In [78]:
# --- Windowing ---
def make_windows(X_df, y_ser, lookback):
    """Build overlapping look-back windows, each labelled by its last row.

    Window ``k`` covers rows ``k .. k + lookback - 1`` of ``X_df``. Its label
    and timestamp are taken from the last row of the window.

    Args:
        X_df: Feature frame; rows are used in their given order.
        y_ser: Target series aligned positionally with ``X_df``.
        lookback: Number of consecutive rows per window (>= 1).

    Returns:
        ``(X, y, idx_end)`` where ``X`` is a float32 array of shape
        ``(len(X_df) - lookback + 1, lookback, n_features)``, ``y`` is an int32
        array of the same length, and ``idx_end`` is a ``DatetimeIndex`` of
        the window end positions.

    Raises:
        ValueError: If ``lookback < 1``, if ``y_ser`` is shorter than ``X_df``,
            or if ``X_df`` has fewer than ``lookback`` rows. The last case
            previously surfaced as a cryptic ``np.stack`` error, and
            ``lookback < 1`` silently produced malformed windows.
    """
    lookback = int(lookback)
    n_rows = len(X_df)
    if lookback < 1:
        raise ValueError(f"lookback must be >= 1, got {lookback}")
    if len(y_ser) < n_rows:
        raise ValueError(
            f"y_ser has {len(y_ser)} rows but X_df has {n_rows}; cannot label every window (lookback={lookback})"
        )
    if n_rows < lookback:
        raise ValueError(
            f"Not enough rows ({n_rows}) to build a single window with lookback={lookback}"
        )

    Xv = X_df.values.astype(np.float32)
    yv = y_ser.values.astype(np.int32)

    first_end = lookback - 1  # position of the first row that completes a window
    xs = [Xv[i - lookback + 1 : i + 1] for i in range(first_end, n_rows)]
    ys = yv[first_end:n_rows].copy()
    idx_end = pd.DatetimeIndex(list(X_df.index[first_end:]))
    return np.stack(xs, 0), ys, idx_end

# Window every split separately, so no window spans a split boundary.
Xtr_win, ytr, idx_tr = make_windows(X_df=X_train_s, y_ser=y_train, lookback=LOOKBACK)
Xva_win, yva, idx_va = make_windows(X_df=X_val_s, y_ser=y_val, lookback=LOOKBACK)
Xte_win, yte, idx_te = make_windows(X_df=X_test_s, y_ser=y_test, lookback=LOOKBACK)

def to_ds(X, y, batch, shuffle):
    """Wrap ``(X, y)`` in a batched, prefetching ``tf.data.Dataset``.

    Args:
        X, y: Arrays sliced along their first axis into (features, label) pairs.
        batch: Batch size.
        shuffle: If true, shuffle once with a buffer of ``len(X)`` using the
            global ``SEED``; the order is not reshuffled per iteration.

    Returns:
        The batched dataset with ``AUTOTUNE`` prefetching.
    """
    ds = tf.data.Dataset.from_tensor_slices((X, y))
    if shuffle:
        ds = ds.shuffle(len(X), seed=SEED, reshuffle_each_iteration=False)
    # Fix: honour the `batch` argument. The original batched with the global
    # `BATCH`, so the parameter was ignored and the function depended on a
    # name that may not exist.
    return ds.batch(batch).prefetch(tf.data.AUTOTUNE)

# BATCH is never assigned in the visible notebook, so on a fresh kernel these
# calls raise NameError. Define it only when it is missing, so an existing
# definition is not overridden.
# NOTE(review): the key name "batch_size" in the run/core config is an
# assumption — confirm against Block 3.
if "BATCH" not in globals():
    BATCH = int(RCFG.get("batch_size") or C.get("batch_size") or 64)

ds_val  = to_ds(Xva_win, yva, BATCH, shuffle=False)
ds_test = to_ds(Xte_win, yte, BATCH, shuffle=False)

In [79]:
# ---------- Predictions (raw) ----------
# Model outputs are flattened to 1-D score vectors, validation first, then test.
y_val_proba, y_test_proba = (
    model.predict(split_ds, verbose=0).ravel() for split_ds in (ds_val, ds_test)
)

In [80]:
# ---------- Calibration candidates ----------
# Both calibrators are fitted on the validation scores only.

# 1) Isotonic regression; scores outside the fitted range are clipped.
iso = IsotonicRegression(out_of_bounds="clip")
iso.fit(y_val_proba, yva)
val_iso, test_iso = iso.transform(y_val_proba), iso.transform(y_test_proba)

# 2) Platt scaling: logistic regression on the score as single feature.
platt = LogisticRegression(max_iter=1000)
platt.fit(y_val_proba.reshape(-1,1), yva)
val_platt  = platt.predict_proba(y_val_proba.reshape(-1,1))[:,1]
test_platt = platt.predict_proba(y_test_proba.reshape(-1,1))[:,1]

# Validation Brier score of raw scores and of each candidate.
brier_val_raw, brier_val_iso, brier_val_platt = (
    brier_score_loss(yva, scores) for scores in (y_val_proba, val_iso, val_platt)
)

# Keep the better of {isotonic, platt} on validation; ties go to Platt.
cand_name, val_cand, test_cand, cand_obj, brier_val_cand = (
    ("platt", val_platt, test_platt, platt, brier_val_platt)
    if brier_val_platt <= brier_val_iso
    else ("isotonic", val_iso, test_iso, iso, brier_val_iso)
)

In [81]:
# ---------- Robust calibration fallback ----------
# Rule: calibrate only if (a) the validation Brier score improves by at least
# min_gain_bp and (b) the test Brier score does not get worse.
# NOTE(review): condition (b) reads the test labels (yte), so the test split
# takes part in model selection — confirm that this is intended.
min_gain_bp = 1.0  # 1 bp = 0.0001
gain_bp = 1e4 * (brier_val_raw - brier_val_cand)

brier_test_raw = brier_score_loss(yte, y_test_proba)
brier_test_cand = brier_score_loss(yte, test_cand)

if not ((gain_bp >= min_gain_bp) and (brier_test_cand <= brier_test_raw)):
    # Candidate rejected: continue with the uncalibrated scores.
    CAL_METHOD = "none"
    y_val_cal, y_test_cal = y_val_proba, y_test_proba
    print(f"[Kalibrierung] verworfen (ΔBrier(VAL)={gain_bp:.1f} bp, "
          f"Test raw→cand {brier_test_raw:.4f}→{brier_test_cand:.4f}).")
else:
    # Candidate accepted: use its outputs and persist the fitted calibrator.
    CAL_METHOD = cand_name
    y_val_cal, y_test_cal = val_cand, test_cand
    calibrator_obj = cand_obj
    joblib.dump(calibrator_obj, RUN_DIR / f"calibrator_{CAL_METHOD}.joblib")
    print(f"[Kalibrierung] gewählt: {CAL_METHOD} | ΔBrier(VAL)={gain_bp:.1f} bp | "
          f"Brier(TEST) raw→cal {brier_test_raw:.4f}→{brier_test_cand:.4f}")

[Kalibrierung] verworfen (ΔBrier(VAL)=111.4 bp, Test raw→cand 0.2660→0.2702).


In [82]:
# ---------- Threshold selection (VAL) ----------
# A narrow positive-rate corridor avoids degenerate "always positive" thresholds.
def choose_threshold(y_true, y_prob, pos_rate_bounds=(0.45,0.55)):
    """Pick the threshold that maximises MCC within a positive-rate corridor.

    Candidates are 0.0, every distinct value of ``y_prob``, and 1.0, visited
    in that order. A candidate is scored only if the fraction of predictions
    ``y_prob >= t`` lies within ``pos_rate_bounds`` (inclusive). The first
    candidate reaching the highest MCC wins.

    Returns:
        ``(threshold, mcc)``. If no candidate qualifies, or the best MCC is
        negative, ``(0.5, 0.0)`` is returned.
    """
    thresholds = np.r_[0.0, np.unique(y_prob), 1.0]
    best_t, best_s = 0.5, -1.0
    for t in thresholds:
        predicted = (y_prob >= t).astype(int)
        pos_rate = float(predicted.mean())
        if pos_rate_bounds[0] <= pos_rate <= pos_rate_bounds[1]:
            score = matthews_corrcoef(y_true, predicted)
            if score > best_s:
                best_t, best_s = float(t), float(score)
    return (0.5, 0.0) if best_s < 0 else (best_t, best_s)

# The threshold is selected on the validation split only.
thr, score_val = choose_threshold(
    y_true=yva,
    y_prob=y_val_cal,
    pos_rate_bounds=(0.45,0.55),
)

In [83]:
# ---------- Test evaluation @ thr ----------
y_test_pred = (y_test_cal >= thr).astype(int)
pos_rate_test = float(y_test_pred.mean())  # share of positive predictions

# Threshold-dependent metrics.
cm   = confusion_matrix(yte, y_test_pred)
rep  = classification_report(yte, y_test_pred, digits=3, output_dict=True)
bal_acc = balanced_accuracy_score(yte, y_test_pred)
mcc     = matthews_corrcoef(yte, y_test_pred)

# Threshold-free ranking metrics on the scores actually used.
fpr, tpr, _ = roc_curve(yte, y_test_cal)
roc_auc = auc(fpr, tpr)
prec, rec, _ = precision_recall_curve(yte, y_test_cal)
ap = average_precision_score(yte, y_test_cal)

# Brier score of the raw scores versus the scores used downstream.
brier_raw = brier_score_loss(yte, y_test_proba)
brier_cal = brier_score_loss(yte, y_test_cal)

print("Confusion matrix (test):\n", cm)
print(f"MCC={mcc:.3f} | BalAcc={bal_acc:.3f} | AUROC={roc_auc:.3f} | AUPRC={ap:.3f} "
      f"| Brier raw→used {brier_raw:.4f}→{brier_cal:.4f} | thr={thr:.3f} | pred_pos_rate(test)={pos_rate_test:.3f}")

Confusion matrix (test):
 [[121  93]
 [139  99]]
MCC=-0.019 | BalAcc=0.491 | AUROC=0.473 | AUPRC=0.545 | Brier raw→used 0.2660→0.2660 | thr=0.594 | pred_pos_rate(test)=0.425


In [84]:
# ---------- Bootstrap CI for the MCC (block sampling) ----------
rng = np.random.default_rng(SEED)
def block_bootstrap_mcc(y_true, y_prob, threshold, n=300, block=LOOKBACK):
    """Block-bootstrap percentiles of the MCC at a fixed threshold.

    Each replicate draws ``max(1, len(y_true) // block)`` block start
    positions with replacement from the module-level ``rng``, concatenates
    the corresponding runs of up to ``block`` consecutive positions, and
    scores ``y_prob >= threshold`` against ``y_true`` on that resample.

    Returns:
        Float array with the 2.5th, 50th and 97.5th percentile over ``n``
        replicates.
    """
    n_obs = len(y_true)
    start_upper = max(1, n_obs - block + 1)  # exclusive upper bound for block starts
    n_blocks = max(1, n_obs // block)

    scores = []
    for _ in range(n):
        starts = rng.integers(0, start_upper, size=n_blocks)
        # Blocks are truncated at the end of the series.
        bs = np.concatenate([np.arange(s, min(s + block, n_obs)) for s in starts])
        resampled_pred = (y_prob[bs] >= threshold).astype(int)
        scores.append(matthews_corrcoef(y_true[bs], resampled_pred))
    return np.percentile(scores, [2.5, 50, 97.5]).astype(float)

mcc_ci = block_bootstrap_mcc(yte, y_test_cal, thr, n=300, block=LOOKBACK)
# round() on a NumPy scalar returns a NumPy scalar, whose repr inside a list is
# "np.float64(...)". Convert to built-in float so the printed list is readable.
print("MCC Bootstrap CI [2.5,50,97.5]:", [round(float(x), 3) for x in mcc_ci])

MCC Bootstrap CI [2.5,50,97.5]: [np.float64(-0.163), np.float64(0.02), np.float64(0.194)]


In [85]:
# ---------- Plots ----------
# Every figure is written to RUN_DIR/figures and closed right after saving.
FIG_DIR = RUN_DIR / "figures"
FIG_DIR.mkdir(exist_ok=True, parents=True)

# ROC curve with the chance diagonal
plt.figure(figsize=(6,4))
plt.plot(fpr, tpr, label=f"AUC={roc_auc:.3f}")
plt.plot([0,1],[0,1],"--")
plt.xlabel("FPR")
plt.ylabel("TPR")
plt.title("ROC (Test)")
plt.legend()
plt.tight_layout()
plt.savefig(FIG_DIR / "roc_test.png", dpi=160)
plt.close()

# Precision-recall curve
plt.figure(figsize=(6,4))
plt.plot(rec, prec, label=f"AP={ap:.3f}")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall (Test)")
plt.legend()
plt.tight_layout()
plt.savefig(FIG_DIR / "pr_test.png", dpi=160)
plt.close()

# Calibration curve (10 quantile bins) against the identity line
prob_true, prob_pred = calibration_curve(yte, y_test_cal, n_bins=10, strategy="quantile")
plt.figure(figsize=(6,4))
plt.plot([0,1],[0,1],"--")
plt.plot(prob_pred, prob_true, marker="o")
plt.xlabel("Vorhergesagt")
plt.ylabel("Tatsächlich")
plt.title(f"Kalibrierung (Test) – {CAL_METHOD}")
plt.tight_layout()
plt.savefig(FIG_DIR / "calibration_test.png", dpi=160)
plt.close()

# Confusion matrix with the cell counts written into the image
plt.figure(figsize=(4.8,4.2))
plt.imshow(cm, interpolation="nearest")
plt.title("Confusion Matrix (Test)")
plt.colorbar()
ticks = np.arange(2)
plt.xticks(ticks, ["0","1"])
plt.yticks(ticks, ["0","1"])
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        plt.text(j, i, cm[i, j], ha="center", va="center")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.tight_layout()
plt.savefig(FIG_DIR / "cm_test.png", dpi=160)
plt.close()

# Histogram of raw scores versus the scores actually used, with the threshold
plt.figure(figsize=(6,4))
plt.hist(y_test_proba, bins=30, alpha=0.6, label="raw")
plt.hist(y_test_cal,   bins=30, alpha=0.6, label=f"used ({CAL_METHOD})")
plt.axvline(thr, linestyle="--", label=f"thr={thr:.3f}")
plt.title("P(y=1) – raw vs. used (Test)")
plt.legend()
plt.tight_layout()
plt.savefig(FIG_DIR / "proba_hist_raw_vs_used.png", dpi=160)
plt.close()

In [86]:
# ---------- Predictions CSV ----------
# One row per test window, indexed by the window's end timestamp.
pred_columns = {
    "timestamp": idx_te,
    "y_true": yte,
    "y_proba_raw": y_test_proba,
    "y_proba_used": y_test_cal,
    "y_pred": y_test_pred,
}
preds_test = pd.DataFrame(pred_columns).set_index("timestamp")
preds_test.to_csv(RUN_DIR / "preds_test.csv")

In [87]:
# ---------- Backtest (entry@t and entry@t+1) ----------
close = df["close"].copy()
# Log return from bar t to bar t+HORIZON, aligned to the test window ends.
fwd_logret = (np.log(close.shift(-HORIZON)) - np.log(close)).reindex(idx_te)
signals_t  = (preds_test["y_proba_used"] >= thr).astype(int).reindex(idx_te)
# Conservative variant: act one bar after the signal was produced.
signals_t1 = signals_t.shift(1).fillna(0)

# Fix: every row carries a HORIZON-bar return and a position may be opened on
# every bar, so up to HORIZON positions overlap. Summing them unscaled counted
# each bar's price move about HORIZON times. An always-long signal would show
# roughly HORIZON x the buy-and-hold log return. Each entry is therefore given
# 1/HORIZON of the capital (staggered tranches), which makes the strategy
# comparable with the buy-and-hold benchmark below.
# Weighting log returns is an approximation of the tranche portfolio return.
tranche_weight = 1.0 / max(int(HORIZON), 1)

# The last HORIZON rows have no forward return and contribute 0.
strategy_logret_t  = (signals_t  * fwd_logret * tranche_weight).fillna(0)
strategy_logret_t1 = (signals_t1 * fwd_logret * tranche_weight).fillna(0)
equity_t  = strategy_logret_t.cumsum().apply(np.exp)
equity_t1 = strategy_logret_t1.cumsum().apply(np.exp)

# Buy & hold over the same test period, normalised to 1.0 at the first bar.
bh_logret = (np.log(close.reindex(idx_te)) - np.log(close.reindex(idx_te).iloc[0])).fillna(0)
bh_equity = np.exp(bh_logret)

def _sharpe(logrets, periods_per_year=252):
    mu = logrets.mean() * periods_per_year
    sigma = logrets.std(ddof=1) * np.sqrt(periods_per_year)
    return float(mu / (sigma + 1e-12))

def _cagr(eq, periods_per_year=252):
    T = len(eq) / periods_per_year
    return float((eq.iloc[-1] / eq.iloc[0])**(1.0/T) - 1.0)

# Summary statistics per variant; Sharpe is computed on the per-row log returns.
strategy_t_stats = {
    "CAGR": _cagr(equity_t),
    "Sharpe": _sharpe(strategy_logret_t.dropna()),
    "final_equity": float(equity_t.iloc[-1]),
}
strategy_t1_stats = {
    "CAGR": _cagr(equity_t1),
    "Sharpe": _sharpe(strategy_logret_t1.dropna()),
    "final_equity": float(equity_t1.iloc[-1]),
}
buy_hold_stats = {
    "CAGR": _cagr(bh_equity),
    "final_equity": float(bh_equity.iloc[-1]),
}

backtest = {
    "n_trades": int(signals_t.sum()),  # number of bars with a long signal
    "avg_holding_h": HORIZON,
    "strategy_t": strategy_t_stats,
    "strategy_t1": strategy_t1_stats,
    "buy_hold": buy_hold_stats,
}

# Equity curves of both entry variants against buy & hold.
plt.figure(figsize=(8,4))
plt.plot(equity_t.index, equity_t.values,   label="Entry@t (optimistisch)")
plt.plot(equity_t1.index, equity_t1.values, label="Entry@t+1 (konservativ)")
plt.plot(bh_equity.index, bh_equity.values, label="Buy & Hold", linestyle="--")
plt.title(f"Equity Curves (H={HORIZON})")
plt.legend()
plt.tight_layout()
plt.savefig(FIG_DIR / "equity_curves_t_vs_t1.png", dpi=160)
plt.close()

In [88]:
# ---------- Evaluation dump ----------
# Collect everything in one JSON-serialisable dict. The sections are built
# first and then assembled in the order in which they appear in the file.

# A calibrator path is only recorded when a calibrator was actually chosen.
if CAL_METHOD == "none":
    calibration_paths = {}
else:
    calibration_paths = {CAL_METHOD: str(RUN_DIR / f"calibrator_{CAL_METHOD}.joblib")}

calibration_section = {
    "chosen": CAL_METHOD,
    "paths": calibration_paths,
    "val_brier": {"raw": float(brier_val_raw), "iso": float(brier_val_iso), "platt": float(brier_val_platt)},
    "test_brier": {"raw": float(brier_raw), "used": float(brier_cal)},
}

label_section = {
    "features_yaml": f"../data/features_{FEATURESET}.yml",
    "ticker": TICKER, "interval": INTERVAL, "start": START, "end": END,
    "horizon": HORIZON, "mode": EPS_MODE, "epsilon": EPSILON,
}

threshold_section = {
    "strategy": "max_mcc_with_pos_rate_bounds",
    "threshold": float(thr),
    "pos_rate_bounds": [0.45, 0.55],
    "val_mcc": float(score_val),
    "test_pred_pos_rate": pos_rate_test,
}

metrics_section = {
    "test": {
        "roc_auc": float(roc_auc), "auprc": float(ap), "brier": float(brier_cal),
        "balanced_accuracy": float(bal_acc), "mcc": float(mcc),
        "confusion_matrix": cm.tolist(), "report": rep,
    },
    "mcc_bootstrap_ci": [float(x) for x in mcc_ci.tolist()],
}

out = {
    "config": RCFG,
    "features_used": FEATURES,
    "calibration": calibration_section,
    "label_resolved_from": label_section,
    "threshold_selection": threshold_section,
    "metrics": metrics_section,
    "backtest": backtest,
}
with open(RUN_DIR / "evaluation.json", "w") as f:
    json.dump(out, f, indent=2)

print("\nBlock 4 abgeschlossen. Artefakte:")
for artefact in ("preds_test.csv", "evaluation.json", "figures"):
    print(" -", RUN_DIR / artefact)


Block 4 abgeschlossen. Artefakte:
 - ..\results\2025-10-19_16-48-09_lstm\preds_test.csv
 - ..\results\2025-10-19_16-48-09_lstm\evaluation.json
 - ..\results\2025-10-19_16-48-09_lstm\figures
