In [1]:
# === SYSTEM & IMPORTS ===
import os, sys, json, time, re, glob
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras

# Metriken für Klassifikation (AUC, Precision, Recall, etc.)
from sklearn.metrics import (
    roc_curve, auc, precision_recall_curve, average_precision_score,
    classification_report, confusion_matrix, brier_score_loss,
    balanced_accuracy_score, matthews_corrcoef
)
# Kalibrierung und Baseline
from sklearn.calibration import calibration_curve, IsotonicRegression
from sklearn.linear_model import LogisticRegression
import joblib, yaml

In [2]:
# === HILFSFUNKTIONEN: LABEL & FILES FINDEN ===
# Diese Funktionen helfen uns, automatisch die richtigen Dateien zu finden,
# auch wenn wir Parameter geändert haben.

def label_from_yaml(featureset: str):
    """Read the label definition (horizon, mode, epsilon) from the feature YAML.

    Looks for ``../data/features_{featureset}.yml``. Returns a tuple
    ``(int horizon, str mode, float epsilon)`` when the file exists and its
    ``label`` mapping provides all three keys; returns ``None`` otherwise.
    """
    yaml_path = f"../data/features_{featureset}.yml"
    if not os.path.exists(yaml_path):
        return None

    with open(yaml_path, "r") as fh:
        meta = yaml.safe_load(fh) or {}

    label_cfg = meta.get("label") or {}
    horizon, mode, epsilon = (label_cfg.get(key) for key in ("horizon", "mode", "epsilon"))
    # All three values are required; a partial definition counts as "not found".
    if horizon is None or mode is None or epsilon is None:
        return None
    return int(horizon), str(mode), float(epsilon)

def parse_h_eps_from_path(path: str):
    """Extract ``(horizon, mode, epsilon)`` from a training-CSV file name.

    The file name is expected to contain ``_cls_h<H>_`` and to end in
    ``_<mode><eps>.csv`` where ``<eps>`` uses ``p`` in place of the decimal
    point (e.g. ``..._cls_h1_abs0p0005.csv``).

    Returns:
        ``(H, mode, eps)``; ``H`` is ``None`` when no horizon tag is present,
        and ``mode``/``eps`` are both ``None`` when the epsilon tag is missing
        or its numeric part cannot be parsed.
    """
    horizon_match = re.search(r"_cls_h(\d+)_", path)
    # The tag is built elsewhere in this file via str(eps).replace('.', 'p').
    # For tiny values str() produces scientific notation ("1e-05"), so an
    # optional exponent has to be accepted as well.
    eps_match = re.search(
        r"_(abs|rel|q\d+\.\d+)([\dp.]+(?:[eE][+-]?\d+)?)\.csv$", path
    )
    H = int(horizon_match.group(1)) if horizon_match else None
    if eps_match is None:
        return H, None, None

    mode = eps_match.group(1)
    eps_str = eps_match.group(2).replace("p", ".")
    try:
        eps = float(eps_str)
    except ValueError:
        # Malformed number (e.g. two decimal markers): treat as "unknown".
        return H, None, None
    return H, mode, eps

def infer_label_from_files(ticker, interval, start, end, H_hint=None, mode_hint=None, eps_hint=None):
    """Infer the label definition from the newest matching training CSV.

    Searches ``../data`` for files named
    ``{ticker}_{interval}_{start}_{end}_cls_h<H>_<tag>.csv``, optionally
    narrowed by ``H_hint`` and by ``mode_hint``/``eps_hint``, and parses the
    most recently modified candidate with ``parse_h_eps_from_path``.

    Returns:
        The tuple produced by ``parse_h_eps_from_path`` for the newest
        candidate, or ``None`` when no file matches.
    """
    # Bug fix: the previous pattern ended in "_cls_h*_.csv" (followed by a
    # no-op str.replace), which can only match names ending in "_.csv" and so
    # never matched real files such as "..._cls_h1_abs0p0005.csv".
    # glob.escape keeps wildcard characters in the config values literal.
    prefix = glob.escape(f"../data/{ticker}_{interval}_{start}_{end}")
    candidates = sorted(glob.glob(prefix + "_cls_h*_*.csv"), key=os.path.getmtime)

    if H_hint is not None:
        candidates = [c for c in candidates if f"_cls_h{H_hint}_" in c]
    if mode_hint and eps_hint is not None:
        tag = f"{mode_hint}{str(eps_hint).replace('.','p')}"
        candidates = [c for c in candidates if c.endswith(f"_{tag}.csv")]

    if not candidates:
        return None
    # Sorted ascending by mtime, so the last entry is the newest file.
    return parse_h_eps_from_path(candidates[-1])

In [3]:
# === 1) CONFIG & RUN-DIR BESTIMMEN ===
ROOT = os.path.abspath("..")
if ROOT not in sys.path:
    sys.path.insert(0, ROOT)

# Load the base configuration from the project root.
with open(os.path.join(ROOT, "config.json"), "r") as f:
    C = json.load(f)

TICKER, START, END, INTERVAL = C["ticker"], C["start"], C["end"], C["interval"]
LOOKBACK = int(C["lookback"])
SEED = int(C.get("seed", 42))
FEATURESET = C.get("featureset", "v2")

# Resolve the label definition: prefer the feature YAML, otherwise infer it
# from the training-CSV file names.
lbl = label_from_yaml(FEATURESET)
if lbl is not None:
    HORIZON, EPS_MODE, EPSILON = lbl
else:
    # infer_label_from_files returns None when no candidate file exists.
    # Unpacking that directly raised a TypeError and hid the intended
    # RuntimeError below, so normalise to a triple of None first.
    inferred = infer_label_from_files(TICKER, INTERVAL, START, END)
    HORIZON, EPS_MODE, EPSILON = inferred if inferred is not None else (None, None, None)
    if HORIZON is None or EPS_MODE is None or EPSILON is None:
        raise RuntimeError("Label-Definition (H/mode/epsilon) konnte nicht bestimmt werden. Block 2 nötig.")

print(f"[Block4] Labels: H={HORIZON}, mode={EPS_MODE}, epsilon={EPSILON}")
# Seed NumPy and TensorFlow for reproducible evaluation.
np.random.seed(SEED); tf.random.set_seed(SEED)
RESULTS_DIR = Path(C.get("results_dir", "../results"))

# Den passenden (neuesten) Run-Ordner finden
def _latest_run_dir_matching(results_dir: Path, H: int, eps_mode: str, eps: float) -> Path:
    tag = f"{eps_mode}{str(eps).replace('.','p')}"
    runs = sorted(results_dir.glob("*_lstm"), key=lambda p: p.stat().st_mtime, reverse=True)
    for r in runs:
        cfgp = r / "config.json"
        if not cfgp.exists(): continue
        try:
            with open(cfgp, "r") as f:
                rcfg = json.load(f)
            # Prüfen ob Parameter übereinstimmen
            ok_lb = int(rcfg.get("lookback", LOOKBACK)) == LOOKBACK
            ok_h  = (int(rcfg.get("horizon", H)) == H) or (("_cls_h"+str(H)+"_") in str(rcfg.get("train_csv","")))
            ok_eps= (tag in str(rcfg.get("train_csv","")))
            if ok_lb and ok_h and ok_eps:
                return r
        except Exception:
            pass
    # Fallback: Einfach den allerneuesten nehmen
    if runs:
        return runs[0]
    raise FileNotFoundError("Kein RUN_DIR gefunden – bitte Block 3 trainieren.")

# Pick the run directory whose recorded parameters match the resolved labels.
RUN_DIR = _latest_run_dir_matching(
    results_dir=RESULTS_DIR,
    H=HORIZON,
    eps_mode=EPS_MODE,
    eps=EPSILON,
)
print("RUN_DIR:", RUN_DIR)

[Block4] Labels: H=1, mode=abs, epsilon=0.0005
RUN_DIR: ..\results\2026-01-01_18-20-06_lstm


In [4]:
# === 2) ARTEFAKTE LADEN (Model, Scaler, Config) ===
# Artefact locations inside the selected run directory.
ENV_INFO    = RUN_DIR / "env_info.json"
BEST_PATH   = RUN_DIR / "best.keras"
SCALER_PATH = RUN_DIR / "scaler.joblib"
CFG_PATH    = RUN_DIR / "config.json"

# Prefer the checkpointed 'best.keras' over the final 'model.keras' if present.
MODEL_PATH = BEST_PATH if BEST_PATH.exists() else RUN_DIR / "model.keras"

# Fail early when a required artefact is missing.
assert MODEL_PATH.exists(), f"Model-File fehlt: {MODEL_PATH}"
assert SCALER_PATH.exists(), f"Scaler-File fehlt: {SCALER_PATH}"
assert CFG_PATH.exists(),    f"Run-Config fehlt: {CFG_PATH}"

# Configuration recorded by the training run.
RCFG = json.loads(CFG_PATH.read_text())

In [5]:
# === 3) KONSISTENZPRÜFUNG & MODELL-LOAD ===
# Prüfen, ob das geladene Modell zu unseren Parametern passt
def _parse_h_mode_eps_from_train_csv(path: str):
    mH = re.search(r"_cls_h(\d+)_", path)
    me = re.search(r"_(abs|rel|q\d+\.\d+)([\dp.]+)\.csv$", path)
    H = int(mH.group(1)) if mH else None
    mode = me.group(1) if me else None
    eps = float(me.group(2).replace("p",".")) if me else None
    return H, mode, eps

# Values recorded by the run; missing keys fall back to the current settings.
run_h_cfg = int(RCFG.get("horizon", HORIZON))
run_lb    = int(RCFG.get("lookback", LOOKBACK))
train_csv_in_cfg = str(RCFG.get("train_csv", ""))

h_from_name, mode_from_name, eps_from_name = _parse_h_mode_eps_from_train_csv(train_csv_in_cfg)

# A lookback mismatch makes the model input shape incompatible: hard stop.
assert run_lb == LOOKBACK, f"Inkompatibler Lookback: run={run_lb} vs. core={LOOKBACK}"

# Soft checks: report discrepancies, but continue.
ok_h = HORIZON in (run_h_cfg, h_from_name)
ok_m = mode_from_name in (None, EPS_MODE)
ok_e = True if eps_from_name is None else np.isclose(eps_from_name, EPSILON)

if not all((ok_h, ok_m, ok_e)):
    warning_parts = (
        "[WARN] Run-Config uneindeutig zu Label-Definition:",
        f"RCFG.horizon={run_h_cfg}, parsed_from_name={h_from_name}, target={HORIZON},",
        f"mode_in_name={mode_from_name}, target_mode={EPS_MODE},",
        f"eps_in_name={eps_from_name}, target_eps={EPSILON} — fahre mit H/M/E aus Daten fort.",
    )
    print(*warning_parts)

# Load the trained network (inference only, hence compile=False) and the scaler.
scaler = joblib.load(SCALER_PATH)
model  = keras.models.load_model(MODEL_PATH, compile=False)

In [6]:
# === 4) ORIGINAL-DATEN LADEN ===
# Wir müssen die Daten exakt so laden wie im Training, um Features und Splits zu rekonstruieren
eps_tag = f"{EPS_MODE}{str(EPSILON).replace('.','p')}"
train_exact = f"../data/{TICKER}_{INTERVAL}_{START}_{END}_cls_h{HORIZON}_{eps_tag}.csv"
# The former "fallback search" globbed for exactly the same literal path that
# had just been found missing, so it could never succeed. Fail directly with
# the same error instead of carrying dead code.
if not os.path.exists(train_exact):
    raise FileNotFoundError(
        f"Train CSV nicht gefunden (H={HORIZON}, tag={eps_tag}): {train_exact}\n"
        "Bitte Block 2 mit dieser Label-Definition laufen lassen."
    )
TRAIN_CSV = train_exact

print("TRAIN_CSV:", TRAIN_CSV)
# The first column is the timestamp index; sort chronologically so the
# positional splits further down are time-ordered.
df = pd.read_csv(TRAIN_CSV, index_col=0, parse_dates=True).sort_index()

# Re-read the run config and re-check the lookback so this cell is safe to
# re-run on its own.
with open(RUN_DIR / "config.json", "r") as f:
    RCFG = json.load(f)
if int(RCFG.get("lookback", LOOKBACK)) != LOOKBACK:
    raise AssertionError(f"Inkompatibler Lookback: run={RCFG.get('lookback')} vs. core={LOOKBACK}")

TRAIN_CSV: ../data/AAPL_1d_2010-01-01_2026-01-01_cls_h1_abs0p0005.csv


In [7]:
# === 5) FEATURE-SET BESTIMMEN ===
# Welche Features wurden trainiert? Das steht in der Config.
# The feature list recorded by the training run takes precedence; the feature
# YAML is only a fallback.
if "features" in RCFG and RCFG["features"]:
    requested_features = list(RCFG["features"])
else:
    with open(f"../data/features_{FEATURESET}.yml","r") as f:
        meta = yaml.safe_load(f) or {}
    # "features: null" in the YAML would otherwise fail on iteration.
    requested_features = list(meta.get("features") or [])

FEATURES = [c for c in requested_features if c in df.columns]
missing_features = [c for c in requested_features if c not in df.columns]
if missing_features:
    # Dropping columns silently changes the input width seen by the scaler
    # and the model, so make the mismatch visible here.
    print(f"[WARN] Features fehlen in TRAIN_CSV und werden ignoriert: {missing_features}")
assert len(FEATURES) > 0, "Keine Features gefunden."

X = df[FEATURES].copy()
y = df["target"].astype(int).copy()

In [8]:
# === 6) SPLIT WIEDERHERSTELLEN ===
# Exakt dieselben Splits wie im Training, damit Val und Test sauber bleiben
# Chronological 70 / 15 / 15 split by row position; the test part receives
# whatever remains after rounding down train and validation.
n = len(df)
n_train = int(n * 0.70)
n_val = int(n * 0.15)
n_test = n - (n_train + n_val)

train_idx, val_idx, test_idx = (
    slice(0, n_train),
    slice(n_train, n_train + n_val),
    slice(n_train + n_val, n),
)

(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    (X.iloc[part], y.iloc[part]) for part in (train_idx, val_idx, test_idx)
)

In [9]:
# === 7) SKALIERUNG ANWENDEN ===
# Den geladenen Scaler nutzen (nicht neu fitten!)
# Transform each split with the already-fitted scaler (no re-fitting) and wrap
# the result in a DataFrame that keeps the original index and column names.
X_train_s, X_val_s, X_test_s = (
    pd.DataFrame(scaler.transform(part), index=part.index, columns=FEATURES)
    for part in (X_train, X_val, X_test)
)

In [10]:
# === 8) WINDOWING (Datenaufbereitung für LSTM) ===
def make_windows(X_df, y_ser, lookback):
    """Build overlapping look-back windows for a sequence model.

    Window ``k`` covers rows ``k .. k + lookback - 1`` of ``X_df`` and is
    paired with the label and index entry of its last row.

    Args:
        X_df: feature DataFrame, one row per time step.
        y_ser: label Series aligned row-by-row with ``X_df``.
        lookback: number of time steps per window (>= 1).

    Returns:
        ``(X_win, y_win, idx_end)`` where ``X_win`` is float32 with shape
        ``(n_windows, lookback, n_features)``, ``y_win`` is int32 with shape
        ``(n_windows,)`` and ``idx_end`` is the ``DatetimeIndex`` of each
        window's last row.

    Raises:
        ValueError: if ``lookback`` is < 1 or ``X_df`` has fewer than
            ``lookback`` rows (previously this surfaced as an opaque
            ``np.stack`` error).
    """
    n_rows = len(X_df)
    if lookback < 1:
        raise ValueError(f"make_windows: lookback must be >= 1, got lookback={lookback}")
    if n_rows < lookback:
        raise ValueError(
            f"make_windows: need at least lookback={lookback} rows, got {n_rows}"
        )

    Xv = X_df.values.astype(np.float32)
    yv = y_ser.values.astype(np.int32)

    # sliding_window_view yields (n_windows, n_features, lookback); move the
    # window axis to position 1 and materialise a contiguous copy, matching
    # what stacking the individual slices produced.
    windows = np.lib.stride_tricks.sliding_window_view(Xv, lookback, axis=0)
    X_win = np.ascontiguousarray(np.moveaxis(windows, -1, 1))

    first_end = lookback - 1
    return X_win, yv[first_end:].copy(), pd.DatetimeIndex(X_df.index[first_end:])

# Window each split separately, so no window spans a split boundary.
(Xtr_win, ytr, idx_tr), (Xva_win, yva, idx_va), (Xte_win, yte, idx_te) = (
    make_windows(features, labels, LOOKBACK)
    for features, labels in (
        (X_train_s, y_train),
        (X_val_s, y_val),
        (X_test_s, y_test),
    )
)

def to_ds(X, y, batch, shuffle):
    """Wrap arrays in a batched, prefetching ``tf.data.Dataset``.

    Args:
        X: window array; sliced along its first axis.
        y: labels aligned with ``X``.
        batch: batch size.
        shuffle: when true, shuffle once with the module-level ``SEED``
            (same order on every iteration).

    Returns:
        The batched dataset with ``AUTOTUNE`` prefetching.
    """
    ds = tf.data.Dataset.from_tensor_slices((X, y))
    if shuffle:
        ds = ds.shuffle(len(X), seed=SEED, reshuffle_each_iteration=False)
    # Bug fix: honour the ``batch`` argument. It used to be ignored in favour
    # of the global config value C["batch"].
    return ds.batch(int(batch)).prefetch(tf.data.AUTOTUNE)

# Unshuffled datasets, so predictions stay aligned with yva / yte.
ds_val = to_ds(Xva_win, yva, batch=int(C.get("batch",64)), shuffle=False)
ds_test = to_ds(Xte_win, yte, batch=int(C.get("batch",64)), shuffle=False)

In [11]:
# === 9) VORHERSAGEN (PREDICTION) ===
# Wir generieren rohe Wahrscheinlichkeiten (0...1) für Validation und Test
# Flatten the model output to one probability per window.
# NOTE(review): assumes a single output unit per sample — TODO confirm.
y_val_proba, y_test_proba = (
    np.ravel(model.predict(dataset, verbose=0)) for dataset in (ds_val, ds_test)
)

In [12]:
# === 10) KALIBRIERUNG PRÜFEN ===
# Manchmal sind Keras-Wahrscheinlichkeiten nicht "echt" (z.B. alles zwischen 0.4 und 0.6).
# Methoden wie Isotonic Regression oder Platt Scaling können das korrigieren.

# Isotonic regression (non-parametric), fitted on the validation predictions.
iso = IsotonicRegression(out_of_bounds="clip").fit(y_val_proba, yva)
val_iso, test_iso = (iso.transform(p) for p in (y_val_proba, y_test_proba))

# Platt scaling: logistic regression on the raw model outputs.
platt = LogisticRegression(max_iter=1000)
platt.fit(y_val_proba.reshape(-1,1), yva)
val_platt, test_platt = (
    platt.predict_proba(p.reshape(-1,1))[:,1] for p in (y_val_proba, y_test_proba)
)

# Brier score = mean squared error of the probabilities (lower is better).
# NOTE(review): both calibrators are scored on the same validation data they
# were fitted on, so these scores are in-sample.
brier_val_raw, brier_val_iso, brier_val_platt = (
    brier_score_loss(yva, p) for p in (y_val_proba, val_iso, val_platt)
)

# Pick the better candidate on validation; ties go to Platt.
_calibration_candidates = {
    "platt": (val_platt, test_platt, platt, brier_val_platt),
    "isotonic": (val_iso, test_iso, iso, brier_val_iso),
}
cand_name = "platt" if brier_val_platt <= brier_val_iso else "isotonic"
val_cand, test_cand, cand_obj, brier_val_cand = _calibration_candidates[cand_name]

In [13]:
# === 11) KALIBRIERUNG ENTSCHEIDEN ===
# Wir akzeptieren die Kalibrierung nur, wenn sie den Brier Score signifikant verbessert (> 1 Basispunkt).
# WICHTIG: Entscheidung NUR auf Basis der Validation Daten!
min_gain_bp = 1.0  # minimum validation gain in basis points (1 bp = 0.0001)
gain_bp = (brier_val_raw - brier_val_cand) * 1e4

# Test-set Brier scores are computed for reporting only, never for selection.
brier_test_raw  = brier_score_loss(yte, y_test_proba)
brier_test_cand = brier_score_loss(yte, test_cand)

# Fix: decide via the declared threshold. The comparison used to hard-code
# 1e-4, so editing min_gain_bp silently had no effect.
# NOTE(review): the validation Brier of the candidate is in-sample (the
# calibrator was fitted on the same data), which favours calibration.
use_cal = gain_bp > min_gain_bp
if use_cal:
    CAL_METHOD, y_val_cal, y_test_cal = cand_name, val_cand, test_cand
    # Persist the chosen calibrator next to the model artefacts.
    joblib.dump(cand_obj, RUN_DIR / f"calibrator_{CAL_METHOD}.joblib")
else:
    CAL_METHOD, y_val_cal, y_test_cal = "none", y_val_proba, y_test_proba

print(f"[Kalibrierung] chosen={CAL_METHOD} | ΔBrier(VAL)={gain_bp:.1f} bp "
      f"| Test Brier raw→cand {brier_test_raw:.4f}→{brier_test_cand:.4f} (nur Bericht)")

[Kalibrierung] chosen=isotonic | ΔBrier(VAL)=76.2 bp | Test Brier raw→cand 0.2547→0.2503 (nur Bericht)


In [14]:
# === 12) OPTIMALEN THRESHOLD FINDEN ===
# Standard Threshold ist 0.5. Das ist aber oft nicht ideal.
# Wir suchen den Wert, der den MCC (Matthews Correlation Coefficient) auf VALIDATION maximiert.
# Bounds: Wir suchen nur in der Nähe der 'echten' Positivrate (Klassengleichgewicht).

def choose_threshold(y_true, y_prob, pos_rate_bounds=(0.45,0.55)):
    """Pick the decision threshold that maximises MCC within positive-rate bounds.

    Every unique probability (plus 0.0 and 1.0) is tried as a threshold.
    Thresholds whose predicted positive rate falls outside
    ``pos_rate_bounds`` are skipped.

    Args:
        y_true: array of 0/1 labels.
        y_prob: array of predicted probabilities, aligned with ``y_true``.
        pos_rate_bounds: inclusive ``(low, high)`` bounds on the predicted
            positive rate.

    Returns:
        ``(threshold, mcc)``. When no threshold is admissible, or the best
        admissible MCC is negative, the threshold falls back to 0.5 and the
        MCC actually obtained at 0.5 is returned (previously a made-up 0.0
        was reported, which did not describe the returned threshold).
    """
    low, high = pos_rate_bounds[0], pos_rate_bounds[1]
    candidates = np.r_[0.0, np.unique(y_prob), 1.0]
    best_t, best_s = 0.5, -1.0
    for t in candidates:
        yp = (y_prob >= t).astype(int)
        pos_rate = float(yp.mean())
        if not (low <= pos_rate <= high):
            continue
        s = matthews_corrcoef(y_true, yp)
        if s > best_s:
            best_s, best_t = float(s), float(t)

    if best_s < 0:
        fallback_t = 0.5
        fallback_pred = (y_prob >= fallback_t).astype(int)
        return fallback_t, float(matthews_corrcoef(y_true, fallback_pred))
    return best_t, best_s

# Centre the admissible predicted-positive-rate band (±0.10) on the observed
# validation positive rate, clipped to [0, 1].
p_val = float(np.mean(yva))
lower_bound = max(0.0, p_val - 0.10)
upper_bound = min(1.0, p_val + 0.10)
bounds = (lower_bound, upper_bound)
thr, score_val = choose_threshold(yva, y_val_cal, pos_rate_bounds=bounds)

# Statistische Spielerei (unbounded search als Referenz)
def best_mcc_unbounded(y_true, y_prob):
    """Return ``(best_mcc, threshold)`` over all candidate thresholds.

    Candidates are 0.0, every unique value of ``y_prob`` and 1.0; no
    positive-rate bounds are applied. The first threshold reaching the
    maximum wins. Starts from ``(-1.0, 0.5)``.
    """
    best_score, best_thr = -1.0, 0.5
    for candidate in np.r_[0.0, np.unique(y_prob), 1.0]:
        predicted = (y_prob >= candidate).astype(int)
        score = matthews_corrcoef(y_true, predicted)
        if score > best_score:
            best_score, best_thr = float(score), float(candidate)
    return (best_score, best_thr)

def best_youden_j(y_true, y_prob):
    """Return ``(best_J, threshold)`` maximising Youden's J statistic.

    J = sensitivity + specificity - 1, evaluated at 0.0, every unique value
    of ``y_prob`` and 1.0. The first threshold reaching the maximum wins.
    Starts from ``(-1.0, 0.5)``.
    """
    best = (-1.0, 0.5)
    for t in np.r_[0.0, np.unique(y_prob), 1.0]:
        yhat = (y_prob >= t).astype(int)
        # labels=[0, 1] forces a 2x2 matrix even when labels and predictions
        # contain only one class; otherwise the 4-way unpack below fails.
        tn, fp, fn, tp = confusion_matrix(y_true, yhat, labels=[0, 1]).ravel()
        # The small epsilon avoids division by zero when a class is absent.
        sens = tp / (tp + fn + 1e-12)
        spec = tn / (tn + fp + 1e-12)
        J = sens + spec - 1.0
        if J > best[0]:
            best = (float(J), float(t))
    return best

# Reference statistics on validation (reported only, not used for selection).
(mcc_raw, thr_mcc_raw), (J_raw, thr_J_raw) = (
    search(yva, y_val_cal) for search in (best_mcc_unbounded, best_youden_j)
)

In [15]:
# === 13) FINALE TEST-EVALUATION (@ chosen Threshold) ===
# Hard predictions at the threshold chosen on validation.
y_test_pred = (y_test_cal >= thr).astype(int)

# Threshold-dependent metrics.
cm = confusion_matrix(yte, y_test_pred)
rep = classification_report(yte, y_test_pred, digits=3, output_dict=True)

# Ranking metrics, based on the probabilities actually used.
fpr, tpr, _ = roc_curve(yte, y_test_cal)
roc_auc = auc(fpr, tpr)
prec, rec, _ = precision_recall_curve(yte, y_test_cal)
ap = average_precision_score(yte, y_test_cal)

# Random baseline for PR-AUC = share of positive labels.
ap_baseline_val = float(yva.mean())
ap_baseline_test = float(yte.mean())

brier_raw = brier_score_loss(yte, y_test_proba)
brier_cal = brier_score_loss(yte, y_test_cal)
bal_acc = balanced_accuracy_score(yte, y_test_pred)
mcc = matthews_corrcoef(yte, y_test_pred)
pos_rate_test = float(y_test_pred.mean())

print("Confusion matrix (test):\n", cm)
summary_parts = [
    f"MCC={mcc:.3f}",
    f"BalAcc={bal_acc:.3f}",
    f"AUROC={roc_auc:.3f}",
    f"AUPRC={ap:.3f} (baseline={ap_baseline_test:.3f})",
    f"Brier raw→used {brier_raw:.4f}→{brier_cal:.4f}",
    f"thr={thr:.3f}",
    f"pred_pos_rate(test)={pos_rate_test:.3f}",
]
print(" | ".join(summary_parts))

Confusion matrix (test):
 [[ 65 191]
 [ 67 218]]
MCC=0.022 | BalAcc=0.509 | AUROC=0.508 | AUPRC=0.532 (baseline=0.527) | Brier raw→used 0.2547→0.2503 | thr=0.500 | pred_pos_rate(test)=0.756


In [16]:
# === 14) BOOTSTRAP KONFIDENZINTERVALL (MCC) ===
# Ist unser Ergebnis statistisch signifikant?
# Seeded generator shared by all bootstrap draws of this cell.
rng = np.random.default_rng(SEED)
def block_bootstrap_mcc(y_true, y_prob, threshold, n=300, block=LOOKBACK):
    """Block-bootstrap percentiles of the MCC at a fixed threshold.

    Each replicate draws ``len(y_true) // block`` (at least one) random start
    positions, takes a contiguous run of up to ``block`` observations from
    each and scores the concatenated sample.

    Returns:
        Float array with the 2.5th, 50th and 97.5th percentile of the ``n``
        replicate scores.
    """
    n_obs = len(y_true)
    n_blocks = max(1, n_obs // block)
    start_upper = max(1, n_obs - block + 1)  # exclusive upper bound for starts
    scores = []
    for _ in range(n):
        # Sample whole blocks to keep the local time structure intact.
        starts = rng.integers(0, start_upper, size=n_blocks)
        sample = np.concatenate([np.arange(s, min(s + block, n_obs)) for s in starts])
        predicted = (y_prob[sample] >= threshold).astype(int)
        scores.append(matthews_corrcoef(y_true[sample], predicted))
    return np.percentile(scores, [2.5, 50, 97.5]).astype(float)

mcc_ci = block_bootstrap_mcc(yte, y_test_cal, thr, n=300, block=LOOKBACK)
# Convert to plain floats before rounding; otherwise the list prints as
# "np.float64(...)" entries.
print("MCC Bootstrap CI [2.5,50,97.5]:", [round(float(x), 3) for x in mcc_ci])

MCC Bootstrap CI [2.5,50,97.5]: [np.float64(-0.045), np.float64(0.032), np.float64(0.107)]


In [17]:
# === 15) DIAGNOSE-PLOTS SPEICHERN ===
FIG_DIR = RUN_DIR / "figures"
FIG_DIR.mkdir(exist_ok=True, parents=True)

# ROC curve
fig, ax = plt.subplots(figsize=(6,4))
ax.plot(fpr, tpr, label=f"AUC={roc_auc:.3f}")
ax.plot([0,1],[0,1],"--")
ax.set_xlabel("FPR")
ax.set_ylabel("TPR")
ax.set_title("ROC (Test)")
ax.legend()
fig.tight_layout()
fig.savefig(FIG_DIR / "roc_test.png", dpi=160)
plt.close(fig)

# Precision-recall curve (with the random baseline)
fig, ax = plt.subplots(figsize=(6,4))
ax.plot(rec, prec, label=f"AP={ap:.3f} (base={ap_baseline_test:.3f})")
ax.hlines(ap_baseline_test, xmin=0, xmax=1, linestyles="--", label="Random baseline")
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")
ax.set_title("Precision-Recall (Test)")
ax.legend()
fig.tight_layout()
fig.savefig(FIG_DIR / "pr_test.png", dpi=160)
plt.close(fig)

# Calibration curve (quantile bins)
prob_true, prob_pred = calibration_curve(yte, y_test_cal, n_bins=10, strategy="quantile")
fig, ax = plt.subplots(figsize=(6,4))
ax.plot([0,1],[0,1],"--")
ax.plot(prob_pred, prob_true, marker="o")
ax.set_xlabel("Vorhergesagt")
ax.set_ylabel("Tatsächlich")
ax.set_title(f"Kalibrierung (Test) – {CAL_METHOD}")
fig.tight_layout()
fig.savefig(FIG_DIR / "calibration_test.png", dpi=160)
plt.close(fig)

# Confusion matrix heat map with the counts written into the cells
fig, ax = plt.subplots(figsize=(4.8,4.2))
image = ax.imshow(cm, interpolation="nearest")
ax.set_title("Confusion Matrix (Test)")
fig.colorbar(image, ax=ax)
ticks = np.arange(2)
ax.set_xticks(ticks)
ax.set_xticklabels(["0","1"])
ax.set_yticks(ticks)
ax.set_yticklabels(["0","1"])
for i, j in np.ndindex(cm.shape):
    ax.text(j, i, cm[i, j], ha="center", va="center")
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
fig.tight_layout()
fig.savefig(FIG_DIR / "cm_test.png", dpi=160)
plt.close(fig)

# Histogram of raw vs. used probabilities, with the decision threshold
fig, ax = plt.subplots(figsize=(6,4))
ax.hist(y_test_proba, bins=30, alpha=0.6, label="raw")
ax.hist(y_test_cal,   bins=30, alpha=0.6, label=f"used ({CAL_METHOD})")
ax.axvline(thr, linestyle="--", label=f"thr={thr:.3f}")
ax.set_title("P(y=1) – raw vs. used (Test)")
ax.legend()
fig.tight_layout()
fig.savefig(FIG_DIR / "proba_hist_raw_vs_used.png", dpi=160)
plt.close(fig)

In [18]:
# === 16) VORHERSAGEN SPEICHERN ===
# One row per test window, indexed by the timestamp of the window's last row.
preds_test = pd.DataFrame(
    {
        "y_true": yte,
        "y_proba_raw": y_test_proba,
        "y_proba_used": y_test_cal,
        "y_pred": y_test_pred,
    },
    index=pd.DatetimeIndex(idx_te, name="timestamp"),
)
preds_test.to_csv(RUN_DIR / "preds_test.csv")

In [19]:
# === 17) EINFACHER BACKTEST (Equity Curves) ===
# T+1 Entry Simulation (realistisch) und T Entry (optimistisch/unmöglich)
close = df["close"].copy()
# Forward log return from t to t+HORIZON, restricted to the test timestamps.
fwd_logret = (np.log(close.shift(-HORIZON)) - np.log(close)).reindex(idx_te)

# Signals: 1 = long, 0 = flat.
signals_t  = (preds_test["y_proba_used"] >= thr).astype(int).reindex(idx_te)
signals_t1 = signals_t.shift(1).fillna(0)  # act one bar AFTER the signal

# Fix: fwd_logret spans HORIZON bars but is booked on every bar, so for
# HORIZON > 1 the overlapping returns were counted HORIZON times. Each entry
# is therefore weighted with 1/HORIZON of the capital (no change for H=1).
position_weight = 1.0 / HORIZON
strategy_logret_t  = (signals_t  * fwd_logret * position_weight).fillna(0)
strategy_logret_t1 = (signals_t1 * fwd_logret * position_weight).fillna(0)
equity_t  = np.exp(strategy_logret_t.cumsum())   # equity relative to 1.0
equity_t1 = np.exp(strategy_logret_t1.cumsum())

# Buy & hold: cumulative log return of the close relative to the first test bar.
# NOTE(review): this curve is indexed by the price at t, while the strategy
# curves book the forward return at t — the curves are offset by one horizon.
close_test = close.reindex(idx_te)
bh_logret = (np.log(close_test) - np.log(close_test.iloc[0])).fillna(0)
bh_equity = np.exp(bh_logret)

# KPIs (Sharpe, CAGR)
def _sharpe(logrets, periods_per_year=252):
    mu = logrets.mean() * periods_per_year
    sigma = logrets.std(ddof=1) * np.sqrt(periods_per_year)
    return float(mu / (sigma + 1e-12))

def _cagr(eq, periods_per_year=252):
    T = len(eq) / periods_per_year
    return float((eq.iloc[-1] / eq.iloc[0])**(1.0/max(T,1e-12)) - 1.0)

# KPI summary. "n_trades" is the sum of the entry@t signal series.
backtest = {
    "n_trades": int(signals_t.sum()),
    "avg_holding_h": HORIZON,
}
for _variant, _equity, _logret in (
    ("strategy_t", equity_t, strategy_logret_t),
    ("strategy_t1", equity_t1, strategy_logret_t1),
):
    backtest[_variant] = {
        "CAGR": _cagr(_equity),
        "Sharpe": _sharpe(_logret.dropna()),
        "final_equity": float(_equity.iloc[-1]),
    }
backtest["buy_hold"] = {"CAGR": _cagr(bh_equity), "final_equity": float(bh_equity.iloc[-1])}

# Equity curves: both entry variants against buy & hold.
fig, ax = plt.subplots(figsize=(8,4))
ax.plot(equity_t.index, equity_t.values,   label="Entry@t (optimistisch / Referenz)")
ax.plot(equity_t1.index, equity_t1.values, label="Entry@t+1 (realistische KPI-Basis)")
ax.plot(bh_equity.index, bh_equity.values, label="Buy & Hold", linestyle="--")
ax.set_title(f"Equity Curves (H={HORIZON})")
ax.legend()
fig.tight_layout()
fig.savefig(FIG_DIR / "equity_curves_t_vs_t1.png", dpi=160)
plt.close(fig)

In [20]:
# === 18) ALLES ZUSAMMENFASSEN & SPEICHERN ===
# Calibration: chosen method, stored calibrator path and Brier scores.
calibrator_paths = {}
if CAL_METHOD != "none":
    calibrator_paths[CAL_METHOD] = str(RUN_DIR / f"calibrator_{CAL_METHOD}.joblib")
calibration_summary = {
    "chosen": CAL_METHOD,
    "paths": calibrator_paths,
    "val_brier": {"raw": float(brier_val_raw), "iso": float(brier_val_iso), "platt": float(brier_val_platt)},
    "test_brier": {"raw": float(brier_test_raw), "candidate": float(brier_test_cand), "used": float(brier_cal)},
}

# Where the label definition came from and what it resolved to.
label_summary = {
    "features_yaml": f"../data/features_{FEATURESET}.yml",
    "ticker": TICKER, "interval": INTERVAL, "start": START, "end": END,
    "horizon": HORIZON, "mode": EPS_MODE, "epsilon": EPSILON,
}

# Threshold selection, including the unbounded reference searches.
threshold_summary = {
    "strategy": "max_mcc_with_pos_rate_bounds_centered_on_val_rate",
    "threshold": float(thr),
    "pos_rate_bounds": [float(bounds[0]), float(bounds[1])],
    "val_pos_rate": p_val,
    "val_mcc": float(score_val),
    "val_mcc_unbounded": {"mcc": float(mcc_raw), "thr": float(thr_mcc_raw)},
    "val_youden_j": {"J": float(J_raw), "thr": float(thr_J_raw)},
    "test_pred_pos_rate": pos_rate_test,
}

# Test metrics at the chosen threshold plus the bootstrap interval.
metrics_summary = {
    "test": {
        "roc_auc": float(roc_auc),
        "auprc": float(ap),
        "auprc_baseline": ap_baseline_test,
        "brier": float(brier_cal),
        "balanced_accuracy": float(bal_acc),
        "mcc": float(mcc),
        "confusion_matrix": cm.tolist(),
        "report": rep,
    },
    "mcc_bootstrap_ci": list(map(float, mcc_ci.tolist())),
}

out = {
    "config": RCFG,
    "features_used": FEATURES,
    "calibration": calibration_summary,
    "label_resolved_from": label_summary,
    "threshold_selection": threshold_summary,
    "metrics": metrics_summary,
    "backtest": backtest,
    "report_notes": {
        "kpi_basis": "entry_t1",
        "entry_t_is_upper_bound": True,
        "pr_auc_baseline_note": "random baseline equals positive rate",
    },
}
with open(RUN_DIR / "evaluation.json", "w") as f:
    json.dump(out, f, indent=2)

print("\nBlock 4 abgeschlossen. Artefakte:")
for _artefact in ("preds_test.csv", "evaluation.json", "figures"):
    print(" -", RUN_DIR / _artefact)


Block 4 abgeschlossen. Artefakte:
 - ..\results\2026-01-01_18-20-06_lstm\preds_test.csv
 - ..\results\2026-01-01_18-20-06_lstm\evaluation.json
 - ..\results\2026-01-01_18-20-06_lstm\figures
