In [19]:
# --- Imports & Setup ---
import os, sys, json, time
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras

from sklearn.metrics import (
    roc_curve, auc, precision_recall_curve, average_precision_score,
    classification_report, confusion_matrix, brier_score_loss,
    balanced_accuracy_score, matthews_corrcoef
)
from sklearn.calibration import calibration_curve, IsotonicRegression
from sklearn.linear_model import LogisticRegression
import joblib, yaml

ROOT = os.path.abspath("..")
if ROOT not in sys.path:
    sys.path.insert(0, ROOT)

with open(os.path.join(ROOT, "config.json"), "r") as f:
    C = json.load(f)

TICKER, START, END, INTERVAL = C["ticker"], C["start"], C["end"], C["interval"]
HORIZON  = int(C["horizon"])
LOOKBACK = int(C["lookback"])
BATCH    = int(C["batch"]);   EPOCHS = int(C["epochs"])
SEED     = int(C.get("seed", 42))
FEATURESET = C.get("featureset", "v2")
EPS_MODE   = C.get("epsilon_mode", "abs")
EPSILON    = float(C.get("epsilon", 0.001))

np.random.seed(SEED); tf.random.set_seed(SEED)

RESULTS_DIR = Path(C.get("results_dir", "../results"))
def _latest_run_dir(results_dir: Path) -> Path:
    runs = sorted(results_dir.glob("*_lstm"), key=lambda p: p.stat().st_mtime, reverse=True)
    if not runs: raise FileNotFoundError("Kein RUN_DIR gefunden. Bitte Block 3 trainieren.")
    return runs[0]

RUN_DIR = _latest_run_dir(RESULTS_DIR)
print("RUN_DIR:", RUN_DIR)

RUN_DIR: ..\results\2025-10-18_16-13-23_lstm


In [2]:
# --- Artefakte laden ---
MODEL_PATH  = RUN_DIR / "model.keras"
SCALER_PATH = RUN_DIR / "scaler.joblib"
CFG_PATH    = RUN_DIR / "config.json"
assert MODEL_PATH.exists() and SCALER_PATH.exists() and CFG_PATH.exists()

with open(CFG_PATH, "r") as f:
    RCFG = json.load(f)

# Konsistenz (Lookback/Horizon) prüfen
assert int(RCFG["horizon"]) == HORIZON and int(RCFG["lookback"]) == LOOKBACK

model  = keras.models.load_model(MODEL_PATH, compile=False)
scaler = joblib.load(SCALER_PATH)

In [3]:
# --- Daten laden ---
eps_tag   = f"{EPS_MODE}{str(EPSILON).replace('.','p')}"
TRAIN_CSV = f"../data/{TICKER}_{INTERVAL}_{START}_{END}_cls_h{HORIZON}_{eps_tag}.csv"
df = pd.read_csv(TRAIN_CSV, index_col=0, parse_dates=True).sort_index()

In [4]:
# --- Features bestimmen: 1) aus RUN-Config, 2) andernfalls aus YAML v2 ---
OHLCV = {"open","high","low","close","volume"}
if "features" in RCFG and RCFG["features"]:
    FEATURES = [c for c in RCFG["features"] if c in df.columns]
else:
    with open(f"../data/features_{FEATURESET}.yml","r") as f:
        meta = yaml.safe_load(f) or {}
    FEATURES = [c for c in meta.get("features", []) if c in df.columns]
assert len(FEATURES) > 0, "Keine Features gefunden."

X = df[FEATURES].copy()
y = df["target"].astype(int).copy()

In [5]:
# Chronologische Splits (70/15/15)
n = len(df)
n_train = int(n*0.70); n_val = int(n*0.15); n_test = n - n_train - n_val
train_idx = slice(0, n_train)
val_idx   = slice(n_train, n_train + n_val)
test_idx  = slice(n_train + n_val, n)

X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
X_val,   y_val   = X.iloc[val_idx],   y.iloc[val_idx]
X_test,  y_test  = X.iloc[test_idx],  y.iloc[test_idx]

In [6]:
# --- Skalierung ---
X_train_s = pd.DataFrame(scaler.transform(X_train), index=X_train.index, columns=FEATURES)
X_val_s   = pd.DataFrame(scaler.transform(X_val),   index=X_val.index,   columns=FEATURES)
X_test_s  = pd.DataFrame(scaler.transform(X_test),  index=X_test.index,  columns=FEATURES)


In [7]:
# --- Windowing ---
def make_windows(X_df, y_ser, lookback):
    Xv = X_df.values.astype(np.float32); yv = y_ser.values.astype(np.int32)
    xs, ys, idx_end = [], [], []
    for i in range(lookback-1, len(X_df)):
        xs.append(Xv[i - lookback + 1 : i + 1]); ys.append(yv[i]); idx_end.append(X_df.index[i])
    return np.stack(xs, 0), np.array(ys), pd.DatetimeIndex(idx_end)

Xtr_win, ytr, idx_tr = make_windows(X_train_s, y_train, LOOKBACK)
Xva_win, yva, idx_va = make_windows(X_val_s,   y_val,   LOOKBACK)
Xte_win, yte, idx_te = make_windows(X_test_s,  y_test,  LOOKBACK)

def to_ds(X, y, batch, shuffle):
    ds = tf.data.Dataset.from_tensor_slices((X, y))
    if shuffle: ds = ds.shuffle(len(X), seed=SEED, reshuffle_each_iteration=False)
    return ds.batch(BATCH).prefetch(tf.data.AUTOTUNE)

ds_val  = to_ds(Xva_win, yva, BATCH, shuffle=False)
ds_test = to_ds(Xte_win, yte, BATCH, shuffle=False)

In [8]:
# Roh-Wahrscheinlichkeiten
y_val_proba  = model.predict(ds_val,  verbose=0).ravel()
y_test_proba = model.predict(ds_test, verbose=0).ravel()

In [9]:
# --- Kalibrierungskandidaten ---
# 1) Isotonic
iso = IsotonicRegression(out_of_bounds="clip").fit(y_val_proba, yva)
val_iso = iso.transform(y_val_proba)
test_iso = iso.transform(y_test_proba)

# 2) Platt scaling (LogisticRegression auf den Scores)
platt = LogisticRegression(max_iter=1000)
platt.fit(y_val_proba.reshape(-1,1), yva)
val_platt  = platt.predict_proba(y_val_proba.reshape(-1,1))[:,1]
test_platt = platt.predict_proba(y_test_proba.reshape(-1,1))[:,1]

# Val-Brier vergleichen → beste Methode wählen
brier_val_raw   = brier_score_loss(yva, y_val_proba)
brier_val_iso   = brier_score_loss(yva, val_iso)
brier_val_platt = brier_score_loss(yva, val_platt)

if brier_val_platt <= brier_val_iso:
    CAL_METHOD = "platt"
    y_val_cal, y_test_cal = val_platt, test_platt
    calibrator_obj = platt
else:
    CAL_METHOD = "isotonic"
    y_val_cal, y_test_cal = val_iso, test_iso
    calibrator_obj = iso

joblib.dump(calibrator_obj, RUN_DIR / f"calibrator_{CAL_METHOD}.joblib")
print(f"Kalibriermethode gewählt: {CAL_METHOD} | Brier raw/iso/platt (VAL): "
      f"{brier_val_raw:.4f}/{brier_val_iso:.4f}/{brier_val_platt:.4f}")

Kalibriermethode gewählt: isotonic | Brier raw/iso/platt (VAL): 0.2470/0.2415/0.2483


In [10]:
# --- Backward-compat alias so ältere Zellen nicht brechen ---
y_val_proba_cal  = y_val_cal
y_test_proba_cal = y_test_cal
calib_name = CAL_METHOD

In [11]:
from sklearn.metrics import matthews_corrcoef

def choose_threshold(y_true, y_prob, strategy="max_mcc", pos_rate_bounds=(0.35,0.65)):
    uniq = np.unique(y_prob); cand = np.r_[0.0, uniq, 1.0]
    best_t, best_s = 0.5, -1.0
    for t in cand:
        yp = (y_prob >= t).astype(int)
        pr = float(yp.mean())
        if not (pos_rate_bounds[0] <= pr <= pos_rate_bounds[1]): 
            continue
        s = matthews_corrcoef(y_true, yp)
        if s > best_s: best_s, best_t = float(s), float(t)
    if best_s < 0:  # Fallback
        return 0.5, 0.0
    return best_t, best_s

thr, score_val = choose_threshold(yva, y_val_cal, pos_rate_bounds=(0.35,0.65))
y_test_pred = (y_test_cal >= thr).astype(int)

# Metriken
cm   = confusion_matrix(yte, y_test_pred)
rep  = classification_report(yte, y_test_pred, digits=3, output_dict=True)
fpr, tpr, _ = roc_curve(yte, y_test_cal); roc_auc = auc(fpr, tpr)
prec, rec, _ = precision_recall_curve(yte, y_test_cal); ap = average_precision_score(yte, y_test_cal)
brier_raw = brier_score_loss(yte, y_test_proba); brier_cal = brier_score_loss(yte, y_test_cal)
bal_acc = balanced_accuracy_score(yte, y_test_pred)
mcc     = matthews_corrcoef(yte, y_test_pred)

print("Confusion matrix (test):\n", cm)
print(f"MCC={mcc:.3f} | BalAcc={bal_acc:.3f} | AUROC={roc_auc:.3f} | AUPRC={ap:.3f} | Brier raw→cal {brier_raw:.4f}→{brier_cal:.4f}")

Confusion matrix (test):
 [[ 68 155]
 [ 64 165]]
MCC=0.028 | BalAcc=0.513 | AUROC=0.477 | AUPRC=0.489 | Brier raw→cal 0.2603→0.2899


In [12]:
# --- Test-Evaluation (kalibriert @ thr) ---
y_test_pred = (y_test_proba_cal >= thr).astype(int)

cm   = confusion_matrix(yte, y_test_pred)
rep  = classification_report(yte, y_test_pred, digits=3, output_dict=True)
fpr, tpr, _ = roc_curve(yte, y_test_proba_cal)
roc_auc = auc(fpr, tpr)
prec, rec, _ = precision_recall_curve(yte, y_test_proba_cal)
ap = average_precision_score(yte, y_test_proba_cal)
brier_raw = brier_score_loss(yte, y_test_proba)       # vor Kal.
brier_cal = brier_score_loss(yte, y_test_proba_cal)   # nach Kal.
bal_acc = balanced_accuracy_score(yte, y_test_pred)
mcc     = matthews_corrcoef(yte, y_test_pred)

print("Confusion matrix (test):\n", cm)
print("MCC:", round(mcc,3), "BalancedAcc:", round(bal_acc,3), "AUROC:", round(roc_auc,3), "AUPRC:", round(ap,3))

Confusion matrix (test):
 [[ 68 155]
 [ 64 165]]
MCC: 0.028 BalancedAcc: 0.513 AUROC: 0.477 AUPRC: 0.489


In [13]:
# --- Bootstrap-CI fürs MCC (Blocksampling) ---
rng = np.random.default_rng(SEED)
def block_bootstrap_mcc(y_true, y_prob, threshold, n=300, block=LOOKBACK):
    idx = np.arange(len(y_true))
    scores = []
    for _ in range(n):
        starts = rng.integers(0, max(1, len(idx)-block+1), size=max(1, len(idx)//block))
        bs = np.concatenate([np.arange(s, min(s+block, len(idx))) for s in starts])
        yp = (y_prob[bs] >= threshold).astype(int)
        scores.append(matthews_corrcoef(y_true[bs], yp))
    return np.percentile(scores, [2.5, 50, 97.5]).astype(float)
mcc_ci = block_bootstrap_mcc(yte, y_test_proba_cal, thr, n=300, block=LOOKBACK)
print("MCC Bootstrap CI [2.5,50,97.5]:", [round(x,3) for x in mcc_ci])


MCC Bootstrap CI [2.5,50,97.5]: [np.float64(-0.075), np.float64(0.011), np.float64(0.115)]


In [14]:
# --- Plots ---
FIG_DIR = RUN_DIR / "figures"; FIG_DIR.mkdir(exist_ok=True, parents=True)

# ROC / PR
plt.figure(figsize=(6,4)); plt.plot(fpr, tpr, label=f"AUC={roc_auc:.3f}")
plt.plot([0,1],[0,1],"--"); plt.xlabel("FPR"); plt.ylabel("TPR"); plt.title("ROC (Test)")
plt.legend(); plt.tight_layout(); plt.savefig(FIG_DIR / "roc_test.png", dpi=160); plt.close()

plt.figure(figsize=(6,4)); plt.plot(rec, prec, label=f"AP={ap:.3f}")
plt.xlabel("Recall"); plt.ylabel("Precision"); plt.title("Precision-Recall (Test)")
plt.legend(); plt.tight_layout(); plt.savefig(FIG_DIR / "pr_test.png", dpi=160); plt.close()

# Kalibrierungskurve
prob_true, prob_pred = calibration_curve(yte, y_test_cal, n_bins=10, strategy="quantile")
plt.figure(figsize=(6,4)); plt.plot([0,1],[0,1],"--"); plt.plot(prob_pred, prob_true, marker="o")
plt.xlabel("Vorhergesagt"); plt.ylabel("Tatsächlich"); plt.title(f"Kalibrierung (Test) – {CAL_METHOD}")
plt.tight_layout(); plt.savefig(FIG_DIR / "calibration_test.png", dpi=160); plt.close()

# Confusion Matrix
plt.figure(figsize=(4.8,4.2))
plt.imshow(cm, interpolation="nearest"); plt.title("Confusion Matrix (Test)"); plt.colorbar()
ticks = np.arange(2); plt.xticks(ticks, ["0","1"]); plt.yticks(ticks, ["0","1"])
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        plt.text(j, i, cm[i, j], ha="center", va="center")
plt.xlabel("Predicted"); plt.ylabel("True")
plt.tight_layout(); plt.savefig(FIG_DIR / "cm_test.png", dpi=160); plt.close()

# Histogramm Probas
plt.figure(figsize=(6,4))
plt.hist(y_test_proba, bins=30, alpha=0.6, label="raw")
plt.hist(y_test_cal,   bins=30, alpha=0.6, label=f"calibrated ({CAL_METHOD})")
plt.axvline(thr, linestyle="--", label=f"thr={thr:.3f}")
plt.title("P(y=1) – raw vs. calibrated (Test)")
plt.legend(); plt.tight_layout()
plt.savefig(FIG_DIR / "proba_hist_raw_vs_cal.png", dpi=160); plt.close()

In [15]:
# Predictions-CSV
preds_test = pd.DataFrame({
    "timestamp": idx_te, "y_true": yte,
    "y_proba_raw": y_test_proba, "y_proba_cal": y_test_cal, "y_pred": y_test_pred,
}).set_index("timestamp")
preds_test.to_csv(RUN_DIR / "preds_test.csv")

In [16]:
# --- Backtest wie gehabt (Entry@t und @t+1) ---
close = df["close"].copy()
fwd_logret = (np.log(close.shift(-HORIZON)) - np.log(close)).reindex(idx_te)
signals_t  = (preds_test["y_proba_cal"] >= thr).astype(int).reindex(idx_te)
signals_t1 = signals_t.shift(1).fillna(0)

strategy_logret_t  = (signals_t  * fwd_logret).fillna(0)
strategy_logret_t1 = (signals_t1 * fwd_logret).fillna(0)
equity_t  = strategy_logret_t.cumsum().apply(np.exp)
equity_t1 = strategy_logret_t1.cumsum().apply(np.exp)

bh_logret = (np.log(close.reindex(idx_te)) - np.log(close.reindex(idx_te).iloc[0])).fillna(0)
bh_equity = np.exp(bh_logret)

def _sharpe(logrets, periods_per_year=252):
    mu = logrets.mean() * periods_per_year
    sigma = logrets.std(ddof=1) * np.sqrt(periods_per_year)
    return float(mu / (sigma + 1e-12))

def _cagr(eq, periods_per_year=252):
    T = len(eq) / periods_per_year
    return float((eq.iloc[-1] / eq.iloc[0])**(1.0/T) - 1.0)

backtest = {
    "n_trades": int(signals_t.sum()),
    "avg_holding_h": HORIZON,
    "strategy_t": {"CAGR": _cagr(equity_t),  "Sharpe": _sharpe(strategy_logret_t.dropna()),  "final_equity": float(equity_t.iloc[-1])},
    "strategy_t1":{"CAGR": _cagr(equity_t1), "Sharpe": _sharpe(strategy_logret_t1.dropna()), "final_equity": float(equity_t1.iloc[-1])},
    "buy_hold":  {"CAGR": _cagr(bh_equity),  "final_equity": float(bh_equity.iloc[-1])},
}

plt.figure(figsize=(8,4))
plt.plot(equity_t.index, equity_t.values,   label="Entry@t (optimistisch)")
plt.plot(equity_t1.index, equity_t1.values, label="Entry@t+1 (konservativ)")
plt.plot(bh_equity.index, bh_equity.values, label="Buy & Hold", linestyle="--")
plt.title(f"Equity Curves (H={HORIZON})")
plt.legend(); plt.tight_layout(); plt.savefig(FIG_DIR / "equity_curves_t_vs_t1.png", dpi=160); plt.close()

In [17]:
# --- Vor/Nach: Histogramm + Brier ---
calib_name = "isotonic"  # oder aus deiner Pipeline lesen

brier_raw = brier_score_loss(yte, y_test_proba)
brier_cal = brier_score_loss(yte, y_test_proba_cal)

plt.figure(figsize=(6,4))
plt.hist(y_test_proba,      bins=30, alpha=0.6, label="raw")
plt.hist(y_test_proba_cal,  bins=30, alpha=0.6, label=f"calibrated ({calib_name})")
plt.axvline(thr, linestyle="--", label=f"thr={thr:.3f}")
plt.title("P(y=1) – raw vs. calibrated (Test)")
plt.legend(); plt.tight_layout()
plt.savefig(FIG_DIR / "proba_hist_raw_vs_cal.png", dpi=160); plt.close()

print(f"Brier raw={brier_raw:.4f} → calibrated={brier_cal:.4f}")


Brier raw=0.2603 → calibrated=0.2899


In [18]:
out = {
    "config": RCFG,
    "features_used": FEATURES,
    "calibration": {
        "chosen": CAL_METHOD,
        "paths": {CAL_METHOD: str(RUN_DIR / f"calibrator_{CAL_METHOD}.joblib")},
        "val_brier": {"raw": float(brier_val_raw), "iso": float(brier_val_iso), "platt": float(brier_val_platt)},
        "test_brier": {"raw": float(brier_raw), "cal": float(brier_cal)}
    },
    "threshold_selection": {
        "strategy": "max_mcc_with_pos_rate_bounds",
        "threshold": float(thr),
        "pos_rate_bounds": [0.35, 0.65],
        "val_mcc": float(score_val),
        "test_pred_pos_rate": float((y_test_cal >= thr).mean())
    },
    "metrics": {
        "test": {
            "roc_auc": float(roc_auc), "auprc": float(ap), "brier": float(brier_cal),
            "balanced_accuracy": float(bal_acc), "mcc": float(mcc),
            "confusion_matrix": cm.tolist(), "report": rep
        }
    },
    "backtest": backtest,
}
with open(RUN_DIR / "evaluation.json", "w") as f:
    json.dump(out, f, indent=2)

print("\nBlock 4 abgeschlossen. Artefakte:")
print(" -", RUN_DIR / "preds_test.csv")
print(" -", RUN_DIR / "evaluation.json")
print(" -", RUN_DIR / "figures")



Block 4 abgeschlossen. Artefakte:
 - ..\results\2025-10-18_16-01-40_lstm\preds_test.csv
 - ..\results\2025-10-18_16-01-40_lstm\evaluation.json
 - ..\results\2025-10-18_16-01-40_lstm\figures
