In [1]:
import os, sys, json, time, glob
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import (
    roc_curve, auc, precision_recall_curve, f1_score,
    confusion_matrix, classification_report, brier_score_loss
)
from sklearn.calibration import calibration_curve
import joblib

In [2]:
# --- Setup & load config ---
# Make the project root (parent of the notebook directory) importable.
ROOT = os.path.abspath("..")
if ROOT not in sys.path:
    sys.path.insert(0, ROOT)

with open(os.path.join(ROOT, "config.json"), "r") as f:
    C = json.load(f)

# Data selection parameters (used to build the dataset file name).
TICKER, START, END, INTERVAL = (
    C[key] for key in ("ticker", "start", "end", "interval")
)
# Numeric hyper-parameters are coerced to int in case the config stores them differently.
HORIZON, LOOKBACK, BATCH, EPOCHS = (
    int(C[key]) for key in ("horizon", "lookback", "batch", "epochs")
)
SEED = int(C.get("seed", 42))

# Results root; resolved relative to the current working directory when the
# config does not override it.
RESULTS_DIR = Path(C.get("results_dir", "../results"))
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

def _latest_run_dir(results_dir: Path) -> Path:
    runs = sorted(results_dir.glob("*_lstm"), key=lambda p: p.stat().st_mtime, reverse=True)
    if not runs:
        raise FileNotFoundError("Kein RUN_DIR gefunden. Bitte Block 3 trainieren.")
    return runs[0]

# Evaluate the newest training run found under RESULTS_DIR.
RUN_DIR = _latest_run_dir(results_dir=RESULTS_DIR)
print("RUN_DIR:", RUN_DIR)

RUN_DIR: ..\results\2025-10-02_22-01-04_lstm


In [3]:
# --- Load artifacts ---
# Files expected inside the run directory: model, fitted scaler, run config.
MODEL_PATH, SCALER_PATH, CFG_PATH = (
    RUN_DIR / file_name
    for file_name in ("model.keras", "scaler.joblib", "config.json")
)
assert all(
    artifact.exists() for artifact in (MODEL_PATH, SCALER_PATH, CFG_PATH)
), "Fehlende Artefakte."

# Config snapshot stored alongside the run.
with open(CFG_PATH, "r") as f:
    RCFG = json.load(f)

In [4]:
# Consistency check: the run must have been trained with the horizon/lookback
# that the current project config specifies.
assert RCFG["horizon"] == HORIZON, "Config-Mismatch."
assert RCFG["lookback"] == LOOKBACK, "Config-Mismatch."
FEATURES = RCFG.get("features", ["logret_1d"])

model = keras.models.load_model(MODEL_PATH)
# NOTE(review): joblib.load unpickles the file — only load run artifacts you trust.
scaler = joblib.load(SCALER_PATH)

# Dataset path is relative to the current working directory.
TRAIN_CSV = f"../data/{TICKER}_{INTERVAL}_{START}_{END}_cls_h{HORIZON}.csv"
df = pd.read_csv(TRAIN_CSV, index_col=0, parse_dates=True)
df = df.sort_index()

In [5]:
# Expected columns: every model feature plus the label and the close price.
exp = {*FEATURES, "target", "close"}
missing = exp.difference(df.columns)
assert not missing, f"Fehlende Spalten: {missing}"
# The index must be unique and the frame must not contain any NaN.
assert df.index.is_unique
assert not df.isna().any().any()

In [6]:
# --- Splits as in Block 3 (70/15/15) ---
# Contiguous positional split over the sorted frame: train, then val, then test.
n = len(df)
n_train, n_val = int(n * 0.70), int(n * 0.15)
n_test = n - n_train - n_val  # remainder goes to the test split

train_idx, val_idx, test_idx = (
    slice(0, n_train),
    slice(n_train, n_train + n_val),
    slice(n_train + n_val, n),
)

X = df[FEATURES].copy()
y = df["target"].astype(int).copy()

(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    (X.iloc[part], y.iloc[part]) for part in (train_idx, val_idx, test_idx)
)

In [7]:
# --- Scaling (scaler loaded from the run; fitted on TRAIN only, as in Block 3) ---
# transform() returns a bare array, so re-wrap it to keep the index and
# the feature column names.
X_train_s, X_val_s, X_test_s = (
    pd.DataFrame(scaler.transform(part), index=part.index, columns=FEATURES)
    for part in (X_train, X_val, X_test)
)

In [8]:
# --- Windowing as in Block 3 ---
def make_windows(X_df: pd.DataFrame, y_ser: pd.Series, lookback: int):
    """Build overlapping look-back windows and their end-of-window labels.

    For every row position ``i >= lookback - 1`` a window made of rows
    ``i - lookback + 1 .. i`` (inclusive) is emitted together with ``y[i]``.

    Args:
        X_df: Feature frame; its index supplies the window end timestamps.
        y_ser: Labels aligned positionally with ``X_df`` (same length).
        lookback: Number of rows per window; must be >= 1.

    Returns:
        Tuple ``(X_win, y_win, idx_end)``:
        ``X_win`` float32 array of shape ``(n - lookback + 1, lookback, n_features)``,
        ``y_win`` int32 array with the label at each window end,
        ``idx_end`` ``DatetimeIndex`` with the timestamp of each window end.

    Raises:
        ValueError: If ``lookback < 1``, if ``X_df`` and ``y_ser`` differ in
            length, or if ``X_df`` has fewer rows than ``lookback``.
    """
    if lookback < 1:
        # lookback == 0 would otherwise silently yield zero-length windows.
        raise ValueError(f"lookback must be >= 1, got {lookback}")
    n = len(X_df)
    if len(y_ser) != n:
        raise ValueError(f"X_df and y_ser differ in length: {n} vs {len(y_ser)}")
    if n < lookback:
        # Fail with a clear message instead of np.stack's "need at least one array".
        raise ValueError(f"need at least lookback={lookback} rows, got {n}")

    X_values = X_df.values.astype(np.float32)
    y_values = y_ser.values.astype(np.int32)

    end_positions = range(lookback - 1, n)
    xs = [X_values[i - lookback + 1 : i + 1] for i in end_positions]
    ys = y_values[lookback - 1 :].copy()
    idx_end = [X_df.index[i] for i in end_positions]  # window end timestamps
    return np.stack(xs, axis=0), ys, pd.DatetimeIndex(idx_end)

# Window each split on its own, so no window spans a split boundary.
Xtr_win, ytr, idx_tr = make_windows(X_df=X_train_s, y_ser=y_train, lookback=LOOKBACK)
Xva_win, yva, idx_va = make_windows(X_df=X_val_s, y_ser=y_val, lookback=LOOKBACK)
Xte_win, yte, idx_te = make_windows(X_df=X_test_s, y_ser=y_test, lookback=LOOKBACK)

def to_ds(X, y, batch, shuffle):
    """Wrap window arrays in a batched, prefetched ``tf.data.Dataset``.

    Args:
        X: Window array; sliced along its first axis.
        y: Labels; sliced along the first axis together with ``X``.
        batch: Batch size.
        shuffle: If true, shuffle once with the global ``SEED`` using a buffer
            that covers the whole dataset (order is not reshuffled per iteration).

    Returns:
        The dataset yielding ``(X_batch, y_batch)`` pairs.
    """
    ds = tf.data.Dataset.from_tensor_slices((X, y))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(X), seed=SEED, reshuffle_each_iteration=False)
    # Use the caller-supplied batch size (previously the global BATCH was used
    # and the parameter was silently ignored).
    return ds.batch(batch).prefetch(tf.data.AUTOTUNE)

# Evaluation datasets keep their chronological order (no shuffling).
ds_val = to_ds(Xva_win, yva, batch=BATCH, shuffle=False)
ds_test = to_ds(Xte_win, yte, batch=BATCH, shuffle=False)

In [9]:
# --- Probabilities ---
# Flatten the model output to one score per window.
# NOTE(review): assumes a single-output model emitting P(y=1) — confirm against Block 3.
y_val_proba, y_test_proba = (
    model.predict(dataset, verbose=0).ravel() for dataset in (ds_val, ds_test)
)

In [12]:
def best_threshold_f1_strict(y_true, y_prob, min_pos_pred=1, min_neg_pred=1):
    """Pick the decision threshold that maximises F1, skipping degenerate cuts.

    Every unique score (plus 0.0 and 1.0) is tried as a threshold. A candidate
    is ignored when it yields fewer than ``min_pos_pred`` positive or fewer than
    ``min_neg_pred`` negative predictions. On ties the lowest threshold wins.
    If no candidate qualifies, the Youden-J optimum of the ROC curve is used,
    clipped to ``[0.1, 0.9]``.

    Args:
        y_true: Ground-truth binary labels.
        y_prob: Predicted scores for the positive class.
        min_pos_pred: Minimum number of positive predictions required.
        min_neg_pred: Minimum number of negative predictions required.

    Returns:
        Tuple ``(threshold, f1)`` for the selected threshold.
    """
    # Candidates: all unique scores, with 0 and 1 added for safety.
    candidates = np.r_[0.0, np.unique(y_prob), 1.0]

    best_t, best_f1 = 0.5, -1.0
    for cut in candidates:
        labels = (y_prob >= cut).astype(int)
        n_pos = labels.sum()
        n_neg = len(labels) - n_pos
        if n_pos >= min_pos_pred and n_neg >= min_neg_pred:
            score = f1_score(y_true, labels, zero_division=0)
            if score > best_f1:
                best_t, best_f1 = float(cut), score
        # otherwise: degenerate threshold, skip it

    if best_f1 >= 0:
        return best_t, best_f1

    # Fallback when every candidate was degenerate.
    # 1) Youden's J statistic on the ROC curve.
    fpr, tpr, thr_roc = roc_curve(y_true, y_prob)
    youden_j = tpr - fpr
    best_t = float(thr_roc[np.nanargmax(youden_j)])
    # 2) Clip so the threshold does not end up at an extreme.
    best_t = float(np.clip(best_t, 0.1, 0.9))
    best_f1 = f1_score(y_true, (y_prob >= best_t).astype(int), zero_division=0)
    return best_t, best_f1

# Usage: the threshold is tuned on the validation split only.
thr, f1_val = best_threshold_f1_strict(y_true=yva, y_prob=y_val_proba)
print(f"Gewählter Threshold (Val): {thr:.3f} | F1_val={f1_val:.3f}")

# Test predictions with the validation-selected threshold.
y_test_pred = np.greater_equal(y_test_proba, thr).astype(int)
print("Test positives predicted:", int(y_test_pred.sum()))


Gewählter Threshold (Val): 0.499 | F1_val=0.672
Test positives predicted: 449


In [13]:
# --- Evaluation on the test split ---
y_test_pred = (y_test_proba >= thr).astype(int)

print("Test positives true:", int(yte.sum()), "von", len(yte))
print("Test positives predicted:", int(y_test_pred.sum()))
print("Threshold:", thr)

# Threshold-free metrics, computed from the raw probabilities.
fpr, tpr, _ = roc_curve(yte, y_test_proba)
roc_auc = auc(fpr, tpr)
prec, rec, _ = precision_recall_curve(yte, y_test_proba)
brier = brier_score_loss(yte, y_test_proba)

# Threshold-dependent metrics, computed from the hard predictions.
cm = confusion_matrix(yte, y_test_pred)
test_report = classification_report(yte, y_test_pred, digits=3, output_dict=True)

metrics_out = {
    "val": {"threshold": thr, "f1": f1_val},
    "test": {
        "roc_auc": float(roc_auc),
        "brier": float(brier),
        "report": test_report,
        "confusion_matrix": cm.tolist(),  # nested lists, so the dict is JSON-serialisable
    },
}

Test positives true: 247 von 457
Test positives predicted: 449
Threshold: 0.49851155281066895


In [14]:
# --- Folder for plots (inside the run directory) ---
FIG_DIR = RUN_DIR.joinpath("figures")
FIG_DIR.mkdir(parents=True, exist_ok=True)

In [16]:
# --- ROC & PR curves ---
plt.figure(figsize=(6,4))
plt.plot(fpr, tpr, label=f"ROC AUC={roc_auc:.3f}")
plt.plot([0,1],[0,1], linestyle="--")  # chance diagonal
plt.xlabel("FPR")
plt.ylabel("TPR")
plt.title("ROC (Test)")
plt.legend()
plt.tight_layout()
plt.savefig(FIG_DIR / "roc_test.png", dpi=160)
plt.close()

plt.figure(figsize=(6,4))
# Area under the PR curve = integral of precision over recall. The previous
# np.trapezoid(rec, prec) call had y/x swapped (it integrated recall over
# precision). sklearn's auc(x, y) accepts the monotonically decreasing recall
# array returned by precision_recall_curve and yields a positive area.
apprx_auc_pr = auc(rec, prec)
plt.plot(rec, prec, label=f"AP≈{apprx_auc_pr:.3f}")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall (Test)")
plt.legend()
plt.tight_layout()
plt.savefig(FIG_DIR / "pr_test.png", dpi=160)
plt.close()

In [17]:
# --- Calibration ---
# Reliability curve with 10 quantile bins.
prob_true, prob_pred = calibration_curve(yte, y_test_proba, n_bins=10, strategy="quantile")

fig, ax = plt.subplots(figsize=(6, 4))
ax.plot([0, 1], [0, 1], "--")  # perfect-calibration diagonal
ax.plot(prob_pred, prob_true, marker="o")
ax.set_xlabel("Vorhergesagte Wahrscheinlichkeit")
ax.set_ylabel("Tatsächlicher Anteil 1")
ax.set_title("Kalibrierung (Test)")
fig.tight_layout()
fig.savefig(FIG_DIR / "calibration_test.png", dpi=160)
plt.close(fig)

In [18]:
# --- Confusion matrix ---
plt.figure(figsize=(4.5,4))
plt.imshow(cm, interpolation="nearest")
plt.title("Confusion Matrix (Test)")
plt.colorbar()

tick_marks = np.arange(2)
plt.xticks(tick_marks, ["0","1"])
plt.yticks(tick_marks, ["0","1"])

# Write each count into its cell; rows are true labels, columns are predictions.
for i, j in np.ndindex(*cm.shape):
    plt.text(j, i, cm[i, j], ha="center", va="center")

plt.xlabel("Predicted")
plt.ylabel("True")
plt.tight_layout()
plt.savefig(FIG_DIR / "cm_test.png", dpi=160)
plt.close()

In [19]:
# --- Histogram of the test probabilities ---
fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(y_test_proba, bins=30)
ax.axvline(thr, linestyle="--")  # selected decision threshold
ax.set_title("P(y=1) Verteilung (Test)")
fig.tight_layout()
fig.savefig(FIG_DIR / "proba_hist_test.png", dpi=160)
plt.close(fig)

In [20]:
# --- Predictions CSV (test) with time axis ---
# One row per test window, indexed by the window's end timestamp.
preds_test = pd.DataFrame(
    {
        "timestamp": idx_te,
        "y_true": yte,
        "y_proba": y_test_proba,
        "y_pred": y_test_pred,
    }
).set_index("timestamp")
preds_test.to_csv(RUN_DIR / "preds_test.csv")

In [21]:
# --- Simple trading strategy & backtest ---
# Signal: long when proba >= thr at time t (window end), held until t+HORIZON.
# Return: realised forward log return on the close price.
# NOTE(review): a position is opened at every signalled timestamp and its
# HORIZON-step return is summed into the equity curve, so for HORIZON > 1 the
# holding periods overlap — confirm this is intended before comparing the
# result with buy & hold.
close = df["close"].copy()
log_close = np.log(close)
fwd_logret = (log_close.shift(-HORIZON) - log_close).reindex(idx_te)

signals = preds_test["y_proba"].ge(thr).astype(int).reindex(idx_te)
strategy_logret = signals * fwd_logret  # long only, otherwise 0

# Missing forward returns (the last HORIZON rows) count as 0; starting capital = 1.
equity = np.exp(strategy_logret.fillna(0).cumsum())

# Buy & hold: close-to-close log return relative to the first test timestamp.
log_close_te = log_close.reindex(idx_te)
bh_logret = (log_close_te - log_close_te.iloc[0]).fillna(0)
bh_equity = np.exp(bh_logret)

In [22]:
# Kennzahlen (naiv, ohne Gebühren/Slippage)
def _sharpe(logrets, periods_per_year=252):
    if len(logrets) < 2: return float("nan")
    mu = logrets.mean() * periods_per_year
    sigma = logrets.std(ddof=1) * np.sqrt(periods_per_year)
    return float(mu / (sigma + 1e-12))

def _cagr(equity_series, periods_per_year=252):
    if len(equity_series) < 2: return float("nan")
    T = len(equity_series) / periods_per_year
    return float((equity_series.iloc[-1] / equity_series.iloc[0])**(1.0/T) - 1.0)

# Summary statistics for the strategy and the buy & hold benchmark.
strategy_stats = {
    "CAGR": _cagr(equity),
    "Sharpe": _sharpe(strategy_logret.dropna()),
    "final_equity": float(equity.iloc[-1]),
}
buy_hold_stats = {
    "CAGR": _cagr(bh_equity),
    "final_equity": float(bh_equity.iloc[-1]),
}
backtest = {
    "n_trades": int(signals.sum()),  # number of timestamps with a long signal
    "avg_holding_h": HORIZON,
    "strategy": strategy_stats,
    "buy_hold": buy_hold_stats,
}

In [24]:
# Plot: equity curves of the strategy vs. buy & hold
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(equity.index, equity.values, label="Strategy")
ax.plot(bh_equity.index, bh_equity.values, label="Buy & Hold", linestyle="--")
ax.set_title(f"Equity Curve (H={HORIZON})")
ax.legend()
fig.tight_layout()
fig.savefig(FIG_DIR / "equity_curves.png", dpi=160)
plt.close(fig)

# Plot: forward return grouped by signal (quality check)
fig, ax = plt.subplots(figsize=(6, 4))
ax.boxplot(
    [fwd_logret[signals == 0].dropna(), fwd_logret[signals == 1].dropna()],
    tick_labels=["Signal=0", "Signal=1"],
)
ax.set_title("Forward Log-Return nach Signal")
fig.tight_layout()
fig.savefig(FIG_DIR / "forward_returns_by_signal.png", dpi=160)
plt.close(fig)

In [25]:
# --- Write results ---
# Bundle run config, threshold choice, metrics and backtest into one JSON file.
out = {
    "config": RCFG,
    "threshold_selection": dict(
        strategy="max_f1_on_validation", threshold=thr, f1_val=f1_val
    ),
    "metrics": metrics_out,
    "backtest": backtest,
}
with open(RUN_DIR / "evaluation.json", "w") as f:
    json.dump(out, f, indent=2)

print("\nBlock 4 abgeschlossen.")
print("Artefakte:")
for artifact in (RUN_DIR / "preds_test.csv", RUN_DIR / "evaluation.json", FIG_DIR):
    print(" -", artifact)


Block 4 abgeschlossen.
Artefakte:
 - ..\results\2025-10-02_22-01-04_lstm\preds_test.csv
 - ..\results\2025-10-02_22-01-04_lstm\evaluation.json
 - ..\results\2025-10-02_22-01-04_lstm\figures
