In [21]:
import os, sys, json, time
from pathlib import Path
import numpy as np
import pandas as pd

ROOT = os.path.abspath("..")
if ROOT not in sys.path:
    sys.path.insert(0, ROOT)

with open(os.path.join(ROOT, "config.json"), "r") as f:
    C = json.load(f)

TICKER   = C["ticker"]; START = C["start"]; END = C["end"]; INTERVAL = C["interval"]
HORIZON  = int(C["horizon"]); LOOKBACK = int(C["lookback"])
BATCH    = int(C["batch"]);   EPOCHS   = int(C["epochs"])
SEED     = int(C.get("seed", 42))
FEATURESET = C.get("featureset", "v2")
EPS_MODE   = C.get("epsilon_mode", "abs")
EPSILON    = float(C.get("epsilon", 0.001))  # 10bp

RESULTS_DIR = Path(C.get("results_dir", "../results"))
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
RUN_DIR   = RESULTS_DIR / time.strftime("%Y-%m-%d_%H-%M-%S_lstm")
RUN_DIR.mkdir(parents=True, exist_ok=True)
print("RUN_DIR:", RUN_DIR)

# --> Train-CSV wie in Block 2 benannt
eps_tag   = f"{EPS_MODE}{str(EPSILON).replace('.','p')}"
TRAIN_CSV = f"../data/{TICKER}_{INTERVAL}_{START}_{END}_cls_h{HORIZON}_{eps_tag}.csv"

RUN_DIR: ..\results\2025-10-18_16-13-23_lstm


In [22]:
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import (
    classification_report, confusion_matrix,
    balanced_accuracy_score, matthews_corrcoef, average_precision_score,
    roc_auc_score
)

In [23]:
import yaml

df = pd.read_csv(TRAIN_CSV, index_col=0, parse_dates=True).sort_index()

OHLCV = {"open","high","low","close","volume"}
yaml_path = f"../data/features_{FEATURESET}.yml"

with open(yaml_path, "r") as f:
    meta = yaml.safe_load(f) or {}

FEATURES = [c for c in meta.get("features", []) if c in df.columns]
assert len(FEATURES) > 0, f"Keine nutzbaren Features in {yaml_path} gefunden."

TARGET = "target"
X = df[FEATURES].copy()
y = df[TARGET].astype(int).copy()

print("FEATURES (final):", FEATURES)


FEATURES (final): ['logret_1d', 'logret_3d', 'logret_5d', 'realized_vol_10', 'bb_pos', 'rsi_14', 'macd', 'macd_sig', 'macd_diff', 'vol_z_20', 'sma_diff']


In [24]:
# === 4) Chronologische Splits (70/15/15) ===

from sklearn.preprocessing import StandardScaler

n = len(df)
n_train = int(n * 0.70)
n_val   = int(n * 0.15)
n_test  = n - n_train - n_val

train_idx = slice(0, n_train)
val_idx   = slice(n_train, n_train + n_val)
test_idx  = slice(n_train + n_val, n)

X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
X_val,   y_val   = X.iloc[val_idx],   y.iloc[val_idx]
X_test,  y_test  = X.iloc[test_idx],  y.iloc[test_idx]

print(f"Split sizes → train {len(X_train)}, val {len(X_val)}, test {len(X_test)}")

Split sizes → train 2381, val 510, test 511


In [25]:
print("X_train shape/check:", X_train.shape, " | cols:", list(X_train.columns))


X_train shape/check: (2381, 11)  | cols: ['logret_1d', 'logret_3d', 'logret_5d', 'realized_vol_10', 'bb_pos', 'rsi_14', 'macd', 'macd_sig', 'macd_diff', 'vol_z_20', 'sma_diff']


In [26]:
# === 5) Scaler nur auf TRAIN fitten ===
scaler = StandardScaler(with_mean=True, with_std=True)
X_train_s = pd.DataFrame(scaler.fit_transform(X_train), index=X_train.index, columns=FEATURES)
X_val_s   = pd.DataFrame(scaler.transform(X_val),       index=X_val.index,   columns=FEATURES)
X_test_s  = pd.DataFrame(scaler.transform(X_test),      index=X_test.index,  columns=FEATURES)

# Scaler speichern (für spätere Runs/Inference)
import joblib, io
joblib.dump(scaler, RUN_DIR / "scaler.joblib")

['..\\results\\2025-10-18_16-13-23_lstm\\scaler.joblib']

In [27]:
def drift_summary(Xa: pd.DataFrame, Xb: pd.DataFrame):
    out = []
    for c in Xa.columns:
        m1, s1 = Xa[c].mean(), Xa[c].std(ddof=1)
        m2, s2 = Xb[c].mean(), Xb[c].std(ddof=1)
        ratio_std = float((s2 + 1e-9) / (s1 + 1e-9))
        diff_mean = float(m2 - m1)
        out.append({"feature": c, "mean_diff": diff_mean, "std_ratio": ratio_std})
    return pd.DataFrame(out).sort_values("std_ratio", ascending=False)

drift_df = drift_summary(X_train_s, X_test_s)
drift_df.to_csv(RUN_DIR / "drift_train_vs_test.csv", index=False)
print(drift_df.head())


     feature  mean_diff  std_ratio
8  macd_diff   0.144330   3.356402
6       macd   0.088575   2.925897
7   macd_sig   0.049833   2.866334
2  logret_5d  -0.055026   1.067165
1  logret_3d  -0.041238   1.064453


In [28]:
# Warn-/Abbruchschwellen gegen Train→Test-Shift
bad = drift_df[(drift_df["std_ratio"] < 0.85) | (drift_df["mean_diff"].abs() > 1.0)]
if not bad.empty:
    print("\n[WARN] Starker Feature-Shift erkannt:\n", bad)
    # Optional hart abbrechen:
    # raise RuntimeError("Zu starker Drift in obigen Features – bitte Feature-Set stationär halten.")

In [29]:
# === 6) Windowing: Sequenzen der Länge LOOKBACK → Label am Endzeitpunkt ===
def make_windows(X_df: pd.DataFrame, y_ser: pd.Series, lookback: int):
    X_values = X_df.values.astype(np.float32)
    y_values = y_ser.values.astype(np.int32)
    n = len(X_df)
    xs, ys = [], []
    for i in range(lookback-1, n):
        xs.append(X_values[i - lookback + 1 : i + 1])  # inkl. i
        ys.append(y_values[i])                          # Label für Zeitpunkt i (Up/Down für i->i+H)
    return np.stack(xs, axis=0), np.array(ys)

Xtr_win, ytr = make_windows(X_train_s, y_train, LOOKBACK)
Xva_win, yva = make_windows(X_val_s,   y_val,   LOOKBACK)
Xte_win, yte = make_windows(X_test_s,  y_test,  LOOKBACK)

print("Shapes:",
      "\n  train:", Xtr_win.shape, ytr.shape,
      "\n  val  :", Xva_win.shape, yva.shape,
      "\n  test :", Xte_win.shape, yte.shape)

Shapes: 
  train: (2322, 60, 11) (2322,) 
  val  : (451, 60, 11) (451,) 
  test : (452, 60, 11) (452,)


In [30]:
# === class_weight (optional) aus Trainingslabels berechnen ===
from collections import Counter
cw = None
counts = Counter(ytr.tolist())
if len(counts) == 2:
    total = sum(counts.values())
    # einfache Invers-Häufigkeit (normalisiert), robust bei leichter Schieflage
    cw = {0: total/(2*counts.get(0, 1)), 1: total/(2*counts.get(1, 1))}
print("class_weight:", cw)


class_weight: {0: 0.9880851063829788, 1: 1.012205754141238}


In [31]:
# === 7) tf.data Pipelines ===
def to_ds(X, y, batch, shuffle):
    ds = tf.data.Dataset.from_tensor_slices((X, y))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(X), seed=SEED, reshuffle_each_iteration=True)
    return ds.batch(batch).prefetch(tf.data.AUTOTUNE)

ds_train = to_ds(Xtr_win, ytr, BATCH, shuffle=True)
ds_val   = to_ds(Xva_win, yva, BATCH, shuffle=False)
ds_test  = to_ds(Xte_win, yte, BATCH, shuffle=False)

In [32]:
# Basis-Rate im Training (für Output-Bias)
pos_rate_train = float(ytr.mean())
from math import log
def _logit(p): 
    eps = 1e-6
    p = min(max(p, eps), 1-eps)
    return log(p/(1-p))
output_bias_init = tf.keras.initializers.Constant(_logit(pos_rate_train))
print("pos_rate_train:", round(pos_rate_train,3))


pos_rate_train: 0.494


In [33]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, matthews_corrcoef

logit = LogisticRegression(max_iter=200, n_jobs=None)
logit.fit(X_train_s.iloc[LOOKBACK-1:], y_train.iloc[LOOKBACK-1:])  # grob: letztes Fensterende
y_proba_lr = logit.predict_proba(X_test_s.iloc[LOOKBACK-1:])[:,1]

print(f"[Diag] y_proba range: {y_proba_lr.min():.3f} .. {y_proba_lr.max():.3f}, mean={y_proba_lr.mean():.3f}")
print("LogReg AUROC:", round(roc_auc_score(y_test.iloc[LOOKBACK-1:], y_proba_lr), 3))
print("LogReg MCC@0.5:", round(matthews_corrcoef(y_test.iloc[LOOKBACK-1:], (y_proba_lr>=0.5).astype(int)), 3))

[Diag] y_proba range: 0.214 .. 0.659, mean=0.493
LogReg AUROC: 0.441
LogReg MCC@0.5: -0.047


In [34]:
# === Modell: GRU + Regularisierung ============================================
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers

np.random.seed(SEED); tf.random.set_seed(SEED)

n_features = X_train_s.shape[1]

model = keras.Sequential([
    layers.Input(shape=(LOOKBACK, n_features)),
    layers.GRU(32, return_sequences=True, recurrent_dropout=0.10),
    layers.LayerNormalization(),
    layers.GRU(16, recurrent_dropout=0.10),
    layers.LayerNormalization(),
    layers.Dense(16, activation="relu", kernel_regularizer=regularizers.l2(1e-5)),
    layers.Dense(1, activation="sigmoid"),
])

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=5e-4),
    loss=keras.losses.BinaryCrossentropy(),  # <-- BCE statt Focal
    metrics=[
        keras.metrics.AUC(name="auc"),
        keras.metrics.AUC(name="auprc", curve="PR"),
        keras.metrics.BinaryAccuracy(name="acc"),
        keras.metrics.Precision(name="prec"),
        keras.metrics.Recall(name="rec"),
    ],
)

ckpt_path = RUN_DIR / "best.keras"
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath=str(ckpt_path),
        monitor="val_auprc", mode="max", save_best_only=True, verbose=1),
    keras.callbacks.EarlyStopping(
        monitor="val_auprc", mode="max", patience=12, restore_best_weights=True),
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_auprc", mode="max", factor=0.5, patience=6, min_lr=1e-5, verbose=1),
]

# Datensätze wie gehabt: ds_train, ds_val, ds_test
history = model.fit(
    ds_train, validation_data=ds_val, epochs=EPOCHS,
    callbacks=callbacks, verbose=1
)
pd.DataFrame(history.history).to_csv(RUN_DIR / "history.csv", index=False)

Epoch 1/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - acc: 0.5247 - auc: 0.5138 - auprc: 0.5150 - loss: 0.7998 - prec: 0.5510 - rec: 0.3296
Epoch 1: val_auprc improved from None to 0.46503, saving model to ..\results\2025-10-18_16-13-23_lstm\best.keras
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - acc: 0.5129 - auc: 0.5071 - auprc: 0.5024 - loss: 0.7485 - prec: 0.5093 - rec: 0.3836 - val_acc: 0.4856 - val_auc: 0.4725 - val_auprc: 0.4650 - val_loss: 0.7142 - val_prec: 0.4675 - val_rec: 0.5324 - learning_rate: 5.0000e-04
Epoch 2/100
[1m34/37[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 14ms/step - acc: 0.4982 - auc: 0.5102 - auprc: 0.5055 - loss: 0.7005 - prec: 0.4953 - rec: 0.5154
Epoch 2: val_auprc improved from 0.46503 to 0.46800, saving model to ..\results\2025-10-18_16-13-23_lstm\best.keras
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - acc: 0.5146 - auc: 0.5172 - auprc: 0.5090 - l

In [35]:
# === 11) Evaluate & Berichte ===
# Best Weights sind dank EarlyStopping bereits geladen
test_metrics = model.evaluate(ds_test, return_dict=True, verbose=0)
print("Test metrics:", json.dumps(test_metrics, indent=2))

# Schwellenwert 0.5 (später kalibrierbar)
y_proba = model.predict(ds_test, verbose=0).ravel()
y_pred  = (y_proba >= 0.5).astype(int)

print("\nConfusion matrix (test):\n", confusion_matrix(yte, y_pred))
print("\nClassification report (test):\n", classification_report(yte, y_pred, digits=3))

Test metrics: {
  "acc": 0.4845132827758789,
  "auc": 0.4905026853084564,
  "auprc": 0.4944872260093689,
  "loss": 0.7168943881988525,
  "prec": 0.4882352948188782,
  "rec": 0.3624454140663147
}

Confusion matrix (test):
 [[136  87]
 [146  83]]

Classification report (test):
               precision    recall  f1-score   support

           0      0.482     0.610     0.539       223
           1      0.488     0.362     0.416       229

    accuracy                          0.485       452
   macro avg      0.485     0.486     0.477       452
weighted avg      0.485     0.485     0.477       452



In [36]:
# Val-basierte Schwelle (max MCC, Korridor) direkt in Block 3
from sklearn.metrics import matthews_corrcoef, roc_auc_score
val_proba = model.predict(ds_val, verbose=0).ravel()

def choose_threshold(y_true, y_prob, bounds=(0.35, 0.65)):
    uniq = np.unique(y_prob); cand = np.r_[0.0, uniq, 1.0]
    best_t, best_s = 0.5, -1
    for t in cand:
        yp = (y_prob >= t).astype(int)
        pr = yp.mean()
        if not (bounds[0] <= pr <= bounds[1]): 
            continue
        s = matthews_corrcoef(y_true, yp)
        if s > best_s: best_s, best_t = s, float(t)
    return best_t

thr = choose_threshold(yva, val_proba, bounds=(0.35,0.65))
y_proba = model.predict(ds_test, verbose=0).ravel()
y_pred  = (y_proba >= thr).astype(int)

In [37]:
# --- Diagnose der Probabilitäten ---
import numpy as np
from sklearn.metrics import roc_auc_score

print("Proba stats  (test): min=", float(y_proba.min()), 
      "max=", float(y_proba.max()), "mean=", float(y_proba.mean()))

# AUC auf VAL & TEST (Ranking-Qualität, unabhängig vom Threshold)
val_proba = model.predict(ds_val, verbose=0).ravel()
print("AUROC val/test:", 
      round(roc_auc_score(yva, val_proba), 3), "/", 
      round(roc_auc_score(yte, y_proba), 3))

# Quick check: Ist das Signal invertiert?
if roc_auc_score(yva, val_proba) < 0.5:
    print("⚠️ AUROC < 0.5 auf VAL → Versuch: invertiere Scores (1-p)")
    y_proba_inverted = 1.0 - y_proba
    from sklearn.metrics import classification_report, confusion_matrix
    y_pred_inv = (y_proba_inverted >= 0.5).astype(int)
    print("Confusion (inv, thr=0.5):\n", confusion_matrix(yte, y_pred_inv))
    print("Report (inv):\n", classification_report(yte, y_pred_inv, digits=3))


Proba stats  (test): min= 0.2499282956123352 max= 0.7504221200942993 mean= 0.4735203683376312
AUROC val/test: 0.539 / 0.492


In [38]:
# === Extra Test-Metriken ===
bal_acc = balanced_accuracy_score(yte, y_pred)
mcc = matthews_corrcoef(yte, y_pred)
auprc_test = average_precision_score(yte, y_proba)  # probabilistische PR-Qualität

extra = {
    "balanced_accuracy": float(bal_acc),
    "mcc": float(mcc),
    "auprc": float(auprc_test)
}
print("Extra test metrics:", json.dumps(extra, indent=2))

# persistieren
with open(RUN_DIR / "extra_test_metrics.json", "w") as f:
    json.dump(extra, f, indent=2)


Extra test metrics: {
  "balanced_accuracy": 0.48604774120273364,
  "mcc": -0.02804768357113704,
  "auprc": 0.49924492936590836
}


In [39]:
env_info = {
    "python": sys.version, "tensorflow": tf.__version__,
    "seed": SEED, "lookback": LOOKBACK, "featureset": FEATURESET,
    "features_used": FEATURES,
    "batch": BATCH, "epochs": EPOCHS,
    "cell": "GRU", "width1": 32, "width2": 16,
    "dropout": 0.10, "lr": 5e-4,
    "loss": "BCE",
    "epsilon_mode": EPS_MODE, "epsilon": EPSILON, "horizon": HORIZON
}
with open(RUN_DIR / "env_info.json", "w") as f: json.dump(env_info, f, indent=2)

# Beim finalen Config-Dump
with open(RUN_DIR / "config.json", "w") as f:
    json.dump({
        "ticker": TICKER, "start": START, "end": END, "interval": INTERVAL,
        "horizon": HORIZON, "lookback": LOOKBACK,
        "featureset": FEATURESET, "features": FEATURES,
        "scaler": "StandardScaler", "seed": SEED, "batch": BATCH, "epochs": EPOCHS,
        "cell": "GRU", "width1": 32, "width2": 16, "dropout": 0.10, "lr": 5e-4,
        "loss": "BCE", "epsilon_mode": EPS_MODE, "epsilon": EPSILON
    }, f, indent=2)


In [40]:
# === 12) Artefakte sichern ===
# Keras-Format (SavedModel) + Gewichte
model.save(RUN_DIR / "model.keras")
np.save(RUN_DIR / "y_test.npy", yte)
np.save(RUN_DIR / "y_proba.npy", y_proba)
with open(RUN_DIR / "config.json", "w") as f:
    json.dump({
        "ticker": TICKER, "start": START, "end": END, "interval": INTERVAL,
        "horizon": HORIZON, "lookback": LOOKBACK, "features": FEATURES,
        "scaler": "StandardScaler", "seed": SEED, "batch": BATCH, "epochs": EPOCHS
    }, f, indent=2)
print(f"\nArtefakte gespeichert in: {RUN_DIR}")


Artefakte gespeichert in: ..\results\2025-10-18_16-13-23_lstm
