In [1]:
# === SYSTEM & IMPORTS ===
# Standard-Imports für Dateisystem, Zeitmessung und Datenverarbeitung
import os, sys, json, time, glob
from pathlib import Path
import numpy as np
import pandas as pd

# Put the parent directory on sys.path so that project modules can be imported cleanly.
# ".." is resolved against the working directory at the time this cell runs.
ROOT = os.path.abspath("..")
if ROOT not in sys.path:
    sys.path.insert(0, ROOT)

In [2]:
# === 0) LOAD CONFIGURATION ===
# Read the central config.json from ROOT; its values parameterize the whole experiment.
with open(os.path.join(ROOT, "config.json"), "r") as f:
    C = json.load(f)

# Unpack parameters into module-level constants for easier access.
# "ticker", "start", "end", "interval", "horizon", "lookback", "batch" and "epochs" are
# required keys (KeyError if missing); the remaining ones have defaults.
TICKER   = C["ticker"]; START = C["start"]; END = C["end"]; INTERVAL = C["interval"]
HORIZON  = int(C["horizon"])  # Label horizon: how far ahead do we look?
LOOKBACK = int(C["lookback"]) # Length of the input window (number of past rows)
BATCH    = int(C["batch"]); EPOCHS = int(C["epochs"])
SEED     = int(C.get("seed", 42))
FEATURESET = C.get("featureset", "v2")
EPS_MODE   = C.get("epsilon_mode", "abs")
EPSILON    = float(C.get("epsilon", 0.0005))

# Create the results folder. Every run gets its own timestamped sub-folder.
# NOTE(review): the default "../results" is relative to the working directory, whereas
# config.json is read from ROOT — confirm both point to the same project root.
RESULTS_DIR = Path(C.get("results_dir", "../results"))
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
RUN_DIR   = RESULTS_DIR / time.strftime("%Y-%m-%d_%H-%M-%S_lstm")
RUN_DIR.mkdir(parents=True, exist_ok=True)
print("RUN_DIR:", RUN_DIR)

RUN_DIR: ..\results\2026-01-01_15-05-35_lstm


In [3]:
# === ABLATION STUDIES (EXPERIMENTAL SWITCHES) ===
# Switches for selectively enabling/disabling parts of the model to test what really helps.
# This supports the "failure analysis".
# AB is the optional "ablations" section of config.json (empty dict when absent).
AB = C.get("ablations", {})

def _get_bool(key, default):
    env = os.getenv(key)
    if env is not None:
        return env.strip().lower() in ("1","true","yes","y","on")
    return bool(AB.get(key.lower(), default))

# Should the training data be shuffled? (Normally YES)
ABL_SHUFFLE_TRAIN = _get_bool("ABLATION_SHUFFLE_TRAIN", True)

# Disable recurrent dropout? (Sometimes good for performance/stability)
ABL_NO_RECURRENT_DROPOUT = _get_bool("ABLATION_NO_RECURRENT_DROPOUT", False)

# LayerNormalization placement: "both" (default) or only after the second RNN layer.
# The environment variable wins over the config entry; unknown values fall back to "both".
ABL_LN_LAYOUT = os.getenv("ABLATION_LN_LAYOUT", AB.get("ln_layout", "both")).lower()
if ABL_LN_LAYOUT not in {"both","after_second"}:
    ABL_LN_LAYOUT = "both"

print(f"[Ablations] shuffle_train={ABL_SHUFFLE_TRAIN} | no_recurrent_dropout={ABL_NO_RECURRENT_DROPOUT} | ln_layout={ABL_LN_LAYOUT}")

# Build the training-data file name from the label parameters
# ("." in the epsilon value is replaced by "p", e.g. abs0p0005).
# NOTE: TRAIN_CSV is reassigned later by _resolve_train_csv() in the data-loading cell.
eps_tag   = f"{EPS_MODE}{str(EPSILON).replace('.','p')}"
TRAIN_CSV = f"../data/{TICKER}_{INTERVAL}_{START}_{END}_cls_h{HORIZON}_{eps_tag}.csv"

# Machine Learning Imports
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import (
    classification_report, confusion_matrix,
    balanced_accuracy_score, matthews_corrcoef, average_precision_score,
    roc_auc_score
)

[Ablations] shuffle_train=True | no_recurrent_dropout=False | ln_layout=both


In [4]:
# === 1) BESTE CONFIG LADEN (Aus Hyperparameter-Optimierung) ===
# Wir suchen nach dem neuesten WFCV-Run (Walk-Forward Cross-Validation),
# um die dort gefundenen besten Parameter zu nutzen.
def _latest_best_config(results_dir="../results"):
    pattern = os.path.join(results_dir, "*_wfcv", "best_config.json")
    cands = glob.glob(pattern)
    if not cands:
        return None, None
    # Den neuesten Run nehmen
    cands = sorted(cands, key=os.path.getmtime)
    best_path = cands[-1]
    with open(best_path, "r") as f:
        best_cfg = json.load(f)
    return best_cfg, best_path

# Look for the newest WFCV result inside this run's results directory
BEST_CFG, BEST_CFG_PATH = _latest_best_config(RESULTS_DIR)

# Fallback when no optimization has been run: use default values
if BEST_CFG is None:
    print("[INFO] Keine best_config.json gefunden — nutze Fallback (Config.json-Defaults).")
    BEST_CFG = {
        "features_used": "all",
        "lookback": LOOKBACK,
        "cell": "GRU",
        "width1": 32,
        "width2": 16,
        "dropout": 0.10,
        "lr": 5e-4
    }
else:
    print("Gefunden best_config.json:", BEST_CFG_PATH)

# Set hyperparameters (each key falls back to a default when missing in best_config.json)
CELL    = str(BEST_CFG.get("cell", "GRU")).upper() # GRU or LSTM
WIDTH1  = int(BEST_CFG.get("width1", 32))          # Units in RNN layer 1
WIDTH2  = int(BEST_CFG.get("width2", 16))          # Units in RNN layer 2
DROPOUT = float(BEST_CFG.get("dropout", 0.10))     # Dropout rate (against overfitting)
LR      = float(BEST_CFG.get("lr", 5e-4))          # Learning rate
LB_FROM_BEST = int(BEST_CFG.get("lookback", LOOKBACK)) # Lookback found by the optimization
# A non-positive lookback in best_config.json is ignored in favour of the config.json value
USE_LOOKBACK = LB_FROM_BEST if LB_FROM_BEST > 0 else LOOKBACK
FEATURES_USED_TAG = str(BEST_CFG.get("features_used", "all"))

# Apply ablation logic: with "no_recurrent_dropout" enabled, RDROP is set to 0.
# Otherwise the configured DROPOUT value is used as the recurrent dropout rate.
if ABL_NO_RECURRENT_DROPOUT:
    RDROP = 0.0
    L2_DENSE = 1e-4 # Stronger L2 regularizer as a substitute
else:
    RDROP = DROPOUT
    L2_DENSE = 1e-5

print(f"[Block3 Setup] cell={CELL} width={WIDTH1}/{WIDTH2} rd={RDROP} dp_cfg={DROPOUT} lr={LR} lookback={USE_LOOKBACK} "
      f"| features_used={FEATURES_USED_TAG} | L2(Dense)={L2_DENSE}")

Gefunden best_config.json: ..\results\2026-01-01_15-04-40_wfcv\best_config.json
[Block3 Setup] cell=GRU width=32/16 rd=0.1 dp_cfg=0.1 lr=0.0005 lookback=60 | features_used=mom+vol | L2(Dense)=1e-05


In [5]:
# === 2) DATEN & FEATURES LADEN ===
import yaml, glob, os, re

# Load the feature metadata for the configured feature set.
# meta stays an empty dict when the YAML file is missing or empty.
yaml_path = f"../data/features_{FEATURESET}.yml"
meta = {}
if os.path.exists(yaml_path):
    with open(yaml_path, "r") as f:
        meta = yaml.safe_load(f) or {}

# Funktion, um die richtige CSV-Datei zu finden (auch wenn Dateinamen variieren)
def _resolve_train_csv():
    """Locate the labelled training CSV under ``../data``.

    Candidates are tried in this order:
      1. The file name built from the label settings of the config
         (``HORIZON``, ``EPS_MODE``, ``EPSILON``).
      2. The file name built from the ``label`` section of the feature YAML
         (``meta``); missing entries default to the config values.
      3. The most recently modified file for the same ticker/interval/date
         range with any ``_cls_h*`` label suffix.

    Returns:
        str: path of the first candidate that exists.

    Raises:
        FileNotFoundError: if no candidate exists.
    """
    stem = f"../data/{TICKER}_{INTERVAL}_{START}_{END}_cls_h"

    # 1) Exact match for the configured label definition
    cfg_tag = f"{EPS_MODE}{str(EPSILON).replace('.','p')}"
    cfg_candidate = f"{stem}{HORIZON}_{cfg_tag}.csv"
    if os.path.exists(cfg_candidate):
        return cfg_candidate

    # 2) Label definition taken from the YAML metadata
    label_meta = (meta or {}).get("label", {})
    meta_h = int(label_meta.get("horizon", HORIZON))
    meta_mode = str(label_meta.get("mode", EPS_MODE))
    meta_eps = float(label_meta.get("epsilon", EPSILON))
    meta_tag = f"{meta_mode}{str(meta_eps).replace('.','p')}"
    meta_candidate = f"{stem}{meta_h}_{meta_tag}.csv"
    if os.path.exists(meta_candidate):
        return meta_candidate

    # 3) Nothing matches exactly: take the newest file that looks similar
    similar = glob.glob(f"{stem}*.csv")
    similar.sort(key=os.path.getmtime)
    if not similar:
        raise FileNotFoundError("Kein TRAIN_CSV gefunden. Bitte Block 2 mit Label-Definition laufen lassen.")
    return similar[-1]

# Replace the provisional TRAIN_CSV path with the file that actually exists on disk
TRAIN_CSV = _resolve_train_csv()
print("Loaded TRAIN_CSV:", TRAIN_CSV)

# Wir extrahieren die Label-Infos aus dem Dateinamen oder Metadaten
def _infer_label_from(meta_dict, train_csv_path, fallback_h_from_root):
    h, mode, eps = None, None, None
    # ... (Meta parsing Logic) ...
    m = re.search(r"_cls_h(\d+)_([a-zq]+)([0-9p.]+)\.csv$", str(train_csv_path))
    if m:
        if h    is None: h    = int(m.group(1))
        if mode is None: mode = m.group(2)
        if eps  is None:
            eps_tag = m.group(3)
            eps = float(str(eps_tag).replace("p", "."))
    if h is None:
        h = int(fallback_h_from_root)
    return h, mode, eps

# Align the label parameters with the file that was actually loaded.
# NOTE: this overwrites the module-level HORIZON / EPS_MODE / EPSILON read from config.json.
H_DATA, MODE_DATA, EPS_DATA = _infer_label_from(meta, TRAIN_CSV, HORIZON)
HORIZON  = int(H_DATA)
if MODE_DATA is not None:   EPS_MODE = str(MODE_DATA)
if EPS_DATA  is not None:   EPSILON  = float(EPS_DATA)
print(f"[Label] using horizon={HORIZON} | mode={EPS_MODE} | epsilon={EPSILON}")

# Load the CSV: first column becomes the (date-parsed) index, rows sorted by index
df = pd.read_csv(TRAIN_CSV, index_col=0, parse_dates=True).sort_index()

# Feature selection: "mom_only" (momentum subset) or all features?
# Preferred source is the YAML feature list, restricted to columns present in the CSV;
# without it, every column except OHLCV and the target is treated as a feature.
ALL_FEATURES = [c for c in (meta.get("features", []) if meta else []) if c in df.columns]
if not ALL_FEATURES:
    OHLCV = {"open","high","low","close","volume"}
    ALL_FEATURES = [c for c in df.columns if c not in (OHLCV | {"target"})]

# NOTE(review): only the tag "mom_only" selects a subset. Any other tag (the recorded
# run reports features_used=mom+vol) falls through to ALL_FEATURES — confirm this is intended.
if FEATURES_USED_TAG == "mom_only":
    FEATURES = [c for c in ALL_FEATURES
                if ("logret" in c) or ("macd" in c) or (c in {"sma_diff","rsi_14","bb_pos"})]
else:
    FEATURES = ALL_FEATURES

TARGET = "target"
X = df[FEATURES].copy()
y = df[TARGET].astype(int).copy()
print("FEATURES (final):", FEATURES)

Loaded TRAIN_CSV: ../data/AAPL_1d_2012-01-01_2025-09-01_cls_h1_abs0p0005.csv
[Label] using horizon=1 | mode=abs | epsilon=0.0005
FEATURES (final): ['logret_1d', 'logret_3d', 'logret_5d', 'realized_vol_10', 'bb_pos', 'rsi_14', 'macd', 'macd_sig', 'macd_diff', 'vol_z_20', 'sma_diff']


In [6]:
# === 3) CHRONOLOGICAL SPLIT (Train/Val/Test) ===
# IMPORTANT: time series must NOT be split randomly! The row order has to be preserved.
# 70% of the rows go to training, 15% to validation, and the remainder (~15%) to test.
from sklearn.preprocessing import StandardScaler
n = len(df)
n_train = int(n * 0.70)
n_val   = int(n * 0.15)
# Contiguous positional slices; the test slice absorbs the rounding remainder
train_idx = slice(0, n_train)
val_idx   = slice(n_train, n_train + n_val)
test_idx  = slice(n_train + n_val, n)

X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
X_val,   y_val   = X.iloc[val_idx],   y.iloc[val_idx]
X_test,  y_test  = X.iloc[test_idx],  y.iloc[test_idx]
print(f"Split sizes → train {len(X_train)}, val {len(X_val)}, test {len(X_test)}")

Split sizes → train 2381, val 510, test 511


In [7]:
# === 4) SCALING (StandardScaler) ===
# Neural networks need scaled inputs (mean=0, std=1).
# IMPORTANT: the scaler must be fitted on TRAIN ONLY to avoid data leakage;
# validation and test are transformed with the training statistics.
scaler = StandardScaler(with_mean=True, with_std=True)
X_train_s = pd.DataFrame(scaler.fit_transform(X_train), index=X_train.index, columns=FEATURES)
X_val_s   = pd.DataFrame(scaler.transform(X_val),       index=X_val.index,   columns=FEATURES)
X_test_s  = pd.DataFrame(scaler.transform(X_test),      index=X_test.index,  columns=FEATURES)

# Persist the fitted scaler in the run folder for later use
import joblib
joblib.dump(scaler, RUN_DIR / "scaler.joblib")

# Drift-Check: Sind Train- und Testdaten sehr unterschiedlich?
# Große Unterschiede deuten auf "Regime Changes" hin, was schlecht für Modelle ist.
def drift_summary(Xa: pd.DataFrame, Xb: pd.DataFrame):
    """Compare per-feature mean and spread of two frames.

    For every column of ``Xa`` the result holds the mean difference
    (``Xb`` minus ``Xa``) and the ratio of sample standard deviations
    (``Xb`` over ``Xa``, each offset by 1e-9 to avoid division by zero).

    Returns:
        DataFrame with columns ``feature``, ``mean_diff`` and ``std_ratio``,
        sorted by ``std_ratio`` in descending order.
    """
    tiny = 1e-9
    rows = [
        {
            "feature": col,
            "mean_diff": float(Xb[col].mean() - Xa[col].mean()),
            "std_ratio": float((Xb[col].std(ddof=1) + tiny) / (Xa[col].std(ddof=1) + tiny)),
        }
        for col in Xa.columns
    ]
    summary = pd.DataFrame(rows)
    return summary.sort_values("std_ratio", ascending=False)

# Write the train-vs-test drift table (computed on the scaled features) to the run folder
drift_summary(X_train_s, X_test_s).to_csv(RUN_DIR / "drift_train_vs_test.csv", index=False)

In [8]:
# === 5) WINDOWING (Zeitreihen-Fenster erstellen) ===
# LSTMs brauchen Sequenzen als Input (z.B. die letzten 60 Tage).
# Diese Funktion wandelt die 2D-Daten in 3D-Daten um: (Samples, Timesteps, Features)
def make_windows(X_df: pd.DataFrame, y_ser: pd.Series, lookback: int):
    """Turn a 2D feature table into overlapping 3D windows for an RNN.

    Window ``k`` contains rows ``k .. k + lookback - 1`` of ``X_df`` and is
    paired with the label of its last row, ``y_ser[k + lookback - 1]``.

    Args:
        X_df: Feature rows in chronological order.
        y_ser: One label per row of ``X_df``.
        lookback: Number of consecutive rows per window (>= 1).

    Returns:
        Tuple ``(X_win, y_win)`` where ``X_win`` is float32 with shape
        ``(len(X_df) - lookback + 1, lookback, n_features)`` and ``y_win`` is
        int32 with one label per window.

    Raises:
        ValueError: if ``lookback`` is smaller than 1, if ``X_df`` and
            ``y_ser`` differ in length, or if there are fewer rows than
            ``lookback``.
    """
    if lookback < 1:
        # A non-positive lookback would silently produce malformed windows
        raise ValueError(f"lookback must be >= 1, got {lookback}")
    n = len(X_df)
    if len(y_ser) != n:
        raise ValueError(f"X_df and y_ser must have the same length, got {n} and {len(y_ser)}")
    if n < lookback:
        raise ValueError(f"need at least lookback={lookback} rows to build a window, got {n}")

    X_values = X_df.values.astype(np.float32)
    y_values = y_ser.values.astype(np.int32)

    # sliding_window_view puts the window axis last: (n_windows, n_features, lookback).
    # Swap it to (n_windows, lookback, n_features) and materialize a writable copy.
    view = np.lib.stride_tricks.sliding_window_view(X_values, lookback, axis=0)
    X_win = np.ascontiguousarray(view.transpose(0, 2, 1))
    y_win = y_values[lookback - 1:].copy()
    return X_win, y_win

# Windows are built separately per split, so no window spans a split boundary.
Xtr_win, ytr = make_windows(X_train_s, y_train, USE_LOOKBACK)
Xva_win, yva = make_windows(X_val_s,   y_val,   USE_LOOKBACK)
Xte_win, yte = make_windows(X_test_s,  y_test,  USE_LOOKBACK)

# Seed NumPy and TensorFlow before the datasets and the model are created
np.random.seed(SEED); tf.random.set_seed(SEED)

# TensorFlow Datasets erstellen für effizientes Laden
def to_ds(X, y, batch, shuffle):
    """Wrap ``(X, y)`` in a batched, prefetching ``tf.data.Dataset``.

    Args:
        X: Input samples; the first axis indexes the samples.
        y: Labels aligned with ``X``.
        batch: Batch size.
        shuffle: When truthy, shuffle with a buffer covering all samples,
            seeded with ``SEED`` and reshuffled on every iteration.

    Returns:
        The resulting ``tf.data.Dataset``.
    """
    dataset = tf.data.Dataset.from_tensor_slices((X, y))
    dataset = (
        dataset.shuffle(buffer_size=len(X), seed=SEED, reshuffle_each_iteration=True)
        if shuffle
        else dataset
    )
    batched = dataset.batch(batch)
    return batched.prefetch(tf.data.AUTOTUNE)

# Build the datasets (shuffling applies to training only, controlled by the ablation switch)
ds_train = to_ds(Xtr_win, ytr, BATCH, shuffle=ABL_SHUFFLE_TRAIN)
ds_val   = to_ds(Xva_win, yva, BATCH, shuffle=False)
ds_test  = to_ds(Xte_win, yte, BATCH, shuffle=False)

In [9]:
# === 6) DIAGNOSTIC: LOGISTIC REGRESSION BASELINE ===
# How well would a simple linear model do?
# If the RNN is not better than this, we have a problem.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

# Each split is used from row USE_LOOKBACK-1 onward, i.e. the same rows that
# carry a label in the windowed datasets, so the scores are comparable.
logit = LogisticRegression(max_iter=200)
logit.fit(X_train_s.iloc[USE_LOOKBACK-1:], y_train.iloc[USE_LOOKBACK-1:])
y_proba_lr = logit.predict_proba(X_test_s.iloc[USE_LOOKBACK-1:])[:,1]
print(f"[Diag] LogReg AUROC val/test = "
      f"{roc_auc_score(y_val.iloc[USE_LOOKBACK-1:], logit.predict_proba(X_val_s.iloc[USE_LOOKBACK-1:])[:,1]):.3f}/"
      f"{roc_auc_score(y_test.iloc[USE_LOOKBACK-1:], y_proba_lr):.3f}")

[Diag] LogReg AUROC val/test = 0.480/0.446


In [10]:
# === 7) MODEL CONSTRUCTION (LSTM/GRU) ===
from tensorflow.keras import layers, regularizers, optimizers, callbacks, models

# Any CELL value other than "GRU" selects an LSTM layer
rnn_cell = layers.GRU if CELL == "GRU" else layers.LSTM

# Define the architecture
model_layers = [
    layers.Input(shape=(USE_LOOKBACK, len(FEATURES))),
    # 1st RNN layer (returns the full sequence so a second RNN layer can follow)
    rnn_cell(WIDTH1, return_sequences=True, recurrent_dropout=RDROP),
]
# Optional: normalization between the two RNN layers (layout "both" only)
if ABL_LN_LAYOUT == "both":
    model_layers.append(layers.LayerNormalization())

# 2nd RNN layer; the LayerNormalization after it is always present
model_layers += [
    rnn_cell(WIDTH2, recurrent_dropout=RDROP),
    layers.LayerNormalization(),
    # Dense layer for classification (L2-regularized kernel)
    layers.Dense(16, activation="relu", kernel_regularizer=regularizers.l2(L2_DENSE)),
    # Output layer: sigmoid for a probability (0...1)
    layers.Dense(1, activation="sigmoid"),
]
model = models.Sequential(model_layers)

# Compile: define optimizer, loss and metrics
model.compile(
    optimizer=optimizers.Adam(learning_rate=LR),
    loss="binary_crossentropy", # Standard for binary classification
    metrics=[
        tf.keras.metrics.AUC(name="auc"),             # Area under the ROC curve
        tf.keras.metrics.AUC(name="auprc", curve="PR"), # Area under the precision-recall curve (more important!)
        tf.keras.metrics.BinaryAccuracy(name="acc"),
        tf.keras.metrics.Precision(name="prec"),
        tf.keras.metrics.Recall(name="rec"),
    ],
)

# Callbacks for smarter training; all three monitor the validation AUPRC (higher is better)
ckpt_path = RUN_DIR / "best.keras"
cbs = [
    # Saves only the best model (based on validation AUPRC)
    callbacks.ModelCheckpoint(filepath=str(ckpt_path),
                              monitor="val_auprc", mode="max",
                              save_best_only=True, verbose=1),
    # Stops training once it no longer improves (patience: 12 epochs)
    callbacks.EarlyStopping(monitor="val_auprc", mode="max",
                            patience=12, restore_best_weights=True),
    # Halves the learning rate on stagnation (patience: 6 epochs, floor 1e-5)
    callbacks.ReduceLROnPlateau(monitor="val_auprc", mode="max",
                                factor=0.5, patience=6, min_lr=1e-5, verbose=1),
]

In [11]:
# === 8) START TRAINING ===
# Train for at most EPOCHS epochs; the callbacks may stop earlier.
history = model.fit(ds_train, validation_data=ds_val, epochs=EPOCHS,
                    callbacks=cbs, verbose=1)

# Save the per-epoch training history to the run folder
pd.DataFrame(history.history).to_csv(RUN_DIR / "history.csv", index=False)

Epoch 1/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - acc: 0.4715 - auc: 0.4611 - auprc: 0.4798 - loss: 0.7504 - prec: 0.4885 - rec: 0.5550
Epoch 1: val_auprc improved from None to 0.49293, saving model to ..\results\2026-01-01_15-05-35_lstm\best.keras
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - acc: 0.4664 - auc: 0.4624 - auprc: 0.4750 - loss: 0.7391 - prec: 0.4748 - rec: 0.4873 - val_acc: 0.5055 - val_auc: 0.4934 - val_auprc: 0.4929 - val_loss: 0.7119 - val_prec: 0.4944 - val_rec: 0.3982 - learning_rate: 5.0000e-04
Epoch 2/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - acc: 0.4713 - auc: 0.4644 - auprc: 0.4959 - loss: 0.7144 - prec: 0.4796 - rec: 0.5115
Epoch 2: val_auprc improved from 0.49293 to 0.49679, saving model to ..\results\2026-01-01_15-05-35_lstm\best.keras
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - acc: 0.4806 - auc: 0.4760 - auprc: 0.4953 - l

In [12]:
# === 9) TEST EVALUATION (raw) ===
# A first quick check of the results on the test set, using the compiled Keras metrics
test_metrics = model.evaluate(ds_test, return_dict=True, verbose=0)
print("Test (keras) metrics:", json.dumps(test_metrics, indent=2))

# Threshold optimization (diagnostic): predicted probabilities on the validation set
val_proba = model.predict(ds_val, verbose=0).ravel()
def choose_threshold(y_true, y_prob, bounds=(0.35, 0.65)):
    """Pick the decision threshold that maximizes MCC rather than accuracy.

    Candidate thresholds are 0.0, every distinct predicted probability, and
    1.0. A candidate is only scored when the share of positive predictions
    (``y_prob >= t``) lies within ``bounds`` (inclusive).

    Args:
        y_true: Ground-truth binary labels.
        y_prob: Predicted probabilities as a NumPy array aligned with ``y_true``.
        bounds: ``(low, high)`` limits for the predicted positive rate.

    Returns:
        Tuple ``(threshold, mcc)``; ``(0.5, -1)`` when no candidate satisfies
        ``bounds``.
    """
    candidates = np.r_[0.0, np.unique(y_prob), 1.0]
    best_t, best_s = 0.5, -1
    for thr in candidates:
        predicted = (y_prob >= thr).astype(int)
        positive_rate = predicted.mean()
        if bounds[0] <= positive_rate <= bounds[1]:
            score = matthews_corrcoef(y_true, predicted)
            # Strict comparison: on ties the first (lowest) threshold is kept
            if score > best_s:
                best_s = score
                best_t = float(thr)
    return best_t, best_s

# The threshold is selected on the validation set only and then applied to the test set
thr_diag, mcc_val_diag = choose_threshold(yva, val_proba, bounds=(0.35, 0.65))
print(f"[Diag] thr@val(max MCC, bounds 0.35–0.65) = {thr_diag:.3f} | val_MCC={mcc_val_diag:.3f}")

y_proba = model.predict(ds_test, verbose=0).ravel()
y_pred_diag = (y_proba >= thr_diag).astype(int)

# Save additional test metrics (balanced accuracy and MCC use the thresholded
# predictions, AUPRC uses the raw probabilities)
extra = {"balanced_accuracy": float(balanced_accuracy_score(yte, y_pred_diag)),
         "mcc": float(matthews_corrcoef(yte, y_pred_diag)),
         "auprc": float(average_precision_score(yte, y_proba))}
with open(RUN_DIR / "extra_test_metrics_diag.json", "w") as f:
    json.dump(extra, f, indent=2)

print("\n[Diag] Confusion (test, thr=thr_diag):\n", confusion_matrix(yte, y_pred_diag))
print("\n[Diag] Report (test):\n", classification_report(yte, y_pred_diag, digits=3))

Test (keras) metrics: {
  "acc": 0.528761088848114,
  "auc": 0.5311942100524902,
  "auprc": 0.540793240070343,
  "loss": 0.7023622393608093,
  "prec": 0.569767415523529,
  "rec": 0.4135020971298218
}
[Diag] thr@val(max MCC, bounds 0.35–0.65) = 0.474 | val_MCC=0.084

[Diag] Confusion (test, thr=thr_diag):
 [[106 109]
 [101 136]]

[Diag] Report (test):
               precision    recall  f1-score   support

           0      0.512     0.493     0.502       215
           1      0.555     0.574     0.564       237

    accuracy                          0.535       452
   macro avg      0.534     0.533     0.533       452
weighted avg      0.535     0.535     0.535       452



In [13]:
# === 10) SAVE & EXPORT ===
# Store all information about the run so that it can be reproduced later
env_info = {
    "python": sys.version,
    "tensorflow": tf.__version__,
    "seed": SEED,
    "ticker": TICKER, "start": START, "end": END, "interval": INTERVAL,
    "horizon": HORIZON, "epsilon_mode": EPS_MODE, "epsilon": EPSILON,
    "featureset": FEATURESET, "features_used": FEATURES_USED_TAG,
    "features_final": FEATURES,
    "lookback": USE_LOOKBACK, "batch": BATCH, "epochs": EPOCHS,
    "cell": CELL, "width1": WIDTH1, "width2": WIDTH2,
    "dropout_cfg": DROPOUT, "recurrent_dropout_used": RDROP,
    "ln_layout": ABL_LN_LAYOUT,
    "lr": LR,
    "loss": "BCE",
    "train_csv": TRAIN_CSV,
    "features_yaml": yaml_path,
    "best_config_path": BEST_CFG_PATH,
    "best_checkpoint_path": str(ckpt_path),
    "env": {
        "OMP_NUM_THREADS": os.getenv("OMP_NUM_THREADS"),
        # ... further thread settings
    }
}
with open(RUN_DIR / "env_info.json", "w") as f:
    json.dump(env_info, f, indent=2)

# Save a compact config for a quick overview, including the effective ablation settings
final_cfg_dump = {
    "ticker": TICKER, "start": START, "end": END, "interval": INTERVAL,
    "horizon": HORIZON, "lookback": USE_LOOKBACK,
    "featureset": FEATURESET, "features": FEATURES,
    "scaler": "StandardScaler", "seed": SEED, "batch": BATCH, "epochs": EPOCHS,
    "cell": CELL, "width1": WIDTH1, "width2": WIDTH2,
    "dropout": DROPOUT, "recurrent_dropout_used": RDROP,
    "ln_layout": ABL_LN_LAYOUT,
    "lr": LR,
    "loss": "BCE",
    "epsilon_mode": EPS_MODE, "epsilon": EPSILON,
    "train_csv": TRAIN_CSV,
    "features_yaml": yaml_path,
    "wfcv_best_config_source": BEST_CFG_PATH,
    "ablations": {
        "shuffle_train": ABL_SHUFFLE_TRAIN,
        "no_recurrent_dropout": ABL_NO_RECURRENT_DROPOUT,
        "ln_layout": ABL_LN_LAYOUT,
        "l2_dense": L2_DENSE
    }
}
with open(RUN_DIR / "config.json", "w") as f:
    json.dump(final_cfg_dump, f, indent=2)

# Save the model plus the test labels and predicted probabilities for steps 4 & 6
model.save(RUN_DIR / "model.keras")
np.save(RUN_DIR / "y_test.npy", yte)
np.save(RUN_DIR / "y_proba.npy", y_proba)

print(f"\nArtefakte gespeichert in: {RUN_DIR}")


Artefakte gespeichert in: ..\results\2026-01-01_15-05-35_lstm
