In [1]:
# === SYSTEM & IMPORTS ===
# Block 5: Hyperparameter-Optimierung mit Walk-Forward Cross-Validation (WFCV)
#
# Ziel: Die besten Parameter f√ºr das Modell finden, ohne "in die Zukunft" zu schauen.
# Methode: Wir trainieren auf [Vergangenheit] -> testen auf [Gegenwart].
# Dann schieben wir das Fenster weiter: trainieren auf [Vergangenheit + Gegenwart] -> testen auf [Zukunft].

import os, sys, json, time, logging, glob, re
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Pfad zum Projekt-Root setzen
ROOT = os.path.abspath("..")
if ROOT not in sys.path:
    sys.path.insert(0, ROOT)

# TensorFlow-Logs unterdr√ºcken (nur Fehler anzeigen)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import tensorflow as tf
tf.get_logger().setLevel(logging.ERROR)

print(f"TensorFlow Version: {tf.__version__}")
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print(f"‚úÖ GPU DETECTED: {len(gpus)} device(s)")
    for i, gpu in enumerate(gpus):
        print(f"  [{i}] {gpu.name}")
        try:
            # Versuch, Speicher-Details zu holen (optional)
            details = tf.config.experimental.get_device_details(gpu)
            print(f"      Compute Capability: {details.get('compute_capability')}")
        except:
            pass
    # Mixed Precision aktivieren
    try:
        from tensorflow.keras import mixed_precision
        policy = mixed_precision.Policy('mixed_float16')
        mixed_precision.set_global_policy(policy)
        print("üöÄ Mixed Precision ENABLED (Float16 speedup active)")
    except Exception as e:
        print(f"‚ö†Ô∏è Mixed Precision init failed: {e}")
else:
    print("‚ùå NO GPU DETECTED! Running on CPU (will be slow).")
    print("   Please check CUDA/cuDNN installation if you have an NVIDIA GPU.")

TensorFlow Version: 2.10.0
‚úÖ GPU DETECTED: 1 device(s)
  [0] /physical_device:GPU:0
      Compute Capability: (8, 9)
üöÄ Mixed Precision ENABLED (Float16 speedup active)


In [2]:
# === CONFIG & SETUP ===
# Basis-Konfiguration laden
with open(os.path.join(ROOT, "config.json"), "r") as f:
    C = json.load(f)

# Parameter √ºbernehmen
TICKER   = C["ticker"]
START    = C["start"]
END      = C["end"]
INTERVAL = C["interval"]
HORIZON  = int(C["horizon"])
LOOKBACK_DEFAULT = int(C["lookback"]) # Standard-Lookback, falls wir ihn nicht variieren
BATCH    = int(C.get("batch", 64))
SEED     = int(C.get("seed", 42))
FEATURESET = C.get("featureset", "v2")
EPS_MODE   = C.get("epsilon_mode", "abs")
EPSILON    = float(C.get("epsilon", 0.0005))

# Ergebnis-Verzeichnis
RESULTS_DIR = Path(C.get("results_dir", "../results"))

# Globalen Seed setzen
np.random.seed(SEED); tf.random.set_seed(SEED)

# Eigener Ausgabe-Ordner mit Zeitstempel f√ºr diesen WFCV-Lauf
RUN_DIR = RESULTS_DIR / time.strftime("%Y-%m-%d_%H-%M-%S_wfcv")
RUN_DIR.mkdir(parents=True, exist_ok=True)

# Unterordner f√ºr Plots
(RUN_DIR / "plots").mkdir(parents=True, exist_ok=True)

print("WFCV_RUN_DIR:", RUN_DIR)

WFCV_RUN_DIR: ..\results\2026-01-03_20-56-59_wfcv


In [3]:
# === FAST MODE ===
# WFCV kann sehr lange dauern (Stunden/Tage).
# F√ºr Debugging oder schnelle Tests gibt es den "Fast Mode".
FAST = C.get("fast_wfcv", False)

# Einstellungen f√ºr "Normal" (Full Grid) und "Fast" (Reduziert)
EPOCHS_GRID = 1   # Set to 1 for Instant Mode (<1 min)
N_FOLDS = 2       # Min Folds

if FAST:
    print("[INFO] Fast Mode ist AKTIV. Reduzierte Epochen und Folds.")
EPOCHS_GRID = 1   # Set to 1 for Instant Mode (<1 min)
N_FOLDS = 2       # Min Folds


In [4]:
# === DATEN LADEN ===
# Wir suchen robust nach der passenden CSV-Datei.
import yaml

# 1. Features-Metadaten laden, falls vorhanden
yaml_path = f"../data/features_{FEATURESET}.yml"
meta = {}
label_h = label_mode = label_eps = None

if os.path.exists(yaml_path):
    with open(yaml_path, "r") as f:
        meta = yaml.safe_load(f) or {}
    lab = (meta or {}).get("label", {})
    label_h    = lab.get("horizon")
    label_mode = lab.get("mode")
    label_eps  = lab.get("epsilon")

# Hilfsfunktion zum Parsen des Dateinamens
def _parse_h_meps_from_name(path: str):
    mH = re.search(r"_cls_h(\d+)_", path)
    me = re.search(r"_(abs|rel)(\d+p\d+)\.csv$", path)
    H  = int(mH.group(1)) if mH else None
    md = me.group(1) if me else None
    eps= float(me.group(2).replace("p",".")) if me else None
    return H, md, eps

# Hilfsfunktion zur Suche im Dateisystem
def _infer_from_existing_files(tkr, itv, start, end, mode_hint=None, eps_hint=None):
    pat = f"../data/{tkr}_{itv}_{start}_{end}_cls_h*_.csv".replace("_ .csv",".csv")
    cands = sorted(glob.glob(pat), key=os.path.getmtime)
    
    if mode_hint and (eps_hint is not None):
        tag = f"{mode_hint}{str(eps_hint).replace('.','p')}"
        cands = [c for c in cands if c.endswith(f"_{tag}.csv")]
        
    if not cands:
        return None
    return _parse_h_meps_from_name(cands[-1])

# Versuch 1: Parameter aus YAML
H_FOR_FILE    = int(label_h)    if label_h    is not None else None
MODE_FOR_FILE = str(label_mode) if label_mode is not None else None
EPS_FOR_FILE  = float(label_eps) if label_eps is not None else None

# Versuch 2: Parameter aus Dateinamen erraten
if (H_FOR_FILE is None) or (MODE_FOR_FILE is None) or (EPS_FOR_FILE is None):
    inferred = _infer_from_existing_files(TICKER, INTERVAL, START, END,
                                          mode_hint=MODE_FOR_FILE, eps_hint=EPS_FOR_FILE)
    if inferred is not None:
        H_i, M_i, E_i = inferred
        H_FOR_FILE    = H_FOR_FILE    if H_FOR_FILE    is not None else H_i
        MODE_FOR_FILE = MODE_FOR_FILE if MODE_FOR_FILE is not None else M_i
        EPS_FOR_FILE  = EPS_FOR_FILE  if EPS_FOR_FILE  is not None else E_i

if (H_FOR_FILE is None) or (MODE_FOR_FILE is None) or (EPS_FOR_FILE is None):
    raise RuntimeError("Label-Definition unklar. Bitte Block 2 pr√ºfen.")

# Dateipfad endg√ºltig bauen
eps_tag   = f"{MODE_FOR_FILE}{str(EPS_FOR_FILE).replace('.','p')}"
TRAIN_CSV = f"../data/{TICKER}_{INTERVAL}_{START}_{END}_cls_h{H_FOR_FILE}_{eps_tag}.csv"

# Existenz checken und laden
if not os.path.exists(TRAIN_CSV):
    pat = f"../data/{TICKER}_{INTERVAL}_{START}_{END}_cls_h*_{eps_tag}.csv"
    candidates = sorted(glob.glob(pat), key=os.path.getmtime)
    if candidates:
        TRAIN_CSV = candidates[-1]
    else:
        raise FileNotFoundError(f"CSV nicht gefunden: {TRAIN_CSV}")

print("Lade TRAIN_CSV:", TRAIN_CSV)
df = pd.read_csv(TRAIN_CSV, index_col=0, parse_dates=True).sort_index()

# Feature-Pool bestimmen
OHLCV = {"open","high","low","close","volume"}
if meta:
    FEATURES_ALL = [c for c in meta.get("features", []) if c in df.columns]
else:
    FEATURES_ALL = [c for c in df.columns if c not in (OHLCV | {"target"})]
    
assert len(FEATURES_ALL) > 0, "Keine Features zum Optimieren gefunden."

# Wir behalten den ganzen DataFrame im Speicher (X und y getrennt)
X_full = df[FEATURES_ALL].copy()
y_full = df["target"].astype(int).copy()

print("Label Positive Rate (gesamt):", round(y_full.mean(), 3), "| Datens√§tze:", len(y_full))

Lade TRAIN_CSV: ../data/AAPL_1d_2010-01-01_2026-01-01_cls_h1_abs0p0005.csv
Label Positive Rate (gesamt): 0.514 | Datens√§tze: 3991


In [5]:
# === SPLITTING (Walk-Forward Logik) ===
# Diese Funktion berechnet die Indizes f√ºr die verschiedenen Folds.

def make_wf_splits(n, n_folds=5, val_frac=0.20, min_train_frac=0.45):
    # n: Anzahl Datenpunkte total
    # val_frac: Wie viel % des aktuellen Fensters sind Validation?
    # min_train_frac: Wie gro√ü muss das Trainingset MINDESTENS sein?
    
    val_len   = max(60, int(round(n * val_frac)))
    min_train = max(200, int(round(n * min_train_frac)))
    
    # Start-Punkt f√ºr das Ende des ersten Validation-Sets
    start_val_end = min_train + val_len
    if start_val_end + 1 > n:
        raise ValueError(f"Dataset zu kurz f√ºr diese Split-Parameter: {n}")
    
    # Wir verteilen die Endpunkte der Folds gleichm√§√üig √ºber die verbleibende Zeit
    val_ends = np.linspace(start_val_end, n, num=n_folds, endpoint=True).astype(int)
    val_ends = np.unique(val_ends)
    
    # Falls durch Rundung zu wenige Folds entstehen (bei kleinen Daten), fixieren wir Schritte
    if len(val_ends) < n_folds:
        step = max(1, (n - start_val_end) // n_folds)
        val_ends = np.arange(start_val_end, start_val_end + step * n_folds, step)
        val_ends = np.clip(val_ends, start_val_end, n)
        
    stops = []
    for ve in val_ends[:n_folds]:
        # Das Ende des Trainings ist 'val_len' vor dem Ende des Folds
        te = int(ve - val_len)
        # Training muss gro√ü genug sein (Lookback beachten!)
        te = max(te, LOOKBACK_DEFAULT + 1)
        
        if te <= 0 or ve <= te or ve > n:
            continue
            
        # Wir speichern Slices: (Train-Bereich, Val-Bereich)
        # Train geht immer von 0 bis te (Expanding Window)
        stops.append((slice(0, te), slice(te, ve)))
        
    if len(stops) != n_folds:
        # Warnung oder Error, falls wir nicht genug Folds bauen konnten
        # Hier akzeptieren wir es implizit, pr√ºfen es aber:
        print(f"[WARN] Konnte nur {len(stops)} von {n_folds} Folds generieren.")
        
    return stops

# Splits generieren
splits = make_wf_splits(len(df), n_folds=N_FOLDS, val_frac=0.20, min_train_frac=0.45)
print("Anzahl generierter Folds:", len(splits))
if len(splits) > 0:
    tr_s, va_s = splits[0]
    print(f"  Fold1: Train bis idx={tr_s.stop}, Val bis idx={va_s.stop} (Gr√∂√üe Val: {va_s.stop - va_s.start})")

Anzahl generierter Folds: 2
  Fold1: Train bis idx=1796, Val bis idx=2594 (Gr√∂√üe Val: 798)


In [6]:
# === MODELL-HELPER FUNKTIONEN ===

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import balanced_accuracy_score, matthews_corrcoef, average_precision_score, roc_auc_score
from tensorflow.keras import layers, regularizers, callbacks, optimizers, models

# optimierte Dataset Funktion (ersetzt make_windows)
def make_dataset(X_df, y_ser, lookback, batch_size=64, shuffle=False, seed=42):
    # Cast to float32/int32 explicitly for TF
    data = X_df.values.astype("float32")
    targets = y_ser.values.astype("int32")
    
    # timeseries_dataset_from_array nutzt C++ Op -> sehr schnell
    ds = tf.keras.utils.timeseries_dataset_from_array(
        data=data,
        targets=targets,
        sequence_length=lookback,
        sequence_stride=1,
        shuffle=shuffle,
        batch_size=batch_size,
        seed=seed,
        start_index=0,
        end_index=None
    )
    # Prefetch f√ºr GPU Pipeline
    return ds.prefetch(tf.data.AUTOTUNE)

def build_model(n_features, width1=64, width2=32, dropout=0.10, lr=5e-4, use_gru=True):
    rnn = layers.GRU if use_gru else layers.LSTM
    m = models.Sequential([
        layers.Input(shape=(None, n_features)),
        rnn(width1, return_sequences=True, recurrent_dropout=dropout),
        layers.LayerNormalization(),
        rnn(width2, recurrent_dropout=dropout),
        layers.LayerNormalization(),
        layers.Dense(16, activation="relu", kernel_regularizer=regularizers.l2(1e-5)),
        layers.Dense(1, activation="sigmoid"),
    ])
    m.compile(
        optimizer=optimizers.Adam(learning_rate=lr),
        loss="binary_crossentropy",
        metrics=[tf.keras.metrics.AUC(name="auc"),
                 tf.keras.metrics.AUC(name="auprc", curve="PR")]
    )
    return m

# Hilfsfunktion: MCC f√ºr besten Threshold berechnen
def mcc_at_best_thr(y_true, y_prob):
    ts = np.r_[0.0, np.unique(y_prob), 1.0]
    best = (-1.0, 0.5)
    for t in ts:
        yp = (y_prob >= t).astype(int)
        m = matthews_corrcoef(y_true, yp)
        if m > best[0]:
            best = (float(m), float(t))
    return best  # (mcc, thr)

# Funktion f√ºr Training und Evaluation mit Dataset
def fit_eval_fold_fast(ds_tr, ds_va, y_va, n_features, hp, epochs=EPOCHS_GRID):
    tf.keras.backend.clear_session()
    
    # Modell bauen
    model = build_model(
        n_features=n_features,
        width1=hp["width1"], width2=hp["width2"],
        dropout=hp["dropout"], lr=hp["lr"], use_gru=(hp["cell"]=="GRU")
    )

    # Callbacks
    cbs = [
        callbacks.EarlyStopping(monitor="val_auprc", mode="max", patience=6, restore_best_weights=True),
        callbacks.ReduceLROnPlateau(monitor="val_auprc", mode="max", factor=0.5, patience=3, min_lr=1e-5),
        callbacks.TerminateOnNaN(),
    ]

    # Training
    hist = model.fit(ds_tr, validation_data=ds_va, epochs=epochs, verbose=0, callbacks=cbs)

    # Evaluation auf Validation-Set
    # Achtung: ds_va muss NICHT geshuffelt sein, damit die Reihenfolge zu y_va passt!
    yva_proba = model.predict(ds_va, verbose=0).ravel()
    
    # L√§nge checken (durch Windowing gehen die ersten samples verloren)
    # y_va ist der "echte" Target-Vektor nach Windowing (muss vorher gek√ºrzt werden)
    
    # Metriken berechnen
    mcc_val, thr_val = mcc_at_best_thr(y_va[-len(yva_proba):], yva_proba)
    yva_true_clipped = y_va[-len(yva_proba):]
    
    yva_pred_best = (yva_proba >= thr_val).astype(int)

    metrics = dict(
        mcc=float(mcc_val),
        thr_val=float(thr_val),
        bal_acc=float(balanced_accuracy_score(yva_true_clipped, yva_pred_best)),
        auprc=float(average_precision_score(yva_true_clipped, yva_proba)),
        auroc=float(roc_auc_score(yva_true_clipped, yva_proba)),
        epochs_trained=int(len(hist.history["loss"]))
    )
    tf.keras.backend.clear_session()
    return metrics

In [7]:
# === SUCH-GRIDS DEFINIEREN ===
# Hier definieren wir den Suchraum f√ºr die Hyperparameter.

# 1. Lookback: Wie weit schauen wir zur√ºck?
LOOKBACK_GRID = [60] if not FAST else [LOOKBACK_DEFAULT] # Optimized: only 60

# 2. Modell-Architektur und Hyperparameter
# Wir bauen eine Liste von Dicts (Grid Search)
HP_GRID = [
    dict(width1=w1, width2=w2, dropout=dp, lr=lr, cell=cell)
    for (w1, w2) in [(64,32)] # Netzgr√∂√üe (nur eine Option)
    for lr in [5e-4]          # Lernrate (nur eine Option)
    for dp in [0.2]           # Dropout (Optimized: only 0.2)
    for cell in ["GRU"] # Zelltyp (Optimized: only GRU)
]

# Falls FAST-Mode, √ºberschreiben wir das Grid mit nur einer Konfiguration
if FAST:
    HP_GRID = [dict(width1=32, width2=16, dropout=0.10, lr=5e-4, cell="GRU")]

# 3. Feature-Subsets: Welche Spalten nutzen wir?
FEATURE_SUBSETS = {
    # Standard: Alle verf√ºgbaren Features
    "all": FEATURES_ALL,
    
    # Experiment 1: Nur Momentum-basierte Indikatoren
    # "mom_only": [c for c in FEATURES_ALL  (DISABLED for speed)
    #             if ("logret" in c) or ("macd" in c) or (c in {"sma_diff","rsi_14","bb_pos"})],
                 
    # Experiment 2: Momentum + Volatilit√§t
    # "mom+vol": [c for c in FEATURES_ALL (DISABLED for speed)
    #             if (("logret" in c) or ("macd" in c) or (c in {"sma_diff","rsi_14","bb_pos"}))
    #                or (c in {"realized_vol_10","vol_z_20"})]
}

print("Gr√∂√üe Suchraum:")
print(f"  HP-Kombinationen: {len(HP_GRID)}")
print(f"  Lookback-Optionen: {len(LOOKBACK_GRID)}")
print(f"  Feature-Sets: {len(FEATURE_SUBSETS)}")
print(f"  Folds pro Kombination: {len(splits)}")
print(f"  -> Gesamte Training-Runs: {len(HP_GRID) * len(LOOKBACK_GRID) * len(FEATURE_SUBSETS) * len(splits)}")

Gr√∂√üe Suchraum:
  HP-Kombinationen: 1
  Lookback-Optionen: 1
  Feature-Sets: 1
  Folds pro Kombination: 2
  -> Gesamte Training-Runs: 2


In [8]:
# === HAUPTSCHLEIFE: SUCHE DURCHF√úHREN (OPTIMIERT) ===
from time import perf_counter

print("Starte Suche ...", flush=True)
MAX_SECONDS = 60 * 60 * 2  # Max 2 Stunden

csv_path = RUN_DIR / "wfcv_results.csv"
records = []
t0 = perf_counter()

# Resume Check
done_keys = set()
if csv_path.exists():
    try:
        done_df = pd.read_csv(csv_path)
        for _, r in done_df.iterrows():
            done_keys.add((r["features_used"], int(r["lookback"]),
                           r["cell"], int(r["width1"]), int(r["width2"]),
                           float(r["dropout"]), float(r["lr"]), int(r["fold"])))
    except:
        pass

stop_time = t0 + MAX_SECONDS

# OUTER LOOPS (Data Dimensions)
for feat_name, FEATS in FEATURE_SUBSETS.items():
    if len(FEATS) == 0: continue

    for lookback in LOOKBACK_GRID:
        
        # --- OPTIMIERUNG: Datasets vor den HP-Loops erstellen ---
        # Wir bereiten die Folds EINMALIG vor, statt in jeder HP-Runde neu.
        # Das spart massiv Zeit bei Scaling & Windowing.
        fold_datasets = {}
        
        # Wir checken, ob wir ALLE HPs f√ºr diesen (Feat, LB) Block schon haben.
        # Wenn ja, k√∂nnen wir das Erstellen der Datasets √ºberspringen.
        # (Vereinfachter Check: wir machen es pro Fold bei Bedarf, aber hier globaler Split)
        
        print(f"\n[PREP] Generiere Datasets f√ºr Feat='{feat_name}', LB={lookback} ...")
        
        datasets_ready = True
        for fold_id, (tr_s, va_s) in enumerate(splits, start=1):
            # Slice Data
            X_tr, y_tr = X_full.iloc[tr_s][FEATS], y_full.iloc[tr_s]
            X_va, y_va = X_full.iloc[va_s][FEATS], y_full.iloc[va_s]
            
            # Scale
            scaler = StandardScaler()
            X_tr_s = pd.DataFrame(scaler.fit_transform(X_tr), index=X_tr.index, columns=X_tr.columns)
            X_va_s = pd.DataFrame(scaler.transform(X_va),     index=X_va.index, columns=X_va.columns)
            
            # Make TF Datasets (using C++ generator)
            ds_tr = make_dataset(X_tr_s, y_tr, lookback, batch_size=BATCH, shuffle=True, seed=SEED)
            ds_va = make_dataset(X_va_s, y_va, lookback, batch_size=BATCH, shuffle=False)
            
            # y_va (raw values) for metric calculation (needs to be aligned with windowed output)
            # timeseries_dataset_from_array cuts off the first (lookback-1) samples.
            y_va_aligned = y_va.values[lookback-1:] 
            
            fold_datasets[fold_id] = (ds_tr, ds_va, y_va_aligned, len(FEATS))
        
        # --- INNER LOOP: Hyperparameters ---
        for hp in HP_GRID:
            for fold_id in range(1, len(splits) + 1):
                # Key check
                key = (feat_name, int(lookback),
                       hp["cell"], int(hp["width1"]), int(hp["width2"]),
                       float(hp["dropout"]), float(hp["lr"]), int(fold_id))
                
                if key in done_keys:
                    continue
                
                # Time check
                if perf_counter() > stop_time:
                    break

                # Get Pre-calc Data
                ds_tr, ds_va, y_va_true, n_feat = fold_datasets[fold_id]
                
                # Train & Eval
                # Note: y_va_true is passed explicitly to avoid re-extraction
                mets = fit_eval_fold_fast(
                    ds_tr, ds_va, y_va_true, n_feat,
                    hp=hp, epochs=EPOCHS_GRID
                )
                
                # Save
                rec = {
                    "feature_set": FEATURESET,
                    "features_used": feat_name,
                    "n_features": n_feat,
                    "lookback": lookback,
                    **hp,
                    "fold": fold_id,
                    **mets
                }
                records.append(rec)
                pd.DataFrame([rec]).to_csv(csv_path, mode='a', header=not os.path.exists(csv_path), index=False)
                
                print(f"[{feat_name[:5]}.. | LB={lookback} | {hp['cell']} | Fold{fold_id}] MCC={mets['mcc']:.3f} (Ep:{mets['epochs_trained']})")

            if perf_counter() > stop_time: break
        if perf_counter() > stop_time: break
    if perf_counter() > stop_time: 
        print("[INFO] Time limit reached.")
        break

t1 = perf_counter()
print(f"\nCompleted. Time={t1-t0:.1f}s")

Starte Suche ...

[PREP] Generiere Datasets f√ºr Feat='all', LB=60 ...
[all.. | LB=60 | GRU | Fold1] MCC=0.070 (Ep:1)
[all.. | LB=60 | GRU | Fold2] MCC=0.054 (Ep:1)

Completed. Time=32.2s


In [9]:
# === ERGEBNIS-ANALYSE ===
# Wir aggregieren die Ergebnisse aller Folds und suchen die beste Konfiguration.
# Kriterium: Hoher Durchschnitts-MCC und geringe Standardabweichung (Stabilit√§t).

import pandas as pd, json, numpy as np

csv_path = RUN_DIR / "wfcv_results.csv"
if not csv_path.exists():
    print("Keine Ergebnisse gefunden.")
else:
    results = pd.read_csv(csv_path)

    # Gruppierungs-Spalten (alles au√üer Fold und Ergebnissen)
    agg_cols = [c for c in ["feature_set","features_used","n_features","lookback",
                            "width1","width2","dropout","lr","cell"] if c in results.columns]

    # Aggregation: Mittelwert und Standardabweichung
    agg_dict = {"mcc": ["mean","std"], "auprc": ["mean","std"], "auroc": ["mean"]}
    g = results.groupby(agg_cols).agg(agg_dict)

    # Flache Spaltennamen erzeugen (MultiIndex entfernen)
    g.columns = [
        "_".join([str(x) for x in col if str(x) != ""]).strip("_")
        for col in g.columns.to_flat_index()
    ]
    g = g.reset_index()

    # Sortieren: Beste Konfiguration zuerst.
    # Wir sortieren nach MCC Mean (absteigend) und MCC Std (aufsteigend).
    g = g.sort_values(["mcc_mean","auprc_mean","mcc_std"], ascending=[False, False, True])

    # Speichern der aggregierten Tabelle
    g.to_csv(RUN_DIR / "wfcv_results_agg.csv", index=False)
    
    # Top 5 speichern
    top5 = g.head(5).copy()
    top5.to_csv(RUN_DIR / "wfcv_results_top5.csv", index=False)
    
    print("Top 3 Konfigurationen:")
    print(top5.head(3)[["features_used", "lookback", "cell", "dropout", "mcc_mean", "mcc_std"]])

    # Die allerbeste Config extrahieren und als JSON speichern
    # Diese Datei wird von Notebook 3 automatisch geladen.
    best = top5.iloc[0].to_dict() if len(top5) else {}
    with open(RUN_DIR / "best_config.json", "w") as f:
        json.dump(best, f, indent=2)

    print("\nBest config saved to:", RUN_DIR / "best_config.json")

Top 3 Konfigurationen:
  features_used  lookback cell  dropout  mcc_mean   mcc_std
0           all        60  GRU      0.2  0.061812  0.010962

Best config saved to: ..\results\2026-01-03_20-56-59_wfcv\best_config.json


In [10]:
# === VISUALISIERUNG: HEATMAPS ===
# Wir plotten Heatmaps, um zu sehen, welche Paremeter-R√§ume gut funktionieren.
import pandas as pd
import matplotlib.pyplot as plt

if csv_path.exists():
    agg = pd.read_csv(RUN_DIR / "wfcv_results_agg.csv")
    
    # Wir pivotieren die Tabelle f√ºr die Heatmap
    # Normalerweise ist Lookback eine gute Y-Achse
    pivot_index = "lookback" if "lookback" in agg.columns else agg.columns[0]
    col_candidates = ["features_used", "cell", "width1"]
    pivot_columns = [c for c in col_candidates if c in agg.columns]
    
    def _plot_grid(df: pd.DataFrame, value_col: str, fname: str):
        if value_col not in df.columns: return
        try:
            # Pivot: rows=lookback, cols=features/cell/...
            pvt = df.pivot_table(index=pivot_index, columns=pivot_columns, values=value_col, aggfunc="mean")
            
            plt.figure(figsize=(10, 6))
            im = plt.imshow(pvt.values, aspect="auto", cmap="viridis")
            plt.colorbar(im)
            
            # Achsen beschriften
            plt.yticks(range(len(pvt.index)), pvt.index)
            plt.xticks(range(len(pvt.columns)), pvt.columns, rotation=45, ha="right")
            
            plt.xlabel(" / ".join(pivot_columns)); plt.ylabel(pivot_index)
            plt.title(fname.replace("_", " ").replace(".png", ""))
            plt.tight_layout()
            plt.savefig(RUN_DIR / "plots" / fname, dpi=160)
            plt.close()
        except Exception as e:
            print(f"Konnte Plot {fname} nicht erstellen: {e}")

    _plot_grid(agg, "mcc_mean",   "heatmap_mcc.png")
    _plot_grid(agg, "auprc_mean", "heatmap_auprc.png")
    print("Heatmaps gespeichert.")

Heatmaps gespeichert.


In [11]:
# === VISUALISIERUNG: BOXPLOTS ===
# Boxplots zeigen die Stabilit√§t √ºber die Folds besser als nur der Mittelwert.

if csv_path.exists():
    results = pd.read_csv(csv_path)
    
    # Wir erstellen ein Label f√ºr jede Config (ohne Fold-Info)
    def _short_label(r):
        return f"{r['features_used']}-{r['cell']}-lb{int(r['lookback'])}-dp{r['dropout']}"
    
    results["config_label"] = results.apply(_short_label, axis=1)

    # Wir nehmen nur die Top 10 Configs f√ºr den Plot, sonst wird es unleserlich
    top_labels = results.groupby("config_label")["mcc"].mean().sort_values(ascending=False).head(10).index
    subset = results[results["config_label"].isin(top_labels)]
    
    # Plot
    plt.figure(figsize=(10, 6))
    data = [grp["mcc"].values for label, grp in subset.groupby("config_label")]
    labels = [label for label, grp in subset.groupby("config_label")]
    
    # Sortierung im Plot nach Median
    medians = [np.median(d) for d in data]
    sort_idx = np.argsort(medians)[::-1]
    data = [data[i] for i in sort_idx]
    labels = [labels[i] for i in sort_idx]

    plt.boxplot(data, showmeans=True, meanline=True)
    plt.xticks(range(1, len(labels)+1), labels, rotation=45, ha="right")
    plt.title("Top 10 Configs: MCC Varianz √ºber Folds")
    plt.ylabel("MCC Score")
    plt.grid(True, axis="y", alpha=0.3)
    plt.tight_layout()
    plt.savefig(RUN_DIR / "plots" / "boxplots_top10_mcc.png", dpi=160)
    plt.close()
    print("Boxplots gespeichert.")

Boxplots gespeichert.


In [12]:
# === INFO-DUMP ===
# Metadaten speichern
run_info = {
    "seed": SEED,
    "epochs_grid": EPOCHS_GRID,
    "n_folds": N_FOLDS,
    "val_frac": 0.20,
    "min_train_frac": 0.45,
    "lookback_grid": LOOKBACK_GRID,
    "hp_grid_size": (len(HP_GRID) if not FAST else 1),
    "feature_subsets": list(FEATURE_SUBSETS.keys()),
    "train_csv": TRAIN_CSV,
    "label_resolution": {
        "source": "yaml" if os.path.exists(yaml_path) and (label_h is not None) else "inferred_from_csv",
        "yaml_path": yaml_path
    },
    "labels": {"horizon": H_FOR_FILE, "mode": MODE_FOR_FILE, "epsilon": EPS_FOR_FILE}
}
with open(RUN_DIR / "wfcv_run_info.json", "w") as f:
    json.dump(run_info, f, indent=2)

print("\nBlock 5 abgeschlossen. Ergebnisse in:", RUN_DIR)


Block 5 abgeschlossen. Ergebnisse in: ..\results\2026-01-03_20-56-59_wfcv
