In [1]:
# =============================================================================
# Stage-A Ensemble Tuning: tune on Block 1+2, evaluate on Block 3 (PRINT ONLY)
# =============================================================================

import os
import sys
from pathlib import Path
import numpy as np
import pandas as pd


# --- Projekt-Root finden ---
def _locate_repo_root(start: Path) -> Path:
    cur = start.resolve()
    for _ in range(6):
        if (cur / "src").exists():
            return cur
        if cur.parent == cur:
            break
        cur = cur.parent
    return start.resolve()


NOTEBOOK_DIR = Path.cwd()
PROJECT_ROOT = _locate_repo_root(NOTEBOOK_DIR)
os.environ["PROJECT_ROOT"] = str(PROJECT_ROOT)
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from src.config import outputs_for_model
from src.evaluation import rmse
from src.models.ensemble import (
    equal_weight_ensemble,
    trimmed_mean_ensemble,
    median_ensemble,
    fit_stacking_ensemble,
    fit_ewa_ensemble,
)

# ----------------------------
# CONFIG
# ----------------------------
BASE_MODELS = [
    "elastic_net_dynamic_fi",
    "lightgbm_dynamic_fi",
    "svr_dynamic_fi",
    "tabpfn_dynamic_fi",
]

TRIM_ALPHAS = [0.00, 0.05, 0.10, 0.15, 0.20]
STACK_LAMBDAS = [1e-3, 1e-2, 1e-1, 1.0, 10.0]
EWA_ETAS = [0.05, 0.10, 0.20, 0.30, 0.50, 0.80, 1.0, 2.0]
EWA_DELTA = 0.95


# ----------------------------
# HELPERS: load Stage-A block preds (champion per block)
# ----------------------------
def load_stageA_block_champion_preds(model_name: str, block: int) -> pd.DataFrame:
    """
    Loads Stage-A block{block} preds for the best config in that block (lowest rmse in rmse.csv).
    Returns DataFrame indexed by date_t_plus_1 with columns: y_true, <model_name>
    """
    outs = outputs_for_model(model_name)
    block_dir = outs["stageA"] / f"block{block}"
    preds_path = block_dir / "preds.csv"
    rmse_path = block_dir / "rmse.csv"

    if not preds_path.exists() or not rmse_path.exists():
        raise FileNotFoundError(f"Missing StageA block{block} files for {model_name}: {preds_path} / {rmse_path}")

    df_rmse = pd.read_csv(rmse_path)
    rmse_col = "rmse" if "rmse" in df_rmse.columns else ("rmse_val" if "rmse_val" in df_rmse.columns else None)
    if rmse_col is None or "config_id" not in df_rmse.columns:
        raise ValueError(f"Unexpected rmse.csv format in {rmse_path}")

    best_cfg = df_rmse.sort_values(rmse_col).iloc[0]["config_id"]

    df = pd.read_csv(preds_path)
    if "date_t_plus_1" not in df.columns or "config_id" not in df.columns:
        raise ValueError(f"Unexpected preds.csv format in {preds_path}")

    df["date_t_plus_1"] = pd.to_datetime(df["date_t_plus_1"])
    df = df[df["config_id"] == best_cfg].copy()
    df = df.sort_values("date_t_plus_1").set_index("date_t_plus_1")

    if df.index.duplicated().any():
        df = df[~df.index.duplicated(keep="last")]

    if "y_true" not in df.columns or "y_pred" not in df.columns:
        raise ValueError(f"preds.csv missing y_true/y_pred in {preds_path}")

    return df[["y_true", "y_pred"]].rename(columns={"y_pred": model_name})


def align_pool(dfs: dict, model_names: list) -> tuple[pd.Series, pd.DataFrame, pd.DatetimeIndex]:
    common = None
    for d in dfs.values():
        common = d.index if common is None else common.intersection(d.index)
    if common is None or len(common) == 0:
        raise ValueError("No common dates across models.")
    common = common.sort_values()

    first = list(dfs.keys())[0]
    y = dfs[first].loc[common, "y_true"].copy()
    y.name = "y_true"

    F = pd.DataFrame(index=common)
    for m in model_names:
        F[m] = dfs[m].loc[common, m].astype(float)

    return y, F, common


def load_block_pool(block: int, model_names: list) -> tuple[pd.Series, pd.DataFrame, pd.DatetimeIndex]:
    dfs = {}
    for m in model_names:
        dfs[m] = load_stageA_block_champion_preds(m, block)
    return align_pool(dfs, model_names)


# ----------------------------
# LOAD Stage A blocks
# ----------------------------
print(f"PROJECT_ROOT: {PROJECT_ROOT}")

y1, F1, d1 = load_block_pool(1, BASE_MODELS)
y2, F2, d2 = load_block_pool(2, BASE_MODELS)
y3, F3, d3 = load_block_pool(3, BASE_MODELS)

# Tune set = Block1+2 (concatenate then align on shared columns; dates are disjoint anyway)
y_val = pd.concat([y1, y2]).sort_index()
F_val = pd.concat([F1, F2]).sort_index()

# Full for EWA run (val + block3), so it can arrive at block3 "trained"
y_full = pd.concat([y_val, y3]).sort_index()
F_full = pd.concat([F_val, F3]).sort_index()

val_dates = list(y_val.index)
test_dates = list(y3.index)

print("\nStage A blocks loaded:")
print(f"  Block1: {d1.min().date()} -> {d1.max().date()} (n={len(d1)})")
print(f"  Block2: {d2.min().date()} -> {d2.max().date()} (n={len(d2)})")
print(f"  Block3: {d3.min().date()} -> {d3.max().date()} (n={len(d3)})")
print(f"  VAL (B1+B2): n={len(y_val)} | TEST (B3): n={len(y3)}")

# ----------------------------
# TUNE + EVAL
# ----------------------------
results = []


def add_result(name: str, params: dict, yhat_val: pd.Series, yhat_test: pd.Series):
    r_val = rmse(y_val.values, yhat_val.loc[y_val.index].values)
    r_test = rmse(y3.values, yhat_test.loc[y3.index].values)
    results.append((name, params, r_val, r_test))


# Equal / Median (no params)
yhat_equal_val = equal_weight_ensemble(F_val)
yhat_equal_test = equal_weight_ensemble(F3)
add_result("Equal-Weight", {}, yhat_equal_val, yhat_equal_test)

yhat_median_val = median_ensemble(F_val)
yhat_median_test = median_ensemble(F3)
add_result("Median", {}, yhat_median_val, yhat_median_test)

# Trimmed mean: tune alpha on VAL
best_alpha, best_r = None, np.inf
for a in TRIM_ALPHAS:
    yh = trimmed_mean_ensemble(F_val, alpha=a)
    r = rmse(y_val.values, yh.values)
    if r < best_r:
        best_r, best_alpha = r, a

yhat_trim_val = trimmed_mean_ensemble(F_val, alpha=best_alpha)
yhat_trim_test = trimmed_mean_ensemble(F3, alpha=best_alpha)
add_result("Trimmed-Mean", {"alpha": best_alpha}, yhat_trim_val, yhat_trim_test)

# Stacking: tune lambda (and weights) on VAL, then apply to Block3
stack_res = fit_stacking_ensemble(
    y=y_val,
    F=F_val,
    cal_dates=val_dates,
    lambdas=STACK_LAMBDAS
)
w = stack_res.weights.values
yhat_stack_val = stack_res.y_pred.loc[y_val.index]
yhat_stack_test = pd.Series(F3.values @ w, index=F3.index, name="stacked")
add_result("Stacking", {"lambda": float(stack_res.lambda_opt)}, yhat_stack_val, yhat_stack_test)

# EWA: tune eta on VAL, run online over (VAL+Block3), evaluate on Block3
ewa_res = fit_ewa_ensemble(
    y=y_full,
    F=F_full,
    cal_dates=val_dates,
    etas=EWA_ETAS,
    delta=EWA_DELTA
)
yhat_ewa_val = ewa_res.y_pred.loc[y_val.index]
yhat_ewa_test = ewa_res.y_pred.loc[y3.index]
add_result("EWA", {"eta": float(ewa_res.eta_opt), "delta": float(ewa_res.delta)}, yhat_ewa_val, yhat_ewa_test)

# ----------------------------
# PRINT SUMMARY
# ----------------------------
print("\n" + "=" * 72)
print("TUNING on Block1+2, EVALUATION on Block3")
print("=" * 72)

# Pretty print
results_sorted = sorted(results, key=lambda x: x[3])  # sort by Block3 RMSE
for name, params, r_val, r_test in results_sorted:
    p = ", ".join([f"{k}={v}" for k, v in params.items()]) if params else "-"
    print(f"{name:<14} | params: {p:<20} | RMSE VAL(B1+2): {r_val:.4f} | RMSE TEST(B3): {r_test:.4f}")

best = results_sorted[0]
print("\nBest on Block3:")
print(f"  {best[0]}  (params: {best[1]})  -> RMSE(B3) = {best[3]:.4f}")

print("\n(Stacking weights from VAL):")
print(stack_res.weights)

print("\nDone.")


PROJECT_ROOT: /Users/jonasschernich/Documents/Masterarbeit/Code

Stage A blocks loaded:
  Block1: 2006-03-01 -> 2007-10-01 (n=20)
  Block2: 2007-11-01 -> 2009-06-01 (n=20)
  Block3: 2009-07-01 -> 2011-02-01 (n=20)
  VAL (B1+B2): n=40 | TEST (B3): n=20

TUNING on Block1+2, EVALUATION on Block3
Equal-Weight   | params: -                    | RMSE VAL(B1+2): 1.9974 | RMSE TEST(B3): 1.5429
Trimmed-Mean   | params: alpha=0.0            | RMSE VAL(B1+2): 1.9974 | RMSE TEST(B3): 1.5429
Median         | params: -                    | RMSE VAL(B1+2): 1.9981 | RMSE TEST(B3): 1.5513
EWA            | params: eta=2.0, delta=0.95  | RMSE VAL(B1+2): 1.9970 | RMSE TEST(B3): 1.5584
Stacking       | params: lambda=0.001         | RMSE VAL(B1+2): 1.9877 | RMSE TEST(B3): 1.6107

Best on Block3:
  Equal-Weight  (params: {})  -> RMSE(B3) = 1.5429

(Stacking weights from VAL):
elastic_net_dynamic_fi    1.054712e-15
lightgbm_dynamic_fi       0.000000e+00
svr_dynamic_fi            1.000000e+00
tabpfn_dynamic_f

In [5]:
# # ==============================================================================
# Masterarbeit Ensemble Tuning & Evaluation (Full Timeline 2011-2024)
# ==============================================================================

import os
import sys
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# --- Projekt-Root finden ---
def _locate_repo_root(start: Path) -> Path:
    cur = start.resolve()
    for _ in range(5):
        if (cur / "src").exists(): return cur
        if cur.parent == cur: break
        cur = cur.parent
    return start.resolve()

NOTEBOOK_DIR = Path.cwd()
PROJECT_ROOT = _locate_repo_root(NOTEBOOK_DIR)
os.environ["PROJECT_ROOT"] = str(PROJECT_ROOT)
if str(PROJECT_ROOT) not in sys.path: sys.path.insert(0, str(PROJECT_ROOT))

# Imports
from src.config import GlobalConfig
from src.evaluation import rmse, mae
from src.models.ensemble import (
    load_level0_pool,
    load_calibration_pool,
    equal_weight_ensemble,
    trimmed_mean_ensemble,
    median_ensemble,
    fit_stacking_ensemble,
    fit_ewa_ensemble,
)

print(f"Projekt-Root: {PROJECT_ROOT}")

# --- KONFIGURATION ---
# Die exakten Ordnernamen unter outputs/stageB/
BASE_MODELS = [
    "elastic_net_dynamic_fi",
    "lightgbm_dynamic_fi",
    "svr_dynamic_fi",
    "tabpfn_dynamic_fi",
]

STACKING_LAMBDAS = [1e-2, 0.1, 1.0]
EWA_ETAS = [0.1, 0.3, 0.2, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 3]
EWA_DELTA = 0.95

OUT_DIR = PROJECT_ROOT / "outputs" / "ensembles"
OUT_DIR.mkdir(parents=True, exist_ok=True)

# --- DATEN LADEN (Stage A + Stage B) ---

print("\n1. Lade Stage B (Test Set)...")
try:
    pool_test = load_level0_pool(BASE_MODELS)
    F_test = pool_test.F
    y_test = pool_test.y_true
    dates_test = pool_test.dates
    print(f"   Test Period: {dates_test[0].date()} bis {dates_test[-1].date()} (n={len(dates_test)})")
except Exception as e:
    print(f"   FEHLER: Konnte Stage B nicht laden: {e}")
    raise

print("\n2. Lade Stage A (Calibration Set, Block 3)...")
try:
    pool_cal = load_calibration_pool(BASE_MODELS)
    F_cal = pool_cal.F
    y_cal = pool_cal.y_true
    dates_cal = pool_cal.dates
    print(f"   Cal Period:  {dates_cal[0].date()} bis {dates_cal[-1].date()} (n={len(dates_cal)})")

    # Merge für Training (Historie A + B)
    F_full = pd.concat([F_cal, F_test])
    y_full = pd.concat([y_cal, y_test])
    # Sicherstellen, dass Index sortiert ist
    F_full.sort_index(inplace=True)
    y_full.sort_index(inplace=True)

    cal_dates_list = list(dates_cal) # Wir trainieren NUR auf A

except Exception as e:
    print(f"   WARNUNG: Konnte Stage A nicht laden ({e}).")
    print("   Fallback: Nutze die ersten 24 Monate von Stage B als Kalibrierung (Verlust von Testdaten!).")
    split_idx = 24
    F_full = F_test
    y_full = y_test
    cal_dates_list = list(dates_test[:split_idx])
    # Für die Evaluation später schneiden wir dann ab

# --- ENSEMBLE BERECHNUNG ---

print("\n--- Benchmark Ensembles (Equal/Trim/Median) ---")
# Diese brauchen kein Training, können direkt auf Test berechnet werden
ens_equal = equal_weight_ensemble(F_test)
ens_trim10 = trimmed_mean_ensemble(F_test, alpha=0.10)
ens_median = median_ensemble(F_test)

print("\n--- Stacking (Learned on Stage A) ---")
stack_res = fit_stacking_ensemble(
    y=y_full,
    F=F_full,
    cal_dates=cal_dates_list,
    lambdas=STACKING_LAMBDAS
)
# Extrahiere Vorhersagen nur für Stage B
ens_stack = stack_res.y_pred.loc[dates_test]

print(f"  Best Lambda: {stack_res.lambda_opt}")
print("  Weights (from Stage A):")
print(stack_res.weights)

print("\n--- EWA (Online Learning A -> B) ---")
# EWA läuft über die ganze Historie (A+B), damit es bei B schon "schlau" ist
ewa_res = fit_ewa_ensemble(
    y=y_full,
    F=F_full,
    cal_dates=cal_dates_list, # Tuning von Eta auf A
    etas=EWA_ETAS,
    delta=EWA_DELTA
)
ens_ewa = ewa_res.y_pred.loc[dates_test]

print(f"  Best Eta: {ewa_res.eta_opt}")

# --- EVALUATION ---

df_ens = pd.DataFrame(index=dates_test)
df_ens["y_true"] = y_test
for m in BASE_MODELS:
    df_ens[f"base_{m}"] = F_test[m]

df_ens["ens_equal"] = ens_equal
df_ens["ens_stack"] = ens_stack
df_ens["ens_ewa"] = ens_ewa
df_ens["dispersion"] = F_test.std(axis=1)

print("\n" + "="*60)
print(f"RESULTS (Full Stage B: {dates_test[0].date()} - {dates_test[-1].date()})")
print("="*60)

def score(y, y_hat, label):
    r = rmse(y, y_hat)
    print(f"{label:<25} | RMSE: {r:.4f}")

print("--- Base Models ---")
for m in BASE_MODELS:
    score(y_test, F_test[m], m)

print("\n--- Ensembles ---")
score(y_test, ens_equal, "Equal-Weight")
score(y_test, ens_stack, "Stacked (Static)")
score(y_test, ens_ewa, "EWA (Online)")

# Speichern
df_ens.to_csv(OUT_DIR / "ensemble_predictions.csv")
print(f"\nSaved to {OUT_DIR}")
# ==============================================================================
# 12. Export als "Fake" Stage B Modelle für einfache Vergleichbarkeit
# ==============================================================================
# Ziel: Erstelle Ordnerstruktur wie bei normalen Modellen:
# outputs/stageB/ensemble_stacking/monthly/preds.csv
# outputs/stageB/ensemble_ewa/monthly/preds.csv
# ...

def export_ensemble_to_stageB(
    df_results: pd.DataFrame,
    col_name: str,
    model_export_name: str,
    base_dir: Path
):
    """
    Speichert eine Ensemble-Spalte im Format der Stage B Predictions.
    """
    # Zielverzeichnis erstellen
    model_dir = base_dir / "stageB" / model_export_name
    monthly_dir = model_dir / "monthly"
    summary_dir = model_dir / "summary"

    monthly_dir.mkdir(parents=True, exist_ok=True)
    summary_dir.mkdir(parents=True, exist_ok=True)

    # 1. preds.csv erstellen
    # Das Format muss exakt den anderen Modellen entsprechen
    df_out = pd.DataFrame()
    df_out["date_t_plus_1"] = df_results.index
    # Wir brauchen eine Dummy 't' Spalte (Origin), nehmen wir einfach Monat davor
    df_out["t"] = df_results.index - pd.DateOffset(months=1)
    df_out["y_true"] = df_results["y_true"].values
    df_out["y_pred"] = df_results[col_name].values

    # Metadaten faken, damit die Analysis-Skripte nicht abstürzen
    df_out["config_id"] = "ensemble_v1" # Dummy Config ID
    df_out["is_active"] = True          # Ensembles sind immer "aktiv"

    # Speichern
    out_path = monthly_dir / "preds.csv"
    df_out.to_csv(out_path, index=False)
    print(f"  Exportiert: {out_path}")

    # 2. summary.csv erstellen (wird oft zum Laden der Configs gesucht)
    # Berechne RMSE für dieses Ensemble
    mse = ((df_out["y_true"] - df_out["y_pred"])**2).mean()
    rmse_val = np.sqrt(mse)

    df_sum = pd.DataFrame([{
        "config_id": "ensemble_v1",
        "model": model_export_name,
        "rmse_overall": rmse_val, # Gesamt-RMSE
        "setup_name": "Ensemble"
    }])
    df_sum.to_csv(summary_dir / "summary.csv", index=False)

print("\n--- Exportiere Ensembles in Stage B Struktur ---")

# Mapping: Spaltenname im DataFrame -> Name des Ordners in outputs/stageB
ensembles_to_export = {
    "ens_equal":  "ensemble_equal_weight",
    "ens_trim10": "ensemble_trimmed",
    "ens_median": "ensemble_median",
    "ens_stack":  "ensemble_stacking",
    "ens_ewa":    "ensemble_ewa"
}

OUTPUTS_ROOT = PROJECT_ROOT / "outputs"

for col, folder_name in ensembles_to_export.items():
    if col in df_ens.columns:
        export_ensemble_to_stageB(df_ens, col, folder_name, OUTPUTS_ROOT)

print("\nFertig! Die Ensembles sind jetzt bereit für die Vergleichs-Notebooks.")

Projekt-Root: /Users/jonasschernich/Documents/Masterarbeit/Code

1. Lade Stage B (Test Set)...
   Test Period: 2011-03-01 bis 2024-12-01 (n=166)

2. Lade Stage A (Calibration Set, Block 3)...
   Cal Period:  2009-07-01 bis 2011-02-01 (n=20)

--- Benchmark Ensembles (Equal/Trim/Median) ---

--- Stacking (Learned on Stage A) ---
  Best Lambda: 0.01
  Weights (from Stage A):
elastic_net_dynamic_fi    1.000000e+00
lightgbm_dynamic_fi       5.606626e-15
svr_dynamic_fi            1.454392e-14
tabpfn_dynamic_fi         0.000000e+00
Name: weights, dtype: float64

--- EWA (Online Learning A -> B) ---
  Best Eta: 0.1

RESULTS (Full Stage B: 2011-03-01 - 2024-12-01)
--- Base Models ---
elastic_net_dynamic_fi    | RMSE: 2.3141
lightgbm_dynamic_fi       | RMSE: 2.3706
svr_dynamic_fi            | RMSE: 2.3422
tabpfn_dynamic_fi         | RMSE: 2.3302

--- Ensembles ---
Equal-Weight              | RMSE: 2.3175
Stacked (Static)          | RMSE: 2.3141
EWA (Online)              | RMSE: 2.3170

Saved to 