In [1]:
# =========================
# Elastic Net – Tuning-Pipeline (ET-Logik)
# =========================
# - Ein Master-Schalter:
#     USE_DYNAMIC_FI_PIPELINE = False -> Full FE (ifo + optionale Target-Blöcke)
#     USE_DYNAMIC_FI_PIPELINE = True  -> Dynamic FI (rolling Feature Importance Top-N)
# - Modell: src.models.EN.ForecastModel
# - Grid-Erzeugung mit itertools.product (keine tiefen For-Schachtelungen)

import os, sys
from pathlib import Path
import numpy as np
import pandas as pd
from itertools import product

# --- 1) Pfad-Setup (wie in ET.ipynb) ---
def _locate_repo_root(start: Path) -> Path:
    """Find the nearest directory at or above *start* that contains ``src``.

    The resolved start directory and its ancestors are inspected nearest
    first, and at most six directories are examined in total.

    Args:
        start: Directory from which the upward search begins.

    Returns:
        The first inspected directory that has a ``src`` entry, or the
        resolved *start* itself when none of the inspected directories does.
    """
    origin = start.resolve()
    # The slice caps the walk at six levels; ``parents`` already ends at the
    # filesystem root, so no separate root check is required.
    candidates = (origin, *origin.parents)[:6]
    return next((folder for folder in candidates if (folder / "src").exists()), origin)

# Resolve the project root starting from the current working directory.
NOTEBOOK_DIR = Path.cwd()
PROJECT_ROOT = _locate_repo_root(NOTEBOOK_DIR)

# Publish the root through the environment and put it at the front of the
# import path (only once), so the ``src`` imports further down can resolve.
_project_root_str = str(PROJECT_ROOT)
os.environ["PROJECT_ROOT"] = _project_root_str
if _project_root_str not in sys.path:
    sys.path.insert(0, _project_root_str)

print("PROJECT_ROOT =", PROJECT_ROOT)

# --- 2) Imports aus dem Repo ---
from src.config import (
    GlobalConfig,
    DEFAULT_CORR_SPEC,    # expanding
    EWMA_CORR_SPEC,       # ewma
    outputs_for_model,
)
from src.tuning import run_stageA, run_stageB
from src.io_timesplits import (
    load_target,
    load_ifo_features,
    load_full_lagged_features,
    load_rolling_importance,
)
from src.models.EN import ForecastModel  # ElasticNet-Wrapper

# --- 3) Master switch & metadata ---
# False => Full FE pipeline; True => Dynamic FI pipeline.
USE_DYNAMIC_FI_PIPELINE = False
SEED = 42

# The switch also decides under which model name the run is registered.
MODEL_NAME = "elastic_net_dynamic_fi" if USE_DYNAMIC_FI_PIPELINE else "elastic_net"

# NOTE(review): presumably prepares the output locations for this model name;
# confirm against src.config.outputs_for_model.
outputs_for_model(MODEL_NAME)
print(f"Modell {MODEL_NAME} wird getunt.")

# --- 4) Load data ---
y = load_target()             # target series (ΔIP per the original note)
X_ifo = load_ifo_features()   # ifo features (used by the Full FE pipeline)

FI_BASE_DIR = PROJECT_ROOT / "outputs" / "feature_importance" / "outputs_no_missing"
if not USE_DYNAMIC_FI_PIPELINE:
    # Full FE: restrict target and ifo features to their shared index.
    idx_common = y.index.intersection(X_ifo.index)
    y, X_ifo = y.loc[idx_common], X_ifo.loc[idx_common]
    # The Dynamic-FI inputs are unused in this mode.
    X_full_lagged = None
    rolling_imp = None
    y_fi = None
    print("Full-FE Daten geladen. Shapes:", X_ifo.shape, y.shape)
else:
    try:
        X_full_lagged = load_full_lagged_features(base_dir=FI_BASE_DIR)
        rolling_imp = load_rolling_importance(base_dir=FI_BASE_DIR)
        # Keep only the rows present in the target, the lagged features and
        # the rolling importance table alike.
        _idx_target_and_lagged = y.index.intersection(X_full_lagged.index)
        idx_fi = _idx_target_and_lagged.intersection(rolling_imp.index)
        y_fi = y.loc[idx_fi]
        X_full_lagged = X_full_lagged.loc[idx_fi]
        rolling_imp = rolling_imp.loc[idx_fi]
        print("Dynamic-FI Daten gefunden. Shapes:", X_full_lagged.shape, rolling_imp.shape, y_fi.shape)
    except FileNotFoundError as exc:
        # Explain the missing artefacts, then let the error propagate.
        print("FEHLER beim Laden der Dynamic-FI Artefakte:", exc)
        print("Bitte erst die FI-Jobs laufen lassen (feature_importance.ipynb).")
        raise

# --- 5) Base-Config (wie Thesis-Policy in ET) ---
def base_cfg_thesis() -> GlobalConfig:
    """Build the base configuration used for every tuning run in this cell.

    Returns:
        A ``GlobalConfig`` created with ``preset="thesis"`` whose four
        ``policy_*`` attributes are overridden with the values below.
    """
    policy_overrides = {
        "policy_window": 24,
        "policy_decay": 0.95,
        "policy_gain_min": 0.03,
        "policy_cooldown": 3,
    }
    config = GlobalConfig(preset="thesis")
    for attr_name, attr_value in policy_overrides.items():
        setattr(config, attr_name, attr_value)
    return config


# Single shared configuration instance handed to both tuning stages.
cfg0 = base_cfg_thesis()

# --- 6) Corr-Helper (wie ET) ---
def make_corr_spec(kind: str) -> dict:
    """Return a fresh shallow copy of the correlation spec selected by *kind*.

    Args:
        kind: ``"expanding"`` selects ``DEFAULT_CORR_SPEC``; ``"ewm"``
            selects ``EWMA_CORR_SPEC``.

    Returns:
        A new ``dict`` built from the selected spec.

    Raises:
        ValueError: If *kind* is neither ``"expanding"`` nor ``"ewm"``.
    """
    # Reject unknown kinds up front so the happy path stays flat.
    if kind not in ("expanding", "ewm"):
        raise ValueError("kind must be 'expanding' or 'ewm'")
    template = DEFAULT_CORR_SPEC if kind == "expanding" else EWMA_CORR_SPEC
    return dict(template)

# --- 7) Grids -------------------------------------------------
if USE_DYNAMIC_FI_PIPELINE:
    # --------- Dynamic FI (model hyper-parameters + n_features_to_use only) ----------
    print("Erstelle HP-Grid für 'Dynamic FI'...")

    # Stored in every grid entry under the key "n_features_to_use".
    N_FEATURES_TO_USE = 20

    # Elastic Net hyper-parameters (thesis notation):
    alpha_list   = [0.1, 0.5, 0.9]               # mixing parameter -> l1_ratio
    lambda_list  = [1e-4, 1e-3, 1e-2, 1e-1]      # penalty strength -> sklearn alpha
    # Sample weighting variants: no decay (None) and a decay of 0.98.
    weight_opts  = [{"sample_weight_decay": None},
                    {"sample_weight_decay": 0.98}]

    def build_model_grid_dynamic_fi():
        """Enumerate the Dynamic-FI hyper-parameter grid.

        Returns:
            A list with one dict per combination of ``alpha_list``,
            ``lambda_list`` and ``weight_opts`` (the weighting option varies
            fastest). Each dict holds ``alpha``, ``lambda``, ``seed`` and
            ``n_features_to_use`` followed by the keys of the weighting
            option.
        """
        return [
            {
                "alpha": mixing,
                "lambda": penalty,
                "seed": SEED,
                "n_features_to_use": N_FEATURES_TO_USE,
                **weighting,
            }
            for mixing, penalty, weighting in product(alpha_list, lambda_list, weight_opts)
        ]

    model_grid = build_model_grid_dynamic_fi()

else:
    # ----------------- Full FE (same logic as the ET notebook) ---------------------
    print("Erstelle HP-Grid für 'Full FE'...")

    # A) Feature-engineering / dimensionality-reduction lists (reduced as in ET.ipynb)
    # Each entry pairs a tag with the spec produced by make_corr_spec for it.
    corr_options = [
        ("expanding", make_corr_spec("expanding")),
        ("ewm",       make_corr_spec("ewm")),
    ]
    lag_candidates_list   = [(1, 2, 3, 6, 12)]
    top_k_lags_list       = [1]
    use_rm3_list          = [True]
    k1_topk_list          = [100, 300]
    redundancy_param_list = [0.90]
    # One dict per dimensionality-reduction variant; its keys are merged into
    # each grid entry by build_model_grid_full_fe.
    dr_options_list       = [
        {"dr_method": "none"},
        {"dr_method": "pca", "pca_var_target": 0.95, "pca_kmax": 50},
        {"dr_method": "pls", "pls_components": 8},
    ]

    # B) Elastic Net hyper-parameters (compact)
    alpha_list   = [0.1, 0.5, 0.9]               # mixing -> l1_ratio
    lambda_list  = [1e-4, 1e-3, 1e-2, 1e-1]      # penalty -> sklearn alpha

    # C) Target blocks & sample weighting
    target_block_options = [None, ["AR1"], ["Chronos"], ["TSFresh"]]
    weight_opts          = [{"sample_weight_decay": None}]

    def build_model_grid_full_fe():
        """Enumerate the Full-FE hyper-parameter grid.

        All option lists are crossed in a single ``product`` call, with the
        correlation option varying slowest and the weighting option fastest.
        Combinations with ``k1_topk == 100`` and a ``dr_method`` other than
        ``"none"`` are left out.

        Returns:
            A list of dicts. Each dict holds the feature-engineering settings,
            the keys of the chosen DR option, ``corr_tag``/``corr_spec``, the
            model settings (``alpha``, ``lambda``, ``seed``),
            ``target_block_set`` and the keys of the weighting option.
        """
        axes = (
            corr_options,             # (corr_tag, corr_spec)
            lag_candidates_list,      # lag_candidates
            top_k_lags_list,          # top_k_lags_per_feature
            use_rm3_list,             # use_rm3
            k1_topk_list,             # k1_topk
            redundancy_param_list,    # redundancy_param
            dr_options_list,          # DR option dict
            alpha_list,               # alpha
            lambda_list,              # lambda
            target_block_options,     # target_block_set
            weight_opts,              # weighting dict
        )

        grid = []
        for combo in product(*axes):
            (tag, spec), lag_set, n_lags, rm3_flag, k1_value, redundancy, dr_choice, \
                mixing, penalty, blocks, weighting = combo

            # Same small restriction as in the ET notebook: DR variants are
            # only explored for the larger k1 setting.
            if k1_value == 100 and dr_choice["dr_method"] != "none":
                continue

            grid.append(
                {
                    "lag_candidates": lag_set,
                    "top_k_lags_per_feature": n_lags,
                    "use_rm3": rm3_flag,
                    "k1_topk": k1_value,
                    "redundancy_param": redundancy,
                    **dr_choice,
                    "corr_tag": tag,
                    "corr_spec": spec,
                    "alpha": mixing,
                    "lambda": penalty,
                    "seed": SEED,
                    "target_block_set": blocks,
                    **weighting,
                }
            )

        return grid

    model_grid = build_model_grid_full_fe()

_n_configs = len(model_grid)
print("Optimierte HP-Kombinationen:", _n_configs)
print("Erstes HP-Set:", model_grid[0] if model_grid else "Grid ist leer")

# --- 8) Stage A/B run (as in ET) ------------------------------------------
if not model_grid:
    print("Keine gültigen HP-Kombinationen gefunden, Stages übersprungen.")
else:
    if USE_DYNAMIC_FI_PIPELINE:
        # Dynamic FI run: X_ifo is passed only as a placeholder; the lagged
        # features and the rolling importance table are handed over as well.
        _y_target = y_fi
        _fi_kwargs = {"X_full_lagged": X_full_lagged, "rolling_imp": rolling_imp}
    else:
        # Full FE run: no additional inputs.
        _y_target = y
        _fi_kwargs = {}

    shortlist = run_stageA(
        model_name=MODEL_NAME,
        model_ctor=lambda hp: ForecastModel(hp),
        model_grid=model_grid,
        X=X_ifo,
        y=_y_target,
        cfg=cfg0,
        keep_top_k_final=min(5, _n_configs),
        min_survivors_per_block=max(1, _n_configs // 4),
        **_fi_kwargs,
    )
    # Stage B receives the shortlist returned by Stage A.
    run_stageB(
        model_name=MODEL_NAME,
        model_ctor=lambda hp: ForecastModel(hp),
        shortlist=shortlist,
        X=X_ifo,
        y=_y_target,
        cfg=cfg0,
        **_fi_kwargs,
    )

print(f"\nDone. Check outputs/stageA|stageB/{MODEL_NAME} for results.")




PROJECT_ROOT = /Users/jonasschernich/Documents/Masterarbeit/Code
INFO in load_ifo_features: Renaming columns to ensure validity.
Shapes (Setup I/II Basis): (407, 2160) (407,)
Dynamic-FI Inputs gefunden. Shapes: (407, 2160) (407, 2160) (407,)

=== Setup I (ifo-only) :: elastic_net_setup1_ifo_only ===
Grid size: 1680
[Stage A] Using FULL FE pipeline (Gleis 1 & 2).
[Stage A][Block 1] train_end=180, OOS=181-200 | configs=1680
  - Config 1/1680
    · Month 5/20 processed | running...RMSE=16.7824
    · Month 10/20 processed | running...RMSE=18.2129
    · Month 15/20 processed | running...RMSE=18.7451
    · Month 20/20 processed | running...RMSE=18.9708
  - Config 2/1680


  model = cd_fast.enet_coordinate_descent(


    · Month 5/20 processed | running...RMSE=16.6593
    · Month 10/20 processed | running...RMSE=18.0823
    · Month 15/20 processed | running...RMSE=18.6121
    · Month 20/20 processed | running...RMSE=18.8366
  - Config 3/1680


  model = cd_fast.enet_coordinate_descent(


    · Month 5/20 processed | running...RMSE=15.5151
    · Month 10/20 processed | running...RMSE=16.8682
    · Month 15/20 processed | running...RMSE=17.3758
    · Month 20/20 processed | running...RMSE=17.5889
  - Config 4/1680


  model = cd_fast.enet_coordinate_descent(


    · Month 5/20 processed | running...RMSE=8.2350
    · Month 10/20 processed | running...RMSE=9.1387
    · Month 15/20 processed | running...RMSE=9.5058
    · Month 20/20 processed | running...RMSE=9.6434
  - Config 5/1680


  model = cd_fast.enet_coordinate_descent(


    · Month 5/20 processed | running...RMSE=1.6207
    · Month 10/20 processed | running...RMSE=1.9438
    · Month 15/20 processed | running...RMSE=2.1564
    · Month 20/20 processed | running...RMSE=2.1763
  - Config 6/1680
    · Month 5/20 processed | running...RMSE=16.7784
    · Month 10/20 processed | running...RMSE=18.2087
    · Month 15/20 processed | running...RMSE=18.7408
    · Month 20/20 processed | running...RMSE=18.9665
  - Config 7/1680


  model = cd_fast.enet_coordinate_descent(


    · Month 5/20 processed | running...RMSE=16.6137
    · Month 10/20 processed | running...RMSE=18.0339
    · Month 15/20 processed | running...RMSE=18.5628
    · Month 20/20 processed | running...RMSE=18.7869
  - Config 8/1680


  model = cd_fast.enet_coordinate_descent(


    · Month 5/20 processed | running...RMSE=13.9620
    · Month 10/20 processed | running...RMSE=15.2199
    · Month 15/20 processed | running...RMSE=15.6975
    · Month 20/20 processed | running...RMSE=15.8950
  - Config 9/1680


  model = cd_fast.enet_coordinate_descent(


    · Month 5/20 processed | running...RMSE=2.3182
    · Month 10/20 processed | running...RMSE=2.7982
    · Month 15/20 processed | running...RMSE=3.0468
    · Month 20/20 processed | running...RMSE=3.0989
  - Config 10/1680


  model = cd_fast.enet_coordinate_descent(


    · Month 5/20 processed | running...RMSE=1.8056
    · Month 10/20 processed | running...RMSE=1.4598
    · Month 15/20 processed | running...RMSE=1.3583
    · Month 20/20 processed | running...RMSE=1.2277
  - Config 11/1680
    · Month 5/20 processed | running...RMSE=16.7743
    · Month 10/20 processed | running...RMSE=18.2043
    · Month 15/20 processed | running...RMSE=18.7363
    · Month 20/20 processed | running...RMSE=18.9620
  - Config 12/1680


  model = cd_fast.enet_coordinate_descent(


    · Month 5/20 processed | running...RMSE=16.5569
    · Month 10/20 processed | running...RMSE=17.9737
    · Month 15/20 processed | running...RMSE=18.5015
    · Month 20/20 processed | running...RMSE=18.7250
  - Config 13/1680


  model = cd_fast.enet_coordinate_descent(


    · Month 5/20 processed | running...RMSE=11.9566
    · Month 10/20 processed | running...RMSE=13.0913
    · Month 15/20 processed | running...RMSE=13.5301
    · Month 20/20 processed | running...RMSE=13.7072
  - Config 14/1680


  model = cd_fast.enet_coordinate_descent(


KeyboardInterrupt: 