In [3]:
# --- Setup & Imports ---
import os, sys
from pathlib import Path
import pandas as pd
import numpy as np

# --- 1. Pfad-Setup (wie in ET.ipynb) ---
def _locate_repo_root(start: Path) -> Path:
    """Return the nearest directory at or above *start* that has a ``src`` entry.

    *start* is resolved first. The resolved directory itself and at most four
    of its ancestors (five candidates in total, nearest first) are inspected.

    Args:
        start: Directory from which the upward search begins.

    Returns:
        The first candidate containing an entry named ``src``; if none of the
        inspected candidates has one, the resolved *start*.
    """
    origin = start.resolve()
    # `parents` ends at the filesystem root, so the walk stops there on its own.
    for candidate in (origin, *origin.parents)[:5]:
        if (candidate / 'src').exists():
            return candidate
    return origin

# Treat the current working directory as the notebook's location.
NOTEBOOK_DIR = Path.cwd()
# Nearest directory at or above NOTEBOOK_DIR that contains `src`
# (falls back to NOTEBOOK_DIR itself when none is found nearby).
PROJECT_ROOT = _locate_repo_root(NOTEBOOK_DIR)
# Publish the root via the environment.
# NOTE(review): presumably read by project code such as src.config - confirm.
os.environ['PROJECT_ROOT'] = str(PROJECT_ROOT)
# Put the root first on sys.path so the `src` package imports below resolve;
# skipped when the entry is already present (e.g. on a cell re-run).
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from src.config import GlobalConfig, DEFAULT_CORR_SPEC, EWMA_CORR_SPEC, outputs_for_model
from src.tuning import run_stageA, run_stageB
from src.io_timesplits import (
    load_target, load_ifo_features,
    load_full_lagged_features, load_rolling_importance
)
# --- MODELL-IMPORT (LGBM-Wrapper im ET/EN-API-Stil) ---
from src.models.LGBM import ForecastModel  # LightGBM Regressor-Wrapper

print('PROJECT_ROOT =', PROJECT_ROOT)

# --- 2. Master switch & paths ---
# False => regular FE pipeline, True => Dynamic FI pipeline.
USE_DYNAMIC_FI_PIPELINE = False
# Directory the Dynamic-FI loaders are pointed at (used as `base_dir`).
FI_PATH = PROJECT_ROOT.joinpath("outputs", "feature_importance", "outputs_no_missing")

# The model name depends on the selected pipeline.
if USE_DYNAMIC_FI_PIPELINE:
    MODEL_NAME = "lightgbm_dynamic_fi_debug"
else:
    MODEL_NAME = "lightgbm_debug"

# Return value is intentionally ignored.
# NOTE(review): presumably prepares the output locations for this model - confirm in src.config.
outputs_for_model(MODEL_NAME)
print(f'Modell {MODEL_NAME} wird getunt (DEBUG).')

# --- 3. Daten laden ---
# Target series (ΔIP with a DatetimeIndex, per the original note) and ifo features.
y = load_target()
X_ifo = load_ifo_features()

if not USE_DYNAMIC_FI_PIPELINE:
    # Regular FE pipeline: no Dynamic-FI artefacts are needed.
    X_full_lagged = None
    rolling_imp = None
    # Restrict target and features to the index labels they share.
    idx = y.index.intersection(X_ifo.index)
    y = y.loc[idx]
    X_ifo = X_ifo.loc[idx]
    print('Normale FE-Daten geladen. Shapes:', X_ifo.shape, y.shape)
else:
    try:
        X_full_lagged = load_full_lagged_features(base_dir=FI_PATH)
        rolling_imp = load_rolling_importance(base_dir=FI_PATH)
        # Keep only index labels present in the target and in both artefacts.
        idx_fi = y.index.intersection(X_full_lagged.index).intersection(rolling_imp.index)
        y_fi = y.loc[idx_fi]
        X_full_lagged = X_full_lagged.loc[idx_fi]
        rolling_imp = rolling_imp.loc[idx_fi]
        print('Dynamic-FI Daten geladen. Shapes:', X_full_lagged.shape, rolling_imp.shape)
    except FileNotFoundError as exc:
        # Print a hint about the missing artefacts, then let the error propagate.
        print(f"FEHLER: {exc}")
        print("Hinweis: Für Dynamic FI müssen die Artefakte aus feature_importance.ipynb vorhanden sein.")
        raise

# --- 4. Base config: DEBUG (mit Fallback) ---
def base_cfg_debug() -> GlobalConfig:
    """Build the base configuration used for the debug run.

    The ``"debug"`` preset is tried first. If constructing it raises, the
    ``"thesis"`` preset is used instead and the reason is printed, so an
    unexpectedly different preset does not go unnoticed. The same compact
    policy overrides are applied in both cases.

    Returns:
        The configuration object with ``policy_window``, ``policy_decay``,
        ``policy_gain_min`` and ``policy_cooldown`` overridden.
    """
    try:
        cfg = GlobalConfig(preset="debug")
    except Exception as exc:
        # Deliberate best-effort fallback (the exception type raised for an
        # unknown preset is not visible here), but report it instead of
        # swallowing it silently.
        print(f"WARNUNG: GlobalConfig(preset='debug') fehlgeschlagen ({exc!r}); Fallback auf preset='thesis'.")
        cfg = GlobalConfig(preset="thesis")

    # Compact policy settings, identical for the preset and the fallback.
    cfg.policy_window = 12
    cfg.policy_decay = 0.95
    cfg.policy_gain_min = 0.05
    cfg.policy_cooldown = 2
    return cfg

# Base configuration passed as `cfg` to the Stage A and Stage B runs below.
cfg0 = base_cfg_debug()

# --- 5. Corr-Spezifikation (wie ET) ---
def make_corr_spec(kind: str) -> dict:
    """Return a fresh shallow copy of the correlation spec selected by *kind*.

    Args:
        kind: ``'expanding'`` selects ``DEFAULT_CORR_SPEC``; ``'ewm'`` selects
            ``EWMA_CORR_SPEC``.

    Returns:
        A new ``dict`` built from the selected spec, so the caller can modify
        it without touching the shared constant's top-level keys.

    Raises:
        ValueError: If *kind* is neither ``'expanding'`` nor ``'ewm'``.
    """
    if kind not in ('expanding', 'ewm'):
        raise ValueError("kind must be 'expanding' or 'ewm'")
    template = DEFAULT_CORR_SPEC if kind == 'expanding' else EWMA_CORR_SPEC
    return dict(template)

# --- 6. Mini-Grids ohne tiefe Schachtelung (je Modus genau 1 HP-Set) ---

# LightGBM hyper-parameters shared by both debug grids. Kept in one place so
# the two modes cannot drift apart; early stopping is switched off for speed.
_LGBM_DEBUG_HP = {
    'n_estimators': 300,
    'learning_rate': 0.05,
    'num_leaves': 31,
    'max_depth': -1,
    'min_child_samples': 20,
    'colsample_bytree': 1.0,
    'subsample': 1.0,
    'bagging_freq': 0,
    'reg_alpha': 0.0,
    'reg_lambda': 0.0,
    'min_split_gain': 0.0,
    'min_child_weight': 1e-3,
    'importance_type': 'gain',
    'early_stopping_rounds': None,
    'val_tail': None,
    'seed': 42,
    'n_jobs': 1,
}

if USE_DYNAMIC_FI_PIPELINE:
    print("Erstelle MINI-Grid für 'Dynamic FI' (DEBUG)...")
    # Key order matches the previous literal: Dynamic-FI setting first, then
    # the LightGBM block, then the weighting option.
    model_grid = [{
        'n_features_to_use': 10,
        **_LGBM_DEBUG_HP,
        'sample_weight_decay': None,
    }]
else:
    print("Erstelle MINI-Grid für 'Full FE' (DEBUG)...")
    # A single correlation variant and a single FE combination.
    corr_tag, corr_spec = ("expanding", make_corr_spec("expanding"))

    model_grid = [{
        # --- FE/DR/blocks (minimal) ---
        'lag_candidates': (1, 3, 12),
        'top_k_lags_per_feature': 1,
        'use_rm3': True,
        'k1_topk': 100,
        'redundancy_param': 0.90,
        'dr_method': 'none',
        'corr_tag': corr_tag,
        'corr_spec': corr_spec,
        'target_block_set': None,
        'sample_weight_decay': None,
        # --- LightGBM block ---
        **_LGBM_DEBUG_HP,
    }]

print("HP-Kombinationen (DEBUG):", len(model_grid))
print("Erstes HP-Set:", model_grid[0] if model_grid else "Grid ist leer")

# --- 7. Stage A/B Lauf (minimale Einstellungen) ---
if model_grid:
    # Only the target series and the Dynamic-FI artefacts differ between modes.
    if USE_DYNAMIC_FI_PIPELINE:
        # X_ifo is still passed as X below, but only as a placeholder
        # (per the original note); the FI artefacts carry the features.
        y_run = y_fi
        fi_kwargs = dict(X_full_lagged=X_full_lagged, rolling_imp=rolling_imp)
    else:
        y_run = y
        fi_kwargs = {}

    # Arguments common to Stage A and Stage B.
    stage_kwargs = dict(
        model_name=MODEL_NAME,
        model_ctor=lambda hp: ForecastModel(hp),
        X=X_ifo,
        y=y_run,
        cfg=cfg0,
        **fi_kwargs,
    )

    # Stage A returns the shortlist that Stage B consumes.
    shortlist = run_stageA(
        model_grid=model_grid,
        keep_top_k_final=1,
        min_survivors_per_block=1,
        **stage_kwargs,
    )
    run_stageB(shortlist=shortlist, **stage_kwargs)

    # Report completion only when the stages actually ran.
    print(f"\nDone (DEBUG). Results in outputs/stageA|stageB/{MODEL_NAME}.")
else:
    print("Keine gültigen HP-Kombinationen gefunden, Stages übersprungen.")



PROJECT_ROOT = /Users/jonasschernich/Documents/Masterarbeit/Code
Modell lightgbm_debug wird getunt (DEBUG).
INFO in load_ifo_features: Renaming columns to ensure validity.
Normale FE-Daten geladen. Shapes: (407, 2160) (407,)
Erstelle MINI-Grid für 'Full FE' (DEBUG)...
HP-Kombinationen (DEBUG): 1
Erstes HP-Set: {'lag_candidates': (1, 3, 12), 'top_k_lags_per_feature': 1, 'use_rm3': True, 'k1_topk': 100, 'redundancy_param': 0.9, 'dr_method': 'none', 'corr_tag': 'expanding', 'corr_spec': {'mode': 'expanding', 'window': None, 'lam': None}, 'target_block_set': None, 'sample_weight_decay': None, 'n_estimators': 300, 'learning_rate': 0.05, 'num_leaves': 31, 'max_depth': -1, 'min_child_samples': 20, 'colsample_bytree': 1.0, 'subsample': 1.0, 'bagging_freq': 0, 'reg_alpha': 0.0, 'reg_lambda': 0.0, 'min_split_gain': 0.0, 'min_child_weight': 0.001, 'importance_type': 'gain', 'early_stopping_rounds': None, 'val_tail': None, 'seed': 42, 'n_jobs': 1}
[Stage A] Using FULL FE pipeline (Gleis 1 & 2).
[S