# In [None]:
import os
import pandas as pd
import itertools
import tqdm
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics

# --------------------------------------
# 1. Load data & prepare it for Prophet
# --------------------------------------
# Target series: the "calls" column becomes Prophet's "y" column and the
# datetime index is mirrored into the "ds" column.
pfad = os.path.abspath("../data/processed/sickness_table.parquet")
df = pd.read_parquet(pfad)
df.index = pd.to_datetime(df.index)
df = (
    df.asfreq("D")  # conform the index to a daily frequency
    .rename(columns={"calls": "y"})
    .assign(ds=lambda frame: frame.index)
)

# Holiday table in the layout passed to Prophet(holidays=...): one row per
# holiday date, with an effect window of two days before and after.
holiday_pfad = os.path.abspath("../data/processed/holiday.parquet")
holidays_raw = pd.read_parquet(holiday_pfad)
holiday_df = (
    holidays_raw[["name", "date"]]
    .rename(columns={"name": "holiday", "date": "ds"})
    .assign(lower_window=-2, upper_window=2)
)

# --------------------------------------
# 2. Feature grid
# --------------------------------------
# Maps a feature-group name to the regressor columns belonging to that group.
grid = {
    "season": ["season_0", "season_1", "season_2", "season_3"],
    "month_start_end": ["is_month_start", "is_month_end"],
    "cyclic_encoding_month": ["month_sin", "month_cos"],
    "holiday": ["holiday", "holiday_before", "holiday_after"],
    "holiday_sig": [
        "holiday_significant",
        "holiday_significant_before",
        "holiday_significant_after",
    ],
    "holiday_effects": [
        "holiday_cluster",
        "holiday_density_7d",
        "holiday_on_monday_or_friday",
        "holiday_cluster_and_bruecke",
        "holiday_and_weekday_cat",
    ],
}

# --------------------------------------
# 3. Generate all combinations
# --------------------------------------
# Every non-empty subset of feature groups is emitted twice: first without
# and then with the Prophet holiday table (marked by a "+H" suffix).
group_names = list(grid)
all_combinations = []

for size in range(1, len(group_names) + 1):
    for selected in itertools.combinations(group_names, size):
        # Flatten the selected groups into one regressor list (grid order).
        columns = list(
            itertools.chain.from_iterable(grid[name] for name in selected)
        )
        label = "+".join(sorted(selected))

        all_combinations.append(
            {"groups": label, "features": columns, "use_holidays": False}
        )
        all_combinations.append(
            {"groups": label + "+H", "features": columns, "use_holidays": True}
        )

# As a DataFrame for a better overview
df_combinations = pd.DataFrame(all_combinations)

# --------------------------------------
# 4. Prophet grid search & cross-validation
# --------------------------------------

# Hyperparameter grid: the full Cartesian product of these values is
# evaluated for every feature combination.
param_grid = {
    "changepoint_prior_scale": [0.05, 0.5],
    "seasonality_prior_scale": [5, 15],
    "holidays_prior_scale": [1, 10],
    "seasonality_mode": ["additive", "multiplicative"],
}
param_names = list(param_grid)
all_params = [
    dict(zip(param_names, values))
    for values in itertools.product(*param_grid.values())
]

# One record per successfully evaluated (feature combination, parameter set)
results = []

progress = tqdm.tqdm(
    df_combinations.itertuples(index=False),
    total=len(df_combinations),
)
for combination in progress:
    group_string = combination.groups
    features = combination.features
    use_holidays = combination.use_holidays

    # Keep only timestamp, target and the regressors of this combination.
    df_model = df[["ds", "y", *features]].copy()

    for params in all_params:
        holidays = holiday_df if use_holidays else None
        model = Prophet(
            weekly_seasonality=True,
            yearly_seasonality=True,
            holidays=holidays,
            **params,
        )

        for feat in features:
            model.add_regressor(feat)

        # A failing fit or cross-validation is reported and skipped so the
        # remaining combinations are still evaluated.
        try:
            model.fit(df_model)

            df_cv = cross_validation(
                model,
                initial="730 days",
                period="30 days",
                horizon="30 days",
                parallel="processes",
            )

            # rolling_window=1 aggregates the metrics over the whole horizon.
            df_p = performance_metrics(df_cv, rolling_window=1)

            # 0/1 flags: forecast below / above the observed value.
            df_cv = df_cv.assign(
                under=df_cv["yhat"].lt(df_cv["y"]).astype(int),
                over=df_cv["yhat"].gt(df_cv["y"]).astype(int),
            )

            record = {
                "groups": group_string,
                "features": features,
                "params": params,
                "mae": df_p["mae"].mean(),
                "rmse": df_p["rmse"].mean(),
                "under": df_cv["under"].mean(),
                "over": df_cv["over"].mean(),
            }
            results.append(record)

        except Exception as e:
            print(f"Fehler bei Kombination {group_string} mit Parametern {params}: {e}")

# --------------------------------------
# 5. Save results
# --------------------------------------
results_df = pd.DataFrame(results)

# If every fit / cross-validation run failed, `results` is empty and the
# frame has no metric columns. Fail with an explicit message here instead of
# an opaque KeyError in the score computation, and before anything is
# written to disk.
if results_df.empty:
    raise RuntimeError(
        "Keine Ergebnisse vorhanden: alle Modellläufe sind fehlgeschlagen."
    )

# Output paths
results_path = os.path.abspath("../results/prophet/cv_metrics_full.parquet")
models_path = os.path.abspath("../models/prophet/best_params.parquet")

# Create the target directories
os.makedirs(os.path.dirname(results_path), exist_ok=True)
os.makedirs(os.path.dirname(models_path), exist_ok=True)

# Compute the score and sort by it (ascending, so the lowest score is first).
# The score is a weighted sum of MAE, RMSE and the "under" rate; the rate is
# multiplied by 100 before weighting.
results_df["score"] = (
    results_df["mae"] * 0.4 +
    results_df["rmse"] * 0.3 +
    results_df["under"] * 100 * 0.3
)
best_models = results_df.sort_values("score")

# Persist both the unsorted metrics and the ranking sorted by score
results_df.to_parquet(results_path, index=False)
best_models.to_parquet(models_path, index=False)

display(best_models.head())