# LightGBM - Vision 2024-2025 (V4 Champion)

Ce notebook représente la configuration **V4 Champion** pour la Pitié-Salpêtrière.
Stratégie :
1. **Test Set Strict** : Évaluation sur les 4 derniers mois de 2025 (sept.-déc.).
2. **Hyperparamètres** : 'Aggressive Precision' (n_estimators=8000, lr=0.001) pour minimiser le résidu.
3. **Performance** : MAE de référence 60.95 (sur la période cible).
4. **Standard** : Usage strict sans emoji.

In [None]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.metrics import mean_absolute_error
import plotly.express as px
import plotly.graph_objects as go
import joblib
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the 2024-2025 admissions and build a gap-free daily count series.
df_adm = pd.read_csv('../data/raw/admissions_hopital_pitie_2024_2025.csv')
df_adm['date_entree'] = pd.to_datetime(df_adm['date_entree'])

# Group on the calendar day rather than on the raw timestamp: if `date_entree`
# carries a time of day, grouping on it directly yields one bucket per distinct
# timestamp, and `asfreq('D')` then keeps only the rows that fall exactly on
# the daily grid anchored at the first timestamp. For date-only values
# `normalize()` is a no-op, so the resulting series is unchanged.
admission_day = df_adm['date_entree'].dt.normalize()
dd = (
    df_adm.groupby(admission_day)
    .size()
    .rename('admissions')
    .asfreq('D', fill_value=0)  # days without any admission row count as 0
)

def create_features(df_ts, *, lags=(1, 2, 7, 14), windows=(7, 14), holidays=None):
    """Build the model's feature table from a daily admissions series.

    Args:
        df_ts: Series indexed by a ``DatetimeIndex``; its values are the
            daily admission counts.
        lags: Day offsets for which a ``lag{n}`` column is created.
        windows: Window lengths (in days) for which a ``roll_mean{n}``
            column is created.
        holidays: Iterable of dates flagged in ``is_holiday``. ``None``
            selects the built-in 2024-2025 list below.

    Returns:
        DataFrame on the same index holding ``admissions`` plus the lag,
        rolling-mean, calendar, cyclic and holiday columns (in that order).
        Rows containing any NaN -- the warm-up rows at the start, where a lag
        or a rolling window is not yet fully available -- are dropped.
    """
    if holidays is None:
        # NOTE(review): the default only lists four fixed dates per year;
        # confirm whether other public holidays are left out on purpose.
        holidays = ['2024-01-01', '2024-05-01', '2024-07-14', '2024-12-25',
                    '2025-01-01', '2025-05-01', '2025-07-14', '2025-12-25']
    holiday_days = pd.to_datetime(list(holidays)).normalize()

    df = pd.DataFrame(index=df_ts.index)
    df['admissions'] = df_ts.values

    # 1. Lags: the value observed `lag` days earlier.
    for lag in lags:
        df[f'lag{lag}'] = df['admissions'].shift(lag)

    # 2. Rolling statistics: shift by one day first so that a window only
    #    contains past values and never the target of the current row.
    past_admissions = df['admissions'].shift(1)
    for window in windows:
        df[f'roll_mean{window}'] = past_admissions.rolling(window=window).mean()

    # 3. Calendar components.
    df['day'] = df.index.dayofweek
    df['month'] = df.index.month

    # 4. Cyclic encodings of the position in the year and of the month.
    df['sin_day'] = np.sin(2 * np.pi * df.index.dayofyear / 365.25)
    df['cos_day'] = np.cos(2 * np.pi * df.index.dayofyear / 365.25)
    df['sin_month'] = np.sin(2 * np.pi * df['month'] / 12)
    df['cos_month'] = np.cos(2 * np.pi * df['month'] / 12)

    # 5. Holiday flag: compare calendar days so that an index carrying a time
    #    of day still matches the midnight-based holiday dates.
    df['is_holiday'] = df.index.normalize().isin(holiday_days).astype(int)

    return df.dropna()

# Build the feature table once, then separate the target from the predictors.
full_df = create_features(dd)
y = full_df['admissions']
X = full_df.drop(columns=['admissions'])

# Strict chronological hold-out: rows dated before the split go to training,
# rows from 2025-09-01 onwards form the test set.
split_date = pd.Timestamp('2025-09-01')
is_train = full_df.index < split_date
is_test = full_df.index >= split_date

X_tr, y_tr = X[is_train], y[is_train]
X_te, y_te = X[is_test], y[is_test]

In [None]:
# Champion V4 model (optimised for MAE)
from pathlib import Path

MODEL_PATH = '../models/lightgbm_final_v4_2425.joblib'

# Create the output directory up front: a missing '../models' folder would
# otherwise only surface as an error from joblib.dump after the full
# 8000-tree fit, and the trained model would be lost.
Path(MODEL_PATH).parent.mkdir(parents=True, exist_ok=True)

model = lgb.LGBMRegressor(
    objective='regression_l1',  # L1 objective, i.e. the loss matches the MAE metric
    n_estimators=8000,          # many trees to compensate for the tiny learning rate
    learning_rate=0.001,
    num_leaves=20,
    max_depth=5,
    verbose=-1,                 # silence LightGBM's training log
    random_state=42,            # fixed seed for reproducible runs
)

print("Entrainement du modele V4...")
model.fit(X_tr, y_tr)

# Evaluate on the held-out test window.
preds = model.predict(X_te)
mae = mean_absolute_error(y_te, preds)

print(f"MAE SUR TEST 4 MOIS (SEPT-DEC 2025) : {mae:.2f}")

print("Sauvegarde du modele...")
joblib.dump(model, MODEL_PATH)
print("Termine.")

In [None]:
# Performance plot: actual vs. predicted values over the test window.
actual_line = dict(color="#1a3a5f", width=3)
predicted_line = dict(color="#c8102e", dash="dash")

# Start from an empty express figure (title only) and layer both traces on it.
fig = (
    px.line(title=f"Performance Champion (MAE: {mae:.2f})")
    .add_scatter(x=y_te.index, y=y_te, name="Reel", line=actual_line)
    .add_scatter(x=y_te.index, y=preds, name="Prediction", line=predicted_line)
    .update_layout(template="plotly_dark")
)
fig.show()