# LightGBM Champion - Vision 2026
## Version Haute Performance (MAE 72.92)

Ce notebook presente le modele le plus performant pour la Pitie-Salpetriere.
Strategie :
1. Lags minimaux (1 et 7) pour conserver un maximum du mois de Janvier en entrainement (seuls les 7 premiers jours de la serie sont ecartes par le lag 7).
2. Objectif regression_l1 pour minimiser l'erreur absolue.
3. Absence d'emojis pour la robustesse du code.

In [None]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.metrics import mean_absolute_error
import plotly.express as px
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the raw admissions file and parse the admission date column.
df_adm = pd.read_csv('../data/raw/admissions_hopital_pitie_2024.csv')
df_adm['date_entree'] = pd.to_datetime(df_adm['date_entree'])

# Count admissions per calendar day. Timestamps are floored to midnight before
# grouping so that rows carrying a time of day are counted in their day instead
# of forming one group per distinct timestamp (which asfreq('D') would then
# mostly discard). For date-only data this is a no-op. Days without any
# admission are inserted with a count of 0.
dd = (
    df_adm.groupby(df_adm['date_entree'].dt.normalize())
    .size()
    .rename('admissions')
    .asfreq('D', fill_value=0)
)

# Feature table: day of week plus the admissions observed 1 and 7 days earlier.
df = pd.DataFrame(index=dd.index)
df['admissions'] = dd.values
df['day'] = df.index.dayofweek  # 0 = Monday ... 6 = Sunday
df['lag1'] = df['admissions'].shift(1)
df['lag7'] = df['admissions'].shift(7)

# shift() leaves NaN in the leading rows (7 rows because of lag7); drop them.
df = df.dropna()
X = df.drop(columns=['admissions'])
y = df['admissions']

# Chronological split: the last 30 daily rows are held out as the test set.
# NOTE(review): the notebook labels this window "December", but 30 rows do not
# cover all 31 days of December - confirm whether 31 was intended.
X_tr, y_tr = X.iloc[:-30], y.iloc[:-30]
X_te, y_te = X.iloc[-30:], y.iloc[-30:]

print(f"Dimensions Entrainement : {X_tr.shape}")

In [None]:
# High-capacity LightGBM regressor trained with the L1 (absolute error)
# objective: many trees, a small learning rate and large trees.
champion_params = {
    "objective": "regression_l1",
    "n_estimators": 5000,
    "learning_rate": 0.01,
    "num_leaves": 255,
    "verbose": -1,  # silence LightGBM training output
    "random_state": 42,  # fixed seed
}
model = lgb.LGBMRegressor(**champion_params)

# Fit on the training window, predict the held-out window and score with MAE.
model.fit(X_tr, y_tr)
preds = model.predict(X_te)
mae = mean_absolute_error(y_true=y_te, y_pred=preds)

print(f"MAE SUR TEST DECEMBRE : {mae:.2f}")

In [None]:
# Plot observed vs. predicted admissions over the test window; the MAE is
# shown in the figure title.
observed_style = {"color": "#1a3a5f", "width": 3}
predicted_style = {"color": "#c8102e", "dash": "dash"}
test_dates = y_te.index

fig = px.line(title=f"Precision Champion (MAE: {mae:.2f})")
# Both traces share the same x axis (test dates); only values and style differ.
for trace_values, trace_name, trace_style in (
    (y_te, "Reel", observed_style),
    (preds, "Prediction", predicted_style),
):
    fig.add_scatter(x=test_dates, y=trace_values, name=trace_name, line=trace_style)
fig.update_layout(template="plotly_dark")
fig.show()