# LightGBM - Vision 2026

In [9]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit
import plotly.express as px
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Data loading and feature engineering
# Read the raw admissions export and convert the `date_entree` column to datetimes.
df_adm = (
    pd.read_csv('../data/raw/admissions_hopital_pitie_2024.csv')
    .assign(date_entree=lambda frame: pd.to_datetime(frame['date_entree']))
)

# Count rows per `date_entree` value, then reindex to a daily frequency;
# days that do not appear in the file are inserted with a count of 0.
dd = (
    df_adm
    .groupby('date_entree')
    .size()
    .rename('admissions')
    .asfreq('D', fill_value=0)
)

def create_optimized_features(df_ts):
    df = pd.DataFrame(index=df_ts.index)
    df['admissions'] = df_ts.values
    
    # Temporel
    df['day'] = df.index.dayofweek
    df['is_weekend'] = (df['day'] >= 5).astype(int)
    df['month'] = df.index.month
    
    # Lags & Dynamique
    for l in [1, 2, 7, 14]:
        df[f'lag_{l}'] = df['admissions'].shift(l)
    
    df['diff_1'] = df['lag_1'] - df['lag_2']
    df['diff_7'] = df['lag_1'] - df['lag_7']
    
    # Stats Mobiles
    for w in [7, 14, 28]:
        df[f'mean_{w}'] = df['admissions'].shift(1).rolling(window=w).mean()
        df[f'std_{w}'] = df['admissions'].shift(1).rolling(window=w).std()
    
    return df.dropna()

# Engineered table -> target vector and feature matrix.
full_df = create_optimized_features(dd)
y = full_df['admissions']
X = full_df.drop(columns='admissions')

# Chronological hold-out: the first 90% of rows train the model and the
# remaining rows are kept for testing (no shuffling).
train_size = int(len(X) * 0.9)
X_train, y_train = X.iloc[:train_size], y.iloc[:train_size]
X_test, y_test = X.iloc[train_size:], y.iloc[train_size:]

In [None]:
# Hyperparameter tuning
# Discrete search space: 3 * 2 * 2 * 2 * 2 * 1 = 48 combinations, of which the
# randomized search below samples `n_iter` candidates.
param_dist = {
    'num_leaves': [31, 63, 127],
    'learning_rate': [0.01, 0.05],
    'n_estimators': [500, 1000],
    'feature_fraction': [0.7, 0.9],
    'bagging_fraction': [0.7, 0.9],
    'bagging_freq': [5]
}

# Ordered (non-shuffled) folds for cross-validation on time-series data.
tscv = TimeSeriesSplit(n_splits=3)
rs = RandomizedSearchCV(
    lgb.LGBMRegressor(objective='regression_l1', random_state=42, verbose=-1),
    param_distributions=param_dist,
    n_iter=10,
    cv=tscv,
    scoring='neg_mean_absolute_error',
    n_jobs=-1,
    # Seed the candidate sampling too: without it every run draws a different
    # subset of the grid, so the selected model and the printed CV score
    # change between "Restart & Run All" executions.
    random_state=42,
)

print("Tuning en cours...")
rs.fit(X_train, y_train)
best_lgbm = rs.best_estimator_
# The scorer returns the negated MAE; flip the sign back for display.
print(f"Meilleur MAE CV : {-rs.best_score_:.2f}")

Tuning en cours...
Meilleur MAE CV : 80.43


In [12]:
# Evaluation and visualisation
# Score the tuned model on the held-out rows.
preds = best_lgbm.predict(X_test)
mae = mean_absolute_error(y_test, preds)
# NOTE(review): the label below says "Décembre", but the test set is defined
# as the last 10% of rows — confirm it really covers December only.
print(f"\nMAE SUR TEST (Décembre) : {mae:.2f}")

# Start from an empty figure that only carries the title, then overlay the
# observed series and the predictions on the same dates.
fig = px.line(title=f"Performance LightGBM (MAE: {mae:.2f})")
fig.add_scatter(x=y_test.index, y=y_test, name="Réel", line=dict(color="#1a3a5f", width=3))
fig.add_scatter(x=y_test.index, y=preds, name="LightGBM Pro", line=dict(color="#c8102e", dash="dash"))
fig.update_layout(
    template="plotly_dark",
    # Axis titles so the chart can be read without the surrounding code.
    xaxis_title="Date",
    yaxis_title="Admissions",
)
fig.show()


MAE SUR TEST (Décembre) : 113.99
