# Modélisation - Vision 2026 (XGBoost)

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import xgboost as xgb
import warnings
warnings.filterwarnings('ignore')


## CHARGEMENT DES DONNEES

In [2]:
df = pd.read_csv('../data/raw/admissions_hopital_pitie_2024_2025.csv')
df['date_entree'] = pd.to_datetime(df['date_entree'])

# Serie temporelle quotidienne
daily_ts = df.groupby('date_entree').size().rename('admissions')
daily_ts = daily_ts.asfreq('D', fill_value=0)


## FEATURE ENGINEERING 

In [3]:
def create_features_vectorized(ts):
    df_feat = pd.DataFrame({'admissions': ts})
    
    # Features temporelles
    df_feat['day_of_week'] = df_feat.index.dayofweek
    df_feat['day_of_month'] = df_feat.index.day
    df_feat['month'] = df_feat.index.month
    df_feat['quarter'] = df_feat.index.quarter
    df_feat['week_of_year'] = df_feat.index.isocalendar().week.astype(int)
    df_feat['is_weekend'] = (df_feat.index.dayofweek >= 5).astype(int)
    df_feat['is_month_start'] = df_feat.index.is_month_start.astype(int)
    df_feat['is_month_end'] = df_feat.index.is_month_end.astype(int)
    
    # Lags
    for lag in [1, 2, 3, 7, 14, 21, 28]:
        df_feat[f'lag_{lag}'] = df_feat['admissions'].shift(lag)
    
    # Rolling features
    for window in [7, 14, 28]:
        df_feat[f'rolling_mean_{window}'] = df_feat['admissions'].shift(1).rolling(window=window).mean()
        df_feat[f'rolling_std_{window}'] = df_feat['admissions'].shift(1).rolling(window=window).std()
        df_feat[f'rolling_min_{window}'] = df_feat['admissions'].shift(1).rolling(window=window).min()
        df_feat[f'rolling_max_{window}'] = df_feat['admissions'].shift(1).rolling(window=window).max()
    
    # Expanding features
    df_feat['expanding_mean'] = df_feat['admissions'].shift(1).expanding().mean()
    df_feat['expanding_std'] = df_feat['admissions'].shift(1).expanding().std()
    
    # Momentum
    df_feat['momentum_7'] = df_feat['admissions'].shift(1) - df_feat['admissions'].shift(8)
    df_feat['momentum_14'] = df_feat['admissions'].shift(1) - df_feat['admissions'].shift(15)
    
    # Fourier features (saisonnalite)
    for k in [1, 2, 3]:
        df_feat[f'sin_year_{k}'] = np.sin(2 * np.pi * k * df_feat.index.dayofyear / 365.25)
        df_feat[f'cos_year_{k}'] = np.cos(2 * np.pi * k * df_feat.index.dayofyear / 365.25)
    
    return df_feat.dropna()

df_features = create_features_vectorized(daily_ts)


## SPLIT TEMPOREL

In [4]:
# Train: Jan-Aout 2025, Test: Sept-Dec 2025
train_mask = df_features.index < '2025-09-01'
test_mask = df_features.index >= '2025-09-01'

X_train = df_features.loc[train_mask].drop(columns=['admissions'])
y_train = df_features.loc[train_mask, 'admissions']
X_test = df_features.loc[test_mask].drop(columns=['admissions'])
y_test = df_features.loc[test_mask, 'admissions']

print(f"Train: {X_train.shape[0]} jours | Test: {X_test.shape[0]} jours")


Train: 581 jours | Test: 122 jours


## PARAMETRES XGBOOST OPTIMISES

In [5]:
# HyperParametres
xgb_params = {
    'objective': 'reg:squarederror',
    'learning_rate': 0.01,           
    'max_depth': 6,                  
    'min_child_weight': 3,           
    'subsample': 0.8,                
    'colsample_bytree': 0.8,        
    'gamma': 0.1,                   
    'reg_alpha': 0.1,                
    'reg_lambda': 1.0,               
    'n_estimators': 3000,            
    'random_state': 42,
    'n_jobs': -1,
    'verbosity': 0
}


## CROSS-VALIDATION TEMPORELLE

In [6]:
tscv = TimeSeriesSplit(n_splits=5)
cv_scores = []

print("\n=== Cross-Validation Temporelle ===")
for fold, (train_idx, val_idx) in enumerate(tscv.split(X_train), 1):
    X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
    y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]
    
    model = xgb.XGBRegressor(**xgb_params)
    model.fit(
        X_tr, y_tr,
        eval_set=[(X_val, y_val)],
        early_stopping_rounds=50,
        verbose=False
    )
    
    y_pred_val = model.predict(X_val)
    mae = mean_absolute_error(y_val, y_pred_val)
    cv_scores.append(mae)
    print(f"Fold {fold}: MAE = {mae:.2f}")

print(f"\nMAE moyenne Cross-Val: {np.mean(cv_scores):.2f} ± {np.std(cv_scores):.2f}")



=== Cross-Validation Temporelle ===


TypeError: XGBModel.fit() got an unexpected keyword argument 'early_stopping_rounds'

## ENTRAINEMENT

In [None]:
print("\n=== Entrainement Final ===")
final_model = xgb.XGBRegressor(**xgb_params)
final_model.fit(
    X_train, y_train,
    eval_set=[(X_test, y_test)],
    early_stopping_rounds=50,
    verbose=False
)


In [None]:
# EVALUATION SUR TEST 
y_pred_test = final_model.predict(X_test)

mae_test = mean_absolute_error(y_test, y_pred_test)
rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
r2_test = r2_score(y_test, y_pred_test)

print(f"\n=== Performance Test (Sept-Dec 2025) ===")
print(f"MAE:  {mae_test:.2f}")
print(f"RMSE: {rmse_test:.2f}")
print(f"R²:   {r2_test:.4f}")


## VISUALISATION PREDICTIONS VS REEL

In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=y_test.index,
    y=y_test.values,
    mode='lines',
    name='Reel',
    line=dict(color='red', width=1)
))
fig.add_trace(go.Scatter(
    x=y_test.index,
    y=y_pred_test,
    mode='lines',
    name='Predit XGBoost',
    line=dict(color='#00d2ff', width=2, dash='dot')
))
fig.update_layout(
    title='Validation XGBoost - Test Set (Sept-Dec 2025)',
    xaxis_title='Date',
    yaxis_title='Admissions',
    template='plotly_dark',
    height=500
)
fig.show()


## FEATURE IMPORTANCE 

In [None]:
importance_df = pd.DataFrame({
    'Feature': X_train.columns,
    'Importance': final_model.feature_importances_
}).sort_values('Importance', ascending=False).head(15)

fig_imp = px.bar(
    importance_df,
    x='Importance',
    y='Feature',
    orientation='h',
    title='Top 15 Features XGBoost',
    template='plotly_dark',
    color='Importance',
    color_continuous_scale='Viridis'
)
fig_imp.update_layout(height=500)
fig_imp.show()


In [None]:
# SAUVEGARDE DU MODELE
import joblib
joblib.dump(final_model, '../models/xgboost_final_2425.joblib')
print("\nModele sauvegarde: models/xgboost_final_2425.joblib")
