# Modélisation Prédictive - Vision 2026

Ce notebook contient le pipeline d'entraînement du modèle XGBoost pour prévoir les admissions quotidiennes à l'Hôpital Pitié-Salpêtrière.

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import xgboost as xgb
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.model_selection import TimeSeriesSplit
import joblib
import os

# Pas d'emoji dans le code

## 1. Chargement et Preparation des Donnees

In [None]:
# Load the raw admissions extract; every row is counted as one admission below.
df_adm = pd.read_csv('../data/raw/admissions_hopital_pitie_2024.csv')
# Truncate each timestamp to midnight. If `date_entree` carries a time of day,
# grouping on the raw value would count per timestamp instead of per day, and
# `asfreq('D')` would then keep only the rows that fall exactly on the daily
# grid. For date-only values this is a no-op.
df_adm['date_entree'] = pd.to_datetime(df_adm['date_entree']).dt.normalize()

# Daily aggregation: number of rows per calendar day.
daily_data = df_adm.groupby('date_entree').size().rename('admissions').reset_index()
# Reindex on a continuous daily calendar; days absent from the file get 0.
daily_data = daily_data.set_index('date_entree').asfreq('D', fill_value=0)

print(f"Periode d'analyse : {daily_data.index.min()} -> {daily_data.index.max()}")
print(f"Nombre de jours : {len(daily_data)}")

## 2. Feature Engineering

Création de variables temporelles cycliques (sin/cos), de retards (lags) et de statistiques mobiles.

In [None]:
def create_features(df):
    """Return a copy of ``df`` enriched with calendar, lag and rolling features.

    ``df`` must have a ``DatetimeIndex`` and an ``admissions`` column. The
    input frame is left untouched. Added columns, in order: ``month_sin``,
    ``month_cos``, ``day_sin``, ``day_cos``, ``dayofyear``, ``weekofyear``,
    ``lag1``, ``lag7``, ``lag14``, ``roll_mean_7``, ``roll_std_7``.

    Lag and rolling columns only look at previous rows, so the first rows
    contain NaN until enough history is available.
    """
    out = df.copy()
    stamps = out.index
    full_turn = 2 * np.pi

    # Sine/cosine encoding of the month and of the day of the week.
    month_angle = full_turn * stamps.month / 12
    weekday_angle = full_turn * stamps.dayofweek / 7
    out['month_sin'] = np.sin(month_angle)
    out['month_cos'] = np.cos(month_angle)
    out['day_sin'] = np.sin(weekday_angle)
    out['day_cos'] = np.cos(weekday_angle)

    # Plain calendar positions.
    out['dayofyear'] = stamps.dayofyear
    out['weekofyear'] = stamps.isocalendar().week.astype(int)

    # Values observed 1, 7 and 14 rows earlier.
    admissions = out['admissions']
    for column, offset in (('lag1', 1), ('lag7', 7), ('lag14', 14)):
        out[column] = admissions.shift(offset)

    # 7-row rolling statistics over the series shifted by one row, so the
    # current row's own value never enters its window.
    previous_week = admissions.shift(1).rolling(window=7)
    out['roll_mean_7'] = previous_week.mean()
    out['roll_std_7'] = previous_week.std()

    return out

# Build the feature table, then drop every row that contains a NaN (for
# instance the first rows, whose lags and rolling windows are not yet filled).
features_df = create_features(daily_data).dropna()

print(f"Colonnes générées : {features_df.columns.tolist()}")

## 3. Entrainement du Modele XGBoost

In [None]:
FEATURES = ['month_sin', 'month_cos', 'day_sin', 'day_cos', 'dayofyear',
            'weekofyear', 'lag1', 'lag7', 'lag14', 'roll_mean_7', 'roll_std_7']
TARGET = 'admissions'

# Chronological train/test split: the last 30 rows are held out for testing.
train = features_df.iloc[:-30]
test = features_df.iloc[-30:].copy()

X_train = train[FEATURES]
y_train = train[TARGET]
X_test = test[FEATURES]
y_test = test[TARGET]

# Early stopping needs its own validation window. Monitoring the test set
# would pick the number of boosting rounds on the very data used to report
# the final metrics, making them optimistic. The last VAL_DAYS rows of the
# training period are used instead, and the test set stays unseen.
VAL_DAYS = 30
fit_part = train.iloc[:-VAL_DAYS]
val_part = train.iloc[-VAL_DAYS:]
X_fit, y_fit = fit_part[FEATURES], fit_part[TARGET]
X_val, y_val = val_part[FEATURES], val_part[TARGET]

# base_score is set to the mean of the fitted target so that boosting starts
# at the scale of the data.
reg = xgb.XGBRegressor(base_score=float(y_fit.mean()), booster='gbtree',
                       n_estimators=1000,
                       early_stopping_rounds=50,
                       objective='reg:squarederror',
                       max_depth=4,
                       learning_rate=0.05)

# The last entry of eval_set is the one monitored for early stopping.
reg.fit(X_fit, y_fit,
        eval_set=[(X_fit, y_fit), (X_val, y_val)],
        verbose=100)

print("Entrainement termine.")

## 4. Evaluation des Performances

In [None]:
# Predict the hold-out window and store the forecasts next to the observations.
test['prediction'] = reg.predict(X_test)
observed = test[TARGET]
forecast = test['prediction']

# Error metrics on the hold-out window.
mae = mean_absolute_error(observed, forecast)
rmse = np.sqrt(mean_squared_error(observed, forecast))
r2 = r2_score(observed, forecast)
mape = mean_absolute_percentage_error(observed, forecast)

for report_line in (
    f'MAE  : {mae:.2f}',
    f'RMSE : {rmse:.2f}',
    f'R2   : {r2:.4f}',
    f'MAPE : {mape:.4f}',
):
    print(report_line)

# Plotly chart: observed series (solid line) against predictions (dashed line).
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=test.index,
        y=observed,
        name='Donnees Reelles',
        line=dict(color='#005ba1'),
    )
)
fig.add_trace(
    go.Scatter(
        x=test.index,
        y=forecast,
        name='Predictions XGBoost',
        line=dict(color='#c8102e', dash='dash'),
    )
)
fig.update_layout(
    title='Comparaison Reel vs Prediction (Test Set - Decembre 2024)',
    xaxis_title='Date',
    yaxis_title="Nombre d'admissions",
    template='plotly_dark',
    height=500,
)
fig.show()

## 5. Importance des Features

In [None]:
# One row per feature holding the importance score exposed by the fitted
# regressor, sorted in ascending order before plotting.
importance = pd.DataFrame(
    data=reg.feature_importances_,
    index=FEATURES,
    columns=['importance'],
)
importance = importance.sort_values(by='importance')

# Horizontal bar chart of the sorted scores.
fig_imp = px.bar(
    importance,
    x='importance',
    y=importance.index,
    orientation='h',
    title='Importance des Variables (XGBoost)',
    template='plotly_dark',
)
fig_imp.show()

## 6. Export du Modele

In [None]:
# Persist the fitted regressor with joblib, creating the target folder if needed.
model_dir = '../models'
model_path = '../models/xgboost_admissions_v1.joblib'
os.makedirs(model_dir, exist_ok=True)
joblib.dump(reg, model_path)
print("Modele sauvegarde dans models/xgboost_admissions_v1.joblib")