# Modélisation Prédictive - Vision 2026

Ce notebook contient le pipeline d'entraînement du modèle XGBoost pour prévoir les admissions quotidiennes à l'Hôpital Pitié-Salpêtrière.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
import joblib
import os

# Plot styling: apply matplotlib's 'ggplot' style sheet, then set seaborn's
# default palette to 'viridis'.
# NOTE(review): the order presumably matters (set_palette is expected to
# override the colour cycle installed by the style sheet) - confirm before
# reordering these two calls.
plt.style.use('ggplot')
sns.set_palette('viridis')

## 1. Chargement et Préparation des Données

In [None]:
# Load the raw admissions records and parse the admission timestamp.
df_adm = pd.read_csv('../data/raw/admissions_hopital_pitie_2024.csv')
df_adm['date_entree'] = pd.to_datetime(df_adm['date_entree'])

# Daily aggregation.
# Group on the calendar day (timestamp truncated to midnight) rather than on
# the raw timestamp: if 'date_entree' carries a time-of-day, grouping on the
# raw value would create one bucket per distinct timestamp and asfreq('D')
# would then silently drop every row that does not fall on the daily grid.
# For date-only values normalize() is a no-op, so the result is unchanged.
admission_day = df_adm['date_entree'].dt.normalize()
daily_data = (
    df_adm.groupby(admission_day)
    .size()
    .rename('admissions')
    .to_frame()
    # Days without any admission are inserted with a count of 0.
    .asfreq('D', fill_value=0)
)

print(f"Periode d'analyse : {daily_data.index.min()} -> {daily_data.index.max()}")
print(f"Nombre de jours : {len(daily_data)}")

## 2. Feature Engineering

Création de variables temporelles, de retards (lags) et de statistiques mobiles.

In [None]:
def create_features(df):
    """Return a copy of ``df`` enriched with calendar, lag and rolling features.

    The input frame is not modified. It is expected to have a DatetimeIndex
    (calendar attributes are read from the index) and an ``admissions``
    column (source of the lag and rolling columns).

    Added columns, in order: ``dayofweek``, ``quarter``, ``month``, ``year``,
    ``dayofyear``, ``dayofmonth``, ``weekofyear``, ``lag1``, ``lag7``,
    ``lag14``, ``roll_mean_7``, ``roll_std_7``. Rows without enough history
    hold NaN in the lag / rolling columns.
    """
    out = df.copy()
    idx = out.index

    # Calendar attributes taken directly from the index
    # (output column name -> DatetimeIndex attribute).
    calendar_attrs = {
        'dayofweek': 'dayofweek',
        'quarter': 'quarter',
        'month': 'month',
        'year': 'year',
        'dayofyear': 'dayofyear',
        'dayofmonth': 'day',
    }
    for column, attr in calendar_attrs.items():
        out[column] = getattr(idx, attr)
    out['weekofyear'] = idx.isocalendar().week.astype(int)

    # Lagged copies of the target: value observed k rows earlier.
    target = out['admissions']
    for k in (1, 7, 14):
        out[f'lag{k}'] = target.shift(k)

    # Rolling statistics over the 7 previous rows; the shift(1) keeps the
    # current row's own value out of its window.
    previous_week = target.shift(1).rolling(window=7)
    out['roll_mean_7'] = previous_week.mean()
    out['roll_std_7'] = previous_week.std()

    return out

# Build the feature table and discard every row that still contains a NaN
# (e.g. the earliest days, for which the lag columns have no history yet).
features_df = create_features(daily_data).dropna()

print(f"Colonnes générées : {list(features_df.columns)}")

## 3. Entraînement du Modèle XGBoost

In [None]:
FEATURES = ['dayofweek', 'quarter', 'month', 'year', 'dayofyear', 'dayofmonth',
            'weekofyear', 'lag1', 'lag7', 'lag14', 'roll_mean_7', 'roll_std_7']
TARGET = 'admissions'

# Chronological split (no shuffling):
#   - the last TEST_DAYS rows are the hold-out test set,
#   - the VAL_DAYS rows just before them are a validation window used only
#     to drive early stopping,
#   - everything earlier is used for fitting.
# The test set must NOT be the early-stopping set: choosing the number of
# trees on the same rows that are later scored makes the reported MAE/RMSE
# optimistically biased.
TEST_DAYS = 30
VAL_DAYS = 30

if len(features_df) <= TEST_DAYS + VAL_DAYS:
    raise ValueError(
        f"Pas assez de donnees : {len(features_df)} jours disponibles, "
        f"plus de {TEST_DAYS + VAL_DAYS} requis pour le split train/validation/test."
    )

train = features_df.iloc[:-(TEST_DAYS + VAL_DAYS)]
val = features_df.iloc[-(TEST_DAYS + VAL_DAYS):-TEST_DAYS]
# Explicit copy: later cells add a column to `test`, which must not be a
# slice of features_df.
test = features_df.iloc[-TEST_DAYS:].copy()

X_train = train[FEATURES]
y_train = train[TARGET]
X_val = val[FEATURES]
y_val = val[TARGET]
X_test = test[FEATURES]
y_test = test[TARGET]

# NOTE(review): base_score=0.5 is kept from the original configuration; it is
# the starting prediction before any tree is added - confirm it is intended
# for a daily-count target.
reg = xgb.XGBRegressor(base_score=0.5, booster='gbtree',
                       n_estimators=1000,
                       early_stopping_rounds=50,
                       objective='reg:squarederror',
                       max_depth=3,
                       learning_rate=0.01)

# Early stopping monitors the LAST entry of eval_set, i.e. the validation
# window; the training pair is listed first only so its metric is logged too.
reg.fit(X_train, y_train,
        eval_set=[(X_train, y_train), (X_val, y_val)],
        verbose=100)

print("Entrainement termine.")

## 4. Evaluation des Performances

In [None]:
# Attach the predictions with assign(), which returns a new DataFrame.
# `test` was created as an iloc slice of features_df, so writing a column
# into it in place triggers pandas' SettingWithCopyWarning (pandas < 3).
test = test.assign(prediction=reg.predict(X_test))

y_true = test[TARGET]
y_pred = test['prediction']

# Error metrics on the hold-out period.
mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))

print(f'MAE  : {mae:.2f}')
print(f'RMSE : {rmse:.2f}')

# Plot actual vs. predicted daily admissions over the test period.
plt.figure(figsize=(15, 5))
plt.plot(test.index, y_true, label='Reel')
plt.plot(test.index, y_pred, label='Predi', linestyle='--')
plt.title('Comparaison Reel vs Predictions (XGBoost)')
plt.legend()
plt.show()

## 5. Export du Modèle

In [None]:
# Persist the fitted regressor with joblib, creating the target folder first
# if it does not exist yet.
model_path = '../models/xgboost_admissions_v1.joblib'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(reg, model_path)
print("Modele sauvegarde dans models/xgboost_admissions_v1.joblib")