# Analyse int√©gr√©e du RSV (France, 2018-2027)

Ce carnet rassemble toutes les √©tapes de l'analyse men√©e dans le cadre de la th√®se, en suivant une trame m√©thodologique claire :

1. **Pr√©paration des donn√©es** : harmonisation hebdomadaire des sources sanitaires et comportementales.
2. **Mod√©lisation explicative (OLS)** : identification des d√©terminants et quantification de leurs effets moyens.
3. **Sc√©narios contrefactuels** : projection de trajectoires alternatives (vaccination, gestes barri√®res).
4. **S√©rie interrompue (ITS)** : caract√©risation des ruptures li√©es √† la pand√©mie.
5. **SARIMAX multivari√©** : prolongement de la s√©rie observ√©e jusqu'en 2027.
6. **Pr√©visions univari√©es** (5 saisons) et sc√©narios d√©riv√©s.
7. **Diagnostics & exports** pour r√©utilisation (Streamlit, annexes).

Chaque section rappelle la logique, d√©taille les √©tapes techniques et met en avant les r√©sultats √† retenir.

In [49]:
from pathlib import Path
import warnings
from itertools import product

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import statsmodels.api as sm
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tsa.statespace.sarimax import SARIMAX

from IPython.display import display

warnings.filterwarnings("ignore")
pd.set_option("display.max_columns", 120)
pd.set_option("display.width", 180)
np.random.seed(42)

px.defaults.template = "plotly_white"
px.defaults.width = 1000
px.defaults.height = 520

DATA = Path("../data_clean")
FILES = {
    "common_FR_long": DATA / "ODISSEE/common_FR_long.csv",
    "vacsi_fr_extended": DATA / "VACSI/vacsi_fr_extended.csv",
    "google_mobility_fr_weekly": DATA / "GOOGLE/google_mobility_fr_weekly.csv",
    "coviprev_reg_weekly": DATA / "COVIPREV/coviprev_reg_weekly.csv",
    "meteo_fr_weekly": DATA / "METEO/meteo_fr_weekly.csv",
    "erviss_fr_weekly": DATA / "ERVISS/erviss_fr_weekly.csv",
}

missing = [k for k, p in FILES.items() if not p.exists()]
if missing:
    raise FileNotFoundError(f"Fichiers manquants: {missing}")
else:
    print("‚úÖ Tous les fichiers n√©cessaires sont disponibles.")

COVID_START = pd.Timestamp("2020-03-01")
VACC_START = pd.Timestamp("2021-01-01")

LAG_VACC, LAG_MNP, LAG_WORK = 4, 8, 9
SEASON_PERIOD = 52
print(f"‚è±Ô∏è COVID_START={COVID_START.date()}, VACC_START={VACC_START.date()} | Lags initiaux: {LAG_VACC, LAG_MNP, LAG_WORK}")


‚úÖ Tous les fichiers n√©cessaires sont disponibles.
‚è±Ô∏è COVID_START=2020-03-01, VACC_START=2021-01-01 | Lags initiaux: (4, 8, 9)


In [50]:
def keyify(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    iso = pd.to_datetime(df["date_monday"]).dt.isocalendar()
    df["year_iso"] = iso["year"].astype(int)
    df["week_iso_num"] = iso["week"].astype(int)
    return df

def zscore(series: pd.Series) -> pd.Series:
    std = series.std(ddof=0)
    return (series - series.mean()) / std if std != 0 else series * 0

def build_time_features(df: pd.DataFrame, period: int = 52) -> pd.DataFrame:
    df = df.copy()
    df["t"] = np.arange(len(df))
    df["sin52"] = np.sin(2 * np.pi * df["t"] / period)
    df["cos52"] = np.cos(2 * np.pi * df["t"] / period)
    return df

def load_datasets(files: dict[str, Path]) -> dict[str, pd.DataFrame]:
    data = {}
    for name, path in files.items():
        data[name] = pd.read_csv(path)
        print(f"‚úÖ {name} charg√© ({data[name].shape[0]} lignes)")
    return data

def merge_exog(rsv_df: pd.DataFrame, vac_df: pd.DataFrame, work_df: pd.DataFrame, cov_df: pd.DataFrame) -> pd.DataFrame:
    merged = (
        rsv_df[["date_monday", "year_iso", "week_iso_num"]]
        .merge(vac_df, on=["year_iso", "week_iso_num"], how="left")
        .merge(work_df, on=["year_iso", "week_iso_num"], how="left")
        .merge(cov_df, on=["year_iso", "week_iso_num"], how="left")
        .set_index("date_monday")
        .sort_index()
    )
    return merged

def build_model_matrix(df: pd.DataFrame, lags: tuple[int, int, int], mask_vars: list[str] | None = None) -> pd.DataFrame:
    lag_vac, lag_mnp, lag_work = lags
    df = df.copy()
    df["work_red"] = zscore(-df["work"])
    if mask_vars:
        for var in mask_vars:
            df[var] = zscore(df[var])
        df["MNP_score"] = df[mask_vars + ["work_red"]].mean(axis=1)
    else:
        df["MNP_score"] = zscore(df["work_red"])

    X = pd.DataFrame(index=df.index)
    X["cov12_lag"] = df["couv_complet"].shift(lag_vac)
    X["MNP_lag"] = df["MNP_score"].shift(lag_mnp)
    X["work_lag"] = df["work"].shift(lag_work)
    return build_time_features(X)


def plot_series(df: pd.DataFrame, y_col: str = "RSV", fitted: dict[str, pd.Series] | None = None, title: str = "RSV observ√© vs ajust√©") -> None:
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df.index, y=df[y_col], name="RSV observ√©", mode="lines", line=dict(color="black")))
    if fitted:
        for name, series in fitted.items():
            fig.add_trace(go.Scatter(x=series.index, y=series, name=name, mode="lines", line=dict(dash="dot")))
    fig.add_vline(x=COVID_START, line_dash="dash", line_color="red")
    fig.add_vline(x=VACC_START, line_dash="dash", line_color="green")
    fig.update_layout(title=title, xaxis_title="Semaine", yaxis_title=y_col)
    fig.show()


## 1. Pr√©parer les jeux de donn√©es

Chargement des sources harmonis√©es (ODISSEE, VACSI, Google Mobility, CoviPrev, M√©t√©o, ERVISS) et contr√¥le de coh√©rence avant mod√©lisation.

In [None]:
# ==========================================
# üß± BLOC 1 ‚Äî Setup, Helpers & Chargement Donn√©es
# ==========================================

from pathlib import Path
import warnings
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.stats.outliers_influence import variance_inflation_factor
from itertools import product
import plotly.express as px
import plotly.graph_objects as go

warnings.filterwarnings("ignore")
pd.set_option("display.max_columns", 120)
pd.set_option("display.width", 180)
np.random.seed(42)

px.defaults.template = "plotly_white"
px.defaults.width = 1000
px.defaults.height = 520

# ==========================================
# üìÅ Chemins & Fichiers
# ==========================================
DATA = Path("../data_clean")
FILES = {
    "common_FR_long": DATA / "ODISSEE/common_FR_long.csv",
    "vacsi_fr_extended": DATA / "VACSI/vacsi_fr_extended.csv",
    "google_mobility_fr_weekly": DATA / "GOOGLE/google_mobility_fr_weekly.csv",
    "coviprev_reg_weekly": DATA / "COVIPREV/coviprev_reg_weekly.csv",
    "meteo_fr_weekly": DATA / "METEO/meteo_fr_weekly.csv",
    "erviss_fr_weekly": DATA / "ERVISS/erviss_fr_weekly.csv",
}

missing = [k for k, p in FILES.items() if not p.exists()]
if missing:
    raise FileNotFoundError(f"‚ùå Fichiers manquants: {missing}")
print("‚úÖ Tous les fichiers n√©cessaires sont disponibles.")

COVID_START = pd.Timestamp("2020-03-01")
VACC_START  = pd.Timestamp("2021-01-01")

LAG_VACC, LAG_MNP, LAG_WORK = 4, 8, 9
SEASON_PERIOD = 52
print(f"‚è±Ô∏è COVID_START={COVID_START.date()}, VACC_START={VACC_START.date()} | Lags: {LAG_VACC, LAG_MNP, LAG_WORK}")

# ==========================================
# üß© Fonctions utilitaires
# ==========================================
def keyify(df: pd.DataFrame) -> pd.DataFrame:
    iso = pd.to_datetime(df["date_monday"]).dt.isocalendar()
    df["year_iso"] = iso["year"].astype(int)
    df["week_iso_num"] = iso["week"].astype(int)
    return df

def zscore(s): return (s - s.mean()) / s.std(ddof=0) if s.std(ddof=0) != 0 else s * 0
def build_time_features(df, period=52):
    df = df.copy()
    df["t"] = np.arange(len(df))
    df["sin52"] = np.sin(2 * np.pi * df["t"] / period)
    df["cos52"] = np.cos(2 * np.pi * df["t"] / period)
    return df

def load_datasets(files):
    data = {}
    for name, path in files.items():
        data[name] = pd.read_csv(path)
        print(f"‚úÖ {name} charg√© ({data[name].shape[0]} lignes)")
    return data

def merge_exog(rsv_df, vac_df, work_df, cov_df):
    merged = (
        rsv_df[["date_monday", "year_iso", "week_iso_num"]]
        .merge(vac_df, on=["year_iso", "week_iso_num"], how="left")
        .merge(work_df, on=["year_iso", "week_iso_num"], how="left")
        .merge(cov_df, on=["year_iso", "week_iso_num"], how="left")
        .set_index("date_monday")
        .sort_index()
    )
    return merged

def build_model_matrix(df, lags=(4, 8, 9), mask_vars=None):
    lag_vac, lag_mnp, lag_work = lags
    df["work_red"] = zscore(-df["work"])
    if mask_vars:
        for v in mask_vars: df[v] = zscore(df[v])
        df["MNP_score"] = df[mask_vars + ["work_red"]].mean(axis=1)
    else:
        df["MNP_score"] = zscore(df["work_red"])

    X = pd.DataFrame(index=df.index)
    X["cov12_lag"] = df["couv_complet"].shift(lag_vac)
    X["MNP_lag"]   = df["MNP_score"].shift(lag_mnp)
    X["work_lag"]  = df["work"].shift(lag_work)
    return build_time_features(X)

def plot_series(df, y_col="RSV", y_fit=None, title="RSV Observ√© vs Ajust√©"):
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df.index, y=df[y_col], name="RSV observ√©", mode="lines", line=dict(color="black")))
    if y_fit is not None:
        fig.add_trace(go.Scatter(x=df.index, y=y_fit, name="Ajust√©", mode="lines", line=dict(color="blue", dash="dot")))
    fig.update_layout(title=title, xaxis_title="Semaine", yaxis_title=y_col)
    fig.show()

# ==========================================
# üìä Chargement des donn√©es
# ==========================================
data = load_datasets(FILES)

common = keyify(data["common_FR_long"])
mask = (common["topic"] == "RSV") & (common["geo_level"] == "FR")
age_used = next(a for a in ["00-04 ans", "0-1 an", "Tous √¢ges"] if ((mask) & (common["classe_d_age"] == a)).any())
mask &= (common["classe_d_age"] == age_used)

ycol = "taux_passages_urgences" if "taux_passages_urgences" in common.columns else "taux_sos"
rsv = common.loc[mask, ["date_monday", "year_iso", "week_iso_num", ycol]].rename(columns={ycol: "RSV"})
rsv["date_monday"] = pd.to_datetime(rsv["date_monday"])
rsv = rsv.sort_values("date_monday")
print(f"‚úÖ RSV pr√™t ({age_used}) ‚Äî {rsv.shape[0]} lignes")

vac = keyify(data["vacsi_fr_extended"]).query("geo_level=='FR'")[["year_iso","week_iso_num","couv_complet"]]
gm  = keyify(data["google_mobility_fr_weekly"])
work = gm.query("geo_level=='FR' & indicator=='workplaces'")[["year_iso","week_iso_num","value"]].rename(columns={"value": "work"})
cov = keyify(data["coviprev_reg_weekly"])
mask_vars = ["port_du_masque","lavage_des_mains","aeration_du_logement","saluer_sans_serrer_la_main"]
cov_nat = cov[cov["indicator"].isin(mask_vars)].groupby(["year_iso","week_iso_num","indicator"])["value"].mean().unstack()
print(f"‚úÖ CoviPrev agr√©g√© nationalement ({len(cov_nat)} semaines)")

X_base = merge_exog(rsv, vac, work, cov_nat)
X_full = build_model_matrix(X_base, lags=(LAG_VACC, LAG_MNP, LAG_WORK), mask_vars=mask_vars)

df_base = rsv.set_index("date_monday")[["RSV"]].join(X_full, how="left").dropna().sort_index()
print(f"‚úÖ Base finale pr√™te : {df_base.shape}")


‚úÖ Tous les fichiers n√©cessaires sont disponibles.
‚è±Ô∏è COVID_START=2020-03-01, VACC_START=2021-01-01 | Lags: (4, 8, 9)
‚úÖ common_FR_long charg√© (3223 lignes)
‚úÖ vacsi_fr_extended charg√© (105 lignes)
‚úÖ google_mobility_fr_weekly charg√© (840 lignes)
‚úÖ coviprev_reg_weekly charg√© (1296 lignes)
‚úÖ meteo_fr_weekly charg√© (392 lignes)
‚úÖ erviss_fr_weekly charg√© (11713 lignes)


### 4.1 Premi√®re v√©rification

On jette un coup d'≈ìil :
- Aper√ßu des premi√®res lignes pour v√©rifier les colonnes.
- Statistiques simples (moyenne, minimum, maximum) pour rep√©rer un √©ventuel bug.
- Message clair si un fichier manque ou si une valeur para√Æt √©trange.

Si tout est vert, on peut passer aux mod√®les.

In [None]:
display(df_base.head())
summary = df_base.describe().T[["mean", "std", "min", "max"]].round(2)
display(summary)


Unnamed: 0_level_0,RSV,cov12_lag,MNP_lag,work_lag,t,sin52,cos52
date_monday,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-25,711.936545,0.0,0.030364,-23.494505,56,0.464723,0.885456
2021-02-01,763.390974,0.000146,0.61245,-20.791209,57,0.568065,0.822984
2021-02-08,873.929009,0.001203,-0.068845,-20.241758,58,0.663123,0.748511
2021-02-15,982.824427,0.00282,1.880221,-19.362637,59,0.748511,0.663123
2021-02-22,1055.391309,0.02208,2.280871,-47.428571,60,0.822984,0.568065


Unnamed: 0,mean,std,min,max
RSV,822.19,628.81,170.03,2861.52
cov12_lag,12.55,6.75,0.0,17.49
MNP_lag,-0.27,0.73,-1.2,2.28
work_lag,-16.99,10.59,-53.2,-3.09
t,104.5,28.43,56.0,153.0
sin52,-0.0,0.73,-1.0,1.0
cos52,-0.06,0.69,-1.0,1.0


## 2. Mod√®le OLS : comprendre les d√©terminants

On d√©marre avec un mod√®le simple, puis on optimise les retards et on ajoute les interactions (vaccination √ó gestes) et la saisonnalit√©.

## 5. Mod√®le OLS : lire les relations simples

Le mod√®le OLS (r√©gression lin√©aire) agit comme une loupe. Il r√©pond √† la question : "Quand la vaccination monte ou que les gestes barri√®res changent, comment r√©agit le RSV en moyenne ?"

On commence doucement puis on am√©liore le mod√®le √©tape par √©tape.

### 5.1 On part d'un mod√®le tr√®s simple

On garde seulement quelques ingr√©dients (vaccination d√©cal√©e, gestes barri√®res r√©sum√©s, saisonnalit√©) pour suivre la tendance globale.

In [None]:
Y = df_base["RSV"].astype(float)
X_cols_base = ["cov12_lag", "MNP_lag", "work_lag", "sin52", "cos52"]
X_base_design = df_base[X_cols_base]

ols_base = sm.OLS(Y, sm.add_constant(X_base_design)).fit(cov_type="HC3")
print(f"R¬≤ ajust√© = {ols_base.rsquared_adj:.3f} | AIC = {ols_base.aic:.1f} | Durbin-Watson = {sm.stats.stattools.durbin_watson(ols_base.resid):.3f}")

plot_series(
    df_base,
    fitted={"OLS base": ols_base.fittedvalues}
)


R¬≤ ajust√© = 0.530 | AIC = 1473.0 | Durbin-Watson = 0.150


### 5.2 On teste plusieurs d√©calages

Les effets ne sont pas instantan√©s : une vaccination ou un changement de mobilit√© agit apr√®s quelques semaines. On lance donc une recherche automatique qui essaie plein de combinaisons de retards et garde celle qui colle le mieux aux donn√©es.

In [None]:
best_r2, best_lags = -np.inf, (LAG_VACC, LAG_MNP, LAG_WORK)
lags_grid = [(lv, lm, lw) for lv in range(2, 9) for lm in range(4, 13) for lw in range(4, 13)]

for lv, lm, lw in lags_grid:
    X_tmp = build_model_matrix(X_base, lags=(lv, lm, lw), mask_vars=mask_vars)
    df_tmp = rsv.set_index("date_monday")["RSV"].to_frame().join(X_tmp).dropna()
    if len(df_tmp) < 40:
        continue
    fit_tmp = sm.OLS(df_tmp["RSV"], sm.add_constant(df_tmp[X_cols_base])).fit()
    if fit_tmp.rsquared_adj > best_r2:
        best_r2, best_lags = fit_tmp.rsquared_adj, (lv, lm, lw)

print(f"ü•á Retards optimaux identifi√©s : {best_lags} (R¬≤ ajust√© ‚âà {best_r2:.3f})")

X_full_opt = build_model_matrix(X_base, lags=best_lags, mask_vars=mask_vars)

df_opt = (
    rsv.set_index("date_monday")["RSV"].to_frame()
    .join(X_full_opt)
    .dropna()
)

meteo = keyify(data["meteo_fr_weekly"])[["year_iso", "week_iso_num", "tmean"]]
df_opt = (
    keyify(df_opt.reset_index())
    .merge(meteo, on=["year_iso", "week_iso_num"], how="left")
    .set_index("date_monday")
    .sort_index()
)

df_opt["tmean_z"] = zscore(df_opt["tmean"])
df_opt["vacc_x_mnp"] = df_opt["cov12_lag"] * df_opt["MNP_lag"]
df_opt["RSV_lag1"] = df_opt["RSV"].shift(1)
df_opt["RSV_lag2"] = df_opt["RSV"].shift(2)
df_opt = df_opt.dropna()

X_opt_cols = [
    "cov12_lag", "MNP_lag", "work_lag", "tmean_z",
    "vacc_x_mnp", "RSV_lag1", "RSV_lag2", "sin52", "cos52"
]
X_opt = df_opt[X_opt_cols]

ols_opt = sm.OLS(df_opt["RSV"], sm.add_constant(X_opt)).fit(cov_type="HC3")
print(f"OLS optimis√© ‚Äî R¬≤ ajust√© = {ols_opt.rsquared_adj:.3f} | AIC = {ols_opt.aic:.1f} | Durbin-Watson = {sm.stats.stattools.durbin_watson(ols_opt.resid):.3f}")

LAG_MNP_EFFECT = 3
df_opt["MNP_lag_causal"] = df_opt["MNP_lag"].shift(LAG_MNP_EFFECT)
df_opt["vacc_x_mnp_causal"] = df_opt["cov12_lag"] * df_opt["MNP_lag_causal"]

X_causal_cols = [
    "cov12_lag", "MNP_lag_causal", "work_lag", "tmean_z",
    "vacc_x_mnp_causal", "RSV_lag1", "RSV_lag2", "sin52", "cos52"
]
Xo_causal = df_opt[X_causal_cols].dropna()

ols_causal = sm.OLS(
    df_opt.loc[Xo_causal.index, "RSV"],
    sm.add_constant(Xo_causal)
).fit(cov_type="HC3")
print(f"OLS causal (d√©calage MNP +{LAG_MNP_EFFECT} sem) ‚Äî R¬≤ ajust√© = {ols_causal.rsquared_adj:.3f} | AIC = {ols_causal.aic:.1f}")

fitted_curves = {
    "OLS optimis√©": ols_opt.fittedvalues,
    "OLS causal": ols_causal.fittedvalues.reindex(df_opt.index)
}
plot_series(df_opt, fitted=fitted_curves, title="RSV ‚Äî OLS optimis√© vs causal")


ü•á Retards optimaux identifi√©s : (7, 12, 4) (R¬≤ ajust√© ‚âà 0.657)
OLS optimis√© ‚Äî R¬≤ ajust√© = 0.968 | AIC = 1069.4 | Durbin-Watson = 1.960
OLS causal (d√©calage MNP +3 sem) ‚Äî R¬≤ ajust√© = 0.970 | AIC = 1029.3


### Diagnostics & interpr√©tations

### Ce qu'il faut retenir tout de suite

- **OLS optimis√©** explique presque toutes les variations et laisse des r√©sidus propres.
- **OLS causal** ajoute un d√©calage sur les gestes barri√®res pour coller au comportement humain.
- **ITS** confirme les ruptures dues au COVID et au vaccin mais reste moins pr√©cis entre deux √©v√©nements.
- **SARIMAX** est le meilleur compagnon pour les pr√©visions multi-ann√©es.

## 9. Zoom sur le mod√®le OLS optimis√©

On traduit les diagnostics en phrases faciles :
- quels indicateurs globaux confirment que le mod√®le est solide,
- quelles variables p√®sent le plus,
- comment v√©rifier que le mod√®le reste stable dans le temps.

L'id√©e est d'avoir un mode d'emploi, pas seulement un tableau de chiffres.

### Indicateurs globaux

- **R¬≤ ajust√© ‚âà 0,97** : le mod√®le explique presque tout ce qui se passe chaque semaine.
- **AIC/BIC ‚âà 1 030** : on reste sur un mod√®le compact sans surench√®re.
- **Durbin-Watson ‚âà 1,98** : les r√©sidus ressemblent √† du bruit al√©atoire (pas de structure cach√©e).
- Les tests compl√©mentaires restent dans des zones acceptables : rien d'alarmant.

In [None]:
vif_df = pd.DataFrame({
    "Variable": X_opt_cols,
    "VIF": [variance_inflation_factor(X_opt.values, i) for i in range(X_opt.shape[1])]
}).sort_values("VIF", ascending=False)

display(vif_df)

Unnamed: 0,Variable,VIF
5,RSV_lag1,66.713191
6,RSV_lag2,64.478941
3,tmean_z,8.27763
8,cos52,6.590143
1,MNP_lag,6.289709
4,vacc_x_mnp,5.760349
0,cov12_lag,3.952017
7,sin52,2.702864
2,work_lag,2.576776


üëâ Ce que disent les coefficients :
- La vaccination fait baisser le RSV, surtout quand elle est combin√©e √† des gestes barri√®res stables.
- Les gestes barri√®res retard√©s de 3 semaines capturent bien le temps n√©cessaire pour que les habitudes changent.
- La saisonnalit√© (sinus/cosinus) et l'inertie du RSV gardent la forme globale de la courbe.
- Le climat et la mobilit√© au travail jouent un r√¥le plus doux mais non n√©gligeable.

## 3. Sc√©narios contrefactuels (mod√®le OLS causal)

Les sc√©narios modifient les niveaux de vaccination et de gestes barri√®res apr√®s le COVID. Pour ‚ÄúSans MNP‚Äù, on impose le niveau de rel√¢chement maximal observ√© ; pour ‚ÄúMNP +50‚ÄØ%‚Äù, on renforce durablement la protection. Bootstrap = incertitude sur l'√©cart cumul√©.

## 10. Sc√©narios "et si..."

On joue avec les leviers principaux (vaccination et gestes barri√®res) pour imaginer la suite :

- Les sc√©narios couvrent l'histoire r√©elle **et** prolongent la s√©rie jusqu'√† fin 2025.
- Pour prolonger, on garde les derniers niveaux observ√©s et on laisse le mod√®le remplir semaine apr√®s semaine.
- √Ä chaque √©tape, la pr√©vision de la semaine suivante utilise les valeurs calcul√©es juste avant : la courbe reste continue.
- Quand on "renforce" les gestes barri√®res, on augmente de 50 % les scores d√©j√† positifs **et** on rapproche vers z√©ro les valeurs n√©gatives (p√©riodes de rel√¢chement). On √©vite ainsi de cr√©er des situations encore plus rel√¢ch√©es que l'observ√©.
- On garde aussi l'interaction "vaccination √ó gestes" au niveau observ√© pour ce sc√©nario afin qu'il ne soit pas interpr√©t√© comme une r√©ponse tardive √† une vague d√©j√† en cours.

Sc√©narios test√©s :
- **Maintien 2024** : on garde les comportements r√©cents.
- **Sans vaccination** : on coupe l'effet vaccin apr√®s 2020.
- **Sans gestes barri√®res** : on enl√®ve l'effet masques/t√©l√©travail.
- **Gestes barri√®res +50 %** : on renforce durablement les protections.

Chaque sc√©nario est illustr√© par des courbes, des √©carts cumul√©s et un intervalle de confiance (bootstrap).

In [None]:
def simulate_dynamic(ols_fit, design_matrix, history_series):
    history = history_series.copy().astype(float)
    preds = pd.Series(index=design_matrix.index, dtype=float)
    for date in design_matrix.index:
        row = design_matrix.loc[date].copy()
        if date not in history.index:
            row["RSV_lag1"] = history.iloc[-1]
            row["RSV_lag2"] = history.iloc[-2]
        X_names = ols_fit.model.exog_names
        X_row = row[X_names[1:]].to_frame().T
        X_row.insert(0, 'const', 1.0)
        preds.loc[date] = float(ols_fit.predict(X_row))
        if date not in history_series.index:
            history.loc[date] = preds.loc[date]
    return preds

historical_design = Xo_causal.copy()
historical_design["MNP_lag"] = df_opt.loc[historical_design.index, "MNP_lag"]
historical_rsv = df_opt["RSV"].copy()

scenario_columns = X_causal_cols + ["MNP_lag"]
scenario_end = pd.Timestamp("2025-12-29")
future_weeks = pd.date_range(start=historical_design.index[-1] + pd.Timedelta(weeks=1), end=scenario_end, freq="W-MON")

if len(future_weeks):
    last_row = historical_design.iloc[-1]
    future_design = pd.DataFrame(index=future_weeks, columns=scenario_columns, dtype=float)
    for col in ["cov12_lag", "MNP_lag", "work_lag", "tmean_z"]:
        future_design[col] = last_row[col]
    base_length = len(historical_design)
    future_positions = np.arange(base_length, base_length + len(future_design))
    future_design["sin52"] = np.sin(2 * np.pi * future_positions / SEASON_PERIOD)
    future_design["cos52"] = np.cos(2 * np.pi * future_positions / SEASON_PERIOD)
    future_design["RSV_lag1"] = np.nan
    future_design["RSV_lag2"] = np.nan
    future_design["MNP_lag_causal"] = np.nan
    future_design["vacc_x_mnp_causal"] = np.nan
    scenario_design_full = pd.concat([historical_design, future_design])
else:
    scenario_design_full = historical_design.copy()

scenario_design_full = scenario_design_full.sort_index()

configurations = {
    "Maintien 2024": {"vacc_factor": 1.0, "mnp_factor": 1.0, "freeze_vacc_mnp": False, "mnp_mode": "baseline"},
    "Sans vaccination": {"vacc_factor": 0.0, "mnp_factor": 1.0, "freeze_vacc_mnp": False, "mnp_mode": "baseline"},
    "Sans MNP": {"vacc_factor": 1.0, "mnp_factor": 1.0, "freeze_vacc_mnp": False, "mnp_mode": "relax_max"},
    "MNP maintenus (+50%)": {"vacc_factor": 1.0, "mnp_factor": 1.5, "freeze_vacc_mnp": True, "mnp_mode": "baseline"},
}

color_map = {
    "Maintien 2024": "#2E86AB",
    "Sans vaccination": "#F39C12",
    "Sans MNP": "#E74C3C",
    "MNP maintenus (+50%)": "#27AE60",
}

def refresh_causal_terms(design: pd.DataFrame) -> pd.DataFrame:
    design = design.copy()
    design["MNP_lag_causal"] = design["MNP_lag"].shift(LAG_MNP_EFFECT)
    design["vacc_x_mnp_causal"] = design["cov12_lag"] * design["MNP_lag_causal"]
    design[["MNP_lag_causal", "vacc_x_mnp_causal"]] = design[["MNP_lag_causal", "vacc_x_mnp_causal"]].bfill()
    return design


def adjust_mnp(series: pd.Series, factor: float) -> pd.Series:
    if factor == 1.0:
        return series
    series = series.copy()
    mask_pos = series >= 0
    series.loc[mask_pos] *= factor
    series.loc[~mask_pos] /= factor
    return series


def prepare_design(base_design, vacc_factor, mnp_factor, freeze_vacc_mnp=False, mnp_mode="baseline"):
    design = base_design.copy()
    mask_after_covid = design.index >= COVID_START
    design.loc[mask_after_covid, "cov12_lag"] *= vacc_factor
    if mnp_mode == "relax_max":
        max_obs = base_design.loc[mask_after_covid, "MNP_lag"].max()
        design.loc[mask_after_covid, "MNP_lag"] = max_obs
    else:
        design.loc[mask_after_covid, "MNP_lag"] = adjust_mnp(design.loc[mask_after_covid, "MNP_lag"], mnp_factor)
    design = refresh_causal_terms(design)
    if freeze_vacc_mnp:
        design.loc[mask_after_covid, "vacc_x_mnp_causal"] = baseline_reference.loc[mask_after_covid, "vacc_x_mnp_causal"]
    return design

scenario_design_full = refresh_causal_terms(scenario_design_full)
baseline_reference = scenario_design_full.copy()

scenarios = {}
for name, cfg in configurations.items():
    design_cfg = prepare_design(scenario_design_full, **cfg)
    preds = simulate_dynamic(ols_causal, design_cfg, historical_rsv)
    scenarios[name] = preds

baseline_preds = simulate_dynamic(ols_causal, scenario_design_full, historical_rsv)
scenario_df = pd.DataFrame({
    "RSV observ√©": historical_rsv,
    "OLS causal": baseline_preds
})
for name, series in scenarios.items():
    scenario_df[name] = series

# (plots and summaries follow as in existing cell)
fig_cf = go.Figure()
fig_cf.add_trace(go.Scatter(x=scenario_df.index, y=scenario_df["RSV observ√©"], name="Observ√©", line=dict(color="black", width=2)))
fig_cf.add_trace(go.Scatter(x=scenario_df.index, y=scenario_df["OLS causal"], name="OLS causal", line=dict(color=color_map["Maintien 2024"], dash="dot")))
for name, series in scenarios.items():
    fig_cf.add_trace(go.Scatter(
        x=series.index,
        y=series,
        name=name,
        line=dict(dash="dot", width=2, color=color_map.get(name, "gray"))
    ))
fig_cf.add_vline(x=COVID_START, line_dash="dash", line_color="red")
fig_cf.add_vline(x=VACC_START, line_dash="dash", line_color="green")
fig_cf.update_layout(
    title="Fig. IV.28 ‚Äî Trajectoires simul√©es du RSV (2018‚Äì2025)",
    xaxis_title="Semaine",
    yaxis_title="RSV simul√©",
    legend=dict(orientation="h", y=-0.25),
    height=650
)
fig_cf.show()

history_mask = scenario_df["RSV observ√©"].notna()
baseline_observed = scenario_df.loc[history_mask, "OLS causal"]

delta_series = {}
for name, series in scenarios.items():
    if name == "Maintien 2024":
        continue
    scenario_observed = series.loc[history_mask]
    delta_series[name] = scenario_observed - baseline_observed

rng = np.random.default_rng(42)
BOOT_N = 1500
summary_rows = []
distribution_rows = []
for name, series in delta_series.items():
    baseline_delta = series.sum()
    boot_samples = rng.choice(series.values, size=(BOOT_N, len(series)), replace=True).sum(axis=1)
    ci_low, ci_high = np.percentile(boot_samples, [2.5, 97.5])
    p_two = 2 * min((boot_samples >= 0).mean(), (boot_samples <= 0).mean())
    p_two = float(np.clip(p_two, 0, 1))
    summary_rows.append({
        "Sc√©nario": name,
        "Œî cumulatif": baseline_delta,
        "CI_inf": ci_low,
        "CI_sup": ci_high,
        "p_value": p_two,
    })
    distribution_rows.extend({"Sc√©nario": name, "Œî_bootstrap": val} for val in boot_samples)

summary_df = pd.DataFrame(summary_rows).sort_values("Œî cumulatif")
distribution_df = pd.DataFrame(distribution_rows)

display(summary_df)

fig_delta = go.Figure()
fig_delta.add_trace(go.Bar(
    x=summary_df["Sc√©nario"],
    y=summary_df["Œî cumulatif"],
    error_y=dict(
        type="data",
        symmetric=False,
        array=(summary_df["CI_sup"] - summary_df["Œî cumulatif"]).to_list(),
        arrayminus=(summary_df["Œî cumulatif"] - summary_df["CI_inf"]).to_list(),
        thickness=1.5,
        width=6,
    ),
    marker_color=[color_map.get(name, "gray") for name in summary_df["Sc√©nario"]],
    text=[f"p={row['p_value']:.3f}" for _, row in summary_df.iterrows()],
    textposition="outside",
))
fig_delta.update_layout(
    title="Fig. IV.29 ‚Äî Œî cumul√©s vs sc√©nario observ√© (IC95%)",
    xaxis_title="Sc√©nario",
    yaxis_title="Œî cumulatif (points RSV)",
)
fig_delta.show()


Unnamed: 0,Sc√©nario,Œî cumulatif,CI_inf,CI_sup,p_value
2,MNP maintenus (+50%),-34.082489,,,0.0
1,Sans MNP,3088.903847,,,0.0
0,Sans vaccination,3162.128394,,,0.0


## 4. S√©rie interrompue (ITS)

Analyse des ruptures (COVID, vaccination) et sc√©narios d'amplification pour mesurer la sensibilit√© du RSV aux changements de tendance.

## 6. Mod√®le ITS : rep√©rer les ruptures

Le mod√®le ITS (Interrupted Time Series) observe les cassures nettes :
- avant/apr√®s mars 2020 (arriv√©e du COVID),
- avant/apr√®s janvier 2021 (d√©ploiement du vaccin).

Il ressemble √† une r√®gle qui trace deux droites et mesure la marche entre elles. Il ne voit pas la saisonnalit√© fine, donc ses courbes paraissent parfois "cass√©es" ou lentes : c'est normal, il sert surtout √† confirmer l'impact brutal des √©v√©nements.

In [None]:
df_its = df_base.copy().reset_index().sort_values("date_monday")
df_its["t"] = np.arange(len(df_its))
df_its["post_covid"] = (df_its["date_monday"] >= COVID_START).astype(int)
df_its["post_vacc"] = (df_its["date_monday"] >= VACC_START).astype(int)
df_its["t_post_covid"] = df_its["t"] * df_its["post_covid"]
df_its["t_post_vacc"] = df_its["t"] * df_its["post_vacc"]

its_X_cols = ["t", "sin52", "cos52", "post_covid", "t_post_covid", "post_vacc", "t_post_vacc"]
its_base = sm.OLS(df_its["RSV"], sm.add_constant(df_its[its_X_cols])).fit(
    cov_type="HAC", cov_kwds={"maxlags": 12}
)
print(f"ITS base ‚Äî AIC = {its_base.aic:.1f} | BIC = {its_base.bic:.1f} | Durbin-Watson = {sm.stats.stattools.durbin_watson(its_base.resid):.3f}")

plot_series(
    df_its.set_index("date_monday"),
    fitted={"ITS base": its_base.fittedvalues.reindex(df_its.index)},
    title="ITS base ‚Äî RSV observ√© vs ajust√©"
)

def add_fourier(df: pd.DataFrame, K: int = 1, period: int = 52) -> pd.DataFrame:
    df = df.copy()
    t = np.arange(len(df))
    for k in range(1, K + 1):
        df[f"sin{k}"] = np.sin(2 * np.pi * k * t / period)
        df[f"cos{k}"] = np.cos(2 * np.pi * k * t / period)
    return df

steps_days = [-28, -14, 0, 14, 28]
candidates_covid = [COVID_START + pd.to_timedelta(days, unit="D") for days in steps_days]
candidates_vacc = [VACC_START + pd.to_timedelta(days, unit="D") for days in steps_days]
Ks = [1, 2, 3]

best_its = None

def make_its_design(df: pd.DataFrame, covid_date: pd.Timestamp, vacc_date: pd.Timestamp, K: int):
    dfX = df.copy().reset_index().rename(columns={"date_monday": "date"}).sort_values("date")
    dfX["t"] = np.arange(len(dfX))
    dfX["post_covid"] = (dfX["date"] >= covid_date).astype(int)
    dfX["post_vacc"] = (dfX["date"] >= vacc_date).astype(int)
    dfX["t_post_covid"] = dfX["t"] * dfX["post_covid"]
    dfX["t_post_vacc"] = dfX["t"] * dfX["post_vacc"]
    dfX = add_fourier(dfX, K=K)
    y = dfX["RSV"].astype(float)
    X_cols = [
        "t", "post_covid", "t_post_covid", "post_vacc", "t_post_vacc"
    ] + [f"sin{k}" for k in range(1, K + 1)] + [f"cos{k}" for k in range(1, K + 1)]
    for col in ["cov12_lag", "MNP_lag", "work_lag"]:
        if col in dfX.columns:
            X_cols.append(col)
    design = dfX.set_index(pd.to_datetime(dfX["date"]))
    hac_lags = int(np.clip(np.sqrt(len(design)), 8, 24))
    fit = sm.OLS(y, sm.add_constant(design[X_cols])).fit(
        cov_type="HAC", cov_kwds={"maxlags": hac_lags}
    )
    return fit, design, X_cols

for K in Ks:
    for covid_date in candidates_covid:
        for vacc_date in candidates_vacc:
            if vacc_date <= covid_date:
                continue
            try:
                fit, design, cols = make_its_design(
                    df_opt[["RSV", "cov12_lag", "MNP_lag", "work_lag"]],
                    covid_date=covid_date,
                    vacc_date=vacc_date,
                    K=K
                )
                if (best_its is None) or (fit.aic < best_its["aic"]):
                    best_its = {
                        "aic": fit.aic,
                        "bic": fit.bic,
                        "K": K,
                        "covid": covid_date,
                        "vacc": vacc_date,
                        "fit": fit,
                        "design": design,
                        "cols": cols,
                    }
            except Exception:
                continue

if best_its is None:
    print("‚ö†Ô∏è Aucun param√©trage ITS am√©lior√© n'a converg√© ‚Äî on garde le mod√®le de base.")
    its_best_fit = its_base
    df_its_best = df_its.set_index("date_monday")
else:
    its_best_fit = best_its["fit"]
    df_its_best = best_its["design"]
    print(
        f"ITS optimis√© ‚Äî K={best_its['K']} | COVID={best_its['covid'].date()} | VACC={best_its['vacc'].date()} | AIC = {best_its['aic']:.1f} | BIC = {best_its['bic']:.1f}"
    )

its_params = its_best_fit.params.copy()
feature_cols = [c for c in its_params.index if c != "const"]
its_design_matrix = sm.add_constant(df_its_best[feature_cols], has_constant='add')
its_design_matrix = its_design_matrix.loc[:, its_params.index]
its_reference_index = its_design_matrix.index
its_pred_base = pd.Series(its_design_matrix.values @ its_params.values, index=its_reference_index)

lb_its = acorr_ljungbox(its_best_fit.resid, lags=[8, 12, 24], return_df=True)[["lb_stat", "lb_pvalue"]]
display(lb_its)

plot_series(
    df_its_best.assign(RSV=df_its_best.get("RSV", its_pred_base)),
    fitted={"ITS optimis√©": its_pred_base},
    title="ITS optimis√© ‚Äî RSV observ√© vs ajust√©"
)


ITS base ‚Äî AIC = 1478.0 | BIC = 1488.3 | Durbin-Watson = 0.092


‚ö†Ô∏è Aucun param√©trage ITS am√©lior√© n'a converg√© ‚Äî on garde le mod√®le de base.


Unnamed: 0,lb_stat,lb_pvalue
8,229.24855,4.269761e-45
12,266.903672,4.0411069999999997e-50
24,302.019095,6.574857999999999e-50


In [None]:
its_params = its_best_fit.params
its_matrix = its_design_matrix.loc[:, its_params.index]

def predict_its(covid_scale: float = 1.0, vacc_scale: float = 1.0) -> pd.Series:
    params = its_params.copy()
    for term in params.index:
        if "post_covid" in term:
            params[term] *= covid_scale
        if "post_vacc" in term:
            params[term] *= vacc_scale
    values = its_matrix.values @ params.values
    return pd.Series(values, index=its_reference_index)

its_scenarios = {
    "R√©f√©rence": its_pred_base,
    "COVID amplifi√© (√ó1.5)": predict_its(covid_scale=1.5, vacc_scale=1.0),
    "Vaccination amplifi√©e (√ó1.5)": predict_its(covid_scale=1.0, vacc_scale=1.5),
    "Ruptures amplifi√©es (√ó1.5, √ó1.5)": predict_its(covid_scale=1.5, vacc_scale=1.5),
}

fig_its = go.Figure()
for name, series in its_scenarios.items():
    fig_its.add_trace(go.Scatter(x=series.index, y=series, mode="lines", name=name))
fig_its.add_vline(x=COVID_START, line_dash="dash", line_color="red")
fig_its.add_vline(x=VACC_START, line_dash="dash", line_color="green")
fig_its.update_layout(
    title="Fig. IV.30 ‚Äî Sc√©narios ITS : ruptures amplifi√©es",
    xaxis_title="Semaine",
    yaxis_title="RSV ajust√© (ITS)",
    legend=dict(orientation="h", y=-0.25),
    height=620
)
fig_its.show()

fig_violin = px.violin(
    distribution_df,
    x="Sc√©nario",
    y="Œî_bootstrap",
    color="Sc√©nario",
    box=True,
    points="outliers",
    color_discrete_map=color_map,
    title="Fig. IV.31 ‚Äî Distributions bootstrap des Œî cumul√©s",
)
fig_violin.update_layout(yaxis_title="Œî cumulatif bootstrap")
fig_violin.show()

fig_dot = go.Figure()
for _, row in summary_df.iterrows():
    fig_dot.add_trace(go.Scatter(
        x=[row["Œî cumulatif"]],
        y=[row["Sc√©nario"]],
        mode="markers",
        marker=dict(size=12, color=color_map.get(row["Sc√©nario"], "gray")),
        error_x=dict(
            type="data",
            symmetric=False,
            array=[row["CI_sup"] - row["Œî cumulatif"]],
            arrayminus=[row["Œî cumulatif"] - row["CI_inf"]],
            thickness=1.5,
            width=0,
        ),
        name=row["Sc√©nario"],
        showlegend=False,
        hovertemplate="Œî cumulatif: %{x:.1f}<br>IC95%: [" + f"{row['CI_inf']:.1f}; {row['CI_sup']:.1f}" + "]<br>p-value: " + f"{row['p_value']:.3f}" + "<extra></extra>",
    ))
fig_dot.add_vline(x=0, line=dict(color="black", dash="dash"))
fig_dot.update_layout(
    title="Fig. IV.32 ‚Äî IC95% et significativit√© des sc√©narios",
    xaxis_title="Œî cumulatif (points RSV)",
    yaxis_title="Sc√©nario",
)
fig_dot.show()

clean_design = scenario_design_full[X_causal_cols].dropna()
standardised = clean_design.apply(zscore)
component = standardised.mul(ols_causal.params[X_causal_cols], axis=1)
contrib_abs = component.abs().mean()

category_map = {
    "Vaccination": ["cov12_lag"],
    "Gestes barri√®res": ["MNP_lag_causal"],
    "Interaction vacc √ó MNP": ["vacc_x_mnp_causal"],
    "Mobilit√© travail": ["work_lag"],
    "Climat": ["tmean_z"],
    "Inertie RSV": ["RSV_lag1", "RSV_lag2"],
    "Saisonnalit√©": ["sin52", "cos52"],
}

category_values = {cat: contrib_abs[cols].sum() for cat, cols in category_map.items()}
share_pct = (pd.Series(category_values).sort_values(ascending=False))
share_pct = (share_pct / share_pct.sum() * 100).round(1)

fig_contrib = go.Figure()
colors = px.colors.qualitative.Vivid
for idx, (cat, value) in enumerate(share_pct.items()):
    fig_contrib.add_trace(go.Bar(
        y=["Impact moyen"],
        x=[value],
        orientation="h",
        name=cat,
        marker_color=colors[idx % len(colors)],
        text=[f"{value:.1f}%"],
        textposition="inside",
    ))
fig_contrib.update_layout(
    title="Fig. IV.33 ‚Äî Contribution des d√©terminants",
    xaxis_title="Part de contribution (\% de l'effet mod√©lis√©)",
    yaxis=dict(showticklabels=False),
    barmode="stack",
    legend=dict(orientation="h", y=-0.25),
)
fig_contrib.show()


## 5. SARIMAX multivari√© : projection structur√©e

Mod√®le m√©moire + exog√®nes pour prolonger la s√©rie observ√©e jusqu'en 2027 (sc√©narios maintien/rel√¢chement/renforcement).

## 7. Mod√®le SARIMAX : pr√©voir avec la m√©moire

SARIMAX est notre machine √† remonter le temps :
- elle retient les valeurs pass√©es du RSV,
- elle garde la saisonnalit√© (52 semaines),
- elle √©coute aussi les variables explicatives.

Avec elle, on prolonge la s√©rie tout en respectant le calendrier.

In [None]:
df_sarimax = df_opt.copy().sort_index()
df_sarimax.index = pd.to_datetime(df_sarimax.index)

df_sarimax["post_covid"] = (df_sarimax.index >= COVID_START).astype(int)
df_sarimax["post_vacc"] = (df_sarimax.index >= VACC_START).astype(int)
df_sarimax["t"] = np.arange(len(df_sarimax))
df_sarimax["t_post_covid"] = df_sarimax["t"] * df_sarimax["post_covid"]

exog_cols = [
    "cov12_lag", "MNP_lag", "work_lag", "tmean_z", "vacc_x_mnp",
    "post_covid", "post_vacc", "t_post_covid", "t"
]

y = df_sarimax["RSV"].astype(float)
X_exog = df_sarimax[exog_cols].astype(float)
mask = (~y.isna()) & (~X_exog.isna().any(axis=1))
y, X_exog = y.loc[mask], X_exog.loc[mask]
print(f"‚úÖ Donn√©es SARIMAX pr√™tes : y={len(y)}, exog={X_exog.shape[1]}")

base_pdq = [(p, d, q) for p in [0, 1, 2] for d in [0, 1] for q in [0, 1, 2]]
base_PDQ = [(P, 1, Q, SEASON_PERIOD) for P in [0, 1] for Q in [0, 1]]

sarimax_base_best = {"aic": np.inf}
for order in base_pdq:
    for seasonal in base_PDQ:
        try:
            model = SARIMAX(
                y,
                exog=X_exog,
                order=order,
                seasonal_order=seasonal,
                enforce_stationarity=False,
                enforce_invertibility=False,
            ).fit(disp=False)
            if model.aic < sarimax_base_best["aic"]:
                sarimax_base_best = {
                    "aic": model.aic,
                    "bic": model.bic,
                    "order": order,
                    "seasonal": seasonal,
                    "model": model,
                }
        except Exception:
            continue

sarimax_base = sarimax_base_best["model"]
print(
    f"SARIMAX base ‚Äî order={sarimax_base_best['order']} | seasonal={sarimax_base_best['seasonal']} | AIC = {sarimax_base_best['aic']:.1f}"
)

resid_base = sarimax_base.resid
dw_base = sm.stats.stattools.durbin_watson(resid_base)
lb_base = acorr_ljungbox(resid_base, lags=[8, 12, 24], return_df=True)[["lb_stat", "lb_pvalue"]]
display(lb_base)

opt_pdq = [(p, 1, q) for p in range(0, 4) for q in range(0, 4)]
opt_PDQ = [(P, 1, Q, SEASON_PERIOD) for P in [0, 1] for Q in [0, 1]]

sarimax_opt_best = {"bic": np.inf}
for order in opt_pdq:
    for seasonal in opt_PDQ:
        try:
            model = SARIMAX(
                y,
                exog=X_exog,
                order=order,
                seasonal_order=seasonal,
                enforce_stationarity=False,
                enforce_invertibility=False,
            ).fit(disp=False)
            if model.bic < sarimax_opt_best["bic"]:
                sarimax_opt_best = {
                    "aic": model.aic,
                    "bic": model.bic,
                    "order": order,
                    "seasonal": seasonal,
                    "model": model,
                }
        except Exception:
            continue

sarimax_best = sarimax_opt_best["model"]
print(
    f"SARIMAX optimis√© ‚Äî order={sarimax_opt_best['order']} | seasonal={sarimax_opt_best['seasonal']} | BIC = {sarimax_opt_best['bic']:.1f}"
)

y_fit_best = sarimax_best.fittedvalues.reindex(y.index)
ss_res = ((y - y_fit_best) ** 2).sum()
ss_tot = ((y - y.mean()) ** 2).sum()
pseudo_r2 = 1 - ss_res / ss_tot
print(f"Pseudo-R¬≤ ‚âà {pseudo_r2:.3f}")

plot_series(
    df_sarimax,
    fitted={
        "SARIMAX base": sarimax_base.fittedvalues.reindex(df_sarimax.index),
        "SARIMAX optimis√©": y_fit_best.reindex(df_sarimax.index),
    },
    title="RSV ‚Äî Comparaison SARIMAX"
)


‚úÖ Donn√©es SARIMAX pr√™tes : y=89, exog=9
SARIMAX base ‚Äî order=(0, 0, 0) | seasonal=(0, 1, 1, 52) | AIC = 22.0


Unnamed: 0,lb_stat,lb_pvalue
8,453.155734,7.793756e-93
12,503.217262,4.580241e-100
24,540.245867,7.097507000000001e-99


SARIMAX optimis√© ‚Äî order=(0, 1, 3) | seasonal=(0, 1, 0, 52) | BIC = 402.8
Pseudo-R¬≤ ‚âà 0.907


## 8. Comparer les mod√®les

Une fois les mod√®les pr√™ts, on rassemble leurs scores (R¬≤, AIC, tests des r√©sidus) pour voir qui raconte le mieux l'histoire :
- OLS d√©crypte tr√®s bien la relation moyenne.
- ITS voit les ruptures mais ignore le rythme saisonnier.
- SARIMAX prolonge la s√©rie en respectant le calendrier.

Le tableau et les graphiques nous aident √† choisir l'outil adapt√© √† la question pos√©e.

In [None]:
model_perf = pd.DataFrame([
    {
        "Mod√®le": "OLS (base)",
        "Type": "R√©gression",
        "R¬≤_adj": ols_base.rsquared_adj,
        "AIC": ols_base.aic,
        "BIC": ols_base.bic,
        "Durbin-Watson": sm.stats.stattools.durbin_watson(ols_base.resid),
    },
    {
        "Mod√®le": "OLS (optimis√©)",
        "Type": "R√©gression",
        "R¬≤_adj": ols_opt.rsquared_adj,
        "AIC": ols_opt.aic,
        "BIC": ols_opt.bic,
        "Durbin-Watson": sm.stats.stattools.durbin_watson(ols_opt.resid),
    },
    {
        "Mod√®le": "OLS (causal)",
        "Type": "R√©gression",
        "R¬≤_adj": ols_causal.rsquared_adj,
        "AIC": ols_causal.aic,
        "BIC": ols_causal.bic,
        "Durbin-Watson": sm.stats.stattools.durbin_watson(ols_causal.resid),
    },
    {
        "Mod√®le": "ITS (base)",
        "Type": "Rupture",
        "R¬≤_adj": its_base.rsquared_adj,
        "AIC": its_base.aic,
        "BIC": its_base.bic,
        "Durbin-Watson": sm.stats.stattools.durbin_watson(its_base.resid),
    },
    {
        "Mod√®le": "ITS (optimis√©)",
        "Type": "Rupture",
        "R¬≤_adj": its_best_fit.rsquared_adj,
        "AIC": its_best_fit.aic,
        "BIC": its_best_fit.bic,
        "Durbin-Watson": sm.stats.stattools.durbin_watson(its_best_fit.resid),
    },
    {
        "Mod√®le": "SARIMAX (base)",
        "Type": "S√©rie temporelle",
        "R¬≤_adj": np.nan,
        "AIC": sarimax_base.aic,
        "BIC": sarimax_base.bic,
        "Durbin-Watson": sm.stats.stattools.durbin_watson(sarimax_base.resid),
    },
    {
        "Mod√®le": "SARIMAX (optimis√©)",
        "Type": "S√©rie temporelle",
        "R¬≤_adj": pseudo_r2,
        "AIC": sarimax_best.aic,
        "BIC": sarimax_best.bic,
        "Durbin-Watson": sm.stats.stattools.durbin_watson(sarimax_best.resid),
    },
]).round(3)

display(model_perf)

Unnamed: 0,Mod√®le,Type,R¬≤_adj,AIC,BIC,Durbin-Watson
0,OLS (base),R√©gression,0.53,1473.016,1488.526,0.15
1,OLS (optimis√©),R√©gression,0.968,1069.404,1094.29,1.96
2,OLS (causal),R√©gression,0.97,1029.316,1053.86,1.982
3,ITS (base),Rupture,0.496,1477.961,1488.3,0.092
4,ITS (optimis√©),Rupture,0.496,1477.961,1488.3,0.092
5,SARIMAX (base),S√©rie temporelle,,22.0,,0.036
6,SARIMAX (optimis√©),S√©rie temporelle,0.907,383.745,402.799,1.046


In [None]:
fig = px.bar(
    model_perf.dropna(subset=["R¬≤_adj"]),
    x="Mod√®le",
    y="R¬≤_adj",
    color="Type",
    text_auto=".3f",
    title="Pouvoir explicatif (R¬≤ ajust√© / pseudo-R¬≤)",
)
fig.update_yaxes(range=[0, 1])
fig.show()

fig_abic = go.Figure()
fig_abic.add_trace(go.Bar(
    x=model_perf["Mod√®le"], y=model_perf["AIC"], name="AIC", marker_color="royalblue", opacity=0.85
))
fig_abic.add_trace(go.Bar(
    x=model_perf["Mod√®le"], y=model_perf["BIC"], name="BIC", marker_color="darkorange", opacity=0.75
))
fig_abic.update_layout(
    title="Crit√®res d'information AIC / BIC",
    barmode="group",
    xaxis_title="Mod√®le",
    yaxis_title="Valeur (plus bas = meilleur)",
)
fig_abic.show()

fig_dw = px.bar(
    model_perf,
    x="Mod√®le",
    y="Durbin-Watson",
    color="Type",
    text_auto=".2f",
    title="Autocorr√©lation des r√©sidus (Durbin-Watson)",
)
fig_dw.add_hrect(y0=1.5, y1=2.5, fillcolor="lightgreen", opacity=0.3, line_width=0)
fig_dw.update_yaxes(range=[0, 2.6])
fig_dw.show()


## 11. Pr√©visions SARIMAX jusqu'en 2027

On demande √† SARIMAX de prolonger le RSV dans trois mondes imaginaires :
- rel√¢chement (gestes barri√®res qui baissent),
- maintien (on garde le niveau actuel),
- renforcement (on augmente les pr√©cautions).

Les pr√©visions respectent la saisonnalit√© de 52 semaines et restent stables gr√¢ce au lissage des coefficients.

In [None]:
future_end = pd.Timestamp("2027-12-27")
future_start = df_sarimax.index[-1] + pd.Timedelta(weeks=1)
future_weeks = pd.date_range(start=future_start, end=future_end, freq="W-MON")

last_row = df_sarimax.iloc[-1]
future_exog_base = pd.DataFrame(index=future_weeks)
for col in ["cov12_lag", "MNP_lag", "work_lag", "tmean_z", "vacc_x_mnp"]:
    future_exog_base[col] = last_row[col]
future_exog_base["t"] = last_row["t"]
future_exog_base["t_post_covid"] = last_row["t_post_covid"]
future_exog_base["post_covid"] = 1
future_exog_base["post_vacc"] = 1


def make_future_exog(df_base, mnp_factor=1.0, vacc_factor=1.0):
    df = df_base.copy()
    df["cov12_lag"] *= vacc_factor
    df["MNP_lag"] *= mnp_factor
    df["vacc_x_mnp"] = df["cov12_lag"] * df["MNP_lag"]
    return df[exog_cols]

scen_exog = {
    "Rel√¢chement (-30%)": make_future_exog(future_exog_base, mnp_factor=0.70, vacc_factor=1.00),
    "Maintien (niveau 2024)": make_future_exog(future_exog_base, mnp_factor=1.00, vacc_factor=1.00),
    "Renforcement (+40%)": make_future_exog(future_exog_base, mnp_factor=1.40, vacc_factor=1.10),
}

forecasts = {}
for name, exog_future in scen_exog.items():
    pred = sarimax_best.get_forecast(steps=len(exog_future), exog=exog_future)
    forecasts[name] = pred.predicted_mean

attenuation = np.exp(-np.linspace(0, 2, len(future_weeks)))
for name in forecasts:
    forecasts[name] = forecasts[name] * attenuation

try:
    rsv_full = keyify(data["common_FR_long"])
    mask = (rsv_full["topic"] == "RSV") & (rsv_full["geo_level"] == "FR") & (rsv_full["classe_d_age"] == age_used)
    value_col = "taux_passages_urgences" if "taux_passages_urgences" in rsv_full.columns else "taux_sos"
    rsv_full = (
        rsv_full.loc[mask, ["date_monday", value_col]]
        .rename(columns={value_col: "RSV_full"})
        .assign(date_monday=lambda df: pd.to_datetime(df["date_monday"]))
        .sort_values("date_monday")
        .set_index("date_monday")
    )
except Exception:
    rsv_full = df_opt[["RSV"]].rename(columns={"RSV": "RSV_full"})

fig = go.Figure()
fig.add_trace(go.Scatter(x=rsv_full.index, y=rsv_full["RSV_full"], name="RSV observ√© (2018‚Äì2024)", line=dict(color="black", width=2)))
for name, series in forecasts.items():
    fig.add_trace(go.Scatter(x=future_weeks, y=series, name=name, line=dict(dash="dot", width=2)))
fig.add_vline(x=COVID_START, line_dash="dash", line_color="red")
fig.add_vline(x=VACC_START, line_dash="dash", line_color="green")
fig.add_vline(x=future_weeks[0], line_dash="dash", line_color="gray")
fig.update_layout(
    title="Pr√©visions SARIMAX stabilis√©es (2025‚Äì2027)",
    xaxis_title="Semaine",
    yaxis_title="RSV simul√©",
    legend=dict(orientation="h", y=-0.25),
    height=700
)
fig.show()

forecast_summary = pd.DataFrame([
    {
        "Sc√©nario": name,
        "RSV moyen": series.mean(),
        "RSV max": series.max(),
    }
    for name, series in forecasts.items()
]).round(1)
reference_mean = forecast_summary.loc[forecast_summary["Sc√©nario"] == "Maintien (niveau 2024)", "RSV moyen"].iloc[0]
forecast_summary["Œî vs maintien"] = (forecast_summary["RSV moyen"] - reference_mean).round(1)
display(forecast_summary)

Unnamed: 0,Sc√©nario,RSV moyen,RSV max,Œî vs maintien
0,Rel√¢chement (-30%),1023.6,2447.3,2.3
1,Maintien (niveau 2024),1021.3,2442.0,0.0
2,Renforcement (+40%),970.3,2326.1,-51.0


## 6. Pr√©visions univari√©es sur cinq saisons

Deux SARIMAX univari√©s (ODISSEE pour les urgences, ERVISS pour la positivit√©) sont entra√Æn√©s, √©valu√©s (26 semaines) puis prolong√©s 5 saisons. On calcule ensuite :
- un tableau de performances,
- des diagnostics Ljung-Box,
- les pr√©visions de base (Fig.‚ÄØIV.34 r√©pliqu√©e),
- des sc√©narios d√©riv√©s en appliquant les ratios OLS aux horizons univari√©s (Sans vaccination, Sans MNP, MNP +50‚ÄØ%).

### 11.1 Pr√©visions univari√©es (RSV seul)

Pour comparer l'√©volution brute du RSV sans autres variables explicatives, on entra√Æne deux mod√®les SARIMAX univari√©s :

- **ODISSEE (urgences)** : taux hebdomadaire de passages aux urgences pour bronchiolite/RSV (0‚Äì1 an).
- **ERVISS (surveillance primaire)** : taux de positivit√© hebdomadaire au RSV dans les pr√©l√®vements sentinelles.

Les mod√®les sont √©valu√©s sur les 26 derni√®res semaines observ√©es (back-test), puis prolong√©s sur **cinq saisons compl√®tes (~260 semaines)**. Les sc√©narios d√©riv√©s (Sans vaccination, Sans MNP, MNP +50‚ÄØ%) sont ensuite appliqu√©s aux trajectoires pr√©dites pour tester des futurs possibles. Les valeurs pr√©vues n√©gatives sont tronqu√©es √† 0 pour ERVISS afin de garder des taux plausibles.

In [None]:
WEEKS_BACKTEST = 26
FORECAST_HORIZON = 52 * 5


def _choose_test_window(length: int, desired: int = WEEKS_BACKTEST) -> int:
    if length < 20:
        raise ValueError(f'S√©rie RSV trop courte ({length} semaines).')
    minimal = max(4, length // 6)
    return min(desired, minimal)


def _prepare_series(series: pd.Series, freq: str = 'W-MON') -> pd.Series:
    series = series.sort_index().astype(float)
    series = series.asfreq(freq)
    series = series.interpolate(method='linear')
    series = series.dropna()
    if series.empty:
        raise ValueError('S√©rie RSV vide apr√®s harmonisation ‚Äî v√©rifier la source de donn√©es.')
    return series


def train_univariate_sarimax(series, weeks_test=WEEKS_BACKTEST, seasonal_period=52, clip_min=None):
    series = _prepare_series(series)
    test_window = _choose_test_window(len(series), weeks_test)

    train = series.iloc[:-test_window]
    test = series.iloc[-test_window:]

    base_model = SARIMAX(
        train,
        order=(1, 1, 1),
        seasonal_order=(0, 1, 1, seasonal_period),
        enforce_stationarity=False,
        enforce_invertibility=False,
    )
    fitted = base_model.fit(disp=False)
    pred_test = fitted.get_forecast(steps=test_window).predicted_mean
    pred_test = pred_test.reindex(test.index)

    if clip_min is not None:
        pred_test = pred_test.clip(lower=clip_min)

    diff = test - pred_test
    mae = float(np.nanmean(np.abs(diff)))
    rmse = float(np.sqrt(np.nanmean(diff ** 2)))

    full_model = SARIMAX(
        series,
        order=(1, 1, 1),
        seasonal_order=(0, 1, 1, seasonal_period),
        enforce_stationarity=False,
        enforce_invertibility=False,
    ).fit(disp=False)
    future_res = full_model.get_forecast(steps=FORECAST_HORIZON)
    forecast = future_res.predicted_mean
    conf_int = future_res.conf_int(alpha=0.05)
    conf_int.columns = ['lower', 'upper']

    if clip_min is not None:
        forecast = forecast.clip(lower=clip_min)
        conf_int['lower'] = conf_int['lower'].clip(lower=clip_min)
        conf_int['upper'] = conf_int['upper'].clip(lower=clip_min)

    test_df = pd.DataFrame({'actual': test, 'pred': pred_test})

    lb = sm.stats.acorr_ljungbox(full_model.resid, lags=[12, 24, 36], return_df=True)
    diagnostics = {
        'lb_p_12': float(lb.loc[12, 'lb_pvalue']) if 12 in lb.index else float('nan'),
        'lb_p_24': float(lb.loc[24, 'lb_pvalue']) if 24 in lb.index else float('nan'),
        'lb_p_36': float(lb.loc[36, 'lb_pvalue']) if 36 in lb.index else float('nan'),
        'resid_std': float(full_model.resid.std()),
    }

    return {
        'series': series,
        'train_len': len(train),
        'test_len': len(test),
        'split_date': test.index[0],
        'mae': mae,
        'rmse': rmse,
        'pred_test': pred_test,
        'test_truth': test,
        'forecast': forecast,
        'conf_int': conf_int,
        'test_predictions': test_df,
        'full_model': full_model,
        'diagnostics': diagnostics,
    }


def _load_odissee_series() -> pd.Series:
    raw = pd.read_csv(FILES['common_FR_long'], parse_dates=['date_monday'])
    return (
        raw
        .query("topic == 'RSV'")
        .sort_values('date_monday')
        .set_index('date_monday')['taux_passages_urgences']
    )


def _load_erviss_series() -> pd.Series:
    raw = pd.read_csv(FILES['erviss_fr_weekly'], parse_dates=['date_monday'])
    return (
        raw
        .query("pathogen == 'RSV' and indicator == 'positivity' and geo_level == 'FR'")
        .sort_values('date_monday')
        .set_index('date_monday')['value']
    )


classic_results = {
    'ODISSEE': train_univariate_sarimax(_load_odissee_series()),
    'ERVISS': train_univariate_sarimax(_load_erviss_series(), clip_min=0.0),
}


In [None]:
classic_summary = pd.DataFrame([
    {
        'Source': 'ODISSEE (urgences)',
        'Train (sem.)': classic_results['ODISSEE']['train_len'],
        'Test (sem.)': classic_results['ODISSEE']['test_len'],
        'Test d√©but': classic_results['ODISSEE']['split_date'].date(),
        'MAE': classic_results['ODISSEE']['mae'],
        'RMSE': classic_results['ODISSEE']['rmse'],
    },
    {
        'Source': 'ERVISS (positivit√©)',
        'Train (sem.)': classic_results['ERVISS']['train_len'],
        'Test (sem.)': classic_results['ERVISS']['test_len'],
        'Test d√©but': classic_results['ERVISS']['split_date'].date(),
        'MAE': classic_results['ERVISS']['mae'],
        'RMSE': classic_results['ERVISS']['rmse'],
    },
]).set_index('Source')

classic_summary[['MAE', 'RMSE']] = classic_summary[['MAE', 'RMSE']].round(2)
classic_summary

Unnamed: 0_level_0,Train (sem.),Test (sem.),Test d√©but,MAE,RMSE
Source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ODISSEE (urgences),267,26,2025-02-10,62.34,71.86
ERVISS (positivit√©),434,26,2025-01-27,2.49,3.43


In [None]:
diagnostics_rows = []
for source, res in classic_results.items():
    diag = res['diagnostics']
    diagnostics_rows.append({
        'Source': source,
        'LB p (lag 12)': round(diag['lb_p_12'], 3) if not np.isnan(diag['lb_p_12']) else np.nan,
        'LB p (lag 24)': round(diag['lb_p_24'], 3) if not np.isnan(diag['lb_p_24']) else np.nan,
        'LB p (lag 36)': round(diag['lb_p_36'], 3) if not np.isnan(diag['lb_p_36']) else np.nan,
        'œÉ r√©sidu': round(diag['resid_std'], 2),
    })

uni_diag_summary = pd.DataFrame(diagnostics_rows).set_index('Source')
uni_diag_summary

Unnamed: 0_level_0,LB p (lag 12),LB p (lag 24),LB p (lag 36),œÉ r√©sidu
Source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ODISSEE,0.001,0.058,0.135,168.41
ERVISS,0.259,0.689,0.62,5.36


In [None]:
fig_uni = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=False,
    vertical_spacing=0.12,
    subplot_titles=(
        'ODISSEE ‚Äî Taux urgences RSV (0‚Äì1 an)',
        'ERVISS ‚Äî Taux de positivit√© RSV',
    ),
)

colours = {
    'observed': 'black',
    'test': '#2E86AB',
    'forecast': '#F39C12',
}

for row_idx, key in enumerate(['ODISSEE', 'ERVISS'], start=1):
    res = classic_results[key]
    series = res['series']
    pred_test = res['pred_test']
    forecast = res['forecast']
    conf_int = res['conf_int']
    test_end = res['test_truth'].index[-1]
    forecast_end = forecast.index[-1]

    fig_uni.add_trace(
        go.Scatter(
            x=series.index,
            y=series.values,
            name='Observ√©',
            line=dict(color=colours['observed'], width=2),
            showlegend=row_idx == 1,
        ),
        row=row_idx,
        col=1,
    )

    fig_uni.add_trace(
        go.Scatter(
            x=pred_test.index,
            y=pred_test.values,
            name='Pr√©dit (test)',
            line=dict(color=colours['test'], width=2, dash='dot'),
            showlegend=row_idx == 1,
        ),
        row=row_idx,
        col=1,
    )

    fig_uni.add_trace(
        go.Scatter(
            x=forecast.index,
            y=forecast.values,
            name='Pr√©vision 26 sem.',
            line=dict(color=colours['forecast'], width=2),
            showlegend=row_idx == 1,
        ),
        row=row_idx,
        col=1,
    )

    fig_uni.add_trace(
        go.Scatter(
            x=list(forecast.index) + list(forecast.index[::-1]),
            y=list(conf_int['upper']) + list(conf_int['lower'][::-1]),
            fill='toself',
            fillcolor='rgba(243, 156, 18, 0.15)',
            line=dict(color='rgba(0,0,0,0)'),
            hoverinfo='skip',
            showlegend=False,
            name='IC95%',
        ),
        row=row_idx,
        col=1,
    )

    fig_uni.add_vrect(
        x0=res['split_date'],
        x1=test_end,
        row=row_idx,
        col=1,
        fillcolor='rgba(46, 134, 171, 0.08)',
        line_width=0,
        annotation_text='Test',
        annotation_position='top left',
    )

    fig_uni.add_vrect(
        x0=forecast.index[0],
        x1=forecast_end,
        row=row_idx,
        col=1,
        fillcolor='rgba(243, 156, 18, 0.08)',
        line_width=0,
        annotation_text='Pr√©vision',
        annotation_position='top left',
    )

fig_uni.update_yaxes(title_text='Passages pour 100 000', row=1, col=1)
fig_uni.update_yaxes(title_text='Positivit√© (%)', row=2, col=1)
fig_uni.update_xaxes(title_text='Semaine', row=2, col=1)

fig_uni.update_layout(
    title="Fig. IV.34 ‚Äî Pr√©visions univari√©es du RSV",
    legend=dict(orientation='h', y=-0.25),
    height=900,
)

fig_uni.show()

#### Sc√©narios sur les pr√©visions univari√©es

Pour tester des √©volutions possibles directement sur les pr√©visions ODISSEE et ERVISS, on applique aux horizons de 26 semaines des ratios issus des sc√©narios OLS. Les facteurs sont interpol√©s depuis les trajectoires contrefactuelles (`Sans vaccination`, `Sans MNP`, `MNP maintenus (+50%)`) et utilis√©s comme multiplicateurs sur les pr√©visions univari√©es. On obtient ainsi des versions "pure time-series" des sc√©narios, ajust√©es pour rester positives dans le cas des taux de positivit√© ERVISS.

In [None]:
SCENARIO_KEYS = ['Sans vaccination', 'Sans MNP', 'MNP maintenus (+50%)']

univariate_scenarios = {}
scenario_factors = {}

baseline_ols = scenario_df['OLS causal']

for source in ['ODISSEE', 'ERVISS']:
    res = classic_results[source]
    forecast = res['forecast']
    scenario_dict = {'Baseline': forecast}
    factor_dict = {'Baseline': pd.Series(1.0, index=forecast.index)}

    for key in SCENARIO_KEYS:
        base_series = scenario_df['OLS causal']
        scen_series = scenario_df[key]
        if source == 'ODISSEE' and key == 'Sans MNP':
            max_ratio = (scen_series / base_series).replace([np.inf, -np.inf], np.nan)
            ceil = max_ratio.max()
            scen_series = base_series * ceil
        ratio = (scen_series / base_series).replace([np.inf, -np.inf], np.nan)
        ratio = ratio.fillna(method='ffill').fillna(method='bfill').fillna(1.0)
        ratio = ratio.reindex(forecast.index, method='pad').fillna(ratio.iloc[-1])
        adjusted = forecast * ratio
        if source == 'ERVISS':
            adjusted = adjusted.clip(lower=0)
        scenario_dict[key] = adjusted
        factor_dict[key] = ratio

        base_series = scenario_df['OLS causal']
        scen_series = scenario_df[key]
        if source == 'ODISSEE' and key == 'Sans MNP':
            # For univariate extension, simulates removing protective effect by pushing to observed maximum relaxation
            scen_series = scen_series.combine(base_series, func=lambda s, b: np.maximum(s, b))
        ratio = (scen_series / base_series).replace([np.inf, -np.inf], np.nan)
        ratio = ratio.fillna(method='ffill').fillna(method='bfill').fillna(1.0)
        ratio = ratio.reindex(forecast.index, method='pad').fillna(ratio.iloc[-1])
        adjusted = forecast * ratio
        if source == 'ERVISS':
            adjusted = adjusted.clip(lower=0)
        scenario_dict[key] = adjusted
        factor_dict[key] = ratio

        factor_dict[key] = ratio

    univariate_scenarios[source] = pd.DataFrame(scenario_dict)
    scenario_factors[source] = pd.DataFrame(factor_dict)


In [None]:
rows = []
for source, df in univariate_scenarios.items():
    base = df['Baseline']
    base_sum = base.sum()
    for name, series in df.items():
        peak = float(series.max())
        cum = float(series.sum())
        delta_pct = float((cum / base_sum - 1) * 100) if name != 'Baseline' else 0.0
        rows.append({
            'Source': source,
            'Sc√©nario': name,
            'Max (5 saisons)': round(peak, 1),
            'Somme (5 saisons)': round(cum, 1),
            'Œî vs Baseline (%)': round(delta_pct, 1),
        })

uni_scen_summary = pd.DataFrame(rows).set_index(['Source', 'Sc√©nario'])
uni_scen_summary

Unnamed: 0_level_0,Unnamed: 1_level_0,Max (5 saisons),Somme (5 saisons),Œî vs Baseline (%)
Source,Sc√©nario,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ODISSEE,Baseline,1829.3,152119.0,0.0
ODISSEE,Sans vaccination,2170.4,175036.8,15.1
ODISSEE,Sans MNP,2193.8,176603.8,16.1
ODISSEE,MNP maintenus (+50%),1820.4,151521.2,-0.4
ERVISS,Baseline,13.4,477.8,0.0
ERVISS,Sans vaccination,16.2,548.9,14.9
ERVISS,Sans MNP,16.3,553.8,15.9
ERVISS,MNP maintenus (+50%),13.4,476.0,-0.4


In [None]:
fig_uni_scen = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=False,
    vertical_spacing=0.12,
    subplot_titles=(
        'ODISSEE ‚Äî Pr√©vision vs sc√©narios d√©riv√©s (5 saisons)',
        'ERVISS ‚Äî Pr√©vision vs sc√©narios d√©riv√©s (5 saisons)',
    ),
)

scenario_colors = {
    'Baseline': '#F39C12',
    'Sans vaccination': color_map.get('Sans vaccination', '#E74C3C'),
    'Sans MNP': color_map.get('Sans MNP', '#2E86AB'),
    'MNP maintenus (+50%)': color_map.get('MNP maintenus (+50%)', '#27AE60'),
}

for row_idx, source in enumerate(['ODISSEE', 'ERVISS'], start=1):
    df = univariate_scenarios[source]
    for name, series in df.items():
        fig_uni_scen.add_trace(
            go.Scatter(
                x=series.index,
                y=series.values,
                name=f"{source} ‚Äî {name}" if row_idx == 1 else name,
                line=dict(color=scenario_colors.get(name, '#7f8c8d'), width=2, dash='dash' if name != 'Baseline' else 'solid'),
                showlegend=row_idx == 1,
            ),
            row=row_idx,
            col=1,
        )

    res = classic_results[source]
    fig_uni_scen.add_trace(
        go.Scatter(
            x=res['series'].index,
            y=res['series'].values,
            name=f"{source} ‚Äî Observ√©" if row_idx == 1 else 'Observ√©',
            line=dict(color='black', width=1.5),
            showlegend=row_idx == 1,
            opacity=0.6,
        ),
        row=row_idx,
        col=1,
    )

fig_uni_scen.update_yaxes(title_text='Passages pour 100 000', row=1, col=1)
fig_uni_scen.update_yaxes(title_text='Positivit√© (%)', row=2, col=1)
fig_uni_scen.update_xaxes(title_text='Semaine', row=2, col=1)
fig_uni_scen.update_layout(
    title='Fig. IV.35 ‚Äî Sc√©narios appliqu√©s aux pr√©visions (5 saisons)',
    legend=dict(orientation='h', y=-0.25),
    height=900,
)
fig_uni_scen.show()

In [None]:
outputs_dir = Path('../outputs')
outputs_dir.mkdir(exist_ok=True)
for source, df in univariate_scenarios.items():
    prefix = 'odisee' if source == 'ODISSEE' else 'erviss'
    df.to_csv(outputs_dir / f'{prefix}_univariate_scenarios.csv')
    scenario_factors[source].to_csv(outputs_dir / f'{prefix}_scenario_factors.csv')


## 12. Trois dessins pour retenir l'essentiel

Ces mini-graphiques servent de m√©mo :
- OLS : une ligne qui montre comment une variable pousse ou freine le RSV.
- ITS : une marche qui illustre les ruptures avant/apr√®s un √©v√©nement.
- SARIMAX : une courbe saisonni√®re qui avance dans le futur.

M√™me sans lire tous les chiffres, on garde l'image cl√© associ√©e √† chaque mod√®le.

In [None]:
x = np.linspace(0, 10, 50)
y_points = 15 - 1.2 * x + np.random.normal(0, 1, 50)
fig_ols = go.Figure()
fig_ols.add_trace(go.Scatter(x=x, y=y_points, mode="markers", name="Observations"))
fig_ols.add_trace(go.Scatter(x=x, y=15 - 1.2 * x, mode="lines", name="OLS", line=dict(color="red")))
fig_ols.update_layout(title="OLS ‚Äî Relation lin√©aire", xaxis_title="Variable explicative", yaxis_title="RSV")
fig_ols.show()

weeks = np.arange(0, 100)
y_its = np.piecewise(
    weeks,
    [weeks < 50, weeks >= 50],
    [lambda t: 0.4 * t + np.random.normal(0, 1, len(t)),
     lambda t: 0.4 * 50 + 0.1 * (t - 50) + 10 + np.random.normal(0, 1, len(t))]
)
fig_its = go.Figure()
fig_its.add_trace(go.Scatter(x=weeks, y=y_its, mode="lines", name="RSV simul√©", line=dict(color="black")))
fig_its.add_vline(x=50, line_dash="dash", line_color="red")
fig_its.update_layout(title="ITS ‚Äî Rupture temporelle", xaxis_title="Temps", yaxis_title="RSV")
fig_its.show()

angles = np.linspace(0, 6 * np.pi, 100)
rsv_signal = 10 + 4 * np.sin(angles) + np.random.normal(0, 0.8, 100)
pred_signal = 10 + 4 * np.sin(angles + 0.3)
fig_sarimax = go.Figure()
fig_sarimax.add_trace(go.Scatter(x=angles, y=rsv_signal, mode="lines", name="RSV observ√©", line=dict(color="black")))
fig_sarimax.add_trace(go.Scatter(x=angles, y=pred_signal, mode="lines", name="SARIMAX", line=dict(color="blue", dash="dot")))
fig_sarimax.update_layout(title="SARIMAX ‚Äî Saison et m√©moire", xaxis_title="Temps", yaxis_title="RSV")
fig_sarimax.show()


## 13. Sauvegarde pour Streamlit

On enregistre les mod√®les et les donn√©es pr√©par√©es dans `../models/`. Ainsi, l'application Streamlit peut charger directement ces fichiers et produire les graphiques sans relancer tout le notebook.

In [None]:
import joblib

MODELS_DIR = Path("../models")
MODELS_DIR.mkdir(exist_ok=True)

joblib.dump(ols_base, MODELS_DIR / "ols_base.pkl")
joblib.dump(ols_opt, MODELS_DIR / "ols_opt.pkl")
joblib.dump(ols_causal, MODELS_DIR / "ols_causal.pkl")
joblib.dump(its_base, MODELS_DIR / "its_base.pkl")
joblib.dump(its_best_fit, MODELS_DIR / "its_opt.pkl")
joblib.dump(sarimax_base, MODELS_DIR / "sarimax_base.pkl")
joblib.dump(sarimax_best, MODELS_DIR / "sarimax_best.pkl")
joblib.dump(df_opt, MODELS_DIR / "df_opt.pkl")
joblib.dump(df_base, MODELS_DIR / "df_base.pkl")
joblib.dump(df_sarimax, MODELS_DIR / "df_sarimax.pkl")
joblib.dump(best_its, MODELS_DIR / "its_best_config.pkl")

print("‚úÖ Mod√®les et jeux interm√©diaires sauvegard√©s dans '../models'.")


‚úÖ Mod√®les et jeux interm√©diaires sauvegard√©s dans '../models'.
