# Packages

In [15]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import shap
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from scipy.stats import pearsonr
from sklearn.utils import shuffle
import pickle
from dateutil.relativedelta import relativedelta
import pickle
from statsmodels.tsa.ar_model import AutoReg

# Importation

In [16]:
# Test-set data: read the CSV with its "date" column as the index, then build
# the final frame with those index labels parsed into datetimes.
raw_test = pd.read_csv("df_stationary_test.csv", index_col="date")
df_stationary_test = raw_test.set_axis(pd.to_datetime(raw_test.index), axis=0)

In [17]:
# Pull the "UNRATE" column out of the test frame; the evaluation below works on it.
df_stationary_test_unrate = df_stationary_test.loc[:, "UNRATE"]

# Importer le modèle AR(1) final issu de la validation

In [18]:
# Restore the pickled AR(1) model object from disk.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file —
# only load artifacts produced by a trusted run.
with open("AR1_last_trained_model.pkl", "rb") as model_file:
    ar1 = pickle.load(model_file)

# Companion metadata, useful to know what the model was trained on.
# The CSV is optional: when it is missing, `meta` is None and nothing is printed.
try:
    meta = pd.read_csv("AR1_last_trained_model_meta.csv", index_col=0).squeeze("columns")
except FileNotFoundError:
    meta = None
else:
    print(meta)

trained_until    1988-11-01 00:00:00
p_used                             1
nobs                             346
aic                164.6473364963893
bic               176.18665282156246
eval_start                1983-01-01
eval_end                  1989-12-31
mae_83_89         0.9775539701031503
rmse_83_89        1.4610097145509617
r2_83_89         -1.7237671605297926
n_eval                            83
Name: 0, dtype: object


# Tester le modèle AR(1)

In [19]:
# --- 2️⃣ Load the TEST series ---
# Sort chronologically, cast to float and drop missing values, then snap every
# timestamp to the first day of its month and declare a month-start frequency.
y = df_stationary_test_unrate.sort_index().astype(float).dropna()
y.index = pd.to_datetime(y.index).to_period("M").to_timestamp(how="start")
y = y.asfreq("MS")

# asfreq("MS") inserts a NaN row for every month absent from the data. Those
# NaNs would flow into the AR(1) fits and the metrics, so fail early and clearly.
if y.isna().any():
    raise ValueError(
        "La série TEST contient des mois manquants après asfreq('MS') : "
        "impossible d'ajuster l'AR(1) sur une grille mensuelle incomplète."
    )

# --- 3️⃣ Parameters ---
h = 12                 # forecast horizon, in monthly steps
min_train_n = 36       # minimum number of observations before the first fit
test_start = pd.Timestamp("1990-01-01")   # first admissible forecast target date
test_end   = pd.Timestamp("2025-12-31")   # last admissible forecast target date

rows = []
# Until the loop performs a first refit, fall back on the loaded model and on
# the "trained_until" entry of its metadata (when that metadata is available).
last_model = ar1
last_fit_end = pd.Timestamp(meta.get("trained_until")) if meta is not None and "trained_until" in meta else None

# --- 4️⃣ Expanding window over the TEST series only ---
# Window k holds the first k observations; starting directly at `min_train_n`
# avoids slicing the series just to discover that a window is still too short.
for n_obs in range(max(min_train_n, 1), len(y) + 1):
    y_tr = y.iloc[:n_obs]
    t_end = y_tr.index[-1]

    # Refit AR(1) on everything known up to t_end.
    # NOTE: this rebinds `ar1`, so the model loaded from the pickle is replaced
    # by the most recent refit (also tracked by `last_model`).
    ar1 = AutoReg(y_tr, lags=1, old_names=False).fit()
    last_model = ar1
    last_fit_end = t_end

    # Out-of-sample path of length h; only the h-step-ahead value is kept.
    fc = ar1.predict(start=len(y_tr), end=len(y_tr) + h - 1)
    yhat_h = float(fc.iloc[-1])
    t_fore = t_end + relativedelta(months=h)

    # Keep the forecast only when its target date is observed AND lies inside
    # the declared test window (test_start / test_end were previously unused).
    if t_fore in y.index and test_start <= t_fore <= test_end:
        rows.append((t_fore, yhat_h, float(y.loc[t_fore]), t_end))

# --- 5️⃣ Collect the results ---
df_oos_ar1_test = (
    pd.DataFrame(rows, columns=["date", "y_hat", "y_true", "trained_until"])
      .set_index("date")
      .sort_index()
)

# --- 6️⃣ Evaluation on TEST 1990–2025 ---
if len(df_oos_ar1_test) > 0:
    mae  = mean_absolute_error(df_oos_ar1_test["y_true"], df_oos_ar1_test["y_hat"])
    rmse = np.sqrt(mean_squared_error(df_oos_ar1_test["y_true"], df_oos_ar1_test["y_hat"]))
    # R² is left as NaN when there is a single evaluated point.
    r2   = r2_score(df_oos_ar1_test["y_true"], df_oos_ar1_test["y_hat"]) if len(df_oos_ar1_test) > 1 else np.nan

    print(f"\n✅ AR(1) OOS (h=12) — TEST 1990–2025 — n={len(df_oos_ar1_test)}")
    print(f"MAE = {mae:.3f} | RMSE = {rmse:.3f} | R² = {r2:.3f}")

    # Per-year breakdown from the forecast errors, using vectorised group
    # aggregations so that the count column `n` stays an integer.
    df_oos_ar1_test["year"] = df_oos_ar1_test.index.year
    err = df_oos_ar1_test["y_true"] - df_oos_ar1_test["y_hat"]
    by_year = df_oos_ar1_test["year"]
    yearly = pd.DataFrame({
        "n": err.groupby(by_year).size(),
        "MAE": err.abs().groupby(by_year).mean(),
        "RMSE": np.sqrt(err.pow(2).groupby(by_year).mean()),
    })
    print("\n--- MAE/RMSE par année (1990–2025) ---")
    print(yearly.to_string())
else:
    print("⚠️ Aucune observation dans la période TEST.")

# --- 7️⃣ Final preview ---
print("\nAperçu des premières prévisions TEST :")
print(df_oos_ar1_test.head())


✅ AR(1) OOS (h=12) — TEST 1990–2025 — n=382
MAE = 0.976 | RMSE = 2.066 | R² = -0.695

--- MAE/RMSE par année (1990–2025) ---
         n       MAE      RMSE
year                          
1993   2.0  1.498653  1.499022
1994  12.0  0.446063  0.548815
1995  12.0  0.601402  0.672835
1996  12.0  0.487258  0.671977
1997  12.0  0.356406  0.409216
1998  12.0  0.241908  0.281248
1999  12.0  0.206554  0.252937
2000  12.0  0.135741  0.177466
2001  12.0  0.993441  1.122368
2002  12.0  0.984497  1.075732
2003  12.0  0.619897  0.727642
2004  12.0  0.607788  0.694265
2005  12.0  0.168884  0.226606
2006  12.0  0.193603  0.227198
2007  12.0  0.345020  0.442722
2008  12.0  1.198255  1.290358
2009  12.0  2.599408  2.737578
2010  12.0  3.720531  3.900240
2011  12.0  1.003851  1.284328
2012  12.0  0.358421  0.441594
2013  12.0  0.210179  0.284988
2014  12.0  0.642944  0.716390
2015  12.0  0.304527  0.344435
2016  12.0  0.378560  0.422067
2017  12.0  0.369318  0.409792
2018  12.0  0.195516  0.248363
2019  