In [52]:
import sys
import os
sys.path.append("..") 

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from src.preprocessing import Preprocessor
from src.lstm_forecasting import LSTMForecaster, LSTMTimeSeries
from sklearn.metrics import mean_squared_error
from preprocess import *
from add_features import *
from models import *
# Avoid displaying unnecessary warnings in the notebook output.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# the libraries used below — consider narrowing it to specific categories.
import warnings
warnings.filterwarnings("ignore")
# Lift the row/column display limits so DataFrames are rendered in full.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)


# Enable IPython's autoreload extension in mode 2 (reload all modules before
# running a cell), so edits to the imported project modules are picked up.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [53]:
# Traffic exports for the three roads; all three files are semicolon-separated.
champs_elysees_df, convention_df, sts_peres_df = (
    pd.read_csv(traffic_csv, sep=";")
    for traffic_csv in (
        '../data/traffic/champs_elysees.csv',
        '../data/traffic/convention.csv',
        '../data/traffic/sts_peres.csv',
    )
)

# Side data: weather, holidays calendar and football fixtures.
weather_df_pred = pd.read_csv('../data/weather/weather_paris.csv')
holidays_df = pd.read_csv('../data/events/french_holidays.csv', sep=";")
football_matches_df = pd.read_csv('../data/events/football_matches_2024_2025.csv')

In [54]:
# Run the shared preprocessing `pipeline` (brought in by one of the star
# imports) on each road, with the same settings: window=3, fill_hours=True.
# The names are rebound to the processed frames, exactly as before.
champs_elysees_df, convention_df, sts_peres_df = (
    pipeline(raw_df, window=3, fill_hours=True)
    for raw_df in (champs_elysees_df, convention_df, sts_peres_df)
)

8627
9525
8627
9525
8627
9525


In [83]:
import pandas as pd

# --- Helper 1: build the skeleton of the test set ---
def create_base_test_dataset():
    """Build the test DataFrame (9–11 November 2025) with all base features.

    One row is created per hour of the three-day window. The target columns
    ("Débit horaire", "Taux d'occupation", "Etat trafic") are left empty, then
    the project preprocessing helpers are applied in sequence, and finally the
    six outlier / special-event flag columns are added, all set to False.

    Returns
    -------
    pandas.DataFrame
        The feature frame returned by the preprocessing chain, extended with
        the outlier flag columns.
    """
    # Lowercase "h" is the hourly alias; uppercase "H" is deprecated since
    # pandas 2.2, and the notebook's blanket warning filter hides that warning.
    date_range = pd.date_range("2025-11-09 00:00:00", "2025-11-11 23:00:00", freq="h")

    df_test = pd.DataFrame({
        # Timestamps are written with a fixed +01:00 offset.
        "Date et heure de comptage": date_range.strftime("%Y-%m-%dT%H:%M:%S+01:00"),
        "Débit horaire": [None] * len(date_range),
        "Taux d'occupation": [None] * len(date_range),
        "Etat trafic": [None] * len(date_range),
    })

    # Apply the preprocessing helpers in a fixed order.
    df_test = order_by_date(df_test)
    df_test = create_datetime_features(df_test, fill_hours=True)
    df_test = vacances_by_zone(df_test)
    df_test = add_school_holidays_paris(df_test)
    df_test = create_holidays(df_test)
    df_test["day_type"] = df_test.apply(day_type, axis=1)
    df_test = add_cyclic_features(df_test)
    df_test = merge_meteo(df_test)

    # Add the outlier flag columns, in the order: high, low, special_event
    # for "Débit horaire", then the same three for "Taux d'occupation".
    for target in ("Débit horaire", "Taux d'occupation"):
        for suffix in ("outlier_high", "outlier_low", "special_event"):
            df_test[f"{target}_{suffix}"] = False

    return df_test


# --- Helper 2: copy the outlier flags from the 2024 data ---
def copy_outliers_from_2024(df_test, df_train, new_cols, name):
    """Copy the outlier / special-event columns of 9–11 Nov 2024 onto the test frame.

    Parameters
    ----------
    df_test : pandas.DataFrame
        Test frame to fill. It is modified in place and also returned.
    df_train : pandas.DataFrame
        Historical frame holding a "date" column and every column listed in
        ``new_cols``. It is only read, never modified.
    new_cols : list of str
        Names of the columns to copy from ``df_train`` to ``df_test``.
    name : str
        Label used in the printed messages.

    Returns
    -------
    pandas.DataFrame
        ``df_test`` with ``new_cols`` overwritten by the 2024 values.

    Notes
    -----
    Values are matched on the 0..n-1 row number of the 2024 period against the
    index labels of ``df_test``. If the two frames differ in length a warning
    is printed and the unmatched rows end up as NaN.
    """
    # Parse the dates into a local Series so the caller's training frame keeps
    # its original "date" column (the previous version overwrote it in place).
    train_dates = pd.to_datetime(df_train["date"])

    in_period = (
        (train_dates >= pd.Timestamp("2024-11-09")) &
        (train_dates <= pd.Timestamp("2024-11-11"))
    )
    df_period = df_train.loc[in_period]

    if len(df_period) != len(df_test):
        print(f"⚠️ Attention : {name} n’a pas le même nombre d’heures ({len(df_period)} vs {len(df_test)})")

    for col in new_cols:
        # Renumber from 0 so rows line up with the test frame's index.
        df_test[col] = df_period[col].reset_index(drop=True)

    print(f"✅ {name} – valeurs copiées ({len(df_period)} heures)")
    return df_test


# --- Main function ---
def create_test_dataset(champs_elysees_df, convention_df, sts_peres_df):
    """Build the three test frames (Champs-Élysées, Convention, Pères).

    Each frame starts as a copy of ``create_base_test_dataset()``, receives the
    outlier / special-event flags of 9–11 Nov 2024 taken from ITS OWN training
    frame, and then gets 72h and 168h lag columns for "Débit horaire" and
    "Taux d'occupation", looked up in that same training frame.

    Parameters
    ----------
    champs_elysees_df, convention_df, sts_peres_df : pandas.DataFrame
        Training frames. Each must hold "date", "hour", both target columns
        and the six outlier columns. This function only reads them.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df_test_champs_2025, df_test_convention_2025, df_test_peres_2025)``.

    Notes
    -----
    A lag value is NaN when the lagged timestamp is absent from the training
    frame. If a training frame has several rows for one timestamp, the first
    one is used.
    """
    outlier_cols = [
        'Débit horaire_outlier_high',
        'Débit horaire_outlier_low',
        'Débit horaire_special_event',
        "Taux d'occupation_outlier_high",
        "Taux d'occupation_outlier_low",
        "Taux d'occupation_special_event",
    ]
    lag_hours = [72, 168]
    targets = ["Débit horaire", "Taux d'occupation"]

    base_test = create_base_test_dataset()

    test_frames = []
    for df_train, name in [
        (champs_elysees_df, "Champs-Élysées"),
        (convention_df, "Convention"),
        # Fix: the Pères flags used to be copied from convention_df.
        (sts_peres_df, "Pères"),
    ]:
        df_site = copy_outliers_from_2024(base_test.copy(), df_train, outlier_cols, name)

        # Full timestamps (date + hour) for both frames, built on local Series
        # so the training frame is left untouched.
        train_dt = pd.to_datetime(df_train["date"]) + pd.to_timedelta(df_train["hour"], unit="h")
        test_dt = pd.to_datetime(df_site["date"]) + pd.to_timedelta(df_site["hour"], unit="h")

        for target in targets:
            history = pd.Series(df_train[target].to_numpy(), index=pd.DatetimeIndex(train_dt))
            # reindex needs unique labels; keep the first row per timestamp.
            history = history[~history.index.duplicated(keep="first")]

            for lag in lag_hours:
                lagged_dt = pd.DatetimeIndex(test_dt - pd.Timedelta(hours=lag))
                # Timestamps missing from the history come back as NaN.
                df_site[f"{target}_lag_{lag}h"] = history.reindex(lagged_dt).to_numpy()

        test_frames.append(df_site)

    df_test_champs_2025, df_test_convention_2025, df_test_peres_2025 = test_frames
    return df_test_champs_2025, df_test_convention_2025, df_test_peres_2025


In [84]:
# --- Call the main function ---
(
    df_test_champs_2025,
    df_test_convention_2025,
    df_test_peres_2025,
) = create_test_dataset(champs_elysees_df, convention_df, sts_peres_df)


72
72
✅ Champs-Élysées – valeurs copiées (72 heures)
✅ Convention – valeurs copiées (72 heures)
✅ Pères – valeurs copiées (72 heures)


In [82]:
# Display the Champs-Élysées test frame.
# NOTE(review): the saved output shows *_lag_500h columns, whereas
# create_test_dataset builds 72h/168h lags, and this cell's execution count
# (82) is lower than the call cell's (84) — the output looks stale; re-run.
df_test_champs_2025

Unnamed: 0,Débit horaire,Taux d'occupation,Etat trafic,date,hour,year,month,weekday,is_weekend,Date et heure de comptage,Vacances Zone A,Vacances Zone B,Vacances Zone C,Nom Vacances,Vacances Toutes Zones,Vacances Scolaires Paris,is_holiday,day_type,hour_sin,hour_cos,weekday_sin,weekday_cos,month_sin,month_cos,day_of_year,dayofyear_sin,dayofyear_cos,time,temperature_2m (°C),wind_speed_10m (km/h),relative_humidity_2m (%),precipitation (mm),cloud_cover (%),weather_code (wmo code),surface_pressure (hPa),apparent_temperature (°C),rain (mm),Débit horaire_outlier_high,Débit horaire_outlier_low,Débit horaire_special_event,Taux d'occupation_outlier_high,Taux d'occupation_outlier_low,Taux d'occupation_special_event,Débit horaire_lag_500h,Taux d'occupation_lag_500h
0,,,,2025-11-09,0,2025,11,6,True,2025-11-09 00:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.0,1.0,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0,615.0,10.62889
1,,,,2025-11-09,1,2025,11,6,True,2025-11-09 01:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.258819,0.9659258,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0,577.333333,8.6663
2,,,,2025-11-09,2,2025,11,6,True,2025-11-09 02:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.5,0.8660254,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0,494.666667,6.640373
3,,,,2025-11-09,3,2025,11,6,True,2025-11-09 03:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.7071068,0.7071068,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0,374.0,4.682223
4,,,,2025-11-09,4,2025,11,6,True,2025-11-09 04:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.8660254,0.5,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0,304.333333,3.885557
5,,,,2025-11-09,5,2025,11,6,True,2025-11-09 05:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.9659258,0.258819,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0,350.333333,5.177777
6,,,,2025-11-09,6,2025,11,6,True,2025-11-09 06:00:00+01:00,False,False,False,,False,0,False,Normal Day,1.0,6.123234000000001e-17,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0,458.666667,7.515557
7,,,,2025-11-09,7,2025,11,6,True,2025-11-09 07:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.9659258,-0.258819,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0,594.666667,10.312593
8,,,,2025-11-09,8,2025,11,6,True,2025-11-09 08:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.8660254,-0.5,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0,676.666667,12.586853
9,,,,2025-11-09,9,2025,11,6,True,2025-11-09 09:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.7071068,-0.7071068,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0,716.666667,14.676667


In [65]:
# Display the Convention test frame.
# NOTE(review): the saved output has no lag columns and this cell's execution
# count (65) is lower than the call cell's (84) — the output looks stale; re-run.
df_test_convention_2025

Unnamed: 0,Débit horaire,Taux d'occupation,Etat trafic,date,hour,year,month,weekday,is_weekend,Date et heure de comptage,Vacances Zone A,Vacances Zone B,Vacances Zone C,Nom Vacances,Vacances Toutes Zones,Vacances Scolaires Paris,is_holiday,day_type,hour_sin,hour_cos,weekday_sin,weekday_cos,month_sin,month_cos,day_of_year,dayofyear_sin,dayofyear_cos,time,temperature_2m (°C),wind_speed_10m (km/h),relative_humidity_2m (%),precipitation (mm),cloud_cover (%),weather_code (wmo code),surface_pressure (hPa),apparent_temperature (°C),rain (mm),Débit horaire_outlier_high,Débit horaire_outlier_low,Débit horaire_special_event,Taux d'occupation_outlier_high,Taux d'occupation_outlier_low,Taux d'occupation_special_event
0,,,,2025-11-09,0,2025,11,6,True,2025-11-09 00:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.0,1.0,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0
1,,,,2025-11-09,1,2025,11,6,True,2025-11-09 01:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.258819,0.9659258,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0
2,,,,2025-11-09,2,2025,11,6,True,2025-11-09 02:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.5,0.8660254,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0
3,,,,2025-11-09,3,2025,11,6,True,2025-11-09 03:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.7071068,0.7071068,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0
4,,,,2025-11-09,4,2025,11,6,True,2025-11-09 04:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.8660254,0.5,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0
5,,,,2025-11-09,5,2025,11,6,True,2025-11-09 05:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.9659258,0.258819,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0
6,,,,2025-11-09,6,2025,11,6,True,2025-11-09 06:00:00+01:00,False,False,False,,False,0,False,Normal Day,1.0,6.123234000000001e-17,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0
7,,,,2025-11-09,7,2025,11,6,True,2025-11-09 07:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.9659258,-0.258819,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0
8,,,,2025-11-09,8,2025,11,6,True,2025-11-09 08:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.8660254,-0.5,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0
9,,,,2025-11-09,9,2025,11,6,True,2025-11-09 09:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.7071068,-0.7071068,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0


In [66]:
# Display the Pères test frame.
# NOTE(review): the saved output has no lag columns and this cell's execution
# count (66) is lower than the call cell's (84) — the output looks stale; re-run.
df_test_peres_2025

Unnamed: 0,Débit horaire,Taux d'occupation,Etat trafic,date,hour,year,month,weekday,is_weekend,Date et heure de comptage,Vacances Zone A,Vacances Zone B,Vacances Zone C,Nom Vacances,Vacances Toutes Zones,Vacances Scolaires Paris,is_holiday,day_type,hour_sin,hour_cos,weekday_sin,weekday_cos,month_sin,month_cos,day_of_year,dayofyear_sin,dayofyear_cos,time,temperature_2m (°C),wind_speed_10m (km/h),relative_humidity_2m (%),precipitation (mm),cloud_cover (%),weather_code (wmo code),surface_pressure (hPa),apparent_temperature (°C),rain (mm),Débit horaire_outlier_high,Débit horaire_outlier_low,Débit horaire_special_event,Taux d'occupation_outlier_high,Taux d'occupation_outlier_low,Taux d'occupation_special_event
0,,,,2025-11-09,0,2025,11,6,True,2025-11-09 00:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.0,1.0,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0
1,,,,2025-11-09,1,2025,11,6,True,2025-11-09 01:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.258819,0.9659258,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0
2,,,,2025-11-09,2,2025,11,6,True,2025-11-09 02:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.5,0.8660254,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0
3,,,,2025-11-09,3,2025,11,6,True,2025-11-09 03:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.7071068,0.7071068,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0
4,,,,2025-11-09,4,2025,11,6,True,2025-11-09 04:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.8660254,0.5,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0
5,,,,2025-11-09,5,2025,11,6,True,2025-11-09 05:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.9659258,0.258819,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0
6,,,,2025-11-09,6,2025,11,6,True,2025-11-09 06:00:00+01:00,False,False,False,,False,0,False,Normal Day,1.0,6.123234000000001e-17,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0
7,,,,2025-11-09,7,2025,11,6,True,2025-11-09 07:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.9659258,-0.258819,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0
8,,,,2025-11-09,8,2025,11,6,True,2025-11-09 08:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.8660254,-0.5,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0
9,,,,2025-11-09,9,2025,11,6,True,2025-11-09 09:00:00+01:00,False,False,False,,False,0,False,Normal Day,0.7071068,-0.7071068,-0.781831,0.62349,-0.866025,0.5,313,-0.780296,0.625411,2025-11-09,,,,,,,,,,0,0,0,0,0,0
