In [2]:
import pandas as pd
import numpy as np

# Generate synthetic *daily* data (one row per calendar day, not per month).
# The generator is seeded so that "Restart & Run All" reproduces the same values.
SEED = 42
rng = np.random.default_rng(SEED)

# Build the date index first and size the random draw from it, so the two
# columns can never get out of sync if the date range is edited.
dates = pd.date_range(start='2024-01-01', end='2024-11-15', freq='D')
data = pd.DataFrame({
    'date': dates,
    'value': rng.standard_normal(len(dates)),  # one standard-normal draw per day
})

# Quick look at the first rows
data.head()

Unnamed: 0,date,value
0,2024-01-01,1.208942
1,2024-01-02,-0.494255
2,2024-01-03,0.229376
3,2024-01-04,0.46389
4,2024-01-05,-0.696427


In [4]:
from dateutil.relativedelta import relativedelta

def generate_rolling_windows(data, date_col, value_col,
                             train_months=1, gap_months=1, test_months=1,
                             require_full_test=False):
    """Split a dated series into rolling train / gap / test windows.

    Each window is made of ``train_months`` months of training data, followed
    by ``gap_months`` months that are skipped (buffer), followed by
    ``test_months`` months of test data. Consecutive windows are shifted by
    one month.

    All period boundaries are computed from the earliest date in ``date_col``
    (``first date + k months``) rather than by repeatedly adding one month to
    the previous boundary. Repeated addition clips the day of month once and
    never recovers it (Jan 31 -> Feb 29 -> Mar 29 ...), which makes windows
    drift when the series starts on day 29-31.

    Periods are half-open, ``[start, next_start)``, so rows carrying a time of
    day on the last day of a period are kept in that period.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding at least ``date_col`` and ``value_col``.
    date_col : str
        Name of the datetime column used to place rows in periods.
    value_col : str
        Name of the column whose values are returned for each period.
    train_months : int, default 1
        Length of the training period, in months (>= 1).
    gap_months : int, default 1
        Length of the skipped buffer between train and test, in months (>= 0).
    test_months : int, default 1
        Length of the test period, in months (>= 1).
    require_full_test : bool, default False
        When False, a window is produced as soon as its test period starts on
        or before the last date, so the final test set may be truncated.
        When True, a window is produced only if the data reach the last
        calendar day of its test period.

    Returns
    -------
    list of dict
        One dict per window with keys ``"train"``, ``"test"``,
        ``"train_dates"`` and ``"test_dates"``, each a NumPy array.
        An empty list is returned when ``data`` has no usable dates.

    Raises
    ------
    ValueError
        If ``train_months`` or ``test_months`` is below 1, or ``gap_months``
        is negative.
    """
    if train_months < 1 or test_months < 1 or gap_months < 0:
        raise ValueError(
            "train_months and test_months must be >= 1 and gap_months must be >= 0"
        )

    if len(data) == 0:
        return []

    dates = data[date_col]
    anchor = dates.min()     # earliest date: reference point for every boundary
    last_date = dates.max()  # latest date available
    if pd.isna(anchor):
        return []

    def shifted(months):
        # Boundary located `months` months after the anchor (day clipped to month end).
        return anchor if months == 0 else anchor + pd.DateOffset(months=months)

    one_day = pd.Timedelta(days=1)
    windows = []
    index = 0
    while True:
        # Period boundaries for window number `index`
        train_start = shifted(index)
        gap_start = shifted(index + train_months)
        test_start = shifted(index + train_months + gap_months)
        test_stop = shifted(index + train_months + gap_months + test_months)

        # Later windows only start later, so the first failure ends the loop.
        if test_start > last_date:
            break
        if require_full_test and test_stop - one_day > last_date:
            break

        # Select the rows of each period; the gap period is deliberately left out
        in_train = (dates >= train_start) & (dates < gap_start)
        in_test = (dates >= test_start) & (dates < test_stop)
        train_data = data[in_train]
        test_data = data[in_test]

        windows.append({
            "train": train_data[value_col].values,
            "test": test_data[value_col].values,
            "train_dates": train_data[date_col].values,
            "test_dates": test_data[date_col].values
        })

        # Slide the window forward by one month
        index += 1

    return windows

# Build every rolling window from the generated data set
windows = generate_rolling_windows(data, date_col='date', value_col='value')

# Show the values held by the first window as a sanity check
first_window = windows[0]
print("Première fenêtre :")
print("Train data :", first_window['train'])
print("Test data :", first_window['test'])

Première fenêtre :
Train data : [ 1.20894213 -0.49425469  0.22937598  0.46388957 -0.69642744  0.63819439
  0.89538666 -0.62073845  0.74001969 -0.75921436 -1.75027    -0.59983802
  0.56751792 -1.34551425  0.74805118 -0.15616636  0.18567768 -1.10064746
 -1.29293132 -0.24627519  0.64588054 -0.09809849 -0.84308443  0.39586634
  1.46558444  0.62070093 -1.05224463  0.63157493  1.68695292 -1.04862762
  2.02470869]
Test data : [-0.17556562 -0.27900946  0.58073474 -0.41656328  1.12131831  0.94181249
  0.40588228  1.45502206 -0.3330021   1.29105368 -0.94555557 -1.20660716
  0.98109239 -0.99751816  1.8479838  -1.35592257 -0.40920879 -3.44894815
  0.09442762  0.41031304 -1.36150118  1.02275112  1.95123071 -1.80229381
  0.01258558 -1.12167842 -0.29999761 -0.36181716 -0.49157023  1.08444748
 -0.21342243]


In [5]:
# Print the first and last date of the train and test sets for the first three windows
for number, window in enumerate(windows[:3], start=1):
    train_dates = window['train_dates']
    test_dates = window['test_dates']
    print(f"Fenêtre {number}:")
    print(f"  Train: {train_dates[0]} à {train_dates[-1]}")
    print(f"  Test: {test_dates[0]} à {test_dates[-1]}")

Fenêtre 1:
  Train: 2024-01-01T00:00:00.000000000 à 2024-01-31T00:00:00.000000000
  Test: 2024-03-01T00:00:00.000000000 à 2024-03-31T00:00:00.000000000
Fenêtre 2:
  Train: 2024-02-01T00:00:00.000000000 à 2024-02-29T00:00:00.000000000
  Test: 2024-04-01T00:00:00.000000000 à 2024-04-30T00:00:00.000000000
Fenêtre 3:
  Train: 2024-03-01T00:00:00.000000000 à 2024-03-31T00:00:00.000000000
  Test: 2024-05-01T00:00:00.000000000 à 2024-05-31T00:00:00.000000000
