In [17]:
import pandas as pd
import numpy as np

from darts import TimeSeries
from darts.models import (
    AutoARIMA,
    ExponentialSmoothing,
    NBEATSModel,
    TCNModel,
    RegressionModel,
    SKLearnModel,
    Prophet  # if you want
)
from darts.dataprocessing.transformers import Scaler
from darts.dataprocessing.encoders.encoders import DatetimeAttributeEncoder, CyclicTemporalEncoder, SequentialEncoder
from darts.metrics import mae, mape, rmse
from darts.utils.missing_values import fill_missing_values

In [18]:
df

Unnamed: 0_level_0,id,valeur_NO2,valeur_CO,valeur_O3,valeur_PM10,valeur_PM25
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-01 00:00:00,2020-01-01 00,42.9,0.718,15.7,73.1,64.4
2020-01-01 01:00:00,2020-01-01 01,33.6,0.587,10.1,74.8,66.0
2020-01-01 02:00:00,2020-01-01 02,29.3,,5.1,51.0,44.9
2020-01-01 03:00:00,2020-01-01 03,30.5,0.246,7.2,27.7,25.1
2020-01-01 04:00:00,2020-01-01 04,29.3,0.204,8.3,15.3,13.6
...,...,...,...,...,...,...
2024-09-03 18:00:00,2024-09-03 18,,0.222,55.1,12.0,5.3
2024-09-03 19:00:00,2024-09-03 19,,0.245,48.2,13.4,7.0
2024-09-03 20:00:00,2024-09-03 20,,0.234,44.5,12.4,7.1
2024-09-03 21:00:00,2024-09-03 21,,0.225,25.9,10.6,5.4


In [None]:
# --- 1. Load/prep your data ---
df = pd.read_csv('../../data/train.csv')
df["datetime"] = pd.to_datetime(df["id"], format="%Y-%m-%d %H")
df = df.sort_values('datetime').set_index('datetime')

# Make sure to have a regular hourly frequency, filling missing timestamps:
series = TimeSeries.from_series(
    df['valeur_PM25'].astype(np.float32),
    fill_missing_dates=True,
    freq='H'
)

# Impute missing values
series = fill_missing_values(series, method='linear')  # or other method

# Optionally scale
scaler = Scaler()
series_scaled = scaler.fit_transform(series)

# --- 2. Define forecast horizon and backtest parameters ---
forecast_horizon = 3 * 7 * 24  # 3 weeks ahead = 504 hours

# For backtesting, choose where to start the first historical forecast
# For example, start backtest at 70% of the series, then do rolling / expanding.
start = 0.98  # 70% of data used before we begin forecasting

# --- 3. Create time/cyclical encoders -- features from the datetime index ---
# You can use Darts’ encoders:

add_encoders = {
    # Cyclical encoding for hour of day
    'cyclic': {'future': ['hour'], 'past': ['hour']},
    # Also possibly day_of_week, month, etc.
    'datetime_attribute': {'future': ['day_of_week'], 'past': ['day_of_week']},
    # Using a transformer to scale the encodings
    'transformer': Scaler(),
}

# Alternatively you can use SequentialEncoder directly if your model uses it

# --- 4. Define candidate models with these encoders ---

models = {
    "ARIMA": AutoARIMA(),  
    "ExpSmoothing": ExponentialSmoothing(),
    "NBEATS": NBEATSModel(
        input_chunk_length=7 * 24 * 8,   # e.g. use past 2 weeks for input
        output_chunk_length=forecast_horizon,
        add_encoders=add_encoders
    ),
    "TCN": TCNModel(
        input_chunk_length=7 * 24 * 8,       # past 1 week
        output_chunk_length=forecast_horizon,
        add_encoders=add_encoders
    ),
    "Linear_Regression_Lags": SKLearnModel(
        lags= [-1, -24, -24*7, -24*7*4],  # for example: 1 hour, 24 hours, 7 days lags
        add_encoders=add_encoders
    )
}

# --- 5. Evaluate models via backtesting / historical forecasts ---

results = {}

import torch
torch.set_default_dtype(torch.float32)

for name, model in models.items():
    print(f"Evaluating model: {name}")
    # historical_forecasts does: repeatedly forecast horizon ahead from past, collects forecasts
    hf = model.historical_forecasts(
        series_scaled,
        start=start,
        forecast_horizon=forecast_horizon,
        stride=forecast_horizon,  # non-overlapping, you could use smaller stride for overlap
        retrain=True,
        verbose=False,
        # Provide encoders so the model can generate covariates
    )
    # hf is a TimeSeries of forecasts aligned to the true series

    # Rescale back to original

    # Compare with actuals
    # Extract actuals aligned to forecast times
    actual = series.slice_intersect(hf)

    # compute error metrics
    results[name] = {
        'MAE': mae(actual, hf),
        'MAPE': mape(actual, hf),
        'RMSE': rmse(actual, hf)
    }
    print(f"{name} done: {results[name]}")

print("All results:", results)

  resampled_times = resampled_times.asfreq(freq)


Evaluating model: ARIMA
ARIMA done: {'MAE': np.float64(5.669981266081351), 'MAPE': np.float64(97.75829447624486), 'RMSE': np.float64(5.669981266081351)}
Evaluating model: ExpSmoothing


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


ExpSmoothing done: {'MAE': np.float64(5.648703846926065), 'MAPE': np.float64(97.39144243390741), 'RMSE': np.float64(5.648703846926065)}
Evaluating model: NBEATS


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | stacks          | ModuleList       | 48.6 M | train
-------------------------------------------------------------
48.6 M    Trainable params
33.5 K    Non-trainable params
48.6 M    Total params
194.497   Total estimated model params size (MB)
396       Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

In [23]:
(365*4*0.1) / (7*4)

5.214285714285714