
# Pipeline Demo

This notebook illustrates how to use our evaluation pipeline for time-series forecasting.

The emphasis is on reproducibility and on avoiding boilerplate code within experiment notebooks.

## Setup & Imports

In [None]:
# Colab Setup

# download repo
! git clone https://github.com/MarthyGarcia/IFT6759_B_H23.git
%cd IFT6759_B_H23

# install make
! apt-get install binutils

# run dependencies
! make requirements
! make sync_data

In [None]:
# Local Setup
# NOTE(review): this path is machine-specific — point it at your own clone of the repo.
%cd ~/Documents/school/projet/IFT6759_B_H23

In [None]:
from src.pipeline.pipeline import ExperimentPipeline
from src.pipeline.experiment import Experiment, HyperParameter, BayesOptHyperParameter

from darts.metrics import mae

from darts.dataprocessing import Pipeline
from darts.dataprocessing.transformers import Scaler

from darts.models.forecasting.forecasting_model import LocalForecastingModel
from darts.models import RNNModel

from pytorch_lightning.callbacks import EarlyStopping

## Experiment Definition

### Dataset

We choose the dataset and define the preprocessing operations applied to it.

Here the only preprocessing step is scaling the values to the [0, 1] range.

In [None]:
# Available datasets: ['exchange_rate', 'traffic', 'electricity', 'BTC', 'ETH']
# (only uses first covariate)
dataset = "traffic"

In [None]:
# Preprocessing steps, chained in order; each step follows the
# sklearn-style `.fit_transform()` API. Only a `Scaler` is used here.
preprocessing = Pipeline([Scaler()])

### Evaluation

Define the evaluation metric

In [None]:
# Mean Absolute Error
metric = mae  # [mape, mare, mase, mse, rmse, ...]

### Model

In [None]:
# set DARTS model class (the class itself, not an instance — it is handed to `Experiment` below)
model = RNNModel

Here we set the fixed (non-optimized) hyperparameters:

| HParams           |                     Value |
|:------------------|--------------------------:|
| Model             |                      LSTM |
| Input chunk len   |                  ONE_WEEK |
| Max number epochs |                        50 |
| Optimizer args    |                   LR=1e-3 |
| Trainer args      | EarlyStop with patience=5 |

In [None]:
ONE_WEEK = 7 * 24  # number of hours in one week (168)

# Fixed hyperparameters, each wrapped in a `HyperParameter(name, value)`.
deterministic_params = [
    HyperParameter(name=name, value=value)
    for name, value in {
        'model': 'LSTM',
        'input_chunk_length': ONE_WEEK,
        'n_epochs': 50,
        'optimizer_kwargs': {"lr": 1e-3},
        'pl_trainer_kwargs': {
            'callbacks': [
                EarlyStopping(monitor="val_loss", patience=5, min_delta=1e-5, mode='min'),
            ],
        },
    }.items()
]

Here we set the hyperparameters we want to optimize:

| HParams           | Type          |                          Values |
|-------------------|---------------|--------------------------------:|
| Dropout           | `float`       |                         [0,0.8] |
| Hidden Dimensions | `categorical` |     {16, 32, 64, 128, 256, 512} |
| RNN Layers        | `int`         |                          [1,10] |

In [None]:
# Search space: one `BayesOptHyperParameter` per tuned hyperparameter. Each entry
# names the optuna suggest method and the keyword arguments passed as `value`.
optuna_params = [
    BayesOptHyperParameter(name=name, **spec)
    for name, spec in {
        'dropout': {
            'optuna_suggest_method': 'suggest_float',
            'value': {'low': 0.0, 'high': 0.8},
        },
        'hidden_dim': {
            'optuna_suggest_method': 'suggest_categorical',
            # powers of two from 2**4 to 2**9
            'value': {'choices': [16, 32, 64, 128, 256, 512]},
        },
        'n_rnn_layers': {
            'optuna_suggest_method': 'suggest_int',
            'value': {'low': 1, 'high': 10},
        },
    }.items()
]

### Experiment

We reuse the parameters defined earlier and add the remaining experiment settings.

In [None]:
ONE_MINUTE = 60  # seconds

# Bundle everything defined above together with the evaluation settings.
params = Experiment(
    dataset=dataset,
    preprocessing=preprocessing,
    model=model,
    hyper_parameters=[*deterministic_params, *optuna_params],
    metric=metric,

    # Horizon for prediction
    horizon=ONE_WEEK,
    # Time allocated for HParam search in seconds
    optuna_timeout=ONE_MINUTE,
    # Number of validation samples for the backtest, means len(valid_dataset)
    n_backtest=100,
    # Number of samples for our .fit(), means len(train_dataset)
    n_train_samples=500,
)

## Running the pipeline

In [None]:
# Build the pipeline from the experiment definition and execute it.
# NOTE(review): presumably this runs the HParam search bounded by `optuna_timeout` —
# confirm against src.pipeline.pipeline.
pipeline = ExperimentPipeline(params)
pipeline.run()

## Validation

### Retrain with best HParams

A sample configuration with fast training time and decent performance.

Training runs for 11 epochs before early stopping ends it.

In [None]:
# Hand-picked configuration used for the retraining below; `n_epochs` is only an
# upper bound because the EarlyStopping callback monitors "val_loss".
best_hparams = dict(
    model='LSTM',
    hidden_dim=32,
    dropout=0.5,
    n_rnn_layers=5,
    input_chunk_length=ONE_WEEK,
    n_epochs=100,
    optimizer_kwargs={"lr": 1e-3},
    pl_trainer_kwargs={
        'callbacks': [
            EarlyStopping(monitor="val_loss", patience=3, min_delta=1e-5, mode='min'),
        ],
    },
)

In [None]:
# Pull the three splits out of the pipeline's data mapping.
train, valid, test = (
    pipeline.data[split] for split in ('train', 'valid', 'test')
)

In [None]:
# Instantiate the model with the chosen hyperparameters and train it, using the
# validation split as `val_series`.
# NOTE: this rebinds `model`, which earlier held the RNNModel class passed to `Experiment`.
model = RNNModel(**best_hparams)
model.fit(series=train, val_series=valid)

### Plot

In [None]:
# Split the start of the test set into two consecutive ONE_WEEK-long windows.
first_week, second_week = test[:ONE_WEEK], test[ONE_WEEK:2 * ONE_WEEK]

# Forecast the second window using only the first one as input.
preds_second_week = model.predict(series=first_week, n=ONE_WEEK)

In [None]:
# Overlay the forecast and the ground truth for the second week.
preds_second_week.plot(label='prediction')
second_week.plot(label='truth')

# MAE between forecast and truth; as the cell's last expression it is displayed as output.
mae(preds_second_week, second_week)

### Full Data Backtesting

In [None]:
# Evaluate with a backtest over the test split, using a stride chosen so that
# roughly N_EVALUATION_POINTS forecast origins are spread across it.
N_EVALUATION_POINTS = 100
series = train.append(valid)

# NOTE: this rebinds `metric` (previously the metric function) to the backtest
# result, which the next cell displays.
metric = model.backtest(
    metric=mae,
    # small offset added to every value — presumably to keep the series away
    # from exact zeros; TODO confirm why it is needed
    series=series.append(test) + 1e-9,
    # len(train + valid) is the position where the test split begins
    start=len(series),
    forecast_horizon=ONE_WEEK,
    # clamp to 1: a test split shorter than N_EVALUATION_POINTS would otherwise
    # floor-divide to a stride of 0
    stride=max(1, len(test) // N_EVALUATION_POINTS),
    # retrain is True only when `model` is a LocalForecastingModel
    retrain=isinstance(model, LocalForecastingModel),
)

In [None]:
# Display the value currently bound to `metric`.
metric