# Interface for Pipeline Execution 
## (Simulated Data from SARIMAX Process)

## 1) Import Models, Metrics, Paths, and Functions

In [1]:
from src.utils.helpers import csv_reader
from src.utils.paths import *
from src.paper import display_ranking_table
from src.pipeline.run_pipeline import run_pipeline

from src.models import MODELS
from src.metrics import METRICS


Loading helper functions...
Loading paths...
Loading data transformers...
Loading models...
Loading metrics...

Successfully loaded metrics: MAPE, RMSE, sMAPE

Successfully loaded forecasters:
 - without covariates: Naive, AutoTheta, AutoSARIMA, Exponential Smoothing, TiDE, STL, XGBoost
 - covariates: AutoSARIMAX, XGBoostCov

Successfully loaded ensemblers:
 - weighted: Simple, Inverse RMSE, Inverse Variance, Inverse Error Covariance
 - meta: SVR, Random Forest


## 2) Load Simulated Data

In [2]:
select_datasets = ["eurusd"]
df_dict = {}
for file in os.listdir(TESTDATA_DIR):
    if file.replace(".csv", "") in select_datasets:
        df = csv_reader(TESTDATA_DIR, file)
        df_dict[df.attrs['file_name']] = df
    else:
        print(f"Skipping {file}...")


Skipping airline-passengers.csv...
Skipping DailyDelhiClimateTest.csv...
Skipping GoldUP.csv...
Skipping hospital_dataset.tsf...
Skipping predictors...


In [3]:
display(df_dict['eurusd'].head())

Unnamed: 0_level_0,bid_open,bid_high,bid_low,bid_close,ask_open,ask_high,ask_low,ask_close
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-01-01 00:00:00,,,,,,,,
2020-01-01 00:15:00,,,,,,,,
2020-01-01 00:30:00,,,,,,,,
2020-01-01 00:45:00,,,,,,,,
2020-01-01 01:00:00,,,,,,,,


## 3) Run Pipeline

In [4]:
output_dictionaries = run_pipeline(
    df=df_dict, models=MODELS, metrics=METRICS,
    agg_freq="B", agg_method="last", target="bid_close",
    sort_by="MAPE", covariates=None,
    fh=1, verbose=True, export_path=PAPER_PIPE_OUTPUT_DIR
) 

[2024-03-11 15:12] Starting  Pipeline for eurusd dataset...[0m
[33;1m
== Pipeline Step 1: Data Preprocessing ==
[0m
[33;1mSearching time information...[0m
[33;1mDates found in 'index' column![0m
[33;1mInferred frequency: 15T[0m
[33;1mData goes from 2020-01-01 00:00 to 2024-02-06 00:00, resulting in 143713 observations.
[0m
[33;1mAggregating data to frequency 'business day' using method 'last' and dropping NaNs...[0m
[33;1m...finished!
Data now has 1069 observations.
[0m
[33;1mSelecting target...[0m
[33;1mTarget: bid_close[0m
[33;1mCovariates: None[0m
[33;1m
Data Insights:[0m
[33;1m            bid_close
datetime             
2020-01-02    1.11702
2020-01-03    1.11599
2020-01-06    1.11948
2020-01-07    1.11530
2020-01-08    1.11124[0m
[33;1m
[Time elapsed: 00s]
[0m
[33;1m
== Pipeline Step 2: Individual Models' Predictions ==
[0m
[33;1mSplitting data for training of forecasters (train/test ratio: 30/70)...[0m
[33;1mInitial training set has 321 observation

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=60` reached.


[33;1mTrain dataset contains 316 samples.[0m
[33;1mTime series values are 64-bits; casting model to float64.[0m


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name             | Type             | Params
------------------------------------------------------
0 | criterion        | MSELoss          | 0     
1 | train_metrics    | MetricCollection | 0     
2 | val_metrics      | MetricCollection | 0     
3 | encoders         | Sequential       | 18.0 K
4 | decoders         | Sequential       | 20.6 K
5 | temporal_decoder | _ResidualBlock   | 594   
6 | lookback_skip    | Linear           | 6     
------------------------------------------------------
39.3 K    Trainable params
0         Non-trainable params
39.3 K    Total params
0.157     Total estimated model params size (MB)


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=60` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU c

[33;1mTrain dataset contains 442 samples.[0m
[33;1mTime series values are 64-bits; casting model to float64.[0m


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name             | Type             | Params
------------------------------------------------------
0 | criterion        | MSELoss          | 0     
1 | train_metrics    | MetricCollection | 0     
2 | val_metrics      | MetricCollection | 0     
3 | encoders         | Sequential       | 18.0 K
4 | decoders         | Sequential       | 20.6 K
5 | temporal_decoder | _ResidualBlock   | 594   
6 | lookback_skip    | Linear           | 6     
------------------------------------------------------
39.3 K    Trainable params
0         Non-trainable params
39.3 K    Total params
0.157     Total estimated model params size (MB)


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=60` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU c

[33;1mTrain dataset contains 568 samples.[0m
[33;1mTime series values are 64-bits; casting model to float64.[0m


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name             | Type             | Params
------------------------------------------------------
0 | criterion        | MSELoss          | 0     
1 | train_metrics    | MetricCollection | 0     
2 | val_metrics      | MetricCollection | 0     
3 | encoders         | Sequential       | 18.0 K
4 | decoders         | Sequential       | 20.6 K
5 | temporal_decoder | _ResidualBlock   | 594   
6 | lookback_skip    | Linear           | 6     
------------------------------------------------------
39.3 K    Trainable params
0         Non-trainable params
39.3 K    Total params
0.157     Total estimated model params size (MB)


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=60` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU c

[33;1mTrain dataset contains 694 samples.[0m
[33;1mTime series values are 64-bits; casting model to float64.[0m


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name             | Type             | Params
------------------------------------------------------
0 | criterion        | MSELoss          | 0     
1 | train_metrics    | MetricCollection | 0     
2 | val_metrics      | MetricCollection | 0     
3 | encoders         | Sequential       | 18.0 K
4 | decoders         | Sequential       | 20.6 K
5 | temporal_decoder | _ResidualBlock   | 594   
6 | lookback_skip    | Linear           | 6     
------------------------------------------------------
39.3 K    Trainable params
0         Non-trainable params
39.3 K    Total params
0.157     Total estimated model params size (MB)


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=60` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU c

[33;1mTrain dataset contains 820 samples.[0m
[33;1mTime series values are 64-bits; casting model to float64.[0m


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name             | Type             | Params
------------------------------------------------------
0 | criterion        | MSELoss          | 0     
1 | train_metrics    | MetricCollection | 0     
2 | val_metrics      | MetricCollection | 0     
3 | encoders         | Sequential       | 18.0 K
4 | decoders         | Sequential       | 20.6 K
5 | temporal_decoder | _ResidualBlock   | 594   
6 | lookback_skip    | Linear           | 6     
------------------------------------------------------
39.3 K    Trainable params
0         Non-trainable params
39.3 K    Total params
0.157     Total estimated model params size (MB)


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=60` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU c

[33;1mTrain dataset contains 946 samples.[0m
[33;1mTime series values are 64-bits; casting model to float64.[0m


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name             | Type             | Params
------------------------------------------------------
0 | criterion        | MSELoss          | 0     
1 | train_metrics    | MetricCollection | 0     
2 | val_metrics      | MetricCollection | 0     
3 | encoders         | Sequential       | 18.0 K
4 | decoders         | Sequential       | 20.6 K
5 | temporal_decoder | _ResidualBlock   | 594   
6 | lookback_skip    | Linear           | 6     
------------------------------------------------------
39.3 K    Trainable params
0         Non-trainable params
39.3 K    Total params
0.157     Total estimated model params size (MB)


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=60` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU c

[33;1mNow performing corresponding out-of-sample predictions...[0m
[33;1mTrain dataset contains 1064 samples.[0m


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


[33;1mAttempting to retrain/fine-tune the model without resuming from a checkpoint. This is currently discouraged. Consider model `TiDEModel.load_weights()` to load the weights for fine-tuning.[0m



  | Name             | Type             | Params
------------------------------------------------------
0 | criterion        | MSELoss          | 0     
1 | train_metrics    | MetricCollection | 0     
2 | val_metrics      | MetricCollection | 0     
3 | encoders         | Sequential       | 18.0 K
4 | decoders         | Sequential       | 20.6 K
5 | temporal_decoder | _ResidualBlock   | 594   
6 | lookback_skip    | Linear           | 6     
------------------------------------------------------
39.3 K    Trainable params
0         Non-trainable params
39.3 K    Total params
0.157     Total estimated model params size (MB)


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=60` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


[33;1m...finished![0m
[33;1m
Now generating 748 one-step ahead historical expanding window predictions from model: STL (sktime)[0m
[33;1mPerforming out-of-sample predictions...[0m
[33;1m...finished![0m
[33;1m
Now generating 748 one-step ahead historical expanding window predictions from model: XGBoost (darts)[0m
[33;1mNow performing corresponding out-of-sample predictions...[0m
[33;1m...finished![0m
[33;1m
Skipping covariate forecasters since no covariates are given.[0m
[33;1m
Finished predictions of individual forecasters![0m
[33;1m
Insights into forecasters' historical predictions:[0m
[33;1m              Naive  AutoTheta  AutoSARIMA  Exponential Smoothing      TiDE  \
Date                                                                          
2021-03-26  1.17751   1.177948    1.177089               1.177893  1.178451   
2021-03-29  1.17869   1.179253    1.178824               1.179140  1.179870   
2021-03-30  1.17681   1.177607    1.176597               1.1775

In [6]:
print(output_dictionaries)

{'target and covariates': (datetime
2020-01-02    1.11702
2020-01-03    1.11599
2020-01-06    1.11948
2020-01-07    1.11530
2020-01-08    1.11124
               ...   
2024-01-31    1.08033
2024-02-01    1.08735
2024-02-02    1.07790
2024-02-05    1.07404
2024-02-06    1.07415
Freq: B, Name: bid_close, Length: 1069, dtype: float64, None), 'historical_individual_predictions': (             Target    Naive  AutoTheta  AutoSARIMA  Exponential Smoothing  \
Date                                                                         
2021-03-26  1.17869  1.17751   1.177948    1.177089               1.177893   
2021-03-29  1.17681  1.17869   1.179253    1.178824               1.179140   
2021-03-30  1.17225  1.17681   1.177607    1.176597               1.177527   
2021-03-31  1.17266  1.17225   1.171883    1.171733               1.171704   
2021-04-01  1.17768  1.17266   1.172446    1.172706               1.172426   
...             ...      ...        ...         ...                    ... 

## 3) Show Ranking Tables

In [7]:
# Sort
# sorted(output_dictionaries.items(), key=lambda pair: select_datasets.index(pair[0]))

# Show
display(output_dictionaries['metrics ranking'])


Unnamed: 0_level_0,MAPE,RMSE,sMAPE,MAPE Ranking,RMSE Ranking,sMAPE Ranking
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Naive,0.004097,0.005659,0.273124,1,1,1
Exponential Smoothing,0.004101,0.005682,0.273425,2,4,2
AutoTheta,0.004102,0.005677,0.273445,3,3,3
AutoSARIMA,0.004105,0.00567,0.273631,4,2,4
Weighted Ensemble: Inverse Error Covariance,0.004111,0.005688,0.274062,5,5,5
Weighted Ensemble: Inverse RMSE,0.004277,0.00593,0.285201,6,6,6
Weighted Ensemble: Simple,0.004408,0.006122,0.294004,7,7,7
Weighted Ensemble: Inverse Variance,0.00443,0.006153,0.295471,8,8,8
Meta Ensemble: Random Forest,0.004872,0.006813,0.324746,9,9,9
XGBoost,0.005359,0.007561,0.357145,10,10,10


Note: For now you need to restart kernel and if you want to re-run pipeline with different settings.

In [8]:
display_ranking_table(output_dictionaries['metrics ranking'])

Unnamed: 0_level_0,MAPE,RMSE,sMAPE,MAPE Ranking,RMSE Ranking,sMAPE Ranking
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Naive,0.004,0.006,0.273,1,1,1
Exponential Smoothing,0.004,0.006,0.273,2,4,2
AutoTheta,0.004,0.006,0.273,3,3,3
AutoSARIMA,0.004,0.006,0.274,4,2,4
Weighted Ensemble: Inverse Error Covariance,0.004,0.006,0.274,5,5,5
Weighted Ensemble: Inverse RMSE,0.004,0.006,0.285,6,6,6
Weighted Ensemble: Simple,0.004,0.006,0.294,7,7,7
Weighted Ensemble: Inverse Variance,0.004,0.006,0.295,8,8,8
Meta Ensemble: Random Forest,0.005,0.007,0.325,9,9,9
XGBoost,0.005,0.008,0.357,10,10,10
