# Interface for Pipeline Execution 
## (Simulated Data from SARIMAX Process)

## 1) Import Models, Metrics, Paths, and Functions

In [1]:
from src.utils.helpers import csv_reader
from src.utils.paths import *
from src.paper import display_ranking_table
from src.pipeline.run_pipeline import run_pipeline

from src.models import MODELS
from src.metrics import METRICS


Loading helper functions...
Loading paths...
Loading data transformers...
Loading models...
Loading metrics...

Successfully loaded metrics: MAPE, RMSE, sMAPE

Successfully loaded forecasters:
 - without covariates: Naive, Naive (drift), AutoSARIMA, Exponential Smoothing, STL, XGBoost
 - covariates: AutoSARIMAX, XGBoostCov

Successfully loaded ensemblers:
 - weighted: Simple, Inverse RMSE, Inverse Variance, Inverse Error Covariance
 - meta: SVR, Random Forest


## 2) Load Simulated Data

In [2]:
select_datasets = ["RW", "TrendSeasRW", "WeakSARIMA", "StrongSARIMA", "SARIMAX"]
df_dict = {}
for file in os.listdir(SIMDATA_DIR):
    if file.replace(".csv", "") in select_datasets:
        df = csv_reader(SIMDATA_DIR, file)
        df_dict[df.attrs['file_name']] = df
    else:
        print(f"Skipping {file}...")


In [3]:
display(df_dict['RW'].head())

Unnamed: 0_level_0,RW
Date,Unnamed: 1_level_1
2004-01-01,75.08842
2004-02-01,72.932225
2004-03-01,60.153464
2004-04-01,57.815302
2004-05-01,54.129793


## 3) Run Pipeline

In [4]:
df_dict = df_dict["WeakSARIMA"]
output_dictionaries = run_pipeline(
    df=df_dict, models=MODELS, metrics=METRICS,
    # end="2010-12-31",
    # select_ensemblers=["Inverse RMSE", "Simple"],
    # select_forecasters=["Naive", "AutoSARIMAX"],
    fh=1, verbose=True, export_path=None#export_path=PAPER_PIPE_OUTPUT_DIR
) 

[2024-03-10 11:27] Starting  Pipeline for RW...[0m
[33;1m
== Pipeline Step 1: Data Preprocessing ==
[0m
[33;1mSearching time information...[0m
[33;1mDates found in 'index' column![0m
[33;1mInferred frequency: month start[0m
[33;1mData goes from 2004-01 to 2023-12, resulting in 240 observations.
[0m
[33;1mSelecting target and covariates...[0m
[33;1mTarget: RW[0m
[33;1mCovariates: None[0m
[33;1m
Data Insights:[0m
[33;1m                RW
Date              
2004-01  75.088420
2004-02  72.932225
2004-03  60.153464
2004-04  57.815302
2004-05  54.129793[0m
[33;1m
[Time elapsed: 00s]
[0m
[33;1m
== Pipeline Step 2: Individual Models' Predictions ==
[0m
[33;1mSplitting data for training of forecasters (train/test ratio: 30/70)...[0m
[33;1mInitial training set has 72 observations and goes from 2004-01 to 2009-12.[0m
[33;1m
In an historical expanding window approach, there are 168 periods to be forecasted by the individual models: 2010-01 to 2023-12[0m
[33;1mOut-of

## 3) Show Ranking Tables

In [5]:
# Sort
# sorted(output_dictionaries.items(), key=lambda pair: select_datasets.index(pair[0]))
# Show
for df, output_dict in output_dictionaries.items():
    print(f"Metrics ranking for {df}:")
    display_ranking_table(output_dict['metrics ranking'])
    print("\n")

Metrics ranking for RW:


Unnamed: 0_level_0,MAPE,RMSE,sMAPE,MAPE Ranking,RMSE Ranking,sMAPE Ranking
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Weighted Ensemble: Inverse Variance,0.056,4.906,3.679,1,1,1
Weighted Ensemble: Inverse RMSE,0.056,4.924,3.7,2,2,2
Weighted Ensemble: Simple,0.056,4.954,3.717,3,3,3
AutoSARIMA,0.057,4.966,3.776,4,4,4
Exponential Smoothing,0.057,4.968,3.776,5,5,5
Naive,0.059,5.15,3.925,6,6,6
Naive (drift),0.059,5.165,3.938,7,7,7
Weighted Ensemble: Inverse Error Covariance,0.061,5.25,4.022,8,9,8
Meta Ensemble: SVR,0.061,5.224,4.057,9,8,9
XGBoost,0.063,5.522,4.181,10,10,10




Metrics ranking for SARIMAX:


Unnamed: 0_level_0,MAPE,RMSE,sMAPE,MAPE Ranking,RMSE Ranking,sMAPE Ranking
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AutoSARIMAX with covariates,0.028,2.61,1.852,1,1,1
Exponential Smoothing,0.028,2.642,1.852,2,2,2
Weighted Ensemble: Inverse RMSE,0.028,2.674,1.859,3,3,3
AutoSARIMA,0.028,2.678,1.868,4,4,4
Weighted Ensemble: Inverse Variance,0.028,2.689,1.872,5,5,5
Weighted Ensemble: Simple,0.028,2.694,1.876,6,6,6
Naive (drift),0.028,2.745,1.893,7,8,7
Naive,0.028,2.736,1.894,8,7,8
Weighted Ensemble: Inverse Error Covariance,0.029,2.775,1.969,9,10,9
Meta Ensemble: SVR,0.03,2.772,1.976,10,9,10




Metrics ranking for StrongSARIMA:


Unnamed: 0_level_0,MAPE,RMSE,sMAPE,MAPE Ranking,RMSE Ranking,sMAPE Ranking
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AutoSARIMA,0.022,1.99,1.491,1,1,1
Weighted Ensemble: Inverse RMSE,0.022,2.027,1.5,2,5,2
Weighted Ensemble: Inverse Variance,0.023,2.044,1.506,3,6,3
Weighted Ensemble: Simple,0.023,2.072,1.52,4,7,4
Naive,0.023,2.015,1.522,5,2,5
Exponential Smoothing,0.023,2.015,1.522,6,3,6
Naive (drift),0.023,2.019,1.523,7,4,7
Meta Ensemble: SVR,0.024,2.161,1.576,8,8,8
Meta Ensemble: Random Forest,0.026,2.367,1.71,9,9,9
XGBoost,0.026,2.5,1.772,10,10,10




Metrics ranking for TrendSeasRW:


Unnamed: 0_level_0,MAPE,RMSE,sMAPE,MAPE Ranking,RMSE Ranking,sMAPE Ranking
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Weighted Ensemble: Inverse RMSE,0.037,3.214,2.462,1,1,1
Weighted Ensemble: Simple,0.037,3.218,2.462,2,2,2
Weighted Ensemble: Inverse Variance,0.037,3.219,2.464,3,3,3
AutoSARIMA,0.038,3.294,2.529,4,4,4
Weighted Ensemble: Inverse Error Covariance,0.039,3.362,2.564,5,5,5
Exponential Smoothing,0.039,3.386,2.625,6,6,6
Meta Ensemble: SVR,0.04,3.595,2.684,7,9,7
Naive,0.041,3.502,2.725,8,7,8
Naive (drift),0.041,3.512,2.734,9,8,9
Meta Ensemble: Random Forest,0.042,3.741,2.792,10,10,10




Metrics ranking for WeakSARIMA:


Unnamed: 0_level_0,MAPE,RMSE,sMAPE,MAPE Ranking,RMSE Ranking,sMAPE Ranking
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Weighted Ensemble: Simple,0.03,2.644,2.019,1,3,1
Weighted Ensemble: Inverse RMSE,0.03,2.64,2.019,2,2,2
Weighted Ensemble: Inverse Variance,0.03,2.639,2.019,3,1,3
Weighted Ensemble: Inverse Error Covariance,0.031,2.762,2.055,4,5,4
AutoSARIMA,0.031,2.71,2.073,5,4,5
Exponential Smoothing,0.033,2.782,2.175,6,6,6
Meta Ensemble: SVR,0.033,2.973,2.191,7,9,7
Naive,0.033,2.791,2.195,8,7,8
Naive (drift),0.033,2.799,2.199,9,8,9
XGBoost,0.035,3.181,2.328,10,10,10




Note: For now you need to restart kernel and if you want to re-run pipeline with different settings.

In [6]:
display_ranking_table(output_dictionaries['metrics ranking'])

KeyError: 'metrics ranking'