# Interface for Pipeline Execution 
## (Simulated Data from SARIMAX Process)

## 1) Import Models, Metrics, Paths, and Functions

In [2]:
from src.utils.helpers import csv_reader, display_ranking_table
from src.utils.paths import *
from src.pipeline.run_pipeline import run_pipeline


from src.models import MODELS
from src.metrics import METRICS


Loading helper functions...
Loading paths...
Loading data transformers...
Loading models...
Loading metrics...


In [3]:
# Optional sanity check: uncomment to inspect the METRICS and MODELS objects
# imported above before passing them to the pipeline.
# print(METRICS)
# print(MODELS)

## 2) Select Input Data 

In [4]:
# Read input data: load the 'noisy_simdata' dataset from SIMDATA_DIR using the
# project's csv_reader helper (SIMDATA_DIR comes from the star import of
# src.utils.paths).
# NOTE(review): presumably csv_reader resolves the name to a CSV file and
# returns a date-indexed DataFrame -- confirm against src.utils.helpers.
df = csv_reader(SIMDATA_DIR, 'noisy_simdata')
# Preview the first rows to verify the target (y) and covariate columns loaded.
print(df.head())

                    y          x1         x2          x3
Date                                                    
2004-01-01  50.840469  427.595799  55.337904  900.325291
2004-02-01  52.871538  434.062163  54.959155  900.775888
2004-03-01  53.769316  453.264284  56.470633  899.510058
2004-04-01  57.672973  459.367523  56.704233  903.524834
2004-05-01  57.182051  462.354356  61.557907  905.071762


## 3) Run Pipeline

In [5]:
# Run the pipeline on the loaded data with the imported MODELS and METRICS,
# restricted to three individual forecasters and the 'Simple' ensembler.
# The result is kept in output_dict for the ranking table shown further below.
# NOTE(review): fh is presumably the forecast horizon (1 step ahead) -- confirm
# against run_pipeline's signature.
output_dict = run_pipeline(
    df=df, models=MODELS, metrics=METRICS,
    fh=1,
    select_forecasters=['Naive', 'XGBoost', 'STL'],
    select_ensemblers='Simple',
    start="2004-01-01", end="2015-12-31",  # restrict to 2004-01 through 2015-12: 12 years (144 monthly observations)
    verbose=True  # emit the step-by-step progress log
)

[2024-03-08 09:50] Starting  Pipeline...[0m
[33;1m
== Pipeline Step 1: Data Preprocessing ==
[0m
[33;1mSearching time information...[0m
[33;1mDates found in 'index' column![0m
[33;1mInferred frequency: month start[0m
[33;1mData goes from 2004-01 to 2015-12, resulting in 144 observations.
[0m
[33;1mSelecting target and covariates...[0m
[33;1mTarget: y[0m
[33;1mCovariates: x1, x2, x3[0m
[33;1m
Data Insights:[0m
[33;1m                 y          x1         x2          x3
Date                                                 
2004-01  50.840469  427.595799  55.337904  900.325291
2004-02  52.871538  434.062163  54.959155  900.775888
2004-03  53.769316  453.264284  56.470633  899.510058
2004-04  57.672973  459.367523  56.704233  903.524834
2004-05  57.182051  462.354356  61.557907  905.071762[0m
[33;1m
[Time elapsed: 00s]
[0m
[33;1m
== Pipeline Step 2: Individual Models' Predictions ==
[0m
[33;1mSplitting data for training of forecasters (train/test ratio: 30/70)...

## 4) Show Ranking Table

In [6]:
# Show the per-model metric values and their rankings, taken from the
# 'metrics ranking' entry of the pipeline output.
display_ranking_table(output_dict['metrics ranking'])

Unnamed: 0_level_0,RMSE,MAPE,sMAPE,RMSE Ranking,MAPE Ranking,sMAPE Ranking
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Naive,2.883,0.029,1.958,1,1,1
Weighted Ensemble: Simple,3.122,0.03,2.043,2,2,2
XGBoost,3.342,0.032,2.187,3,3,3
STL,4.902,0.05,3.361,4,4,4
