# Interface for Pipeline Execution 
## (Simulated Data from SARIMAX Process)

## 1) Import Models, Metrics, Paths, and Functions

In [1]:
# Project helpers used below to read the input data and show the ranking table.
from utils.helpers import csv_reader, display_ranking_table
# NOTE(review): wildcard import hides where names come from. SIMDATA_DIR (and
# TESTDATA_DIR in the disabled EUR-USD cell) presumably come from here — consider
# importing them explicitly once confirmed.
from utils.paths import *
from pipeline.run_pipeline import run_pipeline


# Model and metric collections that are handed to run_pipeline further down.
from models import MODELS
from metrics import METRICS


Loading paths...
Loading helper functions...
Loading data transformers...
Loading models...
Loading metrics...


In [2]:
# Uncomment to inspect the imported metrics and models:
# print(METRICS)
# print(MODELS)

## 2) Select Input Data 

In [3]:
# Read the input data: load the 'noisy_simdata' dataset from SIMDATA_DIR via the
# project helper csv_reader. `df` is the frame passed to run_pipeline further down.
df = csv_reader(SIMDATA_DIR, 'noisy_simdata')
# Preview the first rows as a quick sanity check on the columns and date index.
print(df.head())

                    y          x1         x2          x3
Date                                                    
2004-01-01  50.840469  427.595799  55.337904  900.325291
2004-02-01  52.871538  434.062163  54.959155  900.775888
2004-03-01  53.769316  453.264284  56.470633  899.510058
2004-04-01  57.672973  459.367523  56.704233  903.524834
2004-05-01  57.182051  462.354356  61.557907  905.071762


In [4]:
# ## Using the EUR-USD Exchange Rate data
# 
# df = csv_reader(TESTDATA_DIR, 'eurusd', columns=['datetime', 'bid_close'])
# import sys
# import warnings
# # warnings( sys.__stdout__())
# 
# #sys.jupyter_stdout = sys.__stdout__
# print("hey")
# print(df.head())
# 
# # For testing the pipeline, use only a small subset for now:
# df = df.iloc[:10000, :]

## 3) Run Pipeline

In [5]:
# Execute the pipeline on the loaded data, passing the full MODELS and METRICS
# collections. The start/end window keeps only the first 12 years of data.
# NOTE(review): fh=1 is presumably a one-step-ahead forecast horizon — confirm
# against run_pipeline.
output_dict = run_pipeline(
    df=df,
    models=MODELS,
    metrics=METRICS,
    fh=1,
    # Optional restrictions, currently disabled:
    # select_forecasters=['Naive', 'XGBoost', 'STL'],
    # select_ensemblers='Simple',
    start="2004-01-01",
    end="2015-12-31",
    verbose=True,
)

[2024-03-07 11:32] Starting  Pipeline...[0m
[33;1m
== Pipeline Step 1: Data Preprocessing ==
[0m
[33;1mSearching time information...[0m
[33;1mDates found in 'index' column![0m
[33;1mInferred frequency: month start[0m
[33;1mData goes from 2004-01 to 2015-12, resulting in 144 observations.
[0m
[33;1mSelecting target and covariates...[0m
[33;1mTarget: y[0m
[33;1mCovariates: x1, x2, x3[0m
[33;1m
Data Insights:[0m
[33;1m                 y          x1         x2          x3
Date                                                 
2004-01  50.840469  427.595799  55.337904  900.325291
2004-02  52.871538  434.062163  54.959155  900.775888
2004-03  53.769316  453.264284  56.470633  899.510058
2004-04  57.672973  459.367523  56.704233  903.524834
2004-05  57.182051  462.354356  61.557907  905.071762[0m
[33;1m
[Time elapsed: 00s]
[0m
[33;1m
== Pipeline Step 2: Individual Models' Predictions ==
[0m
[33;1mSplitting data for training of forecasters (train/test ratio: 30/70)...

## 4) Show Ranking Table

In [6]:
# Show the metrics ranking stored under the 'metrics ranking' key of the
# pipeline output, using the project helper display_ranking_table.
display_ranking_table(output_dict['metrics ranking'])

Model,RMSE,MAPE,sMAPE,RMSE Ranking,MAPE Ranking,sMAPE Ranking
Weighted Ensemble: Inverse RMSE,2.811,0.028,1.906,1,1,1
Weighted Ensemble: Inverse Variance,2.829,0.028,1.909,2,2,2
Weighted Ensemble: Simple,2.849,0.028,1.911,4,3,3
Exponential Smoothing,2.838,0.029,1.948,3,4,4
Naive,2.883,0.029,1.958,6,5,5
Naive (drift),2.882,0.03,1.979,5,6,6
Meta Ensemble: Ridge,2.937,0.03,2.009,8,7,7
Meta Ensemble: SVR,3.079,0.03,2.046,10,8,9
AutoSARIMA,2.883,0.03,2.037,7,9,8
Meta Ensemble: Linear Regression,3.047,0.031,2.083,9,10,10
