# Interface for Pipeline Execution

## 1) Import Models, Metrics, Paths, and Functions

In [1]:
from utils.helpers import csv_reader
from utils.paths import *
from pipeline.run_pipeline import run_pipeline


from models import MODELS
from metrics import METRICS


Loading paths...
Loading helper functions...
Loading data transformers...
Loading models...
Loading metrics...


In [2]:
# print(METRICS)
# print(MODELS)

## 2) Select Input Data 

In [3]:
# Load the 'noisy_simdata' CSV from SIMDATA_DIR and preview its first rows
df = csv_reader(SIMDATA_DIR, 'noisy_simdata')
print(df.head())

# While testing the pipeline, keep only the first 120 rows
# (a 10-year subset of the data):
df = df.iloc[:120]

                    y          x1         x2          x3
Date                                                    
2004-01-01  50.840469  427.595799  55.337904  900.325291
2004-02-01  52.871538  434.062163  54.959155  900.775888
2004-03-01  53.769316  453.264284  56.470633  899.510058
2004-04-01  57.672973  459.367523  56.704233  903.524834
2004-05-01  57.182051  462.354356  61.557907  905.071762


In [4]:
# ## Using the EUR-USD Exchange Rate data
# 
# df = csv_reader(TESTDATA_DIR, 'eurusd', columns=['datetime', 'bid_close'])
# import sys
# import warnings
# # warnings( sys.__stdout__())
# 
# #sys.jupyter_stdout = sys.__stdout__
# print("hey")
# print(df.head())
# 
# # For testing the pipeline, I use a small subset for now:
# df = df.iloc[:10000, :]

## 3) Run Pipeline

### 3.1) ... on Simulated Data

In [5]:
# Run the pipeline on the data selected above; the returned object is kept in
# `output_dict` so later cells can look up results by key.
output_dict = run_pipeline(
    df=df,
    models=MODELS,
    metrics=METRICS,
    select_forecasters=['all'],
    verbose=True,
)

[2024-03-06 08:05] Starting  Pipeline...[0m
[33;1m
== Starting Step 1 in Pipeline: Data Preprocessing ==
[0m
[33;1mSearching for time information...[0m
[33;1mDates found in 'index' column![0m
[33;1mInferred frequency: month start[0m
[33;1mData from goes from 2004-01-01 to 2013-12-01, resulting in 120 observations.
[0m
[33;1mSelecting target and covariates...[0m
[33;1mTarget: y[0m
[33;1mCovariates: x1, x2, x3[0m
[33;1m
Data Insight:
[0m
[33;1m                    y          x1         x2          x3
Date                                                    
2004-01-01  50.840469  427.595799  55.337904  900.325291
2004-02-01  52.871538  434.062163  54.959155  900.775888
2004-03-01  53.769316  453.264284  56.470633  899.510058
2004-04-01  57.672973  459.367523  56.704233  903.524834
2004-05-01  57.182051  462.354356  61.557907  905.071762[0m
[33;1m[Time elapsed: 00s]
[0m
[33;1m
== Starting Step 2 in Pipeline: Individual Forecasts ==
[0m
[33;1mSplitting data (train/t

### 3.2) ... on EUR-USD Exchange Data

In [6]:
# output_dict = (
#     run_pipeline(
#         df=df, models=MODELS, metrics=METRICS,
#         agg_method='last', agg_freq='B',
#         forecast_init_train=0.3, ensemble_init_train=0.3,
#         autosarimax_refit_interval=0.33,
#         select_forecasters=['Naive', 'STL'],
#         select_ensemblers=['Simple', 'Inverse RMSE'],
#         verbose=True
#     )
# )

## 4) Show Ranking Table

In [7]:
# Look up the 'metrics ranking' entry of the pipeline output and render it
# through its Styler with `.hide()` applied.
ranking_styler = output_dict['metrics ranking'].style
display(ranking_styler.hide())

Model,MAE,RMSE,MAPE,sMAPE,MAE Ranking,RMSE Ranking,MAPE Ranking,sMAPE Ranking
Weighted Ensemble: Inverse RMSE,1.987962,2.618257,0.027556,1.846892,1,1,1,1
Weighted Ensemble: Simple,1.992834,2.636096,0.027619,1.850825,2,2,2,2
Weighted Ensemble: Inverse Variance,2.004749,2.638848,0.02779,1.863383,3,3,3,3
Exponential Smoothing,2.073113,2.706564,0.028698,1.923401,4,4,4,4
Naive,2.090017,2.747925,0.028985,1.94118,5,6,5,5
AutoSARIMA,2.111371,2.73058,0.029175,1.953598,6,5,6,6
Naive (drift),2.117502,2.748343,0.029382,1.963672,7,7,7,7
Meta Ensemble: RandomForest,2.209277,2.790585,0.030548,2.048812,8,8,8,8
AutoSARIMAX with covariates,2.249503,2.968497,0.031303,2.08566,10,9,9,9
Weighted Ensemble: Inverse Error Covariance,2.260794,3.03137,0.031419,2.105506,11,12,10,10
