# Interface for Pipeline Execution

## 1) Import Models, Metrics, Paths, and Functions

In [1]:
from utils.helpers import csv_reader
from utils.paths import *
from pipeline.run_pipeline import run_pipeline


from models import MODELS
from metrics import METRICS


Loading paths...
Loading helper functions...
Loading data transformers...
Loading models...
Loading metrics...


In [2]:
# print(METRICS)
# print(MODELS)

## 2) Select Input Data 

In [3]:
# Load the noisy simulated dataset and preview its first rows.
df = csv_reader(SIMDATA_DIR, 'noisy_simdata')
print(df.head())

# While testing the pipeline, keep only the first 120 rows
# (monthly data, i.e. a 10-year subset).
df = df.iloc[:120]

                    y          x1         x2          x3
Date                                                    
2004-01-01  50.840469  427.595799  55.337904  900.325291
2004-02-01  52.871538  434.062163  54.959155  900.775888
2004-03-01  53.769316  453.264284  56.470633  899.510058
2004-04-01  57.672973  459.367523  56.704233  903.524834
2004-05-01  57.182051  462.354356  61.557907  905.071762


In [4]:
# ## Using the EUR-USD Exchange Rate data
# 
# df = csv_reader(TESTDATA_DIR, 'eurusd', columns=['datetime', 'bid_close'])
# import sys
# import warnings
# # warnings( sys.__stdout__())
# 
# #sys.jupyter_stdout = sys.__stdout__
# print("hey")
# print(df.head())
# 
# # For testing the pipeline, I use a small subset for now:
# df = df.iloc[:10000, :]

## 3) Run Pipeline

### 3.1) ... on Simulated Data

In [5]:
# Run the pipeline on the simulated data; results are collected in
# `output_dict` and reused by the cells below.
output_dict = run_pipeline(
    df=df,
    models=MODELS,
    metrics=METRICS,
    select_forecasters=['all'],
    verbose=True,
)

[2024-03-06 08:05] Starting  Pipeline...[0m
[33;1m
== Starting Step 1 in Pipeline: Data Preprocessing ==
[0m
[33;1mSearching for time information...[0m
[33;1mDates found in 'index' column![0m
[33;1mInferred frequency: month start[0m
[33;1mData from goes from 2004-01-01 to 2013-12-01, resulting in 120 observations.
[0m
[33;1mSelecting target and covariates...[0m
[33;1mTarget: y[0m
[33;1mCovariates: x1, x2, x3[0m
[33;1m
Data Insight:
[0m
[33;1m                    y          x1         x2          x3
Date                                                    
2004-01-01  50.840469  427.595799  55.337904  900.325291
2004-02-01  52.871538  434.062163  54.959155  900.775888
2004-03-01  53.769316  453.264284  56.470633  899.510058
2004-04-01  57.672973  459.367523  56.704233  903.524834
2004-05-01  57.182051  462.354356  61.557907  905.071762[0m
[33;1m[Time elapsed: 00s]
[0m
[33;1m
== Starting Step 2 in Pipeline: Individual Forecasts ==
[0m
[33;1mSplitting data (train/t

### 3.2) ... on EUR-USD Exchange Data

In [6]:
# output_dict = (
#     run_pipeline(
#         df=df, models=MODELS, metrics=METRICS,
#         agg_method='last', agg_freq='B',
#         forecast_init_train=0.3, ensemble_init_train=0.3,
#         autosarimax_refit_interval=0.33,
#         select_forecasters=['Naive', 'STL'],
#         select_ensemblers=['Simple', 'Inverse RMSE'],
#         verbose=True
#     )
# )

## 4) Show Ranking Table

In [25]:
# NOTE(review): consider moving this import to the notebook's first import cell.
import pandas as pd

# Show floats with two decimals in every pandas repr from here on
# (this is a global display option, so it also affects later cells).
pd.options.display.float_format = '{:.2f}'.format
# Format performance metric values
def format_numeric(val, decimals=3):
    """Render a float as a fixed-point string; pass other values through.

    Parameters
    ----------
    val : object
        A single table cell.
    decimals : int, default 3
        Number of digits after the decimal point.

    Returns
    -------
    object
        A string with `decimals` decimal places when `val` is a float,
        otherwise `val` unchanged (e.g. integer rankings, model names).
    """
    if isinstance(val, float):
        return f'{val:.{decimals}f}'
    return val
# Apply the cell formatter element-wise to the ranking table.
metrics_ranking = output_dict['metrics ranking']
if hasattr(pd.DataFrame, 'map'):
    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map; prefer the new name to avoid the FutureWarning.
    formatted_metrics = metrics_ranking.map(format_numeric)
else:
    # Older pandas versions only provide applymap.
    formatted_metrics = metrics_ranking.applymap(format_numeric)

# Render the table with the index hidden.
display(formatted_metrics.style.hide())

Model,MAE,RMSE,MAPE,sMAPE,MAE Ranking,RMSE Ranking,MAPE Ranking,sMAPE Ranking
Weighted Ensemble: Inverse RMSE,1.988,2.6183,0.0276,1.8469,1,1,1,1
Weighted Ensemble: Simple,1.9928,2.6361,0.0276,1.8508,2,2,2,2
Weighted Ensemble: Inverse Variance,2.0047,2.6388,0.0278,1.8634,3,3,3,3
Exponential Smoothing,2.0731,2.7066,0.0287,1.9234,4,4,4,4
Naive,2.09,2.7479,0.029,1.9412,5,6,5,5
AutoSARIMA,2.1114,2.7306,0.0292,1.9536,6,5,6,6
Naive (drift),2.1175,2.7483,0.0294,1.9637,7,7,7,7
Meta Ensemble: RandomForest,2.2093,2.7906,0.0305,2.0488,8,8,8,8
AutoSARIMAX with covariates,2.2495,2.9685,0.0313,2.0857,10,9,9,9
Weighted Ensemble: Inverse Error Covariance,2.2608,3.0314,0.0314,2.1055,11,12,10,10


In [19]:
# Inspect the full pipeline output. A stray unary `+` was removed here:
# applying it to a dict raises TypeError.
output_dict

{'target and covariates': (Date
  2004-01-01   50.84
  2004-02-01   52.87
  2004-03-01   53.77
  2004-04-01   57.67
  2004-05-01   57.18
                ... 
  2013-08-01   72.60
  2013-09-01   72.94
  2013-10-01   75.54
  2013-11-01   75.67
  2013-12-01   78.51
  Freq: MS, Name: y, Length: 120, dtype: float64,
                 x1    x2     x3
  Date                          
  2004-01-01 427.60 55.34 900.33
  2004-02-01 434.06 54.96 900.78
  2004-03-01 453.26 56.47 899.51
  2004-04-01 459.37 56.70 903.52
  2004-05-01 462.35 61.56 905.07
  ...           ...   ...    ...
  2013-08-01 353.11 52.07 918.10
  2013-09-01 353.01 52.22 917.28
  2013-10-01 358.91 51.91 916.61
  2013-11-01 352.36 52.79 917.95
  2013-12-01 367.60 52.98 922.26
  
  [120 rows x 3 columns]),
 'individual_predictions':          Target  Naive  Naive (drift)  AutoSARIMA  Exponential Smoothing  \
 Date                                                                       
 2007-01   48.25  48.86          48.80       49.