# Interface for Pipeline Execution
## (EUR-USD Exchange Data)

In [1]:
# Import models, metrics, paths and functions
from utils.helpers import csv_reader, display_ranking_table
from utils.paths import *
from pipeline.run_pipeline import run_pipeline

from models import MODELS
from metrics import METRICS

Loading paths...
Loading helper functions...
Loading data transformers...
Loading models...
Loading metrics...


In [2]:
# Optional: uncomment the lines below to print the available METRICS and MODELS
# print(METRICS)
# print(MODELS)

In [3]:
## Using the EUR-USD Exchange Rate data

# Load the EUR-USD quotes from the test-data directory, requesting only the
# timestamp and bid close columns.
df = csv_reader(
    TESTDATA_DIR,
    'eurusd',
    columns=['datetime', 'bid_close'],
)
# Summarise the index range, dtypes and non-null counts so gaps in the
# series are visible before any modelling.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 143713 entries, 2020-01-01 00:00:00 to 2024-02-06 00:00:00
Data columns (total 1 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   bid_close  101921 non-null  float64
dtypes: float64(1)
memory usage: 2.2+ MB


In [4]:
# Rich-render the loaded frame for a visual check of its first and last rows
# (the notebook display truncates long frames rather than printing every row).
display(df)

Unnamed: 0_level_0,bid_close
datetime,Unnamed: 1_level_1
2020-01-01 00:00:00,
2020-01-01 00:15:00,
2020-01-01 00:30:00,
2020-01-01 00:45:00,
2020-01-01 01:00:00,
...,...
2024-02-05 23:00:00,1.07419
2024-02-05 23:15:00,1.07423
2024-02-05 23:30:00,1.07420
2024-02-05 23:45:00,1.07404


In [5]:
# Define reasonable subset for testing:
# the row count is named so it can be tuned in one place instead of being
# buried as a literal inside the head() call.
N_SUBSET_ROWS = 10000

# Keep only the first N_SUBSET_ROWS rows of the full frame; `subset` is the
# frame handed to run_pipeline further down.
subset = df.head(N_SUBSET_ROWS)
display(subset)

Unnamed: 0_level_0,bid_close
datetime,Unnamed: 1_level_1
2020-01-01 00:00:00,
2020-01-01 00:15:00,
2020-01-01 00:30:00,
2020-01-01 00:45:00,
2020-01-01 01:00:00,
...,...
2020-04-14 02:45:00,1.09466
2020-04-14 03:00:00,1.09463
2020-04-14 03:15:00,1.09456
2020-04-14 03:30:00,1.09432


In [6]:
# Run the forecasting/ensembling pipeline on the test subset and keep its
# results in `output_dict` for the reporting cells below.
# NOTE(review): the per-argument notes are inferred from the verbose log of a
# previous run, not from the run_pipeline source — confirm against it.
output_dict = run_pipeline(
    df=subset,
    models=MODELS,
    metrics=METRICS,
    # Optional date window, currently disabled:
    # start="2020-01-01", end="2020-03-31",
    agg_method='last',          # aggregation method reported in the log
    agg_freq='B',               # logged as 'business day' frequency
    forecast_init_train=0.3,    # logged as a 30/70 train/test split
    ensemble_init_train=0.3,    # presumably the analogous share for ensemblers
    select_forecaster='all',
    select_ensemblers='all',
    verbose=True,               # emits the step-by-step progress log
    fh=1,                       # presumably the forecast horizon (one step)
)

[2024-03-07 13:02] Starting  Pipeline...[0m
[33;1m
== Pipeline Step 1: Data Preprocessing ==
[0m
[33;1mSearching time information...[0m
[33;1mDates found in 'index' column![0m
[33;1mInferred frequency: 15T[0m
[33;1mData goes from 2020-01-01 00:00 to 2020-04-14 03:45, resulting in 10000 observations.
[0m
[33;1mAggregating data to frequency 'business day' using method 'last' and dropping NaNs...[0m
[33;1m...finished!
Data now has 74 observations.
[0m
[33;1mSelecting target and covariates...[0m
[33;1mTarget: bid_close[0m
[33;1mCovariates: None[0m
[33;1m
Data Insights:[0m
[33;1m            bid_close
datetime             
2020-01-02    1.11702
2020-01-03    1.11599
2020-01-06    1.11948
2020-01-07    1.11530
2020-01-08    1.11124[0m
[33;1m
[Time elapsed: 00s]
[0m
[33;1m
== Pipeline Step 2: Individual Models' Predictions ==
[0m
[33;1mSplitting data for training of forecasters (train/test ratio: 30/70)...[0m
[33;1mInitial training set has 23 observations and go

  aggregated_df = data.resample(agg_freq).apply(method)
  aggregated_df = data.resample(agg_freq).apply(method)


[33;1mPerforming out-of-sample predictions...[0m
[33;1m...finished![0m
[33;1m
Now generating 51 one-step ahead historical expanding window predictions from model: Naive (drift) (sktime)[0m
[33;1mPerforming out-of-sample predictions...[0m
[33;1m...finished![0m
[33;1m
Now generating 51 one-step ahead historical expanding window predictions from model: AutoSARIMA (sktime)[0m
[33;1mAuto-fitting model. Refitting every 17th period.[0m
[33;1m...forecast 1 / 51[0m
[33;1m...forecast 13 / 51[0m
[33;1m...forecast 26 / 51[0m
[33;1m...forecast 39 / 51[0m
[33;1mPerforming out-of-sample predictions...[0m
[33;1m...finished![0m
[33;1m
Now generating 51 one-step ahead historical expanding window predictions from model: Exponential Smoothing (sktime)[0m
[33;1mPerforming out-of-sample predictions...[0m
[33;1m...finished![0m
[33;1m
Now generating 51 one-step ahead historical expanding window predictions from model: STL (sktime)[0m
[33;1mPerforming out-of-sample prediction

In [8]:
# Render the table stored under the 'metrics ranking' key of the pipeline
# output (one row per model with its metric scores and rankings).
display_ranking_table(output_dict['metrics ranking'])


Model,RMSE,MAPE,sMAPE,RMSE Ranking,MAPE Ranking,sMAPE Ranking
Weighted Ensemble: Inverse Error Covariance,0.009,0.006,0.395,1,1,1
Naive,0.009,0.006,0.418,2,2,2
Exponential Smoothing,0.009,0.006,0.419,3,3,3
Naive (drift),0.009,0.006,0.427,4,4,4
Meta Ensemble: Linear Regression,0.01,0.007,0.441,5,5,5
XGBoost,0.01,0.007,0.473,6,6,6
Weighted Ensemble: Inverse RMSE,0.01,0.007,0.482,8,7,7
Weighted Ensemble: Inverse Variance,0.01,0.007,0.487,7,8,8
Weighted Ensemble: Simple,0.011,0.008,0.506,10,9,9
Meta Ensemble: RandomForest,0.01,0.008,0.517,9,10,10
