# Interface for Pipeline Execution
## (EUR-USD exchange data)

In [1]:
# Import models, metrics, paths and functions
from utils.helpers import csv_reader
from utils.paths import *
from pipeline.run_pipeline import run_pipeline

from models import MODELS
from metrics import METRICS

Loading paths...
Loading helper functions...
Loading data transformers...
Loading models...
Loading metrics...


In [4]:
# Show the metric and model registries that are currently loaded
for registry in (METRICS, MODELS):
    print(registry)

{'MAE': <function mean_absolute_error at 0x000002D98E3F5620>, 'RMSE': <function root_mean_squared_error at 0x000002D98E520220>, 'MAPE': <function mean_absolute_percentage_error at 0x000002D98E3F59E0>, 'sMAPE': <function symmetric_mean_absolute_percentage_error at 0x000002D98E520360>}


In [9]:
## Using the EUR-USD Exchange Rate data

# Load the EUR-USD file, requesting the 'datetime' and 'bid_close' columns
df = csv_reader(TESTDATA_DIR, 'eurusd', columns=['datetime', 'bid_close'])

# Keep only the first 10,000 rows so the test run stays small
df = df.iloc[:10000]
display(df)

Unnamed: 0_level_0,bid_close
datetime,Unnamed: 1_level_1
2020-01-01 00:00:00,
2020-01-01 00:15:00,
2020-01-01 00:30:00,
2020-01-01 00:45:00,
2020-01-01 01:00:00,
...,...
2020-04-14 02:45:00,1.09466
2020-04-14 03:00:00,1.09463
2020-04-14 03:15:00,1.09456
2020-04-14 03:30:00,1.09432


In [10]:
# Run the full pipeline on the EUR-USD subset and keep its results for later cells
output_dict = run_pipeline(
    df=df,
    models=MODELS,
    metrics=METRICS,
    # Aggregate to business-day frequency using the 'last' method
    agg_method='last',
    agg_freq='B',
    # Initial training share for the forecasters (logged as a 30/70 train/test split);
    # ensemble_init_train presumably plays the same role for the ensemblers -- TODO confirm
    forecast_init_train=0.3,
    ensemble_init_train=0.3,
    autosarimax_refit_interval=0.33,
    # Restrict the run to a subset of the registered forecasters and ensemblers
    select_forecasters=['Naive', 'STL'],
    select_ensemblers=['Simple', 'Inverse RMSE'],
    verbose=True,
)

[2024-03-06 18:29] Starting  Pipeline...[0m
[33;1m
== Starting Step 1 in Pipeline: Data Preprocessing ==
[0m
[33;1mSearching for time information...[0m
[33;1mDates found in 'index' column![0m
[33;1mInferred frequency: 15T[0m
[33;1mData from goes from 2020-01-01 to 2020-04-14, resulting in 10000 observations.
[0m
[33;1mAggregating data to frequency 'business day' using method 'last' and dropping NaNs...[0m
[33;1m...finished!
Data now has 74 observations.
[0m
[33;1mSelecting target and covariates...[0m
[33;1mTarget: bid_close[0m
[33;1mCovariates: None[0m
[33;1m
Data Insight:
[0m
[33;1m            bid_close
datetime             
2020-01-02    1.11702
2020-01-03    1.11599
2020-01-06    1.11948
2020-01-07    1.11530
2020-01-08    1.11124[0m
[33;1m[Time elapsed: 00s]
[0m
[33;1m
== Starting Step 2 in Pipeline: Individual Forecasts ==
[0m
[33;1mSplitting data (train/test ratio: 30/70)...
Initial training set has 22 observations [0m
[33;1mand goes from 2020-01-0

In [13]:
import pandas as pd

# Globally render floats with two decimals in pandas' default display
pd.options.display.float_format = '{:.2f}'.format
# Formatter for performance metric values
def format_numeric(val):
    """Return ``val`` as a three-decimal string when it is a float.

    Any non-float value (for example an integer rank or a model name)
    is handed back unchanged.
    """
    if not isinstance(val, float):
        return val
    return '{:.3f}'.format(val)
# Apply the formatter to every cell of the metrics ranking table.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map,
# so use the new name when the installed pandas provides it and fall back otherwise.
metrics_ranking = output_dict['metrics ranking']
if hasattr(pd.DataFrame, 'map'):
    formatted_metrics = metrics_ranking.map(format_numeric)
else:
    formatted_metrics = metrics_ranking.applymap(format_numeric)

# Render the formatted table through the Styler with the index hidden
display(formatted_metrics.style.hide())


Model,MAE,RMSE,MAPE,sMAPE,MAE Ranking,RMSE Ranking,MAPE Ranking,sMAPE Ranking
Naive,0.007,0.009,0.006,0.403,1,1,1,1
Weighted Ensemble: Inverse RMSE,0.009,0.011,0.008,0.545,2,2,2,2
Weighted Ensemble: Simple,0.01,0.012,0.009,0.604,3,3,3,3
STL,0.014,0.018,0.013,0.86,4,4,4,4
