# Interface for Pipeline Execution

## 1) Import Individual Forecasting Models, Ensemble Methods and Models, and Metrics

In [1]:
from models.forecasting import FC_MODELS
from models.ensembling import ENS_METHODS
from utils.metrics import metrics

Loading individual forecasting models...
Loading ensemble methods and models...
Loading metrics...


## 2) Import Modules, Functions, and Paths

In [2]:
from utils.helpers import csv_reader
from utils.paths import *
from pipeline.run_pipeline import run_pipeline

Loading helper functions...
Loading paths...


## 3) Select Input Data 

In [3]:
## Read input data
# df = csv_reader(SIMDATA_DIR, 'noisy_simdata')
# print(df.head())

# For testing the pipeline, I use a subset of 10 years for now:
# df = df.iloc[:120, :]

In [4]:
## Load the EUR-USD exchange rate data, requesting the 'datetime' and 'bid_close' columns
df = csv_reader(
    TESTDATA_DIR,
    'eurusd',
    columns=['datetime', 'bid_close'],
)
print(df.head())

# While testing the pipeline, keep only the first 10,000 rows:
df = df.iloc[:10000]

                     bid_close
datetime                      
2020-01-01 00:00:00        NaN
2020-01-01 00:15:00        NaN
2020-01-01 00:30:00        NaN
2020-01-01 00:45:00        NaN
2020-01-01 01:00:00        NaN


## 4) Run Pipeline

### 4.1) ... on Simulated Data

In [5]:
# output_dict = (
#     run_pipeline(
#         df=df,  forecasting_models=FC_MODELS, ensemble_methods=ENS_METHODS, metrics=metrics,
#         verbose=True
#     )
# )

### 4.2) ... on EUR-USD Exchange Data

In [6]:
# Run the pipeline on the EUR-USD subset loaded above and keep its result
# in `output_dict` for the ranking table further down.
output_dict = run_pipeline(
    df=df,
    forecasting_models=FC_MODELS,
    ensemble_methods=ENS_METHODS,
    metrics=metrics,
    # Aggregation settings ('B' is reported as "business day" in the pipeline log)
    agg_method='last',
    agg_freq='B',
    # Initial training shares for the forecasting and ensembling steps
    forecast_init_train=0.3,
    ensemble_init_train=0.3,
    autosarimax_refit_interval=0.33,
    # Restrict this run to a single forecaster and two ensemble methods
    select_individual_models=['Naive'],
    select_ensemble_methods=['Simple', 'Inverse RMSE'],
    verbose=True,
)

[2024-03-04 14:05] Starting  Pipeline...[0m
[33;1m
== Starting Step 1 in Pipeline: Data Preprocessing ==
[0m
[33;1mSearching for time information...[0m
[33;1mDates found in 'index' column![0m
[33;1mInferred frequency: 15T[0m
[33;1mData from goes from 2020-01-01 to 2020-04-14, resulting in 10000 observations.
[0m
[33;1mAggregating data to frequency 'business day' using method 'last' and dropping NaNs...[0m
[33;1m...finished!
Data now has 74 observations.
[0m
[33;1mSelecting target and covariates...[0m
[33;1mTarget: bid_close[0m
[33;1mCovariates: None[0m
[33;1m
Data Insight:
[0m
[33;1m            bid_close
datetime             
2020-01-02    1.11702
2020-01-03    1.11599
2020-01-06    1.11948
2020-01-07    1.11530
2020-01-08    1.11124[0m
[33;1m[Time elapsed: 00s]
[0m
[33;1m
== Starting Step 2 in Pipeline: Individual Forecasts ==
[0m
[33;1mSplitting data (train/test ratio: 30/70)...
Initial training set has 22 observations [0m
[33;1mand goes from 2020-01-0

## 5) Show Ranking Table

In [7]:
# Render the metrics ranking table with the index column hidden
display(output_dict['metrics ranking'].style.hide(axis='index'))

Model,MAPE,RMSE,SMAPE,MAPE Ranking,RMSE Ranking,SMAPE Ranking
Naive,0.604318,0.008753,0.402667,2,2,2
Weighted Ensemble: Simple,0.604318,0.008753,0.402667,2,2,2
Weighted Ensemble: Inverse RMSE,0.604318,0.008753,0.402667,2,2,2
