# Interface for Pipeline Execution

## 1) Import Individual Forecasting Models, Ensemble Methods and Models, and Metrics

In [1]:
from models.forecasting import FC_MODELS
from models.ensembling import ENS_METHODS
from utils.metrics import metrics

Loading individual forecasting models...
Loading ensemble methods and models...
Loading metrics...


## 2) Import Modules, Functions, and Paths

In [2]:
from utils.helpers import csv_reader
from utils.paths import *
from pipeline.run_pipeline import run_pipeline

Loading helper functions...
Loading paths...


## 3) Select Input Data 

In [3]:
## Read input data
# df = csv_reader(SIMDATA_DIR, 'noisy_simdata')
# print(df.head())

# For testing the pipeline, I use a subset of 10 years for now:
# df = df.iloc[:120, :]

In [4]:
## EUR-USD exchange rate data: request the 'datetime' and 'bid_close' columns
df = csv_reader(
    TESTDATA_DIR,
    'eurusd_df',
    columns=['datetime', 'bid_close'],
)
print(df.head())

# While testing the pipeline, keep only the first 10,000 rows for now
df = df.iloc[:10000]

                     bid_close
datetime                      
2020-01-01 00:00:00        NaN
2020-01-01 00:15:00        NaN
2020-01-01 00:30:00        NaN
2020-01-01 00:45:00        NaN
2020-01-01 01:00:00        NaN


## 4) Run Pipeline

## 4.1) ... on Simulated Data

In [None]:
# target, covariates, individual_predictions, full_predictions, metrics_ranking = (
#     run_pipeline(
#         df=df, forecasting_models=FC_MODELS, ensemble_methods=ENS_METHODS, metrics=metrics, 
#         autosarimax_refit_interval=0.33,
#         forecast_init_train=0.3, ensemble_init_train=0.3,
#         verbose=True
#     )
# )

## 4.2) ... on EUR-USD Exchange Data

In [10]:
# Run the pipeline on the EUR-USD data and unpack its five results.
target, covariates, individual_predictions, full_predictions, metrics_ranking = run_pipeline(
    df=df,
    forecasting_models=FC_MODELS,
    ensemble_methods=ENS_METHODS,
    metrics=metrics,
    # Aggregate the 15-minute series to business days, keeping the last value
    agg_method='last',
    agg_freq='B',
    # forecast_init_train=0.3 yields the 30/70 train/test split reported in the log;
    # ensemble_init_train presumably plays the same role for the ensemble step (TODO confirm)
    forecast_init_train=0.3,
    ensemble_init_train=0.3,
    autosarimax_refit_interval=0.33,
    # Restrict this run to a subset of the loaded models and ensemble methods
    select_individual_models=['Naive', 'STL'],
    select_ensemble_methods=['Simple', 'Inverse RMSE'],
    verbose=True,
)

Starting  Pipeline...

== Starting Step 1 in Pipeline: Data Preprocessing ==

Searching for time information...
Dates found in 'index' column!
Inferred frequency: 15T
Data from goes from 2020-01-01 to 2020-04-14, resulting in 10000 observations.

Aggregating data to frequency 'business day' using method 'last' and dropping NaNs...
...finished! Data now has 74 observations.

Selecting target and covariates...
Target: bid_close
Covariates: None

Data Insight:
            bid_close
datetime             
2020-01-02    1.11702
2020-01-03    1.11599
2020-01-06    1.11948
2020-01-07    1.11530
2020-01-08    1.11124 

== Starting Step 2 in Pipeline: Individual Forecasts ==
Splitting data (train/test ratio: 30/70)... 
Initial training set has 22 observations and goes from 2020-01-02 to 2020-01-31 
There are 52 periods to be forecasted: 2020-02-03 to 2020-04-14

Now generating 52 one-step ahead expanding window predictions from model: Naive (sktime)
...finished!
Now generating 52 one-step ahead 

## 5) Show Ranking Table

In [None]:
# Build a styled view of the ranking table via Styler.hide() and render it
ranking_view = metrics_ranking.style.hide()
display(ranking_view)