# Interface for Pipeline Execution

## 1) Import Individual Forecasting Models, Ensemble Methods and Models, and Metrics

In [1]:
from models.forecasting_models import forecasting_models
from models.ensemble_models import ensemble_methods
from utils.metrics import metrics

Loading individual forecasting models...
Loading ensemble methods and models...
Loading metrics...


## 2) Import Modules, Functions, and Paths

In [2]:
import os

import pandas as pd

from paths import *
from pipeline.run_pipeline import run_pipeline

Loading paths...
Loading helper functions...


## 3) Select Input Data 

In [3]:
## Define input data

# Name of the dataset file inside SIMDATA_DIR (SIMDATA_DIR is provided by `paths`)
file_name = 'noisy_simdata.csv'

# Number of leading rows to keep for quick pipeline tests; None uses the full dataset.
# Example: 120 keeps the first 120 rows (the 10-year subset previously used for testing).
SUBSET_ROWS = None

# Combine the directory path and file name
FILE_PATH = os.path.join(SIMDATA_DIR, file_name)

# Read the dataset, using the first CSV column as the index
df = pd.read_csv(FILE_PATH, index_col=0)

# Optionally truncate to the first SUBSET_ROWS rows (no-op when SUBSET_ROWS is None)
if SUBSET_ROWS is not None:
    df = df.iloc[:SUBSET_ROWS, :]

## 4) Run Pipeline

In [4]:
# Note: per the original author, training takes about 9 min on the complete
# dataset and about 4.5 min on the subset.
(
    target,
    covariates,
    individual_predictions,
    full_predictions,
    metrics_ranking,
) = run_pipeline(
    df=df,
    forecasting_models=forecasting_models,
    ensemble_methods=ensemble_methods,
    metrics=metrics,
    verbose=True,
)

== Starting Pipeline ==
Data Insights:
                    y          x1         x2          x3
Date                                                    
2004-01-01  50.840469  427.595799  55.337904  900.325291
2004-02-01  52.871538  434.062163  54.959155  900.775888
2004-03-01  53.769316  453.264284  56.470633  899.510058
2004-04-01  57.672973  459.367523  56.704233  903.524834
2004-05-01  57.182051  462.354356  61.557907  905.071762 

Monthly data from goes from 2004-01-01 to 2023-12-01, resulting in 240 observations.

== Starting Step 1 in Pipeline: Data Preprocessing ==
Target: y
Covariates: x1, x2, x3

== Starting Step 2 in Pipeline: Individual Forecasts ==
Splitting data for individual forecasts (train/test ratio: 30/70)...
Initial training set has 72 observations and goes from 2004-01 to 2009-12
There are 168 periods to be forecasted by the individual models 2010-01 to 2023-12

Now generating 168 expanding window predictions for individual model: Naive
...finished!

Now generatin

## 5) Show Ranking Table

In [5]:
# Build a Styler for the ranking table with hide() applied, then render it
ranking_styler = metrics_ranking.style.hide()
display(ranking_styler)

Model,MAPE,RMSE,SMAPE,MAPE_Ranking,RMSE_Ranking,SMAPE_Ranking
Naive,3.38177,2.698995,2.246949,1,2,1
Naive (drift),3.40006,2.717839,2.257468,2,4,2
AutoSARIMA,3.432,2.683448,2.281666,3,1,3
Exponential Smoothing,3.468154,2.69932,2.304909,4,3,4
AutoSARIMAX,3.617485,2.903758,2.400976,5,9,5
Ens_weighted_Inv_RMSE,3.618176,2.779209,2.404765,6,5,6
Ens_meta_SVR,3.692357,2.896261,2.443058,7,8,7
Ens_weighted_Simple,3.69328,2.838778,2.455142,8,7,8
Ens_weighted_Inv_Variance,3.704023,2.828534,2.461753,9,6,9
Ens_meta_RandomForest,4.081674,3.243891,2.702571,10,10,10
