# Run Pipeline

In [1]:
# Import modules
import os  # needed for os.path.join; presumably it was only in scope via the star import below

import pandas as pd

from pipeline.run_pipeline import run_pipeline
from paths import *

Loading paths...
Loading helper functions...
Loading metrics...
Loading ensembling methods...


In [2]:
## Define input data

# Name of the CSV file holding the input data
file_name = 'noisy_simdata.csv'

# Number of leading rows kept while testing the pipeline
# (monthly data, so 120 rows correspond to 10 years)
N_TEST_OBS = 120

# Combine the data directory and file name into the full path
# (SIMDATA_DIR is presumably provided by `from paths import *` -- confirm)
FILE_PATH = os.path.join(SIMDATA_DIR, file_name)

# Read the dataset, using the "Date" column as the index.
# The unsliced frame is kept under its own name so the subset below
# never overwrites the raw data.
df_full = pd.read_csv(FILE_PATH, index_col="Date")

# For testing the pipeline, only the first N_TEST_OBS rows are used for now
df = df_full.iloc[:N_TEST_OBS, :]

In [3]:
# Preview the first rows of the input data, followed by a blank line
print(df.head(), "\n")

# Report the first and last index labels and the number of rows
print(f"Monthly data goes from {df.index[0]} to {df.index[-1]}, resulting in {len(df)} observations.")

                    y          x1         x2          x3
Date                                                    
2004-01-01  50.840469  427.595799  55.337904  900.325291
2004-02-01  52.871538  434.062163  54.959155  900.775888
2004-03-01  53.769316  453.264284  56.470633  899.510058
2004-04-01  57.672973  459.367523  56.704233  903.524834
2004-05-01  57.182051  462.354356  61.557907  905.071762 

Monthly data from goes from 2004-01-01 to 2013-12-01, resulting in 120 observations.


In [4]:
# Run the pipeline on the input data; with verbose=True it logs its
# progress step by step. The five returned objects are unpacked by position.
(
    target,
    covariates,
    individual_predictions,
    full_predictions,
    metrics_ranking,
) = run_pipeline(df=df, verbose=True)

== Starting Step 1 in Pipeline: Data Preprocessing ==
Target: y
Covariates: x1, x2, x3

== Starting Step 2 in Pipeline: Individual Forecasts ==
Splitting data for individual forecasts (train/test ratio: 30/70)...
Initial training set has 36 observations and goes from 2004-01 to 2006-12
There are 84 periods to be forecasted by the individual models 2007-01 to 2013-12
Now generating 84 expanding window predictions for individual model: Naive
...finished!

Now generating 84 expanding window predictions for individual model: Naive (drift)
...finished!

Now generating 84 expanding window predictions for individual model: AutoSARIMA
Auto-fitting model...
AutoSARIMA forecast 1 / 84
AutoSARIMA forecast 10 / 84
AutoSARIMA forecast 20 / 84
...automatic refitting...
AutoSARIMA forecast 30 / 84
AutoSARIMA forecast 40 / 84
...automatic refitting...
AutoSARIMA forecast 50 / 84
AutoSARIMA forecast 60 / 84
...automatic refitting...
AutoSARIMA forecast 70 / 84
AutoSARIMA forecast 80 / 84
AutoSARIMA for

In [9]:
# Preview the first five rows of the individual models' predictions
display(individual_predictions.head(n=5))

Unnamed: 0_level_0,Target,Naive,Naive (drift),AutoSARIMA,AutoSARIMAX,Exponential Smoothing,Theta,STL,XGBoost,XGBoost (+ X)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2007-01,48.254157,48.857243,48.80058,49.606623,44.513789,49.016606,48.606701,50.913535,50.449028,50.17403
2007-02,47.765006,48.254157,48.182315,48.515764,44.359219,48.500499,48.615046,48.156229,50.282722,49.681396
2007-03,46.394433,47.765006,47.681886,47.973535,48.442625,48.001824,48.621316,47.900027,48.891392,53.583061
2007-04,44.212976,46.394433,46.277432,46.969779,49.511544,46.904911,48.621665,46.457,49.013432,47.341328
2007-05,41.846055,44.212976,44.043041,45.108821,53.863918,45.025232,48.613532,44.662606,49.433262,47.58807


In [8]:
# Preview the first five rows of the combined predictions table
# (ensemble columns alongside the individual model columns)
display(full_predictions.head(n=5))

Unnamed: 0_level_0,Ensemble_Simple,Ensemble_RSME,Ensemble_Variance,Ensemble_ErrorCorrelation,Ensemble_Metamodel_SVR,Ensemble_RandomForest,Naive,Naive (drift),AutoSARIMA,AutoSARIMAX,Exponential Smoothing,Theta,STL,XGBoost,XGBoost (+ X)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2009-02,58.721839,59.745097,48.948741,62.718376,62.610705,60.740552,60.443593,60.603645,62.170074,59.722797,60.600149,48.921312,61.936112,56.637573,57.461292
2009-03,58.020736,58.69684,48.968398,58.468973,58.410762,59.457494,58.960481,59.093595,59.591622,58.30464,59.336316,48.939636,59.631223,58.510246,59.818867
2009-04,57.406757,58.146572,48.985439,58.329507,58.369294,59.99693,59.107003,59.240334,58.591094,59.308861,59.159211,48.956654,58.818551,54.334373,59.14473
2009-05,57.031052,57.64281,49.000631,56.939218,56.894555,58.571507,57.922611,58.035026,58.658673,58.932197,58.203721,48.970097,58.237344,54.292839,60.026958
2009-06,58.166756,59.029451,49.024672,59.750536,58.828152,59.567902,60.408252,60.557749,59.548324,59.811478,59.897806,48.986705,59.00866,55.755554,59.526279


In [7]:
# Show the metrics/ranking table with the row index hidden
display(metrics_ranking.style.hide(axis="index"))

Model,MAPE,RMSE,MAPE_Ranking,RMSE_Ranking
AutoSARIMA,0.028379,2.718926,1,2
Exponential Smoothing,0.028698,2.706564,2,1
Ensemble_RSME,0.02895,2.844613,3,5
Naive,0.028985,2.747925,4,3
Naive (drift),0.029382,2.748343,5,4
Ensemble_Metamodel_SVR,0.030341,3.052429,6,9
Ensemble_RandomForest,0.030751,2.85704,7,6
AutoSARIMAX,0.031036,2.956004,8,7
Ensemble_ErrorCorrelation,0.031813,3.011904,9,8
STL,0.034365,3.26407,10,10
