SK TIME REGRESSION 

In [39]:
#   IMPORTS
from sktime.utils.plotting import plot_series
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.forecasting.naive import NaiveForecaster
from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError
from sktime.performance_metrics.forecasting import mean_absolute_error
from sktime.forecasting.compose import make_reduction
from sktime.forecasting.fbprophet import Prophet
from prophet.forecaster import Prophet 
from sktime.forecasting.ets import AutoETS
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.bats import BATS
from sktime.forecasting.tbats import TBATS
from sktime.forecasting.var import VAR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from sktime.forecasting.arima import AutoARIMA
from sktime.forecasting.arima import ARIMA
from sktime.forecasting.sarimax import SARIMAX
from pandas import read_csv
import pandas as pd 
import numpy as np
import warnings
import datetime
warnings.filterwarnings("ignore")


In [40]:
#   LOAD IN DATA
data = pd.read_csv('data_for_inequalities.csv')
#View head
data.head()

Unnamed: 0,Date,SG_Y
0,2017/12,76.5
1,2018/01,82.75
2,2018/02,85.5
3,2018/03,86.125
4,2018/04,88.7


In [41]:
#   USER DEFINED
#Set target_col to column name you want to forecast 
#Example uses column named SG_Y
target_col = 'SG_Y'

In [42]:
#   TRAIN/TEST SPLIT 
y = data[target_col]
y_train, y_test = temporal_train_test_split(y, test_size=12)
y_test

36    104.250000
37    103.458333
38    103.916667
39    108.541667
40    108.458333
41    107.291667
42    106.583333
43    101.208333
44    100.125000
45     99.791667
46    100.791667
47    102.958333
Name: SG_Y, dtype: float64

In [43]:
#   Set Date
data['Date'][36]

'2021/12'

In [44]:
#Set Forecasting horizon ------ 12 months
fh = np.arange(1, len(y_test)+1)
fh

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

UNIVARIATRE REGRESSION MODELS 

In [45]:

#   Build Random Forest Model and score MAE in sample Forecast
r_forest = RandomForestRegressor(random_state = 0)
RFforecaster = make_reduction(r_forest, window_length = 12)
#Fits model on training data
RFforecaster.fit(y_train)
#Forecasts
RFy_preds = RFforecaster.predict(fh)
#Calculates MAE for Random Forest model
RFmae = mean_absolute_error(y_test, RFy_preds)
#Prints MAE score for model
print('Random Forest Model - MAE is %.6f' % (RFmae))
print('--------Random Forest Forecast--------')
RFy_preds

Random Forest Model - MAE is 2.606042
--------Random Forest Forecast--------


36    104.089167
37    104.138333
38    104.135000
39    104.172500
40    104.088333
41    104.195000
42    104.084167
43    104.195000
44    104.084167
45    104.195000
46    104.084167
47    104.195000
dtype: float64

In [46]:
#   Build Decision Tree Model and score MAE in sample Forecast
d_tree = DecisionTreeRegressor(random_state = 0)
DTforecaster = make_reduction(d_tree, window_length = 12)
#Fits model on training data
DTforecaster.fit(y_train)
#Forecasts
DTy_preds = DTforecaster.predict(fh)
#Calculates MAE for Decision Tree model
DTmae = mean_absolute_error(y_test, DTy_preds)
#Prints MAE score for model
print('Decision Tree Model - MAE is %.6f' % (DTmae))
print('--------Decision Tree Forecast--------')
DTy_preds

Decision Tree Model - MAE is 2.579861
--------Decision Tree Forecast--------


36    104.333333
37    103.916667
38    104.416667
39    103.916667
40    104.416667
41    103.916667
42    104.416667
43    103.916667
44    104.416667
45    103.916667
46    104.416667
47    103.916667
dtype: float64

In [60]:
#   Build XGBoost Model and score MAE in sample Forecast
xgb = XGBRegressor(random_state = 0, objective='reg:squarederror', n_estimators = 1000)
XGBforecaster = make_reduction(xgb, window_length = 12, strategy="recursive")
#Fits model on training data
XGBforecaster.fit(y_train)
#Forecasts
XGBy_preds = XGBforecaster.predict(fh)
#Calculates MAE for XGBoost
XGBmae = mean_absolute_error(y_test, XGBy_preds)
#Prints MAE score for model
print('XGBOOST Model - MAE is %.6f' % (XGBmae))
print('--------XGBoost Forecast--------')
XGBy_preds

XGBOOST Model - MAE is 2.593700
--------XGBoost Forecast--------


36    103.999245
37    104.329330
38    103.836288
39    103.922264
40    103.922264
41    103.922264
42    103.922264
43    103.922264
44    103.922264
45    103.922264
46    103.922264
47    103.922264
dtype: float64

In [48]:
#   Build K Nearest Neighbors Model and score MAE in sample Forecast
kNN = KNeighborsRegressor(n_neighbors=1)
KNNforecaster = make_reduction(kNN, window_length=12, strategy="recursive")
#Fits model on training data
KNNforecaster.fit(y_train)
#Forecasts
KNNy_preds = KNNforecaster.predict(fh)
#Calculates MAE for KNN
KNNmae = mean_absolute_error(y_test, KNNy_preds)
#Prints MAE score for model
print('K-Nearest Neighbors Model - MAE is %.6f' % (KNNmae))
print('--------K-NN Forecast--------')
KNNy_preds

K-Nearest Neighbors Model - MAE is 2.559028
--------K-NN Forecast--------


36    103.916667
37    103.916667
38    103.916667
39    103.916667
40    103.916667
41    103.916667
42    103.916667
43    103.916667
44    103.916667
45    103.916667
46    103.916667
47    103.916667
dtype: float64

In [49]:
#   Build AUTO ARIMA Model and score MAE in sample Forecast
AAforecaster = AutoARIMA(sp=12)
#Fits model on training data
AAforecaster.fit(y_train)
#Forecasts
AAy_preds = AAforecaster.predict(fh)
#Calculates MAE for Auto ARIMA
autoArimaMAE = mean_absolute_error(y_test, AAy_preds)
#Prints MAE score for model
print('Auto ARIMA Model - MAE is %.6f' % (autoArimaMAE))
print('--------Auto ARIMA Forecast--------')
AAy_preds

Auto ARIMA Model - MAE is 6.104247
--------Auto ARIMA Forecast--------


36    104.606131
37    106.023768
38    107.734597
39    109.315119
40    109.625139
41    110.065467
42    111.059604
43    110.099123
44    111.158413
45    112.543473
46    113.505033
47    114.890093
dtype: float64

In [50]:
#   Build ARIMA Model and score MAE in sample Forecast
Aforecaster = ARIMA(order=(1,0,1))

#Fits model on training data
Aforecaster.fit(y_train)
#Forecasts
Ay_preds = Aforecaster.predict(-fh)
#Calculates MAE for Auto ARIMA
ArimaMAE = mean_absolute_error(y_test, Ay_preds)
#Prints MAE score for model
print('ARIMA Model - MAE is %.6f' % (ArimaMAE))
print('--------ARIMA Forecast--------')
Ay_preds

ARIMA Model - MAE is 7.521689
--------ARIMA Forecast--------


23     92.321507
24     97.198792
25     96.302627
26     95.893231
27     94.143612
28     93.547725
29     97.139154
30     98.163094
31     98.386985
32    106.331560
33    102.874812
34    103.858961
dtype: float64

In [51]:
#   Build SARIMAX Model and score MAE in sample Forecast
Sforecaster = SARIMAX(trend='t', seasonal_order=(1, 0, 0, 12))
#Fits model on training data
Sforecaster.fit(y_train)
#Forecasts
Sy_preds = Sforecaster.predict(fh)
#Calculates MAE for SARIMAX
SarimaxMAE = mean_absolute_error(y_test, Sy_preds)
#Prints MAE score for model
print('SARIMAX Model - MAE is %.6f' % (SarimaxMAE))
print('--------SARIMAX Forecast--------')
Sy_preds

SARIMAX Model - MAE is 9.939516
--------SARIMAX Forecast--------


36    105.138474
37    106.942127
38    109.038337
39    111.068942
40    112.069702
41    113.225204
42    114.893564
43    114.953479
44    116.765914
45    118.898294
46    120.716968
47    122.938188
Name: SG_Y, dtype: float64

In [52]:

from sktime.forecasting.fbprophet import Prophet
#   Build Prophet Model and Score MAE in sample Forecast
Pforecaster = Prophet()
#Fits model on training data
Pforecaster.fit(y_train)
#Forecasts
PROPHy_preds = Pforecaster.predict(fh)
#Calculates MAE for KNN
Proph_mae = mean_absolute_error(y_test, PROPHy_preds)
#Prints MAE score for model
print('PROPHET Model - MAE is %.6f' % (Proph_mae))
print('--------Prophet Forecast--------')
PROPHy_preds

09:02:44 - cmdstanpy - INFO - Chain [1] start processing
09:02:44 - cmdstanpy - INFO - Chain [1] done processing


PROPHET Model - MAE is 6.942886
--------Prophet Forecast--------


36    105.038421
37    106.195435
38    108.219995
39    109.443236
40    109.607162
41    110.253746
42    110.377098
43    111.648478
44    112.805492
45    114.830051
46    116.053293
47    116.217219
dtype: float64

In [53]:
#   Build Auto ETS Model and score MAE in sample Forecast
ETSforecaster = AutoETS(auto=True, sp=4, n_jobs=-1)
#Fits model on training data
ETSforecaster.fit(y_train)
#Forecasts
ETSy_preds = ETSforecaster.predict(fh)
#Calculates MAE for ETS 
ETSmae = mean_absolute_error(y_test, ETSy_preds, symmetric=False)
#Prints MAE score for model
print('AUTO ETS Model - MAE is %.6f' % (ETSmae))
print('--------Auto ETS Forecast--------')
ETSy_preds

AUTO ETS Model - MAE is 2.600905
--------Auto ETS Forecast--------


36    103.960567
37    103.995618
38    104.023659
39    104.046092
40    104.064038
41    104.078394
42    104.089880
43    104.099068
44    104.106419
45    104.112300
46    104.117004
47    104.120768
dtype: float64

In [54]:
#   Build Exponential Smoothing Model and score MAE in sample Forecast
ESforecaster = ExponentialSmoothing(trend="add", seasonal="additive", sp=12)
#Fits model on training data
ESforecaster.fit(y_train)
#Forecasts
ESy_preds = ESforecaster.predict(fh)
#Calculates MAE for Exponential Smoothing
ESmae = mean_absolute_error(y_test, ESy_preds, symmetric=False)
#Prints MAE score for model
print('Exponential Smoothing Model - MAE is %.6f' % (ESmae))
print('--------Exponential Smoothing Forecast--------')
ESy_preds

Exponential Smoothing Model - MAE is 5.073832
--------Exponential Smoothing Forecast--------


36    103.370652
37    103.458847
38    102.519828
39    100.345130
40     99.543366
41     99.094908
42     98.062865
43     98.437097
44     96.397269
45     95.312068
46     94.812829
47     95.135187
Name: SG_Y, dtype: float64

In [55]:
#   Build BATS Model and score MAE in sample Forecast
BATSforecaster = BATS(sp=12, use_trend=True, use_box_cox=False)
#Fits model on training data
BATSforecaster.fit(y_train)
#Forecasts
BATSy_preds = BATSforecaster.predict(fh)
#Calculates MAE for BATS Model
BATSmae = mean_absolute_error(y_test, BATSy_preds)
#Prints MAE score for model
print('BATS Model - MAE is %.6f' % (BATSmae))
print('--------BATS Model Forecast--------')
BATSy_preds

BATS Model - MAE is 3.050711
--------BATS Model Forecast--------


36    104.148186
37    104.329425
38    104.510663
39    104.691902
40    104.873140
41    105.054379
42    105.235617
43    105.416856
44    105.598094
45    105.779333
46    105.960571
47    106.141810
Name: SG_Y, dtype: float64

In [56]:
#   Build TBATS Model and score MAE in sample Forecast
TBATSforecaster = TBATS(sp=12, use_trend=True, use_box_cox=False)
#Fits model on training data
TBATSforecaster.fit(y_train)
#Forecasts
TBATSy_preds = TBATSforecaster.predict(fh)
#Calculates MAE for BATS Model
TBATSmae = mean_absolute_error(y_test, TBATSy_preds)
#Prints MAE score for model
print('TBATS Model - MAE is %.6f' % (TBATSmae))
print('--------TBATS Model Forecast--------')
TBATSy_preds

TBATS Model - MAE is 3.050711
--------TBATS Model Forecast--------


36    104.148186
37    104.329425
38    104.510663
39    104.691902
40    104.873140
41    105.054379
42    105.235617
43    105.416856
44    105.598094
45    105.779333
46    105.960571
47    106.141810
Name: SG_Y, dtype: float64

In [57]:
mae_scores = np.array([RFmae, DTmae, XGBmae, KNNmae, autoArimaMAE, ArimaMAE, SarimaxMAE, Proph_mae, ETSmae, ESmae, BATSmae, TBATSmae])

In [58]:
#   EVALUATE
print('Random Forest Model - MAE is %.6f' % (RFmae))
print('Decision Tree Model - MAE is %.6f' % (DTmae))
print('XGBOOST Model       - MAE is %.6f' % (XGBmae))
print('K-NN Model          - MAE is %.6f' % (KNNmae))
print('Auto ARIMA Model    - MAE is %.6f' % (autoArimaMAE))
print('ARIMA Model         - MAE is %.6f' % (ArimaMAE))
print('SARIMAX Model       - MAE is %.6f' % (SarimaxMAE))
print('PROPHET Model       - MAE is %.6f' % (Proph_mae))
print('AUTO ETS Model      - MAE is %.6f' % (ETSmae))
print('Exponential S Model - MAE is %.6f' % (ESmae))
print('BATS Model          - MAE is %.6f' % (BATSmae))
print('TBATS Model         - MAE is %.6f' % (TBATSmae))

Random Forest Model - MAE is 2.606042
Decision Tree Model - MAE is 2.579861
XGBOOST Model       - MAE is 2.496213
K-NN Model          - MAE is 2.559028
Auto ARIMA Model    - MAE is 6.104247
ARIMA Model         - MAE is 7.521689
SARIMAX Model       - MAE is 9.939516
PROPHET Model       - MAE is 6.942886
AUTO ETS Model      - MAE is 2.600905
Exponential S Model - MAE is 5.073832
BATS Model          - MAE is 3.050711
TBATS Model         - MAE is 3.050711


In [59]:
#   Find Best Fit Model
best_fit_model = np.amin(mae_scores)
print(best_fit_model)
#   For this data it shows ARIMA as best fit

2.4962126438981116
