In [2]:
#   IMPORTS
from sktime.utils.plotting import plot_series
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.forecasting.naive import NaiveForecaster
from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError
from sktime.performance_metrics.forecasting import mean_absolute_error
from sktime.forecasting.compose import make_reduction
from sktime.forecasting.fbprophet import Prophet
from sktime.forecasting.ets import AutoETS
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.bats import BATS
from sktime.forecasting.tbats import TBATS
from sktime.forecasting.var import VAR
from sktime.forecasting.arima import AutoARIMA
from sktime.forecasting.arima import ARIMA
from sktime.forecasting.sarimax import SARIMAX
from sktime.forecasting.theta import ThetaForecaster
from sktime.forecasting.trend import TrendForecaster
from sktime.forecasting.compose import EnsembleForecaster

from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from prophet.forecaster import Prophet 

import pandas as pd

from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import BaseEnsemble
from sklearn.metrics import mean_squared_log_error

from pandas import read_csv
import pandas as pd 
import numpy as np
import warnings
import datetime
import warnings
warnings.filterwarnings("ignore")

In [3]:
#   LOAD DATA
# Read the source CSV; the path is relative to the notebook's working directory.
data = read_csv('data_for_inequalities.csv')
# Preview the first rows as a quick check of the loaded columns.
data.head()

Unnamed: 0,Date,SG_Y
0,2017/12,76.5
1,2018/01,82.75
2,2018/02,85.5
3,2018/03,86.125
4,2018/04,88.7


In [4]:
#   USER CONFIGURATION
# Name of the column to forecast; change it to target a different series.
# This example forecasts the SG_Y column.
target_col = "SG_Y"

In [5]:
#   TRAIN/TEST SPLIT
# Pull the target series out of the loaded frame.
y = data.loc[:, target_col]
# Hold out the final 12 observations as the test set; everything before is training data.
y_train, y_test = temporal_train_test_split(y=y, test_size=12)
y_train

0      76.500000
1      82.750000
2      85.500000
3      86.125000
4      88.700000
5      89.916667
6      89.214286
7      89.062500
8      84.833333
9      83.550000
10     83.772727
11     84.375000
12     84.375000
13     85.708333
14     87.791667
15     87.541667
16     85.791667
17     86.375000
18     88.125000
19     88.125000
20     89.708333
21     91.625000
22     92.291667
23     95.708333
24     96.750000
25     96.166667
26     94.833333
27     93.833333
28     96.083333
29     98.000000
30     98.500000
31    104.000000
32    104.333333
33    103.833333
34    104.416667
35    103.916667
Name: SG_Y, dtype: float64

In [6]:
# Forecasting horizon: relative steps 1..len(y_test), one step per held-out observation
fh = np.arange(len(y_test)) + 1
fh

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

In [7]:
# Accumulators appended to by the individual model cells:
#   estimators - (name, forecaster) tuples that are later handed to the ensemble
#   mae_scores - the MAE of each individual model
estimators, mae_scores = [], []

In [8]:
#   Build Decision Tree model, forecast the test horizon and score it on the held-out data
# A fixed random_state keeps the tree reproducible: an unseeded DecisionTreeRegressor
# may pick differently between equally good splits on every fit, so the forecast
# (and the ensemble that refits this model) could change from run to run.
DTmodel = DecisionTreeRegressor(random_state=0)
# Reduce the univariate forecasting task to a regression problem on a
# sliding window of 24 past observations
model1 = make_reduction(DTmodel, window_length=24)
# Register the forecaster so the ensemble cell can reuse it
estimators.append(("D Tree", model1))
# Fit on the training data
model1.fit(y_train)
# Forecast every step of the horizon
target_pred1 = model1.predict(fh)
# MAE against the test set (MAE has no `symmetric` option, so none is passed)
DTREEmae = mean_absolute_error(y_test, target_pred1)
# RMSLE for the decision tree = sqrt(MSLE). The square root is taken explicitly
# because the `squared` argument of mean_squared_log_error was deprecated and
# later removed from scikit-learn.
DTREErmsle = np.sqrt(mean_squared_log_error(y_test, target_pred1))
# Record this model's MAE alongside the other models' scores
mae_scores.append(DTREEmae)
# Print the scores, then display the forecast
print('Decision Tree Model - MAE: %.3f' % (DTREEmae))
print("Decision Tree Model - RMSLE: %.3f" % DTREErmsle)
print('--------Decision Tree Forecast--------')
target_pred1

Decision Tree Model - MAE: 2.580
Decision Tree Model - RMSLE: 0.029
--------Decision Tree Forecast--------


36    104.333333
37    103.916667
38    104.416667
39    103.916667
40    104.416667
41    103.916667
42    104.416667
43    103.916667
44    104.416667
45    103.916667
46    104.416667
47    103.916667
dtype: float64

In [9]:
#   Build SARIMAX model, forecast the test horizon and score it on the held-out data
# trend='t' requests a linear time trend; seasonal_order=(1, 0, 0, 12) adds a
# seasonal AR(1) term with a period of 12 observations
model2 = SARIMAX(trend='t', seasonal_order=(1, 0, 0, 12))
# Register the forecaster so the ensemble cell can reuse it
estimators.append(("SARIMAX", model2))
# Fit on the training data
model2.fit(y_train)
# Forecast every step of the horizon
target_pred2 = model2.predict(fh)
# MAE against the test set (MAE has no `symmetric` option, so none is passed)
SARIMAXmae = mean_absolute_error(y_test, target_pred2)
# RMSLE for model2 = sqrt(MSLE). The square root is taken explicitly because the
# `squared` argument of mean_squared_log_error was deprecated and later removed
# from scikit-learn.
SARIMAXrmsle = np.sqrt(mean_squared_log_error(y_test, target_pred2))
# Record this model's MAE alongside the other models' scores
mae_scores.append(SARIMAXmae)
# Print the scores for model2, then display the forecast
print('SARIMAX Model - MAE: %.3f' % (SARIMAXmae))
print("SARIMAX Model - RMSLE: %.3f" % SARIMAXrmsle)
print('--------SARIMAX Forecast--------')
target_pred2

SARIMAX Model - MAE: 9.940
SARIMAX Model - RMSLE: 0.110
--------SARIMAX Forecast--------


36    105.138474
37    106.942127
38    109.038337
39    111.068942
40    112.069702
41    113.225204
42    114.893564
43    114.953479
44    116.765914
45    118.898294
46    120.716968
47    122.938188
Name: SG_Y, dtype: float64

In [10]:
#   Build Auto ETS model, forecast the test horizon and score it on the held-out data
# auto=True lets AutoETS select the ETS specification itself; n_jobs=-1 uses all cores.
# NOTE(review): sp=4 is a seasonal period of 4, while the Date column looks monthly
# and the SARIMAX model uses a period of 12 - confirm that 4 is intended.
model3 = AutoETS(auto=True, sp=4, n_jobs=-1)
# Register the forecaster so the ensemble cell can reuse it
estimators.append(("Auto ETS", model3))
# Fit on the training data
model3.fit(y_train)
# Forecast every step of the horizon
target_pred3 = model3.predict(fh)
# MAE against the test set (MAE has no `symmetric` option, so none is passed)
ETSmae = mean_absolute_error(y_test, target_pred3)
# RMSLE for model3 = sqrt(MSLE). The square root is taken explicitly because the
# `squared` argument of mean_squared_log_error was deprecated and later removed
# from scikit-learn.
ETSrmsle = np.sqrt(mean_squared_log_error(y_test, target_pred3))
# Record this model's MAE alongside the other models' scores
mae_scores.append(ETSmae)
# Print the scores for model3, then display the forecast
print('AUTO ETS Model - MAE: %.3f' % (ETSmae))
print("AUTO ETS Model - RMSLE: %.3f" % ETSrmsle)
print('--------Auto ETS Forecast--------')
target_pred3

AUTO ETS Model - MAE: 2.601
AUTO ETS Model - RMSLE: 0.029
--------Auto ETS Forecast--------


36    103.960567
37    103.995618
38    104.023659
39    104.046092
40    104.064038
41    104.078394
42    104.089880
43    104.099068
44    104.106419
45    104.112300
46    104.117004
47    104.120768
dtype: float64

In [11]:
#   ENSEMBLE THE REGISTERED FORECASTERS (Decision Tree, SARIMAX, Auto ETS)
#   USING SKTIME'S ENSEMBLE FORECASTER
# Keep a single entry per name (the most recently registered one): re-running a
# model cell appends the same name to `estimators` again, and EnsembleForecaster
# requires forecaster names to be unique.
unique_estimators = list(dict(estimators).items())
ensemble_model = EnsembleForecaster(forecasters=unique_estimators)
# Fit the ensemble on the training data
er = ensemble_model.fit(y_train)
# Forecast every step of the horizon
target_pred = er.predict(fh)
# MAE of the ensemble against the test set (MAE has no `symmetric` option)
Ensemblemae = mean_absolute_error(y_test, target_pred)
# RMSLE of the ensemble = sqrt(MSLE). The square root is taken explicitly because
# the `squared` argument of mean_squared_log_error was deprecated and later
# removed from scikit-learn.
EnsembleRMSLE = np.sqrt(mean_squared_log_error(y_test, target_pred))
# Print the ensemble scores and its forecast
print("Ensemble MAE: %.3f" % Ensemblemae)
print("Ensemble RMSLE: %.3f" % EnsembleRMSLE)
print(target_pred)

Ensemble MAE: 4.055
Ensemble RMSLE: 0.050
36    104.366347
37    104.979248
38    105.687332
39    106.371678
40    106.711246
41    107.101199
42    107.661148
43    107.684182
44    108.290778
45    109.003531
46    109.611324
47    110.352985
dtype: float64


In [12]:
# Side-by-side table of the held-out actuals and the ensemble forecast
df = pd.DataFrame(
    dict(Actual=y_test, Predicted=target_pred)
)
df

Unnamed: 0,Actual,Predicted
36,104.25,104.366347
37,103.458333,104.979248
38,103.916667,105.687332
39,108.541667,106.371678
40,108.458333,106.711246
41,107.291667,107.101199
42,106.583333,107.661148
43,101.208333,107.684182
44,100.125,108.290778
45,99.791667,109.003531


In [13]:
# Compare the RMSLE of every single model with the ensemble, one line per model
for report_line, rmsle_value in (
    ("AUTO ETS RMSLE      : %.6f", ETSrmsle),
    ("SARIMAX RMSLE       : %.6f", SARIMAXrmsle),
    ("Decision Tree RMSLE : %.6f", DTREErmsle),
    ("Ensemble RMSLE      : %.6f", EnsembleRMSLE),
):
    print(report_line % rmsle_value)

AUTO ETS RMSLE      : 0.029033
SARIMAX RMSLE       : 0.110113
Decision Tree RMSLE : 0.028960
Ensemble RMSLE      : 0.050275
