In [1]:
#   IMPORTS
from sktime.utils.plotting import plot_series
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.forecasting.naive import NaiveForecaster
from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError
from sktime.performance_metrics.forecasting import mean_absolute_error
from sktime.forecasting.compose import make_reduction
from sktime.forecasting.fbprophet import Prophet
from sktime.forecasting.ets import AutoETS
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.bats import BATS
from sktime.forecasting.tbats import TBATS
from sktime.forecasting.var import VAR
from sktime.forecasting.arima import AutoARIMA
from sktime.forecasting.arima import ARIMA
from sktime.forecasting.sarimax import SARIMAX
from sktime.forecasting.theta import ThetaForecaster
from sktime.forecasting.trend import TrendForecaster
from sktime.forecasting.compose import EnsembleForecaster

from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from prophet.forecaster import Prophet 

from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import BaseEnsemble
from sklearn.metrics import mean_squared_log_error

from pandas import read_csv
import pandas as pd 
import numpy as np
import warnings
import datetime
import warnings
#Install a global "ignore" filter so no warnings are shown for the rest of the session.
#NOTE(review): this also hides deprecation notices from the libraries used below - consider narrowing the filter.
warnings.filterwarnings("ignore")

In [2]:
#   LOAD IN DATA
#Read the births CSV (path is relative to the notebook's working directory)
data = pd.read_csv(filepath_or_buffer="daily-total-female-births.csv")
#Preview the first five rows
data.head(n=5)

Unnamed: 0,Date,Births
0,1959-01-01,35
1,1959-01-02,32
2,1959-01-03,30
3,1959-01-04,31
4,1959-01-05,44


In [3]:
#   USER DEFINED
#Name of the column to forecast; change this to match your own dataset.
#This example forecasts the 'Births' column of the loaded CSV.
target_col = "Births"

In [4]:
#Train/Test split - 75% / 25%, keeping the most recent observations as the test set.
#For the 365-row births data this gives ceil(365 * 0.25) = 92 test rows (273 train / 92 test).
y = data[target_col]
#Derive the test length from the series instead of hard-coding it, so the split
#stays at 25% when a different dataset or target column is used.
test_size = int(np.ceil(len(y) * 0.25))
y_train, y_test = temporal_train_test_split(y, test_size=test_size)
y_train

0      35
1      32
2      30
3      31
4      44
       ..
268    43
269    40
270    47
271    51
272    56
Name: Births, Length: 273, dtype: int64

In [5]:
#Forecasting horizon: one relative step (1, 2, ..., n) for every observation in the test set
fh = np.arange(len(y_test)) + 1
fh

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
       52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
       69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85,
       86, 87, 88, 89, 90, 91, 92])

In [6]:
#Empty accumulators filled by the model cells below:
#  estimators - (name, forecaster) tuples later handed to the ensemble
#  mae_scores - the MAE of each individual model, in the same order
estimators, mae_scores = [], []

In [7]:
#   Build Auto ETS model and score its forecast against the held-out test set
#NOTE(review): sp=12 is the seasonal period; the Date column looks daily, so confirm 12 is the intended period
model1 = AutoETS(auto=True, sp=12, n_jobs=1)
#Register model1 so the ensemble cell can reuse it
#NOTE: re-running this cell appends a duplicate entry; re-run the accumulator cell first
estimators.append(("Auto ETS",model1))
#Fit model on training data
model1.fit(y_train)
#Forecast every step of the test window
target_pred1 = model1.predict(fh)
#MAE for model1 (MAE has no `symmetric` option, so the stray keyword was dropped)
ETSmae = mean_absolute_error(y_test, target_pred1)
#RMSLE for model1: take the root explicitly instead of relying on the `squared`
#flag, which newer scikit-learn releases deprecate and then remove
ETSrmsle = np.sqrt(mean_squared_log_error(y_test, target_pred1))
#Record model1's MAE alongside the other models' scores
mae_scores.append(ETSmae)
#Print scores for model1
print('AUTO ETS Model - MAE: %.3f' % (ETSmae))
print("AUTO ETS Model - RMSLE: %.3f" % ETSrmsle)
print('--------Auto ETS Forecast--------')
target_pred1

AUTO ETS Model - MAE: 5.891
AUTO ETS Model - RMSLE: 0.166
--------Auto ETS Forecast--------


273    47.149352
274    47.149352
275    47.149352
276    47.149352
277    47.149352
         ...    
360    47.149352
361    47.149352
362    47.149352
363    47.149352
364    47.149352
Length: 92, dtype: float64

In [8]:
#   Build BATS model and score its forecast against the held-out test set
#NOTE(review): sp=12 is the seasonal period; the Date column looks daily, so confirm 12 is the intended period
model2 = BATS(sp=12)
#Register model2 so the ensemble cell can reuse it
#NOTE: re-running this cell appends a duplicate entry; re-run the accumulator cell first
estimators.append(("BATS",model2))
#Fit model on training data
model2.fit(y_train)
#Forecast every step of the test window
target_pred2 = model2.predict(fh)
#MAE for model2 (MAE has no `symmetric` option, so the stray keyword was dropped)
BATSmae = mean_absolute_error(y_test, target_pred2)
#RMSLE for model2: take the root explicitly instead of relying on the `squared`
#flag, which newer scikit-learn releases deprecate and then remove
BATSrmsle = np.sqrt(mean_squared_log_error(y_test, target_pred2))
#Record model2's MAE alongside the other models' scores
mae_scores.append(BATSmae)
#Print scores for model2
print('BATS Model - MAE: %.3f' % (BATSmae))
print("BATS Model - RMSLE: %.3f" % BATSrmsle)
print('--------BATS Forecast--------')
target_pred2

BATS Model - MAE: 6.237
BATS Model - RMSLE: 0.179
--------BATS Forecast--------


273    48.312670
274    43.409958
275    50.604114
276    46.610369
277    46.412655
         ...    
360    46.610369
361    46.412655
362    46.053692
363    44.121403
364    46.516257
Name: Births, Length: 92, dtype: float64

In [9]:
#   Build TBATS model and score its forecast against the held-out test set
#NOTE(review): sp=12 is the seasonal period; the Date column looks daily, so confirm 12 is the intended period
model3 = TBATS(sp=12)
#Register model3 so the ensemble cell can reuse it
#NOTE: re-running this cell appends a duplicate entry; re-run the accumulator cell first
estimators.append(("TBATS",model3))
#Fit model on training data
model3.fit(y_train)
#Forecast every step of the test window
target_pred3 = model3.predict(fh)
#MAE for model3 (MAE has no `symmetric` option, so the stray keyword was dropped)
TBATSmae = mean_absolute_error(y_test, target_pred3)
#RMSLE for model3: take the root explicitly instead of relying on the `squared`
#flag, which newer scikit-learn releases deprecate and then remove
TBATSrmsle = np.sqrt(mean_squared_log_error(y_test, target_pred3))
#Record model3's MAE alongside the other models' scores
mae_scores.append(TBATSmae)
#Print scores for model3
print('TBATS Model - MAE: %.3f' % (TBATSmae))
print('TBATS Model - RMSLE: %.3f' % TBATSrmsle)
print('--------TBATS Forecast--------')
target_pred3

TBATS Model - MAE: 5.793
TBATS Model - RMSLE: 0.164
--------TBATS Forecast--------


273    46.903691
274    46.903691
275    46.903691
276    46.903691
277    46.903691
         ...    
360    46.903691
361    46.903691
362    46.903691
363    46.903691
364    46.903691
Name: Births, Length: 92, dtype: float64

In [10]:
#ENSEMBLE TOP 3 ALGORITHMS (AutoETS, BATS, TBATS) USING SKTIME'S ENSEMBLE FORECASTER
#The ensemble is built from the (name, forecaster) tuples collected in `estimators`
ensemble_model = EnsembleForecaster(forecasters=estimators)
#Fit model on training data
er = ensemble_model.fit(y_train)
#Forecast every step of the test window
target_pred = er.predict(fh)
#MAE for the ensemble (the misspelt `symetric` keyword was silently ignored, so it was dropped)
Ensemblemae = mean_absolute_error(y_test, target_pred)
#RMSLE for the ensemble: take the root explicitly instead of relying on the `squared`
#flag, which newer scikit-learn releases deprecate and then remove
EnsembleRMSLE = np.sqrt(mean_squared_log_error(y_test, target_pred))
#Print scores for the ensemble model
print("Ensemble MAE: %.3f" % Ensemblemae)
print("Ensemble RMSLE: %.3f" % EnsembleRMSLE)
print(target_pred)

Ensemble MAE: 5.933
Ensemble RMSLE: 0.168
273    47.455238
274    45.821000
275    48.219052
276    46.887804
277    46.821899
         ...    
360    46.887804
361    46.821899
362    46.702245
363    46.058149
364    46.856433
Length: 92, dtype: float64


In [11]:
#Line up the held-out actual values against the ensemble predictions, one row per test index
df = pd.DataFrame(dict(Actual=y_test, Predicted=target_pred))
df

Unnamed: 0,Actual,Predicted
273,49,47.455238
274,54,45.821000
275,56,48.219052
276,47,46.887804
277,44,46.821899
...,...,...
360,37,46.887804
361,52,46.821899
362,48,46.702245
363,55,46.058149


In [16]:
#Side-by-side RMSLE comparison: one line per individual model, ensemble last
print("\n".join([
    "AutoETS RMSLE      : %.6f" % ETSrmsle,
    "BATS RMSLE         : %.6f" % BATSrmsle,
    "TBATS RMSLE        : %.6f" % TBATSrmsle,
    "Ensemble RMSLE     : %.6f" % EnsembleRMSLE,
]))

AutoETS RMSLE      : 0.166019
BATS RMSLE         : 0.178638
TBATS RMSLE        : 0.163558
Ensemble RMSLE     : 0.168025


In [17]:
#Evaluation side by side - MAE
#The labels now say MAE; they previously said RMSLE although the values printed are the MAE scores
print("AutoETS MAE        : %.6f" % ETSmae)
print("BATS MAE           : %.6f" % BATSmae)
print("TBATS MAE          : %.6f" % TBATSmae)
print("Ensemble MAE       : %.6f" % Ensemblemae)

AutoETS RMSLE      : 5.891023
BATS RMSLE         : 6.236692
TBATS RMSLE        : 5.792588
Ensemble RMSLE     : 5.933328
