In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from pandas.plotting import register_matplotlib_converters
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import statsmodels.api as sm
register_matplotlib_converters()
from time import time
from sklearn.linear_model import LinearRegression as LR

In [3]:
# Load the predictions of the individual models that are combined below.
# The two exports use different delimiters: ',' for the SARIMA file and
# ';' for the LSTM file.

df_pred_SARIMA = pd.read_csv(
    'predictions_SARIMA.csv',
    sep=',',
)
df_pred_LSTM = pd.read_csv(
    '85perc_prediction_lstm.csv',
    sep=';',
)

In [4]:
# --------------------------
# Mean Square Error Weights
# Calculates weighting between the models
# --------------------------
# Each model is weighted by the inverse of its error, normalised so that the
# two weights belonging to one row sum to 1:
#
#     w_SARIMA = (1 / MSE_SARIMA) / (1 / MSE_LSTM + 1 / MSE_SARIMA)
#
# The LSTM weight is then simply 1 - w_SARIMA.

MSE_LSTM = np.array(pd.read_csv('MeanSqError_LSTM.csv', sep=';'))
MSE_SARIMA = np.array(pd.read_csv('MeanSqError_SARIMA.csv', sep=';'))

# Rows of the two tables are paired by position, so a differing row count
# would silently combine errors that do not belong together.
if len(MSE_LSTM) != len(MSE_SARIMA):
    raise ValueError(
        "MSE tables differ in length: {} LSTM rows vs. {} SARIMA rows".format(
            len(MSE_LSTM), len(MSE_SARIMA)
        )
    )

# Column 1 is the value used as the model error; column 0 is not read here.
# astype(float) also copes with an object-dtype array (mixed column types).
inv_mse_lstm = 1 / MSE_LSTM[:, 1].astype(float)
inv_mse_sarima = 1 / MSE_SARIMA[:, 1].astype(float)

# Kept as plain Python lists so later cells can keep indexing them as before.
summen_models = (inv_mse_lstm + inv_mse_sarima).tolist()
weights_SARIMA = (inv_mse_sarima / (inv_mse_lstm + inv_mse_sarima)).tolist()

print(weights_SARIMA)

[0.789577090611337, 0.9132495858586296, 0.5917629531297102, 0.5262012697106471, 0.9451554283023434, 0.7904901830428713, 0.8916348687275396, 0.8746572282232657, 0.5706263286323038, 0.901611065563858, 0.5627606081195534, 0.6423953437279664, 0.9420702484397607, 0.4792646727608928, 0.9684740159539477, 0.9417927288436314, 0.9339495499330691, 0.7304323485109221, 0.9541333589079276, 0.9085167256883142, 0.8251089903182603, 0.962556457361492, 0.9089125645615748, 0.6998535828140814, 0.9775790146621537, 0.7512112767136325, 0.9806431642730142, 0.7285827880528994, 0.9022678450701157, 0.9848670901910845, 0.8772070047739008, 0.9283248259103114, 0.7568559255694111, 0.9709586825690766, 0.37325699118053335, 0.9648604718201537, 0.9779099765274739, 0.9658417932115432, 0.908199278651513, 0.9313441206531691, 0.6551556869178032, 0.9534680474694899, 0.8928857762280755, 0.5119329216157095, 0.9008536825057804]


In [5]:
# Creates the forecast for all shops with the MSEW Hybrid Model
#
# For every store the hybrid forecast is the weighted sum
#     weight * SARIMA prediction + (1 - weight) * LSTM prediction
# where weight = weights_SARIMA[i]. The SARIMA columns are labelled
# '0', '1', ... whereas the LSTM columns are labelled 'Store1', 'Store2', ...,
# hence the i + 1 when addressing the LSTM frame.

MSEW = []
for i, weight in enumerate(weights_SARIMA):
    MSEW_SARIMA = df_pred_SARIMA[str(i)] * weight
    MSEW_LSTM = df_pred_LSTM['Store{}'.format(i + 1)] * (1 - weight)

    # The SARIMA frame defines the forecast horizon; the LSTM frame must
    # cover at least as many rows.
    horizon = len(MSEW_SARIMA)
    if len(MSEW_LSTM) < horizon:
        raise ValueError(
            "LSTM prediction for store {} has {} rows, expected at least {}".format(
                i + 1, len(MSEW_LSTM), horizon
            )
        )

    # Add by position (not by index label), as a single vectorised operation
    # instead of one .iloc lookup per row.
    MSEW_store = (MSEW_SARIMA.to_numpy() + MSEW_LSTM.to_numpy()[:horizon]).tolist()
    MSEW.append(MSEW_store)

# MSEW holds one list per store; transposing yields one column per store
# (labelled 0, 1, ...) and one row per forecast step.
df_MSEW = pd.DataFrame(MSEW).transpose()

df_MSEW.index = df_pred_SARIMA.Date
print(df_MSEW)
df_MSEW.to_csv("MSEW.csv", sep=',', index=True)

                      0             1              2             3   \
Date                                                                  
2012-06-08  1.557909e+06  1.906367e+06  418601.041638  2.142600e+06   
2012-06-15  1.567360e+06  1.943716e+06  416465.723885  2.172709e+06   
2012-06-22  1.531805e+06  1.854149e+06  410563.195544  2.119601e+06   
2012-06-29  1.564065e+06  1.890791e+06  412959.265688  2.096784e+06   
2012-07-06  1.594209e+06  1.923364e+06  424161.762765  2.147745e+06   
2012-07-13  1.567550e+06  1.897869e+06  413618.505670  2.155379e+06   
2012-07-20  1.536843e+06  1.848733e+06  411856.837557  2.140065e+06   
2012-07-27  1.550074e+06  1.818604e+06  407257.765935  2.125528e+06   
2012-08-03  1.660086e+06  1.917377e+06  426338.064935  2.196974e+06   
2012-08-10  1.609089e+06  1.899045e+06  417263.454807  2.189599e+06   
2012-08-17  1.604299e+06  1.919433e+06  414661.170964  2.227981e+06   
2012-08-24  1.586898e+06  1.895220e+06  415967.604074  2.155753e+06   
2012-0