In [10]:
#importing necessary packages
import pandas as pd
import numpy as np
from tqdm import tqdm
import math
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
from sklearn.metrics import mean_squared_error as mse

In [11]:
# Load the three input CSV files (paths are relative to the current working directory).
sales_train_eval = pd.read_csv("sales_train_evaluation.csv")
# NOTE(review): `sell_priceprice` looks like a typo for `sell_prices`; it is not used in
# the cells visible here — confirm nothing else depends on the name before renaming it.
sell_priceprice = pd.read_csv("sell_prices.csv")
calendar = pd.read_csv("calendar.csv")

In [12]:
# Convert day labels such as "d_12" into the integer 12.
# Casting to str first keeps this cell idempotent: re-running it on values that are
# already integers leaves them unchanged instead of raising AttributeError on int.split.
calendar["d"] = calendar["d"].astype(str).str.split("_").str[-1].astype("int64")

In [13]:
# Day columns are the ones named "d_<n>". Match on the prefix rather than on a substring
# so that a column which merely contains "d_" somewhere in its name is never picked up;
# the positional slicing of `days` further down relies on this list holding day columns only.
days = [a for a in sales_train_eval.columns if a.startswith('d_')]
# Forecast horizon: number of steps each model is asked to predict.
forecast = 28

In [14]:
# Split the day columns by position: the first 1913 go to the training frame,
# every remaining day column goes to the held-out test frame.
train_days, test_days = days[:1913], days[1913:]
X_train = sales_train_eval.loc[:, train_days]
X_test = sales_train_eval.loc[:, test_days]

#### Simple Moving Average model

In [46]:

def SMA(df, forecast, wind_size):
    """Forecast `forecast` steps ahead with a blended simple moving average.

    Step 0 is the row-wise mean of the last `wind_size` columns of `df`.
    Each later step `i` averages two quantities with equal weight:

    * the row-wise mean of the last `wind_size - i` columns of `df`
      (the observed window shrinks by one column per step), and
    * the element-wise mean of all forecasts produced so far.

    Once the observed window is exhausted (`i >= wind_size`) the forecast is
    the mean of the earlier forecasts only. Previously the slice start
    `-wind_size + i` became `-0` or positive at that point and silently
    selected the entire history (or an arbitrary slice of it) instead.

    Parameters
    ----------
    df : pandas.DataFrame
        Historical values, one series per row and one time step per column.
    forecast : int
        Number of steps to predict. A value of 0 or less yields an empty list.
    wind_size : int
        Number of trailing columns used for the first step; must be >= 1.

    Returns
    -------
    list of numpy.ndarray
        One array per forecast step, each holding one value per row of `df`.

    Raises
    ------
    ValueError
        If `wind_size` is smaller than 1.
    """
    if wind_size < 1:
        raise ValueError("wind_size must be at least 1")

    preds = []
    for step in range(forecast):
        remaining = wind_size - step  # observed columns still inside the window
        if remaining > 0:
            observed_mean = np.mean(df.iloc[:, -remaining:].values, axis=1)
            if step == 0:
                pred = observed_mean
            else:
                pred = 0.5 * (observed_mean + np.mean(preds, axis=0))
        else:
            # Window exhausted: only the earlier forecasts are left to average.
            pred = np.mean(preds, axis=0)
        preds.append(pred)

    return preds

In [51]:
# Run the SMA model with a 28-column window, label the forecasts F_1914 .. F_1941,
# and report the root-mean-squared error against X_test.
predictions = SMA(X_train, forecast, 28)
df = pd.DataFrame(
    {'F_' + str(day): pred for day, pred in zip(range(1914, 1942), predictions)}
)

print("RMSE of SMA is:", math.sqrt(mse(X_test,df)))

RMSE of SMA is: 2.233064272884162


### Exponential Weighted Moving Average model

In [15]:
def EWMA(X_train,X_test,forecast,alpha):
    """Produce `forecast` exponentially weighted one-step-ahead predictions.

    The first prediction is the last column of `X_train`. Every following
    prediction `i` is `alpha * X_test[:, i-1] + (1 - alpha) * prediction[i-1]`.

    Note that each step after the first reads the actual value of the previous
    test column, so this is a rolling one-step-ahead forecast, not a forecast
    made from the training data alone.

    Parameters
    ----------
    X_train : pandas.DataFrame
        Training values; only the last column is used.
    X_test : pandas.DataFrame
        Held-out values; columns 0 .. forecast-2 are read.
    forecast : int
        Number of predictions to return. 0 or less yields an empty list.
    alpha : float
        Weight given to the most recent actual value.

    Returns
    -------
    list of pandas.Series
        One Series per forecast step.
    """
    if forecast <= 0:
        return []

    # Seed the recursion with the most recent training observation.
    smoothed = [X_train.iloc[:, -1]]
    for step in range(1, forecast):
        previous_actual = X_test.iloc[:, step - 1]
        smoothed.append(alpha * previous_actual + (1 - alpha) * smoothed[-1])
    return smoothed

In [16]:
# Evaluate the EWMA model for several smoothing factors and print the RMSE of each
# against X_test; forecasts are labelled F_1914 .. F_1941.
alphas = [0.2,0.5,0.7,0.8]
for j in alphas:
    predictions = EWMA(X_train,X_test,forecast,j)
    df = pd.DataFrame(
        {'F_' + str(day): pred for day, pred in zip(range(1914, 1942), predictions)}
    )
    rmse = math.sqrt(mse(X_test,df))
    print("RMSE error of EWMA with alpha: ",j," ", rmse)

RMSE error of EWMA with alpha:  0.2   2.1642902710828524
RMSE error of EWMA with alpha:  0.5   2.252204028692715
RMSE error of EWMA with alpha:  0.7   2.3663048920859855
RMSE error of EWMA with alpha:  0.8   2.4385125436145336
