# Introduction
- Goal: Develop a forecasting algorithm to predict sales per item for any specified date.

The steps include:
1. Load data
2. Plot data
3. Build model:
   - ARIMA
   - Seasonal ARIMA
   - FBProphet

# 1. Load Data

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the sales workbook. The 'Sales date' column becomes the index and is
# parsed as dates; ',' is treated as the decimal separator.
df = pd.read_excel(
    'file.xlsx',
    index_col='Sales date',
    parse_dates=True,
    decimal=',',
)

# 2. Plot data

In [None]:
# Sum sales per day
df2 = df.resample('D')['Total'].sum()

# matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8-*'
# and later releases dropped the old names, so use whichever name this
# installation actually provides (and keep the default style if neither exists).
for style_name in ('seaborn-whitegrid', 'seaborn-v0_8-whitegrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break

df2.plot(figsize=(14, 5))
plt.ylabel('Total')
plt.title('Total Sales per Day')
plt.show()

In [None]:
# Sum sales per month

# Bucket the rows with the 'M' resampling rule and add up 'Total' per bucket.
monthly_total = df.resample('M')['Total'].sum()
monthly_total.plot(figsize=(10, 5))
plt.title('Total Sales per Month')
plt.ylabel('Total')
plt.show()

In [None]:
# Sales per day in different months

# Daily totals from 2017-04-01 onwards, copied so the slice is independent of
# the resampled series it came from.
daily_total = df.resample('D')['Total'].sum()
df_tmp = daily_total['2017-04-01':].copy()

# One frame per month, re-indexed from 0 so each month is drawn against the
# position of the day inside that month rather than against its date.
months = {}
for month_key, month_days in df_tmp.groupby(pd.Grouper(freq='M')):
    months[month_key] = month_days.reset_index()

plt.figure(figsize=(14, 8))
for n, g in months.items():
    # The first 7 characters of the group key (YYYY-MM) serve as the legend entry.
    plt.plot(g.index, g['Total'], label=str(n)[:7])
plt.ylabel('Total')
plt.xlabel('Day')
plt.title('Sales per day in different months')
plt.legend()
plt.show()

In [None]:
# Sales per day in different weeks

df_tmp = df.resample('D')['Total'].sum()['2017-04-01':].copy()
# One frame per week; reset_index() moves the dates into the first column.
weeks = {n: g.reset_index() for n, g in df_tmp.groupby(pd.Grouper(freq='W'))}
fig, ax = plt.subplots(figsize=(14, 8))
# 'TMP' stays as a leading placeholder because the averaging cell slices x[1:]
# to obtain the seven weekday names.
x = np.array(['TMP', 'MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'])
for n, g in weeks.items():
    # Place every point on its real weekday (Monday=0 ... Sunday=6). Plotting
    # against the row position would shift a partial week, such as the first
    # one when the slice starts mid-week, onto the wrong weekday labels.
    weekday = g.iloc[:, 0].dt.dayofweek
    ax.plot(weekday, g['Total'], label=str(n)[:10])
# Pin the tick positions before labelling them so the labels cannot drift with
# whatever ticks the automatic locator happens to choose.
ax.set_xticks(list(range(7)))
ax.set_xticklabels(x[1:])
plt.title('Sales per day in different weeks')
plt.ylabel('Total')
plt.xlabel('Day')
plt.legend()
plt.show()

In [None]:
# Average sale per day in a week

# Collect the daily totals of every week under their real weekday
# (Monday=0 ... Sunday=6). Keying on the row position inside each weekly frame
# would file the days of a partial week under the wrong weekday.
d = {}
for g in weeks.values():
    # The first column holds the dates that reset_index() moved out of the index.
    for weekday, total in zip(g.iloc[:, 0].dt.dayofweek, g['Total']):
        d.setdefault(int(weekday), []).append(total)

# Always emit seven rows in Monday..Sunday order (NaN for a weekday that never
# occurs) so the weekday labels assigned below line up with the data.
m = pd.DataFrame.from_dict(
    {i: (np.mean(d[i]) if i in d else np.nan) for i in range(7)},
    orient='index',
)
m.set_index(x[1:], drop=True).plot(legend=None,
                                   figsize=(13, 5),
                                   title='Average Sales per Day in a Week',
                                   );

# 3. Build model

In [None]:
# Take the first row of df1_unstack and wrap it in a one-column DataFrame.
# NOTE(review): df1_unstack is not defined in the visible cells; presumably it
# holds one row per item with the dates as columns - confirm.
first_row = df1_unstack.iloc[0, :]
sku1 = pd.DataFrame(first_row)
sku1.plot(figsize=(14, 5));

In [None]:
# Split the single-item series into weeks; reset_index() moves the dates into
# the first column of each weekly frame.
weeks = {n: g.reset_index() for n, g in sku1.groupby(pd.Grouper(freq='W'))}
fig, ax = plt.subplots(figsize=(14, 8))
# 'TMP' is a leading placeholder; only x[1:] (the weekday names) is used as labels.
x = np.array(['TMP', 'MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'])
for n, g in weeks.items():
    # Place every point on its real weekday (Monday=0 ... Sunday=6) so that a
    # partial first or last week is not drawn under the wrong weekday labels.
    weekday = g.iloc[:, 0].dt.dayofweek
    # NOTE(review): assumes the value column of sku1 is named 'Total' - confirm.
    ax.plot(weekday, g['Total'], label=str(n)[:10])
# Pin the tick positions before labelling them so labels and data stay aligned.
ax.set_xticks(list(range(7)))
ax.set_xticklabels(x[1:])
plt.title('Sales per day in different weeks')
plt.ylabel('Total')
plt.xlabel('Day')
plt.legend()
plt.show()

## 3.1.1. ARIMA

In [None]:
import matplotlib.pyplot as plt
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error

# NOTE(review): statsmodels.tsa.arima_model.ARIMA is the legacy implementation;
# it was deprecated in statsmodels 0.12 and is unusable from 0.13 onwards. Its
# replacement, statsmodels.tsa.arima.model.ARIMA, takes no `disp` argument in
# fit() and returns a different forecast() result, so this cell must be ported
# before upgrading statsmodels - confirm the installed version.

# Walk-forward validation: fit on everything seen so far, forecast one step,
# then reveal the true observation and repeat.
X = sku1.values
size = int(len(X) * .8) # train has 80% of all data
train, test = X[0:size], X[size:len(X)]
history = list(train)
predictions = []
for obs in test:
    model = ARIMA(history, order=(5,1,0))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    yhat = output[0]
    predictions.append(yhat)
    history.append(obs)
mse = mean_squared_error(test, predictions)
# The model evaluated in this cell is ARIMA, so label the metric accordingly.
print('The Mean Squared Error of ARIMA: {}'.format(round(mse, 2)))
# plot
# Assigning to `plt.figsize` only sets an unused module attribute; the size has
# to be passed when the figure is created.
plt.figure(figsize=(14, 5))
plt.plot(test)
plt.plot(predictions, color='red')
plt.show()

## 3.1.2. Seasonal ARIMA

In [None]:
import itertools

# `sm` is used below but was never imported in the notebook; without this the
# NameError was swallowed inside the search loop and surfaced only at the final fit.
import statsmodels.api as sm

# Candidate (p, d, q) orders: every combination of 0 and 1. The same triples
# are reused for the seasonal part with a period of 7.
p = d = q = range(0, 2)
pdq = list(itertools.product(p, d, q))
seasonal_pdq = [(x[0], x[1], x[2], 7) for x in pdq]

# Fit every order / seasonal-order pair and record its AIC.
# NOTE(review): `train` is whatever an earlier cell left under that name,
# presumably the first 80% of the series - confirm.
result_all = []
for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(train,
                                            order=param,
                                            seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False,
                                           )
            results = mod.fit()
        except Exception as err:
            # The search stays best-effort, but a failing combination is now
            # reported instead of vanishing, and Ctrl-C is no longer trapped.
            print('ARIMA{}x{}7 - skipped: {}'.format(param, param_seasonal, err))
            continue
        result_all.append(results.aic)
        print('ARIMA{}x{}7 - AIC:{}'.format(param, param_seasonal, results.aic))

# Final model: a fixed order fitted on the first `size` rows of sku1.
mod = sm.tsa.statespace.SARIMAX(sku1.iloc[:size],
                    order=(1, 1, 1),
                    seasonal_order=(0, 1, 1, 7),
                    enforce_stationarity=False,
                    enforce_invertibility=False
                   )
results = mod.fit()

### Validate Forecast

In [None]:
# Predict from row `size` of sku1 through its last row with the fitted model.
holdout_start = sku1.index[size]
holdout_end = sku1.index[-1]
pred = results.get_prediction(start=holdout_start, end=holdout_end, dynamic=False)
pred_ci = pred.conf_int()

# Draw the full series, then overlay the predicted mean on the same axes.
ax = sku1.plot(label='observed')
pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast', alpha=.7, figsize=(14, 7))

# Shade the band between the first and second columns of the interval frame.
lower_bound = pred_ci.iloc[:, 0]
upper_bound = pred_ci.iloc[:, 1]
ax.fill_between(pred_ci.index, lower_bound, upper_bound, color='k', alpha=.2)

ax.set_xlabel('Date')
ax.set_ylabel('Sales')
plt.legend()
plt.show()

In [None]:
# Mean squared error of the SARIMAX predictions over the held-out rows.
y_forecasted = pred.predicted_mean
y_truth = sku1.iloc[size:]
# sku1 is a one-column DataFrame, so y_truth.values has shape (n, 1) while the
# predicted mean has shape (n,). Subtracting them directly broadcasts to an
# (n, n) matrix and averages every pairing; flatten both sides to compare
# element by element.
squared_error = (y_forecasted.values.ravel() - y_truth.values.ravel()) ** 2
mse = squared_error.mean()
print('The Mean Squared Error of SARIMAX: {}'.format(round(mse, 2)))

In [None]:
# Forecast 90 steps past the end of the data the model was fitted on.
pred_uc = results.get_forecast(steps=90)
pred_ci = pred_uc.conf_int()

# Draw the first `size` rows of sku1 and append the forecast mean on the same axes.
fitted_span = sku1[:size]
ax = fitted_span.plot(label='observed', figsize=(14, 7))
pred_uc.predicted_mean.plot(ax=ax, label='Forecast')

# Shade the band between the first and second columns of the interval frame.
lower_bound = pred_ci.iloc[:, 0]
upper_bound = pred_ci.iloc[:, 1]
ax.fill_between(pred_ci.index, lower_bound, upper_bound, color='k', alpha=.25)

ax.set_xlabel('Date')
ax.set_ylabel('Sales')
plt.legend()
plt.show()

## FBProphet

In [None]:
!pip install holidays==0.9.12
from fbprophet import Prophet

m = Prophet()
m.add_country_holidays(country_name='NO')
sku_prophet = sku1.reset_index().rename(columns={'Sales date': 'ds', 'Total': 'y'})
size = int(sku_prophet.shape[0] * .8)
train, test = sku_prophet[:size], sku_prophet[size:]
future = test[['ds']]
m.fit(train)
forecast = m.predict(future)
fig1 = m.plot(forecast)

forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

f1 = forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
f2 = f1.merge(test)

mse = ((f2.yhat-f2.y)**2).mean()
print('The Mean Squared Error of our forecasts is {}'.format(round(mse, 2)))

future = m.make_future_dataframe(periods=90)

forecast = m.predict(future)
fig1 = m.plot(forecast)