In [None]:
# Main data packages. 
import numpy as np
import pandas as pd

# Statistics, filtering and calendar helpers.
import statsmodels.formula.api as smf
from statsmodels.tsa.seasonal import seasonal_decompose
from scipy.ndimage import gaussian_filter
from calendar import monthrange
from calendar import month_name

import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns

from darts import TimeSeries
from darts.models import ExponentialSmoothing, VARIMA, NBEATSModel, TFTModel, LinearRegressionModel

In [None]:
# Read the combined dataset from a CSV path given relative to the working directory.
df = pd.read_csv('../data/data_combined.csv')

In [None]:
# Parse the date column into datetime values.
df['date'] = pd.to_datetime(df['date'])

In [None]:
# Quick look at summary statistics of the loaded data.
df.describe()

In [None]:
# Peek at the first rows.
df.head()

In [None]:
# Column dtypes and non-null counts.
df.info()

# Darts

In [None]:
# Restrict to one branch/product pair and to years before 2020.
is_before_2020 = df['year'] < 2020
is_metro_mischbrote = (df['branch'] == "Metro") & (df['product'] == "Mischbrote")
df_darts = df[is_before_2020 & is_metro_mischbrote]
df_darts.tail(10)

In [None]:
# Daily-frequency series of turnover; dates absent from the frame are inserted by darts.
full_history = TimeSeries.from_dataframe(
    df_darts,
    time_col='date',
    value_cols='turnover',
    fill_missing_dates=True,
    freq='D',
)
# Keep a window of 3 * 365 days that ends 365 days before the last timestamp.
series = full_history[-365 * 4:-365]

In [None]:
# Hold out the final 365 points for validation; everything before is training data.
train = series[:-365]
val = series[-365:]

In [None]:
# Fit exponential smoothing on the training window and draw 1000 sample paths
# covering the whole validation span.
n_forecast_steps = len(val)
model = ExponentialSmoothing()
model.fit(series=train)
prediction = model.predict(n=n_forecast_steps, num_samples=1000)

In [None]:
# Plot the full series, then overlay the sampled forecast.
# low_quantile/high_quantile bound the shaded band drawn around the forecast.
# NOTE(review): a 45%-55% band is very narrow — confirm this is intended (e.g. rather than 5%-95%).
series.plot()
prediction.plot(label="forecast", low_quantile=0.45, high_quantile=0.55)
plt.legend()

# Darts II

OK, we _might_ need the unstacked (wide-format) dataset after all.

In [None]:
def unstack_time_series(df, index, groups, target):
    """Reshape a long-format frame into one wide frame with a column per aggregation level.

    The result contains, in this order: a ``'total'`` column (sum of ``target`` per
    ``index`` value), one column per value of ``groups[0]``, one column per value of
    ``groups[1]``, and one column per (``groups[0]``, ``groups[1]``) combination
    named ``"<first> | <second>"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data holding the ``index``, ``groups`` and ``target`` columns.
    index : str
        Column that becomes the (datetime) row index of the result.
    groups : list of str
        Exactly two grouping columns are used (``groups[0]`` and ``groups[1]``).
    target : str
        Column holding the values to spread and aggregate.

    Returns
    -------
    pandas.DataFrame
        Wide frame indexed by ``index`` converted with ``pd.to_datetime``.
    """
    first_key, second_key = groups[0], groups[1]

    def summed_by(keys):
        # pd.Series.sum is applied with skipna=False so that a group containing a
        # NaN aggregates to NaN instead of silently adding up to 0.
        return df.groupby(keys)[target].agg(pd.Series.sum, skipna=False)

    def single_level(key):
        # One column per distinct value of `key`, summed over the other grouping column.
        long_form = summed_by([index, key]).reset_index(drop=False)
        return long_form.pivot(index=index, columns=key, values=target)

    # Finest level: one column per combination of both grouping columns.
    combos = df.pivot(index=index, columns=groups, values=target)
    combos.columns = combos.columns.to_flat_index().map('{0[0]} | {0[1]}'.format)

    by_first = single_level(first_key)
    by_second = single_level(second_key)

    # Top level: sum over everything per index value.
    overall = summed_by(index).to_frame().rename(columns={target: 'total'})

    # Left-join the levels from coarsest to finest.
    wide = overall
    for part in (by_first, by_second, combos):
        wide = wide.join(part)

    wide.index = pd.to_datetime(wide.index)
    return wide


In [None]:
# Wide frame with one column per aggregation level, cut off at 2020-01-01 (label slice),
# with the date moved back from the index into a regular column.
df_wide = unstack_time_series(df, index='date', groups=['branch', 'product'], target='turnover')
df_unstacked = df_wide.loc[:'2020-01-01'].reset_index()

## Prediction with Hierarchy

Create hierarchy for targets

In [None]:
# List the distinct product names (the hierarchy below is built from them).
df['product'].unique()

In [None]:
from itertools import product

# Mapping of each component name to the list of its parent components.
branches = df['branch'].unique()
goods = df['product'].unique()

# Every branch and every product rolls up directly into "total".
hierarchy = {name: ["total"] for name in [*branches, *goods]}

# Every "branch | product" combination rolls up into both its branch and its product.
hierarchy.update({
    f"{branch} | {good}": [branch, good]
    for good, branch in product(goods, branches)
})


In [None]:
# Inspect the resulting component -> parents mapping.
hierarchy

In [None]:
# Components of the hierarchical series: grand total, branch totals, product totals,
# then every "branch | product" combination.
branch_totals = ['Center', 'Metro', 'Train_Station']
product_totals = ['Mischbrote', 'Weizenbrötchen', 'handliches Gebäck',
                  'herzhafter Snack', 'klassischer Kuchen']

# The combination entries are listed branch by branch, in this specific order.
combo_branch_order = ['Metro', 'Center', 'Train_Station']
combo_product_order = ['Mischbrote', 'Weizenbrötchen', 'klassischer Kuchen',
                       'handliches Gebäck', 'herzhafter Snack']
branch_product_pairs = [
    '{} | {}'.format(branch_name, product_name)
    for branch_name in combo_branch_order
    for product_name in combo_product_order
]

targets = ['total'] + branch_totals + product_totals + branch_product_pairs

In [None]:
# Copy of the wide frame with missing values replaced by 0.
# NOTE(review): df_nona is not referenced by any other cell visible in this notebook.
df_nona = df_unstacked.fillna(0)

In [None]:
from darts.utils import missing_values

# Multivariate series over all target components, with the grouping hierarchy attached.
series = TimeSeries.from_dataframe(
    df=df_unstacked,
    time_col='date',
    value_cols=targets,
    hierarchy=hierarchy,
)

# Fill missing values using darts' automatic fill mode.
series = missing_values.fill_missing_values(series, fill='auto')

In [None]:
# Hold out the final 365 points of the hierarchical series for validation.
train = series[:-365]
val = series[-365:]

In [None]:
# Linear regression on the previous 365 lags, forecasting the whole validation span.
horizon = len(val)
model = LinearRegressionModel(lags=365)
model.fit(series=train)
pred = model.predict(n=horizon)

In [None]:
# Plot the branch-level components: the series with thin lines, the forecast with thicker lines.
components_to_show = ['Center', 'Metro', 'Train_Station']
plt.figure(figsize=(28, 7))
series[components_to_show].plot(lw=1)
pred[components_to_show].plot(lw=2)

In [None]:
# Import the metrics in this cell: the notebook's only other import of them sits in a
# later cell, so on a fresh top-to-bottom run the names would be undefined here.
from darts.metrics import mape, rmse

# Validation error of the hierarchical forecast, rounded to four decimals.
print('mape:', round(mape(val, pred), 4))
print('rmse:', round(rmse(val, pred), 4))

## Prediction with Covariates (weather data, hintedy-hint!)

Add weather data

In [None]:
# Load the weather summary statistics and parse their date column.
weather_stats = pd.read_csv('../data/summary_stats.csv')
weather_stats['date'] = pd.to_datetime(weather_stats['date'])

In [None]:
# Left join: keep every row of the wide turnover frame and attach weather data by date.
df_joined = pd.merge(df_unstacked, weather_stats, how='left', on='date')
df_joined.head()

In [None]:
# Inspect the joined rows strictly between 2019-10-01 and 2019-10-30.
after_start = df_joined['date'] > '2019-10-01'
before_end = df_joined['date'] < '2019-10-30'
df_joined[after_start & before_end]

In [None]:
# NOTE(review): `series` was already gap-filled with this same call right after it was
# built, so this cell looks redundant — confirm whether another object was meant here.
from darts.utils import missing_values

series = missing_values.fill_missing_values(series, fill='auto')

good to go!

Let's create a covariate series

In [None]:
df_joined.columns

In [None]:
# Build the covariate series from the weather columns of the joined frame.
weather_columns = ['temp_mean', 'clouds_mean', 'rain_1h_mean', 'snow_1h_mean']
covariates = TimeSeries.from_dataframe(df=df_joined, time_col='date', value_cols=weather_columns)

# Fill gaps in the covariates themselves. Passing `series` here instead would replace the
# weather covariates with the target series and leak the values to be predicted into the model.
covariates = missing_values.fill_missing_values(covariates, fill='auto')

In [None]:
# Linear regression on the target lags -7 and -365 plus the covariate value at the
# forecast step itself (lag 0), forecasting the whole validation span.
forecast_horizon = len(val)
model = LinearRegressionModel(lags=[-7, -365], lags_future_covariates=[0])
model.fit(series=train, future_covariates=covariates)
pred = model.predict(n=forecast_horizon, future_covariates=covariates)

In [None]:
# Branch-level plot for the covariate model: series with thin lines, forecast with thicker lines.
# components_to_show is reused from the earlier plotting cell.
plt.figure(figsize=(28, 7))
series[components_to_show].plot(lw=1)
pred[components_to_show].plot(lw=2)
# Optional axis limits, left disabled:
#plt.ylim((-2000,8000))
#plt.xlim((0,30))

In [None]:
from darts.metrics import rmse, mape

In [None]:
# Report both validation metrics for the covariate model, rounded to four decimals.
for metric_name, metric_fn in (('mape', mape), ('rmse', rmse)):
    print(f'{metric_name}:', round(metric_fn(val, pred), 4))