In [None]:
import pandas as pd
import numpy as np
import pickle

import plotly.offline as py

from joblib import Parallel, delayed
from fbprophet import Prophet
from fbprophet.plot import *
from fbprophet.diagnostics import cross_validation
from fbprophet.diagnostics import performance_metrics

## Params

In [None]:
# Index of the last day `d` treated as observed history. Rows with
# d > END_SALES get their target blanked and are used as the forecast horizon.
END_SALES = 1913

## Load data

In [None]:
# Source table for both the Prophet input and the top-down weights.
# NOTE(review): presumably one row per item id and day `d` — confirm against
# the notebook that writes this file.
# Unpickling can execute arbitrary code: only load files from a trusted source.
grid_df = pd.read_pickle("data/interim/grid_df.pkl")
# Submission template; its 'id' column is used as the index when filling in forecasts.
sample_submission = pd.read_csv('data/refined/sample_submission.csv')

In [None]:
# Per-series Prophet settings. The JSON is keyed by series id, so it is
# transposed to get one row per id; the former index becomes the 'id' column.
# Downstream cells read the 'params' and 'rmse' columns of this frame.
prophet_params = (
    pd.read_json("data/external/prophet_params_2020-05-12-20-05-33.json")
    .T
    .reset_index()
    .rename(columns={'index': 'id'})
)

## Format data for Prophet

In [None]:
# Collapse the three state-specific SNAP flags into one 'snap' column:
# TX rows take snap_TX, WI rows take snap_WI, every other row keeps snap_CA.
grid_df['snap'] = np.select(
    [
        (grid_df['state_id'] == 'TX').to_numpy(),
        (grid_df['state_id'] == 'WI').to_numpy(),
    ],
    [
        grid_df['snap_TX'].to_numpy(),
        grid_df['snap_WI'].to_numpy(),
    ],
    default=grid_df['snap_CA'].to_numpy(),
)
grid_df['snap'] = grid_df['snap'].astype(int)

In [None]:
# Aggregate the item grid to one row per store/department and day, using the
# column names Prophet expects ('ds' for the date, 'y' for the target).
prophet_df = (
    grid_df
    .groupby(['store_id', 'dept_id', 'd'])
    .agg(
        ds=('date', 'max'),
        y=('sales', 'sum'),
        snap=('snap', 'max'),
        price=('sell_price', 'max'),
    )
    .reset_index()
    .assign(
        # Series key: "<store_id>_<dept_id>"
        id=lambda t: t['store_id'].astype(str) + '_' + t['dept_id'].astype(str),
        # Summing all-NaN sales yields 0 in pandas, so the target of the days
        # after END_SALES is explicitly reset to NaN.
        y=lambda t: t['y'].where(t['d'] <= END_SALES),
    )
    [['id', 'd', 'ds', 'y', 'snap', 'price']]
)

## Forecast

In [None]:
def forecast_prophet(id, include_history=False):
    """Fit a Prophet model on one series and forecast the next 28 days.

    Reads the module-level frames ``prophet_df`` (series data with columns
    'id', 'd', 'ds', 'y', 'snap', 'price') and ``prophet_params`` (one row per
    series id with a 'params' dict of hyper-parameters).

    Parameters
    ----------
    id : str
        Series identifier as stored in ``prophet_df['id']``.
    include_history : bool, default False
        If True, the returned forecast covers the training dates followed by
        the 28 future days; otherwise only the 28 future days.

    Returns
    -------
    tuple
        ``(m, fcst)`` where ``m`` is the fitted ``Prophet`` model and ``fcst``
        is the frame returned by ``m.predict`` with the extra columns 'id',
        'y' and 'd'. 'yhat' is rounded to integers and floored at 0.

    Raises
    ------
    IndexError
        If ``prophet_params`` has no row for ``id``.
    """
    horizon = 28

    # Keep the full-length arrays (history + future rows) before the rows
    # without a target are dropped from the training frame.
    series = prophet_df[prophet_df['id'] == id].copy()
    price = series['price'].values
    snap = series['snap'].values
    y = series['y'].values
    d = series['d'].values

    # dropna() removes every row holding a NaN in any column, which discards
    # the future rows whose 'y' is NaN.
    # NOTE(review): assumes 'price' and 'snap' have no gaps in the history;
    # otherwise the lengths used below no longer line up.
    train = series.dropna()

    # Get params
    matches = prophet_params.loc[prophet_params['id'] == id, 'params'].values
    if len(matches) == 0:
        raise IndexError("no Prophet parameters found for id {!r}".format(id))
    params = matches[0]

    # Define model: built-in seasonalities are switched off and re-added below
    # with the tuned Fourier order; uncertainty sampling is switched off.
    m = Prophet(
        yearly_seasonality=False,
        weekly_seasonality=False,
        daily_seasonality=False,
        uncertainty_samples=False,
        changepoint_range=params['changepoint_range'],
        seasonality_mode=params['seasonality_mode'],
        seasonality_prior_scale=params['seasonality_prior_scale'],
        holidays_prior_scale=params['holidays_prior_scale'],
        changepoint_prior_scale=params['changepoint_prior_scale'],
    )

    # All three seasonalities share the same tuned Fourier order.
    for name, period in (('yearly', 365.25), ('monthly', 365.25 / 12), ('weekly', 7)):
        m.add_seasonality(
            name=name,
            period=period,
            fourier_order=params['seasonality_order'],
        )

    # Add holidays/regressors
    m.add_country_holidays(country_name='US')
    m.add_regressor('snap', mode=params['snap_mode'])
    m.add_regressor('price')

    # Fit
    m.fit(train)

    # Predict
    future = m.make_future_dataframe(periods=horizon, freq='D', include_history=include_history)

    # Rows of the original series that correspond to the rows of `future`:
    # everything when the history is included, otherwise only the tail.
    # NOTE(review): assumes the series ends with exactly `horizon` future rows
    # (d > END_SALES) — confirm against the grid construction.
    rows = slice(None) if include_history else slice(-horizon, None)
    future['price'] = price[rows]
    future['snap'] = snap[rows]

    fcst = m.predict(future)

    # Format output. 'y' and 'd' use the same row selection as the regressors;
    # assigning the full-length arrays made include_history=False fail with a
    # length mismatch.
    fcst['id'] = id
    fcst['y'] = y[rows]
    fcst['d'] = d[rows]

    # Sales are non-negative counts: round, then floor at zero.
    fcst['yhat'] = fcst['yhat'].round().astype(int).clip(lower=0)

    return m, fcst

In [None]:
# Every distinct series id present in the Prophet input.
l_id = prophet_df['id'].unique()

# Fit one model per series in parallel (n_jobs=-1: use all available cores).
# Each result is a (model, forecast) tuple, returned in the order of `l_id`.
include_history = True
res = Parallel(n_jobs=-1, verbose=1)(
    delayed(forecast_prophet)(series_id, include_history)
    for series_id in l_id
)

In [None]:
# Fitted models keyed by series id; `res` is ordered like `l_id`.
prophet_model = {
    str(series_id): fitted
    for series_id, (fitted, _) in zip(l_id, res)
}

# Stack the per-series forecast frames into one long table.
forecast = pd.concat([pair[1] for pair in res], sort=False)

# Both halves of the result tuples are now stored separately.
del res

In [None]:
#fc = forecast[['id', 'ds', 'd', 'y', 'yhat']]
#fc.to_csv('data/refined/forecast_prophet_store_dept.csv', index=False)

## Plots

In [None]:
# Display the per-series parameters ordered by their 'rmse' column, lowest first.
prophet_params.sort_values('rmse')

In [None]:
# Series to inspect in the plot below, in the "<store_id>_<dept_id>" format of
# prophet_df['id']. Note that this name shadows the builtin `id`.
id = 'CA_3_FOODS_3'

In [None]:
# Fetch the fitted model and the matching forecast rows for the selected series.
m = prophet_model[str(id)]
fcst = forecast.loc[forecast['id'] == id]

# Interactive Plotly figure of the forecast with trend and changepoints.
fig = plot_plotly(
    m,
    fcst,
    uncertainty=True,
    plot_cap=False,
    trend=True,
    changepoints=True,
    changepoints_threshold=0.01,
    xlabel='date',
    ylabel='sales',
    figsize=(900, 600),
)
py.iplot(fig)

## Top-Down
##### 1) Deduce weights using the last 364 days (per weekday)

In [None]:
# Reference window for the weights: the 364 days ending at END_SALES
# (END_SALES - 364 < d <= END_SALES). Despite the "_l28" suffix, this is not a
# 28-day window.
grid_df_l28 = grid_df.loc[
    grid_df['d'].gt(END_SALES - 364) & grid_df['d'].le(END_SALES)
].copy()

In [None]:
# Total sales per store/department and weekday over the reference window;
# this is the denominator of the item weights.
store_dpt = (
    grid_df_l28
    .groupby(['store_id', 'dept_id', 'weekday'])
    .agg(store_dpt_sales=('sales', 'sum'))
    .reset_index()
)
store_dpt.head(3)

In [None]:
# Item sales per weekday over the reference window, with the store and
# department each item belongs to.
weight = grid_df_l28.groupby(['id', 'weekday']).agg(sales=('sales', 'sum')).reset_index()
weight = pd.merge(
    weight,
    grid_df_l28[['id', 'store_id', 'dept_id']].drop_duplicates(),
    on='id',
)

# Weight = share of the item in its store/department sales for that weekday.
weight = pd.merge(weight, store_dpt, how='left', on=['store_id', 'dept_id', 'weekday'])
weight['w'] = weight['sales'] / weight['store_dpt_sales']

# Map each weekday to the forecast day F (1..7) on which it actually falls.
# Numbering the rows with cumcount() ties F to the sort order of the weekday
# values, which only matches the calendar by coincidence and shifts whenever
# an id lacks a weekday. The mapping is read from the calendar rows of the
# first forecast week instead, so it holds for any weekday encoding.
first_week = grid_df.loc[
    (grid_df['d'] > END_SALES) & (grid_df['d'] <= END_SALES + 7),
    ['d', 'weekday'],
].drop_duplicates()
first_week['F'] = (first_week['d'] - END_SALES).astype(int)
if len(first_week) != 7 or not first_week['weekday'].is_unique:
    raise ValueError(
        "grid_df must contain the 7 days following END_SALES, each with a distinct weekday"
    )
weight = pd.merge(weight, first_week[['weekday', 'F']], on='weekday', how='inner')

weight = weight[['id', 'dept_id', 'store_id', 'F', 'w']]

In [None]:
# Extend the weights to the following weeks: the same rows are repeated with F
# shifted by 7, 14 and 21 days (offset 0 is the original block).
# `weight` is overwritten with its own extension, so re-running this cell
# without re-running the previous one duplicates rows.
weight = pd.concat(
    [weight.assign(F=weight['F'] + offset) for offset in (0, 7, 14, 21)]
)

weight.sort_values(['id', 'F']).head(30)

##### 2) Apply weight top-down

In [None]:
# Keep only the forecast horizon. `forecast` also holds the fitted history
# when the models were run with include_history=True, so numbering rows with
# cumcount() would label the first historical days as F=1..28. The day index
# `d` gives the horizon position directly: F = d - END_SALES.
fcst = forecast.loc[forecast['d'] > END_SALES, ['id', 'd', 'yhat']].copy()
fcst['F'] = (fcst['d'] - END_SALES).astype(int)

# Split "<store_id>_<dept_id>" back into its parts; relies on store ids being
# exactly 4 characters long (e.g. 'CA_3').
fcst['store_id'] = fcst['id'].str[:4]
fcst['dept_id'] = fcst['id'].str[5:]

fcst = fcst[['store_id', 'dept_id', 'F', 'yhat']]
fcst.head(3)

In [None]:
# Attach the store/department forecast of each horizon day to every item
# weight. The join keys are spelled out so that an extra shared column cannot
# silently change the join.
res = pd.merge(weight, fcst, how='left', on=['store_id', 'dept_id', 'F'])

# Item forecast = item share * store/department forecast.
res['fcst'] = res['w'] * res['yhat']

# One row per item id and one column per forecast day, named 'F<day>'.
# The string 'sum' replaces the builtin `sum`, which recent pandas versions
# deprecate as an aggregation callable.
res = pd.crosstab(index=res['id'], columns=res['F'], values=res['fcst'], aggfunc='sum')
res = res.add_prefix('F')

##### 3) Format submission file

In [None]:
# Start from the template indexed by id, overwrite the cells for which a
# forecast exists (matched on index and column labels), then restore 'id' as a
# regular column. Ids absent from `res` keep their template values.
sub = sample_submission.set_index('id')
sub.update(res)
sub = sub.reset_index()
sub

In [None]:
# Write the submission table to disk without the row index.
sub.to_csv('data/submission/Prophet_top_down_naive_364_weekly.csv', index=False)