# NY Warehouse Flow Forecasting

In this notebook, we try to predict the inbound and outbound traffic of the warehouse in order to help allocate the workforce. Note that the maximum capacity of the warehouse is 3,500 orders/day as of July 2019, but our predictions are not capped at that limit.

There are six types of warehouse activities:

Outbound customer order
Outbound items transfer
Outbound purchase order return
Inbound customer return
Inbound items transfer
Inbound purchase order

In [None]:
import pandas as pd
import numpy as np
import queries.utils as utils
from functools import reduce
from matplotlib import pyplot as plt

#### bigquery client & credentials
import os
from google.cloud import bigquery
# Point the BigQuery client at a service-account key file.
# An already-exported GOOGLE_APPLICATION_CREDENTIALS takes precedence, so the
# notebook also runs on machines other than the original author's; the
# hardcoded Windows path is only a fallback.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    r"C:\Users\m.young\Documents\BigQueryAPIKey\modata-79d448dbeef0.json",
)
client = bigquery.Client()

#### fbprophet 
import fbprophet
from fbprophet.diagnostics import cross_validation
from fbprophet.diagnostics import performance_metrics
from fbprophet.plot import plot_cross_validation_metric
from fbprophet.plot import add_changepoints_to_plot

pd.plotting.register_matplotlib_converters()  # register pandas' date converters with matplotlib (author's workaround for fbprophet/matplotlib plotting issues)

## A. IMPORT DATA

### 1. Actuals Data

In [None]:
# Actuals are pulled from BigQuery.
# Commented-out alternative: load them from a local CSV instead.
# data_raw_all = pd.read_csv('data/inven_est_072319.csv')

# Query text: every column of the actuals table.
wh_actuals_query = """
SELECT
     *
FROM adhoc_analytics.project_inven_forecast_actuals_EST
  """

# Run the query and clean the result in one pass:
#   1. parse date_est as datetime
#   2. order the rows by date (ascending)
#   3. renumber the index 0..n-1 to match the new order
#   4. replace missing values with 0
data_raw_all = (
    client.query(wh_actuals_query)
    .to_dataframe()
    .assign(date_est=lambda frame: pd.to_datetime(frame['date_est']))
    .sort_values(by='date_est')
    .reset_index(drop=True)
    .fillna(0)
)

In [None]:
# Preview the first and the last rows of the cleaned actuals.
for preview in (data_raw_all.head(), data_raw_all.tail()):
    display(preview)

In [None]:
# Column dtypes of the actuals frame.
data_raw_all.dtypes

### 2. Excel Predictions Data

In [None]:
# Read the forecast team's current predictions from a local CSV.
dat_curr_prediction = pd.read_csv('data/inven_curr_predictions.csv')

In [None]:
# Last five rows of the forecast team's predictions.
display(dat_curr_prediction.tail(5))

### 3. FB Prophet Holidays Data

In [None]:
# Holiday tables, one per flow; each is later passed to Prophet via `holidays=`.

# Outbound Customer Orders - Prophet Holidays
co_holidays = pd.read_csv('data/co_holidays.csv')
# TODO: add employee sales dates; use promotions date table

# Inbound Purchase Orders - Prophet Holidays
po_holidays = pd.read_csv('data/po_holidays.csv')

# Inbound Customer Returns - Prophet Holidays
cr_holidays = pd.read_csv('data/cr_holidays.csv')

In [None]:
# Show the last five rows of each holiday table.
for holiday_table in (co_holidays, po_holidays, cr_holidays):
    display(holiday_table.tail(5))

## B. Prepare Data

### 1. Prepare Actuals Data for Multiple Models

In [None]:
def _prophet_frame(units_column):
    """Return `date_est` plus one units column of `data_raw_all`, renamed to ds / y."""
    return data_raw_all[['date_est', units_column]].rename(
        columns={'date_est': 'ds', units_column: 'y'}
    )


co_dat = _prophet_frame('OUT_CUST_UNITS')   # Outbound - Customer Orders
vr_dat = _prophet_frame('OUT_VR_UNITS')     # Outbound - Vendor Returns
oto_dat = _prophet_frame('OUT_TO_UNITS')    # Outbound - Transfer Orders
cr_dat = _prophet_frame('IN_CUST_UNITS')    # Inbound - Customer Returns
po_dat = _prophet_frame('IN_PO_UNITS')      # Inbound - Purchase Orders
ito_dat = _prophet_frame('IN_TO_UNITS')     # Inbound - Transfer Orders

### 2. Prepare Forecast Data for Multiple Models Comparisons

In [None]:
# First rows of the forecast team's predictions (source of the FCST_Dt / FCT_* columns used below).
dat_curr_prediction.head()

In [None]:
def _excel_forecast(forecast_column):
    """Return one forecast column of `dat_curr_prediction` as `excel_forecast`,
    indexed by the parsed FCST_Dt date (index named 'ds')."""
    frame = dat_curr_prediction.loc[:, ['FCST_Dt', forecast_column]].rename(
        columns={'FCST_Dt': 'ds', forecast_column: 'excel_forecast'}
    )
    frame.index = pd.to_datetime(frame['ds'])
    return frame.drop(columns=['ds'])


co_dat_curr_prediction = _excel_forecast('FCT_OUT_Ord')     # Outbound - Customer Orders
# Outbound - Vendor Returns: no Excel forecast frame is built in this cell.
oto_dat_curr_prediction = _excel_forecast('FCT_OUT_TO')     # Outbound - Transfer Orders
cr_dat_curr_prediction = _excel_forecast('FCT_IN_Return')   # Inbound - Customer Returns
po_dat_curr_prediction = _excel_forecast('FCT_IN_PO')       # Inbound - Purchase Orders
ito_dat_curr_prediction = _excel_forecast('FCT_IN_TO')      # Inbound - Transfer Orders

### 3. Prepare Train vs Test Datasets for FB Prophet Models

In [None]:
# Parameters Settings
split_date = '2019-09-01'  # train/test cutoff: rows before this date are train, rows on/after it are test
forward_days = 200  # number of days to forecast beyond the end of the test rows

In [None]:
def _train_test_split(frame):
    """Split `frame` on `split_date`: (rows with ds before it, rows with ds on/after it)."""
    return (
        frame.loc[frame.ds < split_date, :],
        frame.loc[frame.ds >= split_date, :],
    )


co_dat_train, co_dat_test = _train_test_split(co_dat)      # Outbound - Customer Orders
vr_dat_train, vr_dat_test = _train_test_split(vr_dat)      # Outbound - Vendor Returns
oto_dat_train, oto_dat_test = _train_test_split(oto_dat)   # Outbound - Transfer Orders
cr_dat_train, cr_dat_test = _train_test_split(cr_dat)      # Inbound - Customer Returns
po_dat_train, po_dat_test = _train_test_split(po_dat)      # Inbound - Purchase Orders
ito_dat_train, ito_dat_test = _train_test_split(ito_dat)   # Inbound - Transfer Orders


In [None]:
# Re-join each train/test pair into the full series used by the *_m_full models.
# Order: customer orders, vendor returns, outbound transfers,
#        customer returns, purchase orders, inbound transfers.
co_dat_all, vr_dat_all, oto_dat_all, cr_dat_all, po_dat_all, ito_dat_all = (
    pd.concat([train_part, test_part])
    for train_part, test_part in (
        (co_dat_train, co_dat_test),
        (vr_dat_train, vr_dat_test),
        (oto_dat_train, oto_dat_test),
        (cr_dat_train, cr_dat_test),
        (po_dat_train, po_dat_test),
        (ito_dat_train, ito_dat_test),
    )
)

In [None]:
# Short alias used by every model cell below.
Prophet = fbprophet.Prophet

# Installed fbprophet version. Kept as the cell's last expression so that
# Jupyter actually renders it.
fbprophet.__version__

## C. FB Prophet Model - Outbound Customer Orders

In [None]:
# Settings shared by both customer-order models.
co_prophet_params = dict(
    changepoint_prior_scale=0.1,
    changepoint_range=0.9,
    seasonality_mode='multiplicative',
    seasonality_prior_scale=10,
    holidays_prior_scale=10,
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
    holidays=co_holidays,
)

co_m = Prophet(**co_prophet_params)
co_m_full = Prophet(**co_prophet_params)

In [None]:
# Fit both models: co_m on the training rows only, co_m_full on all actuals.
co_m.fit(co_dat_train)
co_m_full.fit(co_dat_all)

# Forecast from co_m, extended by one daily step per test row plus forward_days more.
co_future = co_m.make_future_dataframe(periods=len(co_dat_test)+forward_days, freq='1D')
co_forecast = co_m.predict(co_future)
# NOTE: not the cell's last expression, so this preview is evaluated but not rendered.
co_forecast.loc[co_forecast.ds>=split_date,:].head()

# Component modes of the full model (this is the cell's displayed output).
co_m_full.component_modes

In [None]:
## Validation
# Forecast plot with changepoints overlaid.
fig = co_m.plot(co_forecast)
a = add_changepoints_to_plot(fig.gca(), co_m, co_forecast)


# Component plots of the forecast.
f = co_m.plot_components(co_forecast)


# Build the verification frame with the project helper and plot it around split_date.
# (make_verif / plot_verif live in queries.utils; their internals are not shown here.)
co_verif = utils.make_verif(co_forecast, co_dat_train, co_dat_test)
f = utils.plot_verif(co_verif,date=split_date)


# Block plot restricted to calendar year 2019.
f, ax = plt.subplots(nrows=1, figsize=(16,10), sharey=True)
utils.make_plot_block(co_verif, '2019-01-01', '2019-12-31', ax=ax)


# Joint plot for the rows before split_date ("train set").
utils.plot_joint_plot(co_verif.loc[co_verif.index<split_date,:], title='train set', fname=None)

# Joint plot for the rows on/after split_date that have an observed y ("test set").
utils.plot_joint_plot(co_verif.loc[(co_verif.index>=split_date) &(co_verif['y'].notnull()) ,:], \
                      title='test set', fname=None)

In [None]:
# Block plot of the customer-order verification frame from 2019-05-19 to 2019-12-31.
f, ax = plt.subplots(nrows=1, figsize=(16,10), sharey=True)
utils.make_plot_block(co_verif, '2019-05-19', '2019-12-31', ax=ax)

In [None]:
## Cross Validation
# Both runs use the full-data model with the same initial window and horizon;
# they differ only in `period` (7 days vs 1 day).
co_cv_window = dict(initial='1210 days', horizon='14 days')
df_cv = cross_validation(co_m_full, period='7 days', **co_cv_window)
df_cv_two_week = cross_validation(co_m_full, period='1 days', **co_cv_window)

In [None]:
# Add the forecast horizon and the absolute percentage error of every CV row.
df_cv = df_cv.assign(
    horizon=df_cv['ds'] - df_cv['cutoff'],
    mape=np.abs(df_cv['yhat'] - df_cv['y']) / df_cv['y'],
)

# Keep only rows with an error below 100%. Rows with y == 0 (infinite or NaN
# error) fail the comparison and are dropped as well, so the metrics below
# describe this filtered subset only.
df_cv = df_cv[df_cv['mape'] < 1]

df_p = performance_metrics(df_cv, rolling_window=0.1)
df_p.head()

In [None]:
# MAE and MAPE against horizon, first for the weekly-period CV, then for the daily-period CV.
for cv_frame in (df_cv, df_cv_two_week):
    for metric_name in ('mae', 'mape'):
        fig = plot_cross_validation_metric(cv_frame, metric=metric_name)

In [None]:
# Horizon of every daily-period CV prediction (prediction date minus cutoff).
df_cv_two_week['horizon'] = df_cv_two_week['ds'] - df_cv_two_week['cutoff']

# Predictions made exactly 14 days / 1 day ahead, indexed by date.
# set_index returns a fresh frame, which avoids the SettingWithCopyWarning that
# in-place edits on a filtered slice trigger.
df_two_week_prediction = df_cv_two_week.loc[df_cv_two_week.horizon == '14 days', :].set_index('ds')
df_1_day_prediction = df_cv_two_week.loc[df_cv_two_week.horizon == '1 days', :].set_index('ds')

# Excel forecast next to the Prophet verification frame, for dates present in both.
co_verif_comp = (
    pd.concat([co_dat_curr_prediction, co_verif], axis=1, join='inner')
    .filter(items=['yhat_lower', 'yhat_upper', 'yhat', 'y', 'excel_forecast'])
)

# Keep dates with an observation, give the columns display names, then join the
# 14-day-ahead CV prediction on the 'ds' index. Overlapping columns coming from
# the CV frame get the "_2w" suffix; its 'yhat' no longer clashes and is renamed last.
co_verif_comp = (
    co_verif_comp.loc[co_verif_comp['y'].notnull(), :]
    .rename(columns={
        'y': 'Observations',
        'yhat': f'Prophet Model up until {split_date}',
        'excel_forecast': 'Current Excel Model',
    })
    .merge(right=df_two_week_prediction, on="ds", how="inner", suffixes=('', "_2w"))
    .rename(columns={'yhat': 'Prophet Model Two Weeks Ahead'})
)

co_verif_comp.head()

In [None]:
# Observations against the three forecasts on one wide axis.
co_plot_columns = [
    'Observations',
    f'Prophet Model up until {split_date}',
    'Current Excel Model',
    'Prophet Model Two Weeks Ahead',
]
_, ax = plt.subplots(figsize=(25, 8))
# ax.fill_between(verif_comp.index, verif_comp.loc[:,'yhat_lower'], verif_comp.loc[:,'yhat_upper'], color='coral', alpha=0.3)
co_verif_comp.loc[:, co_plot_columns].plot(ax=ax)


In [None]:
# MAPE of each forecast against the observations.
# The actuals were zero-filled when loaded, so an observation can be 0. Zeros in
# the denominator are turned into NaN so those days drop out of the mean instead
# of making the whole MAPE infinite.
co_observed = co_verif_comp['Observations']
co_observed_nonzero = co_observed.replace(0, np.nan)

for label, column in [
    ('MAPE of the Current Model ', 'Current Excel Model'),
    (f'MAPE of the Prophet Model Up Until {split_date}  ', f'Prophet Model up until {split_date}'),
    ('MAPE of the Prophet Model Two Weeks Prediction  ', 'Prophet Model Two Weeks Ahead'),
]:
    abs_pct_error = np.abs(co_verif_comp[column] - co_observed) / co_observed_nonzero
    print(label + str(np.mean(abs_pct_error)))


In [None]:
# First (Incorrect 7/23)
# MAPE of the Current Model 0.2356932773388679
# MAPE of the Prophet Model Up Until 2019-05-19  0.17342342484494647
# MAPE of the Prophet Model Two Weeks Prediction  0.16988295234710005

# Second (Corrected 7/24)
# MAPE of the Current Model 0.2356932773388679
# MAPE of the Prophet Model Up Until 2019-05-19  0.17199888077987097
# MAPE of the Prophet Model Two Weeks Prediction  0.1686462523082831

# Third (w/ Employee)
# MAPE of the Current Model 0.2356932773388679
# MAPE of the Prophet Model Up Until 2019-05-19  0.17404171412011635
# MAPE of the Prophet Model Two Weeks Prediction  0.17068368655718888

In [None]:
# Per-day absolute percentage error of the two-weeks-ahead forecast, largest first.
# Zeros in either series are replaced by NaN before the calculation.
(np.abs(co_verif_comp['Prophet Model Two Weeks Ahead'].replace(0, np.nan) - co_verif_comp['Observations'].replace(0, np.nan))/co_verif_comp['Observations'].replace(0, np.nan)).sort_values(ascending=False)

## D. FB Prophet Model - Inbound Purchase Orders

In [None]:
# ADDITIONAL REGRESSOR - Purchase Orders Estimated Delivery Windows
po_reg_raw = pd.read_csv('data/PO_EST_ACT_MA.csv')

# Keep the date and the estimate, renamed to ds / reg. The dates are parsed so
# that the cutoff below is a chronological comparison; comparing the raw CSV
# strings with split_date would be lexicographic and only right for
# zero-padded ISO dates.
po_reg_1_full = (
    po_reg_raw[['date', 'IN_PO_EST']]
    .rename(columns={'date': 'ds', 'IN_PO_EST': 'reg'})
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
)

# Regressor rows before the train/test cutoff.
po_reg_1_train = po_reg_1_full[po_reg_1_full['ds'] < split_date]

In [None]:
# Last rows of the regressor table.
po_reg_1_full.tail()

In [None]:
# Settings shared by both purchase-order models.
po_prophet_params = dict(
    changepoint_prior_scale=0.1,
    changepoint_range=0.9,
    seasonality_mode='multiplicative',
    seasonality_prior_scale=10,
    holidays_prior_scale=10,
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
    holidays=po_holidays,
)

po_m = Prophet(**po_prophet_params)
po_m_full = Prophet(**po_prophet_params)

# ### Add extra regressors
# Look the regressor up by date instead of by row position: a plain column
# assignment aligns on the integer index, which silently mismatches days as soon
# as the CSV and the actuals differ in start date, row order or index.
po_reg_by_date = (
    po_reg_1_full
    .assign(ds=pd.to_datetime(po_reg_1_full['ds']))
    .drop_duplicates(subset='ds')   # Series.map needs a unique lookup index
    .set_index('ds')['reg']
)

# .assign returns new frames, so the train/test slices are not modified in place
# and re-running this cell gives the same result.
po_dat_train = po_dat_train.assign(reg=po_dat_train['ds'].map(po_reg_by_date))
po_m.add_regressor('reg', prior_scale = 10, mode='multiplicative')

po_dat_all = po_dat_all.assign(reg=po_dat_all['ds'].map(po_reg_by_date))
po_m_full.add_regressor('reg', prior_scale = 10, mode='multiplicative')

In [None]:
# Fit both models: po_m on the training rows only, po_m_full on all actuals.
po_m.fit(po_dat_train)
po_m_full.fit(po_dat_all)

# Forecast frame from po_m, extended by one daily step per test row plus forward_days more.
po_future = po_m.make_future_dataframe(periods=len(po_dat_test)+forward_days, freq='1D')

# Attach the regressor by date rather than by row position, so every forecast
# day gets the estimate recorded for that same day.
po_future_reg_by_date = (
    po_reg_1_full
    .assign(ds=pd.to_datetime(po_reg_1_full['ds']))
    .drop_duplicates(subset='ds')   # Series.map needs a unique lookup index
    .set_index('ds')['reg']
)
po_future['reg'] = po_future['ds'].map(po_future_reg_by_date)

# Only forecast dates before 2020-01-01
# (presumably where the regressor data ends; TODO confirm).
po_future = po_future[po_future['ds'] < '2020-01-01']

po_forecast = po_m.predict(po_future)
# NOTE: not the cell's last expression, so this preview is evaluated but not rendered.
po_forecast.loc[po_forecast.ds>=split_date,:].head()


# Component modes of the full model (this is the cell's displayed output).
po_m_full.component_modes

In [None]:
po_verif = utils.make_verif(po_forecast, po_dat_train, po_dat_test)

# 1.0 for Saturday/Sunday (dayofweek 5 and 6), 0.0 for weekdays.
po_is_weekend = pd.DatetimeIndex(po_verif.index).dayofweek >= 5
po_verif['weekend'] = po_is_weekend.astype(float)

# Force the forecast to 0 on weekends and wherever it is negative.
po_zero_rows = (po_verif['weekend'] > 0.0) | (po_verif['yhat'] < 0.0)
po_verif.loc[po_zero_rows, 'yhat'] = 0

In [None]:
## Validation
# Forecast plot with changepoints overlaid.
fig = po_m.plot(po_forecast)
a = add_changepoints_to_plot(fig.gca(), po_m, po_forecast)

# Component plots of the forecast.
f = po_m.plot_components(po_forecast)

# Verification plot around split_date (po_verif is built in an earlier cell).
f = utils.plot_verif(po_verif,date=split_date)

# Block plot restricted to calendar year 2019.
f, ax = plt.subplots(nrows=1, figsize=(16,10), sharey=True)
utils.make_plot_block(po_verif, '2019-01-01', '2019-12-31', ax=ax)


# Joint plot for the rows before split_date ("train set").
utils.plot_joint_plot(po_verif.loc[po_verif.index<split_date,:], title='train set', fname=None)

# Joint plot for the rows on/after split_date that have an observed y ("test set").
utils.plot_joint_plot(po_verif.loc[(po_verif.index>=split_date) &(po_verif['y'].notnull()) ,:], \
                      title='test set', fname=None)

In [None]:
# Block plot of the purchase-order verification frame from 2017-01-01 to 2019-12-31.
f, ax = plt.subplots(nrows=1, figsize=(16,10), sharey=True)
utils.make_plot_block(po_verif, '2017-01-01', '2019-12-31', ax=ax)


In [None]:
## Cross Validation
# Both runs use the full-data model with the same initial window and horizon;
# they differ only in `period` (7 days vs 1 day).
po_cv_window = dict(initial='1210 days', horizon='14 days')
df_cv = cross_validation(po_m_full, period='7 days', **po_cv_window)
df_cv_two_week = cross_validation(po_m_full, period='1 days', **po_cv_window)

In [None]:
# Add the forecast horizon and the absolute percentage error of every CV row.
df_cv = df_cv.assign(
    horizon=df_cv['ds'] - df_cv['cutoff'],
    mape=np.abs(df_cv['yhat'] - df_cv['y']) / df_cv['y'],
)

# Keep only rows with an error below 100%. Rows with y == 0 (infinite or NaN
# error) fail the comparison and are dropped as well, so the metrics below
# describe this filtered subset only.
df_cv = df_cv[df_cv['mape'] < 1]

df_p = performance_metrics(df_cv, rolling_window=0.1)
df_p.head()

In [None]:
# Horizon of every daily-period CV prediction (prediction date minus cutoff).
df_cv_two_week['horizon'] = df_cv_two_week['ds'] - df_cv_two_week['cutoff']

# Predictions made exactly 14 days / 1 day ahead, indexed by date.
# set_index returns a fresh frame, which avoids the SettingWithCopyWarning that
# in-place edits on a filtered slice trigger.
df_two_week_prediction = df_cv_two_week.loc[df_cv_two_week.horizon == '14 days', :].set_index('ds')
df_1_day_prediction = df_cv_two_week.loc[df_cv_two_week.horizon == '1 days', :].set_index('ds')

# Excel forecast next to the Prophet verification frame, for dates present in both.
po_verif_comp = (
    pd.concat([po_dat_curr_prediction, po_verif], axis=1, join='inner')
    .filter(items=['yhat_lower', 'yhat_upper', 'yhat', 'y', 'excel_forecast'])
)

# Keep dates with an observation, give the columns display names, then join the
# 14-day-ahead CV prediction on the 'ds' index. Overlapping columns coming from
# the CV frame get the "_2w" suffix; its 'yhat' no longer clashes and is renamed last.
po_verif_comp = (
    po_verif_comp.loc[po_verif_comp['y'].notnull(), :]
    .rename(columns={
        'y': 'Observations',
        'yhat': f'Prophet Model up until {split_date}',
        'excel_forecast': 'Current Excel Model',
    })
    .merge(right=df_two_week_prediction, on="ds", how="inner", suffixes=('', "_2w"))
    .rename(columns={'yhat': 'Prophet Model Two Weeks Ahead'})
)

# if weekend then make it 0
po_verif_comp['weekend'] = ((pd.DatetimeIndex(po_verif_comp.index).dayofweek) // 5 == 1).astype(float)
po_verif_comp.loc[po_verif_comp['weekend'] > 0.0, 'Prophet Model Two Weeks Ahead'] = 0
po_verif_comp.loc[po_verif_comp['weekend'] > 0.0, f'Prophet Model up until {split_date}'] = 0

# if negative then make it 0
po_verif_comp.loc[po_verif_comp['Prophet Model Two Weeks Ahead'] < 0.0, 'Prophet Model Two Weeks Ahead'] = 0
po_verif_comp.loc[po_verif_comp[f'Prophet Model up until {split_date}'] < 0.0, f'Prophet Model up until {split_date}'] = 0

po_verif_comp.head()

In [None]:
# Observations against the three forecasts on one wide axis.
po_plot_columns = [
    'Observations',
    f'Prophet Model up until {split_date}',
    'Current Excel Model',
    'Prophet Model Two Weeks Ahead',
]
_, ax = plt.subplots(figsize=(25, 8))
# ax.fill_between(verif_comp.index, verif_comp.loc[:,'yhat_lower'], verif_comp.loc[:,'yhat_upper'], color='coral', alpha=0.3)
po_verif_comp.loc[:, po_plot_columns].plot(ax=ax)


In [None]:
# MAPE of each forecast against the observations. Zeros in the forecast and in
# the observations are both replaced by NaN first, so any day on which either
# value is 0 drops out of the mean.
po_observed_nonzero = po_verif_comp['Observations'].replace(0, np.nan)

for label, column in [
    ('MAPE of the Current Model ', 'Current Excel Model'),
    (f'MAPE of the Prophet Model Up Until {split_date}  ', f'Prophet Model up until {split_date}'),
    ('MAPE of the Prophet Model Two Weeks Prediction  ', 'Prophet Model Two Weeks Ahead'),
]:
    forecast_nonzero = po_verif_comp[column].replace(0, np.nan)
    print(label + str(np.mean(np.abs(forecast_nonzero - po_observed_nonzero) / po_observed_nonzero)))


In [None]:
# MAPE of the Current Model 21.34107788721027
# MAPE of the Prophet Model Up Until 2019-05-19  26.943743728224433
# MAPE of the Prophet Model Two Weeks Prediction  25.942597891870182

In [None]:
# Mean absolute error of the current Excel model. There is no division by the
# observations here, so the printed value is an MAE, not a MAPE, and is labelled
# accordingly. Zeros in either series are treated as missing.
print('MAE of the Current Model ' + str(np.mean(np.abs(po_verif_comp['Current Excel Model'].replace(0, np.nan) - po_verif_comp['Observations'].replace(0, np.nan)))))

In [None]:
# CHECK FOR OUTLIERS SKEWING MAPE RESULTS
# Per-day absolute percentage error of the two-weeks-ahead forecast, largest first.
# Zeros in either series are replaced by NaN before the calculation.
(np.abs(po_verif_comp['Prophet Model Two Weeks Ahead'].replace(0, np.nan) - po_verif_comp['Observations'].replace(0, np.nan))/po_verif_comp['Observations'].replace(0, np.nan)).sort_values(ascending=False)

In [None]:
# REMOVE TOP 3 OUTLIERS SKEWING MAPE RESULTS
# The three dates are hardcoded (presumably read off the sorted errors above;
# re-check them whenever the data or split_date changes).
adj_po_verif_comp = po_verif_comp[~po_verif_comp.index.isin(['2019-06-21','2019-06-25','2019-06-27'])]

In [None]:
# RECALCULATE MAPE RESULTS w/o OUTLIERS
# Same calculation as for the unadjusted frame: zeros in the forecast and in the
# observations are replaced by NaN, so those days drop out of the mean.
adj_po_observed_nonzero = adj_po_verif_comp['Observations'].replace(0, np.nan)

for label, column in [
    ('MAPE of the Current Model ', 'Current Excel Model'),
    (f'MAPE of the Prophet Model Up Until {split_date}  ', f'Prophet Model up until {split_date}'),
    ('MAPE of the Prophet Model Two Weeks Prediction  ', 'Prophet Model Two Weeks Ahead'),
]:
    forecast_nonzero = adj_po_verif_comp[column].replace(0, np.nan)
    print(label + str(np.mean(np.abs(forecast_nonzero - adj_po_observed_nonzero) / adj_po_observed_nonzero)))


## E. FB Prophet Model - Inbound Customer Returns

In [None]:
# Settings shared by both customer-return models.
cr_prophet_params = dict(
    changepoint_prior_scale=0.1,
    changepoint_range=0.9,
    seasonality_mode='multiplicative',
    seasonality_prior_scale=10,
    holidays_prior_scale=10,
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
    holidays=cr_holidays,
)

cr_m = Prophet(**cr_prophet_params)
cr_m_full = Prophet(**cr_prophet_params)

# Fit both models: cr_m on the training rows only, cr_m_full on all actuals.
cr_m.fit(cr_dat_train)
cr_m_full.fit(cr_dat_all)

# Forecast frame from cr_m, extended by one daily step per test row plus forward_days more.
cr_future = cr_m.make_future_dataframe(periods=len(cr_dat_test)+forward_days, freq='1D')

# Disabled experiment: dropping weekends from the forecast dates (author's note: no effect).
# cr_future = cr_future[pd.DatetimeIndex(cr_future['ds']).dayofweek // 5 != 1]

cr_forecast = cr_m.predict(cr_future)
# NOTE: not the cell's last expression, so this preview is evaluated but not rendered.
cr_forecast.loc[cr_forecast.ds>=split_date,:].head()

# Component modes of the full model (also mid-cell, so not rendered).
cr_m_full.component_modes

## Validation
# Forecast plot with changepoints overlaid.
fig = cr_m.plot(cr_forecast)
a = add_changepoints_to_plot(fig.gca(), cr_m, cr_forecast)


# Component plots of the forecast.
f = cr_m.plot_components(cr_forecast)


# Build the verification frame with the project helper and plot it around split_date.
# (make_verif / plot_verif live in queries.utils; their internals are not shown here.)
cr_verif = utils.make_verif(cr_forecast, cr_dat_train, cr_dat_test)
f = utils.plot_verif(cr_verif,date=split_date)


# Block plot restricted to calendar year 2019.
f, ax = plt.subplots(nrows=1, figsize=(16,10), sharey=True)
utils.make_plot_block(cr_verif, '2019-01-01', '2019-12-31', ax=ax)


# Joint plot for the rows before split_date ("train set").
utils.plot_joint_plot(cr_verif.loc[cr_verif.index<split_date,:], title='train set', fname=None)

# Joint plot for the rows on/after split_date that have an observed y ("test set").
utils.plot_joint_plot(cr_verif.loc[(cr_verif.index>=split_date) &(cr_verif['y'].notnull()) ,:], \
                      title='test set', fname=None)


In [None]:
## Cross Validation
# Two runs on the full-data model: same initial window and horizon, different period.
df_cv = cross_validation(cr_m_full, initial='1210 days', period='7 days', horizon='14 days')
df_cv_two_week = cross_validation(cr_m_full, initial='1210 days', period='1 days', horizon='14 days')

# Horizon and absolute percentage error of every weekly-period CV row.
df_cv = df_cv.assign(
    horizon=df_cv['ds'] - df_cv['cutoff'],
    mape=np.abs(df_cv['yhat'] - df_cv['y']) / df_cv['y'],
)
# Keep only rows with an error below 100% (rows with y == 0 fail the comparison
# and are dropped too); the metrics describe this filtered subset only.
df_cv = df_cv.loc[df_cv.mape < 1, :]
df_p = performance_metrics(df_cv, rolling_window=0.1)
# NOTE: not the cell's last expression, so this preview is evaluated but not rendered.
df_p.head()

# Horizon of every daily-period CV prediction (prediction date minus cutoff).
df_cv_two_week['horizon'] = df_cv_two_week['ds'] - df_cv_two_week['cutoff']

# Predictions made exactly 14 days / 1 day ahead, indexed by date.
# set_index returns a fresh frame, which avoids the SettingWithCopyWarning that
# in-place edits on a filtered slice trigger.
df_two_week_prediction = df_cv_two_week.loc[df_cv_two_week.horizon == '14 days', :].set_index('ds')
df_1_day_prediction = df_cv_two_week.loc[df_cv_two_week.horizon == '1 days', :].set_index('ds')

In [None]:
# Block plot of the customer-return verification frame from 2017-01-01 to 2019-12-31.
f, ax = plt.subplots(nrows=1, figsize=(16,10), sharey=True)
utils.make_plot_block(cr_verif, '2017-01-01', '2019-12-31', ax=ax)

In [None]:
# Block plot of the customer-return verification frame from 2016-01-01 to 2017-01-01.
f, ax = plt.subplots(nrows=1, figsize=(16,10), sharey=True)
utils.make_plot_block(cr_verif, '2016-01-01', '2017-01-01', ax=ax)

In [None]:
# Block plot of the customer-return verification frame from 2017-01-01 to 2018-01-01.
f, ax = plt.subplots(nrows=1, figsize=(16,10), sharey=True)
utils.make_plot_block(cr_verif, '2017-01-01', '2018-01-01', ax=ax)

In [None]:
# Block plot of the customer-return verification frame for calendar year 2019.
f, ax = plt.subplots(nrows=1, figsize=(16,10), sharey=True)
utils.make_plot_block(cr_verif, '2019-01-01', '2019-12-31', ax=ax)

In [None]:
# NOTE: df_two_week_prediction is a name reused by every section; it must hold
# the customer-returns cross-validation result when this cell runs.

# Excel forecast next to the Prophet verification frame, for dates present in both.
cr_verif_comp = (
    pd.concat([cr_dat_curr_prediction, cr_verif], axis=1, join='inner')
    .filter(items=['yhat_lower', 'yhat_upper', 'yhat', 'y', 'excel_forecast'])
)

# Keep dates with an observation, give the columns display names, then join the
# 14-day-ahead CV prediction on the 'ds' index. The chain returns new frames
# instead of renaming a filtered slice in place (SettingWithCopyWarning pattern).
cr_verif_comp = (
    cr_verif_comp.loc[cr_verif_comp['y'].notnull(), :]
    .rename(columns={
        'y': 'Observations',
        'yhat': f'Prophet Model up until {split_date}',
        'excel_forecast': 'Current Excel Model',
    })
    .merge(right=df_two_week_prediction, on="ds", how="inner", suffixes=('', "_2w"))
    .rename(columns={'yhat': 'Prophet Model Two Weeks Ahead'})
)

# if weekend then make it 0
cr_verif_comp['weekend'] = ((pd.DatetimeIndex(cr_verif_comp.index).dayofweek) // 5 == 1).astype(float)
cr_verif_comp.loc[cr_verif_comp['weekend'] > 0.0, 'Prophet Model Two Weeks Ahead'] = 0
cr_verif_comp.loc[cr_verif_comp['weekend'] > 0.0, f'Prophet Model up until {split_date}'] = 0

# if negative then make it 0
cr_verif_comp.loc[cr_verif_comp['Prophet Model Two Weeks Ahead'] < 0.0, 'Prophet Model Two Weeks Ahead'] = 0
cr_verif_comp.loc[cr_verif_comp[f'Prophet Model up until {split_date}'] < 0.0, f'Prophet Model up until {split_date}'] = 0


In [None]:
# Observations against the three forecasts on one wide axis.
cr_plot_columns = [
    'Observations',
    f'Prophet Model up until {split_date}',
    'Current Excel Model',
    'Prophet Model Two Weeks Ahead',
]
_, ax = plt.subplots(figsize=(25, 8))
cr_verif_comp.loc[:, cr_plot_columns].plot(ax=ax)
# ax.fill_between(verif_comp.index, verif_comp.loc[:,'yhat_lower'], verif_comp.loc[:,'yhat_upper'], color='coral', alpha=0.3)


# MAPE of each forecast. Only the denominator has its zeros replaced by NaN, so
# days with a zero observation drop out of the mean.
cr_observed = cr_verif_comp['Observations']
cr_observed_nonzero = cr_observed.replace(0, np.nan)

for label, column in [
    ('MAPE of the Current Model ', 'Current Excel Model'),
    (f'MAPE of the Prophet Model Up Until {split_date}  ', f'Prophet Model up until {split_date}'),
    ('MAPE of the Prophet Model Two Weeks Prediction  ', 'Prophet Model Two Weeks Ahead'),
]:
    print(label + str(np.mean(np.abs(cr_verif_comp[column] - cr_observed) / cr_observed_nonzero)))


In [None]:
# CHECK FOR OUTLIERS SKEWING MAPE RESULTS
# Per-day absolute percentage error of the two-weeks-ahead forecast, largest first.
# Zeros in either series are replaced by NaN before the calculation.
(np.abs(cr_verif_comp['Prophet Model Two Weeks Ahead'].replace(0, np.nan) - cr_verif_comp['Observations'].replace(0, np.nan))/cr_verif_comp['Observations'].replace(0, np.nan)).sort_values(ascending=False)

In [None]:
# REMOVE TOP 3 OUTLIERS SKEWING MAPE RESULTS
cr_outlier_dates = ['2019-05-27', '2019-07-15', '2019-06-25']
adj_cr_verif_comp = cr_verif_comp[~cr_verif_comp.index.isin(cr_outlier_dates)]

# RECALCULATE MAPE RESULTS w/o OUTLIERS
# Zeros in the forecast and in the observations are both replaced by NaN, so any
# day on which either value is 0 drops out of the mean.
adj_cr_observed_nonzero = adj_cr_verif_comp['Observations'].replace(0, np.nan)

for label, column in [
    ('MAPE of the Current Model ', 'Current Excel Model'),
    (f'MAPE of the Prophet Model Up Until {split_date}  ', f'Prophet Model up until {split_date}'),
    ('MAPE of the Prophet Model Two Weeks Prediction  ', 'Prophet Model Two Weeks Ahead'),
]:
    forecast_nonzero = adj_cr_verif_comp[column].replace(0, np.nan)
    print(label + str(np.mean(np.abs(forecast_nonzero - adj_cr_observed_nonzero) / adj_cr_observed_nonzero)))


## F. FB Prophet Model - Inbound Transfer Orders

In [None]:
# Settings shared by both inbound-transfer models.
# No holidays table is passed (the `holidays = ito_holidays` argument is disabled).
ito_prophet_params = dict(
    changepoint_prior_scale=0.1,
    changepoint_range=0.9,
    seasonality_mode='multiplicative',
    seasonality_prior_scale=10,
    holidays_prior_scale=10,
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
)

ito_m = Prophet(**ito_prophet_params)
ito_m_full = Prophet(**ito_prophet_params)

# Fit both models: ito_m on the training rows only, ito_m_full on all actuals.
ito_m.fit(ito_dat_train)
ito_m_full.fit(ito_dat_all)

# Forecast from ito_m, extended by one daily step per test row plus forward_days more.
ito_future = ito_m.make_future_dataframe(periods=len(ito_dat_test)+forward_days, freq='1D')
ito_forecast = ito_m.predict(ito_future)
# NOTE: not the cell's last expression, so this preview is evaluated but not rendered.
ito_forecast.loc[ito_forecast.ds>=split_date,:].head()

# Component modes of the full model (also mid-cell, so not rendered).
ito_m_full.component_modes

## Validation
# Forecast plot with changepoints overlaid.
fig = ito_m.plot(ito_forecast)
a = add_changepoints_to_plot(fig.gca(), ito_m, ito_forecast)


# Component plots of the forecast.
f = ito_m.plot_components(ito_forecast)


# Build the verification frame with the project helper and plot it around split_date.
# (make_verif / plot_verif live in queries.utils; their internals are not shown here.)
ito_verif = utils.make_verif(ito_forecast, ito_dat_train, ito_dat_test)
f = utils.plot_verif(ito_verif,date=split_date)


# Block plot restricted to calendar year 2019.
f, ax = plt.subplots(nrows=1, figsize=(16,10), sharey=True)
utils.make_plot_block(ito_verif, '2019-01-01', '2019-12-31', ax=ax)


# Joint plot for the rows before split_date ("train set").
utils.plot_joint_plot(ito_verif.loc[ito_verif.index<split_date,:], title='train set', fname=None)

# Joint plot for the rows on/after split_date that have an observed y ("test set").
utils.plot_joint_plot(ito_verif.loc[(ito_verif.index>=split_date) &(ito_verif['y'].notnull()) ,:], \
                      title='test set', fname=None)

## Cross Validation
# Two runs on the full-data model: same initial window and horizon, different period.
df_cv = cross_validation(ito_m_full, initial='1210 days', period='7 days', horizon='14 days')
df_cv_two_week = cross_validation(ito_m_full, initial='1210 days', period='1 days', horizon='14 days')

# Horizon and absolute percentage error of every weekly-period CV row.
df_cv = df_cv.assign(
    horizon=df_cv['ds'] - df_cv['cutoff'],
    mape=np.abs(df_cv['yhat'] - df_cv['y']) / df_cv['y'],
)
# Keep only rows with an error below 100% (rows with y == 0 fail the comparison
# and are dropped too); the metrics describe this filtered subset only.
df_cv = df_cv.loc[df_cv.mape < 1, :]
df_p = performance_metrics(df_cv, rolling_window=0.1)
# NOTE: not the cell's last expression, so this preview is evaluated but not rendered.
df_p.head()

# Horizon of every daily-period CV prediction (prediction date minus cutoff).
df_cv_two_week['horizon'] = df_cv_two_week['ds'] - df_cv_two_week['cutoff']

# Predictions made exactly 14 days / 1 day ahead, indexed by date.
# set_index returns a fresh frame, which avoids the SettingWithCopyWarning that
# in-place edits on a filtered slice trigger.
df_two_week_prediction = df_cv_two_week.loc[df_cv_two_week.horizon == '14 days', :].set_index('ds')
df_1_day_prediction = df_cv_two_week.loc[df_cv_two_week.horizon == '1 days', :].set_index('ds')

# Excel forecast next to the Prophet verification frame, for dates present in both.
ito_verif_comp = (
    pd.concat([ito_dat_curr_prediction, ito_verif], axis=1, join='inner')
    .filter(items=['yhat_lower', 'yhat_upper', 'yhat', 'y', 'excel_forecast'])
)

# Keep dates with an observation, give the columns display names, then join the
# 14-day-ahead CV prediction on the 'ds' index. The chain returns new frames
# instead of renaming a filtered slice in place (SettingWithCopyWarning pattern).
ito_verif_comp = (
    ito_verif_comp.loc[ito_verif_comp['y'].notnull(), :]
    .rename(columns={
        'y': 'Observations',
        'yhat': f'Prophet Model up until {split_date}',
        'excel_forecast': 'Current Excel Model',
    })
    .merge(right=df_two_week_prediction, on="ds", how="inner", suffixes=('', "_2w"))
    .rename(columns={'yhat': 'Prophet Model Two Weeks Ahead'})
)

# if weekend then make it 0
ito_verif_comp['weekend'] = ((pd.DatetimeIndex(ito_verif_comp.index).dayofweek) // 5 == 1).astype(float)
ito_verif_comp.loc[ito_verif_comp['weekend'] > 0.0, 'Prophet Model Two Weeks Ahead'] = 0
ito_verif_comp.loc[ito_verif_comp['weekend'] > 0.0, f'Prophet Model up until {split_date}'] = 0

# if negative then make it 0
ito_verif_comp.loc[ito_verif_comp['Prophet Model Two Weeks Ahead'] < 0.0, 'Prophet Model Two Weeks Ahead'] = 0
ito_verif_comp.loc[ito_verif_comp[f'Prophet Model up until {split_date}'] < 0.0, f'Prophet Model up until {split_date}'] = 0


In [None]:

# Observations against the three forecasts on one wide axis.
ito_plot_columns = [
    'Observations',
    f'Prophet Model up until {split_date}',
    'Current Excel Model',
    'Prophet Model Two Weeks Ahead',
]
_, ax = plt.subplots(figsize=(25, 8))
ito_verif_comp.loc[:, ito_plot_columns].plot(ax=ax)
# ax.fill_between(verif_comp.index, verif_comp.loc[:,'yhat_lower'], verif_comp.loc[:,'yhat_upper'], color='coral', alpha=0.3)


# MAPE of each forecast. Only the denominator has its zeros replaced by NaN, so
# days with a zero observation drop out of the mean.
ito_observed = ito_verif_comp['Observations']
ito_observed_nonzero = ito_observed.replace(0, np.nan)

for label, column in [
    ('MAPE of the Current Model ', 'Current Excel Model'),
    (f'MAPE of the Prophet Model Up Until {split_date}  ', f'Prophet Model up until {split_date}'),
    ('MAPE of the Prophet Model Two Weeks Prediction  ', 'Prophet Model Two Weeks Ahead'),
]:
    print(label + str(np.mean(np.abs(ito_verif_comp[column] - ito_observed) / ito_observed_nonzero)))


In [None]:
# Per-day absolute percentage error of the two-weeks-ahead forecast, largest first.
# Zeros in either series are replaced by NaN before the calculation.
(np.abs(ito_verif_comp['Prophet Model Two Weeks Ahead'].replace(0, np.nan) - ito_verif_comp['Observations'].replace(0, np.nan))/ito_verif_comp['Observations'].replace(0, np.nan)).sort_values(ascending=False)

In [None]:
# REMOVE TOP 3 OUTLIERS SKEWING MAPE RESULTS
ito_outlier_dates = ['2019-06-17', '2019-07-02', '2019-06-27']
adj_ito_verif_comp = ito_verif_comp[~ito_verif_comp.index.isin(ito_outlier_dates)]

# RECALCULATE MAPE RESULTS w/o OUTLIERS
# Zeros in the forecast and in the observations are both replaced by NaN, so any
# day on which either value is 0 drops out of the mean.
adj_ito_observed_nonzero = adj_ito_verif_comp['Observations'].replace(0, np.nan)

for label, column in [
    ('MAPE of the Current Model ', 'Current Excel Model'),
    (f'MAPE of the Prophet Model Up Until {split_date}  ', f'Prophet Model up until {split_date}'),
    ('MAPE of the Prophet Model Two Weeks Prediction  ', 'Prophet Model Two Weeks Ahead'),
]:
    forecast_nonzero = adj_ito_verif_comp[column].replace(0, np.nan)
    print(label + str(np.mean(np.abs(forecast_nonzero - adj_ito_observed_nonzero) / adj_ito_observed_nonzero)))


## G. FB Prophet Model - Outbound Vendor Returns

In [None]:
# Outbound vendor returns: fit one Prophet model on the training split (vr_m)
# and one on the full history (vr_m_full), plot the validation views, then run
# cross-validation on the full-history model.

# Both models are built from a single keyword set so their settings cannot
# drift apart.
vr_prophet_kwargs = dict(
    changepoint_prior_scale=0.1,
    changepoint_range=0.9,
    seasonality_mode='multiplicative',
    seasonality_prior_scale=10,
    holidays_prior_scale=10,
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
    # holidays=vr_holidays,
)
vr_m = Prophet(**vr_prophet_kwargs)
vr_m_full = Prophet(**vr_prophet_kwargs)

# fit the model
vr_m.fit(vr_dat_train)
vr_m_full.fit(vr_dat_all)

# predict over the test period plus forward_days
vr_future = vr_m.make_future_dataframe(periods=len(vr_dat_test) + forward_days, freq='1D')
vr_forecast = vr_m.predict(vr_future)
# display() is needed: a bare expression in the middle of a cell is not rendered
display(vr_forecast.loc[vr_forecast.ds >= split_date, :].head())

# components showing
display(vr_m_full.component_modes)

## Validation
fig = vr_m.plot(vr_forecast)
a = add_changepoints_to_plot(fig.gca(), vr_m, vr_forecast)

f = vr_m.plot_components(vr_forecast)

vr_verif = utils.make_verif(vr_forecast, vr_dat_train, vr_dat_test)
f = utils.plot_verif(vr_verif, date=split_date)

f, ax = plt.subplots(nrows=1, figsize=(16, 10), sharey=True)
utils.make_plot_block(vr_verif, '2019-01-01', '2019-12-31', ax=ax)

utils.plot_joint_plot(vr_verif.loc[vr_verif.index < split_date, :], title='train set', fname=None)
utils.plot_joint_plot(
    vr_verif.loc[(vr_verif.index >= split_date) & (vr_verif['y'].notnull()), :],
    title='test set', fname=None)

## Cross Validation
# df_cv moves the cutoff 7 days at a time, df_cv_two_week 1 day at a time
df_cv = cross_validation(vr_m_full, initial='1210 days', period='7 days', horizon='14 days')
df_cv_two_week = cross_validation(vr_m_full, initial='1210 days', period='1 days', horizon='14 days')

df_cv['horizon'] = df_cv['ds'] - df_cv['cutoff']
df_cv['mape'] = np.abs(df_cv['yhat'] - df_cv['y']) / df_cv['y']
# Keep only rows whose absolute percentage error is below 100%; rows with
# y == 0 produce inf/NaN and fail the same comparison, so they are dropped too.
df_cv = df_cv.loc[df_cv.mape < 1, :]
df_p = performance_metrics(df_cv, rolling_window=0.1)
display(df_p.head())

df_cv_two_week['horizon'] = df_cv_two_week['ds'] - df_cv_two_week['cutoff']
# set_index() returns a new frame indexed by ds with the ds column removed.
# This replaces assigning .index and calling drop(inplace=True) on a .loc slice.
df_two_week_prediction = df_cv_two_week.loc[df_cv_two_week['horizon'] == '14 days', :].set_index('ds')
df_1_day_prediction = df_cv_two_week.loc[df_cv_two_week['horizon'] == '1 days', :].set_index('ds')



In [None]:

# Comparison frame for vendor returns: observations, the Prophet forecast from
# the training split, and the 14-day-horizon cross-validation forecast.
vr_split_col = f'Prophet Model up until {split_date}'
vr_2w_col = 'Prophet Model Two Weeks Ahead'

vr_verif_comp = pd.concat([vr_verif], axis=1, join='inner').filter(
    items=['yhat_lower', 'yhat_upper', 'yhat', 'y', 'excel_forecast'])
vr_verif_comp = vr_verif_comp.loc[vr_verif_comp['y'].notnull(), :]
vr_verif_comp = vr_verif_comp.rename(columns={'y': 'Observations', 'yhat': vr_split_col})

# Clashing columns coming from the cross-validation frame get the "_2w" suffix.
vr_verif_comp = vr_verif_comp.merge(
    right=df_two_week_prediction, on="ds", how="inner", suffixes=('', "_2w"))
vr_verif_comp = vr_verif_comp.rename(columns={'yhat': vr_2w_col})

# if weekend then make it 0 (dayofweek 5 and 6 are Saturday and Sunday)
vr_verif_comp['weekend'] = (pd.DatetimeIndex(vr_verif_comp.index).dayofweek >= 5).astype(float)
vr_is_weekend = vr_verif_comp['weekend'] > 0.0
for vr_col in (vr_2w_col, vr_split_col):
    vr_verif_comp.loc[vr_is_weekend, vr_col] = 0

# if negative then make it 0
for vr_col in (vr_2w_col, vr_split_col):
    vr_verif_comp.loc[vr_verif_comp[vr_col] < 0.0, vr_col] = 0

_, ax = plt.subplots(figsize=(25, 8))
vr_verif_comp.loc[:, ['Observations', vr_split_col, vr_2w_col]].plot(ax=ax)

# MAPE per forecast; dates whose observation is 0 get a NaN denominator.
vr_obs = vr_verif_comp['Observations']
vr_obs_nonzero = vr_obs.replace(0, np.nan)
vr_mape_rows = (
    (f'MAPE of the Prophet Model Up Until {split_date}  ', vr_split_col),
    ('MAPE of the Prophet Model Two Weeks Prediction  ', vr_2w_col),
)
for mape_label, mape_col in vr_mape_rows:
    print(mape_label + str(np.mean(np.abs(vr_verif_comp[mape_col] - vr_obs) / vr_obs_nonzero)))


In [None]:
# Absolute percentage error of the two-week-ahead forecast per date, largest
# first. Zero forecasts and zero observations are masked to NaN beforehand.
(
    vr_verif_comp['Prophet Model Two Weeks Ahead'].replace(0, np.nan)
    .sub(vr_verif_comp['Observations'].replace(0, np.nan))
    .abs()
    .div(vr_verif_comp['Observations'].replace(0, np.nan))
    .sort_values(ascending=False)
)

In [None]:
# Remove the top 3 outlier dates that skew the MAPE results.
vr_outlier_dates = ['2019-06-04', '2019-06-11', '2019-05-22']
adj_vr_verif_comp = vr_verif_comp.loc[~vr_verif_comp.index.isin(vr_outlier_dates)]

# Recalculate the MAPE without the outliers. Zeros are masked to NaN in both
# the forecast and the observations. No Excel model is reported in this cell.
adj_vr_obs = adj_vr_verif_comp['Observations'].replace(0, np.nan)
adj_vr_mape_rows = (
    (f'MAPE of the Prophet Model Up Until {split_date}  ', f'Prophet Model up until {split_date}'),
    ('MAPE of the Prophet Model Two Weeks Prediction  ', 'Prophet Model Two Weeks Ahead'),
)
for mape_label, mape_col in adj_vr_mape_rows:
    adj_vr_pred = adj_vr_verif_comp[mape_col].replace(0, np.nan)
    print(mape_label + str(np.mean(np.abs(adj_vr_pred - adj_vr_obs) / adj_vr_obs)))


## H. FB Prophet Model - Outbound Transfer Orders

In [None]:
# Outbound transfer orders: fit one Prophet model on the training split (oto_m)
# and one on the full history (oto_m_full), plot the validation views, run
# cross-validation on the full-history model, then build the comparison frame.

# Both models are built from a single keyword set so their settings cannot
# drift apart.
oto_prophet_kwargs = dict(
    changepoint_prior_scale=0.1,
    changepoint_range=0.9,
    seasonality_mode='multiplicative',
    seasonality_prior_scale=10,
    holidays_prior_scale=10,
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
    # holidays=oto_holidays,
)
oto_m = Prophet(**oto_prophet_kwargs)
oto_m_full = Prophet(**oto_prophet_kwargs)

# fit the model
oto_m.fit(oto_dat_train)
oto_m_full.fit(oto_dat_all)

# predict over the test period plus forward_days
oto_future = oto_m.make_future_dataframe(periods=len(oto_dat_test) + forward_days, freq='1D')
oto_forecast = oto_m.predict(oto_future)
# display() is needed: a bare expression in the middle of a cell is not rendered
display(oto_forecast.loc[oto_forecast.ds >= split_date, :].head())

# components showing
display(oto_m_full.component_modes)

## Validation
fig = oto_m.plot(oto_forecast)
a = add_changepoints_to_plot(fig.gca(), oto_m, oto_forecast)

f = oto_m.plot_components(oto_forecast)

oto_verif = utils.make_verif(oto_forecast, oto_dat_train, oto_dat_test)
f = utils.plot_verif(oto_verif, date=split_date)

f, ax = plt.subplots(nrows=1, figsize=(16, 10), sharey=True)
utils.make_plot_block(oto_verif, '2019-01-01', '2019-12-31', ax=ax)

utils.plot_joint_plot(oto_verif.loc[oto_verif.index < split_date, :], title='train set', fname=None)
utils.plot_joint_plot(
    oto_verif.loc[(oto_verif.index >= split_date) & (oto_verif['y'].notnull()), :],
    title='test set', fname=None)

## Cross Validation
# df_cv moves the cutoff 7 days at a time, df_cv_two_week 1 day at a time
df_cv = cross_validation(oto_m_full, initial='1210 days', period='7 days', horizon='14 days')
df_cv_two_week = cross_validation(oto_m_full, initial='1210 days', period='1 days', horizon='14 days')

df_cv['horizon'] = df_cv['ds'] - df_cv['cutoff']
df_cv['mape'] = np.abs(df_cv['yhat'] - df_cv['y']) / df_cv['y']
# Keep only rows whose absolute percentage error is below 100%; rows with
# y == 0 produce inf/NaN and fail the same comparison, so they are dropped too.
df_cv = df_cv.loc[df_cv.mape < 1, :]
df_p = performance_metrics(df_cv, rolling_window=0.1)
display(df_p.head())

df_cv_two_week['horizon'] = df_cv_two_week['ds'] - df_cv_two_week['cutoff']
# set_index() returns a new frame indexed by ds with the ds column removed.
# This replaces assigning .index and calling drop(inplace=True) on a .loc slice.
df_two_week_prediction = df_cv_two_week.loc[df_cv_two_week['horizon'] == '14 days', :].set_index('ds')
df_1_day_prediction = df_cv_two_week.loc[df_cv_two_week['horizon'] == '1 days', :].set_index('ds')

## Comparison frame: Excel forecast, train-split Prophet forecast, observations
oto_split_col = f'Prophet Model up until {split_date}'
oto_2w_col = 'Prophet Model Two Weeks Ahead'

oto_verif_comp = pd.concat([oto_dat_curr_prediction, oto_verif], axis=1, join='inner').filter(
    items=['yhat_lower', 'yhat_upper', 'yhat', 'y', 'excel_forecast'])
oto_verif_comp = oto_verif_comp.loc[oto_verif_comp['y'].notnull(), :]
# rename() returns a new frame, so the .loc slice is not modified in place
oto_verif_comp = oto_verif_comp.rename(
    columns={'y': 'Observations', 'yhat': oto_split_col, 'excel_forecast': 'Current Excel Model'})

# Clashing columns coming from the cross-validation frame get the "_2w" suffix.
oto_verif_comp = oto_verif_comp.merge(
    right=df_two_week_prediction, on="ds", how="inner", suffixes=('', "_2w"))
oto_verif_comp = oto_verif_comp.rename(columns={'yhat': oto_2w_col})

# if weekend then make it 0 (dayofweek 5 and 6 are Saturday and Sunday)
oto_verif_comp['weekend'] = (pd.DatetimeIndex(oto_verif_comp.index).dayofweek >= 5).astype(float)
oto_is_weekend = oto_verif_comp['weekend'] > 0.0
for oto_col in (oto_2w_col, oto_split_col):
    oto_verif_comp.loc[oto_is_weekend, oto_col] = 0

# if negative then make it 0
for oto_col in (oto_2w_col, oto_split_col):
    oto_verif_comp.loc[oto_verif_comp[oto_col] < 0.0, oto_col] = 0


In [None]:

# Plot the OTO observations against the three forecasts, then print the MAPE of
# each forecast. Zeros are masked to NaN in both forecast and observations.
oto_split_col = f'Prophet Model up until {split_date}'
oto_plot_cols = ['Observations', oto_split_col,
                 'Current Excel Model', 'Prophet Model Two Weeks Ahead']

_, ax = plt.subplots(figsize=(25, 8))
oto_verif_comp.loc[:, oto_plot_cols].plot(ax=ax)

oto_obs_nonzero = oto_verif_comp['Observations'].replace(0, np.nan)

# (printed label, forecast column) pairs, in the order they are reported
oto_mape_rows = (
    ('MAPE of the Current Model ', 'Current Excel Model'),
    (f'MAPE of the Prophet Model Up Until {split_date}  ', oto_split_col),
    ('MAPE of the Prophet Model Two Weeks Prediction  ', 'Prophet Model Two Weeks Ahead'),
)
for mape_label, mape_col in oto_mape_rows:
    oto_pred_nonzero = oto_verif_comp[mape_col].replace(0, np.nan)
    print(mape_label + str(np.mean(np.abs(oto_pred_nonzero - oto_obs_nonzero) / oto_obs_nonzero)))


In [None]:
# Absolute percentage error of the two-week-ahead forecast per date, largest
# first. Zero forecasts and zero observations are masked to NaN beforehand.
(
    oto_verif_comp['Prophet Model Two Weeks Ahead'].replace(0, np.nan)
    .sub(oto_verif_comp['Observations'].replace(0, np.nan))
    .abs()
    .div(oto_verif_comp['Observations'].replace(0, np.nan))
    .sort_values(ascending=False)
)

In [None]:
# Remove the top 3 outlier dates that skew the MAPE results.
oto_outlier_dates = ['2019-06-27', '2019-06-07', '2019-06-04']
adj_oto_verif_comp = oto_verif_comp.loc[~oto_verif_comp.index.isin(oto_outlier_dates)]

# Recalculate the MAPE without the outliers. Zeros are masked to NaN in both
# the forecast and the observations.
adj_oto_obs = adj_oto_verif_comp['Observations'].replace(0, np.nan)
adj_oto_mape_rows = (
    ('MAPE of the Current Model ', 'Current Excel Model'),
    (f'MAPE of the Prophet Model Up Until {split_date}  ', f'Prophet Model up until {split_date}'),
    ('MAPE of the Prophet Model Two Weeks Prediction  ', 'Prophet Model Two Weeks Ahead'),
)
for mape_label, mape_col in adj_oto_mape_rows:
    adj_oto_pred = adj_oto_verif_comp[mape_col].replace(0, np.nan)
    print(mape_label + str(np.mean(np.abs(adj_oto_pred - adj_oto_obs) / adj_oto_obs)))


# --FINAL FB Prophet Ensemble Outputs--

In [None]:
# Floor negative point forecasts at 0 in every per-flow forecast frame.
# The forecast frames themselves are modified.
for flow_forecast in (co_forecast, po_forecast, cr_forecast,
                      ito_forecast, oto_forecast, vr_forecast):
    flow_forecast.loc[flow_forecast['yhat'] < 0.0, 'yhat'] = 0

# Keep only the date and point forecast of each flow, and give the forecast a
# flow-specific column name. rename() returns a new frame, which replaces
# renaming a column-subset slice of the forecast frame in place.
# Only the customer-order frame also renames ds to date_est; the other frames
# keep their ds column.
co_ensemble = co_forecast[['ds', 'yhat']].rename(columns={'ds': 'date_est', 'yhat': 'fcst_out_co'})
po_ensemble = po_forecast[['ds', 'yhat']].rename(columns={'yhat': 'fcst_in_po'})
cr_ensemble = cr_forecast[['ds', 'yhat']].rename(columns={'yhat': 'fcst_in_cr'})
ito_ensemble = ito_forecast[['ds', 'yhat']].rename(columns={'yhat': 'fcst_in_ito'})
oto_ensemble = oto_forecast[['ds', 'yhat']].rename(columns={'yhat': 'fcst_out_oto'})
vr_ensemble = vr_forecast[['ds', 'yhat']].rename(columns={'yhat': 'fcst_out_vr'})

In [None]:
# join outputs
#
# Each per-flow forecast is indexed by its date before concatenating, so rows
# are matched on the calendar date. Concatenating on the default row numbers
# would misalign the flows whenever the forecast frames differ in length or
# start date, and the fillna below would then also overwrite missing dates.
# NOTE(review): assumes each forecast frame has unique dates - confirm.
dfs = [
    co_ensemble.set_index('date_est')['fcst_out_co'],
    po_ensemble.set_index('ds')['fcst_in_po'],
    cr_ensemble.set_index('ds')['fcst_in_cr'],
    ito_ensemble.set_index('ds')['fcst_in_ito'],
    oto_ensemble.set_index('ds')['fcst_out_oto'],
    vr_ensemble.set_index('ds')['fcst_out_vr'],
]
nan_value = 0  # a flow with no forecast for a date contributes 0 to the totals

fcst_ensemble = (
    pd.concat(dfs, join='outer', axis=1)
    .sort_index()
    .fillna(nan_value)
    .rename_axis('date_est')
    .reset_index()  # date becomes the first column again, named date_est
)

# create total columns
fcst_ensemble['fcst_total_ins'] = (
    fcst_ensemble['fcst_in_cr']
    + fcst_ensemble['fcst_in_po']
    + fcst_ensemble['fcst_in_ito']
)

fcst_ensemble['fcst_total_outs'] = (
    fcst_ensemble['fcst_out_co']
    + fcst_ensemble['fcst_out_oto']
    + fcst_ensemble['fcst_out_vr']
)

fcst_ensemble['fcst_total_flow'] = (
    fcst_ensemble['fcst_out_co']
    + fcst_ensemble['fcst_out_oto']
    + fcst_ensemble['fcst_out_vr']
    + fcst_ensemble['fcst_in_cr']
    + fcst_ensemble['fcst_in_po']
    + fcst_ensemble['fcst_in_ito']
)

# total flow without the outbound vendor-return forecast (fcst_out_vr)
fcst_ensemble['fcst_total_flow_adj'] = (
    fcst_ensemble['fcst_out_co']
    + fcst_ensemble['fcst_out_oto']
    + fcst_ensemble['fcst_in_cr']
    + fcst_ensemble['fcst_in_po']
    + fcst_ensemble['fcst_in_ito']
)

In [None]:
# Show the first and last rows of the combined forecast.
display(fcst_ensemble.head(), fcst_ensemble.tail())

In [None]:
# Preview the first rows of the actuals.
data_raw_all.head(5)

In [None]:
# Restrict actuals and forecast to the test window: split_date through
# 2019-07-23, both bounds inclusive.
actuals_in_window = (data_raw_all.date_est >= split_date) & (data_raw_all.date_est <= '2019-07-23')
forecast_in_window = (fcst_ensemble.date_est >= split_date) & (fcst_ensemble.date_est <= '2019-07-23')

test_actuals = data_raw_all.loc[actuals_in_window, :]
test_forecast = fcst_ensemble.loc[forecast_in_window, :]

In [None]:
# Restrict the Excel forecast to the same window: split_date through
# 2019-07-23, both bounds inclusive.
excel_in_window = (
    (dat_curr_prediction.FCST_Dt >= split_date)
    & (dat_curr_prediction.FCST_Dt <= '2019-07-23')
)
test_excel = dat_curr_prediction.loc[excel_in_window, :]

In [None]:
# Adjusted actuals: total units minus outbound vendor-return units.
# assign() builds a new frame. Plain assignment only aliased test_actuals, so
# the new column was also written into test_actuals, itself a .loc slice of
# data_raw_all.
test_actuals_exc_adj = test_actuals.assign(
    ADJ_TTL_ALL_UNITS=test_actuals['TTL_ALL_UNITS'] - test_actuals['OUT_VR_UNITS']
)

In [None]:
# Renumber the rows of the three test frames from 0, discarding the old index.
test_actuals, test_forecast, test_actuals_exc_adj = (
    test_frame.reset_index(drop=True)
    for test_frame in (test_actuals, test_forecast, test_actuals_exc_adj)
)

# Preview each frame. test_excel is shown with its index unchanged.
display(
    test_actuals.head(),
    test_excel.head(),
    test_actuals_exc_adj.head(),
    test_forecast.head(),
)

In [None]:
# Total-flow error of the Prophet ensemble against the actual total units.
total_flow_abs_err = np.abs(test_forecast['fcst_total_flow'] - test_actuals['TTL_ALL_UNITS'])

print('MAPE of the Prophet Model ' + str(np.mean(total_flow_abs_err / test_actuals['TTL_ALL_UNITS'])))

print('MAE of the Prophet Model ' + str(np.mean(total_flow_abs_err)))

In [None]:
# Outbound totals: MAPE then MAE.
outs_abs_err = np.abs(test_forecast['fcst_total_outs'] - test_actuals['TTL_OUT_UNITS'])
print('MAPE of the Prophet Model ' + str(np.mean(outs_abs_err / test_actuals['TTL_OUT_UNITS'])))
print('MAE of the Prophet Model ' + str(np.mean(outs_abs_err)))

# Inbound totals: the MAPE masks zeros to NaN in both the forecast and the
# actuals, while the MAE uses the unmasked values.
ins_fcst_nonzero = test_forecast['fcst_total_ins'].replace(0, np.nan)
ins_actual_nonzero = test_actuals['TTL_IN_UNITS'].replace(0, np.nan)
print('MAPE of the Prophet Model '
      + str(np.mean(np.abs(ins_fcst_nonzero - ins_actual_nonzero) / ins_actual_nonzero)))

ins_abs_err = np.abs(test_forecast['fcst_total_ins'] - test_actuals['TTL_IN_UNITS'])
print('MAE of the Prophet Model ' + str(np.mean(ins_abs_err)))


In [None]:
# MAPE of the Excel forecast against the adjusted actuals.
#
# The two frames are paired on the calendar date. test_excel still carries the
# row labels of dat_curr_prediction, whereas test_actuals_exc_adj has been
# renumbered from 0, so subtracting the two Series directly would pair rows by
# label instead of by date.
# NOTE(review): assumes FCST_Dt holds one plain calendar date per row,
# comparable to date_est - confirm against the Excel import.
excel_vs_actuals = (
    test_excel[['FCST_Dt', 'FCT_TOTAL']]
    .assign(FCST_Dt=lambda frame: pd.to_datetime(frame['FCST_Dt']))
    .merge(
        test_actuals_exc_adj[['date_est', 'ADJ_TTL_ALL_UNITS']],
        left_on='FCST_Dt', right_on='date_est', how='inner',
    )
)
excel_ape = (
    np.abs(excel_vs_actuals['FCT_TOTAL'] - excel_vs_actuals['ADJ_TTL_ALL_UNITS'])
    / excel_vs_actuals['ADJ_TTL_ALL_UNITS']
)
print('MAPE of the Excel Model ' + str(np.mean(excel_ape)))


In [None]:
# MAPE of the adjusted Prophet total against the adjusted actuals.
adj_actual_units = test_actuals_exc_adj['ADJ_TTL_ALL_UNITS']
adj_flow_ape = np.abs(test_forecast['fcst_total_flow_adj'] - adj_actual_units) / adj_actual_units
print('MAPE of the Prophet Model ' + str(np.mean(adj_flow_ape)))


In [None]:
# Final look at the first and last rows of the combined forecast.
display(fcst_ensemble.head(), fcst_ensemble.tail())

In [None]:
# Export the combined forecast to BigQuery, replacing the table if it exists.
fcst_ensemble.to_gbq(
    destination_table='adhoc_analytics.temp_test_py2bq',
    project_id='moda-operandi-dw',
    if_exists='replace',
)