In [1]:
# Libraries and data
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from prophet import Prophet

# Read the observed data and the future-period data from CSV.
# Dates in both files are written as M/D/YYYY (e.g. 1/1/2021).
df, future_df = (
    pd.read_csv(csv_path)
    for csv_path in ('../nyc_data.csv', '../future.csv')
)
future_df.head()

Unnamed: 0,Date,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
0,1/1/2021,,0,0,0,5.0,154.221
1,1/2/2021,,0,0,0,11.11,264.805
2,1/3/2021,,0,0,0,3.89,115.499
3,1/4/2021,,0,0,0,6.67,124.65
4,1/5/2021,,0,0,0,5.56,77.968


In [2]:
# Stack the future rows underneath the observed rows and renumber the
# index from 0 so the combined frame has one continuous row index.
df = pd.concat([df, future_df], ignore_index=True)
df.tail()

Unnamed: 0,Date,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
2218,1/27/2021,,0,0,0,3.33,39.664
2219,1/28/2021,,0,0,0,1.67,195.314
2220,1/29/2021,,0,0,0,-2.78,235.894
2221,1/30/2021,,0,0,0,1.11,152.752
2222,1/31/2021,,0,0,0,4.44,158.62


In [3]:
# Rename the date and target columns to `ds` and `y`, the names the
# Prophet model fitted later in this notebook is given.
prophet_names = {"Date": "ds", "Demand": "y"}
df = df.rename(columns=prophet_names)
df.head(1)

Unnamed: 0,ds,y,Easter,Thanksgiving,Christmas,Temperature,Marketing
0,1/1/2015,720.000885,0,0,0,3.68,41.305


In [4]:
# Convert the month/day/year strings in `ds` into datetime values
df["ds"] = pd.to_datetime(df["ds"], format="%m/%d/%Y")
df["ds"]

0      2015-01-01
1      2015-01-02
2      2015-01-03
3      2015-01-04
4      2015-01-05
          ...    
2218   2021-01-27
2219   2021-01-28
2220   2021-01-29
2221   2021-01-30
2222   2021-01-31
Name: ds, Length: 2223, dtype: datetime64[ns]

# Holidays

In [5]:
# Easter: take the dates flagged by the Easter indicator column and build a
# holiday table whose effect window runs from 5 days before to 2 days after.
dates = pd.to_datetime(df.loc[df["Easter"] == 1, "ds"])
easter = pd.DataFrame(
    {
        "holiday": "easter",
        "ds": dates,
        "lower_window": -5,
        "upper_window": 2,
    }
)

In [6]:
# Thanksgiving: take the dates flagged by the Thanksgiving indicator column
# and build a holiday table with a window of 3 days before to 6 days after.
dates = pd.to_datetime(df.loc[df["Thanksgiving"] == 1, "ds"])
thanksgiving = pd.DataFrame(
    {
        "holiday": "thanksgiving",
        "ds": dates,
        "lower_window": -3,
        "upper_window": 6,
    }
)

In [7]:
# Display the Thanksgiving holiday table to check the dates and windows
thanksgiving

Unnamed: 0,holiday,ds,lower_window,upper_window
329,thanksgiving,2015-11-26,-3,6
693,thanksgiving,2016-11-24,-3,6
1057,thanksgiving,2017-11-23,-3,6
1421,thanksgiving,2018-11-22,-3,6
1792,thanksgiving,2019-11-28,-3,6
2156,thanksgiving,2020-11-26,-3,6


In [8]:
# Stack the per-holiday tables into the single holiday frame handed to Prophet.
# Each row keeps the index label it had in `df` (no re-indexing is done here).
holidays = pd.concat(objs=[easter, thanksgiving], axis=0)
holidays

Unnamed: 0,holiday,ds,lower_window,upper_window
94,easter,2015-04-05,-5,2
451,easter,2016-03-27,-5,2
836,easter,2017-04-16,-5,2
1186,easter,2018-04-01,-5,2
1571,easter,2019-04-21,-5,2
1928,easter,2020-04-12,-5,2
329,thanksgiving,2015-11-26,-3,6
693,thanksgiving,2016-11-24,-3,6
1057,thanksgiving,2017-11-23,-3,6
1421,thanksgiving,2018-11-22,-3,6


In [9]:
# The Easter/Thanksgiving indicators now live in `holidays`, so drop them from `df`
df = df.drop(columns=["Easter", "Thanksgiving"])

In [10]:
# Split data: the final 31 rows (the appended January 2021 rows) become the
# forecast frame; every row before them is used for training.
training = df.head(-31)
future_df = df.tail(31)

In [11]:
# Load the saved parameter table; the first CSV column (the parameter
# names) becomes the row index so values can be looked up by name.
parameters = pd.read_csv('best_params_prophet.csv', index_col=0)

In [12]:
# Extract the parameter values.
# `parameters.loc[name]` returns that row as a Series; `.iloc[0]` takes its
# first value explicitly by position. The previous `[0]` relied on pandas
# treating an integer key as a position when the labels are not integers,
# which is deprecated (FutureWarning in pandas 2.1+) and no longer works in
# pandas 3.
changepoint_prior_scale = float(parameters.loc['changepoint_prior_scale'].iloc[0])
holidays_prior_scale = float(parameters.loc['holidays_prior_scale'].iloc[0])
seasonality_prior_scale = float(parameters.loc['seasonality_prior_scale'].iloc[0])
# The seasonality mode is kept as read from the file (not cast to float).
seasonality_mode = parameters.loc['seasonality_mode'].iloc[0]

In [13]:
# Prophet model built from the parameter values loaded above and the
# custom Easter/Thanksgiving holiday table.
m = Prophet(
    changepoint_prior_scale=changepoint_prior_scale,
    seasonality_prior_scale=seasonality_prior_scale,
    holidays_prior_scale=holidays_prior_scale,
    seasonality_mode=seasonality_mode,
    holidays=holidays,
)

# Christmas, temperature and marketing enter the model as extra regressors.
m.add_regressor('Christmas')
m.add_regressor('Temperature')
m.add_regressor('Marketing')

# Fit on the training rows only (the last 31 rows of `df` are held out).
m.fit(training)

07:47:35 - cmdstanpy - INFO - Chain [1] start processing
07:47:35 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x25e68bcf550>