In [1]:
main_path = '.'
nyc_data = main_path+str("/nyc_data.csv")
print(nyc_data)

./nyc_data.csv


In [2]:
# Libraries 
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt

In [3]:
# load the data (for prophete, the Date information must be part of the data and not an index as the project before.)
# YYYY-MM-DD
df = pd.read_csv(nyc_data)
future_df = pd.read_csv('future.csv')
future_df.tail() 

Unnamed: 0,Date,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
26,1/27/2021,,0,0,0,3.33,39.664
27,1/28/2021,,0,0,0,1.67,195.314
28,1/29/2021,,0,0,0,-2.78,235.894
29,1/30/2021,,0,0,0,1.11,152.752
30,1/31/2021,,0,0,0,4.44,158.62


In [4]:
# Merging the dataset 
# Be careful when just concating. The index may (most likely) not represent what you really have.  So you need to reset the index
# Without reseting the index, the dataframe goes until 30 indexes for this problem. 
# After reseting, it goes to 2220 (which is what we expect to have due to the number of data in the df)
df = pd.concat([df, future_df])
df = df.reset_index(drop=True)
df.tail()

Unnamed: 0,Date,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
2218,1/27/2021,,0,0,0,3.33,39.664
2219,1/28/2021,,0,0,0,1.67,195.314
2220,1/29/2021,,0,0,0,-2.78,235.894
2221,1/30/2021,,0,0,0,1.11,152.752
2222,1/31/2021,,0,0,0,4.44,158.62


In [5]:
# Rename variable 
# ds = date stamp
df = df.rename(columns={'Demand': 'y', 'Date':'ds'})
df.head(0)

Unnamed: 0,ds,y,Easter,Thanksgiving,Christmas,Temperature,Marketing


# Transforming the date variable

In [6]:
# YYYY-MM-DD (this is a standard format but also required for the prophet library)
df.ds = pd.to_datetime(df.ds, format="%m/%d/%Y")
df.head()

Unnamed: 0,ds,y,Easter,Thanksgiving,Christmas,Temperature,Marketing
0,2015-01-01,720.000885,0,0,0,3.68,41.305
1,2015-01-02,581.276773,0,0,0,4.73,131.574
2,2015-01-03,754.117039,0,0,0,7.23,162.7
3,2015-01-04,622.252774,0,0,0,10.96,160.281
4,2015-01-05,785.373319,0,0,0,6.92,51.077


# Holidays

In [7]:
# Easter Holiday
# Get all the Easter dates. 
# To do so, we extract from the DataTable when Easter == 1
# '.ds' means we only get the dates
dates = df[df.Easter == 1].ds

# we create a DataFrame, but also include the window parameter. It represents the days before and the days ahead of the event
# in our case, we do it by choosing '5 days' in advance and only '2 days' afterward

easter = pd.DataFrame({'holiday': 'easter', 'ds':dates, 'lower_window':-5, 'upper_window':2})
easter

Unnamed: 0,holiday,ds,lower_window,upper_window
94,easter,2015-04-05,-5,2
451,easter,2016-03-27,-5,2
836,easter,2017-04-16,-5,2
1186,easter,2018-04-01,-5,2
1571,easter,2019-04-21,-5,2
1928,easter,2020-04-12,-5,2


In [8]:
# Thanksgiving 
dates = df[df.Thanksgiving == 1].ds
thanksgiving = pd.DataFrame({'holiday': 'thanksgiving', 'ds':dates, 'lower_window':-3, 'upper_window':5})
thanksgiving

Unnamed: 0,holiday,ds,lower_window,upper_window
329,thanksgiving,2015-11-26,-3,5
693,thanksgiving,2016-11-24,-3,5
1057,thanksgiving,2017-11-23,-3,5
1421,thanksgiving,2018-11-22,-3,5
1792,thanksgiving,2019-11-28,-3,5
2156,thanksgiving,2020-11-26,-3,5


In [9]:
# Christmas 
dates = df[df.Thanksgiving == 1].ds
christmas = pd.DataFrame({'holiday': 'christmas', 'ds':dates, 'lower_window':-7, 'upper_window':7})
christmas

Unnamed: 0,holiday,ds,lower_window,upper_window
329,christmas,2015-11-26,-7,7
693,christmas,2016-11-24,-7,7
1057,christmas,2017-11-23,-7,7
1421,christmas,2018-11-22,-7,7
1792,christmas,2019-11-28,-7,7
2156,christmas,2020-11-26,-7,7


# Combine all events

In [10]:
# We need to store and merge everything 
holidays = pd.concat([easter, thanksgiving, christmas])
holidays.head()

Unnamed: 0,holiday,ds,lower_window,upper_window
94,easter,2015-04-05,-5,2
451,easter,2016-03-27,-5,2
836,easter,2017-04-16,-5,2
1186,easter,2018-04-01,-5,2
1571,easter,2019-04-21,-5,2


# Drop holidays from DataFrame

In [11]:
# Note that by dropping the holidays we have our regressors (temperature and marketing)
df_final = df.drop(columns=["Easter", "Thanksgiving", "Christmas"])
df_final.head()

Unnamed: 0,ds,y,Temperature,Marketing
0,2015-01-01,720.000885,3.68,41.305
1,2015-01-02,581.276773,4.73,131.574
2,2015-01-03,754.117039,7.23,162.7
3,2015-01-04,622.252774,10.96,160.281
4,2015-01-05,785.373319,6.92,51.077


# Prophet model

In [17]:
# Index col parameter is very important here because otherwise the index will be 0,1,2,...instead of the names we want to have later
parameters = pd.read_csv("prophete_best_params_forecasting_product.csv", index_col=0)
parameters

Unnamed: 0,0
changepoint_prior_scale,0.01
holidays_prior_scale,5
seasonality_mode,additive
seasonality_prior_scale,5
rmse,48.069


In [50]:
# Extracting the parameters
# ps: .loc["string"] return a object datatype. We need to convert it so we can use it lalter in our model
cps = float(parameters.loc["changepoint_prior_scale"]) # let's abreviate as cps 
hps = float(parameters.loc["holidays_prior_scale"])
sps = float(parameters.loc["seasonality_prior_scale"])
sm = parameters.loc["seasonality_mode"]
sm = sm[0]

  cps = float(parameters.loc["changepoint_prior_scale"]) # let's abreviate as cps
  hps = float(parameters.loc["holidays_prior_scale"])
  sps = float(parameters.loc["seasonality_prior_scale"])
  sm = sm[0]


In [51]:
# Splitting the data 
training  = df.iloc[:-31,:]
future_df = df.iloc[-31:,:]

In [52]:
from prophet import Prophet

In [53]:
# Building the model 
# It idedntifies the yearly,weakly and daily data automatically if the parameter is set to be 'auto' (default value)
m = Prophet(
    holidays=holidays,
    seasonality_mode= sm, 
    seasonality_prior_scale=sps, 
    holidays_prior_scale=hps, 
    changepoint_prior_scale=cps
    )

# Becacuse our df_final dataframe has the regressors, we need to add the following commands
m.add_regressor("Temperature")
m.add_regressor("Marketing")
m.fit(training)

11:27:27 - cmdstanpy - INFO - Chain [1] start processing
11:27:28 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x7728e41eba10>

# Forecasting 

In [None]:
# Make a future dataframe