In [5]:
main_path = 'course-material-time-series-forecasting-product'
nyc_data = main_path+str("/nyc_data.csv")
print(nyc_data)

course-material-time-series-forecasting-product/nyc_data.csv


In [6]:
# Libraries 
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt

In [8]:
# load the data (for prophete, the Date information must be part of the data and not an index as the project before.)
# YYYY-MM-DD
df = pd.read_csv(nyc_data)
df.head() 

Unnamed: 0,Date,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
0,1/1/2015,720.000885,0,0,0,3.68,41.305
1,1/2/2015,581.276773,0,0,0,4.73,131.574
2,1/3/2015,754.117039,0,0,0,7.23,162.7
3,1/4/2015,622.252774,0,0,0,10.96,160.281
4,1/5/2015,785.373319,0,0,0,6.92,51.077


In [9]:
# Rename variable 
# ds = date stamp
df = df.rename(columns={'Demand': 'y', 'Date':'ds'})
df.head()

Unnamed: 0,ds,y,Easter,Thanksgiving,Christmas,Temperature,Marketing
0,1/1/2015,720.000885,0,0,0,3.68,41.305
1,1/2/2015,581.276773,0,0,0,4.73,131.574
2,1/3/2015,754.117039,0,0,0,7.23,162.7
3,1/4/2015,622.252774,0,0,0,10.96,160.281
4,1/5/2015,785.373319,0,0,0,6.92,51.077


# Transforming the date variable

In [11]:
# YYYY-MM-DD (this is a standard format but also required for the prophet library)
df.ds = pd.to_datetime(df.ds, format="%m/%d/%Y")
df

Unnamed: 0,ds,y,Easter,Thanksgiving,Christmas,Temperature,Marketing
0,2015-01-01,720.000885,0,0,0,3.68,41.305
1,2015-01-02,581.276773,0,0,0,4.73,131.574
2,2015-01-03,754.117039,0,0,0,7.23,162.700
3,2015-01-04,622.252774,0,0,0,10.96,160.281
4,2015-01-05,785.373319,0,0,0,6.92,51.077
...,...,...,...,...,...,...,...
2187,2020-12-27,685.915026,0,0,0,2.89,38.674
2188,2020-12-28,998.051170,0,0,0,8.83,166.712
2189,2020-12-29,847.123399,0,0,0,3.48,161.865
2190,2020-12-30,857.521043,0,0,0,5.97,179.634


# Holidays

In [15]:
# Easter Holiday
# Get all the Easter dates. 
# To do so, we extract from the DataTable when Easter == 1
# '.ds' means we only get the dates
dates = df[df.Easter == 1].ds

# we create a DataFrame, but also include the window parameter. It represents the days before and the days ahead of the event
# in our case, we do it by choosing '5 days' in advance and only '2 days' afterward

easter = pd.DataFrame({'holiday': 'easter', 'ds':dates, 'lower_window':-5, 'upper_window':2})
easter

Unnamed: 0,holiday,ds,lower_window,upper_window
94,easter,2015-04-05,-5,2
451,easter,2016-03-27,-5,2
836,easter,2017-04-16,-5,2
1186,easter,2018-04-01,-5,2
1571,easter,2019-04-21,-5,2
1928,easter,2020-04-12,-5,2


In [16]:
# Thanksgiving 
dates = df[df.Thanksgiving == 1].ds
thanksgiving = pd.DataFrame({'holiday': 'thanksgiving', 'ds':dates, 'lower_window':-3, 'upper_window':5})
thanksgiving

Unnamed: 0,holiday,ds,lower_window,upper_window
329,thanksgiving,2015-11-26,-3,5
693,thanksgiving,2016-11-24,-3,5
1057,thanksgiving,2017-11-23,-3,5
1421,thanksgiving,2018-11-22,-3,5
1792,thanksgiving,2019-11-28,-3,5
2156,thanksgiving,2020-11-26,-3,5


In [17]:
# Christmas 
dates = df[df.Thanksgiving == 1].ds
christmas = pd.DataFrame({'holiday': 'christmas', 'ds':dates, 'lower_window':-7, 'upper_window':7})
christmas

Unnamed: 0,holiday,ds,lower_window,upper_window
329,christmas,2015-11-26,-7,7
693,christmas,2016-11-24,-7,7
1057,christmas,2017-11-23,-7,7
1421,christmas,2018-11-22,-7,7
1792,christmas,2019-11-28,-7,7
2156,christmas,2020-11-26,-7,7
