In [10]:
# Libraries and data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from greykite.framework.templates.autogen.forecast_config import *
from greykite.framework.templates.forecaster import Forecaster
from greykite.framework.templates.model_templates import ModelTemplateEnum
from greykite.common.features.timeseries_features import *
from greykite.common.evaluation import EvaluationMetricEnum
from greykite.framework.utils.result_summary import summarize_grid_search_results
from plotly.offline import iplot

In [15]:
# Load the dat
# YYYY-MM-DD
df = pd.read_csv('../nyc_data.csv')
future_df = pd.read_csv('../future.csv')
df

Unnamed: 0,Date,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
0,1/1/2015,720.000885,0,0,0,3.68,41.305
1,1/2/2015,581.276773,0,0,0,4.73,131.574
2,1/3/2015,754.117039,0,0,0,7.23,162.700
3,1/4/2015,622.252774,0,0,0,10.96,160.281
4,1/5/2015,785.373319,0,0,0,6.92,51.077
...,...,...,...,...,...,...,...
2187,12/27/2020,685.915026,0,0,0,2.89,38.674
2188,12/28/2020,998.051170,0,0,0,8.83,166.712
2189,12/29/2020,847.123399,0,0,0,3.48,161.865
2190,12/30/2020,857.521043,0,0,0,5.97,179.634


In [16]:
# Merging both
df = pd.concat([df, future_df])
df = df.reset_index(drop = True)
df.head()

Unnamed: 0,Date,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
0,1/1/2015,720.000885,0,0,0,3.68,41.305
1,1/2/2015,581.276773,0,0,0,4.73,131.574
2,1/3/2015,754.117039,0,0,0,7.23,162.7
3,1/4/2015,622.252774,0,0,0,10.96,160.281
4,1/5/2015,785.373319,0,0,0,6.92,51.077


In [17]:
# Inspecting df
df.tail()

Unnamed: 0,Date,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
2218,1/27/2021,,0,0,0,3.33,39.664
2219,1/28/2021,,0,0,0,1.67,195.314
2220,1/29/2021,,0,0,0,-2.78,235.894
2221,1/30/2021,,0,0,0,1.11,152.752
2222,1/31/2021,,0,0,0,4.44,158.62


In [18]:
# Rename variable
df = df.rename(columns = {'Demand': 'y'})
df.head(0)

Unnamed: 0,Date,y,Easter,Thanksgiving,Christmas,Temperature,Marketing


In [21]:
# Specifying time series names
metadata = MetadataParam(time_col = "Date",
              value_col = "y",
              freq = "D",
              train_end_date = pd.to_datetime("2020-12-31"))
metadata

MetadataParam(anomaly_info=None, date_format=None, freq='D', time_col='Date', train_end_date=Timestamp('2020-12-31 00:00:00'), value_col='y')

In [22]:
# Growth terms posibilities
growth = dict(growth_terms = ['linear', 'quadratic', 'sqrt'])
growth

{'growth_terms': ['linear', 'quadratic', 'sqrt']}

In [23]:
# Seasonalities
seasonality = dict(yearly_seasonality = 'auto',
                   quarterly_seasonality = 'auto',
                   monthly_seasonality = 'auto',
                   weekly_seasonality = 'auto',
                   daily_seasonality = 'auto')
seasonality

{'yearly_seasonality': 'auto',
 'quarterly_seasonality': 'auto',
 'monthly_seasonality': 'auto',
 'weekly_seasonality': 'auto',
 'daily_seasonality': 'auto'}

In [26]:
# Checking wich countries are avaliable and their holidays
get_available_holiday_lookup_countries(["US"])
get_available_holidays_across_countries(countries = ["US"],
                                        year_start = 2015,
                                        year_end = 2021)

['Christmas Day',
 'Christmas Day (Observed)',
 'Columbus Day',
 'Halloween',
 'Independence Day',
 'Independence Day (Observed)',
 'Labor Day',
 'Martin Luther King, Jr. Day',
 'Memorial Day',
 "New Year's Day",
 "New Year's Day (Observed)",
 'Thanksgiving',
 'Veterans Day',
 'Veterans Day (Observed)',
 "Washington's Birthday"]

In [29]:
# Specifying events
events = dict(holidays_to_model_separatley = ["New Year's Day"],
              holiday_lookup_countires = ["US"],
              holidays_pre_num_days = 2,
              holidays_post_num_days = 2,
              holiday_pre_post_num_dict = {"New Year's Day": (3, 1)}, # 3 days before 1 after
              daily_event_df_dict = {"elections": pd.DataFrame({
                  "date": ["2016-11-08", "2020-11-03"],
                  "event_name": ["elections"] * 2
              })})
events

{'holidays_to_model_separatley': ["New Year's Day"],
 'holiday_lookup_countires': ['US'],
 'holidays_pre_num_days': 2,
 'holidays_post_num_days': 2,
 'holiday_pre_post_num_dict': {"New Year's Day": (3, 1)},
 'daily_event_df_dict': {'elections':          date event_name
  0  2016-11-08  elections
  1  2020-11-03  elections}}

In [30]:
# Changepoints -> Reflects the changes in trend
changepoints = dict(changepoints_dict = dict(method = "auto"))

In [31]:
# Regressors
regressors = dict(regressor_cols = ["Easter", "Temperature", "Marketing"])
regressors

{'regressor_cols': ['Easter', 'Temperature', 'Marketing']}

In [32]:
# Lagged regressors
lagged_regressors = dict(lagged_regressor_dict = {"Temperature": "auto",
                                                  "Easter": "auto",
                                                  "Marketing": "auto"})

In [33]:
# Autoregression -> Dependent on the forecasting horizon 
autoregression = dict(autoreg_dict = "auto")

In [34]:
# Fiting algorithims
custom = dict(fit_algorithim_dict = [dict(fit_algorithim = "linear"),
                                     dict(fit_algorithim = "ridge"),
                                     dict(fit_algorithim = "rf"),
                                     dict(fit_algorithim = "gradient_boosting")])
custom

{'fit_algorithim_dict': [{'fit_algorithim': 'linear'},
  {'fit_algorithim': 'ridge'},
  {'fit_algorithim': 'rf'},
  {'fit_algorithim': 'gradient_boosting'}]}

# Silverkite Model

In [36]:
# Build the model
model_components = ModelComponentsParam(growth = growth,
                                        seasonality = seasonality,
                                        events = events,
                                        changepoints = changepoints,
                                        regressors = regressors,
                                        lagged_regressors = lagged_regressors,
                                        autoregression = autoregression,
                                        custom = custom)

In [37]:
# Cross-Validation
evaluation_period = EvaluationPeriodParam(cv_min_train_periods = df.shape[0] - 180 - 31,
                                          cv_expanding_window = True,
                                          cv_max_splits = 50,
                                          cv_periods_between_splits = 16)

In [38]:
# Evaluation metric
evaluation_metric = EvaluationMetricParam(
    cv_selection_metric = EvaluationMetricEnum.RootMeanSquaredError.name)