#Libraries and data

In [1]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'

In [None]:
#change directory
%cd /content/drive/MyDrive/Time Series Forecasting Product

In [None]:
#Install Greykite
!pip install greykite

In [None]:
#libraries
import numpy as np
import pandas as pd
from greykite.framework.templates.autogen.forecast_config import *
from greykite.framework.templates.forecaster import Forecaster
from greykite.framework.templates.model_templates import ModelTemplateEnum
from greykite.common.features.timeseries_features import *
from greykite.common.evaluation import EvaluationMetricEnum
from greykite.framework.utils.result_summary import summarize_grid_search_results
from plotly.offline import iplot

In [None]:
#load the data
#YYYY-MM-DD
df = pd.read_csv('nyc_data.csv')
future_df = pd.read_csv('future.csv')
future_df.head()

In [None]:
#get the best parameters
parameters  = pd.read_csv("Forecasting Product/best_params_silverkite.csv", 
                          index_col = 0)
parameters

In [None]:
#get the parameters
growth_term_param = parameters.loc["param_estimator__growth_term"][0]
fit_algorithm_param = parameters.loc["param_estimator__fit_algorithm_dict"][0]

In [None]:
#merging both
df = pd.concat([df, future_df])
df = df.reset_index(drop = True)
df.head()

In [None]:
#inspecting df
df.tail()

In [None]:
#Rename variable
df = df.rename(columns = {'Demand': 'y'})
df.head(0)

#Silverkite Preparations

In [None]:
#Specifying Time Series names
metadata = MetadataParam(time_col = "Date",
                         value_col = "y",
                         freq = "D",
                         train_end_date = pd.to_datetime("2020-12-31"))
metadata

In [None]:
#growth terms possibilities
growth = dict(growth_term = growth_term_param)
growth

In [None]:
#seasonalities
seasonality = dict(yearly_seasonality = "auto",
                   quarterly_seasonality = "auto",
                   monthly_seasonality = "auto",
                   weekly_seasonality = "auto",
                   daily_seasonality = "auto")
seasonality

In [None]:
#Specifying events
events = dict(holidays_to_model_separately = ["New Year's Day"],
              holiday_lookup_countries = ["US"],
              holiday_pre_num_days = 2,
              holiday_post_num_days = 2,
              holiday_pre_post_num_dict = {"New Year's Day": (3,1)},
              daily_event_df_dict = {"elections": pd.DataFrame({
                  "date": ["2016-11-08", "2020-11-03"],
                  "event_name": ["elections"] * 2
              })})
events

In [None]:
#Changepoints -> reflects the changes in the trend
changepoints = dict(changepoints_dict = dict(method = "auto"))

In [None]:
#Regressors
regressors = dict(regressor_cols = ["Easter", "Temperature", "Marketing"])
regressors

In [None]:
#Lagged Regressors
lagged_regressors = dict(lagged_regressor_dict = {"Temperature": "auto",
                                                  "Easter": "auto",
                                                  "Marketing": "auto"})

In [None]:
#autogression -> dependent on the forecasting horizon
autoregression = dict(autoreg_dict = "auto")

In [None]:
fit_algorithm_param

In [None]:
#Fitting algorithms
import yaml
custom = dict(fit_algorithm_dict = yaml.load(fit_algorithm_param))
custom

#Silverkite Model

In [None]:
#Build the model
model_components = ModelComponentsParam(growth = growth,
                                        seasonality = seasonality,
                                        events = events,
                                        changepoints = changepoints,
                                        regressors = regressors,
                                        lagged_regressors = lagged_regressors,
                                        autoregression = autoregression,
                                        custom = custom)

In [None]:
#Cross-validation
evaluation_period = EvaluationPeriodParam(cv_min_train_periods= df.shape[0] - 180 -31,
                                          cv_expanding_window = True,
                                          cv_max_splits = 50,
                                          cv_periods_between_splits = 16)

In [None]:
#Evaluation metric 
evaluation_metric = EvaluationMetricParam(
    cv_selection_metric = EvaluationMetricEnum.RootMeanSquaredError.name)

In [None]:
#Configuration
config = ForecastConfig(model_template = ModelTemplateEnum.SILVERKITE.name,
                        forecast_horizon = 31,
                        metadata_param = metadata,
                        model_components_param = model_components,
                        evaluation_period_param=evaluation_period,
                        evaluation_metric_param = evaluation_metric)

In [None]:
#Forecasting
forecaster = Forecaster()
result = forecaster.run_forecast_config(df = df,
                                        config = config)

In [None]:
#Look at the model summary
summary = result.model[-1].summary() #-1 restrieves the estimator from the pipeline
print(summary)

In [None]:
#visualization
fig = result.forecast.plot_components()
iplot(fig)

In [None]:
#getting the forecast result
forecast = result.forecast.df[["Date", "forecast"]]
forecast = forecast.rename(columns = {'forecast': 'silverkite'})
predictions_silverkite = forecast.iloc[-31:,:]
predictions_silverkite

In [None]:
#exporting
predictions_silverkite.to_csv("Forecasting Product/Ensemble/predictions_silverkite.csv")