#Libraries and Data

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd /content/drive/MyDrive/Forecasting Models and Time Series for Business in Python/7. Facebook Prophet

/content/drive/MyDrive/Forecasting Models and Time Series for Business in Python/7. Facebook Prophet


In [3]:
#import libraries
import numpy as np
import pandas as pd
import itertools
from prophet import Prophet

In [4]:
#load the data
data = pd.read_csv("DHS_Daily_Report_2020.csv")
data.head()

Unnamed: 0,Date,Total Adults in Shelter,Total Children in Shelter,Total Individuals in Shelter,Single Adult Men in Shelter,Single Adult Women in Shelter,Total Single Adults in Shelter,Families with Children in Shelter,Adults in Families with Children in Shelter,Children in Families with Children in Shelter,Total Individuals in Families with Children in Shelter,Adult Families in Shelter,Easter,Thanksgiving,Christmas
0,1/1/2014,28770,21919,50689,7269,2650,9919,10469,14897,21919,1866,3954,0,0,0
1,1/2/2014,28843,21954,50797,7289,2661,9950,10484,14930,21954,1871,3963,0,0,0
2,1/3/2014,28947,22007,50954,7323,2678,10001,10521,14978,22007,1872,3968,0,0,0
3,1/4/2014,28954,22003,50957,7320,2684,10004,10520,14971,22003,1877,3979,0,0,0
4,1/5/2014,28991,21966,50957,7386,2692,10078,10502,14941,21966,1874,3972,0,0,0


#Data preparation

In [5]:
#transform the date variable
data.Date = pd.to_datetime(data.Date, format = "%m/%d/%Y")
data.Date

0      2014-01-01
1      2014-01-02
2      2014-01-03
3      2014-01-04
4      2014-01-05
          ...    
2552   2020-12-27
2553   2020-12-28
2554   2020-12-29
2555   2020-12-30
2556   2020-12-31
Name: Date, Length: 2557, dtype: datetime64[ns]

In [6]:
#final dataset
df = data.loc[data.Date <= "2020-12-01",
              ["Date", "Total Individuals in Shelter",
               "Easter", "Thanksgiving", "Christmas"]]
df.head()              

Unnamed: 0,Date,Total Individuals in Shelter,Easter,Thanksgiving,Christmas
0,2014-01-01,50689,0,0,0
1,2014-01-02,50797,0,0,0
2,2014-01-03,50954,0,0,0
3,2014-01-04,50957,0,0,0
4,2014-01-05,50957,0,0,0


In [7]:
#renaming variables
df = df.rename(columns = {'Total Individuals in Shelter': 'y',
                          'Date': 'ds'})
df.head(1)

Unnamed: 0,ds,y,Easter,Thanksgiving,Christmas
0,2014-01-01,50689,0,0,0


#Holidays

In [8]:
#Easter
easter_dates = df[df.Easter == 1].ds
easter = pd.DataFrame({'holiday': 'easter',
                       'ds': pd.to_datetime(easter_dates),
                       'lower_window': -5,
                       'upper_window': 2})
easter.head(2)

Unnamed: 0,holiday,ds,lower_window,upper_window
109,easter,2014-04-20,-5,2
459,easter,2015-04-05,-5,2


In [9]:
#Thanksgiving
thanksgiving_dates = df[df.Thanksgiving == 1].ds
thanksgiving = pd.DataFrame({'holiday': 'thanksgiving',
                       'ds': pd.to_datetime(thanksgiving_dates),
                       'lower_window': -5,
                       'upper_window': 2})
thanksgiving.head(2)

Unnamed: 0,holiday,ds,lower_window,upper_window
330,thanksgiving,2014-11-27,-5,2
694,thanksgiving,2015-11-26,-5,2


In [10]:
#combining holidays
holidays = pd.concat([easter, thanksgiving])
holidays

Unnamed: 0,holiday,ds,lower_window,upper_window
109,easter,2014-04-20,-5,2
459,easter,2015-04-05,-5,2
816,easter,2016-03-27,-5,2
1201,easter,2017-04-16,-5,2
1551,easter,2018-04-01,-5,2
1936,easter,2019-04-21,-5,2
2293,easter,2020-04-12,-5,2
330,thanksgiving,2014-11-27,-5,2
694,thanksgiving,2015-11-26,-5,2
1058,thanksgiving,2016-11-24,-5,2


#Prophet

In [11]:
m = Prophet(holidays = holidays,
            seasonality_mode = 'multiplicative',
            seasonality_prior_scale = 10,
            changepoint_prior_scale = 0.05,
            holidays_prior_scale = 10)
m.add_regressor('Christmas')
m.fit(df)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmp4hlunl24/q5avqlwu.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp4hlunl24/nhsce19a.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.7/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=37748', 'data', 'file=/tmp/tmp4hlunl24/q5avqlwu.json', 'init=/tmp/tmp4hlunl24/nhsce19a.json', 'output', 'file=/tmp/tmpnz6wfipb/prophet_model-20220829165312.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
16:53:12 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
16:53:14 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


<prophet.forecaster.Prophet at 0x7f0fbcd12050>

In [None]:
#CV
from prophet.diagnostics import cross_validation
df_cv = cross_validation(m,
                         horizon = '31 days',
                         initial = '2400 days',
                         parallel = "processes")

In [13]:
#assess model
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
print(mean_absolute_error(df_cv.yhat, df_cv.y))
print(mean_absolute_percentage_error(df_cv.yhat, df_cv.y))

1996.790011165734
0.03544428108590342


#Hyperparameter tuning

In [14]:
#PArameters
from sklearn.model_selection import ParameterGrid
param_grid = {'seasonality_prior_scale': [10, 20],
            'changepoint_prior_scale': [0.05, 0.1],
            'holidays_prior_scale': [10, 20]}
grid = ParameterGrid(param_grid)

In [None]:
#Tuning  
mae = []
for params in grid:
  #build model
  m = Prophet(holidays = holidays,
            seasonality_mode = 'multiplicative',
            seasonality_prior_scale = params['seasonality_prior_scale'],
            changepoint_prior_scale = params['changepoint_prior_scale'],
            holidays_prior_scale = params['holidays_prior_scale'])
  m.add_regressor('Christmas')
  m.fit(df)

  #CV
  df_cv = cross_validation(m,
                         horizon = '31 days',
                         initial = '2400 days',
                         parallel = "processes")

  #Gather the results
  error = mean_absolute_error(df_cv.yhat, df_cv.y)
  mae.append(error)

In [16]:
#gather best parameters
best_params = grid[np.argmin(mae)]
print(best_params)

{'seasonality_prior_scale': 20, 'holidays_prior_scale': 10, 'changepoint_prior_scale': 0.1}


#Best Model

In [None]:
m = Prophet(holidays = holidays,
            seasonality_mode = 'multiplicative',
            seasonality_prior_scale = 20,
            changepoint_prior_scale = 0.1,
            holidays_prior_scale = 10)
m.add_regressor('Christmas')
m.fit(df)

In [None]:
df_cv = cross_validation(m,
                         horizon = '31 days',
                         initial = '2400 days',
                         parallel = "processes")

In [19]:
print(mean_absolute_error(df_cv.yhat, df_cv.y))
print(mean_absolute_percentage_error(df_cv.yhat, df_cv.y))

1983.6276610715697
0.03521705355135434
