<a href="https://colab.research.google.com/github/ShubhamHooda01/Data-Science-Projects/blob/main/New_York_shelter_demand_using_Prophet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries

In [None]:
import pandas as pd
import numpy as np
import itertools
from prophet import Prophet


# Get The Data

In [None]:
data = pd.read_csv('/content/DHS_Daily_Report_2020.csv')

In [None]:
data.head()

Unnamed: 0,Date,Total Adults in Shelter,Total Children in Shelter,Total Individuals in Shelter,Single Adult Men in Shelter,Single Adult Women in Shelter,Total Single Adults in Shelter,Families with Children in Shelter,Adults in Families with Children in Shelter,Children in Families with Children in Shelter,Total Individuals in Families with Children in Shelter,Adult Families in Shelter,Easter,Thanksgiving,Christmas
0,1/1/2014,28770,21919,50689,7269,2650,9919,10469,14897,21919,1866,3954,0,0,0
1,1/2/2014,28843,21954,50797,7289,2661,9950,10484,14930,21954,1871,3963,0,0,0
2,1/3/2014,28947,22007,50954,7323,2678,10001,10521,14978,22007,1872,3968,0,0,0
3,1/4/2014,28954,22003,50957,7320,2684,10004,10520,14971,22003,1877,3979,0,0,0
4,1/5/2014,28991,21966,50957,7386,2692,10078,10502,14941,21966,1874,3972,0,0,0


# Data Preparation

In [None]:
data.Date = pd.to_datetime(data.Date,
                           format = '%m/%d/%Y')
data.Date

0      2014-01-01
1      2014-01-02
2      2014-01-03
3      2014-01-04
4      2014-01-05
          ...    
2552   2020-12-27
2553   2020-12-28
2554   2020-12-29
2555   2020-12-30
2556   2020-12-31
Name: Date, Length: 2557, dtype: datetime64[ns]

In [None]:
df = data.loc[data.Date < '2020-12-01',
              ['Date', 'Total Individuals in Shelter', 'Easter', 'Christmas', 'Thanksgiving']]

In [None]:
df.head()

Unnamed: 0,Date,Total Individuals in Shelter,Easter,Christmas,Thanksgiving
0,2014-01-01,50689,0,0,0
1,2014-01-02,50797,0,0,0
2,2014-01-03,50954,0,0,0
3,2014-01-04,50957,0,0,0
4,2014-01-05,50957,0,0,0


In [None]:
df = df.rename(columns = {'Total Individuals in Shelter':'y', 'Date':'ds'})
df.head(1)

Unnamed: 0,ds,y,Easter,Christmas,Thanksgiving
0,2014-01-01,50689,0,0,0


# Holidays

In [None]:
# Easter holiday
easter_dates = df[df.Easter == 1].ds
easter = pd.DataFrame({'holiday':'easter',
                       'ds': pd.to_datetime(easter_dates),
                         'lower_window': -5,
                         'upper_window': 2})
easter.head()

Unnamed: 0,holiday,ds,lower_window,upper_window
109,easter,2014-04-20,-5,2
459,easter,2015-04-05,-5,2
816,easter,2016-03-27,-5,2
1201,easter,2017-04-16,-5,2
1551,easter,2018-04-01,-5,2


In [None]:
# Thanksgiving holiday
thanksgiving_dates = df[df.Thanksgiving == 1].ds
thanksgiving = pd.DataFrame({'holiday':'thanksgiving',
                       'ds': pd.to_datetime(thanksgiving_dates),
                         'lower_window': -5,
                          'upper_window': 2})
thanksgiving.head()

Unnamed: 0,holiday,ds,lower_window,upper_window
330,thanksgiving,2014-11-27,-5,2
694,thanksgiving,2015-11-26,-5,2
1058,thanksgiving,2016-11-24,-5,2
1422,thanksgiving,2017-11-23,-5,2
1786,thanksgiving,2018-11-22,-5,2


In [None]:
holidays = pd.concat([easter, thanksgiving])
holidays.sample(5)

Unnamed: 0,holiday,ds,lower_window,upper_window
1201,easter,2017-04-16,-5,2
1936,easter,2019-04-21,-5,2
2157,thanksgiving,2019-11-28,-5,2
694,thanksgiving,2015-11-26,-5,2
1422,thanksgiving,2017-11-23,-5,2


# Model

In [None]:
#prophet model
model = Prophet(growth = 'linear',
                holidays = holidays,
                seasonality_mode= 'multiplicative',
                seasonality_prior_scale= 10,
                holidays_prior_scale= 10,
                changepoint_prior_scale= .05,)
model.add_regressor('Christmas')
model.fit(df)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpoe6fl4f4/ngp6qfq4.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpoe6fl4f4/6hut1r3w.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=51369', 'data', 'file=/tmp/tmpoe6fl4f4/ngp6qfq4.json', 'init=/tmp/tmpoe6fl4f4/6hut1r3w.json', 'output', 'file=/tmp/tmpoe6fl4f4/prophet_model8dq8aa4c/prophet_model-20240114160535.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
16:05:35 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
16:05:37 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


<prophet.forecaster.Prophet at 0x79e316c04a90>

# Cross Validation

In [None]:
from prophet.diagnostics import cross_validation
df_cv = cross_validation(model, horizon = '31',
                         initial = '2400 days',
                         parallel= 'processes')


INFO:prophet:Making 125 forecasts with cutoffs between 2020-07-28 23:59:59.999999969 and 2020-11-29 23:59:59.999999969
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x79e316c06d40>
DEBUG:cmdstanpy:input tempfile: /tmp/tmpoe6fl4f4/zw8xcjx4.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpoe6fl4f4/ijvp_gky.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpoe6fl4f4/j9omc0er.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=86748', 'data', 'file=/tmp/tmpoe6fl4f4/zw8xcjx4.json', 'init=/tmp/tmpoe6fl4f4/j9omc0er.json', 'output', 'file=/tmp/tmpoe6fl4f4/prophet_modelrye2azph/prophet_model-20240114160539.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
16:05:39 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
DEBUG:cmdstanpy:input tempfile: /tmp/tmpoe6f

# Model Assessment

In [None]:
from sklearn.metrics import mean_absolute_error
print(f'Mean Absolute Error: {round(mean_absolute_error(df_cv.y, df_cv.yhat),2)}')

Mean Absolute Error: 1507.95


# Hyper Parameter Tuning

In [None]:
#parameter grid
from sklearn.model_selection import  ParameterGrid

In [None]:
param_grid = {'seasonality_prior_scale': [10,20],
              'changepoint_prior_scale': [.01,.1],
              'holidays_prior_scale': [5,20],}
param_grid

{'seasonality_prior_scale': [10, 20],
 'changepoint_prior_scale': [0.01, 0.1],
 'holidays_prior_scale': [5, 20]}

In [None]:
grid = ParameterGrid(param_grid)
len(list(grid))

8

In [22]:
#Tuning
mae = []
for param in grid:
  #Build the model
  model = Prophet(growth = 'linear',
                holidays = holidays,
                seasonality_mode= 'multiplicative',
                seasonality_prior_scale= param['seasonality_prior_scale'],
                holidays_prior_scale= param['holidays_prior_scale'],
                changepoint_prior_scale=param['changepoint_prior_scale'])
  model.add_regressor('Christmas')
  model.fit(df)

  #crossvalidation
  df_cv = cross_validation(model, horizon = '31',
                         initial = '2400 days',
                         parallel= 'processes')

  # Get the results
  error = round(mean_absolute_error(df_cv.y, df_cv.yhat),2)
  mae.append(error)






[1;30;43mStreaming output truncated to the last 5000 lines.[0m
DEBUG:cmdstanpy:input tempfile: /tmp/tmpoe6fl4f4/6lcpc19k.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpoe6fl4f4/h1b_it_k.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=88306', 'data', 'file=/tmp/tmpoe6fl4f4/6lcpc19k.json', 'init=/tmp/tmpoe6fl4f4/h1b_it_k.json', 'output', 'file=/tmp/tmpoe6fl4f4/prophet_model3r3l_0xs/prophet_model-20240114162550.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
16:25:50 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
16:25:50 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
DEBUG:cmdstanpy:input tempfile: /tmp/tmpoe6fl4f4/qjou0ajp.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpoe6fl4f4/9q78l0d4.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdSt

# Best Parameters

In [23]:
best_params = grid[np.argmin(mae)]
print(f'Best params: {best_params}')

Best params: {'seasonality_prior_scale': 10, 'holidays_prior_scale': 5, 'changepoint_prior_scale': 0.1}
