In [5]:
import pandas as pd
import numpy as np
from prophet import Prophet

# Data Loading

In [3]:
# Load the example series; the displayed head shows two columns, `ds` (date) and `y` (value).
# NOTE(review): this is an absolute, machine-specific path -- point it at your own
# copy of the prophet examples directory before re-running.
df = pd.read_csv('/home/ubuntu/code/prophet/examples/example_wp_log_peyton_manning.csv')
# Plain column assignment replaces the column, so `ds` ends up as datetime64.
# `df.loc[:, 'ds'] = ...` instead writes into the existing object column on recent
# pandas and can leave the dtype as object.
df['ds'] = pd.to_datetime(df['ds'], format='%Y-%m-%d')
print(df.shape)
print(df['ds'].min(), df['ds'].max())
df.head(5)

(2905, 2)
2007-12-10 00:00:00 2016-01-20 00:00:00


Unnamed: 0,ds,y
0,2007-12-10,9.590761
1,2007-12-11,8.51959
2,2007-12-12,8.183677
3,2007-12-13,8.072467
4,2007-12-14,7.893572


# 01 Modeling Holidays and Special Events 

## 1.1 Create the holiday DataFrame

In [12]:
# Event calendar passed to Prophet: one row per (event name, date).
# lower_window=0 / upper_window=1 are Prophet's holiday-window columns; with these
# values the fitted effects shown later in the notebook land on the event date and
# the following day.
playoffs = pd.DataFrame({
    'holiday': 'playoff',
    'ds': pd.to_datetime(['2008-01-13', '2009-01-03', '2010-01-16',
                          '2010-01-24', '2010-02-07', '2011-01-08',
                          '2013-01-12', '2014-01-12', '2014-01-19',
                          '2014-02-02', '2015-01-11', '2016-01-17',
                          '2016-01-24', '2016-02-07']),
    'lower_window': 0,
    'upper_window': 1,
})

# The three superbowl dates also appear in `playoffs`, so those days carry both labels.
superbowls = pd.DataFrame({
    'holiday': 'superbowl',
    'ds': pd.to_datetime(['2010-02-07', '2014-02-02', '2016-02-07']),
    'lower_window': 0,
    'upper_window': 1,
})

# ignore_index=True gives the combined frame a clean 0..16 index; without it the
# labels 0..2 appear twice, which makes label-based lookups ambiguous.
holiday = pd.concat([playoffs, superbowls], ignore_index=True)
print(holiday.shape)
holiday.head(5)

(17, 4)


Unnamed: 0,holiday,ds,lower_window,upper_window
0,playoff,2008-01-13,0,1
1,playoff,2009-01-03,0,1
2,playoff,2010-01-16,0,1
3,playoff,2010-01-24,0,1
4,playoff,2010-02-07,0,1


## 1.2 Fitting 

In [13]:
# Build the model with the custom event calendar attached, then fit it on the
# historical series. The fit call stays last so the cell still echoes the model.
m = Prophet(holidays=holiday)
m.fit(df)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


<prophet.forecaster.Prophet at 0x7f905109abb0>

In [14]:
## Making future table
# Historical dates plus 365 additional daily rows after the last observed date.
# freq and include_history are written out at their documented defaults.
future = m.make_future_dataframe(periods=365, freq='D', include_history=True)
future.tail(5)

Unnamed: 0,ds
3265,2017-01-15
3266,2017-01-16
3267,2017-01-17
3268,2017-01-18
3269,2017-01-19


## 1.3 Predicting

In [15]:
# Score every date in `future`. The printed column list shows that the result
# carries a point estimate plus lower/upper bounds for each model component
# (trend, weekly, yearly, and one per named event).
forecast = m.predict(future)
print(forecast.columns)
forecast.tail(n=5)

Index(['ds', 'trend', 'yhat_lower', 'yhat_upper', 'trend_lower', 'trend_upper',
       'additive_terms', 'additive_terms_lower', 'additive_terms_upper',
       'holidays', 'holidays_lower', 'holidays_upper', 'playoff',
       'playoff_lower', 'playoff_upper', 'superbowl', 'superbowl_lower',
       'superbowl_upper', 'weekly', 'weekly_lower', 'weekly_upper', 'yearly',
       'yearly_lower', 'yearly_upper', 'multiplicative_terms',
       'multiplicative_terms_lower', 'multiplicative_terms_upper', 'yhat'],
      dtype='object')


Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,holidays,...,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
3265,2017-01-15,7.204061,7.293909,8.772231,6.890094,7.544851,0.832112,0.832112,0.832112,0.0,...,0.016553,0.016553,0.016553,0.815559,0.815559,0.815559,0.0,0.0,0.0,8.036173
3266,2017-01-16,7.20308,7.732401,9.065972,6.887772,7.545686,1.167681,1.167681,1.167681,0.0,...,0.329642,0.329642,0.329642,0.838039,0.838039,0.838039,0.0,0.0,0.0,8.370761
3267,2017-01-17,7.202099,7.474961,8.856119,6.885489,7.546568,0.993733,0.993733,0.993733,0.0,...,0.133009,0.133009,0.133009,0.860724,0.860724,0.860724,0.0,0.0,0.0,8.195832
3268,2017-01-18,7.201119,7.306801,8.717103,6.883205,7.547664,0.830127,0.830127,0.830127,0.0,...,-0.053235,-0.053235,-0.053235,0.883362,0.883362,0.883362,0.0,0.0,0.0,8.031246
3269,2017-01-19,7.200138,7.379605,8.772959,6.881139,7.548545,0.846427,0.846427,0.846427,0.0,...,-0.059214,-0.059214,-0.059214,0.905641,0.905641,0.905641,0.0,0.0,0.0,8.046565


### Reviewing 

In [16]:
## Review Impact
# Keep rows where at least one event contributes to the forecast. Magnitudes are
# summed per column so that opposite-signed playoff/superbowl effects cannot
# cancel and hide a row (abs(playoff + superbowl) would miss that case).
has_event_effect = (forecast['playoff'].abs() + forecast['superbowl'].abs()) > 0
forecast.loc[has_event_effect, ['ds', 'playoff', 'superbowl']].tail(10)

Unnamed: 0,ds,playoff,superbowl
2190,2014-02-02,1.22072,1.214678
2191,2014-02-03,1.901618,1.46085
2532,2015-01-11,1.22072,0.0
2533,2015-01-12,1.901618,0.0
2901,2016-01-17,1.22072,0.0
2902,2016-01-18,1.901618,0.0
2908,2016-01-24,1.22072,0.0
2909,2016-01-25,1.901618,0.0
2922,2016-02-07,1.22072,1.214678
2923,2016-02-08,1.901618,1.46085


In [18]:
## All special holidays
# need to test whether any date would have impact -> June 22
# NOTE(review): the "June 22" reference above is kept from the original note; its
# meaning is not evident from this notebook -- confirm or remove.
# Rows are kept when the date is in the event calendar OR either event effect is
# non-zero. Magnitudes are summed per column so opposite-signed effects cannot cancel.
on_listed_date = forecast['ds'].isin(holiday['ds'])
has_event_effect = (forecast['playoff'].abs() + forecast['superbowl'].abs()) > 0
forecast.loc[on_listed_date | has_event_effect, ['ds', 'playoff', 'superbowl']]

Unnamed: 0,ds,playoff,superbowl
34,2008-01-13,1.22072,0.0
35,2008-01-14,1.901618,0.0
361,2009-01-03,1.22072,0.0
362,2009-01-04,1.901618,0.0
730,2010-01-16,1.22072,0.0
731,2010-01-17,1.901618,0.0
737,2010-01-25,1.901618,0.0
750,2010-02-07,1.22072,1.214678
1076,2011-01-08,1.22072,0.0
1077,2011-01-09,1.901618,0.0
