-
Notifications
You must be signed in to change notification settings - Fork 108
/
build_prophet.py
135 lines (131 loc) · 6.56 KB
/
build_prophet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import numpy as np # type: ignore
import pandas as pd # type: ignore
import copy
import matplotlib.pyplot as plt # type: ignore
# helper functions
from ..utils import print_dynamic_rmse
# imported Prophet from fbprophet pkg
from fbprophet import Prophet # type: ignore
#### Suppress INFO messages from FB Prophet!
import logging
logging.getLogger('fbprophet').setLevel(logging.WARNING)
import pdb
# Accepted spellings of ``time_interval`` paired with the pandas frequency
# alias handed to Prophet's ``make_future_dataframe``.
_PROPHET_FREQ_ALIASES = (
    (('months', 'month', 'm'), 'M'),
    (('days', 'daily', 'd'), 'D'),
    (('weeks', 'weekly', 'w'), 'W'),
    (('qtr', 'quarter', 'q'), 'Q'),
    (('years', 'year', 'annual', 'y', 'a'), 'Y'),
    (('hours', 'hourly', 'h'), 'H'),
    # 'M' means month-end in pandas, so minutes need their own alias.
    (('minutes', 'minute', 'min', 'n'), 'min'),
    (('seconds', 'second', 'sec', 's'), 'S'),
)


def _prophet_freq_alias(time_interval):
    """Return the pandas frequency alias for ``time_interval`` ('W' if unrecognized)."""
    for names, alias in _PROPHET_FREQ_ALIASES:
        if time_interval in names:
            return alias
    return 'W'


def build_prophet_model(ts_df, time_col, target, forecast_period, time_interval,
                        score_type,
                        verbose, conf_int):
    """
    Build a Time Series Model using Facebook Prophet which is a powerful model.

    The input frame is renamed to Prophet's expected ``ds``/``y`` columns, a
    model is fitted on those two columns, and predictions are produced for
    the historical dates plus ``forecast_period`` future periods.

    Args:
        ts_df: DataFrame holding the series. ``time_col`` may be a regular
            column or the index; in the latter case the index is reset first.
        time_col: Name of the datetime column (or index) in ``ts_df``.
        target: Name of the column to forecast.
        forecast_period: Number of future periods to forecast.
        time_interval: Spelling of the data frequency, e.g. 'months', 'd',
            'weekly', 'min', 'sec'. Matching is exact (case-sensitive);
            unrecognized values fall back to weekly ('W').
        score_type: Accepted for interface compatibility; not used here.
        verbose: Accepted for interface compatibility; not used here.
        conf_int: Width of the uncertainty interval passed to Prophet as
            ``interval_width`` (e.g. 0.80).

    Returns:
        Tuple ``(model, forecast, rmse, norm_rmse)``: the fitted Prophet
        model, the DataFrame returned by ``model.predict`` and the two
        values returned by ``print_dynamic_rmse``.
    """
    ##### if you are going to use matplotlib with prophet data, it gives an error unless you do this.
    pd.plotting.register_matplotlib_converters()
    # ``rename`` and ``reset_index`` both return new frames, so the caller's
    # ``ts_df`` is never modified and no defensive deep copy is required.
    try:
        print('Preparing Time Series data for FB Prophet: sample row before\n', ts_df[time_col].head(1))
        df = ts_df
    except KeyError:
        #### This happens when time_col is not found among the columns because it is
        #### actually the index. In that case, reset the index to expose it as a column.
        print('Preparing Time Series data for FB Prophet: sample row before\n', ts_df.head(1))
        df = ts_df.reset_index()
    df = df.rename(columns={time_col: 'ds', target: 'y'})
    print('Time Series data: sample row after transformation\n', df.head(1))
    actual = 'y'
    timecol = 'ds'
    dft = df[[timecol, actual]]
    ##### For most Financial time series data, 80% conf interval is enough...
    print(' Fit-Predict data (shape=%s) with Confidence Interval = %0.2f...' % (dft.shape, conf_int))
    ### Make sure you lower your desired interval width from the normal 95% to a more realistic 80%
    model = Prophet(interval_width=conf_int)
    model.fit(dft)
    # Forecasting with Prophet takes 2 steps:
    ## 1. Create a dataframe of the dates to predict. make_future_dataframe builds
    ##    one holding both the past dates and the requested future dates.
    ## 2. Ask Prophet to predict for every date in that dataframe.
    ## So with 2905 rows of data and 365 forecast periods, the forecast holds
    ##    2905 past rows plus 365 future rows (total: 3270).
    print('Building Forecast dataframe. Forecast Period = %d' % forecast_period)
    time_int = _prophet_freq_alias(time_interval)
    future = model.make_future_dataframe(periods=forecast_period, freq=time_int)
    forecast = model.predict(future)
    #### We are going to plot Prophet's forecasts differently since it is better
    dfa = plot_prophet(dft, forecast)
    # The components chart (trend, seasonality) is best-effort: a plotting
    # failure must not prevent the forecast from being returned.
    try:
        model.plot_components(forecast)
    except Exception:
        print('Error in FB Prophet components forecast. Continuing...')
    # NOTE(review): plot_prophet only keeps 'yhat' for dates after the
    # second-to-last actual date, so 'y' and 'yhat' overlap on very few rows of
    # dfa. Confirm this is the intended basis for the RMSE.
    rmse, norm_rmse = print_dynamic_rmse(dfa['y'], dfa['yhat'], dfa['y'])
    return model, forecast, rmse, norm_rmse
def plot_prophet(dft, forecastdf):
    """
    Plot actuals against the Prophet forecast and return the combined frame.

    This is a different way of plotting Prophet charts as described in the following article:
    Source: https://nextjournal.com/viebel/forecasting-time-series-data-with-prophet
    Reproduced with gratitude to the author.

    Args:
        dft: DataFrame with a 'ds' column and a 'y' column of actual values.
        forecastdf: DataFrame with 'ds', 'yhat', 'yhat_lower' and 'yhat_upper'
            columns.

    Returns:
        DataFrame indexed by 'ds': the actuals outer-joined with the forecast
        columns for every forecast date later than the second-to-last actual
        date. Neither input frame is modified.
    """
    # Non-inplace set_index returns new frames, leaving the inputs untouched.
    actuals = dft.set_index('ds')
    actuals.index = pd.to_datetime(actuals.index)
    predictions = forecastdf.set_index('ds')

    # Start the forecast line at the last actual point so both curves connect.
    cutoff = actuals.index[-2]
    band_columns = ['yhat', 'yhat_lower', 'yhat_upper']
    forecast_tail = predictions.loc[predictions.index > cutoff, band_columns]
    combined = actuals.join(forecast_tail, how='outer')

    figure, axis = plt.subplots(figsize=(20, 10))
    axis.plot(combined['y'], color='red')
    axis.plot(combined['yhat'], color='green')
    axis.fill_between(
        combined.index,
        combined['yhat_lower'],
        combined['yhat_upper'],
        alpha=0.2,
        color="darkgreen",
    )
    axis.set_title('Actuals (Red) vs Forecast (Green)')
    axis.set_ylabel('Values')
    axis.set_xlabel('Date Time')
    plt.show(block=False)
    return combined
# def print_dynamic_rmse(actuals, predicted, original):
# """
# This utility calculates the RMSE between actuals and predicted. However, it does one more thing.
# Since a dynamic forecast needs the longer original series, it calculates the Normalized RMSE
# using the original array's standard deviation. That way, a forecast of only 2 values does not
# result in a larger Normalized RMSE, since the std deviation of 2 values would be very small.
# """
# rmse = np.sqrt(np.mean((actuals - predicted)**2))
# norm_rmse = rmse/original.std()
# print(' RMSE = {:,.2f}'.format(rmse))
# print(' Std Deviation of Originals = {:,.2f}'.format(original.std()))
# print(' Normalized RMSE = %0.0f%%' %(100*norm_rmse))
# return rmse, norm_rmse