# Forecasting - Facebook Prophet

https://facebook.github.io/prophet/

https://research.fb.com/blog/2017/02/prophet-forecasting-at-scale/

In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt


In [2]:
# NOTE: the holidays package may cause problems with this import;
# if it does, downgrade holidays via: pip install 'holidays==0.9.12'
from prophet import Prophet 

ModuleNotFoundError: No module named 'prophet'

In [None]:
# render matplotlib figures inline in the notebook and use the 'fivethirtyeight' style sheet
%matplotlib inline
plt.style.use('fivethirtyeight')


# Trivial Forecast (rolling mean)

In [None]:
# Toy illustration only; the final trivial model is at the end of this file.
# Build a ten-row input frame 'X' and attach the mean over a 3-row window as 'y'.
df = pd.DataFrame({'X': np.arange(10)}).assign(
    y=lambda frame: frame['X'].rolling(3).mean()
)

In [None]:
# display the frame: 'X' is the input column, 'y' its 3-row rolling mean
df


# Small Data Set

In [None]:
# Load the flat table and keep the date column plus the India series.
# The dates are parsed on load (as in the trivial-forecast section of this
# file) so that 'ds' is a datetime column rather than a column of strings.
df_all = pd.read_csv('../data/processed/COVID_small_flattable.csv',
                     sep=';',
                     parse_dates=['date'])
# Prophet expects the time column to be called 'ds' and the target 'y'.
df = df_all[['date', 'India']].rename(columns={'date': 'ds',
                                               'India': 'y'})

In [None]:
# Plot the series over the 'ds' index with a logarithmic y-axis.
ax = df.set_index('ds').plot(figsize=(12, 8),
                             logy=True)
ax.set_ylabel('Daily Number of confirmed cases')  # label typo fixed ("confimed")
ax.set_xlabel('Date')

plt.show()

In [None]:
# set the uncertainty interval to 95% (the Prophet default is 80%)

my_model = Prophet(interval_width=0.95)
# fit the model on df, which holds the 'ds' (date) and 'y' (value) columns
my_model.fit(df)

In [None]:
# build the frame of dates to predict for: 7 further periods at daily frequency
future_dates = my_model.make_future_dataframe(periods=7, freq='D')

In [None]:
# predict in scikit-learn style: call predict() on the fitted model with the dates to forecast

forecast = my_model.predict(future_dates)

In [None]:
# plot the forecast together with its uncertainty interval
my_model.plot(forecast,
              uncertainty=True ); # trailing ';' presumably stops the notebook from echoing the returned figure, since Prophet already renders the plot

In [None]:
import plotly.offline as py
# Import from 'prophet', the package the model itself was created from;
# 'fbprophet' is the name the package had before release 1.0.
from prophet.plot import plot_plotly

# Interactive version of the forecast plot; plot_plotly returns a plotly Figure.
fig = plot_plotly(my_model, forecast)

fig.update_layout(
    width=1024,
    height=900,
    xaxis_title="Time",
    yaxis_title="Confirmed infected people (source johns hopkins csse, log-scale)",
)
# logarithmic y-axis; NOTE(review): for a log axis plotly reads the range as
# log10 exponents (roughly 10^1.1 to 10^5.5) - confirm against the plotly docs
fig.update_yaxes(type="log", range=[1.1, 5.5])
py.iplot(fig)

In [None]:
# show the first rows of the forecast, ordered by date
forecast.sort_values(by='ds').head()

In [None]:
# plot the individual components of the forecast
my_model.plot_components(forecast);

In [None]:
# plot only the 'trend' column of the forecast over 'ds', on a logarithmic y-axis
forecast[['ds','trend']].set_index('ds').plot(figsize=(12, 8),logy=True)

# Cross Validation

In [None]:
# Import from 'prophet', the package the model itself was created from;
# 'fbprophet' is the name the package had before release 1.0.
from prophet.diagnostics import cross_validation

# Simulated historical forecasts over the fitted model's history.
df_cv = cross_validation(my_model,
                         initial='40 days',  # the first 40 days are used for training
                         period='1 days',    # a new prediction run is started every day
                         horizon='7 days')   # each run predicts 7 days into the future

In [None]:
# NOTE(review): the result of the first expression is not assigned; a notebook cell
# displays only its last expression, so the sorted slice is presumably never shown
df_cv.sort_values(by=['cutoff','ds'])[0:12]
# first rows of the cross-validation result
df_cv.head()

In [None]:
# Import from 'prophet', the package the model itself was created from;
# 'fbprophet' is the name the package had before release 1.0.
from prophet.diagnostics import performance_metrics

# compute the error metrics from the cross-validation results
df_p = performance_metrics(df_cv)

In [None]:
# the performance metrics table shows the results for every horizon
df_p

In [None]:
# Import from 'prophet', the package the model itself was created from;
# 'fbprophet' is the name the package had before release 1.0.
from prophet.plot import plot_cross_validation_metric

# plot the MAPE of the cross-validated forecasts
fig = plot_cross_validation_metric(df_cv, metric='mape')

# Diagonal Plot

In [None]:
# Distance between each predicted date and the cutoff of its prediction run.
horizon = '7 days'
df_cv['horizon'] = df_cv['ds'] - df_cv['cutoff']

# Dates, predictions and true values of the rows lying exactly `horizon` after the cutoff.
date_vec, y_hat, y = (
    df_cv.loc[df_cv['horizon'] == horizon, column]
    for column in ('ds', 'yhat', 'y')
)

In [None]:
# keep only the rows whose horizon equals the selected one and show the last of them
df_cv_7=df_cv[df_cv['horizon']==horizon]
df_cv_7.tail()

In [None]:
# inspect the element type stored in the 'horizon' column
type(df_cv['horizon'][0])

In [None]:
fig, ax = plt.subplots(1, 1)

# upper end shared by the reference diagonal and both axis limits
y_max = max(y)

# reference diagonal: a forecast equal to the truth would lie on this line
ax.plot(np.arange(y_max), np.arange(y_max), '--', label='diagonal')
# horizon is the string chosen when filtering df_cv ('7 days'); it labels the curve
ax.plot(y, y_hat, '-', label=horizon)

ax.set_title('Diagonal Plot')
ax.set_ylim(10, y_max)

ax.set_xlabel('truth: y')
ax.set_ylabel('prediction: y_hat')  # label typo fixed ("prediciton")
ax.set_yscale('log')

ax.set_xlim(10, y_max)
ax.set_xscale('log')
ax.legend(loc='best',
          prop={'size': 16});

# Trivial Forecast

In [None]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE) in percent.

    Args:
        y_true: Array-like of observed values; each error is divided by the
            corresponding entry of this argument.
        y_pred: Array-like of predicted values, broadcastable against
            ``y_true``.

    Returns:
        ``mean(|(y_true - y_pred) / y_true|) * 100``. Entries for which the
        ratio is not finite (true value of zero, or NaN in either input) are
        left out, because the percentage error is undefined there. ``nan`` is
        returned when no entry remains, e.g. for empty input.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Division by a zero true value yields inf/nan; silence the warning here
    # and drop those entries explicitly below.
    with np.errstate(divide='ignore', invalid='ignore'):
        abs_pct_error = np.atleast_1d(np.abs((y_true - y_pred) / y_true))

    usable = np.isfinite(abs_pct_error)
    if not usable.any():
        return np.nan
    return np.mean(abs_pct_error[usable]) * 100

In [None]:
# Reload the flat table, this time parsing the date column while reading.
parse_dates = ['date']
df_all = pd.read_csv(
    '../data/processed/COVID_small_flattable.csv',
    sep=';',
    parse_dates=parse_dates,
)
# Keep the date and the India series under the column names 'ds' and 'y'.
df_trivial = df_all.loc[:, ['date', 'India']].rename(
    columns={'date': 'ds', 'India': 'y'}
)

In [None]:
df_trivial['y_mean_r3']=df_trivial.y.rolling(3).mean() # mean over a 3-row window (3 days, assuming one row per day)

In [None]:
# The rolling mean has to be shifted by the prediction horizon (here 7 days):
# the value available at the cutoff date serves as the forecast 7 rows later.

df_trivial['cutoff'] = df_trivial['ds'].shift(7)
df_trivial['y_hat'] = df_trivial['y_mean_r3'].shift(7)
df_trivial['horizon'] = df_trivial['ds'] - df_trivial['cutoff']

# Evaluation starts at row 12, past the leading NaN rows left by rolling(3) and
# shift(7). The observed values go first (y_true) and the forecast second
# (y_pred), so that the error is expressed relative to the truth.
print('MAPE: ' + str(mean_absolute_percentage_error(df_trivial['y'].iloc[12:],
                                                    df_trivial['y_hat'].iloc[12:])))
df_trivial