# Prophet - Default approach

**Import Libraries and datasets**

In [None]:
!pip install prophet
!pip install --upgrade openpyxl

In [2]:
#Import Libraries
import pandas as pd
import math
import itertools
import numpy as np
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score,mean_absolute_percentage_error
from prophet import Prophet
from prophet.diagnostics import cross_validation
from prophet.diagnostics import performance_metrics
from prophet.plot import plot_cross_validation_metric
from prophet.plot import plot_plotly, plot_components_plotly

In [3]:
pd.set_option("display.max_columns",20)

In [18]:
df = pd.read_excel('/content/drive/MyDrive/Github/m_tech/Datasets/input.xlsx')
df.rename(columns={'SalesVolume':'Values'},inplace=True)
df.head()

Unnamed: 0,Date,Values
0,1995-01-01,47639.0
1,1995-02-01,47880.0
2,1995-03-01,67025.0
3,1995-04-01,56925.0
4,1995-05-01,64192.0


In [19]:
df = df.dropna()

In [None]:
df = pd.read_excel('/content/drive/MyDrive/Github/m_tech/Datasets/tfl-journeys-type.xlsx')
df = df.dropna()

In [None]:
#Import the Dataset
df = pd.read_excel('/content/drive/MyDrive/Github/m_tech/Datasets/UK_House_Weighted.xlsx')
df.rename(columns={'SalesVolume':'Values'},inplace=True)

In [20]:
df.tail()

Unnamed: 0,Date,Values
316,2021-05-01,57106.0
317,2021-06-01,121727.0
318,2021-07-01,27865.0
319,2021-08-01,30946.0
320,2021-09-01,40929.0


**Model training and Prediction**

In [21]:
index = len(df) - 11
# input_df = df[:index].rename(columns={'Date':'ds', 'SalesVolume':'y'})
input_df = df[:index].rename(columns={'Date':'ds', 'Values':'y'})
input_df.tail(10)

Unnamed: 0,ds,y
300,2020-01-01,56603.0
301,2020-02-01,56568.0
302,2020-03-01,57626.0
303,2020-04-01,23673.0
304,2020-05-01,30798.0
305,2020-06-01,46664.0
306,2020-07-01,54524.0
307,2020-08-01,56476.0
308,2020-09-01,62864.0
309,2020-10-01,77907.0


In [22]:
#Train and fit the model
def model_train_predict(input_df,params):
  m = Prophet( seasonality_mode='additive', 
            yearly_seasonality= True,
            daily_seasonality = False,
            weekly_seasonality = False,
            **params)
  m.fit(input_df)
  # Forcasting into the future
  future = m.make_future_dataframe(periods=11,freq='MS')
  forecast = m.predict(future)
  return m, forecast

In [23]:
default_params = {  
    'changepoint_prior_scale': 0.05,
    'changepoint_range' : 0.8,
    'seasonality_prior_scale': 10.0,
}

In [24]:
model , forecast = model_train_predict(input_df,default_params)

In [25]:
#Parameters for the model
model.component_modes

{'additive': ['yearly',
  'additive_terms',
  'extra_regressors_additive',
  'holidays'],
 'multiplicative': ['multiplicative_terms', 'extra_regressors_multiplicative']}

In [26]:
# Plot the predictions
def viz_plot(m,forecast):
  fig = plot_plotly(m, forecast)
  fig.show()
  fig2 = plot_components_plotly(m, forecast)
  fig2.show()

In [27]:
viz_plot(model , forecast)

# Cross Validation

In [None]:
cutoffs = pd.date_range(start='2000-01-01', end='2020-06-01', freq='2MS')
cutoffs

In [None]:
df_cv = cross_validation(m, horizon = '90 days', cutoffs = cutoffs, parallel="processes")

INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7f589c2fdb90>


In [None]:
df_p = performance_metrics(df_cv)
df_p.head()

In [None]:
df_p

In [None]:
fig = plot_cross_validation_metric(df_cv, metric='mape')


### **Evaluation Metrics**

In [28]:
def evaluation_metrics(forecast):
  metric_df = forecast[['ds','yhat']][311:318]
  metric_df['Actual'] = df['Values'][311:318]
  mae = mean_absolute_error(metric_df['yhat'], metric_df['Actual'])
  mape = mean_absolute_percentage_error(metric_df['yhat'], metric_df['Actual'])
  rmse = math.sqrt(mean_squared_error(metric_df['yhat'], metric_df['Actual']))
  print("Mean absolute error:", mae)
  print("Mean absolute percentage error:", mape)
  print("Root mean squared error:", rmse)

In [29]:
evaluation_metrics(forecast)

Mean absolute error: 22721.951893098656
Mean absolute percentage error: 0.36084576787716877
Root mean squared error: 27086.617968844414


### **Hyper-Paramter Tuning**

**Tunable Parameters:**

***changepoint_prior_scale (Trend):*** 

This is probably the most impactful parameter. It determines the flexibility of the trend, and in particular how much the trend changes at the trend changepoints. As described in this documentation, if it is too small, the trend will be underfit and variance that should have been modeled with trend changes will instead end up being handled with the noise term. If it is too large, the trend will overfit and in the most extreme case you can end up with the trend capturing yearly seasonality. The default of 0.05 works for many time series, but this could be tuned; a range of [0.001, 0.5] would likely be about right. Parameters like this (regularization penalties; this is effectively a lasso penalty) are often tuned on a log scale.

***changepoint_range (Trend):*** 

This is the proportion of the history in which the trend is allowed to change. This defaults to 0.8, 80% of the history, meaning the model will not fit any trend changes in the last 20% of the time series. This is fairly conservative, to avoid overfitting to trend changes at the very end of the time series where there isn’t enough runway left to fit it well. With a human in the loop, this is something that can be identified pretty easily visually: one can pretty clearly see if the forecast is doing a bad job in the last 20%. In a fully-automated setting, it may be beneficial to be less conservative. It likely will not be possible to tune this parameter effectively with cross validation over cutoffs as described above. The ability of the model to generalize from a trend change in the last 10% of the time series will be hard to learn from looking at earlier cutoffs that may not have trend changes in the last 10%. So, this parameter is probably better not tuned, except perhaps over a large number of time series. In that setting, [0.8, 0.95] may be a reasonable range.

***seasonality_prior_scale:*** 

This parameter controls the flexibility of the seasonality. Similarly, a large value allows the seasonality to fit large fluctuations, a small value shrinks the magnitude of the seasonality. The default is 10., which applies basically no regularization. That is because we very rarely see overfitting here (there’s inherent regularization with the fact that it is being modeled with a truncated Fourier series, so it’s essentially low-pass filtered). A reasonable range for tuning it would probably be [0.01, 10]; when set to 0.01 you should find that the magnitude of seasonality is forced to be very small. This likely also makes sense on a log scale, since it is effectively an L2 penalty like in ridge regression.

***holidays_prior_scale:***

This controls flexibility to fit holiday effects. Similar to seasonality_prior_scale, it defaults to 10.0 which applies basically no regularization, since we usually have multiple observations of holidays and can do a good job of estimating their effects. This could also be tuned on a range of [0.01, 10] as with seasonality_prior_scale.

In [None]:
param_grid = {  
    'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.4],
    'changepoint_range' : [0.8, 0.82, 0.85,0.9,0.92],
    'seasonality_prior_scale': [0.01, 0.1, 1.0, 5.0, 9.0],
}

# Generate all combinations of parameters
all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
maes = []  # Store the mae for each params here
mapes = []  # Store the mape for each params here
rmses = []  # Store the RMSEs for each params here

# Use cross validation to evaluate all parameters
for params in all_params:
    print(params)
    m = Prophet( daily_seasonality = False,
                weekly_seasonality = False, 
                **params).fit(input_df)  # Fit model with given params
    future = m.make_future_dataframe(periods=11,freq='MS')
    forecast = m.predict(future)
    # metric_df = forecast[['ds','yhat']][311:318]
    # metric_df['Actual'] = df['SalesVolume'][311:318]
    metric_df = forecast[['ds','yhat']][index:]
    metric_df['Actual'] = df['Values'][index:]
    mae = mean_absolute_error(metric_df['yhat'], metric_df['Actual'])
    mape = mean_absolute_percentage_error(metric_df['yhat'], metric_df['Actual'])
    rmse = math.sqrt(mean_squared_error(metric_df['yhat'], metric_df['Actual']))
    maes.append(mae)
    mapes.append(mape)
    rmses.append(rmse)

# Find the best parameters
tuning_results = pd.DataFrame(all_params)
tuning_results['mae'] = maes
tuning_results['mape'] = mapes
tuning_results['rmse'] = rmses

In [None]:
best_params = all_params[np.argmin(maes)]
print(best_params)

{'changepoint_prior_scale': 0.01, 'changepoint_range': 0.85, 'seasonality_prior_scale': 0.01}


In [None]:
tuning_results.sort_values('mape').head()

Unnamed: 0,changepoint_prior_scale,changepoint_range,seasonality_prior_scale,mae,mape,rmse
55,0.1,0.82,0.01,23566.876424,0.397288,28660.160784
50,0.1,0.8,0.01,23616.544768,0.399319,28708.236399
52,0.1,0.8,1.0,24063.522953,0.403412,28888.2251
53,0.1,0.8,5.0,24155.671386,0.407915,28975.539567
51,0.1,0.8,0.1,24100.451365,0.408704,29028.390221


In [None]:
tuning_results.sort_values('mae').head()

Unnamed: 0,changepoint_prior_scale,changepoint_range,seasonality_prior_scale,mae,mape,rmse
55,0.1,0.82,0.01,23566.876424,0.397288,28660.160784
50,0.1,0.8,0.01,23616.544768,0.399319,28708.236399
60,0.1,0.85,0.01,23887.719649,0.414956,29058.591184
65,0.1,0.9,0.01,23916.951674,0.417621,29119.288647
70,0.1,0.92,0.01,23960.647034,0.420299,29182.18036


### **Predicting with the best params**

In [None]:
model , forecast = model_train_predict(input_df,best_params)

In [None]:
viz_plot(model , forecast)

In [None]:
evaluation_metrics(forecast)

Mean absolute error: 17195.636891038655
Mean absolute percentage error: 0.26045223006101204
Root mean squared error: 20930.967009333934
