In [1]:
# Library importing
import pandas as pd
import numpy as np
import seaborn as sns
import math
import warnings
import os
import logging
from time import sleep
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from prophet import Prophet
from prophet.diagnostics import cross_validation
from prophet.diagnostics import performance_metrics
import itertools

# Quiet the model-fitting loggers and Python warnings so cell output stays readable.
logging.getLogger("cmdstanpy").disabled = True
logging.getLogger("prophet").setLevel(logging.WARNING)
warnings.filterwarnings("ignore")

# Display preferences: default figure size, thousands separators, and table limits.
sns.set(rc={"figure.figsize": (15, 8.27)})
for option_name, option_value in (
    ("display.float_format", "{:,}".format),
    ("display.max_columns", 30),
    ("display.max_rows", 100),
):
    pd.set_option(option_name, option_value)

In [2]:
# Data retrieval function
def retrieve_data(data_dir=None):
    """Load ``data.csv`` and reshape it into the ``ds``/``y`` layout Prophet fits on.

    The CSV is read with ``;`` as the field separator and ``,`` as the decimal
    mark. Its ``year`` and ``month`` columns are combined into ``ds`` (the first
    day of that month) and ``quantity`` is rounded to an integer ``y``. The three
    source columns are dropped; any other columns are kept unchanged.

    The working directory is NOT changed, so the cell can be re-run safely.

    Parameters
    ----------
    data_dir : str or path-like, optional
        Directory containing ``data.csv``. Defaults to the ``data`` folder that
        sits next to the current working directory's parent (``../data``).

    Returns
    -------
    pandas.DataFrame
        Frame with a datetime ``ds`` column and an ``int64`` ``y`` column.

    Raises
    ------
    FileNotFoundError
        If ``data.csv`` cannot be found in ``data_dir``.
    """
    if data_dir is None:
        # Build the path portably instead of chdir-ing and gluing on '\\data'.
        data_dir = os.path.join(os.pardir, 'data')
    csv_path = os.path.join(data_dir, 'data.csv')

    # Fail loudly here rather than handing None to the model-fitting cells.
    if not os.path.isfile(csv_path):
        raise FileNotFoundError(
            f"Expected data file not found: {os.path.abspath(csv_path)}"
        )

    raw = pd.read_csv(csv_path, sep=';', decimal=',')
    return raw.assign(
        ds=pd.to_datetime(raw[['year', 'month']].assign(day=1)),
        y=raw['quantity'].round().astype('int64'),
    ).drop(columns=['year', 'month', 'quantity'])

# Load the dataset once at notebook level; `df` is the frame the later
# cross-validation cell fits each Prophet model on.
df = retrieve_data()

In [3]:
# Hyperparameter search space, limited to the parameters the official Prophet
# documentation singles out as worth tuning. Every combination is evaluated.
param_grid = dict(
    changepoint_prior_scale=[0.01, 0.1, 0.25, 0.5],
    seasonality_prior_scale=[0.01, 0.1, 0.5, 1.0, 5.0, 10.0],
    changepoint_range=[0.8, 0.95],
)

In [None]:
# Expand the grid into one dict of Prophet keyword arguments per combination.
all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
# Per-combination errors, in the same order as all_params:
# metrics[0] = MdAPE, metrics[1] = MAPE, metrics[2] = SMAPE.
metrics = [[],[],[]]

# Use cross validation to evaluate all parameters
for params in tqdm(all_params):
    m = Prophet(**params).fit(df)  # Fit model with given params
    df_cv = cross_validation(m, initial='731 days', horizon='365 days', period='365 days', parallel = 'processes')
    # rolling_window=1 collapses the result to a single row averaged over the
    # whole forecast horizon. With the default window, row 0 only covers the
    # shortest horizons, so .values[0] would rank models on near-term error alone.
    df_p = performance_metrics(df_cv, rolling_window=1, monthly=True)
    metrics[0].append(df_p['mdape'].values[0])
    metrics[1].append(df_p['mape'].values[0])
    metrics[2].append(df_p['smape'].values[0])

  0%|          | 0/48 [00:00<?, ?it/s]

In [None]:
# Put every parameter combination next to its cross-validation errors.
tuning_results = pd.DataFrame(all_params).assign(
    mdape=metrics[0],
    mape=metrics[1],
    smape=metrics[2],
)
# Blend the three errors into one score; MAPE is the critical metric and
# carries the largest weight (0.6), the other two get 0.2 each.
tuning_results['performance index'] = (
    tuning_results['mdape'].mul(0.2)
    .add(tuning_results['mape'].mul(0.6))
    .add(tuning_results['smape'].mul(0.2))
)
# Show the table ranked from best (lowest score) to worst.
tuning_results.sort_values('performance index')

In [None]:
# Pick the parameter combination with the lowest performance index for model building.
best_position = np.argmin(tuning_results['performance index'])
best_params = all_params[best_position]
print(best_params)