In [13]:
# Library importing
import pandas as pd
import numpy as np
import seaborn as sns
import math
import warnings
import os
import matplotlib.pyplot as plt
from prophet import Prophet
from prophet.diagnostics import cross_validation
from prophet.diagnostics import performance_metrics
import itertools

# Suppress every warning category so cell outputs are not flooded with text.
warnings.simplefilter("ignore")

# Display preferences
# Default figure size (in inches) applied through seaborn's rc settings.
sns.set(
    rc={
        "figure.figsize": (15, 8.27),
    }
)
# Floats are rendered with thousands separators; up to 30 columns and an
# unlimited number of rows are shown when a frame is displayed.
pd.options.display.float_format = "{:,}".format
pd.options.display.max_columns = 30
pd.options.display.max_rows = None

In [8]:
# Data retrieval function
def retrieve_data():
    """Load ``data/data.csv`` from the parent of the current working directory.

    The file is read with ``;`` as the field separator and ``,`` as the decimal
    mark. The ``year`` and ``month`` columns are combined into a ``ds``
    timestamp (day fixed to 1) and ``quantity`` is rounded and stored as an
    ``int64`` column ``y``; the three source columns are then dropped.

    The working directory is left untouched, so the function can be called
    repeatedly (e.g. when the cell is re-run) and always resolves the same file.

    Returns:
        pandas.DataFrame: the prepared frame, with ``ds`` and ``y`` appended
        after any other columns present in the file.

    Raises:
        FileNotFoundError: if ``../data/data.csv`` does not exist.
    """
    # Resolve the path instead of chdir-ing: changing the kernel's working
    # directory is a persistent side effect that breaks a second call.
    parent_dir = os.path.dirname(os.getcwd())
    data_path = os.path.join(parent_dir, 'data', 'data.csv')
    if not os.path.isfile(data_path):
        # Fail loudly here rather than returning None and crashing downstream.
        raise FileNotFoundError(f"Data file not found: {data_path}")

    raw = pd.read_csv(data_path, sep=';', decimal=',')
    prepared = raw.assign(
        # First day of each (year, month) pair.
        ds=pd.to_datetime(raw[['year', 'month']].assign(day=1)),
        y=raw['quantity'].round().astype('int64'),
    )
    return prepared.drop(columns=['year', 'month', 'quantity'])

# Notebook-level data object: the frame returned by retrieve_data(), which the
# model-fitting cell below passes to Prophet.
df = retrieve_data()

In [14]:
# Candidate values for each Prophet hyperparameter explored by the grid search.
param_grid = {
    'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.25, 0.5],
    'seasonality_prior_scale': [0.01, 0.1, 1.0, 5.0, 10.0],
    'changepoint_range': [0.8, 0.95],
}

# One dict per combination: the Cartesian product of the value lists,
# re-keyed with the hyperparameter names.
all_params = [
    dict(zip(param_grid, combination))
    for combination in itertools.product(*param_grid.values())
]
rmses = []  # RMSE of each combination, in the same order as all_params

# Evaluate every combination with time-series cross validation
for params in all_params:
    m = Prophet(**params)
    m.fit(df)  # Fit on the full history with this combination
    df_cv = cross_validation(
        m,
        initial='731 days',
        horizon='365 days',
        period='365 days',
        parallel='processes',
    )
    # The first row's RMSE is kept as the score for this combination.
    df_p = performance_metrics(df_cv, rolling_window=1)
    rmses.append(df_p['rmse'].values[0])

20:31:18 - cmdstanpy - INFO - Chain [1] start processing
20:31:18 - cmdstanpy - INFO - Chain [1] done processing
20:31:18 - cmdstanpy - ERROR - Chain [1] error: error during processing Unknown error
Optimization terminated abnormally. Falling back to Newton.
20:31:18 - cmdstanpy - INFO - Chain [1] start processing
20:31:18 - cmdstanpy - INFO - Chain [1] done processing
20:31:21 - cmdstanpy - INFO - Chain [1] start processing
20:31:21 - cmdstanpy - INFO - Chain [1] done processing
20:31:21 - cmdstanpy - ERROR - Chain [1] error: error during processing Unknown error
Optimization terminated abnormally. Falling back to Newton.
20:31:21 - cmdstanpy - INFO - Chain [1] start processing
20:31:22 - cmdstanpy - INFO - Chain [1] done processing
20:31:24 - cmdstanpy - INFO - Chain [1] start processing
20:31:24 - cmdstanpy - INFO - Chain [1] done processing
20:31:24 - cmdstanpy - ERROR - Chain [1] error: error during processing Unknown error
Optimization terminated abnormally. Falling back to Newto

    changepoint_prior_scale  seasonality_prior_scale  changepoint_range  \
0                     0.001                     0.01                0.8   
1                     0.001                     0.01               0.95   
2                     0.001                      0.1                0.8   
3                     0.001                      0.1               0.95   
4                     0.001                      1.0                0.8   
5                     0.001                      1.0               0.95   
6                     0.001                      5.0                0.8   
7                     0.001                      5.0               0.95   
8                     0.001                     10.0                0.8   
9                     0.001                     10.0               0.95   
10                     0.01                     0.01                0.8   
11                     0.01                     0.01               0.95   
12                     0.

In [22]:
# Rank the parameter combinations: one row per combination with its RMSE
# attached, displayed from lowest to highest error.
tuning_results = pd.DataFrame(all_params).assign(rmse=rmses)
tuning_results.sort_values(by='rmse')

Unnamed: 0,changepoint_prior_scale,seasonality_prior_scale,changepoint_range,rmse
21,0.1,0.01,0.95,1140012.642191991
20,0.1,0.01,0.8,1153952.1527191007
22,0.1,0.1,0.8,1158928.3488832212
32,0.25,0.1,0.8,1160674.7278210062
23,0.1,0.1,0.95,1160856.4210949836
42,0.5,0.1,0.8,1208593.6830690764
12,0.01,0.1,0.8,1208870.7804384443
13,0.01,0.1,0.95,1211477.0804683582
1,0.001,0.01,0.95,1212924.543890295
0,0.001,0.01,0.8,1215796.280161671


In [18]:
# Select the combination with the lowest RMSE (rmses is index-aligned with all_params).
best_params = all_params[np.asarray(rmses).argmin()]
print(best_params)

{'changepoint_prior_scale': 0.1, 'seasonality_prior_scale': 0.01, 'changepoint_range': 0.95}
