In [1]:
import pandas as pd
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics
import itertools
import numpy as np

In [2]:
# Read the processed CSV at `path` and parse `order_date` as datetimes
# (dayfirst=False: ambiguous dates are read month-first).
path = "../data/processed/superstore_clean.csv"
df = pd.read_csv(path).assign(
    order_date=lambda frame: pd.to_datetime(frame['order_date'], dayfirst=False)
)

In [3]:
# Aggregate sales to one row per calendar month (labelled at month start, 'MS')
# and name the columns `ds` / `y`, the column names Prophet is fitted on below.
monthly_sales = (
    df.set_index('order_date')['sales']
    .resample('MS')
    .sum()
    .rename_axis('ds')
    .reset_index(name='y')
)

In [4]:
# Hyper-parameter search space: every combination of these values is evaluated.
param_grid = dict(
    changepoint_prior_scale=[0.001, 0.01, 0.1, 0.5],
    seasonality_prior_scale=[0.01, 0.1, 1.0, 10.0],
)

In [5]:
# Expand the grid into one {param_name: value} dict per combination
# (Cartesian product of the value lists, in grid order).
all_params = [
    dict(zip(param_grid, combo))
    for combo in itertools.product(*param_grid.values())
]
# Collects one cross-validated error per entry of `all_params`
rmses = []

In [6]:
# Announce how many configurations the grid search is about to evaluate.
n_configs = len(all_params)
print(f"--- Starting Grid Search on {n_configs} model configurations ---")

--- Starting Grid Search on 16 model configurations ---


In [7]:
# 3. Grid search: fit one Prophet model per hyper-parameter combination and
#    score it with Prophet's cross-validation (mean RMSE over the horizon rows).
# Reset the score list here so re-running this cell never appends to results
# left over from a previous run (which would break the 1:1 match with all_params).
rmses = []
for i, params in enumerate(all_params):
    # Initialize model with the current params
    m = Prophet(**params, seasonality_mode='multiplicative')

    # IMPROVEMENT 1: Add US Holidays
    # NOTE(review): monthly_sales is aggregated to month-start dates, while
    # country holidays are daily events; holidays that do not fall on a date
    # present in the data are likely ignored by the fit -- confirm this is intended.
    m.add_country_holidays(country_name='US')

    m.fit(monthly_sales)

    # Cross Validation (Backtesting)
    # disable_tqdm=True hides the progress bar to keep the output short
    try:
        df_cv = cross_validation(m, initial='730 days', period='180 days', horizon='30 days', disable_tqdm=True)
        df_p = performance_metrics(df_cv)
        avg_rmse = df_p['rmse'].mean()
    except Exception as exc:
        # Only ordinary errors are treated as "this configuration failed";
        # KeyboardInterrupt / SystemExit still propagate so the run can be stopped.
        print(f"Model {i+1}/{len(all_params)} - cross-validation failed: {exc!r}")
        # inf can never be selected as the minimum over a real RMSE,
        # regardless of the scale of the sales data.
        avg_rmse = np.inf

    rmses.append(avg_rmse)
    print(f"Model {i+1}/{len(all_params)} - RMSE: {avg_rmse:.2f} | Params: {params}")

19:47:30 - cmdstanpy - INFO - Chain [1] start processing
19:47:37 - cmdstanpy - INFO - Chain [1] done processing
19:47:37 - cmdstanpy - INFO - Chain [1] start processing
19:47:37 - cmdstanpy - INFO - Chain [1] done processing
19:47:38 - cmdstanpy - INFO - Chain [1] start processing
19:47:39 - cmdstanpy - INFO - Chain [1] done processing
19:47:39 - cmdstanpy - INFO - Chain [1] start processing
19:47:44 - cmdstanpy - INFO - Chain [1] done processing
19:47:45 - cmdstanpy - INFO - Chain [1] start processing
19:47:46 - cmdstanpy - INFO - Chain [1] done processing


Model 1/16 - RMSE: 16393.79 | Params: {'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 0.01}


19:47:46 - cmdstanpy - INFO - Chain [1] start processing
19:47:47 - cmdstanpy - INFO - Chain [1] done processing
19:47:47 - cmdstanpy - INFO - Chain [1] start processing
19:47:51 - cmdstanpy - INFO - Chain [1] done processing
19:47:51 - cmdstanpy - INFO - Chain [1] start processing
19:47:54 - cmdstanpy - INFO - Chain [1] done processing
19:47:54 - cmdstanpy - INFO - Chain [1] start processing
19:47:56 - cmdstanpy - INFO - Chain [1] done processing
19:47:56 - cmdstanpy - INFO - Chain [1] start processing
19:47:58 - cmdstanpy - INFO - Chain [1] done processing


Model 2/16 - RMSE: 11965.98 | Params: {'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 0.1}


19:47:58 - cmdstanpy - INFO - Chain [1] start processing
19:48:00 - cmdstanpy - INFO - Chain [1] done processing
19:48:00 - cmdstanpy - INFO - Chain [1] start processing
19:48:04 - cmdstanpy - INFO - Chain [1] done processing
19:48:04 - cmdstanpy - INFO - Chain [1] start processing
19:48:05 - cmdstanpy - INFO - Chain [1] done processing
19:48:05 - cmdstanpy - INFO - Chain [1] start processing
19:48:07 - cmdstanpy - INFO - Chain [1] done processing
19:48:07 - cmdstanpy - INFO - Chain [1] start processing
19:48:08 - cmdstanpy - INFO - Chain [1] done processing


Model 3/16 - RMSE: 12242.45 | Params: {'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 1.0}


19:48:08 - cmdstanpy - INFO - Chain [1] start processing
19:48:08 - cmdstanpy - INFO - Chain [1] done processing
19:48:09 - cmdstanpy - INFO - Chain [1] start processing
19:48:09 - cmdstanpy - INFO - Chain [1] done processing
19:48:10 - cmdstanpy - INFO - Chain [1] start processing
19:48:11 - cmdstanpy - INFO - Chain [1] done processing
19:48:11 - cmdstanpy - INFO - Chain [1] start processing
19:48:12 - cmdstanpy - INFO - Chain [1] done processing
19:48:13 - cmdstanpy - INFO - Chain [1] start processing
19:48:14 - cmdstanpy - INFO - Chain [1] done processing


Model 4/16 - RMSE: 9231.13 | Params: {'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 10.0}


19:48:14 - cmdstanpy - INFO - Chain [1] start processing
19:48:15 - cmdstanpy - INFO - Chain [1] done processing
19:48:15 - cmdstanpy - INFO - Chain [1] start processing
19:48:16 - cmdstanpy - INFO - Chain [1] done processing
19:48:16 - cmdstanpy - INFO - Chain [1] start processing
19:48:16 - cmdstanpy - INFO - Chain [1] done processing
19:48:17 - cmdstanpy - INFO - Chain [1] start processing
19:48:17 - cmdstanpy - INFO - Chain [1] done processing
19:48:18 - cmdstanpy - INFO - Chain [1] start processing
19:48:18 - cmdstanpy - INFO - Chain [1] done processing


Model 5/16 - RMSE: 15983.62 | Params: {'changepoint_prior_scale': 0.01, 'seasonality_prior_scale': 0.01}


19:48:19 - cmdstanpy - INFO - Chain [1] start processing
19:48:19 - cmdstanpy - INFO - Chain [1] done processing
19:48:19 - cmdstanpy - INFO - Chain [1] start processing
19:48:20 - cmdstanpy - INFO - Chain [1] done processing
19:48:20 - cmdstanpy - INFO - Chain [1] start processing
19:48:20 - cmdstanpy - INFO - Chain [1] done processing
19:48:21 - cmdstanpy - INFO - Chain [1] start processing
19:48:21 - cmdstanpy - INFO - Chain [1] done processing
19:48:22 - cmdstanpy - INFO - Chain [1] start processing
19:48:22 - cmdstanpy - INFO - Chain [1] done processing


Model 6/16 - RMSE: 10536.72 | Params: {'changepoint_prior_scale': 0.01, 'seasonality_prior_scale': 0.1}


19:48:23 - cmdstanpy - INFO - Chain [1] start processing
19:48:23 - cmdstanpy - INFO - Chain [1] done processing
19:48:23 - cmdstanpy - INFO - Chain [1] start processing
19:48:24 - cmdstanpy - INFO - Chain [1] done processing
19:48:24 - cmdstanpy - INFO - Chain [1] start processing
19:48:25 - cmdstanpy - INFO - Chain [1] done processing
19:48:25 - cmdstanpy - INFO - Chain [1] start processing
19:48:26 - cmdstanpy - INFO - Chain [1] done processing
19:48:26 - cmdstanpy - INFO - Chain [1] start processing
19:48:27 - cmdstanpy - INFO - Chain [1] done processing


Model 7/16 - RMSE: 11387.21 | Params: {'changepoint_prior_scale': 0.01, 'seasonality_prior_scale': 1.0}


19:48:27 - cmdstanpy - INFO - Chain [1] start processing
19:48:28 - cmdstanpy - INFO - Chain [1] done processing
19:48:28 - cmdstanpy - INFO - Chain [1] start processing
19:48:28 - cmdstanpy - INFO - Chain [1] done processing
19:48:29 - cmdstanpy - INFO - Chain [1] start processing
19:48:29 - cmdstanpy - INFO - Chain [1] done processing
19:48:30 - cmdstanpy - INFO - Chain [1] start processing
19:48:30 - cmdstanpy - INFO - Chain [1] done processing
19:48:30 - cmdstanpy - INFO - Chain [1] start processing
19:48:31 - cmdstanpy - INFO - Chain [1] done processing


Model 8/16 - RMSE: 9106.58 | Params: {'changepoint_prior_scale': 0.01, 'seasonality_prior_scale': 10.0}


19:48:32 - cmdstanpy - INFO - Chain [1] start processing
19:48:33 - cmdstanpy - INFO - Chain [1] done processing
19:48:33 - cmdstanpy - INFO - Chain [1] start processing
19:48:33 - cmdstanpy - INFO - Chain [1] done processing
19:48:34 - cmdstanpy - INFO - Chain [1] start processing
19:48:34 - cmdstanpy - INFO - Chain [1] done processing
19:48:35 - cmdstanpy - INFO - Chain [1] start processing
19:48:36 - cmdstanpy - INFO - Chain [1] done processing
19:48:36 - cmdstanpy - INFO - Chain [1] start processing
19:48:37 - cmdstanpy - INFO - Chain [1] done processing


Model 9/16 - RMSE: 16019.70 | Params: {'changepoint_prior_scale': 0.1, 'seasonality_prior_scale': 0.01}


19:48:38 - cmdstanpy - INFO - Chain [1] start processing
19:48:38 - cmdstanpy - INFO - Chain [1] done processing
19:48:39 - cmdstanpy - INFO - Chain [1] start processing
19:48:39 - cmdstanpy - INFO - Chain [1] done processing
19:48:40 - cmdstanpy - INFO - Chain [1] start processing
19:48:41 - cmdstanpy - INFO - Chain [1] done processing
19:48:41 - cmdstanpy - INFO - Chain [1] start processing
19:48:42 - cmdstanpy - INFO - Chain [1] done processing
19:48:42 - cmdstanpy - INFO - Chain [1] start processing
19:48:43 - cmdstanpy - INFO - Chain [1] done processing
19:48:43 - cmdstanpy - INFO - Chain [1] start processing


Model 10/16 - RMSE: 11766.25 | Params: {'changepoint_prior_scale': 0.1, 'seasonality_prior_scale': 0.1}


19:48:44 - cmdstanpy - INFO - Chain [1] done processing
19:48:44 - cmdstanpy - INFO - Chain [1] start processing
19:48:45 - cmdstanpy - INFO - Chain [1] done processing
19:48:46 - cmdstanpy - INFO - Chain [1] start processing
19:48:46 - cmdstanpy - INFO - Chain [1] done processing
19:48:47 - cmdstanpy - INFO - Chain [1] start processing
19:48:47 - cmdstanpy - INFO - Chain [1] done processing
19:48:48 - cmdstanpy - INFO - Chain [1] start processing
19:48:49 - cmdstanpy - INFO - Chain [1] done processing


Model 11/16 - RMSE: 12818.58 | Params: {'changepoint_prior_scale': 0.1, 'seasonality_prior_scale': 1.0}


19:48:49 - cmdstanpy - INFO - Chain [1] start processing
19:48:50 - cmdstanpy - INFO - Chain [1] done processing
19:48:51 - cmdstanpy - INFO - Chain [1] start processing
19:48:51 - cmdstanpy - INFO - Chain [1] done processing
19:48:52 - cmdstanpy - INFO - Chain [1] start processing
19:48:53 - cmdstanpy - INFO - Chain [1] done processing
19:48:53 - cmdstanpy - INFO - Chain [1] start processing
19:48:54 - cmdstanpy - INFO - Chain [1] done processing
19:48:54 - cmdstanpy - INFO - Chain [1] start processing
19:48:55 - cmdstanpy - INFO - Chain [1] done processing


Model 12/16 - RMSE: 20526.84 | Params: {'changepoint_prior_scale': 0.1, 'seasonality_prior_scale': 10.0}


19:48:56 - cmdstanpy - INFO - Chain [1] start processing
19:48:57 - cmdstanpy - INFO - Chain [1] done processing
19:48:58 - cmdstanpy - INFO - Chain [1] start processing
19:48:58 - cmdstanpy - INFO - Chain [1] done processing
19:48:58 - cmdstanpy - INFO - Chain [1] start processing
19:49:00 - cmdstanpy - INFO - Chain [1] done processing
19:49:01 - cmdstanpy - INFO - Chain [1] start processing
19:49:03 - cmdstanpy - INFO - Chain [1] done processing
19:49:03 - cmdstanpy - INFO - Chain [1] start processing
19:49:04 - cmdstanpy - INFO - Chain [1] done processing


Model 13/16 - RMSE: 15690.14 | Params: {'changepoint_prior_scale': 0.5, 'seasonality_prior_scale': 0.01}


19:49:05 - cmdstanpy - INFO - Chain [1] start processing
19:49:06 - cmdstanpy - INFO - Chain [1] done processing
19:49:06 - cmdstanpy - INFO - Chain [1] start processing
19:49:07 - cmdstanpy - INFO - Chain [1] done processing
19:49:07 - cmdstanpy - INFO - Chain [1] start processing
19:49:08 - cmdstanpy - INFO - Chain [1] done processing
19:49:09 - cmdstanpy - INFO - Chain [1] start processing
19:49:10 - cmdstanpy - INFO - Chain [1] done processing
19:49:10 - cmdstanpy - INFO - Chain [1] start processing
19:49:13 - cmdstanpy - INFO - Chain [1] done processing


Model 14/16 - RMSE: 12596.04 | Params: {'changepoint_prior_scale': 0.5, 'seasonality_prior_scale': 0.1}


19:49:13 - cmdstanpy - INFO - Chain [1] start processing
19:49:14 - cmdstanpy - INFO - Chain [1] done processing
19:49:14 - cmdstanpy - INFO - Chain [1] start processing
19:49:15 - cmdstanpy - INFO - Chain [1] done processing
19:49:16 - cmdstanpy - INFO - Chain [1] start processing
19:49:17 - cmdstanpy - INFO - Chain [1] done processing
19:49:17 - cmdstanpy - INFO - Chain [1] start processing
19:49:19 - cmdstanpy - INFO - Chain [1] done processing
19:49:19 - cmdstanpy - INFO - Chain [1] start processing
19:49:21 - cmdstanpy - INFO - Chain [1] done processing


Model 15/16 - RMSE: 24389.41 | Params: {'changepoint_prior_scale': 0.5, 'seasonality_prior_scale': 1.0}


19:49:21 - cmdstanpy - INFO - Chain [1] start processing
19:49:24 - cmdstanpy - INFO - Chain [1] done processing
19:49:24 - cmdstanpy - INFO - Chain [1] start processing
19:50:14 - cmdstanpy - INFO - Chain [1] done processing
19:50:15 - cmdstanpy - INFO - Chain [1] start processing
19:50:17 - cmdstanpy - INFO - Chain [1] done processing
19:50:18 - cmdstanpy - INFO - Chain [1] start processing
19:50:20 - cmdstanpy - INFO - Chain [1] done processing
19:50:20 - cmdstanpy - INFO - Chain [1] start processing
19:50:21 - cmdstanpy - INFO - Chain [1] done processing


Model 16/16 - RMSE: 40629.90 | Params: {'changepoint_prior_scale': 0.5, 'seasonality_prior_scale': 10.0}


In [8]:
# One row per evaluated configuration, with its cross-validated RMSE alongside.
tuning_results = pd.DataFrame(all_params).assign(rmse=rmses)
# Row (as a Series) of the configuration with the smallest RMSE.
best_params = tuning_results.loc[tuning_results['rmse'].idxmin()]

In [9]:
# Report the winning configuration from the grid search.
print("\n" + "="*40)
print("       OPTIMIZATION RESULTS       ")
print("="*40)
print(f"BEST RMSE: ${best_params['rmse']:.2f}")
# best_params is a results row that also carries the 'rmse' score; drop it so
# the printed dict contains hyper-parameters only (usable as Prophet(**params)).
print(f"Best Params: {best_params.drop('rmse').to_dict()}")


       OPTIMIZATION RESULTS       
BEST RMSE: $9106.58
Best Params: {'changepoint_prior_scale': 0.01, 'seasonality_prior_scale': 10.0, 'rmse': 9106.583285468334}


In [10]:
# Translate the RMSE improvement into a safety-stock figure.
# NOTE(review): old_rmse is hard-coded; presumably the RMSE of an earlier
# (untuned) model -- confirm its source and that it was measured the same way.
old_rmse = 10589.22
new_rmse = best_params['rmse']
# Z-score multiplier for safety stock.
# NOTE(review): 1.65 looks like a ~95% one-sided service level -- confirm.
SAFETY_STOCK_Z = 1.65
savings = SAFETY_STOCK_Z * (old_rmse - new_rmse)

print(f"\nPotential Safety Stock Savings: ${savings:,.2f}")


Potential Safety Stock Savings: $2,446.35
