In [1]:
import pandas as pd
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm
Importing plotly failed. Interactive plots will not work.


In [2]:
path = "../data/processed/superstore_clean.csv"
df = pd.read_csv(path)
df['order_date'] = pd.to_datetime(df['order_date'], dayfirst= False)

In [3]:
monthly_sales = df.set_index('order_date').resample('MS')['sales'].sum().reset_index()
prophet_df = monthly_sales.rename(columns={'order_date': 'ds', 'sales': 'y'})

In [4]:
model = Prophet(interval_width=0.95, seasonality_mode='multiplicative') 
model.fit(prophet_df)

19:33:23 - cmdstanpy - INFO - Chain [1] start processing
19:33:25 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x22377047380>

In [5]:
# This simulates "historical forecasts" to see how accurate the model usually is
print("Calculating model accuracy (this may take a moment)...")
df_cv = cross_validation(model, initial='730 days', period='180 days', horizon='30 days')
df_p = performance_metrics(df_cv)
rmse_value = df_p['rmse'].mean()

print(f"Average Forecast Error (RMSE): ${rmse_value:.2f}")

Calculating model accuracy (this may take a moment)...


  0%|          | 0/4 [00:00<?, ?it/s]19:33:28 - cmdstanpy - INFO - Chain [1] start processing
19:33:29 - cmdstanpy - INFO - Chain [1] done processing
 25%|██▌       | 1/4 [00:01<00:05,  1.91s/it]19:33:30 - cmdstanpy - INFO - Chain [1] start processing
19:33:31 - cmdstanpy - INFO - Chain [1] done processing
 50%|█████     | 2/4 [00:03<00:03,  1.87s/it]19:33:32 - cmdstanpy - INFO - Chain [1] start processing
19:33:33 - cmdstanpy - INFO - Chain [1] done processing
 75%|███████▌  | 3/4 [00:05<00:01,  1.63s/it]19:33:33 - cmdstanpy - INFO - Chain [1] start processing
19:33:34 - cmdstanpy - INFO - Chain [1] done processing
100%|██████████| 4/4 [00:06<00:00,  1.62s/it]


Average Forecast Error (RMSE): $10589.22


In [6]:
# 4. Predict Next Month (Jan 2018)
future = model.make_future_dataframe(periods=1, freq='MS')
forecast = model.predict(future)
next_month_forecast = forecast.iloc[-1]['yhat']

In [7]:
# 5. Calculate Optimal Inventory Levels
# Service Level 95% (Z-score = 1.65)
z_score = 1.65
safety_stock = z_score * rmse_value
total_inventory_budget = next_month_forecast + safety_stock

In [8]:
print("\n--- INVENTORY RECOMMENDATION FOR JAN 2018 ---")
print(f"Predicted Demand:      ${next_month_forecast:,.2f}")
print(f"Safety Buffer (95%):   ${safety_stock:,.2f}")
print(f"---------------------------------------------")
print(f"TOTAL STOCK BUDGET:    ${total_inventory_budget:,.2f}")


--- INVENTORY RECOMMENDATION FOR JAN 2018 ---
Predicted Demand:      $34,488.88
Safety Buffer (95%):   $17,472.21
---------------------------------------------
TOTAL STOCK BUDGET:    $51,961.09
