In [None]:
# Set working directory (optional during development).
# The project root defaults to the original development path but can be
# overridden with the PROJECT_ROOT environment variable. The chdir is skipped
# when the directory does not exist, so the notebook can still start on another
# machine (relative paths then resolve against the current directory).
import os
project_root = os.environ.get('PROJECT_ROOT', '/Users/sudishmakarki/My_project2')
if os.path.isdir(project_root):
    os.chdir(project_root)  # only if needed
else:
    print(" Project root not found, keeping current directory:", project_root)
print(" Working directory:", os.getcwd())

# Standard Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from prophet.diagnostics import cross_validation, performance_metrics




# Custom Functions
from models.data_interpolation import (
    load_data,
    preprocess_data,
    split_train_test,
    generate_time_series_splits
)

from models.model_prophet import (
    prepare_prophet_data,
    train_baseline_prophet,
    forecast_with_model,
    calculate_peak_hours,
    evaluate_metrics,
    cross_validate_baseline
)

from models.model_prophet import (
    tune_prophet_model,
    forecast_with_model_r1,
    evaluate_tuned_model_metrics,
    cross_validate_tuned_r1
)

In [None]:
# Load and preprocess
# Read the raw restaurant CSV (the path is relative to the working directory)
# and pass it through the project's preprocessing helper.
df = load_data('data/RestaurantData.csv')
df_clean = preprocess_data(df)  # consumed by split_train_test in the next cell

In [None]:
# Split into train and test
# split_date is passed to the project helper; presumably rows before
# 2022-01-01 form the training set and the rest the test set (TODO confirm
# against models.data_interpolation.split_train_test).
restaurant_train, restaurant_test = split_train_test(df_clean, split_date='2022-01-01')
# Format for Prophet
# Later cells read 'ds', 'y' and 'Hour' columns from the returned test frame.
restaurant_train_prophet, restaurant_test_prophet = prepare_prophet_data(restaurant_train, restaurant_test)
# Train the baseline Prophet model
# `m` is reused below for prediction, plotting and cross-validation.
m = train_baseline_prophet(restaurant_train_prophet)

In [None]:
# -- Test Set Forecasting --
# Predict on the test set and display the first few rows.
# The resulting frame is used downstream via its 'ds', 'yhat' and 'Hour' columns.
test_forecast_df = forecast_with_model(m, restaurant_test_prophet)
print("Forecast on Test Set (first 5 rows):")
# Bare last expression so the notebook renders the preview as a rich table.
test_forecast_df.head()





In [None]:
# Mean forecast (yhat) for each hour of day across the whole test forecast.
# Note: `hourly_avg` is reassigned by calculate_peak_hours in the following cell.
forecast_by_hour = test_forecast_df.groupby('Hour')
hourly_avg = forecast_by_hour['yhat'].mean()

print("\nAverage Forecast by Hour:")
display(hourly_avg)

In [None]:
# Let the project helper pick the peak hours from the test forecast; it also
# returns the per-hour averages and the threshold it applied.
peak_hours_dynamic, hourly_avg, threshold = calculate_peak_hours(test_forecast_df)

print("\nThreshold for Peak Hours:", threshold)
print("\nDynamically Identified Peak Hours:", peak_hours_dynamic)

# Keep only the rows whose hour of day is one of the identified peak hours,
# for both the forecast and the actual test data.
forecast_is_peak = test_forecast_df['Hour'].isin(peak_hours_dynamic)
actual_is_peak = restaurant_test_prophet['Hour'].isin(peak_hours_dynamic)
forecast_peak_df = test_forecast_df[forecast_is_peak]
actual_peak_df = restaurant_test_prophet[actual_is_peak]

In [None]:
# ----- Average forecasted customer count by hour -----
# Bar chart of the per-hour mean forecast, drawn on an explicit axes object.
bar_fig, bar_ax = plt.subplots(figsize=(10, 5))
hourly_avg.plot(kind='bar', color='skyblue', rot=0, ax=bar_ax)  # rot=0 keeps hour labels horizontal
bar_ax.set_title("Average Forecasted Customer Count by Hour")
bar_ax.set_xlabel("Hour of Day")
bar_ax.set_ylabel("Average Forecast (yhat)")
bar_fig.tight_layout()
plt.show()

In [None]:
# ----- Compare Forecast to Actuals (All Hours) -----
# Overlay the observed test-set counts (red) on Prophet's forecast plot.
# The scatter uses restaurant_test's index as x; presumably a datetime index - TODO confirm.
actual_counts = restaurant_test['CustomerCount']
f, ax = plt.subplots(figsize=(15, 5))
ax.set_title("Prophet Forecast with Actuals")
ax.scatter(actual_counts.index, actual_counts, color='r', label='Actual')
fig = m.plot(test_forecast_df, ax=ax)  # draws onto the axes created above
ax.legend()
plt.show()

In [None]:
# ----- Zoom In: January 2022, then its first week (All Hours) -----
def _plot_forecast_window(start, end, title):
    """Plot forecast vs. actuals restricted to the x-range [start, end].

    Draws the actual test counts as red points and the Prophet forecast from
    `test_forecast_df` on one axes, limits the x-axis to the given window and
    the y-axis to 0-80, then shows the figure.

    Parameters
    ----------
    start, end : anything accepted by ``pd.to_datetime``
        Lower and upper x-axis bounds.
    title : str
        Axes title.

    Returns
    -------
    (fig, ax) : the matplotlib figure and axes that were drawn.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.scatter(restaurant_test.index, restaurant_test['CustomerCount'], color='r', label='Actual')
    m.plot(test_forecast_df, ax=ax)
    ax.set_xbound(lower=pd.to_datetime(start), upper=pd.to_datetime(end))
    ax.set_ylim(0, 80)
    ax.set_title(title)  # title set on the axes for every window
    ax.legend()
    plt.show()
    return fig, ax


# January 2022
fig, ax = _plot_forecast_window('2022-01-01', '2022-02-01', 'January 2022 Forecast vs Actuals')

# ----- Zoom In Further: First Week of January 2022 (All Hours) -----
lower_bound = pd.to_datetime('2022-01-01')
upper_bound = pd.to_datetime('2022-01-08')
fig, ax = _plot_forecast_window(lower_bound, upper_bound, 'First Week of January 2022 Forecast vs Actuals')

In [None]:
# ----- Compare Forecast vs. Actuals for Dynamically Identified Peak Hours -----
# Actual peak-hour counts as red points, baseline forecast as a marked line,
# both drawn on an explicit axes object.
peak_fig, peak_ax = plt.subplots(figsize=(10, 5))

peak_ax.scatter(
    actual_peak_df['ds'],
    actual_peak_df['y'],
    color='r',
    label='Actual Peak Hours',
    alpha=0.7,
)
peak_ax.plot(
    forecast_peak_df['ds'],
    forecast_peak_df['yhat'],
    marker='o',
    linestyle='-',
    color='skyblue',
    label='Baseline Forecast',
)

peak_ax.set_xlabel('Date')
peak_ax.set_ylabel('Customer Count')
peak_ax.set_title('Baseline Model Forecast for Dynamically Identified Peak Hours')
peak_ax.legend()
peak_fig.tight_layout()
plt.show()

In [None]:
# ----- Plot Prophet Components for the Test Forecast -----
# Uses the baseline model `m` and the test-set forecast produced earlier;
# the returned figure is kept in `fig`.
fig = m.plot_components(test_forecast_df)
plt.show()

In [None]:
# -- Future Forecasting (baseline) --
# Generate future data for the next 30 days at hourly frequency.
# make_future_dataframe keeps the training timestamps by default
# (include_history=True), so `future` holds history plus 30*24 new hours.
future = m.make_future_dataframe(periods=30*24, freq='h')
forecast_future = m.predict(future)

print("Future Forecast:")
print(forecast_future.head())

# Plot the future forecast (historical data in black, forecast in blue)
m.plot(forecast_future)
plt.title("Future Forecast (Hourly)")
plt.show()

# Calculate the average forecast (yhat) per hour from the future forecast.
# Only rows after the last training timestamp are averaged; otherwise the
# in-sample (historical) predictions would dominate the "future" averages.
forecast_future['Hour'] = forecast_future['ds'].dt.hour
last_history_ts = m.history_dates.max()
future_only = forecast_future[forecast_future['ds'] > last_history_ts]
future_hourly_avg = future_only.groupby('Hour')['yhat'].mean()
print("\nFuture Average Forecast by Hour:")
print(future_hourly_avg)

# Define a threshold based on the maximum forecast value (e.g., 60% of max)
threshold = 0.6 * future_hourly_avg.max()
print("\nThreshold for Peak Hours:", threshold)

# Dynamically select all hours where the forecast meets or exceeds the threshold
future_peak_hours = sorted([hour for hour, demand in future_hourly_avg.items() if demand >= threshold])
print("\nDynamically Selected Peak Hours:", future_peak_hours)

In [None]:
# ----- Evaluate Error Metrics for All Test Data -----
# Actuals and forecasts are compared row by row, so both are expected to cover
# the same timestamps in the same order.
# NOTE(review): MAPE can become extremely large if any actual count is 0 - confirm
# whether zero-count hours exist in the test set.
y_true_all = restaurant_test['CustomerCount']
y_pred_all = test_forecast_df['yhat']

mae_all = mean_absolute_error(y_true_all, y_pred_all)
rmse_all = np.sqrt(mean_squared_error(y_true_all, y_pred_all))
mape_all = mean_absolute_percentage_error(y_true_all, y_pred_all)

print("Overall Test Data Metrics:")
for metric_label, metric_value in (("MAE:", mae_all), ("RMSE:", rmse_all), ("MAPE:", mape_all)):
    print(metric_label, metric_value)

In [None]:
# ----- Evaluate Error Metrics for Peak Hours -----
# scikit-learn metrics compare values positionally and ignore the pandas index,
# so indexing by 'ds' alone does not align anything. Align the two series on
# their shared 'ds' timestamps explicitly before scoring.
actual_peak = actual_peak_df.set_index('ds')['y']
predicted_peak = forecast_peak_df.set_index('ds')['yhat']
actual_peak, predicted_peak = actual_peak.align(predicted_peak, join='inner')

if actual_peak.empty:
    raise ValueError("No overlapping 'ds' timestamps between actual and forecast peak-hour data.")

mae_peak = mean_absolute_error(actual_peak, predicted_peak)
rmse_peak = np.sqrt(mean_squared_error(actual_peak, predicted_peak))
mape_peak = mean_absolute_percentage_error(actual_peak, predicted_peak)

print("\nPeak Hours Metrics:")
print("Baseline Peak Hours MAE:", mae_peak)
print("Baseline Peak Hours RMSE:", rmse_peak)
print("Baseline Peak Hours MAPE:", mape_peak)

In [None]:
# Cross-validate the baseline model (m) with Prophet's diagnostics helpers.
# Window sizes: 730 days of initial training, a new cutoff every 180 days,
# and a 365-day forecast horizon per cutoff.
cv_windows = {
    'initial': '730 days',
    'period': '180 days',
    'horizon': '365 days',
}
df_cv_baseline = cross_validation(m, **cv_windows)
df_p_baseline = performance_metrics(df_cv_baseline)

print("Cross-Validation Performance Metrics for Baseline Model:")
print(df_p_baseline.head())

In [24]:
# Hyperparameter search space handed to the project tuning helper.
# 5 * 4 * 2 * 3 = 120 candidate values in total, if the helper evaluates the full grid.
param_grid = dict(
    changepoint_prior_scale=[0.001, 0.01, 0.1, 0.5, 1.0],
    seasonality_prior_scale=[0.01, 0.1, 1.0, 10.0],
    seasonality_mode=['additive', 'multiplicative'],
    changepoint_range=[0.8, 0.9, 1.0],
)

# NOTE(review): the test frame is passed into the tuner. If candidates are
# ranked by test-set error, metrics later reported on that same test set will
# be optimistic - confirm against tune_prophet_model.
tuning_kwargs = dict(
    train_df=restaurant_train_prophet,
    test_df=restaurant_test_prophet,
    param_grid=param_grid,
)
m_best_r1, best_params, tuning_results = tune_prophet_model(**tuning_kwargs)

Hyperparameter Tuning (Composite = RMSE + MAE):
Params: {'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 0.01, 'seasonality_mode': 'additive', 'changepoint_range': 0.8} --> RMSE: 7.7878, MAE: 6.1805, Composite: 13.9682
Params: {'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 0.01, 'seasonality_mode': 'additive', 'changepoint_range': 0.9} --> RMSE: 7.7878, MAE: 6.1805, Composite: 13.9682
Params: {'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 0.01, 'seasonality_mode': 'additive', 'changepoint_range': 1.0} --> RMSE: 7.7878, MAE: 6.1805, Composite: 13.9682
Params: {'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 0.01, 'seasonality_mode': 'multiplicative', 'changepoint_range': 0.8} --> RMSE: 7.7873, MAE: 6.1853, Composite: 13.9727
Params: {'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 0.01, 'seasonality_mode': 'multiplicative', 'changepoint_range': 0.9} --> RMSE: 7.7873, MAE: 6.1853, Composite: 13.9727
Params: {'changepoin

In [27]:
# Forecast the test period with the tuned model and preview the first rows.
# NOTE(review): `forecast_with_model_r1` is imported at the top of the notebook
# but the baseline helper `forecast_with_model` is used here - confirm which
# one is intended for the tuned model.
restaurant_test_fcst_best_r1 = forecast_with_model(m_best_r1, restaurant_test_prophet)
restaurant_test_fcst_best_r1.head()


Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,daily,daily_lower,daily_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat,Hour
0,2022-01-01 00:00:00,20.493384,-1.019163,18.446384,20.493384,20.493384,-11.558638,-11.558638,-11.558638,-13.469402,-13.469402,-13.469402,3.087093,3.087093,3.087093,-1.176329,-1.176329,-1.176329,0.0,0.0,0.0,8.934746,0
1,2022-01-01 01:00:00,20.493493,0.50368,20.159216,20.493493,20.493493,-9.738844,-9.738844,-9.738844,-11.556553,-11.556553,-11.556553,2.994288,2.994288,2.994288,-1.176579,-1.176579,-1.176579,0.0,0.0,0.0,10.754649,1
2,2022-01-01 02:00:00,20.493602,2.00965,21.55481,20.493602,20.493602,-8.654748,-8.654748,-8.654748,-10.360383,-10.360383,-10.360383,2.882492,2.882492,2.882492,-1.176858,-1.176858,-1.176858,0.0,0.0,0.0,11.838854,2
3,2022-01-01 03:00:00,20.493712,0.720377,19.506351,20.493712,20.493712,-10.074568,-10.074568,-10.074568,-11.650193,-11.650193,-11.650193,2.752792,2.752792,2.752792,-1.177166,-1.177166,-1.177166,0.0,0.0,0.0,10.419144,3
4,2022-01-01 04:00:00,20.493821,-1.613292,18.611159,20.493821,20.493821,-11.697421,-11.697421,-11.697421,-13.126359,-13.126359,-13.126359,2.606442,2.606442,2.606442,-1.177504,-1.177504,-1.177504,0.0,0.0,0.0,8.7964,4
