In [1]:
# Set working directory (optional during development).
# The project root defaults to the original development path but can be
# overridden with the PROJECT_ROOT environment variable, so the notebook is
# not tied to one machine's absolute path.
import os

PROJECT_ROOT = os.environ.get('PROJECT_ROOT', '/Users/sudishmakarki/My_project2')

if os.path.isdir(PROJECT_ROOT):
    os.chdir(PROJECT_ROOT)
else:
    # Keep the current directory instead of failing with FileNotFoundError;
    # the message makes it clear why relative paths/imports may not resolve.
    print(" PROJECT_ROOT not found, keeping current directory:", PROJECT_ROOT)
print(" Working directory:", os.getcwd())

# Standard Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from prophet.diagnostics import cross_validation, performance_metrics
import scipy.stats as stats

# Custom Functions
from models.data_interpolation import (
    load_data,
    preprocess_data,
    split_train_test,
    generate_time_series_splits
)

from models.model_sarimax import (
    prepare_sarimax_data,
    check_stationarity,
    plot_acf_pacf,
    fit_sarimax_model,
    analyze_residual_spike,
    ljung_box_test,
    forecast_sarimax_model,
    identify_peak_hours_sarimax,
    evaluate_sarimax_metrics,
    rolling_forecast_sarimax,
    generate_future_forecast_sarimax,
    group_forecast_by_hour,
    
)

from models.model_sarimax import (
    create_exogenous_variables,
    fit_sarimax_with_exog,
    ljung_box_test_refined_sarimax,
    analyze_largest_residual_sarimax_exog,
    forecast_with_exog,
    analyze_peak_hours_exog,
    evaluate_sarimax_exog_metrics,
    rolling_forecast_sarimax_exog,
    generate_future_forecast_sarimax_exog,
    group_forecast_by_hour_sarimax_exog
    
)

 Working directory: /Users/sudishmakarki/My_project2


In [2]:
# Custom Functions
from models.data_interpolation import (
    load_data,
    preprocess_data,
    split_train_test,
    generate_time_series_splits
)

In [3]:
# Read the raw restaurant CSV, then pass it through the project's preprocessing step.
DATA_PATH = 'data/RestaurantData.csv'  # relative path to the input file
df = load_data(DATA_PATH)
df_clean = preprocess_data(df)

                     Year  Month  Season  DayOfWeek WeekDay  Hour  \
Timestamp                                                           
2018-01-01 00:00:00  2018      1  Winter          1  Monday     0   
2018-01-01 01:00:00  2018      1  Winter          1  Monday     1   
2018-01-01 02:00:00  2018      1  Winter          1  Monday     2   
2018-01-01 03:00:00  2018      1  Winter          1  Monday     3   
2018-01-01 04:00:00  2018      1  Winter          1  Monday     4   

                            Holiday Weather SpecialEvent  CustomerCount  \
Timestamp                                                                 
2018-01-01 00:00:00  New Year's Day   Rainy          NaN              6   
2018-01-01 01:00:00  New Year's Day   Windy          NaN             11   
2018-01-01 02:00:00  New Year's Day   Snowy          NaN              9   
2018-01-01 03:00:00  New Year's Day   Rainy          NaN             10   
2018-01-01 04:00:00  New Year's Day  Cloudy          NaN          

In [4]:
# Partition the cleaned data into train/test at the chosen split date
SPLIT_DATE = '2022-01-01'
restaurant_train, restaurant_test = split_train_test(df_clean, split_date=SPLIT_DATE)

# Convert both partitions into the form expected by the SARIMAX helpers
train_series, test_series = prepare_sarimax_data(restaurant_train, restaurant_test)

SARIMAX with Exogenous Variables: Model Refinement 2

In [5]:
# Work on copies of the train/test frames with a datetime index at hourly frequency
def _to_hourly(frame):
    """Return a copy of `frame` with a datetime index conformed to hourly frequency.

    The input frame is not modified: the index conversion is applied to a copy,
    and `asfreq('h')` returns a new object. Per pandas' `asfreq` semantics, any
    hour missing from the original index shows up as a NaN row.
    """
    hourly = frame.copy()
    hourly.index = pd.to_datetime(hourly.index)
    return hourly.asfreq('h')


restaurant_subset_train = _to_hourly(restaurant_train)
restaurant_subset_test = _to_hourly(restaurant_test)

In [7]:
# Build the exogenous regressors (hour + holidays) for both partitions,
# then preview the first rows of the training matrix.
exog_frames = create_exogenous_variables(restaurant_subset_train, restaurant_subset_test)
exog_train, exog_test = exog_frames
exog_train.head()


Unnamed: 0_level_0,hour,Holiday_Boxing Day,Holiday_Boxing Day (observed),Holiday_Christmas Day,Holiday_Christmas Day (observed),Holiday_Good Friday,Holiday_May Day,Holiday_New Year's Day,Holiday_New Year's Day (observed),Holiday_No Holiday,Holiday_Platinum Jubilee of Elizabeth II,Holiday_Spring Bank Holiday,Holiday_State Funeral of Queen Elizabeth II
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2018-01-01 00:00:00,0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2018-01-01 01:00:00,1,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2018-01-01 02:00:00,2,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2018-01-01 03:00:00,3,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2018-01-01 04:00:00,4,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
