# In [None]:
import pandas as pd
import numpy as np
from prophet import Prophet
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error

def plot_predictions(results, store_name):
    """Show a 2x2 diagnostic figure comparing actual and predicted values.

    Panels:
        * top-left: actual and predicted series over ``ds`` with the
          ``Lower_Bound``/``Upper_Bound`` band shaded;
        * top-right: actual vs predicted scatter with a dashed identity
          line drawn from 0 to the largest actual/predicted value;
        * bottom-left: histogram of ``Predicted - Actual``;
        * bottom-right: that error plotted against the predicted value.

    Args:
        results: Table with columns ``ds``, ``Actual``, ``Predicted``,
            ``Lower_Bound`` and ``Upper_Bound``.
        store_name: Label interpolated into the figure title.
    """
    # Reset to matplotlib's default style before drawing.
    plt.style.use('default')

    fig, axs = plt.subplots(2, 2, figsize=(20, 12))
    fig.suptitle(f'Prediction Analysis for {store_name}', fontsize=16)
    series_ax, scatter_ax = axs[0, 0], axs[0, 1]
    hist_ax, residual_ax = axs[1, 0], axs[1, 1]

    timestamps = results['ds']
    actual = results['Actual']
    predicted = results['Predicted']

    # Panel 1: both series over time, with the prediction interval shaded.
    series_ax.plot(timestamps, actual, label='Actual', marker='o', alpha=0.5)
    series_ax.plot(timestamps, predicted, label='Predicted', color='red', alpha=0.7)
    series_ax.fill_between(
        timestamps,
        results['Lower_Bound'],
        results['Upper_Bound'],
        color='red',
        alpha=0.1,
    )
    series_ax.set_title('Actual vs Predicted Values')
    series_ax.set_xlabel('Time')
    series_ax.set_ylabel('Dwell Time (minutes)')
    series_ax.legend()
    series_ax.tick_params(axis='x', rotation=45)

    # Panel 2: actual vs predicted, with the y = x reference line.
    scatter_ax.scatter(actual, predicted, alpha=0.5)
    diagonal_end = max(actual.max(), predicted.max())
    scatter_ax.plot([0, diagonal_end], [0, diagonal_end], 'r--', alpha=0.5)
    scatter_ax.set_title('Actual vs Predicted Scatter Plot')
    scatter_ax.set_xlabel('Actual Dwell Time')
    scatter_ax.set_ylabel('Predicted Dwell Time')

    # Signed error shared by the two bottom panels (positive = over-prediction).
    errors = predicted - actual

    # Panel 3: distribution of the signed error.
    hist_ax.hist(errors, bins=30, alpha=0.7)
    hist_ax.set_title('Error Distribution')
    hist_ax.set_xlabel('Prediction Error (minutes)')
    hist_ax.set_ylabel('Count')

    # Panel 4: error against predicted value, with a zero-error guide line.
    residual_ax.scatter(predicted, errors, alpha=0.5)
    residual_ax.axhline(y=0, color='r', linestyle='--', alpha=0.5)
    residual_ax.set_title('Error vs Predicted Value')
    residual_ax.set_xlabel('Predicted Value')
    residual_ax.set_ylabel('Error')

    plt.tight_layout()
    plt.show()

def _add_time_features(frame):
    """Add the ``hour`` regressor and the 0/1 time-of-day flags to *frame* in place.

    The flags use inclusive hour ranges: lunch 11-14, dinner 17-19,
    morning 8-10, afternoon 14-16 (hour 14 sets both lunch and afternoon).
    Requires a datetime ``ds`` column. Returns *frame* for convenience.
    """
    hour = frame['ds'].dt.hour
    frame['is_lunch'] = hour.between(11, 14).astype(int)
    frame['is_dinner'] = hour.between(17, 19).astype(int)
    frame['is_morning'] = hour.between(8, 10).astype(int)
    frame['is_afternoon'] = hour.between(14, 16).astype(int)
    frame['hour'] = hour
    return frame


def prepare_and_predict(df, store_name, train_size=0.8):
    """Fit a Prophet model on one store's history and evaluate it on a hold-out tail.

    The rows of *df* whose ``store`` equals *store_name* are sorted by time and
    split chronologically: the first ``train_size`` fraction is used for
    fitting, the remainder for evaluation. MAE and RMSE on the hold-out rows
    are printed, and the diagnostic figure is shown via ``plot_predictions``.

    Args:
        df: Table with at least the columns ``store``, ``timestamp`` and
            ``dwell_time``.
        store_name: Value of the ``store`` column to select.
        train_size: Fraction (0-1) of the store's rows used for training.

    Returns:
        ``(model, results)`` where ``model`` is the fitted ``Prophet``
        instance and ``results`` is a DataFrame with columns ``ds``,
        ``Actual``, ``Predicted``, ``Lower_Bound`` and ``Upper_Bound``, one
        row per hold-out observation.

    Raises:
        ValueError: If the chronological split leaves the training or the
            hold-out part empty (e.g. unknown store name).
    """
    # Select the store and rename to the column names Prophet expects.
    store_data = df[df['store'] == store_name].copy()
    store_data = store_data.rename(columns={'timestamp': 'ds', 'dwell_time': 'y'})

    # Parse timestamps and drop any timezone, then order chronologically.
    store_data['ds'] = pd.to_datetime(store_data['ds']).dt.tz_localize(None)
    store_data = store_data.sort_values('ds')

    # Trailing rolling means of the target, used as extra regressors.
    # .bfill()/.ffill() replace the deprecated fillna(method=...) spelling.
    window_sizes = [5, 10, 15]
    for window in window_sizes:
        store_data[f'rolling_mean_{window}'] = (
            store_data['y']
            .rolling(window=window, min_periods=1)
            .mean()
            .bfill()
            .ffill()
        )

    # Chronological split. Explicit copies so that adding feature columns
    # below does not write into a view of store_data.
    train_idx = int(len(store_data) * train_size)
    train_data = store_data.iloc[:train_idx].copy()
    test_data = store_data.iloc[train_idx:].copy()

    print(f"\nStore: {store_name}")
    print(f"Training data size: {len(train_data)}")
    print(f"Testing data size: {len(test_data)}")

    if train_data.empty or test_data.empty:
        raise ValueError(
            f"Not enough data for store {store_name!r}: "
            f"{len(train_data)} training rows, {len(test_data)} testing rows"
        )

    model = Prophet(
        changepoint_prior_scale=0.5,
        daily_seasonality=20,
        weekly_seasonality=True,
        yearly_seasonality=False,
        seasonality_prior_scale=15,
        changepoint_range=0.9,
        interval_width=0.95
    )

    # Conditional seasonalities: (name, fourier_order, condition column).
    conditional_seasonalities = [
        ('lunch_peak', 8, 'is_lunch'),
        ('dinner_peak', 8, 'is_dinner'),
        ('morning_peak', 5, 'is_morning'),
        ('afternoon_peak', 5, 'is_afternoon'),
    ]
    for name, fourier_order, condition_name in conditional_seasonalities:
        model.add_seasonality(
            name=name,
            period=1,
            fourier_order=fourier_order,
            condition_name=condition_name
        )

    # Extra regressors: hour of day plus the rolling means.
    model.add_regressor('hour')
    for window in window_sizes:
        model.add_regressor(f'rolling_mean_{window}')

    _add_time_features(train_data)
    model.fit(train_data)

    # Predict at the timestamps actually observed in the hold-out set.
    # Generating a synthetic one-minute grid after the last training point
    # would pair predictions with the wrong rows whenever the data is not
    # perfectly minute-spaced (gaps, irregular sampling).
    future = _add_time_features(test_data[['ds']].copy())

    # The rolling means are derived from the target itself, so they are not
    # known for the hold-out period; the training-period mean is used as a
    # constant stand-in.
    for window in window_sizes:
        future[f'rolling_mean_{window}'] = train_data[f'rolling_mean_{window}'].mean()

    test_predictions = model.predict(future)

    mae = mean_absolute_error(test_data['y'], test_predictions['yhat'])
    rmse = np.sqrt(mean_squared_error(test_data['y'], test_predictions['yhat']))

    print(f"\nMetrics for {store_name}:")
    print(f"MAE: {mae:.2f} minutes")
    print(f"RMSE: {rmse:.2f} minutes")

    # .values drops the differing indexes so columns align by position.
    results = pd.DataFrame({
        'ds': test_data['ds'].values,
        'Actual': test_data['y'].values,
        'Predicted': test_predictions['yhat'].values,
        'Lower_Bound': test_predictions['yhat_lower'].values,
        'Upper_Bound': test_predictions['yhat_upper'].values
    })

    plot_predictions(results, store_name)

    return model, results

def main(csv_path='output.csv', stores=None):
    """Run the per-store forecasting pipeline and collect the outputs.

    Args:
        csv_path: Path of the CSV file to load. Defaults to ``'output.csv'``.
        stores: Iterable of store names to process. When ``None``, the
            stores ``'Chicken Rice'``, ``'Indian'`` and ``'Taiwanese'`` are
            processed.

    Returns:
        Dict mapping each store name to
        ``{'model': <fitted model>, 'predictions': <results DataFrame>}``
        as returned by ``prepare_and_predict``.
    """
    if stores is None:
        stores = ['Chicken Rice', 'Indian', 'Taiwanese']

    df = pd.read_csv(csv_path)

    store_results = {}
    for store in stores:
        model, results = prepare_and_predict(df, store)
        store_results[store] = {
            'model': model,
            'predictions': results
        }

        # Show the first few hold-out rows for a quick visual check.
        print(f"\nSample predictions for {store}:")
        print(results.head())

    return store_results

# Run the pipeline only when executed as a script (not on import); the
# per-store dict returned by main() is kept in the module-level `results`.
if __name__ == "__main__":
    results = main()