In [1]:
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
from prophet import Prophet
from tqdm import tqdm
import pickle
import os
from datetime import datetime
import json

# Define consistent colors for plots
TRAIN_COLOR = 'steelblue'
TRAIN_FILL_COLOR = 'steelblue'
TRAIN_FILL_ALPHA = 0.3
VAL_COLOR = 'coral'
VAL_FILL_COLOR = 'coral'
VAL_FILL_ALPHA = 0.3
TEST_COLOR = 'forestgreen'
TEST_FILL_COLOR = 'forestgreen'
TEST_FILL_ALPHA = 0.3

# Set random seeds for reproducibility
def set_seeds(seed=42):
    torch.manual_seed(seed)
    np.random.seed(seed)
    
set_seeds()

## Data Loading and Preprocessing

In [2]:
def load_data(file_path):
    data = pd.read_parquet(file_path)
    return data

def check_for_missing_values(data):
    missing_values = data.isnull().sum()
    if missing_values.any():
        print("Missing values found in the dataset:")
        print(missing_values[missing_values > 0])
    else:
        print("No missing values found in the dataset.")
    return missing_values

def split_data(data, train_years, val_year, test_year):
    data['timestamp'] = pd.to_datetime(data['timestamp'])
    
    train_data = data[data['timestamp'].dt.year.isin(train_years)]
    val_data = data[data['timestamp'].dt.year == val_year]
    test_data = data[data['timestamp'].dt.year == test_year]
    
    print(f"Train data size: {len(train_data)}")
    print(f"Validation data size: {len(val_data)}")
    print(f"Test data size: {len(test_data)}")
    
    return train_data, val_data, test_data

def filter_ride_data(data, ride_name):
    return data[data[f'ride_name_{ride_name}'] == True].copy()

def get_all_rides(data):
    ride_columns = [col for col in data.columns if col.startswith('ride_name_')]
    return [col.replace('ride_name_', '') for col in ride_columns]

def filter_to_operating_hours(ride_data):
    # Determine operating hours from data where wait times > 0
    operating_hours = ride_data[ride_data["wait_time"] > 0].groupby(
        ride_data["timestamp"].dt.date
    )["timestamp"].agg(['min', 'max']).reset_index()
    
    # Extract opening and closing hours
    operating_hours['opening_hour'] = pd.to_datetime(operating_hours['min']).dt.hour
    operating_hours['closing_hour'] = pd.to_datetime(operating_hours['max']).dt.hour
    
    # Set reasonable boundaries for operating hours
    operating_hours['opening_hour'] = operating_hours['opening_hour'].clip(lower=9, upper=11)
    operating_hours['closing_hour'] = operating_hours['closing_hour'].clip(lower=17, upper=21)
    
    # Create date-to-hours mapping
    date_to_hours = {}
    for _, row in operating_hours.iterrows():
        date_to_hours[row['timestamp']] = (row['opening_hour'], row['closing_hour'])
    
    # Filter data to operating hours only
    def is_operating_hour(timestamp):
        date = timestamp.date()
        if date not in date_to_hours:
            return 0
        
        open_hour, close_hour = date_to_hours[date]
        hour = timestamp.hour
        return 1 if open_hour <= hour < close_hour else 0
    
    ride_data['operating_hour'] = ride_data['timestamp'].apply(is_operating_hour)
    ride_data = ride_data[ride_data['operating_hour'] == 1]
    ride_data = ride_data.drop(columns=["operating_hour"])
    
    return ride_data

## Time Series Decomposition with Prophet

In [3]:
class BaseTimeSeriesModel:
    def __init__(self):
        self.model = None
        self.forecast = None
        self.holidays = None
        
    def prepare_prophet_dataframe(self, data, include_y=True):
        prophet_df = data[['timestamp', 'wait_time', 'temperature_unscaled', 'rain_unscaled', 'is_weekend']].copy()
        prophet_df = prophet_df.rename(columns={'timestamp': 'ds', 'wait_time': 'y', 'temperature_unscaled': 'temperature', 'rain_unscaled': 'rain'})

        if not include_y:
            prophet_df = prophet_df.drop(["y"], axis=1) 
        
        # Add additional features
        prophet_df['temp_squared'] = prophet_df['temperature'] ** 2
        prophet_df['high_temp'] = (prophet_df['temperature'] > 25).astype(int)
        prophet_df['any_rain'] = (prophet_df['rain'] > 0).astype(int)
        prophet_df['temp_weekend'] = prophet_df['temperature'] * prophet_df['is_weekend']
        prophet_df['rain_weekend'] = prophet_df['rain'] * prophet_df['is_weekend']
        
        return prophet_df
    
    def create_holiday_dataframes(self, data):
        holiday_dfs = []
        
        # Process country holidays
        for country in ['swiss', 'german', 'french']:
            holiday_col = f"is_{country}_holiday"
            if holiday_col in data.columns:
                country_holidays = data.loc[data[holiday_col] == 1, ["timestamp"]]
                if len(country_holidays) > 0:
                    country_holidays["ds"] = pd.to_datetime(country_holidays["timestamp"]).dt.date
                    country_holidays = country_holidays.drop_duplicates(subset=["ds"])
                    country_holidays = country_holidays.drop(columns=["timestamp"])
                    country_holidays["holiday"] = f"{country}_holiday"
                    country_holidays["lower_window"] = 0
                    country_holidays["upper_window"] = 0
                    holiday_dfs.append(country_holidays.reset_index(drop=True))
        # Combine all holidays
        if holiday_dfs:
            all_holidays = pd.concat(holiday_dfs)
            all_holidays["ds"] = pd.to_datetime(all_holidays["ds"])
            return all_holidays.sort_values(by=["ds"]).reset_index(drop=True)
        return None
    
    def fit(self, prophet_df):
        # Create holidays dataframe
        self.holidays = self.create_holiday_dataframes(prophet_df)
        
        # Create a Prophet model with configuration from prophet_model.py
        self.model = Prophet(
            # Core parameters
            seasonality_mode='multiplicative',  # Better for tourism/attraction data
            changepoint_prior_scale=0.05,       # Flexibility in trend changes
            changepoint_range=0.95,             # Allow changepoints closer to the end

            # Handling seasonality
            seasonality_prior_scale=12,         # Stronger seasonality influence
            yearly_seasonality=True,            # Capture yearly patterns
            weekly_seasonality=True,            # Capture weekly patterns
            daily_seasonality=True,             # Custom daily seasonality for operating hours

            # Specific to park operation
            holidays_prior_scale=15,            # Strong holiday effects for parks
            holidays=self.holidays,             # Include holidays and covid periods
            interval_width=0.95                 # 95% confidence interval
        )

        # Add weather and interaction regressors
        self.model.add_regressor('temperature', mode='multiplicative', standardize=True)
        self.model.add_regressor('rain', mode='multiplicative', standardize=True)
        self.model.add_regressor('temp_squared', mode='additive', standardize=True)
        self.model.add_regressor('high_temp', mode='multiplicative', standardize=False)
        self.model.add_regressor('any_rain', mode='multiplicative', standardize=False)
        self.model.add_regressor('temp_weekend', mode='additive', standardize=True)
        self.model.add_regressor('rain_weekend', mode='multiplicative', standardize=True)
        
        prophet_df['between_covid_lockdowns'] = 0
        covid_period = (prophet_df['ds'] >= '2020-05-20') & (prophet_df['ds'] <= '2020-11-01')
        prophet_df.loc[covid_period, 'between_covid_lockdowns'] = 1
        self.model.add_regressor('between_covid_lockdowns')
        
        prophet_df['covid_recovery'] = 0
        recovery_period = (prophet_df['ds'] >= '2021-05-21') & (prophet_df['ds'] <= '2021-08-31')
        prophet_df.loc[recovery_period, 'covid_recovery'] = 1
        self.model.add_regressor('covid_recovery')

        self.model.fit(prophet_df)
        return self.model

    def predict(self, future_df):
        # Add required columns for prediction if they're not already present
        future_df = future_df.copy()
        
        # Add COVID regressors if they're not already present
        if 'between_covid_lockdowns' not in future_df.columns:
            future_df['between_covid_lockdowns'] = 0
            covid_period = (future_df['ds'] >= '2020-05-20') & (future_df['ds'] <= '2020-11-01')
            future_df.loc[covid_period, 'between_covid_lockdowns'] = 1
            
        if 'covid_recovery' not in future_df.columns:
            future_df['covid_recovery'] = 0
            recovery_period = (future_df['ds'] >= '2021-05-21') & (future_df['ds'] <= '2021-08-31')
            future_df.loc[recovery_period, 'covid_recovery'] = 1
        
        # Make predictions
        self.forecast = self.model.predict(future_df)
        
        # Apply post-processing
        self.forecast = self.post_process_forecast(self.forecast)
        
        return self.forecast
    
    def post_process_forecast(self, forecast):
        forecast = forecast.copy()
        
        # Correct negative predictions
        negative_mask = forecast['yhat'] < 0
        forecast.loc[negative_mask, 'yhat'] = 0
        forecast.loc[negative_mask, 'yhat_lower'] = 0
        forecast.loc[negative_mask, 'yhat_upper'] = 0
        
        return forecast
    
    def merge_predictions(self, original_data, forecast_data):
        result = original_data.copy()
        
        # Identify forecast columns to keep
        forecast_columns = ['ds', 'trend', 'yhat', 'yhat_lower', 'yhat_upper']
        for component in ['weekly', 'daily']:
            if component in forecast_data.columns:
                forecast_columns.append(component)
        
        result = pd.merge(
            result, 
            forecast_data[forecast_columns], 
            left_on='timestamp', 
            right_on='ds', 
            how='left'
        )
        
        result['residual'] = result['wait_time'] - result['yhat']
        return result

## Model Evaluation Functions

In [4]:
def evaluate_model(ride_df, actual_values, predictions, title=""):
    # Calculate metrics
    mae = np.mean(np.abs(predictions - actual_values))
    rmse = np.sqrt(np.mean(np.square(predictions - actual_values)))
    
    # For sMAPE, avoid division by zero
    epsilon = 1e-8
    abs_pct_errors = np.abs(predictions - actual_values) / (np.abs(predictions) + np.abs(actual_values) + epsilon)
    # Only include points where actual values are non-zero
    non_zero_mask = (actual_values > 0) & (predictions > 0)
    smape = np.mean(abs_pct_errors[non_zero_mask]) * 100 if np.any(non_zero_mask) else 0

    # Print metrics
    print(f"\n{title} MAE: {mae:.2f} minutes")
    print(f"{title} RMSE: {rmse:.2f} minutes")
    print(f"{title} sMAPE: {smape:.2f}%")
    
    # Create a DataFrame with results for time-based analysis
    results_df = pd.DataFrame({
        'timestamp': ride_df['timestamp'].values,
        'actual': actual_values,
        'predicted': predictions,
    })
    
    # Add time components
    results_df['hour'] = results_df['timestamp'].dt.hour
    results_df['day_of_week'] = results_df['timestamp'].dt.dayofweek
    results_df['month'] = results_df['timestamp'].dt.month
    
    # Calculate errors
    results_df['error'] = results_df['predicted'] - results_df['actual']
    results_df['abs_error'] = np.abs(results_df['error'])
    results_df['pct_error'] = abs_pct_errors * 100
    
    # Create metrics dictionary
    metrics = {
        "mae": mae,
        "rmse": rmse,
        "smape": smape
    }
    
    return metrics, results_df

def create_evaluation_plots(results_df, title=""):
    """Create evaluation plots for model performance"""
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    
    # Actual vs Predicted scatter plot
    axes[0, 0].scatter(results_df['actual'], results_df['predicted'], alpha=0.5)
    max_val = max(results_df['actual'].max(), results_df['predicted'].max())
    axes[0, 0].plot([0, max_val], [0, max_val], 'k--')
    axes[0, 0].set_xlabel('Actual Wait Time (minutes)')
    axes[0, 0].set_ylabel('Predicted Wait Time (minutes)')
    axes[0, 0].set_title(f'{title} - Actual vs Predicted')
    
    # Hourly error analysis
    hourly_errors = results_df.groupby('hour')['abs_error'].mean()
    hourly_errors.plot(kind='bar', ax=axes[0, 1])
    axes[0, 1].set_xlabel('Hour of Day')
    axes[0, 1].set_ylabel('Mean Absolute Error (minutes)')
    axes[0, 1].set_title(f'{title} - Error by Hour')
    axes[0, 1].tick_params(axis='x', rotation=45)
    
    # Residual plot
    axes[1, 0].scatter(results_df['predicted'], results_df['error'], alpha=0.5)
    axes[1, 0].axhline(y=0, color='k', linestyle='--')
    axes[1, 0].set_xlabel('Predicted Wait Time (minutes)')
    axes[1, 0].set_ylabel('Residual (minutes)')
    axes[1, 0].set_title(f'{title} - Residual Plot')
    
    # Time series plot (sample)
    if len(results_df) > 1000:
        # Sample for visualization if too many points
        sample_df = results_df.sample(1000).sort_values('timestamp')
    else:
        sample_df = results_df.sort_values('timestamp')
    
    axes[1, 1].plot(sample_df['timestamp'], sample_df['actual'], label='Actual', alpha=0.7)
    axes[1, 1].plot(sample_df['timestamp'], sample_df['predicted'], label='Predicted', alpha=0.7)
    axes[1, 1].set_xlabel('Time')
    axes[1, 1].set_ylabel('Wait Time (minutes)')
    axes[1, 1].set_title(f'{title} - Time Series (Sample)')
    axes[1, 1].legend()
    axes[1, 1].tick_params(axis='x', rotation=45)
    
    plt.tight_layout()
    return fig

## Model Storage Functions

In [5]:
def save_model(ride_name, prophet_model, metrics, output_dir="models"):
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    
    # Create ride-specific directory
    ride_dir = os.path.join(output_dir, ride_name.replace(" ", "_"))
    os.makedirs(ride_dir, exist_ok=True)
    
    # Save Prophet model (using pickle)
    with open(os.path.join(ride_dir, "prophet_model.pkl"), "wb") as f:
        pickle.dump(prophet_model.model, f)
    
    # Save holidays if they exist
    if prophet_model.holidays is not None:
        prophet_model.holidays.to_csv(os.path.join(ride_dir, "holidays.csv"), index=False)
    
    # Save metrics
    with open(os.path.join(ride_dir, "metrics.json"), "w") as f:
        json.dump(metrics, f, indent=4)
    
    print(f"Models and results saved to {ride_dir}")

def load_model(ride_name, output_dir="models"):
    # Create ride-specific directory path
    ride_dir = os.path.join(output_dir, ride_name.replace(" ", "_"))
    
    # Check if models exist
    if not os.path.exists(ride_dir):
        return None, None
    
    # Load Prophet model
    try:
        with open(os.path.join(ride_dir, "prophet_model.pkl"), "rb") as f:
            prophet_model_obj = pickle.load(f)
    except:
        print(f"Could not load model for {ride_name}")
        return None, None
    
    # Initialize BaseTimeSeriesModel and set the loaded model
    prophet_ts = BaseTimeSeriesModel()
    prophet_ts.model = prophet_model_obj
    
    # Load holidays if they exist
    if os.path.exists(os.path.join(ride_dir, "holidays.csv")):
        prophet_ts.holidays = pd.read_csv(os.path.join(ride_dir, "holidays.csv"))
    
    # Load metrics
    with open(os.path.join(ride_dir, "metrics.json"), "r") as f:
        metrics = json.load(f)
    
    return prophet_ts, metrics

def get_processed_rides(output_dir="models"):
    if not os.path.exists(output_dir):
        return []
    
    # Get all subdirectories in the output directory
    processed_rides = [d for d in os.listdir(output_dir) 
                      if os.path.isdir(os.path.join(output_dir, d))]
    
    # Convert directory names back to ride names
    processed_rides = [ride.replace("_", " ") for ride in processed_rides]
    
    return processed_rides

## Multi-Ride Processing

In [6]:
def create_checkpoint_file(processed_rides, output_dir="models"):
    checkpoint_path = os.path.join(output_dir, "checkpoint.json")
    with open(checkpoint_path, "w") as f:
        json.dump({"processed_rides": processed_rides}, f, indent=4)

def load_checkpoint_file(output_dir="models"):
    checkpoint_path = os.path.join(output_dir, "checkpoint.json")
    if os.path.exists(checkpoint_path):
        with open(checkpoint_path, "r") as f:
            checkpoint = json.load(f)
        return checkpoint.get("processed_rides", [])
    return []

## Training Pipeline for Single Ride (Extended with Test Set)

In [7]:
def process_single_ride_extended(ride_name, train_data, val_data, test_data, output_dir="models"):
    """Process a single ride with both validation and test set evaluation."""
    print(f"\n{'='*50}")
    print(f"Processing ride: {ride_name}")
    print(f"{'='*50}")
    
    # Filter data for the current ride
    ride_train_data = filter_ride_data(train_data, ride_name)
    ride_val_data = filter_ride_data(val_data, ride_name)
    ride_test_data = filter_ride_data(test_data, ride_name)
    
    print(f"Training data size: {len(ride_train_data)}")
    print(f"Validation data size: {len(ride_val_data)}")
    print(f"Test data size: {len(ride_test_data)}")
    
    # Skip if not enough data
    if len(ride_train_data) < 100 or len(ride_val_data) < 50 or len(ride_test_data) < 50:
        print(f"Skipping {ride_name} due to insufficient data")
        return None
    
    # VALIDATION EVALUATION: Train on train data only, evaluate on validation set
    print("Training Prophet model on train data for validation evaluation...")
    prophet_val = BaseTimeSeriesModel()
    prophet_val_df = prophet_val.prepare_prophet_dataframe(ride_train_data)
    prophet_val.fit(prophet_val_df)
    
    # Generate forecasts for validation data
    print("Evaluating on validation set...")
    future_val = prophet_val.prepare_prophet_dataframe(ride_val_data, include_y=False)
    val_forecast = prophet_val.predict(future_val)
    
    # Merge predictions with validation data
    ride_val_data_with_forecast = pd.merge(
        ride_val_data,
        val_forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']],
        left_on='timestamp', 
        right_on='ds',
        how='left'
    )
    
    # Evaluate on validation set
    val_actuals = ride_val_data_with_forecast['wait_time'].values
    val_predictions = ride_val_data_with_forecast['yhat'].values
    val_metrics, val_results_df = evaluate_model(
        ride_val_data_with_forecast, val_actuals, val_predictions, 
        title=f"{ride_name} - Validation"
    )
    
    # TEST EVALUATION: Train on combined train + validation data, evaluate on test set
    print("Training Prophet model on combined train + validation data for test evaluation...")
    combined_train_val = pd.concat([ride_train_data, ride_val_data], ignore_index=True)
    prophet_test = BaseTimeSeriesModel()
    prophet_test_df = prophet_test.prepare_prophet_dataframe(combined_train_val)
    prophet_test.fit(prophet_test_df)
    
    # Generate forecasts for test data
    print("Evaluating on test set...")
    future_test = prophet_test.prepare_prophet_dataframe(ride_test_data, include_y=False)
    test_forecast = prophet_test.predict(future_test)
    
    # Merge predictions with test data
    ride_test_data_with_forecast = pd.merge(
        ride_test_data,
        test_forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']],
        left_on='timestamp', 
        right_on='ds',
        how='left'
    )
    
    # Evaluate on test set
    test_actuals = ride_test_data_with_forecast['wait_time'].values
    test_predictions = ride_test_data_with_forecast['yhat'].values
    test_metrics, test_results_df = evaluate_model(
        ride_test_data_with_forecast, test_actuals, test_predictions, 
        title=f"{ride_name} - Test"
    )
    
    # Create figures directory
    fig_dir = os.path.join(output_dir, ride_name.replace(" ", "_"), "figures")
    os.makedirs(fig_dir, exist_ok=True)
    
    # Save model components visualization (using test model which is trained on more data)
    fig_components = prophet_test.model.plot_components(test_forecast, figsize=(14, 10))
    fig_components.savefig(os.path.join(fig_dir, "model_components.png"))
    plt.close(fig_components)

    # Save evaluation figures
    fig_val_eval = create_evaluation_plots(val_results_df, f"{ride_name} - Validation")
    fig_val_eval.savefig(os.path.join(fig_dir, "validation_evaluation.png"))
    plt.close(fig_val_eval)
    
    fig_test_eval = create_evaluation_plots(test_results_df, f"{ride_name} - Test")
    fig_test_eval.savefig(os.path.join(fig_dir, "test_evaluation.png"))
    plt.close(fig_test_eval)
    
    # Prepare combined metrics
    combined_metrics = {
        "validation": val_metrics,
        "test": test_metrics,
        "data_counts": {
            "train": len(ride_train_data),
            "validation": len(ride_val_data),
            "test": len(ride_test_data),
            "combined_train_val": len(combined_train_val)
        },
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    }
    
    # Save models and results (save the test model which is trained on more data)
    save_model(ride_name, prophet_test, combined_metrics, output_dir)
    
    # Save detailed results as CSV
    results_dir = os.path.join(output_dir, ride_name.replace(" ", "_"), "results")
    os.makedirs(results_dir, exist_ok=True)
    val_results_df.to_csv(os.path.join(results_dir, "validation_results.csv"), index=False)
    test_results_df.to_csv(os.path.join(results_dir, "test_results.csv"), index=False)
    
    return combined_metrics

## Training Pipeline for All Rides (Extended)

In [8]:
def generate_summary_report_extended(all_results, output_dir="models"):
    """Generate comprehensive summary report for both validation and test sets."""
    # Create lists to store summary data
    summary_data = []
    
    # Extract data from results
    for ride_name, metrics in all_results.items():
        if not metrics:
            continue
        
        # Base ride info
        base_info = {
            "ride_name": ride_name,
            "train_data_size": metrics.get("data_counts", {}).get("train", 0),
            "val_data_size": metrics.get("data_counts", {}).get("validation", 0),
            "test_data_size": metrics.get("data_counts", {}).get("test", 0),
            "combined_train_val_size": metrics.get("data_counts", {}).get("combined_train_val", 0)
        }
        
        # Add validation metrics
        if "validation" in metrics:
            val_row = base_info.copy()
            val_row["dataset"] = "validation"
            val_metrics = metrics["validation"]
            val_row["mae"] = val_metrics.get("mae", float('nan'))
            val_row["rmse"] = val_metrics.get("rmse", float('nan'))
            val_row["smape"] = val_metrics.get("smape", float('nan'))
            summary_data.append(val_row)
        
        # Add test metrics
        if "test" in metrics:
            test_row = base_info.copy()
            test_row["dataset"] = "test"
            test_metrics = metrics["test"]
            test_row["mae"] = test_metrics.get("mae", float('nan'))
            test_row["rmse"] = test_metrics.get("rmse", float('nan'))
            test_row["smape"] = test_metrics.get("smape", float('nan'))
            summary_data.append(test_row)
    
    # Create DataFrame
    detailed_summary = pd.DataFrame(summary_data)
    
    if len(detailed_summary) == 0:
        print("No results to summarize.")
        return None
    
    # Save detailed summary
    detailed_path = os.path.join(output_dir, "detailed_prophet_summary_extended.csv")
    detailed_summary.to_csv(detailed_path, index=False)
    
    # Create aggregated summary for each dataset
    val_summary = detailed_summary[detailed_summary['dataset'] == 'validation'].groupby('dataset').agg({
        'mae': ['mean', 'std', 'median'],
        'rmse': ['mean', 'std', 'median'],
        'smape': ['mean', 'std', 'median']
    }).round(2)
    
    test_summary = detailed_summary[detailed_summary['dataset'] == 'test'].groupby('dataset').agg({
        'mae': ['mean', 'std', 'median'],
        'rmse': ['mean', 'std', 'median'],
        'smape': ['mean', 'std', 'median']
    }).round(2)
    
    # Create simple summary DataFrame for comparison
    simple_summary = []
    
    val_data = detailed_summary[detailed_summary['dataset'] == 'validation']
    test_data = detailed_summary[detailed_summary['dataset'] == 'test']
    
    if len(val_data) > 0:
        simple_summary.append({
            'dataset': 'validation',
            'count': len(val_data),
            'mae_mean': val_data['mae'].mean(),
            'mae_std': val_data['mae'].std(),
            'rmse_mean': val_data['rmse'].mean(),
            'rmse_std': val_data['rmse'].std(),
            'smape_mean': val_data['smape'].mean(),
            'smape_std': val_data['smape'].std()
        })
    
    if len(test_data) > 0:
        simple_summary.append({
            'dataset': 'test',
            'count': len(test_data),
            'mae_mean': test_data['mae'].mean(),
            'mae_std': test_data['mae'].std(),
            'rmse_mean': test_data['rmse'].mean(),
            'rmse_std': test_data['rmse'].std(),
            'smape_mean': test_data['smape'].mean(),
            'smape_std': test_data['smape'].std()
        })
    
    simple_summary_df = pd.DataFrame(simple_summary)
    
    # Save simple summary
    simple_path = os.path.join(output_dir, "prophet_summary_extended.csv")
    simple_summary_df.to_csv(simple_path, index=False)
    
    # Print summary statistics
    print("\n" + "="*80)
    print("PROPHET MODEL SUMMARY (VALIDATION & TEST SETS):")
    print(f"Total rides processed: {detailed_summary['ride_name'].nunique()}")
    
    if len(val_data) > 0:
        print(f"\nValidation Set Performance:")
        print(f"  Average MAE: {val_data['mae'].mean():.2f} ± {val_data['mae'].std():.2f}")
        print(f"  Average RMSE: {val_data['rmse'].mean():.2f} ± {val_data['rmse'].std():.2f}")
        print(f"  Average sMAPE: {val_data['smape'].mean():.2f}% ± {val_data['smape'].std():.2f}%")
    
    if len(test_data) > 0:
        print(f"\nTest Set Performance:")
        print(f"  Average MAE: {test_data['mae'].mean():.2f} ± {test_data['mae'].std():.2f}")
        print(f"  Average RMSE: {test_data['rmse'].mean():.2f} ± {test_data['rmse'].std():.2f}")
        print(f"  Average sMAPE: {test_data['smape'].mean():.2f}% ± {test_data['smape'].std():.2f}%")
    
    print(f"\nDetailed summary saved to: {detailed_path}")
    print(f"Simple summary saved to: {simple_path}")
    print("="*80)
    
    # Create visualizations
    create_prophet_visualizations_extended(detailed_summary, simple_summary_df, output_dir)
    
    return detailed_summary, simple_summary_df

def create_prophet_visualizations_extended(detailed_summary, simple_summary_df, output_dir):
    """Create visualizations for Prophet model results including both validation and test sets."""
    
    # 1. Performance comparison (validation vs test)
    if len(simple_summary_df) == 2:
        plt.figure(figsize=(15, 8))
        
        datasets = simple_summary_df['dataset']
        mae_means = simple_summary_df['mae_mean']
        mae_stds = simple_summary_df['mae_std']
        
        x = np.arange(len(datasets))
        width = 0.35
        
        bars = plt.bar(x, mae_means, width, yerr=mae_stds, capsize=5, 
                      color=[VAL_COLOR if d == 'validation' else TEST_COLOR for d in datasets],
                      alpha=0.8)
        
        # Add value labels on bars
        for bar, mean_val in zip(bars, mae_means):
            height = bar.get_height()
            plt.text(bar.get_x() + bar.get_width()/2., height + max(mae_means) * 0.01,
                    f'{mean_val:.2f}', ha='center', va='bottom', fontsize=16)
        
        plt.title('Prophet Model: Average MAE Comparison (Validation vs Test)', fontsize=20)
        plt.xlabel('Dataset', fontsize=14)
        plt.ylabel('Average MAE (minutes)', fontsize=14)
        plt.xticks(x, datasets)
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, "prophet_val_vs_test_comparison.png"))
        plt.savefig(os.path.join(output_dir, "prophet_val_vs_test_comparison.svg"))
        plt.close()
    
    # 2. Distribution comparison
    val_data = detailed_summary[detailed_summary['dataset'] == 'validation']
    test_data = detailed_summary[detailed_summary['dataset'] == 'test']
    
    fig, axes = plt.subplots(1, 3, figsize=(20, 6))
    
    # MAE distribution
    if len(val_data) > 0:
        axes[0].hist(val_data['mae'], bins=15, alpha=0.7, label='Validation', color=VAL_COLOR)
    if len(test_data) > 0:
        axes[0].hist(test_data['mae'], bins=15, alpha=0.7, label='Test', color=TEST_COLOR)
    axes[0].set_xlabel('MAE (minutes)')
    axes[0].set_ylabel('Number of Rides')
    axes[0].set_title('MAE Distribution')
    axes[0].legend()
    
    # RMSE distribution
    if len(val_data) > 0:
        axes[1].hist(val_data['rmse'], bins=15, alpha=0.7, label='Validation', color=VAL_COLOR)
    if len(test_data) > 0:
        axes[1].hist(test_data['rmse'], bins=15, alpha=0.7, label='Test', color=TEST_COLOR)
    axes[1].set_xlabel('RMSE (minutes)')
    axes[1].set_ylabel('Number of Rides')
    axes[1].set_title('RMSE Distribution')
    axes[1].legend()
    
    # sMAPE distribution
    if len(val_data) > 0:
        axes[2].hist(val_data['smape'], bins=15, alpha=0.7, label='Validation', color=VAL_COLOR)
    if len(test_data) > 0:
        axes[2].hist(test_data['smape'], bins=15, alpha=0.7, label='Test', color=TEST_COLOR)
    axes[2].set_xlabel('sMAPE (%)')
    axes[2].set_ylabel('Number of Rides')
    axes[2].set_title('sMAPE Distribution')
    axes[2].legend()
    
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "prophet_metrics_distribution_extended.png"))
    plt.close()
    
    # 3. Ride performance ranking (test set)
    if len(test_data) > 0:
        test_sorted = test_data.sort_values('mae')
        
        plt.figure(figsize=(16, 10))
        bars = plt.barh(range(len(test_sorted)), test_sorted['mae'], alpha=0.8, color=TEST_COLOR)
        
        plt.title('Prophet Model: Test Set MAE by Ride', fontsize=20, fontweight='bold')
        plt.xlabel('Mean Absolute Error (MAE) - Minutes', fontsize=16)
        plt.ylabel('Rides', fontsize=16)
        plt.yticks(range(len(test_sorted)), test_sorted['ride_name'], fontsize=14)
        
        # Add value labels
        for i, (bar, mae_val) in enumerate(zip(bars, test_sorted['mae'])):
            plt.text(bar.get_width() + max(test_sorted['mae']) * 0.01, 
                    bar.get_y() + bar.get_height()/2, 
                    f'{mae_val:.1f}', ha='left', va='center', fontsize=16)
        
        plt.grid(axis='x', alpha=0.3, linestyle='--')
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, "prophet_mae_by_ride_test.png"), dpi=300, bbox_inches='tight')
        plt.savefig(os.path.join(output_dir, "prophet_mae_by_ride_test.svg"), bbox_inches='tight')
        plt.close()
    
    print("Extended Prophet visualizations saved to output directory.")

def process_all_rides_extended(all_rides, train_data, val_data, test_data,
                              output_dir="models", resume=True):
    """Process all rides with Prophet models on both validation and test sets."""
    # Create output directory
    os.makedirs(output_dir, exist_ok=True)
    
    # Get list of already processed rides
    processed_rides = []
    if resume:
        processed_rides = load_checkpoint_file(output_dir)
        if processed_rides:
            print(f"Resuming from checkpoint. {len(processed_rides)} rides already processed.")
    
    # Initialize results dictionary
    all_results = {}
    
    # Process each ride
    for i, ride_name in enumerate(tqdm(all_rides, desc="Processing rides")):
        if ride_name in processed_rides:
            print(f"Skipping {ride_name} (already processed)")
            # Load metrics for the summary
            _, metrics = load_model(ride_name, output_dir)
            if metrics:
                all_results[ride_name] = metrics
            continue
            
        print(f"\nProcessing ride {i+1}/{len(all_rides)}: {ride_name}")
        ride_metrics = process_single_ride_extended(ride_name, train_data, val_data, test_data,
                                                   output_dir=output_dir)
        
        if ride_metrics:
            all_results[ride_name] = ride_metrics
            processed_rides.append(ride_name)
            
            # Update checkpoint after each ride
            create_checkpoint_file(processed_rides, output_dir)
    
    # Generate summary report
    generate_summary_report_extended(all_results, output_dir)
    
    return all_results

## Analysis Functions

In [9]:
def analyze_prophet_results_extended(output_dir="../models/prophet_enhanced/"):
    """Load and analyze saved Prophet model results for both validation and test sets."""
    
    # Check if summary files exist
    detailed_path = os.path.join(output_dir, "detailed_prophet_summary_extended.csv")
    simple_path = os.path.join(output_dir, "prophet_summary_extended.csv")
    
    if not os.path.exists(detailed_path) or not os.path.exists(simple_path):
        print("Extended summary files not found. Run the processing pipeline first.")
        return None
    
    # Load summary data
    detailed_summary = pd.read_csv(detailed_path)
    simple_summary = pd.read_csv(simple_path)
    
    # Display key insights
    print("="*60)
    print("PROPHET MODEL ANALYSIS (VALIDATION & TEST SETS)")
    print("="*60)
    
    # Split by dataset
    val_data = detailed_summary[detailed_summary['dataset'] == 'validation']
    test_data = detailed_summary[detailed_summary['dataset'] == 'test']
    
    print("\n1. Overall Performance Summary:")
    print(simple_summary)
    
    if len(val_data) > 0:
        print("\n2. Validation Set - Top 10 rides (lowest MAE):")
        val_sorted = val_data.sort_values('mae')
        print(val_sorted.head(10)[['ride_name', 'mae', 'rmse', 'smape']])
    
    if len(test_data) > 0:
        print("\n3. Test Set - Top 10 rides (lowest MAE):")
        test_sorted = test_data.sort_values('mae')
        print(test_sorted.head(10)[['ride_name', 'mae', 'rmse', 'smape']])
    
    if len(val_data) > 0 and len(test_data) > 0:
        print("\n4. Validation vs Test Performance Comparison:")
        val_avg = val_data.groupby('dataset')[['mae', 'rmse', 'smape']].mean()
        test_avg = test_data.groupby('dataset')[['mae', 'rmse', 'smape']].mean()
        comparison_df = pd.concat([val_avg, test_avg])
        print(comparison_df)
    
    return {
        'detailed_summary': detailed_summary,
        'simple_summary': simple_summary,
        'val_data': val_data,
        'test_data': test_data
    }

def create_prophet_method_summary_csv(all_results, output_dir="../models/prophet_enhanced/", filename="prophet_method_summary.csv"):
    """
    Create a CSV file with Prophet model performance metrics in the same format as baselines.
    
    Args:
        all_results: Dictionary containing all Prophet model results
        output_dir: Directory to save the CSV file
        filename: Name of the output CSV file
    
    Returns:
        pandas.DataFrame: Summary dataframe
    """
    # Initialize data for Prophet
    val_mae_values = []
    val_rmse_values = []
    val_smape_values = []
    test_mae_values = []
    test_rmse_values = []
    test_smape_values = []
    
    # Collect metrics from all rides
    for ride_name, ride_results in all_results.items():
        if "validation" in ride_results:
            val_metrics = ride_results["validation"]
            if "mae" in val_metrics and not np.isinf(val_metrics["mae"]):
                val_mae_values.append(val_metrics["mae"])
            if "rmse" in val_metrics and not np.isinf(val_metrics["rmse"]):
                val_rmse_values.append(val_metrics["rmse"])
            if "smape" in val_metrics and not np.isinf(val_metrics["smape"]):
                val_smape_values.append(val_metrics["smape"])
        
        if "test" in ride_results:
            test_metrics = ride_results["test"]
            if "mae" in test_metrics and not np.isinf(test_metrics["mae"]):
                test_mae_values.append(test_metrics["mae"])
            if "rmse" in test_metrics and not np.isinf(test_metrics["rmse"]):
                test_rmse_values.append(test_metrics["rmse"])
            if "smape" in test_metrics and not np.isinf(test_metrics["smape"]):
                test_smape_values.append(test_metrics["smape"])
    
    # Calculate average metrics
    val_mae_avg = np.mean(val_mae_values) if val_mae_values else np.nan
    val_rmse_avg = np.mean(val_rmse_values) if val_rmse_values else np.nan
    val_smape_avg = np.mean(val_smape_values) if val_smape_values else np.nan
    
    test_mae_avg = np.mean(test_mae_values) if test_mae_values else np.nan
    test_rmse_avg = np.mean(test_rmse_values) if test_rmse_values else np.nan
    test_smape_avg = np.mean(test_smape_values) if test_smape_values else np.nan
    
    # Create summary data
    summary_data = [{
        'method_name': 'Prophet Enhanced',
        'val_mae': val_mae_avg,
        'val_rmse': val_rmse_avg,
        'val_smape': val_smape_avg,
        'test_mae': test_mae_avg,
        'test_rmse': test_rmse_avg,
        'test_smape': test_smape_avg
    }]
    
    # Create DataFrame
    summary_df = pd.DataFrame(summary_data)
    
    # Round values to 2 decimal places
    numeric_columns = ['val_mae', 'val_rmse', 'val_smape', 'test_mae', 'test_rmse', 'test_smape']
    for col in numeric_columns:
        summary_df[col] = summary_df[col].round(2)
    
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    
    # Save to CSV with semicolon separator
    output_path = os.path.join(output_dir, filename)
    summary_df.to_csv(output_path, sep=';', index=False)
    
    print(f"Prophet method summary CSV saved to: {output_path}")
    print("\nProphet model performance:")
    print(summary_df.to_string(index=False))
    
    return summary_df

## Main Execution

In [10]:
print("Loading data...")
data = load_data("../data/processed/ep/final_cleaned_processed_wait_times.parquet")
print(f"Loaded data with {len(data)} rows")

check_for_missing_values(data)

data = filter_to_operating_hours(data)

# Define time periods for splitting
train_years, val_year, test_year = list(range(2017, 2023)), 2023, 2024

# Split the data
train_data, val_data, test_data = split_data(data, train_years, val_year, test_year)

# Get all rides in the dataset
all_rides = get_all_rides(data)
print(f"Found {len(all_rides)} rides in the dataset:")
for i, ride in enumerate(all_rides):
    print(f"{i+1}. {ride}")

# Set output directory for models and results
output_dir = "../models/prophet_enhanced_extended/"

# Process all rides with extended evaluation (including test set)
print("\n" + "="*60)
print("STARTING PROPHET MODEL PROCESSING FOR ALL RIDES (VALIDATION & TEST SETS)")
print("="*60)

results = process_all_rides_extended(
    all_rides=all_rides,
    train_data=train_data,
    val_data=val_data,
    test_data=test_data,
    output_dir=output_dir,
    resume=True  # Resume from checkpoint if available
)

print("\n" + "="*60)
print("PROPHET MODEL PROCESSING COMPLETED")
print("="*60)

# Analyze the extended results
results_analysis = analyze_prophet_results_extended(output_dir)

# Create method summary CSV for comparison with baselines
create_prophet_method_summary_csv(results, output_dir)

Loading data...
Loaded data with 7834739 rows
No missing values found in the dataset.
Train data size: 297362
Validation data size: 61851
Test data size: 55699
Found 31 rides in the dataset:
1. alpine express enzian
2. arena of football  be part of it
3. arthur
4. atlantica supersplash
5. atlantis adventure
6. baaa express
7. blue fire megacoaster
8. castello dei medici
9. dancing dingie
10. euromir
11. eurosat  cancan coaster
12. eurotower
13. fjordrafting
14. jim button  journey through morrowland
15. josefinas magical imperial journey
16. kolumbusjolle
17. madame freudenreich curiosits
18. matterhornblitz
19. old mac donalds tractor fun
20. pegasus
21. poppy towers
22. poseidon
23. silver star
24. swiss bob run
25. tirol log flume
26. vienna wave swing  glckspilz
27. vindjammer
28. voletarium
29. volo da vinci
30. voltron nevera powered by rimac
31. whale adventures  northern lights

STARTING PROPHET MODEL PROCESSING FOR ALL RIDES (VALIDATION & TEST SETS)


Processing rides:   0%|          | 0/31 [00:00<?, ?it/s]


Processing ride 1/31: alpine express enzian

Processing ride: alpine express enzian
Training data size: 10302
Validation data size: 2019
Test data size: 1753
Training Prophet model on train data for validation evaluation...


18:38:45 - cmdstanpy - INFO - Chain [1] start processing
18:38:51 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

alpine express enzian - Validation MAE: 7.96 minutes
alpine express enzian - Validation RMSE: 9.96 minutes
alpine express enzian - Validation sMAPE: 28.06%
Training Prophet model on combined train + validation data for test evaluation...


18:38:52 - cmdstanpy - INFO - Chain [1] start processing
18:38:59 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

alpine express enzian - Test MAE: 15.59 minutes
alpine express enzian - Test RMSE: 19.08 minutes
alpine express enzian - Test sMAPE: 26.56%


Processing rides:   3%|▎         | 1/31 [00:16<08:27, 16.91s/it]

Models and results saved to ../models/prophet_enhanced_extended/alpine_express_enzian

Processing ride 2/31: arena of football  be part of it

Processing ride: arena of football  be part of it
Training data size: 9612
Validation data size: 2052
Test data size: 1816
Training Prophet model on train data for validation evaluation...


18:39:02 - cmdstanpy - INFO - Chain [1] start processing
18:39:09 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

arena of football  be part of it - Validation MAE: 9.24 minutes
arena of football  be part of it - Validation RMSE: 11.26 minutes
arena of football  be part of it - Validation sMAPE: 42.75%
Training Prophet model on combined train + validation data for test evaluation...


18:39:10 - cmdstanpy - INFO - Chain [1] start processing
18:39:22 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

arena of football  be part of it - Test MAE: 4.93 minutes
arena of football  be part of it - Test RMSE: 5.08 minutes
arena of football  be part of it - Test sMAPE: 95.41%


Processing rides:   6%|▋         | 2/31 [00:39<09:41, 20.05s/it]

Models and results saved to ../models/prophet_enhanced_extended/arena_of_football__be_part_of_it

Processing ride 3/31: arthur

Processing ride: arthur
Training data size: 10298
Validation data size: 2067
Test data size: 1832
Training Prophet model on train data for validation evaluation...


18:39:24 - cmdstanpy - INFO - Chain [1] start processing
18:39:34 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

arthur - Validation MAE: 23.45 minutes
arthur - Validation RMSE: 27.85 minutes
arthur - Validation sMAPE: 32.22%
Training Prophet model on combined train + validation data for test evaluation...


18:39:35 - cmdstanpy - INFO - Chain [1] start processing
18:39:47 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

arthur - Test MAE: 7.48 minutes
arthur - Test RMSE: 10.30 minutes
arthur - Test sMAPE: 14.80%


Processing rides:  10%|▉         | 3/31 [01:04<10:28, 22.43s/it]

Models and results saved to ../models/prophet_enhanced_extended/arthur

Processing ride 4/31: atlantica supersplash

Processing ride: atlantica supersplash
Training data size: 9825
Validation data size: 2065
Test data size: 1826
Training Prophet model on train data for validation evaluation...


18:39:50 - cmdstanpy - INFO - Chain [1] start processing
18:39:54 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

atlantica supersplash - Validation MAE: 19.51 minutes
atlantica supersplash - Validation RMSE: 23.63 minutes
atlantica supersplash - Validation sMAPE: 39.99%
Training Prophet model on combined train + validation data for test evaluation...


18:39:55 - cmdstanpy - INFO - Chain [1] start processing
18:40:09 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

atlantica supersplash - Test MAE: 12.11 minutes
atlantica supersplash - Test RMSE: 17.90 minutes
atlantica supersplash - Test sMAPE: 51.54%


Processing rides:  13%|█▎        | 4/31 [01:26<10:02, 22.33s/it]

Models and results saved to ../models/prophet_enhanced_extended/atlantica_supersplash

Processing ride 5/31: atlantis adventure

Processing ride: atlantis adventure
Training data size: 10289
Validation data size: 2066
Test data size: 1832
Training Prophet model on train data for validation evaluation...


18:40:12 - cmdstanpy - INFO - Chain [1] start processing
18:40:19 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

atlantis adventure - Validation MAE: 4.11 minutes
atlantis adventure - Validation RMSE: 5.38 minutes
atlantis adventure - Validation sMAPE: 23.89%
Training Prophet model on combined train + validation data for test evaluation...


18:40:20 - cmdstanpy - INFO - Chain [1] start processing
18:40:31 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

atlantis adventure - Test MAE: 2.07 minutes
atlantis adventure - Test RMSE: 2.69 minutes
atlantis adventure - Test sMAPE: 40.40%


Processing rides:  16%|█▌        | 5/31 [01:48<09:38, 22.25s/it]

Models and results saved to ../models/prophet_enhanced_extended/atlantis_adventure

Processing ride 6/31: baaa express

Processing ride: baaa express
Training data size: 10291
Validation data size: 2067
Test data size: 1824
Training Prophet model on train data for validation evaluation...


18:40:34 - cmdstanpy - INFO - Chain [1] start processing
18:40:35 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

baaa express - Validation MAE: 4.60 minutes
baaa express - Validation RMSE: 5.86 minutes
baaa express - Validation sMAPE: 27.28%
Training Prophet model on combined train + validation data for test evaluation...


18:40:37 - cmdstanpy - INFO - Chain [1] start processing
18:40:40 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

baaa express - Test MAE: 4.66 minutes
baaa express - Test RMSE: 6.64 minutes
baaa express - Test sMAPE: 50.11%


Processing rides:  19%|█▉        | 6/31 [01:57<07:24, 17.80s/it]

Models and results saved to ../models/prophet_enhanced_extended/baaa_express

Processing ride 7/31: blue fire megacoaster

Processing ride: blue fire megacoaster
Training data size: 10288
Validation data size: 2067
Test data size: 1823
Training Prophet model on train data for validation evaluation...


18:40:43 - cmdstanpy - INFO - Chain [1] start processing
18:40:53 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

blue fire megacoaster - Validation MAE: 17.44 minutes
blue fire megacoaster - Validation RMSE: 21.31 minutes
blue fire megacoaster - Validation sMAPE: 27.41%
Training Prophet model on combined train + validation data for test evaluation...


18:40:54 - cmdstanpy - INFO - Chain [1] start processing
18:41:18 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

blue fire megacoaster - Test MAE: 17.13 minutes
blue fire megacoaster - Test RMSE: 22.92 minutes
blue fire megacoaster - Test sMAPE: 45.63%


Processing rides:  23%|██▎       | 7/31 [02:35<09:39, 24.14s/it]

Models and results saved to ../models/prophet_enhanced_extended/blue_fire_megacoaster

Processing ride 8/31: castello dei medici

Processing ride: castello dei medici
Training data size: 10193
Validation data size: 2066
Test data size: 1143
Training Prophet model on train data for validation evaluation...


18:41:20 - cmdstanpy - INFO - Chain [1] start processing
18:41:29 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

castello dei medici - Validation MAE: 3.50 minutes
castello dei medici - Validation RMSE: 4.24 minutes
castello dei medici - Validation sMAPE: 12.76%
Training Prophet model on combined train + validation data for test evaluation...


18:41:30 - cmdstanpy - INFO - Chain [1] start processing
18:41:41 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

castello dei medici - Test MAE: 4.69 minutes
castello dei medici - Test RMSE: 5.38 minutes
castello dei medici - Test sMAPE: 0.00%


Processing rides:  26%|██▌       | 8/31 [02:58<09:09, 23.91s/it]

Models and results saved to ../models/prophet_enhanced_extended/castello_dei_medici

Processing ride 9/31: dancing dingie

Processing ride: dancing dingie
Training data size: 10292
Validation data size: 2066
Test data size: 1824
Training Prophet model on train data for validation evaluation...


18:41:44 - cmdstanpy - INFO - Chain [1] start processing
18:41:47 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

dancing dingie - Validation MAE: 3.44 minutes
dancing dingie - Validation RMSE: 4.51 minutes
dancing dingie - Validation sMAPE: 25.19%
Training Prophet model on combined train + validation data for test evaluation...


18:41:48 - cmdstanpy - INFO - Chain [1] start processing
18:42:04 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

dancing dingie - Test MAE: 2.43 minutes
dancing dingie - Test RMSE: 3.30 minutes
dancing dingie - Test sMAPE: 27.27%


Processing rides:  29%|██▉       | 9/31 [03:21<08:36, 23.49s/it]

Models and results saved to ../models/prophet_enhanced_extended/dancing_dingie

Processing ride 10/31: euromir

Processing ride: euromir
Training data size: 10289
Validation data size: 2049
Test data size: 1832
Training Prophet model on train data for validation evaluation...


18:42:06 - cmdstanpy - INFO - Chain [1] start processing
18:42:13 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

euromir - Validation MAE: 10.67 minutes
euromir - Validation RMSE: 13.93 minutes
euromir - Validation sMAPE: 24.05%
Training Prophet model on combined train + validation data for test evaluation...


18:42:15 - cmdstanpy - INFO - Chain [1] start processing
18:42:29 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

euromir - Test MAE: 17.07 minutes
euromir - Test RMSE: 21.67 minutes
euromir - Test sMAPE: 67.39%


Processing rides:  32%|███▏      | 10/31 [03:46<08:24, 24.04s/it]

Models and results saved to ../models/prophet_enhanced_extended/euromir

Processing ride 11/31: eurosat  cancan coaster

Processing ride: eurosat  cancan coaster
Training data size: 8650
Validation data size: 2067
Test data size: 1832
Training Prophet model on train data for validation evaluation...


18:42:31 - cmdstanpy - INFO - Chain [1] start processing
18:42:36 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

eurosat  cancan coaster - Validation MAE: 10.79 minutes
eurosat  cancan coaster - Validation RMSE: 13.68 minutes
eurosat  cancan coaster - Validation sMAPE: 20.50%
Training Prophet model on combined train + validation data for test evaluation...


18:42:37 - cmdstanpy - INFO - Chain [1] start processing
18:42:47 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

eurosat  cancan coaster - Test MAE: 22.10 minutes
eurosat  cancan coaster - Test RMSE: 25.68 minutes
eurosat  cancan coaster - Test sMAPE: 66.51%


Processing rides:  35%|███▌      | 11/31 [04:04<07:23, 22.18s/it]

Models and results saved to ../models/prophet_enhanced_extended/eurosat__cancan_coaster

Processing ride 12/31: eurotower

Processing ride: eurotower
Training data size: 10308
Validation data size: 2067
Test data size: 1824
Training Prophet model on train data for validation evaluation...


18:42:49 - cmdstanpy - INFO - Chain [1] start processing
18:42:52 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

eurotower - Validation MAE: 3.24 minutes
eurotower - Validation RMSE: 4.92 minutes
eurotower - Validation sMAPE: 19.93%
Training Prophet model on combined train + validation data for test evaluation...


18:42:53 - cmdstanpy - INFO - Chain [1] start processing
18:42:59 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

eurotower - Test MAE: 7.37 minutes
eurotower - Test RMSE: 9.14 minutes
eurotower - Test sMAPE: 66.56%


Processing rides:  39%|███▊      | 12/31 [04:16<06:03, 19.12s/it]

Models and results saved to ../models/prophet_enhanced_extended/eurotower

Processing ride 13/31: fjordrafting

Processing ride: fjordrafting
Training data size: 9808
Validation data size: 2066
Test data size: 1825
Training Prophet model on train data for validation evaluation...


18:43:02 - cmdstanpy - INFO - Chain [1] start processing
18:43:03 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

fjordrafting - Validation MAE: 7.80 minutes
fjordrafting - Validation RMSE: 10.14 minutes
fjordrafting - Validation sMAPE: 24.12%
Training Prophet model on combined train + validation data for test evaluation...


18:43:05 - cmdstanpy - INFO - Chain [1] start processing
18:43:23 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

fjordrafting - Test MAE: 14.28 minutes
fjordrafting - Test RMSE: 19.81 minutes
fjordrafting - Test sMAPE: 43.01%


Processing rides:  42%|████▏     | 13/31 [04:41<06:15, 20.87s/it]

Models and results saved to ../models/prophet_enhanced_extended/fjordrafting

Processing ride 14/31: jim button  journey through morrowland

Processing ride: jim button  journey through morrowland
Training data size: 8597
Validation data size: 2065
Test data size: 1820
Training Prophet model on train data for validation evaluation...


18:43:26 - cmdstanpy - INFO - Chain [1] start processing
18:43:30 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

jim button  journey through morrowland - Validation MAE: 3.62 minutes
jim button  journey through morrowland - Validation RMSE: 4.33 minutes
jim button  journey through morrowland - Validation sMAPE: 17.24%
Training Prophet model on combined train + validation data for test evaluation...


18:43:31 - cmdstanpy - INFO - Chain [1] start processing
18:43:34 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

jim button  journey through morrowland - Test MAE: 2.05 minutes
jim button  journey through morrowland - Test RMSE: 2.51 minutes
jim button  journey through morrowland - Test sMAPE: 36.95%


Processing rides:  45%|████▌     | 14/31 [04:51<05:01, 17.76s/it]

Models and results saved to ../models/prophet_enhanced_extended/jim_button__journey_through_morrowland

Processing ride 15/31: josefinas magical imperial journey

Processing ride: josefinas magical imperial journey
Training data size: 10266
Validation data size: 2067
Test data size: 1821
Training Prophet model on train data for validation evaluation...


18:43:37 - cmdstanpy - INFO - Chain [1] start processing
18:43:39 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

josefinas magical imperial journey - Validation MAE: 4.71 minutes
josefinas magical imperial journey - Validation RMSE: 6.19 minutes
josefinas magical imperial journey - Validation sMAPE: 24.70%
Training Prophet model on combined train + validation data for test evaluation...


18:43:41 - cmdstanpy - INFO - Chain [1] start processing
18:43:54 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

josefinas magical imperial journey - Test MAE: 2.57 minutes
josefinas magical imperial journey - Test RMSE: 3.56 minutes
josefinas magical imperial journey - Test sMAPE: 28.53%


Processing rides:  48%|████▊     | 15/31 [05:11<04:52, 18.28s/it]

Models and results saved to ../models/prophet_enhanced_extended/josefinas_magical_imperial_journey

Processing ride 16/31: kolumbusjolle

Processing ride: kolumbusjolle
Training data size: 10300
Validation data size: 2067
Test data size: 1821
Training Prophet model on train data for validation evaluation...


18:43:57 - cmdstanpy - INFO - Chain [1] start processing
18:43:57 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

kolumbusjolle - Validation MAE: 1.35 minutes
kolumbusjolle - Validation RMSE: 1.89 minutes
kolumbusjolle - Validation sMAPE: 14.35%
Training Prophet model on combined train + validation data for test evaluation...


18:43:58 - cmdstanpy - INFO - Chain [1] start processing
18:44:03 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

kolumbusjolle - Test MAE: 1.20 minutes
kolumbusjolle - Test RMSE: 1.76 minutes
kolumbusjolle - Test sMAPE: 9.55%


Processing rides:  52%|█████▏    | 16/31 [05:20<03:51, 15.41s/it]

Models and results saved to ../models/prophet_enhanced_extended/kolumbusjolle

Processing ride 17/31: madame freudenreich curiosits

Processing ride: madame freudenreich curiosits
Training data size: 8643
Validation data size: 2066
Test data size: 1832
Training Prophet model on train data for validation evaluation...


18:44:05 - cmdstanpy - INFO - Chain [1] start processing
18:44:13 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

madame freudenreich curiosits - Validation MAE: 0.07 minutes
madame freudenreich curiosits - Validation RMSE: 0.54 minutes
madame freudenreich curiosits - Validation sMAPE: 99.40%
Training Prophet model on combined train + validation data for test evaluation...


18:44:14 - cmdstanpy - INFO - Chain [1] start processing
18:44:22 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

madame freudenreich curiosits - Test MAE: 0.27 minutes
madame freudenreich curiosits - Test RMSE: 0.65 minutes
madame freudenreich curiosits - Test sMAPE: 85.07%


Processing rides:  55%|█████▍    | 17/31 [05:39<03:50, 16.50s/it]

Models and results saved to ../models/prophet_enhanced_extended/madame_freudenreich_curiosits

Processing ride 18/31: matterhornblitz

Processing ride: matterhornblitz
Training data size: 10299
Validation data size: 2065
Test data size: 1830
Training Prophet model on train data for validation evaluation...


18:44:24 - cmdstanpy - INFO - Chain [1] start processing
18:44:27 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

matterhornblitz - Validation MAE: 9.75 minutes
matterhornblitz - Validation RMSE: 12.71 minutes
matterhornblitz - Validation sMAPE: 18.23%
Training Prophet model on combined train + validation data for test evaluation...


18:44:28 - cmdstanpy - INFO - Chain [1] start processing
18:44:35 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

matterhornblitz - Test MAE: 19.64 minutes
matterhornblitz - Test RMSE: 23.71 minutes
matterhornblitz - Test sMAPE: 65.70%


Processing rides:  58%|█████▊    | 18/31 [05:52<03:22, 15.61s/it]

Models and results saved to ../models/prophet_enhanced_extended/matterhornblitz

Processing ride 19/31: old mac donalds tractor fun

Processing ride: old mac donalds tractor fun
Training data size: 10292
Validation data size: 2065
Test data size: 1818
Training Prophet model on train data for validation evaluation...


18:44:38 - cmdstanpy - INFO - Chain [1] start processing
18:44:40 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

old mac donalds tractor fun - Validation MAE: 1.89 minutes
old mac donalds tractor fun - Validation RMSE: 2.44 minutes
old mac donalds tractor fun - Validation sMAPE: 38.95%
Training Prophet model on combined train + validation data for test evaluation...


18:44:41 - cmdstanpy - INFO - Chain [1] start processing
18:44:47 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

old mac donalds tractor fun - Test MAE: 1.82 minutes
old mac donalds tractor fun - Test RMSE: 2.54 minutes
old mac donalds tractor fun - Test sMAPE: 59.21%


Processing rides:  61%|██████▏   | 19/31 [06:04<02:53, 14.49s/it]

Models and results saved to ../models/prophet_enhanced_extended/old_mac_donalds_tractor_fun

Processing ride 20/31: pegasus

Processing ride: pegasus
Training data size: 10299
Validation data size: 2067
Test data size: 1831
Training Prophet model on train data for validation evaluation...


18:44:49 - cmdstanpy - INFO - Chain [1] start processing
18:44:53 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

pegasus - Validation MAE: 7.80 minutes
pegasus - Validation RMSE: 10.05 minutes
pegasus - Validation sMAPE: 32.09%
Training Prophet model on combined train + validation data for test evaluation...


18:44:54 - cmdstanpy - INFO - Chain [1] start processing
18:45:02 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

pegasus - Test MAE: 6.36 minutes
pegasus - Test RMSE: 9.33 minutes
pegasus - Test sMAPE: 40.65%


Processing rides:  65%|██████▍   | 20/31 [06:19<02:40, 14.61s/it]

Models and results saved to ../models/prophet_enhanced_extended/pegasus

Processing ride 21/31: poppy towers

Processing ride: poppy towers
Training data size: 10276
Validation data size: 2065
Test data size: 1822
Training Prophet model on train data for validation evaluation...


18:45:05 - cmdstanpy - INFO - Chain [1] start processing
18:45:09 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

poppy towers - Validation MAE: 1.93 minutes
poppy towers - Validation RMSE: 2.60 minutes
poppy towers - Validation sMAPE: 21.54%
Training Prophet model on combined train + validation data for test evaluation...


18:45:10 - cmdstanpy - INFO - Chain [1] start processing
18:45:20 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

poppy towers - Test MAE: 3.17 minutes
poppy towers - Test RMSE: 3.46 minutes
poppy towers - Test sMAPE: 50.23%


Processing rides:  68%|██████▊   | 21/31 [06:36<02:33, 15.34s/it]

Models and results saved to ../models/prophet_enhanced_extended/poppy_towers

Processing ride 22/31: poseidon

Processing ride: poseidon
Training data size: 9832
Validation data size: 2064
Test data size: 1826
Training Prophet model on train data for validation evaluation...


18:45:22 - cmdstanpy - INFO - Chain [1] start processing
18:45:24 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

poseidon - Validation MAE: 8.21 minutes
poseidon - Validation RMSE: 11.32 minutes
poseidon - Validation sMAPE: 19.56%
Training Prophet model on combined train + validation data for test evaluation...


18:45:26 - cmdstanpy - INFO - Chain [1] start processing
18:45:29 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

poseidon - Test MAE: 13.27 minutes
poseidon - Test RMSE: 21.38 minutes
poseidon - Test sMAPE: 73.06%


Processing rides:  71%|███████   | 22/31 [06:46<02:04, 13.83s/it]

Models and results saved to ../models/prophet_enhanced_extended/poseidon

Processing ride 23/31: silver star

Processing ride: silver star
Training data size: 10244
Validation data size: 2064
Test data size: 1826
Training Prophet model on train data for validation evaluation...


18:45:32 - cmdstanpy - INFO - Chain [1] start processing
18:45:35 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

silver star - Validation MAE: 9.07 minutes
silver star - Validation RMSE: 11.66 minutes
silver star - Validation sMAPE: 21.60%
Training Prophet model on combined train + validation data for test evaluation...


18:45:36 - cmdstanpy - INFO - Chain [1] start processing
18:45:41 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

silver star - Test MAE: 8.65 minutes
silver star - Test RMSE: 11.81 minutes
silver star - Test sMAPE: 21.16%


Processing rides:  74%|███████▍  | 23/31 [06:58<01:45, 13.14s/it]

Models and results saved to ../models/prophet_enhanced_extended/silver_star

Processing ride 24/31: swiss bob run

Processing ride: swiss bob run
Training data size: 10293
Validation data size: 2066
Test data size: 1826
Training Prophet model on train data for validation evaluation...


18:45:44 - cmdstanpy - INFO - Chain [1] start processing
18:45:45 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

swiss bob run - Validation MAE: 6.74 minutes
swiss bob run - Validation RMSE: 9.12 minutes
swiss bob run - Validation sMAPE: 14.36%
Training Prophet model on combined train + validation data for test evaluation...


18:45:46 - cmdstanpy - INFO - Chain [1] start processing
18:45:49 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

swiss bob run - Test MAE: 7.84 minutes
swiss bob run - Test RMSE: 10.27 minutes
swiss bob run - Test sMAPE: 19.15%


Processing rides:  77%|███████▋  | 24/31 [07:06<01:22, 11.72s/it]

Models and results saved to ../models/prophet_enhanced_extended/swiss_bob_run

Processing ride 25/31: tirol log flume

Processing ride: tirol log flume
Training data size: 9849
Validation data size: 2019
Test data size: 1752
Training Prophet model on train data for validation evaluation...


18:45:52 - cmdstanpy - INFO - Chain [1] start processing
18:45:55 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

tirol log flume - Validation MAE: 7.83 minutes
tirol log flume - Validation RMSE: 11.43 minutes
tirol log flume - Validation sMAPE: 35.72%
Training Prophet model on combined train + validation data for test evaluation...


18:45:56 - cmdstanpy - INFO - Chain [1] start processing
18:46:01 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

tirol log flume - Test MAE: 44.43 minutes
tirol log flume - Test RMSE: 51.52 minutes
tirol log flume - Test sMAPE: 54.76%


Processing rides:  81%|████████  | 25/31 [07:18<01:10, 11.73s/it]

Models and results saved to ../models/prophet_enhanced_extended/tirol_log_flume

Processing ride 26/31: vienna wave swing  glckspilz

Processing ride: vienna wave swing  glckspilz
Training data size: 8416
Validation data size: 2067
Test data size: 1822
Training Prophet model on train data for validation evaluation...


18:46:04 - cmdstanpy - INFO - Chain [1] start processing
18:46:05 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

vienna wave swing  glckspilz - Validation MAE: 2.19 minutes
vienna wave swing  glckspilz - Validation RMSE: 2.69 minutes
vienna wave swing  glckspilz - Validation sMAPE: 18.45%
Training Prophet model on combined train + validation data for test evaluation...


18:46:07 - cmdstanpy - INFO - Chain [1] start processing
18:46:09 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

vienna wave swing  glckspilz - Test MAE: 1.33 minutes
vienna wave swing  glckspilz - Test RMSE: 1.81 minutes
vienna wave swing  glckspilz - Test sMAPE: 12.41%


Processing rides:  84%|████████▍ | 26/31 [07:25<00:52, 10.44s/it]

Models and results saved to ../models/prophet_enhanced_extended/vienna_wave_swing__glckspilz

Processing ride 27/31: vindjammer

Processing ride: vindjammer
Training data size: 10258
Validation data size: 2064
Test data size: 1802
Training Prophet model on train data for validation evaluation...


18:46:11 - cmdstanpy - INFO - Chain [1] start processing
18:46:14 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

vindjammer - Validation MAE: 2.39 minutes
vindjammer - Validation RMSE: 3.08 minutes
vindjammer - Validation sMAPE: 19.63%
Training Prophet model on combined train + validation data for test evaluation...


18:46:15 - cmdstanpy - INFO - Chain [1] start processing
18:46:23 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

vindjammer - Test MAE: 2.32 minutes
vindjammer - Test RMSE: 2.90 minutes
vindjammer - Test sMAPE: 29.12%


Processing rides:  87%|████████▋ | 27/31 [07:39<00:45, 11.49s/it]

Models and results saved to ../models/prophet_enhanced_extended/vindjammer

Processing ride 28/31: voletarium

Processing ride: voletarium
Training data size: 8430
Validation data size: 2067
Test data size: 1832
Training Prophet model on train data for validation evaluation...


18:46:25 - cmdstanpy - INFO - Chain [1] start processing
18:46:32 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

voletarium - Validation MAE: 13.99 minutes
voletarium - Validation RMSE: 19.32 minutes
voletarium - Validation sMAPE: 38.49%
Training Prophet model on combined train + validation data for test evaluation...


18:46:33 - cmdstanpy - INFO - Chain [1] start processing
18:46:38 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

voletarium - Test MAE: 21.51 minutes
voletarium - Test RMSE: 26.96 minutes
voletarium - Test sMAPE: 65.12%


Processing rides:  90%|█████████ | 28/31 [07:55<00:38, 12.73s/it]

Models and results saved to ../models/prophet_enhanced_extended/voletarium

Processing ride 29/31: volo da vinci

Processing ride: volo da vinci
Training data size: 10309
Validation data size: 2066
Test data size: 1830
Training Prophet model on train data for validation evaluation...


18:46:41 - cmdstanpy - INFO - Chain [1] start processing
18:46:46 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

volo da vinci - Validation MAE: 4.95 minutes
volo da vinci - Validation RMSE: 6.26 minutes
volo da vinci - Validation sMAPE: 19.15%
Training Prophet model on combined train + validation data for test evaluation...


18:46:48 - cmdstanpy - INFO - Chain [1] start processing
18:46:56 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

volo da vinci - Test MAE: 7.45 minutes
volo da vinci - Test RMSE: 9.36 minutes
volo da vinci - Test sMAPE: 43.74%


Processing rides:  94%|█████████▎| 29/31 [08:13<00:28, 14.43s/it]

Models and results saved to ../models/prophet_enhanced_extended/volo_da_vinci

Processing ride 30/31: voltron nevera powered by rimac

Processing ride: voltron nevera powered by rimac
Training data size: 0
Validation data size: 0
Test data size: 1770
Skipping voltron nevera powered by rimac due to insufficient data

Processing ride 31/31: whale adventures  northern lights

Processing ride: whale adventures  northern lights
Training data size: 10314
Validation data size: 2063
Test data size: 1832
Training Prophet model on train data for validation evaluation...


18:46:59 - cmdstanpy - INFO - Chain [1] start processing
18:47:07 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on validation set...

whale adventures  northern lights - Validation MAE: 0.94 minutes
whale adventures  northern lights - Validation RMSE: 2.47 minutes
whale adventures  northern lights - Validation sMAPE: 92.22%
Training Prophet model on combined train + validation data for test evaluation...


18:47:08 - cmdstanpy - INFO - Chain [1] start processing
18:47:22 - cmdstanpy - INFO - Chain [1] done processing


Evaluating on test set...

whale adventures  northern lights - Test MAE: 1.26 minutes
whale adventures  northern lights - Test RMSE: 2.85 minutes
whale adventures  northern lights - Test sMAPE: 75.24%


Processing rides: 100%|██████████| 31/31 [08:39<00:00, 16.75s/it]

Models and results saved to ../models/prophet_enhanced_extended/whale_adventures__northern_lights

PROPHET MODEL SUMMARY (VALIDATION & TEST SETS):
Total rides processed: 30

Validation Set Performance:
  Average MAE: 7.10 ± 5.64
  Average RMSE: 9.16 ± 6.84
  Average sMAPE: 29.79% ± 19.66%

Test Set Performance:
  Average MAE: 9.24 ± 9.40
  Average RMSE: 11.87 ± 11.29
  Average sMAPE: 45.50% ± 23.69%

Detailed summary saved to: ../models/prophet_enhanced_extended/detailed_prophet_summary_extended.csv
Simple summary saved to: ../models/prophet_enhanced_extended/prophet_summary_extended.csv





Extended Prophet visualizations saved to output directory.

PROPHET MODEL PROCESSING COMPLETED
PROPHET MODEL ANALYSIS (VALIDATION & TEST SETS)

1. Overall Performance Summary:
      dataset  count  mae_mean   mae_std  rmse_mean   rmse_std  smape_mean  \
0  validation     30  7.104824  5.635081   9.159882   6.840383   29.794987   
1        test     30  9.235983  9.399306  11.865569  11.287848   45.495572   

   smape_std  
0  19.660159  
1  23.691822  

2. Validation Set - Top 10 rides (lowest MAE):
                            ride_name       mae      rmse      smape
32      madame freudenreich curiosits  0.067799  0.538258  99.403340
58  whale adventures  northern lights  0.938673  2.467813  92.219341
30                      kolumbusjolle  1.348201  1.886739  14.349242
36        old mac donalds tractor fun  1.886466  2.437464  38.950417
40                       poppy towers  1.933295  2.600554  21.541371
50       vienna wave swing  glckspilz  2.188961  2.688666  18.453290
52           

Unnamed: 0,method_name,val_mae,val_rmse,val_smape,test_mae,test_rmse,test_smape
0,Prophet Enhanced,7.1,9.16,29.79,9.24,11.87,45.5
