# Baseline Models


In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import json
from datetime import datetime
from tqdm import tqdm

# Define consistent colors for plots
TRAIN_COLOR = 'steelblue'
TRAIN_FILL_COLOR = 'steelblue'
TRAIN_FILL_ALPHA = 0.3
VAL_COLOR = 'coral'
VAL_FILL_COLOR = 'coral'
VAL_FILL_ALPHA = 0.3
TEST_COLOR = 'forestgreen'
TEST_FILL_COLOR = 'forestgreen'
TEST_FILL_ALPHA = 0.3

## Data Loading and Initial Preprocessing

We'll use the same preprocessing as the Prophet model for consistency.

In [2]:
def load_data(file_path):
    data = pd.read_parquet(file_path)
    return data

def add_features(data):
    data['hour'] = data['timestamp'].dt.hour
    data['minute'] = data['timestamp'].dt.minute
    data['time_key'] = data['hour'] * 60 + data['minute']
    data['day_of_week'] = data['timestamp'].dt.dayofweek
    data['month'] = data['timestamp'].dt.month
    data['time_key'] = data['hour'] * 60 + data['minute']
    return data

def check_for_missing_values(data):
    missing_values = data.isnull().sum()
    if missing_values.any():
        print("Missing values found in the dataset:")
        print(missing_values[missing_values > 0])
    else:
        print("No missing values found in the dataset.")
    return missing_values

def split_data(data, train_years, val_year, test_year):
    data['timestamp'] = pd.to_datetime(data['timestamp'])
    
    train_data = data[data['timestamp'].dt.year.isin(train_years)]
    val_data = data[data['timestamp'].dt.year == val_year]
    test_data = data[data['timestamp'].dt.year == test_year]
    
    print(f"Train data size: {len(train_data)}")
    print(f"Validation data size: {len(val_data)}")
    print(f"Test data size: {len(test_data)}")
    
    return train_data, val_data, test_data

def filter_ride_data(data, ride_name):
    return data[data[f'ride_name_{ride_name}'] == True].copy()

def get_all_rides(data):
    ride_columns = [col for col in data.columns if col.startswith('ride_name_')]
    return [col.replace('ride_name_', '') for col in ride_columns]

def filter_to_operating_hours(ride_data):
    # Determine operating hours from data where wait times > 0
    operating_hours = ride_data[ride_data["wait_time"] > 0].groupby(
        ride_data["timestamp"].dt.date
    )["timestamp"].agg(['min', 'max']).reset_index()
    
    # Extract opening and closing hours
    operating_hours['opening_hour'] = pd.to_datetime(operating_hours['min']).dt.hour
    operating_hours['closing_hour'] = pd.to_datetime(operating_hours['max']).dt.hour
    
    # Set reasonable boundaries for operating hours
    operating_hours['opening_hour'] = operating_hours['opening_hour'].clip(lower=9, upper=11)
    operating_hours['closing_hour'] = operating_hours['closing_hour'].clip(lower=17, upper=21)
    
    # Create date-to-hours mapping
    date_to_hours = {}
    for _, row in operating_hours.iterrows():
        date_to_hours[row['timestamp']] = (row['opening_hour'], row['closing_hour'])
    
    # Filter data to operating hours only
    def is_operating_hour(timestamp):
        date = timestamp.date()
        if date not in date_to_hours:
            return 0
        
        open_hour, close_hour = date_to_hours[date]
        hour = timestamp.hour
        return 1 if open_hour <= hour < close_hour else 0
    
    ride_data['operating_hour'] = ride_data['timestamp'].apply(is_operating_hour)
    ride_data = ride_data[ride_data['operating_hour'] == 1]
    ride_data = ride_data.drop(columns=["operating_hour"])
    
    return ride_data

## Holiday Effects

We'll create the holiday dataframes.

In [3]:
def create_holiday_dataframes(data):
    holiday_dfs = []
    
    # Process country holidays
    for country in ['swiss', 'german', 'french']:
        holiday_col = f"is_{country}_holiday"
        if holiday_col in data.columns:
            country_holidays = data.loc[data[holiday_col] == 1, ["timestamp"]]
            if len(country_holidays) > 0:
                country_holidays["timestamp"] = pd.to_datetime(country_holidays["timestamp"]).dt.date
                country_holidays = country_holidays.drop_duplicates(subset=["timestamp"])
                country_holidays["holiday"] = f"{country}_holiday"
                holiday_dfs.append(country_holidays.reset_index(drop=True))
    # Combine all holidays
    if holiday_dfs:
        all_holidays = pd.concat(holiday_dfs)
        all_holidays["timestamp"] = pd.to_datetime(all_holidays["timestamp"])
        return all_holidays.sort_values(by=["timestamp"]).reset_index(drop=True)
    return None

## Helper Functions for Evaluation and Visualization

In [4]:
def post_process_forecast(forecast, closed_data):
    """Apply corrections to forecasted values."""
    forecast = forecast.copy()
    
    # Set predictions to zero during known closures (if closed_data has such info)
    if 'closed' in closed_data.columns:
        closed_mask = forecast['timestamp'].isin(closed_data.loc[closed_data['closed'] == 1, 'timestamp'])
        forecast.loc[closed_mask, 'wait_time'] = 0
    
    # Correct negative predictions
    if 'wait_time' in forecast.columns:
        negative_mask = forecast['wait_time'] < 0
        forecast.loc[negative_mask, 'wait_time'] = 0
    elif 'yhat' in forecast.columns:
        negative_mask = forecast['yhat'] < 0
        forecast.loc[negative_mask, 'yhat'] = 0
    
    return forecast

def evaluate_model(ride_df, actual_values, predictions, title=""):
    # Calculate metrics
    mae = np.mean(np.abs(predictions - actual_values))
    rmse = np.sqrt(np.mean(np.square(predictions - actual_values)))
    
    # For sMAPE, avoid division by zero
    epsilon = 1e-8
    abs_pct_errors = np.abs(predictions - actual_values) / (np.abs(predictions) + np.abs(actual_values) + epsilon)
    # Only include points where actual values are non-zero
    non_zero_mask = (actual_values > 0) & (predictions > 0)
    smape = np.mean(abs_pct_errors[non_zero_mask]) * 100 if np.any(non_zero_mask) else 0

    # Print metrics
    print(f"\n{title} MAE: {mae:.2f} minutes")
    print(f"{title} RMSE: {rmse:.2f} minutes")
    print(f"{title} sMAPE: {smape:.2f}%")
    
    # Create a DataFrame with results for time-based analysis
    results_df = pd.DataFrame({
        'timestamp': ride_df['timestamp'].values,
        'actual': actual_values,
        'predicted': predictions,
    })
    
    # Add time components
    results_df['hour'] = results_df['timestamp'].dt.hour
    results_df['day_of_week'] = results_df['timestamp'].dt.dayofweek
    results_df['month'] = results_df['timestamp'].dt.month
    
    # Calculate errors
    results_df['error'] = results_df['predicted'] - results_df['actual']
    results_df['abs_error'] = np.abs(results_df['error'])
    results_df['pct_error'] = abs_pct_errors * 100
    
    # Create metrics dictionary
    metrics = {
        "mae": mae,
        "rmse": rmse,
        "smape": smape
    }
    
    return metrics, results_df

## Baseline Model Classes

In [5]:
class MeanBaselineModel:
    """A simple baseline model that predicts the mean value of the training data."""
    def __init__(self):
        self.mean_value = None
        
    def fit(self, train_data):
        """Fit the model by calculating the mean of the training data"""
        self.mean_value = train_data['wait_time'].mean()
        return self
        
    def predict(self, future_df):
        """Predict using the mean value for all future points"""
        predictions = pd.DataFrame({'timestamp': future_df['timestamp']})
        predictions['yhat'] = self.mean_value
        return predictions

class TimeOfDayBaselineModel:
    """A baseline model that uses patterns at different times of day."""
    def __init__(self):
        self.time_of_day_means = None
        self.global_mean = None
        
    def fit(self, train_data):
        """Fit the model by calculating mean values for each time of day"""
        # Calculate mean for each time of day
        self.time_of_day_means = train_data.groupby('time_key')['wait_time'].mean().to_dict()
        self.global_mean = train_data['wait_time'].mean()  # Fallback value
        return self
        
    def predict(self, future_df):
        """Predict using time-of-day pattern"""
        predictions = pd.DataFrame({'timestamp': future_df['timestamp']})
        
        # Extract hour and minute from prediction dates
        predictions['hour'] = predictions['timestamp'].dt.hour
        predictions['minute'] = predictions['timestamp'].dt.minute
        predictions['time_key'] = predictions['hour'] * 60 + predictions['minute']
        
        # Assign predictions based on time of day
        predictions['yhat'] = predictions['time_key'].map(
            lambda x: self.time_of_day_means.get(x, self.global_mean))
        
        return predictions[['timestamp', 'yhat']]

class DayAndTimeBaselineModel:
    """A model that combines day of week patterns with time of day patterns."""
    def __init__(self):
        self.day_time_means = None
        self.time_means = None
        self.global_mean = None
        
    def fit(self, train_data):
        """Fit the model by calculating mean values for each day+time combination"""
        # Calculate mean for each day and time combination
        train_data['day_time_key'] = (train_data['day_of_week'] * 24 * 60 + 
                                      train_data['time_key'])
        
        self.day_time_means = train_data.groupby('day_time_key')['wait_time'].mean().to_dict()
        self.time_means = train_data.groupby('time_key')['wait_time'].mean().to_dict()
        self.global_mean = train_data['wait_time'].mean()
        self.std_dev = train_data['wait_time'].std()
        
        return self
        
    def predict(self, future_df):
        """Predict using day-of-week and time-of-day patterns"""
        predictions = pd.DataFrame({'timestamp': future_df['timestamp']})
        
        # Extract day of week, hour and minute
        predictions['day_of_week'] = predictions['timestamp'].dt.dayofweek
        predictions['hour'] = predictions['timestamp'].dt.hour
        predictions['minute'] = predictions['timestamp'].dt.minute
        predictions['time_key'] = predictions['hour'] * 60 + predictions['minute']
        predictions['day_time_key'] = (predictions['day_of_week'] * 24 * 60 + 
                                      predictions['time_key'])
        
        # First try to find day+time combination
        predictions['yhat'] = predictions['day_time_key'].map(
            lambda x: self.day_time_means.get(x, None))
        
        # If not found, fall back to time of day
        mask = predictions['yhat'].isna()
        predictions.loc[mask, 'yhat'] = predictions.loc[mask, 'time_key'].map(
            lambda x: self.time_means.get(x, self.global_mean))
                
        return predictions[['timestamp', 'yhat']]

class MovingAverageBaselineModel:
    """This model uses the average of recent observations for predictions."""
    def __init__(self, window_size=48):  # Default: 1 day (48 30-min intervals)
        self.window_size = window_size
        self.historical_data = None
        self.global_mean = None
        self.std_dev = None
        
    def fit(self, train_data):
        """Store the training data for later prediction"""
        # Make a copy and reset index to avoid ambiguity issues
        self.historical_data = train_data.copy().reset_index(drop=True)
        self.global_mean = self.historical_data['wait_time'].mean()
        self.std_dev = self.historical_data['wait_time'].std()
        return self
        
    def predict(self, future_df):
        """Predict using moving average of recent observations"""
        # Create a copy of future_df with reset index
        future_copy = future_df.copy().reset_index(drop=True)
        predictions = pd.DataFrame({'timestamp': future_copy['timestamp']})
        
        # Create lists for predictions and confidence intervals
        yhat = []
        
        # Sort historical data by date
        sorted_history = self.historical_data.sort_values('timestamp').reset_index(drop=True)
        
        # For each prediction date, calculate the moving average
        for pred_date in predictions['timestamp']:
            # Find recent observations (before the prediction date)
            recent_data = sorted_history[sorted_history['timestamp'] < pred_date].tail(self.window_size)
            
            if len(recent_data) > 0:
                # Calculate the mean of recent observations
                pred_value = recent_data['wait_time'].mean()
            else:
                # Fallback to global mean if no recent data
                pred_value = self.global_mean
                
            # Store prediction and confidence interval
            yhat.append(pred_value)
        
        # Add predictions to dataframe
        predictions['yhat'] = yhat
        
        return predictions

class SeasonalWeeklyBaselineModel:
    """This model uses data from the same day and time in previous weeks."""
    def __init__(self, num_weeks=4):
        self.num_weeks = num_weeks
        self.training_data = None
        self.global_mean = None
        self.std_dev = None
        
    def fit(self, train_data):
        """Store the training data for later prediction"""
        # Make a copy and reset index to avoid ambiguity issues
        self.training_data = train_data.copy().reset_index(drop=True)
        self.global_mean = self.training_data['wait_time'].mean()
        self.std_dev = self.training_data['wait_time'].std()
        
        # Pre-calculate time features for training data
        self.training_data['day_of_week'] = self.training_data['timestamp'].dt.dayofweek
        self.training_data['hour'] = self.training_data['timestamp'].dt.hour
        self.training_data['minute'] = self.training_data['timestamp'].dt.minute
        self.training_data['time_key'] = self.training_data['hour'] * 60 + self.training_data['minute']
        
        return self
        
    def predict(self, future_df):
        """Predict using the same day of week and time of day from previous weeks"""
        # Create a copy of future_df with reset index
        future_copy = future_df.copy().reset_index(drop=True)
        predictions = pd.DataFrame({'timestamp': future_copy['timestamp']})
        
        # Extract day of week and time for predictions
        predictions['day_of_week'] = predictions['timestamp'].dt.dayofweek
        predictions['hour'] = predictions['timestamp'].dt.hour
        predictions['minute'] = predictions['timestamp'].dt.minute
        predictions['time_key'] = predictions['hour'] * 60 + predictions['minute']
        
        # Predict for each point based on same day/time from previous weeks
        yhat = []
        
        for _, row in predictions.iterrows():
            # Find matching day and time in training data
            matches = self.training_data[
                (self.training_data['day_of_week'] == row['day_of_week']) & 
                (self.training_data['time_key'] == row['time_key'])
            ].copy().reset_index(drop=True)
            
            # Sort by date (descending) and take most recent num_weeks
            if len(matches) > 0:
                matches = matches.sort_values('timestamp', ascending=False).reset_index(drop=True)
                matches = matches.head(self.num_weeks)
                
                # Calculate prediction and confidence interval
                pred = matches['wait_time'].mean()
                yhat.append(pred)
            else:
                # Fallback to global mean if no matches
                yhat.append(self.global_mean)
        
        # Add predictions to dataframe
        predictions['yhat'] = yhat
        
        return predictions[['timestamp', 'yhat']]

class HolidayAwareBaselineModel:
    """A model that uses different patterns for holidays and normal days."""
    def __init__(self):
        self.holiday_day_time_means = None
        self.normal_day_time_means = None
        self.holiday_time_means = None
        self.normal_time_means = None
        self.holiday_mean = None
        self.normal_mean = None
        self.global_mean = None
        
    def fit(self, train_data, holiday_data):
        """Fit separate models for holidays and normal days"""
        # Mark holiday days
        if holiday_data is not None:
            holiday_dates = set(pd.to_datetime(holiday_data['timestamp']).dt.date)
        else:
            holiday_dates = set()
            
        train_data = train_data.copy()
        train_data['is_holiday'] = train_data['timestamp'].dt.date.isin(holiday_dates)
        
        # Create day-time keys if they don't exist yet
        if 'day_time_key' not in train_data.columns:
            train_data['day_time_key'] = (train_data['day_of_week'] * 24 * 60 + 
                                         train_data['time_key'])
        
        # Calculate means for holiday days
        holiday_data_subset = train_data[train_data['is_holiday']]
        self.holiday_day_time_means = holiday_data_subset.groupby('day_time_key')['wait_time'].mean().to_dict()
        self.holiday_time_means = holiday_data_subset.groupby('time_key')['wait_time'].mean().to_dict()
        
        # Calculate means for normal days
        normal_data = train_data[~train_data['is_holiday']]
        self.normal_day_time_means = normal_data.groupby('day_time_key')['wait_time'].mean().to_dict()
        self.normal_time_means = normal_data.groupby('time_key')['wait_time'].mean().to_dict()
        
        # Global means as fallback
        self.holiday_mean = holiday_data_subset['wait_time'].mean() if len(holiday_data_subset) > 0 else train_data['wait_time'].mean()
        self.normal_mean = normal_data['wait_time'].mean() if len(normal_data) > 0 else train_data['wait_time'].mean()
        self.global_mean = train_data['wait_time'].mean()
        self.std_dev = train_data['wait_time'].std()
        
        return self
        
    def predict(self, future_df, holiday_data):
        """Predict using different models for holidays and normal days"""
        predictions = pd.DataFrame({'timestamp': future_df['timestamp']})
        
        # Extract day and time features
        predictions['day_of_week'] = predictions['timestamp'].dt.dayofweek
        predictions['hour'] = predictions['timestamp'].dt.hour
        predictions['minute'] = predictions['timestamp'].dt.minute
        predictions['time_key'] = predictions['hour'] * 60 + predictions['minute']
        predictions['day_time_key'] = (predictions['day_of_week'] * 24 * 60 + 
                                      predictions['time_key'])
        
        # Mark holiday days
        if holiday_data is not None:
            holiday_dates = set(pd.to_datetime(holiday_data['timestamp']).dt.date)
        else:
            holiday_dates = set()
        predictions['is_holiday'] = predictions['timestamp'].dt.date.isin(holiday_dates)
        
        # Initialize predictions
        predictions['yhat'] = np.nan
        
        # Predict for holiday days
        holiday_mask = predictions['is_holiday']
        
        # First try day+time for holidays
        predictions.loc[holiday_mask, 'yhat'] = predictions.loc[holiday_mask, 'day_time_key'].map(
            lambda x: self.holiday_day_time_means.get(x, None))
        
        # Fall back to time of day for holidays
        still_na = holiday_mask & predictions['yhat'].isna()
        predictions.loc[still_na, 'yhat'] = predictions.loc[still_na, 'time_key'].map(
            lambda x: self.holiday_time_means.get(x, self.holiday_mean))
        
        # Predict for normal days
        normal_mask = ~predictions['is_holiday']
        
        # First try day+time for normal days
        predictions.loc[normal_mask, 'yhat'] = predictions.loc[normal_mask, 'day_time_key'].map(
            lambda x: self.normal_day_time_means.get(x, None))
        
        # Fall back to time of day for normal days
        still_na = normal_mask & predictions['yhat'].isna()
        predictions.loc[still_na, 'yhat'] = predictions.loc[still_na, 'time_key'].map(
            lambda x: self.normal_time_means.get(x, self.normal_mean))
        
        # Final fallback to global mean
        still_na = predictions['yhat'].isna()
        predictions.loc[still_na, 'yhat'] = self.global_mean
        
        return predictions[['timestamp', 'yhat']]

class TrueLastWeekModel:
    """A model that uses the exact value from 7 days before in the same dataset."""
    def __init__(self):
        self.train_data = None
        self.std_dev = None
        
    def fit(self, train_data):
        """Store training data"""
        self.train_data = train_data.copy().reset_index(drop=True)
        self.train_data_dict = dict(zip(self.train_data['timestamp'], self.train_data['wait_time']))
        self.std_dev = self.train_data['wait_time'].std()
        self.global_mean = self.train_data['wait_time'].mean()
        return self
        
    def predict(self, future_df, val_data=None, test_data=None):
        """Predict using values from 7 days before in the same dataset"""
        predictions = pd.DataFrame({'timestamp': future_df['timestamp']})
        yhat = []
        
        # Create value lookup dictionaries
        combined_data_dict = self.train_data_dict.copy()
        
        if val_data is not None:
            val_data = val_data.copy().reset_index(drop=True)
            val_data_dict = dict(zip(val_data['timestamp'], val_data['wait_time']))
            combined_data_dict.update(val_data_dict)
        
        if test_data is not None:
            test_data = test_data.copy().reset_index(drop=True)
            test_data_dict = dict(zip(test_data['timestamp'], test_data['wait_time']))
            combined_data_dict.update(test_data_dict)
        
        # For each prediction date
        for date in predictions['timestamp']:
            last_week = date - pd.Timedelta(days=7)
            
            # Check in combined data
            if last_week in combined_data_dict:
                yhat.append(combined_data_dict[last_week])
            else:
                # If no exact match, use global mean
                yhat.append(self.global_mean)
        
        # Add predictions and confidence intervals
        predictions['yhat'] = yhat
        
        return predictions[['timestamp', 'yhat']]

class LastYearModel:
    """A baseline that uses the value from the same day of the week in the previous year."""
    def __init__(self):
        self.historical_data = None
        self.std_dev = None
        
    def fit(self, train_data):
        """Store the training data indexed by timestamp"""
        self.historical_data = train_data.copy().reset_index(drop=True)
        # Create a lookup dictionary for fast access
        self.date_to_value = dict(zip(self.historical_data['timestamp'], self.historical_data['wait_time']))
        self.std_dev = self.historical_data['wait_time'].std()
        self.global_mean = self.historical_data['wait_time'].mean()
        return self
    
    def _get_same_day_previous_year(self, date):
        """Get the same day of the week from the previous year, handling leap years."""
        # Start with the same date last year
        try:
            same_date_last_year = date.replace(year=date.year - 1)
        except ValueError:
            # Handle Feb 29 in leap years - move to Feb 28
            same_date_last_year = date.replace(year=date.year - 1, day=28)
        
        # Calculate the difference in days of the week
        current_weekday = date.weekday()
        last_year_weekday = same_date_last_year.weekday()
        
        # Adjust to get the same day of the week
        days_diff = current_weekday - last_year_weekday
        target_date = same_date_last_year + pd.Timedelta(days=days_diff)
        
        return target_date
        
    def predict(self, future_df):
        """Predict using the same day of the week from the previous year"""
        predictions = pd.DataFrame({'timestamp': future_df['timestamp']})
        yhat = []
        
        for date in predictions['timestamp']:
            # Get the same day of the week from previous year
            target_date = self._get_same_day_previous_year(date)
            
            # If we have data for that exact date, use it
            if target_date in self.date_to_value:
                yhat.append(self.date_to_value[target_date])
            else:
                # Look for closest date within a 7-day window (prefer same weekday)
                closest_date = None
                min_diff = pd.Timedelta(days=7)
                
                for historical_date in self.date_to_value.keys():
                    diff = abs(historical_date - target_date)
                    # Prefer dates with the same weekday
                    if historical_date.weekday() == date.weekday():
                        diff = diff - pd.Timedelta(hours=1)  # Small bias towards same weekday
                    
                    if diff < min_diff:
                        min_diff = diff
                        closest_date = historical_date
                
                if closest_date is not None:
                    yhat.append(self.date_to_value[closest_date])
                else:
                    # Otherwise use global mean
                    yhat.append(self.global_mean)
                
        predictions['yhat'] = yhat
        
        return predictions

## Model Storage and Management Functions

In [6]:
def save_baseline_results(ride_name, all_metrics, output_dir="baseline_models"):
    """Save baseline model results for a ride."""
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    
    # Create ride-specific directory
    ride_dir = os.path.join(output_dir, ride_name.replace(" ", "_"))
    os.makedirs(ride_dir, exist_ok=True)
    
    # Save metrics
    with open(os.path.join(ride_dir, "baseline_metrics.json"), "w") as f:
        json.dump(all_metrics, f, indent=4)
    
    print(f"Baseline results saved to {ride_dir}")

def load_baseline_results(ride_name, output_dir="baseline_models"):
    """Load baseline model results for a ride."""
    ride_dir = os.path.join(output_dir, ride_name.replace(" ", "_"))
    
    # Check if results exist
    metrics_path = os.path.join(ride_dir, "baseline_metrics.json")
    if not os.path.exists(metrics_path):
        return None
    
    # Load metrics
    with open(metrics_path, "r") as f:
        metrics = json.load(f)
    
    return metrics

def create_checkpoint_file(processed_rides, output_dir="baseline_models"):
    """Create a checkpoint file to track progress."""
    checkpoint_path = os.path.join(output_dir, "checkpoint.json")
    with open(checkpoint_path, "w") as f:
        json.dump({"processed_rides": processed_rides}, f, indent=4)

def load_checkpoint_file(output_dir="baseline_models"):
    """Load checkpoint file to resume processing."""
    checkpoint_path = os.path.join(output_dir, "checkpoint.json")
    if os.path.exists(checkpoint_path):
        with open(checkpoint_path, "r") as f:
            checkpoint = json.load(f)
        return checkpoint.get("processed_rides", [])
    return []

## Single Ride Processing Function

In [7]:
def process_single_ride_baselines(ride_name, train_data, val_data, test_data, output_dir="baseline_models"):
    """Process all baseline models for a single ride on both validation and test sets."""
    print(f"\n{'='*50}")
    print(f"Processing baseline models for ride: {ride_name}")
    print(f"{'='*50}")
    
    # Filter data for the current ride
    ride_train_data = filter_ride_data(train_data, ride_name)
    ride_val_data = filter_ride_data(val_data, ride_name)
    ride_test_data = filter_ride_data(test_data, ride_name)
    
    print(f"Training data size: {len(ride_train_data)}")
    print(f"Validation data size: {len(ride_val_data)}")
    print(f"Test data size: {len(ride_test_data)}")
    
    # Skip if not enough data
    if len(ride_train_data) < 100 or len(ride_val_data) < 50 or len(ride_test_data) < 50:
        print(f"Skipping {ride_name} due to insufficient data")
        return None
    
    # Add features
    ride_train_data = add_features(ride_train_data)
    ride_val_data = add_features(ride_val_data)
    ride_test_data = add_features(ride_test_data)
    
    # Create holidays dataframe
    holidays_df = create_holiday_dataframes(ride_train_data)
    
    # Create future dataframes for predictions
    val_future = pd.DataFrame({'timestamp': ride_val_data['timestamp'].unique()})
    val_future = val_future.sort_values('timestamp').reset_index(drop=True)
    
    test_future = pd.DataFrame({'timestamp': ride_test_data['timestamp'].unique()})
    test_future = test_future.sort_values('timestamp').reset_index(drop=True)
    
    # Initialize results dictionary
    all_metrics = {}
    
    # Define all baseline models
    baseline_models = {
        "Mean Baseline": MeanBaselineModel(),
        "Time-of-Day Baseline": TimeOfDayBaselineModel(),
        "Day+Time Baseline": DayAndTimeBaselineModel(),
        "Moving Average Baseline": MovingAverageBaselineModel(window_size=48),
        "Seasonal Weekly Baseline": SeasonalWeeklyBaselineModel(num_weeks=4),
        "Holiday-Aware Baseline": HolidayAwareBaselineModel(),
        "LastWeek Baseline": TrueLastWeekModel(),
        "LastYear Baseline": LastYearModel()
    }
    
    # Get actual values
    val_actual = ride_val_data['wait_time'].values
    test_actual = ride_test_data['wait_time'].values
    
    # Process each baseline model
    for model_name, model in baseline_models.items():
        try:
            print(f"  Training {model_name}...")
            
            # Fit the model
            if model_name == "Holiday-Aware Baseline":
                model.fit(ride_train_data, holidays_df)
            else:
                model.fit(ride_train_data)
            
            # Make predictions for validation set
            if model_name == "Holiday-Aware Baseline":
                val_forecast = model.predict(val_future, holidays_df)
            elif model_name == "LastWeek Baseline":
                val_forecast = model.predict(val_future, ride_val_data, ride_test_data)
            else:
                val_forecast = model.predict(val_future)
            
            # Make predictions for test set
            if model_name == "Holiday-Aware Baseline":
                test_forecast = model.predict(test_future, holidays_df)
            elif model_name == "LastWeek Baseline":
                test_forecast = model.predict(test_future, ride_val_data, ride_test_data)
            else:
                test_forecast = model.predict(test_future)
            
            # Post-process forecasts
            val_forecast = post_process_forecast(val_forecast, ride_val_data)
            test_forecast = post_process_forecast(test_forecast, ride_test_data)
            
            # Get predictions
            val_predictions = val_forecast['yhat'].values
            test_predictions = test_forecast['yhat'].values
            
            # Evaluate model on validation set
            val_metrics, val_results_df = evaluate_model(
                ride_val_data, val_actual, val_predictions, 
                title=f"{model_name} (Validation)"
            )
            
            # Evaluate model on test set
            test_metrics, test_results_df = evaluate_model(
                ride_test_data, test_actual, test_predictions, 
                title=f"{model_name} (Test)"
            )
            
            # Store metrics for both sets
            all_metrics[model_name] = {
                "validation": val_metrics,
                "test": test_metrics
            }
            
        except Exception as e:
            print(f"  Error processing {model_name}: {str(e)}")
            all_metrics[model_name] = {
                "validation": {"mae": float('inf'), "rmse": float('inf'), "smape": float('inf')},
                "test": {"mae": float('inf'), "rmse": float('inf'), "smape": float('inf')}
            }
    
    # Add metadata
    all_metrics["metadata"] = {
        "ride_name": ride_name,
        "train_data_size": len(ride_train_data),
        "val_data_size": len(ride_val_data),
        "test_data_size": len(ride_test_data),
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    }
    
    # Save results
    save_baseline_results(ride_name, all_metrics, output_dir)
    
    return all_metrics

## All Rides Processing Function

In [8]:
def process_all_rides_baselines(all_rides, train_data, val_data, test_data,
                               output_dir="baseline_models", resume=True):
    """Process baseline models for all rides on both validation and test sets."""
    # Create output directory
    os.makedirs(output_dir, exist_ok=True)
    
    # Get list of already processed rides
    processed_rides = []
    if resume:
        processed_rides = load_checkpoint_file(output_dir)
        if processed_rides:
            print(f"Resuming from checkpoint. {len(processed_rides)} rides already processed.")
    
    # Initialize results dictionary
    all_results = {}
    
    # Process each ride
    for i, ride_name in enumerate(tqdm(all_rides, desc="Processing rides")):
        if ride_name in processed_rides:
            print(f"Skipping {ride_name} (already processed)")
            # Load metrics for the summary
            metrics = load_baseline_results(ride_name, output_dir)
            if metrics:
                all_results[ride_name] = metrics
            continue
            
        print(f"\nProcessing ride {i+1}/{len(all_rides)}: {ride_name}")
        ride_metrics = process_single_ride_baselines(ride_name, train_data, val_data, test_data,
                                                    output_dir=output_dir)
        
        if ride_metrics:
            all_results[ride_name] = ride_metrics
            processed_rides.append(ride_name)
            
            # Update checkpoint after each ride
            create_checkpoint_file(processed_rides, output_dir)
    
    # Generate summary report
    generate_baseline_summary_report(all_results, output_dir)
    
    return all_results

## Summary Report Generation

In [32]:
def generate_baseline_summary_report(all_results, output_dir="baseline_models"):
    """Generate a comprehensive summary report of all baseline models across all rides for both validation and test sets."""
    
    # Create lists to store summary data
    summary_data = []
    
    # Extract data from results
    for ride_name, ride_results in all_results.items():
        if not ride_results or "metadata" not in ride_results:
            continue
        
        # Base ride info
        metadata = ride_results["metadata"]
        base_info = {
            "ride_name": ride_name,
            "train_data_size": metadata.get("train_data_size", 0),
            "val_data_size": metadata.get("val_data_size", 0),
            "test_data_size": metadata.get("test_data_size", 0)
        }
        
        # Add metrics for each baseline model and dataset
        for model_name, model_results in ride_results.items():
            if model_name == "metadata":
                continue
            
            # Add validation metrics
            if "validation" in model_results:
                val_row = base_info.copy()
                val_row["model_name"] = model_name
                val_row["dataset"] = "validation"
                val_row["mae"] = model_results["validation"].get("mae", float('inf'))
                val_row["rmse"] = model_results["validation"].get("rmse", float('inf'))
                val_row["smape"] = model_results["validation"].get("smape", float('inf'))
                summary_data.append(val_row)
            
            # Add test metrics
            if "test" in model_results:
                test_row = base_info.copy()
                test_row["model_name"] = model_name
                test_row["dataset"] = "test"
                test_row["mae"] = model_results["test"].get("mae", float('inf'))
                test_row["rmse"] = model_results["test"].get("rmse", float('inf'))
                test_row["smape"] = model_results["test"].get("smape", float('inf'))
                summary_data.append(test_row)
    
    # Create DataFrame
    summary_df = pd.DataFrame(summary_data)
    
    if len(summary_df) == 0:
        print("No results to summarize.")
        return None
    
    # Save detailed summary
    detailed_summary_path = os.path.join(output_dir, "detailed_baseline_summary_extended.csv")
    summary_df.to_csv(detailed_summary_path, index=False)
    
    # Create model comparison summary for each dataset
    val_summary = summary_df[summary_df['dataset'] == 'validation'].groupby('model_name').agg({
        'mae': ['mean', 'std', 'median'],
        'rmse': ['mean', 'std', 'median'],
        'smape': ['mean', 'std', 'median']
    }).round(2)
    
    test_summary = summary_df[summary_df['dataset'] == 'test'].groupby('model_name').agg({
        'mae': ['mean', 'std', 'median'],
        'rmse': ['mean', 'std', 'median'],
        'smape': ['mean', 'std', 'median']
    }).round(2)
    
    # Flatten column names
    val_summary.columns = ['_'.join(col).strip() for col in val_summary.columns.values]
    test_summary.columns = ['_'.join(col).strip() for col in test_summary.columns.values]
    
    val_summary = val_summary.reset_index()
    test_summary = test_summary.reset_index()
    
    val_summary['dataset'] = 'validation'
    test_summary['dataset'] = 'test'
    
    # Combine validation and test summaries
    model_summary = pd.concat([val_summary, test_summary], ignore_index=True)
    
    # Save model comparison summary
    model_summary_path = os.path.join(output_dir, "model_comparison_summary_extended.csv")
    model_summary.to_csv(model_summary_path, index=False)
    
    # Create ride-wise best model summary for each dataset
    val_data = summary_df[summary_df['dataset'] == 'validation']
    test_data = summary_df[summary_df['dataset'] == 'test']
    
    val_best = val_data.loc[val_data.groupby('ride_name')['mae'].idxmin()]
    test_best = test_data.loc[test_data.groupby('ride_name')['mae'].idxmin()]
    
    val_best_summary = val_best[['ride_name', 'model_name', 'mae', 'rmse', 'smape']].copy()
    test_best_summary = test_best[['ride_name', 'model_name', 'mae', 'rmse', 'smape']].copy()
    
    val_best_summary['dataset'] = 'validation'
    test_best_summary['dataset'] = 'test'
    
    ride_best_summary = pd.concat([val_best_summary, test_best_summary], ignore_index=True)
    ride_best_summary = ride_best_summary.sort_values(['dataset', 'mae'])
    
    # Save ride-wise best model summary
    ride_best_path = os.path.join(output_dir, "best_model_per_ride_extended.csv")
    ride_best_summary.to_csv(ride_best_path, index=False)
    
    # Print summary statistics
    print("\n" + "="*80)
    print("BASELINE MODELS SUMMARY (VALIDATION & TEST SETS):")
    print(f"Total rides processed: {summary_df['ride_name'].nunique()}")
    print(f"Total model evaluations: {len(summary_df)}")
    
    print("\nValidation Set - Model Performance (Average MAE):")
    val_perf = val_summary[['model_name', 'mae_mean']].sort_values('mae_mean')
    print(val_perf)
    
    print("\nTest Set - Model Performance (Average MAE):")
    test_perf = test_summary[['model_name', 'mae_mean']].sort_values('mae_mean')
    print(test_perf)
    
    print(f"\nDetailed summary saved to: {detailed_summary_path}")
    print(f"Model comparison saved to: {model_summary_path}")
    print(f"Best models per ride saved to: {ride_best_path}")
    print("="*80)
    
    # Create visualizations
    create_baseline_visualizations_extended(summary_df, model_summary, ride_best_summary, output_dir)
    
    return summary_df, model_summary, ride_best_summary

def create_baseline_visualizations_extended(summary_df, model_summary, ride_best_summary, output_dir):
    """Create visualizations for baseline model results including both validation and test sets."""
    
    # 1. Model comparison boxplot (side by side for val and test)
    fig, axes = plt.subplots(1, 2, figsize=(20, 8))
    
    val_data = summary_df[summary_df['dataset'] == 'validation']
    test_data = summary_df[summary_df['dataset'] == 'test']
    
    val_data.boxplot(column='mae', by='model_name', ax=axes[0])
    axes[0].set_title('Validation Set: MAE Distribution by Model')
    axes[0].set_xlabel('Baseline Model')
    axes[0].set_ylabel('Mean Absolute Error (minutes)')
    axes[0].tick_params(axis='x', rotation=45)
    
    test_data.boxplot(column='mae', by='model_name', ax=axes[1])
    axes[1].set_title('Test Set: MAE Distribution by Model')
    axes[1].set_xlabel('Baseline Model')
    axes[1].set_ylabel('Mean Absolute Error (minutes)')
    axes[1].tick_params(axis='x', rotation=45)
    
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "model_comparison_boxplot_extended.png"))
    plt.close()
    
    # 2. Average performance comparison (validation vs test)
    val_summary = model_summary[model_summary['dataset'] == 'validation'].sort_values('mae_mean')
    test_summary = model_summary[model_summary['dataset'] == 'test'].sort_values('mae_mean')

    plt.figure(figsize=(15, 8))
    x = np.arange(len(val_summary))
    width = 0.35

    # Create the bars
    val_bars = plt.bar(x - width/2, val_summary['mae_mean'], width, label='Validation', color=VAL_COLOR, alpha=0.8)
    test_bars = plt.bar(x + width/2, test_summary['mae_mean'], width, label='Test', color=TEST_COLOR, alpha=0.8)

    # Add values above validation bars
    for i, bar in enumerate(val_bars):
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width()/2., height + 0.01 * max(val_summary['mae_mean']),
                f'{height:.2f}', ha='center', va='bottom', fontsize=12)

    # Add values above test bars
    for i, bar in enumerate(test_bars):
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width()/2., height + 0.01 * max(test_summary['mae_mean']),
                f'{height:.2f}', ha='center', va='bottom', fontsize=12)

    plt.title('Average MAE Comparison: Validation vs Test Set', fontdict={'fontsize': 20})
    plt.xlabel('Baseline Model', fontdict={'fontsize': 14})
    plt.ylabel('Average MAE (minutes)', fontdict={'fontsize': 14})
    plt.xticks(x, val_summary['model_name'], rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "val_vs_test_comparison.png"))
    plt.savefig(os.path.join(output_dir, "val_vs_test_comparison.svg"))
    plt.close()
    
    # 3. Best model distribution (validation vs test)
    fig, axes = plt.subplots(1, 2, figsize=(20, 6))
    
    val_best = ride_best_summary[ride_best_summary['dataset'] == 'validation']
    test_best = ride_best_summary[ride_best_summary['dataset'] == 'test']
    
    val_counts = val_best['model_name'].value_counts()
    test_counts = test_best['model_name'].value_counts()
    
    axes[0].bar(val_counts.index, val_counts.values, color=VAL_COLOR, alpha=0.8)
    axes[0].set_title('Validation Set: Best Model Frequency')
    axes[0].set_xlabel('Baseline Model')
    axes[0].set_ylabel('Number of Rides Where Model is Best')
    axes[0].tick_params(axis='x', rotation=45)
    
    axes[1].bar(test_counts.index, test_counts.values, color=TEST_COLOR, alpha=0.8)
    axes[1].set_title('Test Set: Best Model Frequency')
    axes[1].set_xlabel('Baseline Model')
    axes[1].set_ylabel('Number of Rides Where Model is Best')
    axes[1].tick_params(axis='x', rotation=45)
    
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "best_model_frequency_extended.png"))
    plt.close()
    
    # 4. Performance correlation (validation vs test MAE)
    val_pivot = val_data.pivot(index='ride_name', columns='model_name', values='mae')
    test_pivot = test_data.pivot(index='ride_name', columns='model_name', values='mae')
    
    # Calculate correlation between validation and test performance for each model
    correlations = {}
    for model in val_pivot.columns:
        if model in test_pivot.columns:
            val_scores = val_pivot[model].dropna()
            test_scores = test_pivot[model].dropna()
            # Align the indices
            common_rides = val_scores.index.intersection(test_scores.index)
            if len(common_rides) > 5:  # Only calculate if we have enough data points
                corr = np.corrcoef(val_scores[common_rides], test_scores[common_rides])[0, 1]
                correlations[model] = corr
    
    if correlations:
        plt.figure(figsize=(12, 6))
        models = list(correlations.keys())
        corr_values = list(correlations.values())
        
        plt.bar(models, corr_values, color='skyblue', alpha=0.8)
        plt.title('Validation-Test Performance Correlation by Model')
        plt.xlabel('Baseline Model')
        plt.ylabel('Correlation Coefficient')
        plt.xticks(rotation=45)
        plt.axhline(y=0, color='black', linestyle='-', alpha=0.3)
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, "val_test_correlation.png"))
        plt.close()
    
    print("Extended visualizations saved to output directory.")

## Analysis Functions for Loaded Results

In [10]:
def analyze_baseline_results_extended(output_dir="../models/baseline_models/"):
    """Load and analyze saved baseline model results for both validation and test sets."""
    
    # Check if summary files exist
    summary_path = os.path.join(output_dir, "model_comparison_summary_extended.csv")
    if not os.path.exists(summary_path):
        print("Extended summary files not found. Run the processing pipeline first.")
        return None
    
    # Load summary data
    model_summary = pd.read_csv(summary_path)
    detailed_summary = pd.read_csv(os.path.join(output_dir, "detailed_baseline_summary_extended.csv"))
    best_per_ride = pd.read_csv(os.path.join(output_dir, "best_model_per_ride_extended.csv"))
    
    # Display key insights
    print("="*60)
    print("BASELINE MODELS ANALYSIS (VALIDATION & TEST SETS)")
    print("="*60)
    
    # Split by dataset
    val_summary = model_summary[model_summary['dataset'] == 'validation'].sort_values('mae_mean')
    test_summary = model_summary[model_summary['dataset'] == 'test'].sort_values('mae_mean')
    
    print("\n1. Validation Set - Model Performance (sorted by average MAE):")
    print(val_summary[['model_name', 'mae_mean', 'mae_std']])
    
    print("\n2. Test Set - Model Performance (sorted by average MAE):")
    print(test_summary[['model_name', 'mae_mean', 'mae_std']])
    
    print("\n3. Validation Set - Best performing model distribution:")
    val_best = best_per_ride[best_per_ride['dataset'] == 'validation']
    print(val_best['model_name'].value_counts())
    
    print("\n4. Test Set - Best performing model distribution:")
    test_best = best_per_ride[best_per_ride['dataset'] == 'test']
    print(test_best['model_name'].value_counts())
    
    print("\n5. Top 10 rides with lowest Test MAE:")
    test_best_sorted = test_best.sort_values('mae')
    print(test_best_sorted.head(10)[['ride_name', 'model_name', 'mae']])
    
    print("\n6. Validation vs Test Performance Comparison (Average MAE):")
    comparison_df = pd.merge(
        val_summary[['model_name', 'mae_mean']].rename(columns={'mae_mean': 'val_mae'}),
        test_summary[['model_name', 'mae_mean']].rename(columns={'mae_mean': 'test_mae'}),
        on='model_name'
    )
    comparison_df['difference'] = comparison_df['test_mae'] - comparison_df['val_mae']
    comparison_df = comparison_df.sort_values('test_mae')
    print(comparison_df)
    
    return {
        'model_summary': model_summary,
        'detailed_summary': detailed_summary,
        'best_per_ride': best_per_ride,
        'val_summary': val_summary,
        'test_summary': test_summary,
        'comparison': comparison_df
    }

## Main Execution

In [11]:

print("Loading data...")
data = load_data("../data/processed/ep/final_cleaned_processed_wait_times.parquet")
print(f"Loaded data with {len(data)} rows")

check_for_missing_values(data)

data = filter_to_operating_hours(data)

# Define time periods for splitting
train_years, val_year, test_year = list(range(2017, 2023)), 2023, 2024

# Split the data
train_data, val_data, test_data = split_data(data, train_years, val_year, test_year)

# Get all rides in the dataset
all_rides = get_all_rides(data)
print(f"Found {len(all_rides)} rides in the dataset:")
for i, ride in enumerate(all_rides):
    print(f"{i+1}. {ride}")

# Set output directory for baseline models and results
output_dir = "../models/baseline_models/"

# Process all rides with baseline models (now including test set evaluation)
print("\n" + "="*60)
print("STARTING BASELINE MODEL PROCESSING FOR ALL RIDES (VALIDATION & TEST SETS)")
print("="*60)

results = process_all_rides_baselines(
    all_rides=all_rides,
    train_data=train_data,
    val_data=val_data,
    test_data=test_data,
    output_dir=output_dir,
    resume=True  # Resume from checkpoint if available
)

print("\n" + "="*60)
print("BASELINE MODEL PROCESSING COMPLETED")
print("="*60)

# Analyze the extended results
results_analysis = analyze_baseline_results_extended(output_dir)

Loading data...
Loaded data with 7834739 rows
No missing values found in the dataset.
Train data size: 297362
Validation data size: 61851
Test data size: 55699
Found 31 rides in the dataset:
1. alpine express enzian
2. arena of football  be part of it
3. arthur
4. atlantica supersplash
5. atlantis adventure
6. baaa express
7. blue fire megacoaster
8. castello dei medici
9. dancing dingie
10. euromir
11. eurosat  cancan coaster
12. eurotower
13. fjordrafting
14. jim button  journey through morrowland
15. josefinas magical imperial journey
16. kolumbusjolle
17. madame freudenreich curiosits
18. matterhornblitz
19. old mac donalds tractor fun
20. pegasus
21. poppy towers
22. poseidon
23. silver star
24. swiss bob run
25. tirol log flume
26. vienna wave swing  glckspilz
27. vindjammer
28. voletarium
29. volo da vinci
30. voltron nevera powered by rimac
31. whale adventures  northern lights

STARTING BASELINE MODEL PROCESSING FOR ALL RIDES (VALIDATION & TEST SETS)


Processing rides:   0%|          | 0/31 [00:00<?, ?it/s]


Processing ride 1/31: alpine express enzian

Processing baseline models for ride: alpine express enzian
Training data size: 10302
Validation data size: 2019
Test data size: 1753
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 7.87 minutes
Mean Baseline (Validation) RMSE: 9.64 minutes
Mean Baseline (Validation) sMAPE: 22.56%

Mean Baseline (Test) MAE: 11.26 minutes
Mean Baseline (Test) RMSE: 13.47 minutes
Mean Baseline (Test) sMAPE: 25.31%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 6.77 minutes
Time-of-Day Baseline (Validation) RMSE: 8.27 minutes
Time-of-Day Baseline (Validation) sMAPE: 24.18%

Time-of-Day Baseline (Test) MAE: 10.27 minutes
Time-of-Day Baseline (Test) RMSE: 12.64 minutes
Time-of-Day Baseline (Test) sMAPE: 26.42%
  Training Day+Time Baseline...

Day+Time Baseline (Validation) MAE: 6.04 minutes
Day+Time Baseline (Validation) RMSE: 7.48 minutes
Day+Time Baseline (Validation) sMAPE: 22.39%

Day+Time Baseline (Test) MAE: 10.09 mi

Processing rides:   3%|▎         | 1/31 [00:53<26:47, 53.60s/it]


LastYear Baseline (Validation) MAE: 9.06 minutes
LastYear Baseline (Validation) RMSE: 12.09 minutes
LastYear Baseline (Validation) sMAPE: 29.08%

LastYear Baseline (Test) MAE: 11.26 minutes
LastYear Baseline (Test) RMSE: 13.47 minutes
LastYear Baseline (Test) sMAPE: 25.31%
Baseline results saved to ../models/baseline_models/alpine_express_enzian

Processing ride 2/31: arena of football  be part of it

Processing baseline models for ride: arena of football  be part of it
Training data size: 9612
Validation data size: 2052
Test data size: 1816
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 2.57 minutes
Mean Baseline (Validation) RMSE: 2.73 minutes
Mean Baseline (Validation) sMAPE: 32.88%

Mean Baseline (Test) MAE: 2.53 minutes
Mean Baseline (Test) RMSE: 2.66 minutes
Mean Baseline (Test) sMAPE: 32.66%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 2.50 minutes
Time-of-Day Baseline (Validation) RMSE: 2.76 minutes
Time-of-Day Baseline (Validation) 

Processing rides:   6%|▋         | 2/31 [01:59<29:18, 60.64s/it]


LastYear Baseline (Validation) MAE: 4.30 minutes
LastYear Baseline (Validation) RMSE: 4.82 minutes
LastYear Baseline (Validation) sMAPE: 3.55%

LastYear Baseline (Test) MAE: 2.53 minutes
LastYear Baseline (Test) RMSE: 2.66 minutes
LastYear Baseline (Test) sMAPE: 32.66%
Baseline results saved to ../models/baseline_models/arena_of_football__be_part_of_it

Processing ride 3/31: arthur

Processing baseline models for ride: arthur
Training data size: 10298
Validation data size: 2067
Test data size: 1832
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 11.02 minutes
Mean Baseline (Validation) RMSE: 14.69 minutes
Mean Baseline (Validation) sMAPE: 18.44%

Mean Baseline (Test) MAE: 10.80 minutes
Mean Baseline (Test) RMSE: 14.41 minutes
Mean Baseline (Test) sMAPE: 19.18%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 9.12 minutes
Time-of-Day Baseline (Validation) RMSE: 12.58 minutes
Time-of-Day Baseline (Validation) sMAPE: 18.38%

Time-of-Day Baseline (Te

Processing rides:  10%|▉         | 3/31 [02:54<27:05, 58.06s/it]


LastYear Baseline (Validation) MAE: 10.75 minutes
LastYear Baseline (Validation) RMSE: 15.56 minutes
LastYear Baseline (Validation) sMAPE: 19.43%

LastYear Baseline (Test) MAE: 10.80 minutes
LastYear Baseline (Test) RMSE: 14.41 minutes
LastYear Baseline (Test) sMAPE: 19.18%
Baseline results saved to ../models/baseline_models/arthur

Processing ride 4/31: atlantica supersplash

Processing baseline models for ride: atlantica supersplash
Training data size: 9825
Validation data size: 2065
Test data size: 1826
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 11.95 minutes
Mean Baseline (Validation) RMSE: 14.23 minutes
Mean Baseline (Validation) sMAPE: 31.59%

Mean Baseline (Test) MAE: 11.93 minutes
Mean Baseline (Test) RMSE: 13.58 minutes
Mean Baseline (Test) sMAPE: 30.86%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 9.69 minutes
Time-of-Day Baseline (Validation) RMSE: 12.66 minutes
Time-of-Day Baseline (Validation) sMAPE: 30.69%

Time-of-Day Base

Processing rides:  13%|█▎        | 4/31 [03:47<25:16, 56.15s/it]


LastYear Baseline (Validation) MAE: 10.18 minutes
LastYear Baseline (Validation) RMSE: 14.52 minutes
LastYear Baseline (Validation) sMAPE: 31.26%

LastYear Baseline (Test) MAE: 11.93 minutes
LastYear Baseline (Test) RMSE: 13.58 minutes
LastYear Baseline (Test) sMAPE: 30.86%
Baseline results saved to ../models/baseline_models/atlantica_supersplash

Processing ride 5/31: atlantis adventure

Processing baseline models for ride: atlantis adventure
Training data size: 10289
Validation data size: 2066
Test data size: 1832
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 2.45 minutes
Mean Baseline (Validation) RMSE: 2.97 minutes
Mean Baseline (Validation) sMAPE: 53.25%

Mean Baseline (Test) MAE: 2.59 minutes
Mean Baseline (Test) RMSE: 3.07 minutes
Mean Baseline (Test) sMAPE: 53.06%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 2.18 minutes
Time-of-Day Baseline (Validation) RMSE: 2.81 minutes
Time-of-Day Baseline (Validation) sMAPE: 44.66%

Time-of-Day

Processing rides:  16%|█▌        | 5/31 [04:41<24:05, 55.58s/it]


LastYear Baseline (Validation) MAE: 1.98 minutes
LastYear Baseline (Validation) RMSE: 3.59 minutes
LastYear Baseline (Validation) sMAPE: 12.06%

LastYear Baseline (Test) MAE: 2.59 minutes
LastYear Baseline (Test) RMSE: 3.07 minutes
LastYear Baseline (Test) sMAPE: 53.06%
Baseline results saved to ../models/baseline_models/atlantis_adventure

Processing ride 6/31: baaa express

Processing baseline models for ride: baaa express
Training data size: 10291
Validation data size: 2067
Test data size: 1824
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 3.80 minutes
Mean Baseline (Validation) RMSE: 6.19 minutes
Mean Baseline (Validation) sMAPE: 17.08%

Mean Baseline (Test) MAE: 3.88 minutes
Mean Baseline (Test) RMSE: 5.87 minutes
Mean Baseline (Test) sMAPE: 17.06%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 4.40 minutes
Time-of-Day Baseline (Validation) RMSE: 6.03 minutes
Time-of-Day Baseline (Validation) sMAPE: 31.81%

Time-of-Day Baseline (Test) MA

Processing rides:  19%|█▉        | 6/31 [05:34<22:47, 54.72s/it]


LastYear Baseline (Validation) MAE: 6.09 minutes
LastYear Baseline (Validation) RMSE: 8.74 minutes
LastYear Baseline (Validation) sMAPE: 24.82%

LastYear Baseline (Test) MAE: 3.88 minutes
LastYear Baseline (Test) RMSE: 5.87 minutes
LastYear Baseline (Test) sMAPE: 17.06%
Baseline results saved to ../models/baseline_models/baaa_express

Processing ride 7/31: blue fire megacoaster

Processing baseline models for ride: blue fire megacoaster
Training data size: 10288
Validation data size: 2067
Test data size: 1823
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 13.29 minutes
Mean Baseline (Validation) RMSE: 17.47 minutes
Mean Baseline (Validation) sMAPE: 24.98%

Mean Baseline (Test) MAE: 12.74 minutes
Mean Baseline (Test) RMSE: 16.59 minutes
Mean Baseline (Test) sMAPE: 27.23%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 12.39 minutes
Time-of-Day Baseline (Validation) RMSE: 16.65 minutes
Time-of-Day Baseline (Validation) sMAPE: 23.17%

Time-of-Day 

Processing rides:  23%|██▎       | 7/31 [06:31<22:09, 55.41s/it]


LastYear Baseline (Validation) MAE: 11.10 minutes
LastYear Baseline (Validation) RMSE: 16.56 minutes
LastYear Baseline (Validation) sMAPE: 21.24%

LastYear Baseline (Test) MAE: 12.74 minutes
LastYear Baseline (Test) RMSE: 16.59 minutes
LastYear Baseline (Test) sMAPE: 27.23%
Baseline results saved to ../models/baseline_models/blue_fire_megacoaster

Processing ride 8/31: castello dei medici

Processing baseline models for ride: castello dei medici
Training data size: 10193
Validation data size: 2066
Test data size: 1143
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 2.45 minutes
Mean Baseline (Validation) RMSE: 2.60 minutes
Mean Baseline (Validation) sMAPE: 45.68%

Mean Baseline (Test) MAE: 3.40 minutes
Mean Baseline (Test) RMSE: 3.85 minutes
Mean Baseline (Test) sMAPE: 47.73%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 2.44 minutes
Time-of-Day Baseline (Validation) RMSE: 2.61 minutes
Time-of-Day Baseline (Validation) sMAPE: 45.73%

Time-of-D

Processing rides:  26%|██▌       | 8/31 [07:08<18:54, 49.32s/it]


LastYear Baseline (Validation) MAE: 2.70 minutes
LastYear Baseline (Validation) RMSE: 3.73 minutes
LastYear Baseline (Validation) sMAPE: 2.41%

LastYear Baseline (Test) MAE: 3.40 minutes
LastYear Baseline (Test) RMSE: 3.85 minutes
LastYear Baseline (Test) sMAPE: 47.73%
Baseline results saved to ../models/baseline_models/castello_dei_medici

Processing ride 9/31: dancing dingie

Processing baseline models for ride: dancing dingie
Training data size: 10292
Validation data size: 2066
Test data size: 1824
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 2.98 minutes
Mean Baseline (Validation) RMSE: 3.87 minutes
Mean Baseline (Validation) sMAPE: 30.73%

Mean Baseline (Test) MAE: 2.77 minutes
Mean Baseline (Test) RMSE: 3.37 minutes
Mean Baseline (Test) sMAPE: 29.59%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 2.80 minutes
Time-of-Day Baseline (Validation) RMSE: 3.85 minutes
Time-of-Day Baseline (Validation) sMAPE: 33.68%

Time-of-Day Baseline (Test

Processing rides:  29%|██▉       | 9/31 [08:02<18:41, 50.97s/it]


LastYear Baseline (Validation) MAE: 3.70 minutes
LastYear Baseline (Validation) RMSE: 5.81 minutes
LastYear Baseline (Validation) sMAPE: 14.66%

LastYear Baseline (Test) MAE: 2.77 minutes
LastYear Baseline (Test) RMSE: 3.37 minutes
LastYear Baseline (Test) sMAPE: 29.59%
Baseline results saved to ../models/baseline_models/dancing_dingie

Processing ride 10/31: euromir

Processing baseline models for ride: euromir
Training data size: 10289
Validation data size: 2049
Test data size: 1832
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 12.15 minutes
Mean Baseline (Validation) RMSE: 15.21 minutes
Mean Baseline (Validation) sMAPE: 25.79%

Mean Baseline (Test) MAE: 11.01 minutes
Mean Baseline (Test) RMSE: 13.80 minutes
Mean Baseline (Test) sMAPE: 24.95%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 9.62 minutes
Time-of-Day Baseline (Validation) RMSE: 13.25 minutes
Time-of-Day Baseline (Validation) sMAPE: 23.07%

Time-of-Day Baseline (Test) MAE: 9.08 

Processing rides:  32%|███▏      | 10/31 [08:56<18:09, 51.86s/it]


LastYear Baseline (Validation) MAE: 8.93 minutes
LastYear Baseline (Validation) RMSE: 14.01 minutes
LastYear Baseline (Validation) sMAPE: 22.73%

LastYear Baseline (Test) MAE: 11.01 minutes
LastYear Baseline (Test) RMSE: 13.80 minutes
LastYear Baseline (Test) sMAPE: 24.95%
Baseline results saved to ../models/baseline_models/euromir

Processing ride 11/31: eurosat  cancan coaster

Processing baseline models for ride: eurosat  cancan coaster
Training data size: 8650
Validation data size: 2067
Test data size: 1832
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 11.67 minutes
Mean Baseline (Validation) RMSE: 13.98 minutes
Mean Baseline (Validation) sMAPE: 23.88%

Mean Baseline (Test) MAE: 10.69 minutes
Mean Baseline (Test) RMSE: 13.00 minutes
Mean Baseline (Test) sMAPE: 21.70%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 9.19 minutes
Time-of-Day Baseline (Validation) RMSE: 11.98 minutes
Time-of-Day Baseline (Validation) sMAPE: 19.07%

Time-of-Day

Processing rides:  35%|███▌      | 11/31 [09:44<16:52, 50.63s/it]


LastYear Baseline (Validation) MAE: 8.60 minutes
LastYear Baseline (Validation) RMSE: 12.17 minutes
LastYear Baseline (Validation) sMAPE: 17.16%

LastYear Baseline (Test) MAE: 10.69 minutes
LastYear Baseline (Test) RMSE: 13.00 minutes
LastYear Baseline (Test) sMAPE: 21.70%
Baseline results saved to ../models/baseline_models/eurosat__cancan_coaster

Processing ride 12/31: eurotower

Processing baseline models for ride: eurotower
Training data size: 10308
Validation data size: 2067
Test data size: 1824
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 3.61 minutes
Mean Baseline (Validation) RMSE: 5.34 minutes
Mean Baseline (Validation) sMAPE: 20.51%

Mean Baseline (Test) MAE: 4.08 minutes
Mean Baseline (Test) RMSE: 5.78 minutes
Mean Baseline (Test) sMAPE: 21.82%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 3.29 minutes
Time-of-Day Baseline (Validation) RMSE: 5.02 minutes
Time-of-Day Baseline (Validation) sMAPE: 18.88%

Time-of-Day Baseline (Test)

Processing rides:  39%|███▊      | 12/31 [10:38<16:24, 51.79s/it]


LastYear Baseline (Validation) MAE: 3.10 minutes
LastYear Baseline (Validation) RMSE: 5.78 minutes
LastYear Baseline (Validation) sMAPE: 14.17%

LastYear Baseline (Test) MAE: 4.08 minutes
LastYear Baseline (Test) RMSE: 5.78 minutes
LastYear Baseline (Test) sMAPE: 21.82%
Baseline results saved to ../models/baseline_models/eurotower

Processing ride 13/31: fjordrafting

Processing baseline models for ride: fjordrafting
Training data size: 9808
Validation data size: 2066
Test data size: 1825
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 13.55 minutes
Mean Baseline (Validation) RMSE: 15.77 minutes
Mean Baseline (Validation) sMAPE: 33.07%

Mean Baseline (Test) MAE: 12.42 minutes
Mean Baseline (Test) RMSE: 14.27 minutes
Mean Baseline (Test) sMAPE: 32.32%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 11.53 minutes
Time-of-Day Baseline (Validation) RMSE: 14.54 minutes
Time-of-Day Baseline (Validation) sMAPE: 32.66%

Time-of-Day Baseline (Test) MAE: 

Processing rides:  42%|████▏     | 13/31 [11:32<15:41, 52.31s/it]


LastYear Baseline (Validation) MAE: 9.68 minutes
LastYear Baseline (Validation) RMSE: 14.46 minutes
LastYear Baseline (Validation) sMAPE: 29.66%

LastYear Baseline (Test) MAE: 12.42 minutes
LastYear Baseline (Test) RMSE: 14.27 minutes
LastYear Baseline (Test) sMAPE: 32.32%
Baseline results saved to ../models/baseline_models/fjordrafting

Processing ride 14/31: jim button  journey through morrowland

Processing baseline models for ride: jim button  journey through morrowland
Training data size: 8597
Validation data size: 2065
Test data size: 1820
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 2.84 minutes
Mean Baseline (Validation) RMSE: 3.08 minutes
Mean Baseline (Validation) sMAPE: 34.86%

Mean Baseline (Test) MAE: 2.73 minutes
Mean Baseline (Test) RMSE: 2.87 minutes
Mean Baseline (Test) sMAPE: 32.77%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 2.60 minutes
Time-of-Day Baseline (Validation) RMSE: 2.98 minutes
Time-of-Day Baseline (Validati

Processing rides:  45%|████▌     | 14/31 [12:17<14:14, 50.27s/it]


LastYear Baseline (Validation) MAE: 1.94 minutes
LastYear Baseline (Validation) RMSE: 3.37 minutes
LastYear Baseline (Validation) sMAPE: 9.33%

LastYear Baseline (Test) MAE: 2.73 minutes
LastYear Baseline (Test) RMSE: 2.87 minutes
LastYear Baseline (Test) sMAPE: 32.77%
Baseline results saved to ../models/baseline_models/jim_button__journey_through_morrowland

Processing ride 15/31: josefinas magical imperial journey

Processing baseline models for ride: josefinas magical imperial journey
Training data size: 10266
Validation data size: 2067
Test data size: 1821
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 4.09 minutes
Mean Baseline (Validation) RMSE: 5.23 minutes
Mean Baseline (Validation) sMAPE: 41.62%

Mean Baseline (Test) MAE: 3.25 minutes
Mean Baseline (Test) RMSE: 3.86 minutes
Mean Baseline (Test) sMAPE: 33.17%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 3.59 minutes
Time-of-Day Baseline (Validation) RMSE: 4.96 minutes
Time-of-Day Bas

Processing rides:  48%|████▊     | 15/31 [13:12<13:44, 51.52s/it]


LastYear Baseline (Validation) MAE: 4.49 minutes
LastYear Baseline (Validation) RMSE: 7.00 minutes
LastYear Baseline (Validation) sMAPE: 27.29%

LastYear Baseline (Test) MAE: 3.25 minutes
LastYear Baseline (Test) RMSE: 3.86 minutes
LastYear Baseline (Test) sMAPE: 33.17%
Baseline results saved to ../models/baseline_models/josefinas_magical_imperial_journey

Processing ride 16/31: kolumbusjolle

Processing baseline models for ride: kolumbusjolle
Training data size: 10300
Validation data size: 2067
Test data size: 1821
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 2.53 minutes
Mean Baseline (Validation) RMSE: 2.63 minutes
Mean Baseline (Validation) sMAPE: 32.86%

Mean Baseline (Test) MAE: 2.47 minutes
Mean Baseline (Test) RMSE: 2.49 minutes
Mean Baseline (Test) sMAPE: 32.55%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 2.50 minutes
Time-of-Day Baseline (Validation) RMSE: 2.66 minutes
Time-of-Day Baseline (Validation) sMAPE: 33.47%

Time-of-Day

Processing rides:  52%|█████▏    | 16/31 [14:06<13:06, 52.44s/it]


LastYear Baseline (Validation) MAE: 1.80 minutes
LastYear Baseline (Validation) RMSE: 3.05 minutes
LastYear Baseline (Validation) sMAPE: 1.98%

LastYear Baseline (Test) MAE: 2.47 minutes
LastYear Baseline (Test) RMSE: 2.49 minutes
LastYear Baseline (Test) sMAPE: 32.55%
Baseline results saved to ../models/baseline_models/kolumbusjolle

Processing ride 17/31: madame freudenreich curiosits

Processing baseline models for ride: madame freudenreich curiosits
Training data size: 8643
Validation data size: 2066
Test data size: 1832
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 0.51 minutes
Mean Baseline (Validation) RMSE: 0.67 minutes
Mean Baseline (Validation) sMAPE: 83.00%

Mean Baseline (Test) MAE: 0.53 minutes
Mean Baseline (Test) RMSE: 0.74 minutes
Mean Baseline (Test) sMAPE: 83.30%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 0.47 minutes
Time-of-Day Baseline (Validation) RMSE: 0.65 minutes
Time-of-Day Baseline (Validation) sMAPE: 84.12%

Ti

Processing rides:  55%|█████▍    | 17/31 [14:52<11:44, 50.35s/it]


LastYear Baseline (Validation) MAE: 0.07 minutes
LastYear Baseline (Validation) RMSE: 0.60 minutes
LastYear Baseline (Validation) sMAPE: 0.00%

LastYear Baseline (Test) MAE: 0.53 minutes
LastYear Baseline (Test) RMSE: 0.74 minutes
LastYear Baseline (Test) sMAPE: 83.30%
Baseline results saved to ../models/baseline_models/madame_freudenreich_curiosits

Processing ride 18/31: matterhornblitz

Processing baseline models for ride: matterhornblitz
Training data size: 10299
Validation data size: 2065
Test data size: 1830
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 12.61 minutes
Mean Baseline (Validation) RMSE: 15.29 minutes
Mean Baseline (Validation) sMAPE: 20.42%

Mean Baseline (Test) MAE: 11.27 minutes
Mean Baseline (Test) RMSE: 13.91 minutes
Mean Baseline (Test) sMAPE: 19.63%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 9.03 minutes
Time-of-Day Baseline (Validation) RMSE: 12.38 minutes
Time-of-Day Baseline (Validation) sMAPE: 17.04%

Time-of-

Processing rides:  58%|█████▊    | 18/31 [15:46<11:08, 51.40s/it]


LastYear Baseline (Validation) MAE: 7.85 minutes
LastYear Baseline (Validation) RMSE: 11.94 minutes
LastYear Baseline (Validation) sMAPE: 16.96%

LastYear Baseline (Test) MAE: 11.27 minutes
LastYear Baseline (Test) RMSE: 13.91 minutes
LastYear Baseline (Test) sMAPE: 19.63%
Baseline results saved to ../models/baseline_models/matterhornblitz

Processing ride 19/31: old mac donalds tractor fun

Processing baseline models for ride: old mac donalds tractor fun
Training data size: 10292
Validation data size: 2065
Test data size: 1818
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 2.13 minutes
Mean Baseline (Validation) RMSE: 2.45 minutes
Mean Baseline (Validation) sMAPE: 52.18%

Mean Baseline (Test) MAE: 2.22 minutes
Mean Baseline (Test) RMSE: 2.53 minutes
Mean Baseline (Test) sMAPE: 50.54%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 1.94 minutes
Time-of-Day Baseline (Validation) RMSE: 2.41 minutes
Time-of-Day Baseline (Validation) sMAPE: 48.83%


Processing rides:  61%|██████▏   | 19/31 [16:38<10:19, 51.65s/it]


LastYear Baseline (Validation) MAE: 1.93 minutes
LastYear Baseline (Validation) RMSE: 3.41 minutes
LastYear Baseline (Validation) sMAPE: 11.03%

LastYear Baseline (Test) MAE: 2.22 minutes
LastYear Baseline (Test) RMSE: 2.53 minutes
LastYear Baseline (Test) sMAPE: 50.54%
Baseline results saved to ../models/baseline_models/old_mac_donalds_tractor_fun

Processing ride 20/31: pegasus

Processing baseline models for ride: pegasus
Training data size: 10299
Validation data size: 2067
Test data size: 1831
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 6.87 minutes
Mean Baseline (Validation) RMSE: 8.49 minutes
Mean Baseline (Validation) sMAPE: 25.67%

Mean Baseline (Test) MAE: 6.78 minutes
Mean Baseline (Test) RMSE: 8.34 minutes
Mean Baseline (Test) sMAPE: 26.94%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 5.53 minutes
Time-of-Day Baseline (Validation) RMSE: 7.55 minutes
Time-of-Day Baseline (Validation) sMAPE: 22.90%

Time-of-Day Baseline (Test) MA

Processing rides:  65%|██████▍   | 20/31 [17:32<09:35, 52.35s/it]


LastYear Baseline (Validation) MAE: 5.59 minutes
LastYear Baseline (Validation) RMSE: 8.54 minutes
LastYear Baseline (Validation) sMAPE: 24.87%

LastYear Baseline (Test) MAE: 6.78 minutes
LastYear Baseline (Test) RMSE: 8.34 minutes
LastYear Baseline (Test) sMAPE: 26.94%
Baseline results saved to ../models/baseline_models/pegasus

Processing ride 21/31: poppy towers

Processing baseline models for ride: poppy towers
Training data size: 10276
Validation data size: 2065
Test data size: 1822
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 2.46 minutes
Mean Baseline (Validation) RMSE: 2.87 minutes
Mean Baseline (Validation) sMAPE: 27.06%

Mean Baseline (Test) MAE: 2.22 minutes
Mean Baseline (Test) RMSE: 2.42 minutes
Mean Baseline (Test) sMAPE: 25.54%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 2.45 minutes
Time-of-Day Baseline (Validation) RMSE: 2.94 minutes
Time-of-Day Baseline (Validation) sMAPE: 29.31%

Time-of-Day Baseline (Test) MAE: 2.26 mi

Processing rides:  68%|██████▊   | 21/31 [18:25<08:45, 52.58s/it]


LastYear Baseline (Validation) MAE: 2.16 minutes
LastYear Baseline (Validation) RMSE: 3.42 minutes
LastYear Baseline (Validation) sMAPE: 5.57%

LastYear Baseline (Test) MAE: 2.22 minutes
LastYear Baseline (Test) RMSE: 2.42 minutes
LastYear Baseline (Test) sMAPE: 25.54%
Baseline results saved to ../models/baseline_models/poppy_towers

Processing ride 22/31: poseidon

Processing baseline models for ride: poseidon
Training data size: 9832
Validation data size: 2064
Test data size: 1826
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 15.90 minutes
Mean Baseline (Validation) RMSE: 18.20 minutes
Mean Baseline (Validation) sMAPE: 31.77%

Mean Baseline (Test) MAE: 15.46 minutes
Mean Baseline (Test) RMSE: 17.31 minutes
Mean Baseline (Test) sMAPE: 31.83%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 13.56 minutes
Time-of-Day Baseline (Validation) RMSE: 16.88 minutes
Time-of-Day Baseline (Validation) sMAPE: 28.46%

Time-of-Day Baseline (Test) MAE: 13.50 

Processing rides:  71%|███████   | 22/31 [19:17<07:51, 52.40s/it]


LastYear Baseline (Validation) MAE: 10.45 minutes
LastYear Baseline (Validation) RMSE: 16.64 minutes
LastYear Baseline (Validation) sMAPE: 26.24%

LastYear Baseline (Test) MAE: 15.46 minutes
LastYear Baseline (Test) RMSE: 17.31 minutes
LastYear Baseline (Test) sMAPE: 31.83%
Baseline results saved to ../models/baseline_models/poseidon

Processing ride 23/31: silver star

Processing baseline models for ride: silver star
Training data size: 10244
Validation data size: 2064
Test data size: 1826
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 10.89 minutes
Mean Baseline (Validation) RMSE: 13.65 minutes
Mean Baseline (Validation) sMAPE: 25.21%

Mean Baseline (Test) MAE: 11.27 minutes
Mean Baseline (Test) RMSE: 14.20 minutes
Mean Baseline (Test) sMAPE: 25.08%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 9.46 minutes
Time-of-Day Baseline (Validation) RMSE: 12.32 minutes
Time-of-Day Baseline (Validation) sMAPE: 21.51%

Time-of-Day Baseline (Test) MAE:

Processing rides:  74%|███████▍  | 23/31 [20:11<07:03, 52.90s/it]


LastYear Baseline (Validation) MAE: 9.02 minutes
LastYear Baseline (Validation) RMSE: 13.02 minutes
LastYear Baseline (Validation) sMAPE: 21.15%

LastYear Baseline (Test) MAE: 11.27 minutes
LastYear Baseline (Test) RMSE: 14.20 minutes
LastYear Baseline (Test) sMAPE: 25.08%
Baseline results saved to ../models/baseline_models/silver_star

Processing ride 24/31: swiss bob run

Processing baseline models for ride: swiss bob run
Training data size: 10293
Validation data size: 2066
Test data size: 1826
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 12.48 minutes
Mean Baseline (Validation) RMSE: 14.64 minutes
Mean Baseline (Validation) sMAPE: 22.39%

Mean Baseline (Test) MAE: 11.41 minutes
Mean Baseline (Test) RMSE: 13.76 minutes
Mean Baseline (Test) sMAPE: 21.57%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 9.64 minutes
Time-of-Day Baseline (Validation) RMSE: 12.42 minutes
Time-of-Day Baseline (Validation) sMAPE: 19.77%

Time-of-Day Baseline (Test

Processing rides:  77%|███████▋  | 24/31 [21:04<06:09, 52.81s/it]


LastYear Baseline (Validation) MAE: 7.60 minutes
LastYear Baseline (Validation) RMSE: 11.41 minutes
LastYear Baseline (Validation) sMAPE: 17.15%

LastYear Baseline (Test) MAE: 11.41 minutes
LastYear Baseline (Test) RMSE: 13.76 minutes
LastYear Baseline (Test) sMAPE: 21.57%
Baseline results saved to ../models/baseline_models/swiss_bob_run

Processing ride 25/31: tirol log flume

Processing baseline models for ride: tirol log flume
Training data size: 9849
Validation data size: 2019
Test data size: 1752
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 8.90 minutes
Mean Baseline (Validation) RMSE: 11.50 minutes
Mean Baseline (Validation) sMAPE: 28.99%

Mean Baseline (Test) MAE: 9.79 minutes
Mean Baseline (Test) RMSE: 11.56 minutes
Mean Baseline (Test) sMAPE: 35.05%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 6.89 minutes
Time-of-Day Baseline (Validation) RMSE: 10.11 minutes
Time-of-Day Baseline (Validation) sMAPE: 26.88%

Time-of-Day Baseline (T

Processing rides:  81%|████████  | 25/31 [21:53<05:11, 51.88s/it]


LastYear Baseline (Validation) MAE: 9.49 minutes
LastYear Baseline (Validation) RMSE: 13.66 minutes
LastYear Baseline (Validation) sMAPE: 30.33%

LastYear Baseline (Test) MAE: 9.79 minutes
LastYear Baseline (Test) RMSE: 11.56 minutes
LastYear Baseline (Test) sMAPE: 35.05%
Baseline results saved to ../models/baseline_models/tirol_log_flume

Processing ride 26/31: vienna wave swing  glckspilz

Processing baseline models for ride: vienna wave swing  glckspilz
Training data size: 8416
Validation data size: 2067
Test data size: 1822
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 2.20 minutes
Mean Baseline (Validation) RMSE: 2.57 minutes
Mean Baseline (Validation) sMAPE: 23.35%

Mean Baseline (Test) MAE: 2.04 minutes
Mean Baseline (Test) RMSE: 2.28 minutes
Mean Baseline (Test) sMAPE: 22.52%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 2.20 minutes
Time-of-Day Baseline (Validation) RMSE: 2.64 minutes
Time-of-Day Baseline (Validation) sMAPE: 25.04%


Processing rides:  84%|████████▍ | 26/31 [22:37<04:07, 49.42s/it]


LastYear Baseline (Validation) MAE: 2.13 minutes
LastYear Baseline (Validation) RMSE: 3.38 minutes
LastYear Baseline (Validation) sMAPE: 6.10%

LastYear Baseline (Test) MAE: 2.04 minutes
LastYear Baseline (Test) RMSE: 2.28 minutes
LastYear Baseline (Test) sMAPE: 22.52%
Baseline results saved to ../models/baseline_models/vienna_wave_swing__glckspilz

Processing ride 27/31: vindjammer

Processing baseline models for ride: vindjammer
Training data size: 10258
Validation data size: 2064
Test data size: 1802
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 2.82 minutes
Mean Baseline (Validation) RMSE: 3.50 minutes
Mean Baseline (Validation) sMAPE: 29.33%

Mean Baseline (Test) MAE: 2.51 minutes
Mean Baseline (Test) RMSE: 2.95 minutes
Mean Baseline (Test) sMAPE: 27.58%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 2.76 minutes
Time-of-Day Baseline (Validation) RMSE: 3.50 minutes
Time-of-Day Baseline (Validation) sMAPE: 31.29%

Time-of-Day Baseline (Te

Processing rides:  87%|████████▋ | 27/31 [23:29<03:20, 50.11s/it]


LastYear Baseline (Validation) MAE: 2.56 minutes
LastYear Baseline (Validation) RMSE: 3.97 minutes
LastYear Baseline (Validation) sMAPE: 8.28%

LastYear Baseline (Test) MAE: 2.51 minutes
LastYear Baseline (Test) RMSE: 2.95 minutes
LastYear Baseline (Test) sMAPE: 27.58%
Baseline results saved to ../models/baseline_models/vindjammer

Processing ride 28/31: voletarium

Processing baseline models for ride: voletarium
Training data size: 8430
Validation data size: 2067
Test data size: 1832
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 14.25 minutes
Mean Baseline (Validation) RMSE: 16.78 minutes
Mean Baseline (Validation) sMAPE: 35.47%

Mean Baseline (Test) MAE: 13.64 minutes
Mean Baseline (Test) RMSE: 16.26 minutes
Mean Baseline (Test) sMAPE: 32.13%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 11.02 minutes
Time-of-Day Baseline (Validation) RMSE: 13.40 minutes
Time-of-Day Baseline (Validation) sMAPE: 30.09%

Time-of-Day Baseline (Test) MAE: 10.8

Processing rides:  90%|█████████ | 28/31 [24:14<02:25, 48.66s/it]


LastYear Baseline (Validation) MAE: 10.27 minutes
LastYear Baseline (Validation) RMSE: 14.78 minutes
LastYear Baseline (Validation) sMAPE: 26.03%

LastYear Baseline (Test) MAE: 13.64 minutes
LastYear Baseline (Test) RMSE: 16.26 minutes
LastYear Baseline (Test) sMAPE: 32.13%
Baseline results saved to ../models/baseline_models/voletarium

Processing ride 29/31: volo da vinci

Processing baseline models for ride: volo da vinci
Training data size: 10309
Validation data size: 2066
Test data size: 1830
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 6.34 minutes
Mean Baseline (Validation) RMSE: 7.68 minutes
Mean Baseline (Validation) sMAPE: 22.79%

Mean Baseline (Test) MAE: 5.72 minutes
Mean Baseline (Test) RMSE: 7.05 minutes
Mean Baseline (Test) sMAPE: 23.33%
  Training Time-of-Day Baseline...

Time-of-Day Baseline (Validation) MAE: 6.12 minutes
Time-of-Day Baseline (Validation) RMSE: 7.38 minutes
Time-of-Day Baseline (Validation) sMAPE: 23.65%

Time-of-Day Baseline (Test) MAE

Processing rides:  94%|█████████▎| 29/31 [25:07<01:39, 49.88s/it]


LastYear Baseline (Validation) MAE: 5.57 minutes
LastYear Baseline (Validation) RMSE: 7.64 minutes
LastYear Baseline (Validation) sMAPE: 19.35%

LastYear Baseline (Test) MAE: 5.72 minutes
LastYear Baseline (Test) RMSE: 7.05 minutes
LastYear Baseline (Test) sMAPE: 23.33%
Baseline results saved to ../models/baseline_models/volo_da_vinci

Processing ride 30/31: voltron nevera powered by rimac

Processing baseline models for ride: voltron nevera powered by rimac
Training data size: 0
Validation data size: 0
Test data size: 1770
Skipping voltron nevera powered by rimac due to insufficient data

Processing ride 31/31: whale adventures  northern lights

Processing baseline models for ride: whale adventures  northern lights
Training data size: 10314
Validation data size: 2063
Test data size: 1832
  Training Mean Baseline...

Mean Baseline (Validation) MAE: 1.52 minutes
Mean Baseline (Validation) RMSE: 2.31 minutes
Mean Baseline (Validation) sMAPE: 70.81%

Mean Baseline (Test) MAE: 1.63 minute

Processing rides: 100%|██████████| 31/31 [26:01<00:00, 50.37s/it]


LastYear Baseline (Validation) MAE: 1.11 minutes
LastYear Baseline (Validation) RMSE: 2.59 minutes
LastYear Baseline (Validation) sMAPE: 12.26%

LastYear Baseline (Test) MAE: 1.63 minutes
LastYear Baseline (Test) RMSE: 2.58 minutes
LastYear Baseline (Test) sMAPE: 71.46%
Baseline results saved to ../models/baseline_models/whale_adventures__northern_lights

BASELINE MODELS SUMMARY (VALIDATION & TEST SETS):
Total rides processed: 30
Total model evaluations: 480

Validation Set - Model Performance (Average MAE):
                 model_name  mae_mean
2         LastWeek Baseline      4.52
1    Holiday-Aware Baseline      5.62
0         Day+Time Baseline      5.63
3         LastYear Baseline      5.81
7      Time-of-Day Baseline      5.90
6  Seasonal Weekly Baseline      6.83
4             Mean Baseline      6.96
5   Moving Average Baseline      6.98

Test Set - Model Performance (Average MAE):
                 model_name  mae_mean
2         LastWeek Baseline      4.81
0         Day+Time Bas




Extended visualizations saved to output directory.

BASELINE MODEL PROCESSING COMPLETED
BASELINE MODELS ANALYSIS (VALIDATION & TEST SETS)

1. Validation Set - Model Performance (sorted by average MAE):
                 model_name  mae_mean  mae_std
2         LastWeek Baseline      4.52     3.43
1    Holiday-Aware Baseline      5.62     3.63
0         Day+Time Baseline      5.63     3.64
3         LastYear Baseline      5.81     3.55
7      Time-of-Day Baseline      5.90     3.86
6  Seasonal Weekly Baseline      6.83     5.11
4             Mean Baseline      6.96     4.86
5   Moving Average Baseline      6.98     5.14

2. Test Set - Model Performance (sorted by average MAE):
                  model_name  mae_mean  mae_std
10         LastWeek Baseline      4.81     3.43
8          Day+Time Baseline      5.78     3.66
9     Holiday-Aware Baseline      5.78     3.65
15      Time-of-Day Baseline      5.90     3.76
14  Seasonal Weekly Baseline      6.57     4.89
13   Moving Average Baseline 

In [16]:
def create_method_summary_csv(all_results, output_dir="../models/baseline_models", filename="method_summary.csv"):
    """
    Create a CSV file with aggregated baseline model performance metrics.
    
    Args:
        all_results: Dictionary containing all baseline model results
        output_dir: Directory to save the CSV file
        filename: Name of the output CSV file
    
    Returns:
        pandas.DataFrame: Summary dataframe with the requested format
    """
    import pandas as pd
    import numpy as np
    import os
    
    # Initialize list to store summary data
    summary_data = []
    
    # Extract all baseline model names
    model_names = set()
    for ride_name, ride_results in all_results.items():
        if "metadata" not in ride_results:
            continue
        for model_name in ride_results.keys():
            if model_name != "metadata":
                model_names.add(model_name)
    
    # For each baseline model, aggregate metrics across all rides
    for model_name in sorted(model_names):
        val_mse_values = []
        val_rmse_values = []
        val_smape_values = []
        test_mse_values = []
        test_rmse_values = []
        test_smape_values = []
        
        # Collect metrics from all rides for this model
        for ride_name, ride_results in all_results.items():
            if "metadata" not in ride_results or model_name not in ride_results:
                continue
                
            model_results = ride_results[model_name]
            
            # Extract validation metrics
            if "validation" in model_results:
                val_metrics = model_results["validation"]
                if "rmse" in val_metrics and not np.isinf(val_metrics["rmse"]):
                    val_rmse_values.append(val_metrics["rmse"])
                if "smape" in val_metrics and not np.isinf(val_metrics["smape"]):
                    val_smape_values.append(val_metrics["smape"])
                if "mse" in val_metrics and not np.isinf(val_metrics["mse"]):
                    val_mse_values.append(val_metrics["mse"])
            
            # Extract test metrics
            if "test" in model_results:
                test_metrics = model_results["test"]
                if "rmse" in test_metrics and not np.isinf(test_metrics["rmse"]):
                    test_rmse_values.append(test_metrics["rmse"])
                if "smape" in test_metrics and not np.isinf(test_metrics["smape"]):
                    test_smape_values.append(test_metrics["smape"])
                if "mse" in test_metrics and not np.isinf(test_metrics["mse"]):
                    test_mse_values.append(test_metrics["mse"])
        
        # Calculate average metrics across all rides
        val_rmse_avg = np.mean(val_rmse_values) if val_rmse_values else np.nan
        val_mse_avg = np.mean(val_rmse_values) if val_rmse_values else np.nan
        val_smape_avg = np.mean(val_smape_values) if val_smape_values else np.nan
        
        test_rmse_avg = np.mean(test_rmse_values) if test_rmse_values else np.nan
        test_mse_avg = np.mean(test_mse_values) if test_mse_values else np.nan
        test_smape_avg = np.mean(test_smape_values) if test_smape_values else np.nan
        
        # Add to summary data
        summary_data.append({
            'method_name': model_name,
            'val_mse': val_mse_avg,
            'val_rmse': val_rmse_avg,
            'val_smape': val_smape_avg,
            'test_mse': test_mse_avg,
            'test_rmse': test_rmse_avg,
            'test_smape': test_smape_avg
        })
    
    # Create DataFrame
    summary_df = pd.DataFrame(summary_data)
    
    # Round values to 2 decimal places
    numeric_columns = ['val_mse', 'val_rmse', 'val_smape', 'test_mse', 'test_rmse', 'test_smape']
    for col in numeric_columns:
        summary_df[col] = summary_df[col].round(2)
    
    # Sort by test_rmse (ascending - best performance first)
    summary_df = summary_df.sort_values('test_rmse')
    
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    
    # Save to CSV with semicolon separator as requested
    output_path = os.path.join(output_dir, filename)
    summary_df.to_csv(output_path, sep=';', index=False)
    
    print(f"Method summary CSV saved to: {output_path}")
    print("\nSummary of baseline model performance:")
    print(summary_df.to_string(index=False))
    
    return summary_df

create_method_summary_csv(results)

Method summary CSV saved to: ../models/baseline_models/method_summary.csv

Summary of baseline model performance:
             method_name  val_mse  val_rmse  val_smape  test_mse  test_rmse  test_smape
       Day+Time Baseline     7.40      7.40      31.13       NaN       7.49       31.75
  Holiday-Aware Baseline     7.41      7.41      31.61       NaN       7.51       32.33
    Time-of-Day Baseline     7.68      7.68      31.83       NaN       7.60       31.83
       LastWeek Baseline     7.41      7.41      13.07       NaN       7.92       16.89
       LastYear Baseline     8.68      8.68      16.87       NaN       8.29       32.62
           Mean Baseline     8.54      8.54      32.94       NaN       8.29       32.62
 Moving Average Baseline     8.85      8.85      27.50       NaN       8.50       27.14
Seasonal Weekly Baseline     9.65      9.65      31.31       NaN       9.39       31.21


Unnamed: 0,method_name,val_mse,val_rmse,val_smape,test_mse,test_rmse,test_smape
0,Day+Time Baseline,7.4,7.4,31.13,,7.49,31.75
1,Holiday-Aware Baseline,7.41,7.41,31.61,,7.51,32.33
7,Time-of-Day Baseline,7.68,7.68,31.83,,7.6,31.83
2,LastWeek Baseline,7.41,7.41,13.07,,7.92,16.89
3,LastYear Baseline,8.68,8.68,16.87,,8.29,32.62
4,Mean Baseline,8.54,8.54,32.94,,8.29,32.62
5,Moving Average Baseline,8.85,8.85,27.5,,8.5,27.14
6,Seasonal Weekly Baseline,9.65,9.65,31.31,,9.39,31.21


In [28]:
results_analysis["val_summary"][["model_name", "mae_mean", "rmse_mean", "smape_mean", "dataset"]].sort_values("model_name")

Unnamed: 0,model_name,mae_mean,rmse_mean,smape_mean,dataset
0,Day+Time Baseline,5.63,7.4,31.13,validation
1,Holiday-Aware Baseline,5.62,7.41,31.61,validation
2,LastWeek Baseline,4.52,7.41,13.07,validation
3,LastYear Baseline,5.81,8.68,16.87,validation
4,Mean Baseline,6.96,8.54,32.94,validation
5,Moving Average Baseline,6.98,8.85,27.5,validation
6,Seasonal Weekly Baseline,6.83,9.65,31.31,validation
7,Time-of-Day Baseline,5.9,7.68,31.83,validation


In [27]:
results_analysis["test_summary"][["model_name", "mae_mean", "rmse_mean", "smape_mean", "dataset"]].sort_values("model_name")

Unnamed: 0,model_name,mae_mean,rmse_mean,smape_mean,dataset
8,Day+Time Baseline,5.78,7.49,31.75,test
9,Holiday-Aware Baseline,5.78,7.51,32.33,test
10,LastWeek Baseline,4.81,7.92,16.89,test
11,LastYear Baseline,6.83,8.29,32.62,test
12,Mean Baseline,6.83,8.29,32.62,test
13,Moving Average Baseline,6.64,8.5,27.14,test
14,Seasonal Weekly Baseline,6.57,9.39,31.21,test
15,Time-of-Day Baseline,5.9,7.6,31.83,test


In [24]:
results_analysis["comparison"].sort_values("model_name")

Unnamed: 0,model_name,val_mae,test_mae,difference
2,Day+Time Baseline,5.63,5.78,0.15
1,Holiday-Aware Baseline,5.62,5.78,0.16
0,LastWeek Baseline,4.52,4.81,0.29
3,LastYear Baseline,5.81,6.83,1.02
6,Mean Baseline,6.96,6.83,-0.13
7,Moving Average Baseline,6.98,6.64,-0.34
5,Seasonal Weekly Baseline,6.83,6.57,-0.26
4,Time-of-Day Baseline,5.9,5.9,0.0
