# SeasonalNaive model based on last weeks wait times

In [1]:
%pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Load in data
import pandas as pd
final_merged_df = pd.read_csv('../data/final_merged_data.csv', parse_dates=['Local Time'])

# Print preliminary data
print(f"Loaded {len(final_merged_df):,} records")
print(f"Number of unique rides: {final_merged_df['Ride'].nunique()}")

# Print a list of all unique ride names along with the number of records for each ride.
# This provides an overview of data distribution across rides.
print(f"\nUnique rides:")
for ride in sorted(final_merged_df['Ride'].unique()):
    count = len(final_merged_df[final_merged_df['Ride'] == ride])
    print(f"  {ride}: {count:,} records")

  final_merged_df = pd.read_csv('../data/final_merged_data.csv', parse_dates=['Local Time'])


Loaded 589,985 records
Number of unique rides: 42

Unique rides:
  Animation Academy: 18,021 records
  Games of Pixar Pier: 16,166 records
  Golden Zephyr: 18,021 records
  Goofy's Sky School: 18,021 records
  Grizzly River Run: 18,021 records
  Guardians of the Galaxy - Mission: BREAKOUT!: 18,021 records
  Guardians of the Galaxy - Monsters After Dark: 9,754 records
  Incredicoaster: 18,021 records
  Incredicoaster Single Rider: 18,021 records
  Inside Out Emotional Whirlwind: 18,021 records
  Jessie's Critter Carousel: 18,021 records
  Jumpin' Jellyfish: 18,021 records
  Luigi's Honkin' Haul-O-Ween: 1,516 records
  Luigi's Joy to the Whirl: 3,087 records
  Luigi's Rollickin' Roadsters: 11,564 records
  Mater's Graveyard JamBOOree: 3,344 records
  Mater's Jingle Jamboree: 3,087 records
  Mater's Junkyard Jamboree: 11,590 records
  Mickey's PhilharMagic: 18,021 records
  Monsters, Inc. Mike & Sulley to the Rescue!: 18,021 records
  Pixar Pal-A-Round - Swinging: 18,021 records
  Pixar P

In [3]:
# Split the main dataset into separate, time-sorted DataFrames for each
# ride and store them in a dictionary for easy per-ride analysis.
ride_dataframes = {}

for ride in final_merged_df['Ride'].unique():
    ride_df = final_merged_df[final_merged_df['Ride'] == ride].copy()
    ride_df = ride_df.sort_values('Local Time').reset_index(drop=True)
    ride_dataframes[ride] = ride_df

In [None]:
# Feature Engineering and Seasonal Naive Baseline

import numpy as np
from sklearn.metrics import mean_absolute_error

# We'll store baseline MAE results for each ride
baseline_results = {}

# Loop over each ride's data
for ride, df in ride_dataframes.items():
    
    # Feature Engineering (make raw data easier to use)
    df['hour'] = df['Local Time'].dt.hour
    df['day_of_week'] = df['Local Time'].dt.dayofweek
    df['month'] = df['Local Time'].dt.month
    df['is_weekend'] = df['day_of_week'].isin([5,6]).astype(int)
    
    # Create a Seasonal Naive feature
    # Prediction = wait time at same time last week
    lag = 7*24*4                                                # Data is at 15-minute intervals, so lag = 7 days * 24 hours * 4 intervals/hour = 672
    df['Predicted Wait Time'] = df['Wait Time'].shift(lag)
    df.dropna(subset=['Predicted Wait Time'], inplace=True)     # Drop first week of rows that have no "previous week" value
    
    # Calculate baseline error (average absolute difference between predicted and actual wait times)
    mae = mean_absolute_error(df['Wait Time'], df['Predicted Wait Time'])
    baseline_results[ride] = mae
    
    # Save the updated DataFrame back to the dictionary
    ride_dataframes[ride] = df

# Display baseline MAE for each ride
for ride, mae in baseline_results.items():
    print(f"{ride}: Seasonal Naive MAE = {mae:.2f} minutes")

Guardians of the Galaxy - Mission: BREAKOUT!: Seasonal Naive MAE = 33.87 minutes
Golden Zephyr: Seasonal Naive MAE = 6.26 minutes
Goofy's Sky School: Seasonal Naive MAE = 23.17 minutes
Silly Symphony Swings: Seasonal Naive MAE = 7.14 minutes
Silly Symphony Swings Single Rider: Seasonal Naive MAE = 0.00 minutes
The Little Mermaid - Ariel's Undersea Adventure: Seasonal Naive MAE = 14.35 minutes
Games of Pixar Pier: Seasonal Naive MAE = 2.46 minutes
Walt Disney Imagineering Blue Sky Cellar: Seasonal Naive MAE = 0.00 minutes
Incredicoaster: Seasonal Naive MAE = 22.38 minutes
Inside Out Emotional Whirlwind: Seasonal Naive MAE = 12.62 minutes
Jessie's Critter Carousel: Seasonal Naive MAE = 2.30 minutes
Jumpin' Jellyfish: Seasonal Naive MAE = 7.02 minutes
Pixar Pal-A-Round - Swinging: Seasonal Naive MAE = 19.78 minutes
Pixar Pal-A-Round – Non-Swinging: Seasonal Naive MAE = 18.51 minutes
Toy Story Midway Mania!: Seasonal Naive MAE = 21.41 minutes
Incredicoaster Single Rider: Seasonal Naive MAE

In [None]:
# Train/Test Split

from sklearn.model_selection import train_test_split

# Fraction of data to use for testing
test_fraction = 0.1

# Dictionary to store train/test sets per ride
ride_splits = {}

for ride, df in ride_dataframes.items():
    # Features we want to use for ML
    # Do one-hot encoding for columns where necessary
    feature_cols = [
        'hour', 'day_of_week', 'month', 'is_weekend',
        'temperature_2m (°F)', 'apparent_temperature (°F)', 'precipitation (mm)',
        'holiday_flag', 'tier', 'Land', 'holiday_name'
    ]
    target_col = 'Wait Time'    # Target variable
    
    # Determine index to split
    split_idx = int(len(df) * (1 - test_fraction))
    
    # Train/test split
    # We split the data chronologically (earlier rows for training, later rows for testing).
    # Randomly shuffling would let the model see the future, causing leakage. 
    # Chronological splitting ensures the model is trained on past data and evaluated on truly unseen future data.

    # I used .copy() for these, I'm not sure if that's necessary
    X_train = df[feature_cols].iloc[:split_idx].copy()
    X_test = df[feature_cols].iloc[split_idx:].copy()
    
    y_train = df[target_col].iloc[:split_idx].copy()
    y_test = df[target_col].iloc[split_idx:].copy()

    # One-hot encode categorical columns
    categorical_cols = ['tier', 'Land', 'holiday_name']
    X_train = pd.get_dummies(X_train, columns=categorical_cols, drop_first=False)
    X_test = pd.get_dummies(X_test, columns=categorical_cols, drop_first=False)
    
    # Align columns: ensure test set has same columns as train set
    X_test = X_test.reindex(columns=X_train.columns, fill_value=0)
    
    # Store in dictionary
    ride_splits[ride] = {
        'X_train': X_train,
        'y_train': y_train,
        'X_test': X_test,
        'y_test': y_test
    }
    
    print(f"{ride}: Train size = {len(X_train)}, Test size = {len(X_test)}")

Guardians of the Galaxy - Mission: BREAKOUT!: Train size = 15614, Test size = 1735
Golden Zephyr: Train size = 15614, Test size = 1735
Goofy's Sky School: Train size = 15614, Test size = 1735
Silly Symphony Swings: Train size = 15614, Test size = 1735
Silly Symphony Swings Single Rider: Train size = 15614, Test size = 1735
The Little Mermaid - Ariel's Undersea Adventure: Train size = 15614, Test size = 1735
Games of Pixar Pier: Train size = 13944, Test size = 1550
Walt Disney Imagineering Blue Sky Cellar: Train size = 635, Test size = 71
Incredicoaster: Train size = 15614, Test size = 1735
Inside Out Emotional Whirlwind: Train size = 15614, Test size = 1735
Jessie's Critter Carousel: Train size = 15614, Test size = 1735
Jumpin' Jellyfish: Train size = 15614, Test size = 1735
Pixar Pal-A-Round - Swinging: Train size = 15614, Test size = 1735
Pixar Pal-A-Round – Non-Swinging: Train size = 15614, Test size = 1735
Toy Story Midway Mania!: Train size = 15614, Test size = 1735
Incredicoaster

In [6]:
!brew install libomp
%pip uninstall -y xgboost
%pip install xgboost --no-cache-dir
import xgboost as xgb
print(xgb.__version__)

To reinstall 21.1.5, run:
  brew reinstall libomp
Found existing installation: xgboost 3.1.1
Uninstalling xgboost-3.1.1:
  Successfully uninstalled xgboost-3.1.1
Note: you may need to restart the kernel to use updated packages.
Collecting xgboost
  Downloading xgboost-3.1.1-py3-none-macosx_12_0_arm64.whl.metadata (2.1 kB)
Downloading xgboost-3.1.1-py3-none-macosx_12_0_arm64.whl (2.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: xgboost
Successfully installed xgboost-3.1.1
Note: you may need to restart the kernel to use updated packages.
3.1.1


In [None]:
# Train XGBoost Models for Each Ride and Evaluate MAE

import xgboost as xgb
from sklearn.metrics import mean_absolute_error

# Dictionary to store trained models and MAE
ride_models = {}
ride_mae = {}

for ride, data in ride_splits.items():
    X_train = data['X_train']
    y_train = data['y_train']
    X_test = data['X_test']
    y_test = data['y_test']
    
    # Initialize XGBoost regressor
    xgb_model = xgb.XGBRegressor(
        n_estimators=100,
        max_depth=5,
        learning_rate=0.1,
        objective='reg:squarederror',
        random_state=42
    )
    
    # Train the model
    xgb_model.fit(X_train, y_train)
    
    # Make predictions
    y_pred = xgb_model.predict(X_test)
    
    # Evaluate MAE
    mae = mean_absolute_error(y_test, y_pred)
    
    # Save model and MAE
    ride_models[ride] = xgb_model
    ride_mae[ride] = mae
    
# Determine the maximum length of ride names for formatting
max_ride_length = max(len(ride) for ride in ride_splits.keys())

# Print header
header = f"{'Ride':<{max_ride_length}} | Seasonal Naive MAE | XGBoost MAE"
print(header)
print("-" * len(header))

# Print results
for ride in ride_splits.keys():
    naive_mae = baseline_results.get(ride, float('nan'))
    xgb_mae = ride_mae.get(ride, float('nan'))
    print(f"{ride:<{max_ride_length}} | {naive_mae:>17.2f} | {xgb_mae:>11.2f}")

Ride                                              | Seasonal Naive MAE | XGBoost MAE
------------------------------------------------------------------------------------
Guardians of the Galaxy - Mission: BREAKOUT!      |             33.87 |       22.61
Golden Zephyr                                     |              6.26 |        4.98
Goofy's Sky School                                |             23.17 |       13.05
Silly Symphony Swings                             |              7.14 |        4.54
Silly Symphony Swings Single Rider                |              0.00 |        0.00
The Little Mermaid - Ariel's Undersea Adventure   |             14.35 |        8.82
Games of Pixar Pier                               |              2.46 |        3.09
Walt Disney Imagineering Blue Sky Cellar          |              0.00 |        0.00
Incredicoaster                                    |             22.38 |       14.69
Inside Out Emotional Whirlwind                    |             12.62 |   