# Seasonal Naive model based on the prior week's wait times

## Data Manipulation

In [1]:
from sklearn.metrics import mean_absolute_error
import pandas as pd

In [2]:
# Read the merged wait-time dataset; 'Local Time' is parsed into datetimes on load.
final_merged_df = pd.read_csv(
    '../data/final_merged_data.csv',
    parse_dates=['Local Time'],
)

# Headline numbers: total row count and how many distinct rides are present.
total_records = len(final_merged_df)
unique_ride_count = final_merged_df['Ride'].nunique()
print(f"Loaded {total_records:,} records")
print(f"Number of unique rides: {unique_ride_count}")

# Per-ride record counts, listed alphabetically by ride name.
# value_counts() does the tallying; sort_index() orders the result by ride name.
print("\nUnique rides:")
ride_counts = final_merged_df['Ride'].value_counts().sort_index()
for ride_name, n_records in ride_counts.items():
    print(f"  {ride_name}: {n_records:,} records")

  final_merged_df = pd.read_csv('../data/final_merged_data.csv', parse_dates=['Local Time'])


Loaded 589,985 records
Number of unique rides: 42

Unique rides:
  Animation Academy: 18,021 records
  Games of Pixar Pier: 16,166 records
  Golden Zephyr: 18,021 records
  Goofy's Sky School: 18,021 records
  Grizzly River Run: 18,021 records
  Guardians of the Galaxy - Mission: BREAKOUT!: 18,021 records
  Guardians of the Galaxy - Monsters After Dark: 9,754 records
  Incredicoaster: 18,021 records
  Incredicoaster Single Rider: 18,021 records
  Inside Out Emotional Whirlwind: 18,021 records
  Jessie's Critter Carousel: 18,021 records
  Jumpin' Jellyfish: 18,021 records
  Luigi's Honkin' Haul-O-Ween: 1,516 records
  Luigi's Joy to the Whirl: 3,087 records
  Luigi's Rollickin' Roadsters: 11,564 records
  Mater's Graveyard JamBOOree: 3,344 records
  Mater's Jingle Jamboree: 3,087 records
  Mater's Junkyard Jamboree: 11,590 records
  Mickey's PhilharMagic: 18,021 records
  Monsters, Inc. Mike & Sulley to the Rescue!: 18,021 records
  Pixar Pal-A-Round - Swinging: 18,021 records
  Pixar P

In [3]:
# Rides excluded from the analysis. The filter below removes every row whose
# 'Ride' value appears in this list.
rides_to_drop = [
    "Animation Academy",
    "Games of Pixar Pier",
    "Guardians of the Galaxy - Monsters After Dark",
    "Luigi's Honkin' Haul-O-Ween",
    "Luigi's Joy to the Whirl",
    "Mater's Graveyard JamBOOree",
    "Mater's Jingle Jamboree",
    "Red Car Trolley",
    "Redwood Creek Challenge Trail",
    "The Bakery Tour",
    "Sorcerer's Workshop",
    "Soarin' Over California",
    "Rogers: The Musical",
    "Walt Disney Imagineering Blue Sky Cellar",
    "Turtle Talk with Crush",
    "Mickey's PhilharMagic",
    "World of Color – ONE",
    "World of Color - Season of Light",
    "Villains Grove at Oogie Boogie Bash",
    "Silly Symphony Swings Single Rider",
    "Incredicoaster Single Rider",
    "Radiator Springs Racers Single Rider",
]

# Report the dataset size before anything is removed.
print(f"Rides before filtering: {final_merged_df['Ride'].nunique()}")
print(f"Total records before filtering: {len(final_merged_df):,}")

# Keep only the rows whose ride is NOT in the drop list.
keep_mask = ~final_merged_df['Ride'].isin(rides_to_drop)
final_merged_df = final_merged_df[keep_mask]

# Report the dataset size after filtering.
print(f"Rides after filtering: {final_merged_df['Ride'].nunique()}")
print(f"Total records after filtering: {len(final_merged_df):,}")

# Echo the drop list so the output records exactly what was excluded.
print("\nDropped rides:")
for dropped_ride in rides_to_drop:
    print(f"  - {dropped_ride}")

Rides before filtering: 42
Total records before filtering: 589,985
Rides after filtering: 20
Total records after filtering: 344,631

Dropped rides:
  - Animation Academy
  - Games of Pixar Pier
  - Guardians of the Galaxy - Monsters After Dark
  - Luigi's Honkin' Haul-O-Ween
  - Luigi's Joy to the Whirl
  - Mater's Graveyard JamBOOree
  - Mater's Jingle Jamboree
  - Red Car Trolley
  - Redwood Creek Challenge Trail
  - The Bakery Tour
  - Sorcerer's Workshop
  - Soarin' Over California
  - Rogers: The Musical
  - Walt Disney Imagineering Blue Sky Cellar
  - Turtle Talk with Crush
  - Mickey's PhilharMagic
  - World of Color – ONE
  - World of Color - Season of Light
  - Villains Grove at Oogie Boogie Bash
  - Silly Symphony Swings Single Rider
  - Incredicoaster Single Rider
  - Radiator Springs Racers Single Rider


In [4]:
# Build a dictionary mapping each ride name to its own DataFrame.
# Every per-ride frame is an independent copy of that ride's rows, ordered by
# 'Local Time' and given a fresh 0..n-1 index.
ride_dataframes = {
    ride_name: (
        final_merged_df[final_merged_df['Ride'] == ride_name]
        .copy()
        .sort_values('Local Time')
        .reset_index(drop=True)
    )
    for ride_name in final_merged_df['Ride'].unique()
}

## Model

In [5]:
# Seasonal Naive baseline: the prediction for a timestamp is the wait time that
# was observed at the same clock time one week earlier.
#
# The lookup is done on timestamps rather than on row positions. A positional
# shift of 7*24*4 rows only equals "one week ago" when every ride has a
# gap-free, duplicate-free 15-minute series; a single missing or extra row
# would silently misalign every later prediction. On a perfectly regular
# series the timestamp lookup gives the same result as the positional shift.
SEASONAL_PERIOD = pd.Timedelta(days=7)
SAMPLING_INTERVAL = pd.Timedelta(minutes=15)    # sampling interval stated for this dataset
MATCH_TOLERANCE = SAMPLING_INTERVAL / 2         # accept the nearest sample within half an interval


def seasonal_naive_forecast(times, values, period=SEASONAL_PERIOD, tolerance=MATCH_TOLERANCE):
    """Return, for each timestamp, the value observed one `period` earlier.

    Parameters
    ----------
    times : pandas.Series of datetimes
        Observation timestamps.
    values : pandas.Series
        Observed values, positionally paired with `times`.
    period : pandas.Timedelta
        How far back to look (default: 7 days).
    tolerance : pandas.Timedelta
        Maximum distance between the requested timestamp (`t - period`) and
        the nearest available observation (default: half of 15 minutes).

    Returns
    -------
    pandas.Series
        Float series indexed like `times`; NaN wherever no observation exists
        within `tolerance` of `t - period` (or where the timestamp is missing).
    """
    forecast = pd.Series(float('nan'), index=times.index, dtype='float64')

    # Historical lookup table: timestamp -> observed value.
    # reindex(method='nearest') needs a unique, sorted index, so rows without a
    # timestamp are removed, duplicates collapse to the last one, and the
    # result is sorted.
    history = pd.Series(values.to_numpy(), index=pd.DatetimeIndex(times))
    history = history[history.index.notna()]
    history = history[~history.index.duplicated(keep='last')].sort_index()
    if history.empty:
        return forecast

    has_time = times.notna().to_numpy()
    lookup_times = pd.DatetimeIndex(times[has_time]) - period
    matched = history.reindex(lookup_times, method='nearest', tolerance=tolerance)
    forecast.loc[has_time] = matched.to_numpy()
    return forecast


baseline_results = {}

# Loop over each ride's data.
# NOTE: each ride's frame in `ride_dataframes` is replaced by its trimmed,
# feature-enriched version, so this cell is meant to be run once after the
# per-ride split; re-running it would trim another week from each ride.
for ride, df in ride_dataframes.items():

    # Feature engineering: calendar features derived from the timestamp.
    # assign() returns a new frame instead of mutating the input.
    local_time = df['Local Time']
    day_of_week = local_time.dt.dayofweek
    df = df.assign(
        hour=local_time.dt.hour,
        day_of_week=day_of_week,
        month=local_time.dt.month,
        is_weekend=day_of_week.isin([5, 6]).astype(int),    # 5 = Saturday, 6 = Sunday
    )

    # Seasonal Naive feature: wait time at the same time last week.
    df['Predicted Wait Time'] = seasonal_naive_forecast(df['Local Time'], df['Wait Time'])

    # Drop rows that have no "previous week" value (the first week of data and
    # any row whose prior-week sample is missing).
    df = df.dropna(subset=['Predicted Wait Time'])

    # Baseline error: mean absolute difference between predicted and actual
    # wait times. A ride with no usable rows gets NaN instead of raising.
    if df.empty:
        mae = float('nan')
    else:
        mae = mean_absolute_error(df['Wait Time'], df['Predicted Wait Time'])
    baseline_results[ride] = mae

    # Save the updated DataFrame back to the dictionary
    ride_dataframes[ride] = df

# Display baseline MAE for each ride
for ride, mae in baseline_results.items():
    print(f"{ride}: Seasonal Naive MAE = {mae:.2f} minutes")

Guardians of the Galaxy - Mission: BREAKOUT!: Seasonal Naive MAE = 33.87 minutes
Golden Zephyr: Seasonal Naive MAE = 6.26 minutes
Goofy's Sky School: Seasonal Naive MAE = 23.17 minutes
Silly Symphony Swings: Seasonal Naive MAE = 7.14 minutes
The Little Mermaid - Ariel's Undersea Adventure: Seasonal Naive MAE = 14.35 minutes
Incredicoaster: Seasonal Naive MAE = 22.38 minutes
Inside Out Emotional Whirlwind: Seasonal Naive MAE = 12.62 minutes
Jessie's Critter Carousel: Seasonal Naive MAE = 2.30 minutes
Jumpin' Jellyfish: Seasonal Naive MAE = 7.02 minutes
Pixar Pal-A-Round - Swinging: Seasonal Naive MAE = 19.78 minutes
Pixar Pal-A-Round – Non-Swinging: Seasonal Naive MAE = 18.51 minutes
Toy Story Midway Mania!: Seasonal Naive MAE = 21.41 minutes
WEB SLINGERS: A Spider-Man Adventure: Seasonal Naive MAE = 21.94 minutes
WEB SLINGERS: A Spider-Man Adventure Single Rider: Seasonal Naive MAE = 0.00 minutes
Luigi's Rollickin' Roadsters: Seasonal Naive MAE = 15.86 minutes
Mater's Junkyard Jamboree