# NHITS Model

## Data Manipulation

In [None]:
from sklearn.metrics import mean_absolute_error
import pandas as pd

In [None]:
# Read the merged dataset from disk; the 'Local Time' column is parsed as
# datetimes while loading.
# NOTE(review): the saved output of this cell echoes the read_csv call before
# the printed text, which looks like a warning raised here (its message is not
# captured in the output) - confirm and address if needed.
final_merged_df = pd.read_csv(
    '../data/final_merged_data.csv',
    parse_dates=['Local Time'],
)

# Headline figures: total row count and number of distinct ride names.
total_records = len(final_merged_df)
unique_ride_total = final_merged_df['Ride'].nunique()
print(f"Loaded {total_records:,} records")
print(f"Number of unique rides: {unique_ride_total}")

# Per-ride record counts, ordered by ride name (sort_index sorts on the
# ride-name index produced by value_counts).
ride_counts = final_merged_df['Ride'].value_counts().sort_index()
print("\nUnique rides:")
for ride, count in ride_counts.items():
    print(f"  {ride}: {count:,} records")

  final_merged_df = pd.read_csv('../data/final_merged_data.csv', parse_dates=['Local Time'])


Loaded 589,985 records
Number of unique rides: 42

Unique rides:
  Animation Academy: 18,021 records
  Games of Pixar Pier: 16,166 records
  Golden Zephyr: 18,021 records
  Goofy's Sky School: 18,021 records
  Grizzly River Run: 18,021 records
  Guardians of the Galaxy - Mission: BREAKOUT!: 18,021 records
  Guardians of the Galaxy - Monsters After Dark: 9,754 records
  Incredicoaster: 18,021 records
  Incredicoaster Single Rider: 18,021 records
  Inside Out Emotional Whirlwind: 18,021 records
  Jessie's Critter Carousel: 18,021 records
  Jumpin' Jellyfish: 18,021 records
  Luigi's Honkin' Haul-O-Ween: 1,516 records
  Luigi's Joy to the Whirl: 3,087 records
  Luigi's Rollickin' Roadsters: 11,564 records
  Mater's Graveyard JamBOOree: 3,344 records
  Mater's Jingle Jamboree: 3,087 records
  Mater's Junkyard Jamboree: 11,590 records
  Mickey's PhilharMagic: 18,021 records
  Monsters, Inc. Mike & Sulley to the Rescue!: 18,021 records
  Pixar Pal-A-Round - Swinging: 18,021 records
  Pixar P

In [None]:
# Rides to exclude from the dataset (considered redundant or not useful for
# this analysis). Names must match the 'Ride' column exactly. Note that
# 'World of Color – ONE' is written with an en dash, while
# 'World of Color - Season of Light' uses a plain hyphen.
rides_to_drop = [
    "Animation Academy",
    "Games of Pixar Pier",
    "Guardians of the Galaxy - Monsters After Dark",
    "Luigi's Honkin' Haul-O-Ween",
    "Luigi's Joy to the Whirl",
    "Mater's Graveyard JamBOOree",
    "Mater's Jingle Jamboree",
    "Red Car Trolley",
    "Redwood Creek Challenge Trail",
    "The Bakery Tour",
    "Sorcerer's Workshop",
    "Soarin' Over California",
    "Rogers: The Musical",
    "Walt Disney Imagineering Blue Sky Cellar",
    "Turtle Talk with Crush",
    "Mickey's PhilharMagic",
    "World of Color – ONE",
    "World of Color - Season of Light",
    "Villains Grove at Oogie Boogie Bash",
    "Silly Symphony Swings Single Rider",
    "Incredicoaster Single Rider",
    "Radiator Springs Racers Single Rider",
]

print(f"Rides before filtering: {final_merged_df['Ride'].nunique()}")
print(f"Total records before filtering: {len(final_merged_df):,}")

# isin() silently ignores names that do not occur in the data, so a misspelled
# entry would leave that ride in place without any error. Report such names
# explicitly (nothing is printed when every entry matches).
rides_present = set(final_merged_df['Ride'].unique())
unmatched_rides = [name for name in rides_to_drop if name not in rides_present]
if unmatched_rides:
    print(
        f"WARNING: {len(unmatched_rides)} name(s) in rides_to_drop were not "
        f"found in the data and had no effect: {unmatched_rides}"
    )

# Filter out the specified rides.
# This overwrites final_merged_df, so re-running the cell without reloading
# the data reports the already-filtered counts as the "before" figures.
final_merged_df = final_merged_df[~final_merged_df['Ride'].isin(rides_to_drop)]

print(f"Rides after filtering: {final_merged_df['Ride'].nunique()}")
print(f"Total records after filtering: {len(final_merged_df):,}")

# Echo the full exclusion list for the record.
print("\nDropped rides:")
for ride in rides_to_drop:
    print(f"  - {ride}")

Rides before filtering: 42
Total records before filtering: 589,985
Rides after filtering: 20
Total records after filtering: 344,631

Dropped rides:
  - Animation Academy
  - Games of Pixar Pier
  - Guardians of the Galaxy - Monsters After Dark
  - Luigi's Honkin' Haul-O-Ween
  - Luigi's Joy to the Whirl
  - Mater's Graveyard JamBOOree
  - Mater's Jingle Jamboree
  - Red Car Trolley
  - Redwood Creek Challenge Trail
  - The Bakery Tour
  - Sorcerer's Workshop
  - Soarin' Over California
  - Rogers: The Musical
  - Walt Disney Imagineering Blue Sky Cellar
  - Turtle Talk with Crush
  - Mickey's PhilharMagic
  - World of Color – ONE
  - World of Color - Season of Light
  - Villains Grove at Oogie Boogie Bash
  - Silly Symphony Swings Single Rider
  - Incredicoaster Single Rider
  - Radiator Springs Racers Single Rider


In [None]:
# Split the main dataset into separate, time-sorted DataFrames for each
# ride and store them in a dictionary (ride name -> DataFrame) for easy
# per-ride analysis.
#
# A single groupby pass replaces one full-column boolean comparison per ride.
# sort=False keeps the keys in order of first appearance, matching the order
# that Series.unique() would give. groupby skips rows whose 'Ride' is missing,
# so no empty frame keyed by NaN is created.
ride_dataframes = {}

for ride, ride_group in final_merged_df.groupby('Ride', sort=False):
    # sort_values returns a new frame, so each entry is independent of
    # final_merged_df; the index is reset to 0..n-1 within each ride.
    ride_df = ride_group.sort_values('Local Time').reset_index(drop=True)
    ride_dataframes[ride] = ride_df

## Model

In [None]:
from sklearn.metrics import mean_absolute_error
import pandas as pd

In [None]:
# Read the merged dataset from disk; the 'Local Time' column is parsed as
# datetimes while loading.
# NOTE(review): the saved output of this cell echoes the read_csv call before
# the printed text, which looks like a warning raised here (its message is not
# captured in the output) - confirm and address if needed.
final_merged_df = pd.read_csv(
    '../data/final_merged_data.csv',
    parse_dates=['Local Time'],
)

# Headline figures: total row count and number of distinct ride names.
total_records = len(final_merged_df)
unique_ride_total = final_merged_df['Ride'].nunique()
print(f"Loaded {total_records:,} records")
print(f"Number of unique rides: {unique_ride_total}")

# Per-ride record counts, ordered by ride name (sort_index sorts on the
# ride-name index produced by value_counts).
ride_counts = final_merged_df['Ride'].value_counts().sort_index()
print("\nUnique rides:")
for ride, count in ride_counts.items():
    print(f"  {ride}: {count:,} records")

  final_merged_df = pd.read_csv('../data/final_merged_data.csv', parse_dates=['Local Time'])


Loaded 589,985 records
Number of unique rides: 42

Unique rides:
  Animation Academy: 18,021 records
  Games of Pixar Pier: 16,166 records
  Golden Zephyr: 18,021 records
  Goofy's Sky School: 18,021 records
  Grizzly River Run: 18,021 records
  Guardians of the Galaxy - Mission: BREAKOUT!: 18,021 records
  Guardians of the Galaxy - Monsters After Dark: 9,754 records
  Incredicoaster: 18,021 records
  Incredicoaster Single Rider: 18,021 records
  Inside Out Emotional Whirlwind: 18,021 records
  Jessie's Critter Carousel: 18,021 records
  Jumpin' Jellyfish: 18,021 records
  Luigi's Honkin' Haul-O-Ween: 1,516 records
  Luigi's Joy to the Whirl: 3,087 records
  Luigi's Rollickin' Roadsters: 11,564 records
  Mater's Graveyard JamBOOree: 3,344 records
  Mater's Jingle Jamboree: 3,087 records
  Mater's Junkyard Jamboree: 11,590 records
  Mickey's PhilharMagic: 18,021 records
  Monsters, Inc. Mike & Sulley to the Rescue!: 18,021 records
  Pixar Pal-A-Round - Swinging: 18,021 records
  Pixar P

In [None]:
# Rides to exclude from the dataset (considered redundant or not useful for
# this analysis). Names must match the 'Ride' column exactly. Note that
# 'World of Color – ONE' is written with an en dash, while
# 'World of Color - Season of Light' uses a plain hyphen.
rides_to_drop = [
    "Animation Academy",
    "Games of Pixar Pier",
    "Guardians of the Galaxy - Monsters After Dark",
    "Luigi's Honkin' Haul-O-Ween",
    "Luigi's Joy to the Whirl",
    "Mater's Graveyard JamBOOree",
    "Mater's Jingle Jamboree",
    "Red Car Trolley",
    "Redwood Creek Challenge Trail",
    "The Bakery Tour",
    "Sorcerer's Workshop",
    "Soarin' Over California",
    "Rogers: The Musical",
    "Walt Disney Imagineering Blue Sky Cellar",
    "Turtle Talk with Crush",
    "Mickey's PhilharMagic",
    "World of Color – ONE",
    "World of Color - Season of Light",
    "Villains Grove at Oogie Boogie Bash",
    "Silly Symphony Swings Single Rider",
    "Incredicoaster Single Rider",
    "Radiator Springs Racers Single Rider",
]

print(f"Rides before filtering: {final_merged_df['Ride'].nunique()}")
print(f"Total records before filtering: {len(final_merged_df):,}")

# isin() silently ignores names that do not occur in the data, so a misspelled
# entry would leave that ride in place without any error. Report such names
# explicitly (nothing is printed when every entry matches).
rides_present = set(final_merged_df['Ride'].unique())
unmatched_rides = [name for name in rides_to_drop if name not in rides_present]
if unmatched_rides:
    print(
        f"WARNING: {len(unmatched_rides)} name(s) in rides_to_drop were not "
        f"found in the data and had no effect: {unmatched_rides}"
    )

# Filter out the specified rides.
# This overwrites final_merged_df, so re-running the cell without reloading
# the data reports the already-filtered counts as the "before" figures.
final_merged_df = final_merged_df[~final_merged_df['Ride'].isin(rides_to_drop)]

print(f"Rides after filtering: {final_merged_df['Ride'].nunique()}")
print(f"Total records after filtering: {len(final_merged_df):,}")

# Echo the full exclusion list for the record.
print("\nDropped rides:")
for ride in rides_to_drop:
    print(f"  - {ride}")

Rides before filtering: 42
Total records before filtering: 589,985
Rides after filtering: 20
Total records after filtering: 344,631

Dropped rides:
  - Animation Academy
  - Games of Pixar Pier
  - Guardians of the Galaxy - Monsters After Dark
  - Luigi's Honkin' Haul-O-Ween
  - Luigi's Joy to the Whirl
  - Mater's Graveyard JamBOOree
  - Mater's Jingle Jamboree
  - Red Car Trolley
  - Redwood Creek Challenge Trail
  - The Bakery Tour
  - Sorcerer's Workshop
  - Soarin' Over California
  - Rogers: The Musical
  - Walt Disney Imagineering Blue Sky Cellar
  - Turtle Talk with Crush
  - Mickey's PhilharMagic
  - World of Color – ONE
  - World of Color - Season of Light
  - Villains Grove at Oogie Boogie Bash
  - Silly Symphony Swings Single Rider
  - Incredicoaster Single Rider
  - Radiator Springs Racers Single Rider


In [None]:
# Split the main dataset into separate, time-sorted DataFrames for each
# ride and store them in a dictionary (ride name -> DataFrame) for easy
# per-ride analysis.
#
# A single groupby pass replaces one full-column boolean comparison per ride.
# sort=False keeps the keys in order of first appearance, matching the order
# that Series.unique() would give. groupby skips rows whose 'Ride' is missing,
# so no empty frame keyed by NaN is created.
ride_dataframes = {}

for ride, ride_group in final_merged_df.groupby('Ride', sort=False):
    # sort_values returns a new frame, so each entry is independent of
    # final_merged_df; the index is reset to 0..n-1 within each ride.
    ride_df = ride_group.sort_values('Local Time').reset_index(drop=True)
    ride_dataframes[ride] = ride_df