In [8]:
import pandas as pd

df = pd.read_csv("wait_times_2015_2024.csv")
df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
print(df.head)

<bound method NDFrame.head of              Date                                    Ride  \
0      2015-01-01                          Space Mountain   
1      2015-01-01                Indiana Jones™ Adventure   
2      2015-01-01                      Peter Pan's Flight   
3      2015-01-01                     Alice in Wonderland   
4      2015-01-01            Roger Rabbit's Car Toon Spin   
...           ...                                     ...   
174677 2024-12-31              Storybook Land Canal Boats   
174678 2024-12-31  The Many Adventures of Winnie the Pooh   
174679 2024-12-31                 Tiana's Bayou Adventure   
174680 2024-12-31    Tiana's Bayou Adventure Single Rider   
174681 2024-12-31       Walt Disney's Enchanted Tiki Room   

        Average Wait Time (mins)  Max Wait Time (mins)  
0                           50.0                  96.0  
1                           48.0                  91.0  
2                           42.0                  50.0  
3        

In [9]:
# Missing values
missing_counts = df.isna().sum()
missing_pct = df.isna().mean() * 100

missing_summary = pd.DataFrame({
    "missing_count": missing_counts,
    "missing_pct": missing_pct.round(2)
})

print("\nMissing values summary:")
display(missing_summary)


Missing values summary:


Unnamed: 0,missing_count,missing_pct
Date,0,0.0
Ride,0,0.0
Average Wait Time (mins),89401,51.18
Max Wait Time (mins),89780,51.4


In [11]:
# Basic stats & ranges
n_rides = df["Ride"].nunique()
print(f"\nNumber of unique rides: {n_rides}")

# Date range
min_date = df["Date"].min()
max_date = df["Date"].max()
print(f"Date range: {min_date.date()} to {max_date.date()}")

# Numeric summary for wait times
wait_cols = ["Average Wait Time (mins)", "Max Wait Time (mins)"]

print("\nDescriptive stats for wait time columns:")
display(df[wait_cols].describe())


Number of unique rides: 167
Date range: 2015-01-01 to 2024-12-31

Descriptive stats for wait time columns:


Unnamed: 0,Average Wait Time (mins),Max Wait Time (mins)
count,85281.0,84902.0
mean,21.007294,36.195025
std,14.859333,26.973308
min,0.0,5.0
25%,10.0,15.0
50%,18.0,30.0
75%,29.0,50.0
max,169.0,900.0


In [12]:
# List of rides to keep
rides = [
    "Peter Pan's Flight",
    "Alice in Wonderland",
    "Buzz Lightyear Astro Blasters",
    "Disneyland Monorail",
    "Dumbo the Flying Elephant",
    "King Arthur Carrousel",
    "Pinocchio's Daring Journey",
    "Snow White's Enchanted Wish",
    "Space Mountain",
    "Astro Orbitor",
    "Mad Tea Party",
    "Star Tours - The Adventures Continue",
    "Mr. Toad's Wild Ride",
    "\"it's a small world\" Holiday",
    "Autopia",
    "Big Thunder Mountain Railroad",
    "Casey Jr. Circus Train",
    "Chip 'n' Dale's GADGETcoaster",
    "Davy Crockett's Explorer Canoes",
    "Disneyland Railroad",
    "Finding Nemo Submarine Voyage",
    "Indiana Jones™ Adventure",
    "Jungle Cruise",
    "Mark Twain Riverboat",
    "Matterhorn Bobsleds",
    "Mickey & Minnie's Runaway Railway",
    "Millennium Falcon: Smugglers Run",
    "Pirates of the Caribbean",
    "Roger Rabbit's Car Toon Spin",
    "Sailing Ship Columbia",
    "Star Wars: Rise of the Resistance",
    "Storybook Land Canal Boats",
    "The Many Adventures of Winnie the Pooh",
    "Tiana's Bayou Adventure"
]
filtered_df = df[df["Ride"].isin(rides)]

print("Original rows:", len(df))
print("Filtered rows:", len(filtered_df))

# Optional: preview
display(filtered_df.head())


Original rows: 174682
Filtered rows: 96958


Unnamed: 0,Date,Ride,Average Wait Time (mins),Max Wait Time (mins)
0,2015-01-01,Space Mountain,50.0,96.0
1,2015-01-01,Indiana Jones™ Adventure,48.0,91.0
2,2015-01-01,Peter Pan's Flight,42.0,50.0
3,2015-01-01,Alice in Wonderland,37.0,50.0
4,2015-01-01,Roger Rabbit's Car Toon Spin,34.0,61.0
