In [7]:
import time
import pandas as pd
import requests
from bs4 import BeautifulSoup

url_path = 'data/Rides/date_url.csv'
url = "https://www.thrill-data.com/trip-planning/weather-calendar/disneyland/calendar/2023"


In [8]:
def extract_weather_data_from_url(url, df):
    print(f"Fetching webpage from {url}...")
    response = requests.get(url)

    
    if response.status_code != 200:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return
    print("Webpage fetched successfully!")

    
    print("Parsing HTML content...")
    soup = BeautifulSoup(response.content, 'html.parser')

    
    dates = []
    temperatures = []
    precipitations = []
    humidities = []
    cloud_covers = []

    print("Extracting weather data...")
    rows = soup.find_all('tr')

    for row in rows:
        date_section = row.find('div', class_='button-set-label')
        if not date_section:
            continue

        date_str = date_section.get_text(strip=True)
        try:
            formatted_date = pd.to_datetime(f'2023 {date_str}', format='%Y %b %d').strftime('%Y-%m-%d')
        except ValueError:
            print(f"Skipping invalid date: {date_str}")
            continue

        print(f"Processing data for {formatted_date}...")

        
        temp_section = row.find('div', title=lambda x: x and 'Historical average temperature' in x)
        temperature = temp_section.get_text(strip=True).replace('°F', '').strip() if temp_section else None
        print(f"Temperature: {temperature}°F")

        
        rain_section = row.find('div', title=lambda x: x and 'Rain (GPM)' in x)
        precipitation = rain_section.get_text(strip=True) if rain_section else None
        print(f"Precipitation: {precipitation} GPM")

        
        humidity_section = row.find('div', title=lambda x: x and 'Relative Humidity' in x)
        humidity = humidity_section.get_text(strip=True).replace('%', '').strip() if humidity_section else None
        print(f"Humidity: {humidity}%")

        
        cloud_section = row.find('div', title=lambda x: x and 'Cloud Cover' in x)
        cloud_cover = cloud_section.get_text(strip=True).replace('%', '').strip() if cloud_section else None
        print(f"Cloud Cover: {cloud_cover}%")

        
        dates.append(formatted_date)
        temperatures.append(temperature)
        precipitations.append(precipitation)
        humidities.append(humidity)
        cloud_covers.append(cloud_cover)

    
    print("Creating DataFrame from scraped data...")
    weather_df = pd.DataFrame({
        'Date': dates,
        'Temperature': temperatures,
        'Precipitation': precipitations,
        'Humidity': humidities,
        'Cloud Cover': cloud_covers
    })

    
    print("Merging scraped data with the original DataFrame...")
    merged_df = pd.merge(df, weather_df, on='Date', how='left')

    
    output_file = 'updated_weather_data_2023.csv'
    merged_df.to_csv(output_file, index=False)

    print(f"Data scraping and merging complete. Data saved to '{output_file}'.")


df = pd.read_csv('data/Rides/date_url.csv')
extract_weather_data_from_url(url, df)

Fetching webpage from https://www.thrill-data.com/trip-planning/weather-calendar/disneyland/calendar/2023...
Webpage fetched successfully!
Parsing HTML content...
Extracting weather data...
Processing data for 2023-01-01...
Temperature: 62°F
Precipitation: 0.15 GPM
Humidity: 82%
Cloud Cover: 49%
Processing data for 2023-01-02...
Temperature: 59°F
Precipitation: 0.09 GPM
Humidity: 80%
Cloud Cover: 65%
Processing data for 2023-01-09...
Temperature: 62°F
Precipitation: 0.19 GPM
Humidity: 75%
Cloud Cover: 100%
Processing data for 2023-01-16...
Temperature: 61°F
Precipitation: 0.26 GPM
Humidity: 76%
Cloud Cover: 72%
Processing data for 2023-01-23...
Temperature: 66°F
Precipitation: 0.0 GPM
Humidity: 36%
Cloud Cover: 6%
Processing data for 2023-01-30...
Temperature: 62°F
Precipitation: 0.33 GPM
Humidity: 72%
Cloud Cover: 87%
Processing data for 2023-02-06...
Temperature: 70°F
Precipitation: 0.0 GPM
Humidity: 60%
Cloud Cover: 5%
Processing data for 2023-02-13...
Temperature: 63°F
Precipitatio