In [None]:
# Import
import requests
from bs4 import BeautifulSoup
import time
import pandas as pd
import pickle


In [None]:
base_url = 'https://visitseattle.org/events/page/'
LAST_PAGE = 41        # listing pages 1..LAST_PAGE (inclusive) are scraped
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection hangs the cell forever
detail_links = []

# Walk the paginated event listing and collect the URL of every event detail page.
for page_number in range(1, LAST_PAGE + 1):
    url = base_url + str(page_number)
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        # Treat 4xx/5xx as a failure instead of parsing the error page as a listing.
        response.raise_for_status()
    except requests.RequestException as exc:
        # Keep going: one bad page should not discard the links gathered so far.
        print(f'Skipping {url}: {exc}')
    else:
        soup = BeautifulSoup(response.text, 'html.parser')

        # Each search result exposes its detail link as the anchor inside the <h3> title.
        for link in soup.select('div.search-result-preview > div > h3 > a'):
            href = link.get('href')  # .get() avoids a KeyError on anchors without href
            if href:
                detail_links.append(href)

    time.sleep(1)  # Be respectful by not overloading the server

# Persist the links so the detail scrape can be re-run without repeating this crawl.
with open('detail_links.pkl', 'wb') as f:
    pickle.dump(detail_links, f)


In [None]:
# NOTE: pickle.load can execute arbitrary code embedded in the file. Only load a
# 'detail_links.pkl' that this notebook wrote itself, never one from an untrusted source.
with open('detail_links.pkl', 'rb') as f:
    detail_links = pickle.load(f)

REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection hangs the cell forever
EVENT_HEADER = '.medium-6.columns.event-top'  # container holding all the scraped fields


def _select_text(soup, selector):
    """Return the text of the first element matching ``selector``, or '' if none matches.

    ``select_one`` returns None when nothing matches; reading ``.text`` on that
    would raise AttributeError and abort the entire crawl on one odd page.
    """
    node = soup.select_one(selector)
    return node.text if node is not None else ''


event_list = []

# Visit every event detail page and pull the fields out of its header block.
for event_url in detail_links:
    try:
        response = requests.get(event_url, timeout=REQUEST_TIMEOUT)
        # Skip error pages rather than recording a row of blanks for them.
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f'Skipping {event_url}: {exc}')
    else:
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract event details; a field the page does not provide is stored as ''.
        event_list.append({
            'Name': _select_text(soup, f'{EVENT_HEADER} > h1'),
            'Date': _select_text(soup, f'{EVENT_HEADER} > h4 > span:nth-child(1)'),
            # Only the location is stripped, matching the format of the existing CSV.
            'Location': _select_text(soup, f'{EVENT_HEADER} > h4 > span:nth-child(2)').strip(),
            'Type': _select_text(soup, f'{EVENT_HEADER} > a:nth-child(3)'),
            'Region': _select_text(soup, f'{EVENT_HEADER} > a:nth-child(4)'),
        })

    time.sleep(1)  # Be respectful by not overloading the server

pd.DataFrame(event_list).to_csv('event_data.csv', index=False)


In [None]:
# Extract locations from the event list for weather data.
# NOTE: relies on `event_list` built by the event-detail cell earlier in this notebook.
Location_list = [event['Location'] for event in event_list]

REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection hangs the cell forever
# Both Nominatim and api.weather.gov ask clients to identify themselves; the default
# python-requests User-Agent may be rejected. Replace with your own app name / contact.
API_HEADERS = {'User-Agent': 'seattle-events-weather-notebook'}


def _get_json(url, params=None):
    """GET ``url`` and return the decoded JSON body, or None if the request fails.

    Network errors, HTTP error statuses and non-JSON bodies are reported and
    turned into None so one bad lookup cannot abort the whole loop.
    """
    try:
        response = requests.get(url, params=params, headers=API_HEADERS,
                                timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers JSON decoding failures across requests versions.
        print(f'Request to {url} failed: {exc}')
        return None


def _daytime_forecast(location):
    """Return the daytime forecast rows for ``location``.

    The location is geocoded with Nominatim, resolved to a forecast URL via
    api.weather.gov/points, and the daytime periods of that forecast are
    returned as dicts. If any step yields nothing usable, a single
    'No data' row is returned so the location still appears in the output.
    """
    no_data = [{
        'Location': location,
        'Day': 'No data',
        'Temperature': 'No data',
        'ShortForecast': 'No data'
    }]

    location_query = f'{location}, Seattle, WA'
    location_data = _get_json("https://nominatim.openstreetmap.org/search",
                              params={"q": location_query, "format": "jsonv2"})
    time.sleep(1)  # Nominatim's usage policy allows at most one request per second
    if not isinstance(location_data, list) or not location_data:
        return no_data

    latitude = location_data[0]['lat']
    longitude = location_data[0]['lon']
    weather_data = _get_json(f"https://api.weather.gov/points/{latitude},{longitude}")
    if not isinstance(weather_data, dict):
        return no_data
    forecast_url = (weather_data.get('properties') or {}).get('forecast')
    if not forecast_url:
        return no_data

    forecast_data = _get_json(forecast_url)
    if not isinstance(forecast_data, dict):
        return no_data
    periods = (forecast_data.get('properties') or {}).get('periods') or []

    # Keep only daytime periods (the forecast also contains night periods).
    rows = [
        {
            'Location': location,
            'Day': period['name'],
            'Temperature': period['temperature'],
            'ShortForecast': period['shortForecast']
        }
        for period in periods
        if period['isDaytime']
    ]
    return rows or no_data


# Initialize list for weather data
weather_list = []

# Many events share a venue; look each distinct location up once and reuse the result.
forecast_by_location = {}

# Fetch weather data for each location (one set of rows per event, as before).
for location in Location_list:
    if location not in forecast_by_location:
        forecast_by_location[location] = _daytime_forecast(location)
    weather_list.extend(forecast_by_location[location])

# Save weather data to a CSV file
pd.DataFrame(weather_list).to_csv('weather_data.csv', index=False)
