In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [None]:
##Scraping Home page - Master Details

In [3]:
URL = "https://www.thebostoncalendar.com/events?day=12&month=2&week=1&year=2025"

In [4]:
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

In [7]:
response = requests.get(URL, headers=HEADERS)
    
if response.status_code != 200:
    print(f"Failed to fetch page: {response.status_code}")
    
soup = BeautifulSoup(response.text, "lxml")

In [9]:
events = soup.find_all("li", class_="event")

In [11]:
event_data = []

In [13]:
for event in events:
    title_tag = event.find("h3").find("a")
    title = title_tag.get_text(strip=True) if title_tag else "No Title"

    link = title_tag["href"] if title_tag and title_tag.has_attr("href") else "No Link"
    full_link = f"https://www.thebostoncalendar.com{link}" if link.startswith("/") else link

    event_data.append({"Title": title, "Link": full_link})

In [15]:
df = pd.DataFrame(event_data)

In [17]:
df

Unnamed: 0,Title,Link
0,The Boston Run Show & The Boston Outdoor Expo,https://www.thebostoncalendar.com/events/free-...
1,Swiftie Galentine’s Day brunch ALL AGES,https://www.thebostoncalendar.com/events/swift...
2,¡Miércoles Maravilloso!: free in-person Spanis...,https://www.thebostoncalendar.com/events/mierc...
3,2025 Foundry Festival,https://www.thebostoncalendar.com/events/2025-...
4,91 FREE things to do in Boston this week: Feb ...,https://www.thebostoncalendar.com/events/91-fr...
...,...,...
1186,Trivia Night at Shopper's Cafe,https://www.thebostoncalendar.com/events/trivi...
1187,Trivia Night at Charlie's Kitchen,https://www.thebostoncalendar.com/events/trivi...
1188,The Cesária Évora Orchestra | LIVE SHOW,https://www.thebostoncalendar.com/events/the-c...
1189,FREE Yoga @ Arc'teryx MarketStreet,https://www.thebostoncalendar.com/events/free-...


In [19]:
event_list=[]

In [21]:
for index, row in df.iterrows():
    EVENT_URL = row["Link"]
    
    
    response = requests.get(EVENT_URL, headers=HEADERS)

    # Check response status
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "lxml")

        # Extract event title
        title_tag = soup.find("h1", itemprop="name")
        title = title_tag.get_text(strip=True) if title_tag else "No Title"

        # Extract event image URL
        image_tag = soup.find("a", class_="zoom_in")
        image_url = image_tag["href"] if image_tag and image_tag.has_attr("href") else "No Image"

        # Extract event time
        start_time_tag = soup.find("span", id="startdate", itemprop="startDate")
        end_time_tag = soup.find("span", id="startdate", itemprop="endDate")
        start_time = start_time_tag["content"] if start_time_tag and start_time_tag.has_attr("content") else "No Start Time"
        end_time = end_time_tag["content"] if end_time_tag and end_time_tag.has_attr("content") else "No End Time"

        # Extract event location
        location_name_tag = soup.find("span", itemprop="name")
        street_address_tag = soup.find("span", itemprop="streetAddress")
        city_tag = soup.find("span", itemprop="addressLocality")
        state_tag = soup.find("span", itemprop="addressRegion")
        postal_code_tag = soup.find("span", itemprop="postalCode")

        location_name = location_name_tag.get_text(strip=True) if location_name_tag else "No Location Name"
        street_address = street_address_tag.get_text(strip=True) if street_address_tag else "No Street Address"
        city = city_tag.get_text(strip=True) if city_tag else "No City"
        state = state_tag.get_text(strip=True) if state_tag else "No State"
        postal_code = postal_code_tag.get_text(strip=True) if postal_code_tag else "No Postal Code"
        full_address = f"{street_address}, {city}, {state} {postal_code}"

        # Extract event categories
        categories_tag = soup.find("b", string="Categories:")
        categories = categories_tag.find_next_sibling(string=True).strip() if categories_tag else "No Categories"

        # Extract event admission details
        admission_tag = soup.find("b", string="Admission:")
        admission = admission_tag.find_next_sibling("span").get_text(strip=True) if admission_tag else "No Admission Info"

        # Extract event description
        description_tag = soup.find("div", id="event_description")
        description = description_tag.get_text(strip=True) if description_tag else "No Description"

        # Check if any link in the description points to the same site and skip if found
        if description_tag:
            links = description_tag.find_all("a", href=True)
            for link in links:
                if "https://www.thebostoncalendar.com/events/" in link["href"]:
                    continue  # Skip this event if such a link is found

        # Store data in a dictionary
        event_data = {
            "Title": title,
            "Image URL": image_url,
            "Start Time": start_time,
            "End Time": end_time,
            "Location": location_name,
            "Full Address": full_address,
            "Categories": categories,
            "Admission": admission,
            "Description": description,
            "Event URL": EVENT_URL
        }

        event_list.append(event_data)


In [23]:
df_events = pd.DataFrame(event_list)

In [25]:
df_events

Unnamed: 0,Title,Image URL,Start Time,End Time,Location,Full Address,Categories,Admission,Description,Event URL
0,The Boston Run Show & The Boston Outdoor Expo,No Image,2025-03-01 10:00am EST,2025-03-02 4:00pm EST,Boston Convention & Exhibition Center,"415 Summer Street, Boston, MA 02110","Food, Meetup, Social Good, Sports & Active Life",FREE,The Boston Outdoor Expo is coming to Boston Ma...,https://www.thebostoncalendar.com/events/free-...
1,Swiftie Galentine’s Day brunch ALL AGES,No Image,2025-02-15 12:00pm EST,2025-02-15 4:00pm EST,The Greatest Bar,"262 Friend St, Boston, MA 02114","Food, Kid Friendly, Music, Party",$23,From your eras tour watch party HOSTESS WITH T...,https://www.thebostoncalendar.com/events/swift...
2,¡Miércoles Maravilloso!: free in-person Spanis...,No Image,2025-02-12 7:15pm EST,2025-02-12 8:15pm EST,Boston Area Spanish Exchange (BASE),"101 Arch St., Boston, MA 02110","Classes, Date Idea, Meetup",FREE,¡Miércoles Maravilloso!Free open-level Spanish...,https://www.thebostoncalendar.com/events/mierc...
3,2025 Foundry Festival,https://media.thebostoncalendar.com/images/q_a...,2025-02-15 12:00am EST,2025-02-22 12:00am EST,The Foundry,"101 Rogers St., Cambridge, MA 02142","Festivals & Fairs, Good for Groups, Kid Friend...",FREE,"This February, bring your friends and family t...",https://www.thebostoncalendar.com/events/2025-...
4,91 FREE things to do in Boston this week: Feb ...,https://media.thebostoncalendar.com/images/q_a...,2025-02-10 7:00am EST,2025-02-17 11:00pm EST,Boston,"Surrounding areas, Boston, MA","Animals, Art, Business & Professional, Classes...",FREE,Happy week of love! It’s time to celebrate our...,https://www.thebostoncalendar.com/events/91-fr...
...,...,...,...,...,...,...,...,...,...,...
1186,Trivia Night at Shopper's Cafe,https://media.thebostoncalendar.com/images/q_a...,2025-02-19 8:00pm EST,2025-02-19 10:00pm EST,Shoppers Cafe,"731 Moody St, Waltham, MA","Drinks, Food, Games, Good for Groups",FREE,Trivia Night!8PM start with @thinkaboutittrivi...,https://www.thebostoncalendar.com/events/trivi...
1187,Trivia Night at Charlie's Kitchen,https://media.thebostoncalendar.com/images/q_a...,2025-02-19 8:00pm EST,2025-02-19 10:00pm EST,Charlie's Kitchen,"10 Eliot Street, Cambridge, MA","Date Idea, Drinks, Games, Good for Groups, Mee...",FREE,Welcome to Charlie Kitchen'sTrivia: every Wedn...,https://www.thebostoncalendar.com/events/trivi...
1188,The Cesária Évora Orchestra | LIVE SHOW,https://media.thebostoncalendar.com/images/q_a...,2025-02-19 8:00pm EST,2025-02-19 11:30pm EST,The Wilbbur,"246 Tremont St, Boston, MA 02116","Drinks, Music, Nightlife, Shows",$49-69,*Doors one hour before show timeThe Cesária Év...,https://www.thebostoncalendar.com/events/the-c...
1189,FREE Yoga @ Arc'teryx MarketStreet,https://media.thebostoncalendar.com/images/q_a...,2025-02-19 8:00pm EST,2025-02-19 9:00pm EST,Arc'teryx MarketStreet,"110 Market St, Lynnfield, MA 01940","Classes, Date Idea, Games, Sports & Active Life",FREE,Join Us for an Exclusive Yoga Experience at Ar...,https://www.thebostoncalendar.com/events/free-...
