## Import Libraries and Packages

In [1]:
!pip install selenium
!pip install webdriver-manager
!pip install pandas
!pip install openpyxl


!pip install beautifulsoup4 lxml
!pip install psutil

In [2]:
# ChromeDriver is managed automatically by webdriver-manager on local installs

In [61]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, StaleElementReferenceException, NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import psutil
import os
import time
import re

## Setup Driver, Formatting, Selenium, Event processing Functions

In [4]:
def setup_driver():
    """Create and return a headless Chrome WebDriver.

    The chromedriver binary is resolved through webdriver-manager, and the
    driver service log is pointed at ``os.devnull``.

    Returns:
        A new ``webdriver.Chrome`` instance. The caller owns it and is
        responsible for calling ``quit()`` when finished.
    """
    chrome_flags = (
        "--headless",
        "--disable-gpu",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--log-level=3",
        "--window-size=1920,1080",
    )

    chrome_options = webdriver.ChromeOptions()
    for flag in chrome_flags:
        chrome_options.add_argument(flag)
    chrome_options.add_experimental_option("excludeSwitches", ["enable-logging"])

    # NOTE(review): newer Selenium releases replace `log_path` with `log_output` —
    # confirm against the installed Selenium version.
    driver_service = Service(ChromeDriverManager().install(), log_path=os.devnull)
    return webdriver.Chrome(options=chrome_options, service=driver_service)


def get_month_year(driver, timeout=8):
    """Return the month currently shown by the calendar, or None.

    Reads the text of ``div.month_name`` (the State Farm Arena and TD Garden
    calendars both use this template) and parses it as "<Month name> <year>".

    Args:
        driver: Selenium WebDriver with the calendar page loaded.
        timeout: Seconds to wait for the header element to be present.

    Returns:
        ``datetime`` for the first day of the displayed month, or ``None`` if
        the header is missing or its text cannot be parsed (all errors are
        swallowed on purpose so callers can treat None as "stop").
    """
    header_locator = (By.CSS_SELECTOR, "div.month_name")
    try:
        header = WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located(header_locator)
        )
        month_label = header.text.strip()
        return datetime.strptime(month_label, "%B %Y")
    except Exception:
        return None


def click_next_month(driver, timeout=5):
    """Click the calendar's "next month" control.

    The control is ``div.cal-next`` — a div with role=button, not an anchor —
    so the click is issued through JavaScript rather than a native click.

    Args:
        driver: Selenium WebDriver with the calendar page loaded.
        timeout: Seconds to wait for the control to be present.

    Returns:
        True if the click was dispatched (followed by a fixed 2 s pause for the
        calendar to re-render), False if anything went wrong.
    """
    next_locator = (By.CSS_SELECTOR, "div.cal-next")
    try:
        next_control = WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located(next_locator)
        )
        driver.execute_script("arguments[0].click();", next_control)
        time.sleep(2)
    except Exception:
        return False
    return True


def navigate_to_month(driver, target_month, max_clicks=8):
    """Advance the calendar forward until it shows ``target_month`` or later.

    Args:
        driver: Selenium WebDriver with the calendar page loaded.
        target_month: ``datetime`` for the first day of the month to reach.
        max_clicks: Upper bound on the number of "next month" clicks.

    Returns:
        None. The function stops silently when the target is reached, when the
        current month cannot be read, when the "next" control cannot be
        clicked, or after ``max_clicks`` attempts.
    """
    for _ in range(max_clicks):
        cur = get_month_year(driver)
        if cur is None or cur >= target_month:
            return
        print(f"  Navigating: {cur.strftime('%B %Y')} -> next")
        if not click_next_month(driver):
            # The control is missing or unclickable; retrying would only repeat
            # the same timeout, so give up instead of burning the remaining passes.
            return


In [5]:
# Location of the venue spreadsheet. It is a URL, so the workbook is fetched
# over the network every time this cell runs.
spreadsheet_path = "https://github.com/tommygarner/tommygarner.github.io/raw/refs/heads/main/projects/basketball/nba%20schedule%20optimizer/venue_events.xlsx"
# The scraping cells below look rows up by the 'Venue' column and read the
# calendar URL from the 'Website' column.
venues = pd.read_excel(spreadsheet_path)
venues

Unnamed: 0,Team,City,Venue,"City, State",Website,Unnamed: 5
0,Atlanta Hawks,ATL,State Farm Arena,"Atlanta, GA",https://www.statefarmarena.com/events/calendar,
1,Boston Celtics,BOS,TD Garden,"Boston, MA",https://www.tdgarden.com/calendar,
2,Brooklyn Nets,BKN,Barclays Center,"Brooklyn, NY",https://www.barclayscenter.com/events/event-ca...,
3,Charlotte Hornets,CHA,Spectrum Center,"Charlotte, NC",https://www.spectrumcentercharlotte.com/events,
4,Chicago Bulls,CHI,United Center,"Chicago, IL",https://www.unitedcenter.com/events/month/,
5,Cleveland Cavaliers,CLE,Rocket Mortgage FieldHouse,"Cleveland, OH",https://www.rocketmortgagefieldhouse.com/event...,
6,Dallas Mavericks,DAL,American Airlines Center,"Dallas, TX",https://www.americanairlinescenter.com/events-...,
7,Denver Nuggets,DEN,Ball Arena,"Denver, CO",https://www.ballarena.com/events-tickets/calen...,
8,Detroit Pistons,DET,Little Caesars Arena,"Detroit, MI",https://www.313presents.com/events/event-calendar,
9,Golden State Warriors,GS,Chase Center,"San Francisco, CA",https://chasecenter.com/events/,


In [6]:
# Playoff date range: Play-In through potential Game 7 of NBA Finals.
# Both bounds are inclusive — the scrapers keep events whose date satisfies
# start_date <= event date <= end_date.
start_date = datetime(2026, 4, 14)   # Play-In begins
end_date   = datetime(2026, 6, 19)   # Potential Game 7 of Finals

# Extract month number, month name and year from end_date.
# NOTE(review): these three names are not referenced by any cell visible in
# this part of the notebook — confirm they are still needed.
end_month_num  = end_date.month
end_month_text = end_date.strftime("%B")
end_year       = end_date.year


## State Farm Arena

In [7]:
# List to store all events
all_events = []

def scrape_state_farm(venue_name, url):
    """Scrape State Farm Arena calendar events inside the playoff window.

    Relies on the notebook-level names ``driver``, ``start_date`` and
    ``end_date`` as well as the helpers ``navigate_to_month``,
    ``get_month_year`` and ``click_next_month``.

    Args:
        venue_name: Value stored in the "Venue" field of every record.
        url: Calendar page to open.

    Returns:
        List of dicts with keys Venue, Title, Date (YYYY-MM-DD), Time, Link.
        Only events with start_date <= date <= end_date are kept.
    """
    print(f"Scraping events for {venue_name} ({url})...")
    driver.get(url)
    time.sleep(3)
    events_data = []

    # Navigate forward to the playoff start month
    navigate_to_month(driver, start_date.replace(day=1))

    last_month = None     # month handled on the previous pass
    stalled_reads = 0     # consecutive reads that still showed last_month
    while True:
        cur = get_month_year(driver)
        if cur is None:
            break

        if cur == last_month:
            # The "next" click was dispatched but the calendar still shows the
            # month we already scraped. Give it a little more time; if it never
            # moves, stop rather than loop forever collecting duplicates.
            stalled_reads += 1
            if stalled_reads >= 3:
                print("  Calendar did not advance; stopping.")
                break
            time.sleep(2)
            continue
        last_month, stalled_reads = cur, 0

        print(f"  Month: {cur.strftime('%B %Y')}")
        if cur > end_date.replace(day=1):
            break

        event_elements = driver.find_elements(By.CLASS_NAME, "hasEvent")
        print(f"  Found {len(event_elements)} events")

        for event in event_elements:
            try:
                date_str = event.get_attribute("data-fulldate")
                if not date_str:
                    continue
                event_date = datetime.strptime(date_str, "%m-%d-%Y")
                if not (start_date <= event_date <= end_date):
                    continue
                try:
                    a_tag = event.find_element(By.CSS_SELECTOR, ".desc a")
                    title = a_tag.get_attribute("textContent").strip()
                    link  = a_tag.get_attribute("href") or ""
                    aria  = a_tag.get_attribute("aria-label") or ""
                    # The show time is taken from the text after "Showings at" in the aria-label
                    etime = aria.split("Showings at")[-1].strip() if "Showings at" in aria else "TBA"
                except Exception:
                    # Keep the date even when the detail link cannot be read
                    title, link, etime = "Unknown", "", "TBA"
                events_data.append({"Venue": venue_name, "Title": title,
                                    "Date": event_date.strftime("%Y-%m-%d"), "Time": etime, "Link": link})
            except Exception as e:
                print(f"  Error: {e}")

        if not click_next_month(driver):
            break

    print(f"  Collected {len(events_data)} events")
    return events_data


In [8]:
# Find the row for State Farm Arena in the venues DataFrame
# (done before launching Chrome so a failed lookup does not leave a browser running)
state_farm_arena_venue = venues[venues['Venue'] == 'State Farm Arena'].iloc[0]
state_farm_arena_name = state_farm_arena_venue['Venue']
state_farm_arena_url = state_farm_arena_venue['Website']

# scrape_state_farm reads the notebook-level `driver`
driver = setup_driver()
try:
    state_farm_arena_events_data = scrape_state_farm(state_farm_arena_name, state_farm_arena_url)
finally:
    # Always shut the headless Chrome process down, even if scraping raised;
    # the next venue cell starts its own driver.
    driver.quit()

Scraping events for State Farm Arena (https://www.statefarmarena.com/events/calendar)...
Found 3 events in the current month for State Farm Arena
Current month for State Farm Arena: August 2025
Moved to the next month.
Found 15 events in the current month for State Farm Arena
Current month for State Farm Arena: September 2025
Moved to the next month.
Found 14 events in the current month for State Farm Arena
Current month for State Farm Arena: October 2025
Moved to the next month.
Found 17 events in the current month for State Farm Arena
Current month for State Farm Arena: November 2025
Moved to the next month.
Found 13 events in the current month for State Farm Arena
Current month for State Farm Arena: December 2025
Moved to the next month.
Found 6 events in the current month for State Farm Arena
Current month for State Farm Arena: January 2026
Moved to the next month.
Found 6 events in the current month for State Farm Arena
Current month for State Farm Arena: February 2026
Moved to th

In [18]:
# Turn the list of event dicts returned by scrape_state_farm into a DataFrame
# (one column per dict key: Venue, Title, Date, Time, Link) and display it.
state_farm_events_data = pd.DataFrame(state_farm_arena_events_data)
state_farm_events_data

Unnamed: 0,Venue,Title,Date,Time,Link,Event_Key
0,State Farm Arena,Hawks vs Raptors,2025-10-22,7:30pm,https://www.statefarmarena.com/events/detail/h...,State Farm Arena_Hawks vs Raptors_2025-10-22_7...
1,State Farm Arena,Hawks vs Thunder,2025-10-25,7:30pm,https://www.statefarmarena.com/events/detail/h...,State Farm Arena_Hawks vs Thunder_2025-10-25_7...
2,State Farm Arena,Reneé Rapp,2025-10-26,7:30pm,https://www.statefarmarena.com/events/detail/r...,State Farm Arena_Reneé Rapp_2025-10-26_7:30pm
3,State Farm Arena,Jonas Brothers,2025-10-28,7:30pm,https://www.statefarmarena.com/events/detail/j...,State Farm Arena_Jonas Brothers_2025-10-28_7:30pm
4,State Farm Arena,The Bad Boy Mowers Series,2025-10-30,"6:00pm, 8:30pm",https://www.statefarmarena.com/events/detail/t...,State Farm Arena_The Bad Boy Mowers Series_202...
...,...,...,...,...,...,...
56,State Farm Arena,Hawks vs Kings,2026-03-28,7:30pm,https://www.statefarmarena.com/events/detail/h...,State Farm Arena_Hawks vs Kings_2026-03-28_7:30pm
57,State Farm Arena,Hawks vs Celtics,2026-03-30,7:30pm,https://www.statefarmarena.com/events/detail/h...,State Farm Arena_Hawks vs Celtics_2026-03-30_7...
58,State Farm Arena,MANÁ,2026-04-03,8:30pm,https://www.statefarmarena.com/events/detail/m...,State Farm Arena_MANÁ_2026-04-03_8:30pm
59,State Farm Arena,Hawks vs Knicks,2026-04-06,7:00pm,https://www.statefarmarena.com/events/detail/h...,State Farm Arena_Hawks vs Knicks_2026-04-06_7:...


## TD Garden

In [10]:
# List to store all events
all_events = []

def scrape_td_garden(venue_name, url):
    """Scrape TD Garden calendar events inside the playoff window.

    Relies on the notebook-level names ``driver``, ``start_date`` and
    ``end_date`` as well as the helpers ``navigate_to_month``,
    ``get_month_year`` and ``click_next_month``.

    Args:
        venue_name: Value stored in the "Venue" field of every record.
        url: Calendar page to open.

    Returns:
        List of dicts with keys Venue, Title, Date (YYYY-MM-DD), Time, Link.
        Only events with start_date <= date <= end_date are kept.
    """
    print(f"Scraping events for {venue_name} ({url})...")
    driver.get(url)
    time.sleep(3)
    events_data = []

    navigate_to_month(driver, start_date.replace(day=1))

    last_month = None     # month handled on the previous pass
    stalled_reads = 0     # consecutive reads that still showed last_month
    while True:
        cur = get_month_year(driver)
        if cur is None:
            break

        if cur == last_month:
            # The "next" click was dispatched but the calendar still shows the
            # month we already scraped. Give it a little more time; if it never
            # moves, stop rather than loop forever collecting duplicates.
            stalled_reads += 1
            if stalled_reads >= 3:
                print("  Calendar did not advance; stopping.")
                break
            time.sleep(2)
            continue
        last_month, stalled_reads = cur, 0

        print(f"  Month: {cur.strftime('%B %Y')}")
        if cur > end_date.replace(day=1):
            break

        event_elements = driver.find_elements(By.CLASS_NAME, "hasEvent")
        print(f"  Found {len(event_elements)} events")

        for event in event_elements:
            try:
                date_str = event.get_attribute("data-fulldate")
                if not date_str:
                    continue
                event_date = datetime.strptime(date_str, "%m-%d-%Y")
                if not (start_date <= event_date <= end_date):
                    continue
                try:
                    a_tag = event.find_element(By.CSS_SELECTOR, ".desc a")
                    title = a_tag.text.strip()
                    link  = a_tag.get_attribute("href") or ""
                except Exception:
                    # Keep the date even when the detail link cannot be read
                    title, link = "Unknown", ""
                try:
                    # Show time comes from the element with class "showings"
                    etime = event.find_element(By.CLASS_NAME, "showings").text.strip()
                except Exception:
                    etime = "TBA"
                events_data.append({"Venue": venue_name, "Title": title,
                                    "Date": event_date.strftime("%Y-%m-%d"), "Time": etime, "Link": link})
            except Exception as e:
                print(f"  Error: {e}")

        if not click_next_month(driver):
            break

    print(f"  Collected {len(events_data)} events")
    return events_data


In [22]:
# Find the row for TD Garden in the venues DataFrame
# (done before launching Chrome so a failed lookup does not leave a browser running)
td_garden_venue = venues[venues['Venue'] == 'TD Garden'].iloc[0]
td_garden_name = td_garden_venue['Venue']
td_garden_url = td_garden_venue['Website']

# scrape_td_garden reads the notebook-level `driver`
driver = setup_driver()
try:
    td_garden_events_data = scrape_td_garden(td_garden_name, td_garden_url)
finally:
    # Always shut the headless Chrome process down, even if scraping raised;
    # the next venue cell starts its own driver.
    driver.quit()

Scraping events for TD Garden (https://www.tdgarden.com/calendar)...
Found 9 events in the current month for TD Garden
Current month for TD Garden: August 2025
Moved to the next month.
Found 16 events in the current month for TD Garden
Current month for TD Garden: September 2025
Moved to the next month.
Found 18 events in the current month for TD Garden
Current month for TD Garden: October 2025
Moved to the next month.
Found 18 events in the current month for TD Garden
Current month for TD Garden: November 2025
Moved to the next month.
Found 15 events in the current month for TD Garden
Current month for TD Garden: December 2025
Moved to the next month.
Found 16 events in the current month for TD Garden
Current month for TD Garden: January 2026
Moved to the next month.
Found 6 events in the current month for TD Garden
Current month for TD Garden: February 2026
Moved to the next month.
Found 18 events in the current month for TD Garden
Current month for TD Garden: March 2026
Moved to the

In [23]:
# Rebind td_garden_events_data from the list of event dicts returned by
# scrape_td_garden to a DataFrame (columns: Venue, Title, Date, Time, Link)
# and display it.
td_garden_events_data = pd.DataFrame(td_garden_events_data)
td_garden_events_data

Unnamed: 0,Venue,Title,Date,Time,Link,Event_Key
0,TD Garden,Bruins vs. Panthers,2025-10-21,7:30pm,https://www.tdgarden.com/events/detail/bruins-...,TD Garden_Bruins vs. Panthers_2025-10-21_7:30pm
1,TD Garden,Celtics vs. 76ers,2025-10-22,7:30pm,https://www.tdgarden.com/events/detail/celtics...,TD Garden_Celtics vs. 76ers_2025-10-22_7:30pm
2,TD Garden,Bruins vs. Ducks,2025-10-23,7:00pm,https://www.tdgarden.com/events/detail/bruins-...,TD Garden_Bruins vs. Ducks_2025-10-23_7:00pm
3,TD Garden,Bruins vs. Avalanche,2025-10-25,3:00pm,https://www.tdgarden.com/events/detail/bruins-...,TD Garden_Bruins vs. Avalanche_2025-10-25_3:00pm
4,TD Garden,Bryan Adams,2025-10-26,7:30pm,https://www.tdgarden.com/events/detail/bryan-a...,TD Garden_Bryan Adams_2025-10-26_7:30pm
...,...,...,...,...,...,...
82,TD Garden,Celtics vs. Raptors,2026-04-05,3:30pm,https://www.tdgarden.com/events/detail/celtics...,TD Garden_Celtics vs. Raptors_2026-04-05_3:30pm
83,TD Garden,Celtics vs. Hornets,2026-04-07,7:30pm,https://www.tdgarden.com/events/detail/celtics...,TD Garden_Celtics vs. Hornets_2026-04-07_7:30pm
84,TD Garden,Celtics vs. Pelicans,2026-04-10,7:30pm,https://www.tdgarden.com/events/detail/celtics...,TD Garden_Celtics vs. Pelicans_2026-04-10_7:30pm
85,TD Garden,Bruins vs. Lightning,2026-04-11,12:30pm,https://www.tdgarden.com/events/detail/bruins-...,TD Garden_Bruins vs. Lightning_2026-04-11_12:30pm


In [25]:
# Start the running table of all venues. ignore_index=True gives the combined
# frame a clean 0..n-1 index instead of repeating each source frame's labels,
# matching the later concat cells.
all_events_data = pd.concat([state_farm_events_data, td_garden_events_data], ignore_index=True)
all_events_data['Venue'].value_counts()

Venue
TD Garden           87
State Farm Arena    61
Name: count, dtype: int64

## Barclays Center

In [26]:
def parse_barclays_date(raw):
    """Parse a Barclays Center date label into a ``datetime``.

    The live site renders dates like "Feb 1, 2026 -": the trailing separator
    (spaces plus a hyphen, en dash or em dash) is stripped before parsing.

    Args:
        raw: Date text taken from the page. ``None`` or any non-string value
            is treated as "no date".

    Returns:
        ``datetime`` at midnight of the parsed day, or ``None`` when the text
        is empty or matches none of the supported formats
        ("Feb 1, 2026", "February 1, 2026", "Feb. 1, 2026").
    """
    if not isinstance(raw, str):
        return None

    # \u2013 / \u2014 are the en and em dash, which sites often use in place of "-"
    cleaned = raw.strip().rstrip(" -\u2013\u2014").strip()
    if not cleaned:
        return None

    for fmt in ("%b %d, %Y", "%B %d, %Y", "%b. %d, %Y"):
        try:
            return datetime.strptime(cleaned, fmt)
        except ValueError:
            continue
    return None


def scrape_barclays(venue_name, url, start_date, end_date, driver):
    """Scrape Barclays Center calendar events between two dates.

    The page is driven with Selenium (month header ``.cal-month``, next button
    ``.cal-next``); each month's rendered HTML is then parsed with
    BeautifulSoup, one ``div.event_item_wrapper`` per event.

    Args:
        venue_name: Value stored in the "Venue" field of every record.
        url: Calendar page to open; also the base for resolving relative links.
        start_date: First day (inclusive) of the window, as ``datetime``.
        end_date: Last day (inclusive) of the window, as ``datetime``.
        driver: Selenium WebDriver to use.

    Returns:
        List of dicts with keys Venue, Title, Date (YYYY-MM-DD), Time, Link.
    """
    print(f"Scraping events for {venue_name} ({url})...")
    driver.get(url)
    time.sleep(3)
    events_data = []

    def get_barclays_month():
        # Month header text is upper-case on the live site ("OCTOBER 2025"),
        # hence .title() before parsing. Returns None on any failure.
        try:
            el = WebDriverWait(driver, 8).until(
                EC.presence_of_element_located((By.CLASS_NAME, "cal-month")))
            return datetime.strptime(el.text.strip().title(), "%B %Y")
        except Exception:
            return None

    def click_barclays_next():
        # JavaScript click on the "next month" control; True if dispatched.
        try:
            btn = WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.CLASS_NAME, "cal-next")))
            driver.execute_script("arguments[0].click();", btn)
            time.sleep(2)
            return True
        except Exception:
            return False

    # Fast-forward to the first month of the window (at most 10 clicks)
    for _ in range(10):
        cur = get_barclays_month()
        if cur is None or cur >= start_date.replace(day=1):
            break
        print(f"  Navigating: {cur.strftime('%B %Y')} -> next")
        if not click_barclays_next():
            # No usable "next" control; retrying would only repeat the timeout
            break

    last_month = None     # month handled on the previous pass
    stalled_reads = 0     # consecutive reads that still showed last_month
    while True:
        cur = get_barclays_month()
        if cur is None:
            break

        if cur == last_month:
            # The click was dispatched but the calendar has not moved. Wait a
            # little longer; if it never moves, stop instead of looping forever.
            stalled_reads += 1
            if stalled_reads >= 3:
                print("  Calendar did not advance; stopping.")
                break
            time.sleep(2)
            continue
        last_month, stalled_reads = cur, 0

        print(f"  Month: {cur.strftime('%B %Y')}")
        if cur > end_date.replace(day=1):
            break

        soup = BeautifulSoup(driver.page_source, "html.parser")
        wrappers = soup.select("div.event_item_wrapper")
        print(f"  Found {len(wrappers)} event wrappers")

        for wrapper in wrappers:
            try:
                date_el = wrapper.select_one("div.date span.dt")
                raw_date = date_el.get_text(strip=True) if date_el else ""
                event_date = parse_barclays_date(raw_date)
                if event_date is None or not (start_date <= event_date <= end_date):
                    continue
                title_el = wrapper.select_one("h3 a")
                title = title_el.get_text(strip=True) if title_el else "Unknown"
                href = title_el.get("href", "") if title_el else ""
                # Resolve site-relative hrefs; absolute URLs pass through unchanged
                link = urljoin(url, href) if href else ""
                time_el = wrapper.select_one("div.date span.time, span.time")
                etime = time_el.get_text(strip=True) if time_el else "TBA"
                events_data.append({"Venue": venue_name, "Title": title,
                                    "Date": event_date.strftime("%Y-%m-%d"), "Time": etime, "Link": link})
            except Exception as e:
                print(f"  Event parse error: {e}")

        if not click_barclays_next():
            break

    print(f"  Collected {len(events_data)} events")
    return events_data


In [27]:
# Find the row for Barclays Center in the venues DataFrame
# (done before launching Chrome so a failed lookup does not leave a browser running)
barclays_center_venue = venues[venues['Venue'] == 'Barclays Center'].iloc[0]
barclays_center_name = barclays_center_venue['Venue']
barclays_center_url = barclays_center_venue['Website']

driver = setup_driver()
try:
    barclays_center_events_data = scrape_barclays(barclays_center_name, barclays_center_url, start_date, end_date, driver)
finally:
    # Always shut the headless Chrome process down, even if scraping raised;
    # the next venue cell starts its own driver.
    driver.quit()

Scraping events for Barclays Center (https://www.barclayscenter.com/events/event-calendar)...
Reached start month: OCTOBER 2025
Current month for Barclays Center: OCTOBER 2025
Found 5 events in OCTOBER 2025
Moved to the next month.
Calendar updated to next month.
Current month for Barclays Center: NOVEMBER 2025
Found 15 events in NOVEMBER 2025
Moved to the next month.
Calendar updated to next month.
Current month for Barclays Center: DECEMBER 2025
Found 10 events in DECEMBER 2025
Moved to the next month.
Calendar updated to next month.
Current month for Barclays Center: JANUARY 2026
Found 13 events in JANUARY 2026
Moved to the next month.
Calendar updated to next month.
Current month for Barclays Center: FEBRUARY 2026
Found 10 events in FEBRUARY 2026
Moved to the next month.
Calendar updated to next month.
Current month for Barclays Center: MARCH 2026
Found 8 events in MARCH 2026
Moved to the next month.
Calendar updated to next month.
Current month for Barclays Center: APRIL 2026
Foun

In [28]:
# Rebind barclays_center_events_data from the list of event dicts returned by
# scrape_barclays to a DataFrame (columns: Venue, Title, Date, Time, Link)
# and display it.
barclays_center_events_data = pd.DataFrame(barclays_center_events_data)
barclays_center_events_data

Unnamed: 0,Venue,Title,Date,Time,Link,Event_Key
0,Barclays Center,BROOKLYN NETS VS. CLEVELAND CAVALIERS,2025-10-24,7:30 PM,https://www.barclayscenter.com/events/detail/b...,Barclays Center_BROOKLYN NETS VS. CLEVELAND CA...
1,Barclays Center,BROOKLYN NETS VS. ATLANTA HAWKS,2025-10-29,7:30 PM,https://www.barclayscenter.com/events/detail/b...,Barclays Center_BROOKLYN NETS VS. ATLANTA HAWK...
2,Barclays Center,BROOKLYN NETS VS. PHILADELPHIA 76ERS,2025-11-02,6:00 PM,https://www.barclayscenter.com/events/detail/b...,Barclays Center_BROOKLYN NETS VS. PHILADELPHIA...
3,Barclays Center,BROOKLYN NETS VS. MINNESOTA TIMBERWOLVES,2025-11-03,7:00 PM,https://www.barclayscenter.com/events/detail/b...,Barclays Center_BROOKLYN NETS VS. MINNESOTA TI...
4,Barclays Center,JOHN LEGEND,2025-11-04,8:00 PM,https://www.barclayscenter.com/events/detail/j...,Barclays Center_JOHN LEGEND_2025-11-04_8:00 PM
...,...,...,...,...,...,...
57,Barclays Center,BROOKLYN NETS VS. CHARLOTTE HORNETS,2026-03-31,7:30 PM,https://www.barclayscenter.com/events/detail/b...,Barclays Center_BROOKLYN NETS VS. CHARLOTTE HO...
58,Barclays Center,BROOKLYN NETS VS. ATLANTA HAWKS,2026-04-03,7:30 PM,https://www.barclayscenter.com/events/detail/b...,Barclays Center_BROOKLYN NETS VS. ATLANTA HAWK...
59,Barclays Center,BROOKLYN NETS VS. WASHINGTON WIZARDS,2026-04-05,3:30 PM,https://www.barclayscenter.com/events/detail/b...,Barclays Center_BROOKLYN NETS VS. WASHINGTON W...
60,Barclays Center,BROOKLYN NETS VS. MILWAUKEE BUCKS,2026-04-07,7:30 PM,https://www.barclayscenter.com/events/detail/b...,Barclays Center_BROOKLYN NETS VS. MILWAUKEE BU...


In [29]:
# Rebuild the combined table from the per-venue frames instead of appending to
# the previous all_events_data: re-running this cell then gives the same result
# rather than stacking another copy of the Barclays rows.
all_events_data = pd.concat(
    [state_farm_events_data, td_garden_events_data, barclays_center_events_data],
    ignore_index=True,
)
all_events_data['Venue'].value_counts()

Venue
TD Garden           87
Barclays Center     62
State Farm Arena    61
Name: count, dtype: int64

## Spectrum Center

In [86]:
def scrape_spectrum_events(venue_name, url, start_date, end_date, driver):
    """Scrape Spectrum Center events between two dates.

    Starting from the month the page opens on, each month's rendered HTML is
    parsed with BeautifulSoup (header ``h2#cal-month``, one
    ``div.event_item_wrapper`` per event) and the ``#cal-next`` button is
    clicked until the displayed month is past ``end_date``.

    Args:
        venue_name: Value stored in the "Venue" field of every record.
        url: Events page to open; also the base for resolving relative links.
        start_date: First day (inclusive) of the window, as ``datetime``.
        end_date: Last day (inclusive) of the window, as ``datetime``.
        driver: Selenium WebDriver to use.

    Returns:
        List of dicts with keys Venue, Title, Date (YYYY-MM-DD), Time, Link
        (Link is None when the title has no href).
    """
    print(f"Opening {url}")
    driver.get(url)

    # Give the calendar header a chance to render before the first snapshot
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "h2#cal-month"))
        )
    except TimeoutException:
        pass  # reported below as "No calendar month found."

    results = []
    last_month = None     # month handled on the previous pass
    stalled_reads = 0     # consecutive snapshots that still showed last_month

    while True:
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # --- Current month-year ---
        month_header = soup.select_one("h2#cal-month")
        if not month_header:
            print("No calendar month found.")
            break

        month_year = month_header.get_text(strip=True)
        try:
            month_dt = datetime.strptime(month_year, "%B %Y")
        except ValueError:
            # Keep what has been collected instead of aborting the whole scrape
            print(f"Could not parse calendar month: {month_year!r}")
            break

        if month_dt == last_month:
            # The click was dispatched but the calendar has not moved. Wait a
            # little longer; if it never moves, stop instead of looping forever.
            stalled_reads += 1
            if stalled_reads >= 3:
                print("Calendar did not advance; stopping.")
                break
            time.sleep(1.5)
            continue
        last_month, stalled_reads = month_dt, 0

        # --- Loop through event wrappers ---
        for wrapper in soup.select("div.event_item_wrapper"):
            try:
                # Date + time
                date_tag = wrapper.select_one("div.info .date .dt")
                time_tag = wrapper.select_one("div.info .date .time")
                if not date_tag:
                    continue
                try:
                    event_date = datetime.strptime(
                        date_tag.get_text(strip=True), "%b %d, %Y"
                    ).date()
                except Exception as e:
                    print(f"Could not parse date: {date_tag.get_text(strip=True)} -> {e}")
                    continue

                if not (start_date.date() <= event_date <= end_date.date()):
                    continue

                # Drop the leading "- " / "@ " separator so an unannounced time
                # is recorded as "TBA", matching the other scrapers.
                event_time = "TBA"
                if time_tag:
                    event_time = time_tag.get_text(strip=True).lstrip("-@ ").strip() or "TBA"

                # Title + URL
                title_tag = wrapper.select_one("h3 a")
                title = title_tag.get_text(strip=True) if title_tag else "Untitled Event"
                more_info = urljoin(url, title_tag["href"]) if title_tag and "href" in title_tag.attrs else None

                results.append({
                    "Venue": venue_name,
                    "Title": title,
                    "Date": event_date.strftime("%Y-%m-%d"),
                    "Time": event_time,
                    "Link": more_info
                })

            except Exception as e:
                print(f"Error parsing event wrapper: {e}")

        # --- Stop if current month is past end_date ---
        if month_dt > end_date.replace(day=1):
            break

        # --- Next month ---
        try:
            next_btn = driver.find_element(By.ID, "cal-next")
            driver.execute_script("arguments[0].click();", next_btn)
            time.sleep(1.5)
        except Exception:
            print("No more next button found.")
            break

    print(f"\nScraping complete. Collected {len(results)} events.")
    return results


In [87]:
# Find the row for Spectrum Center in the venues DataFrame
# (done before launching Chrome so a failed lookup does not leave a browser running)
spectrum_center_venue = venues[venues['Venue'] == 'Spectrum Center'].iloc[0]
spectrum_center_name = spectrum_center_venue['Venue']
spectrum_center_url = spectrum_center_venue['Website']

driver = setup_driver()
try:
    spectrum_center_events_data = scrape_spectrum_events(spectrum_center_name, spectrum_center_url, start_date, end_date, driver)
finally:
    # Always shut the headless Chrome process down, even if scraping raised;
    # the next venue cell starts its own driver.
    driver.quit()

Opening https://www.spectrumcentercharlotte.com/events

Scraping complete. Collected 17 events.


In [88]:
# Rebind spectrum_center_events_data from the list of event dicts returned by
# scrape_spectrum_events to a DataFrame (columns: Venue, Title, Date, Time,
# Link) and display it.
spectrum_center_events_data = pd.DataFrame(spectrum_center_events_data)
spectrum_center_events_data

Unnamed: 0,Venue,Title,Date,Time,Link
0,Spectrum Center,Stevie Nicks,2025-10-21,7:00 PM,https://www.spectrumcentercharlotte.com/events...
1,Spectrum Center,Tate McRae,2025-10-24,7:30 PM,https://www.spectrumcentercharlotte.com/events...
2,Spectrum Center,Lainey Wilson,2025-10-25,7:00 PM,https://www.spectrumcentercharlotte.com/events...
3,Spectrum Center,NBA YoungBoy,2025-10-26,7:00 PM,https://www.spectrumcentercharlotte.com/events...
4,Spectrum Center,Reneé Rapp,2025-10-29,7:00 PM,https://www.spectrumcentercharlotte.com/events...
5,Spectrum Center,Dick Vitale Invitational,2025-11-04,@ TBA,https://www.spectrumcentercharlotte.com/events...
6,Spectrum Center,Jonas Brothers,2025-11-05,7:30 PM,https://www.spectrumcentercharlotte.com/events...
7,Spectrum Center,Ally Tipoff,2025-11-09,@ TBA,https://www.spectrumcentercharlotte.com/events...
8,Spectrum Center,Brandy and Monica,2025-11-13,8:00 PM,https://www.spectrumcentercharlotte.com/events...
9,Spectrum Center,Playboy Carti,2025-11-14,7:30 PM,https://www.spectrumcentercharlotte.com/events...


In [89]:
# Rebuild the combined table from the per-venue frames instead of appending to
# the previous all_events_data: re-running this cell then gives the same result
# rather than stacking another copy of the Spectrum Center rows.
all_events_data = pd.concat(
    [
        state_farm_events_data,
        td_garden_events_data,
        barclays_center_events_data,
        spectrum_center_events_data,
    ],
    ignore_index=True,
)
all_events_data['Venue'].value_counts()

Venue
TD Garden           87
Barclays Center     62
State Farm Arena    61
Spectrum Center     17
Name: count, dtype: int64

## United Center

In [113]:
def scrape_united_center_events(venue_name, url, start_date, end_date, driver):
    """
    Scrapes United Center events between start_date and end_date (inclusive).

    Primary:  Selenium + explicit WebDriverWait inside the calendar iframe.
    Fallback: requests + BeautifulSoup (works if calendar is server-rendered).
              Used when the iframe/month header cannot be found or when the
              primary strategy collected nothing. One monthly page is fetched
              for every month from start_date's month through end_date's month.

    Args:
        venue_name: Value stored in the "Venue" field of every record.
        url: Calendar page opened by the Selenium strategy.
        start_date: First day (inclusive) of the window, as ``datetime``.
        end_date: Last day (inclusive) of the window, as ``datetime``.
        driver: Selenium WebDriver to use.

    Returns:
        List of dicts with keys Venue, Title, Date (YYYY-MM-DD), Time, Link.
    """
    import requests as _req
    from bs4 import BeautifulSoup as _BS
    from datetime import datetime as _dt
    print(f"Opening {url}")
    results = []
    base_url = "https://www.unitedcenter.com"

    # ── Strategy 1: Selenium + iframe ────────────────────────────────────────
    iframe_ok = False
    try:
        driver.get(url)
        iframe = WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.TAG_NAME, "iframe"))
        )
        driver.switch_to.frame(iframe)
        print("Switched to calendar iframe.")
        iframe_ok = True
    except Exception as e:
        print(f"Iframe not found ({e}). Trying BeautifulSoup fallback.")

    if iframe_ok:
        last_month = None     # month handled on the previous pass
        stalled_reads = 0     # consecutive reads that still showed last_month
        while True:
            # Explicit wait for month header; try several selectors
            month_year_text = None
            for sel in [
                "div.hdngbar.primaryc h1.title",
                "h1.title",
                ".calendar-month-year",
                ".cal-header h1",
            ]:
                try:
                    el = WebDriverWait(driver, 12).until(
                        EC.presence_of_element_located((By.CSS_SELECTOR, sel))
                    )
                    month_year_text = el.text.strip()
                    break
                except TimeoutException:
                    continue

            if not month_year_text:
                print("Could not find month/year header. Falling back to BS4.")
                iframe_ok = False
                break

            try:
                month_dt = _dt.strptime(month_year_text, "%B %Y")
            except ValueError:
                print(f"Could not parse month: {month_year_text!r}")
                break

            if month_dt == last_month:
                # The click was dispatched but the calendar has not moved. Wait
                # a little longer; if it never moves, stop instead of looping
                # forever and re-collecting the same month.
                stalled_reads += 1
                if stalled_reads >= 3:
                    print("Calendar did not advance; stopping.")
                    break
                time.sleep(2)
                continue
            last_month, stalled_reads = month_dt, 0

            print("Current calendar month:", month_dt.strftime("%B %Y"))

            if month_dt > end_date.replace(day=1):
                break

            for cell in driver.find_elements(By.CSS_SELECTOR, "td.calendar_day_event"):
                try:
                    link_tag = cell.find_element(By.CSS_SELECTOR, "a.eventLink")
                    title = link_tag.find_element(By.TAG_NAME, "span").text
                    event_url = urljoin(base_url, link_tag.get_attribute("href"))
                    try:
                        date_text = cell.find_element(By.CSS_SELECTOR, "div.eventToolTipDates").text
                        event_date = _dt.strptime(date_text, "%B %d, %Y").date()
                    except Exception:
                        event_date = None
                    try:
                        event_time = cell.find_element(By.CSS_SELECTOR, "div.eventToolTipDuration").text
                    except Exception:
                        event_time = "TBA"
                    if event_date and start_date.date() <= event_date <= end_date.date():
                        results.append({
                            "Venue": venue_name,
                            "Title": title,
                            "Date": event_date.strftime("%Y-%m-%d"),
                            "Time": event_time,
                            "Link": event_url,
                        })
                except Exception as e:
                    print("Error parsing event cell:", e)

            # Click next month button (first matching XPath wins)
            clicked = False
            for xpath in [
                "//img[@alt='Next']/..",
                "//button[contains(@class,'next')]",
                "//a[contains(@class,'cal-next')]",
            ]:
                try:
                    btn = driver.find_element(By.XPATH, xpath)
                    driver.execute_script("arguments[0].click();", btn)
                    time.sleep(2)
                    clicked = True
                    break
                except Exception:
                    continue
            if not clicked:
                print("No next button found, stopping.")
                break

        # Leave the iframe regardless of how the loop ended
        driver.switch_to.default_content()

    # ── Strategy 2: requests + BeautifulSoup (fallback) ─────────────────────
    if not iframe_ok or len(results) == 0:
        print("Using requests+BeautifulSoup fallback for United Center...")

        # One monthly page per calendar month in [start_date, end_date]
        month_urls = []
        year, month = start_date.year, start_date.month
        while (year, month) <= (end_date.year, end_date.month):
            month_urls.append(f"{base_url}/events/month/{year}/{month:02d}/")
            month += 1
            if month > 12:
                year, month = year + 1, 1

        for month_url in month_urls:
            try:
                resp = _req.get(month_url, timeout=15,
                                headers={"User-Agent": "Mozilla/5.0"})
                if resp.status_code != 200:
                    print(f"HTTP {resp.status_code} for {month_url}")
                    continue
                soup = _BS(resp.text, "html.parser")
                # Broad selector; links without a parseable in-window date are
                # dropped by the date check below.
                for link_tag in soup.select("a.eventLink, a[href*='/events/']"):
                    title = link_tag.get_text(strip=True)
                    if not title:
                        continue
                    href = urljoin(base_url, link_tag.get("href", ""))
                    parent = (link_tag.find_parent("td") or
                              link_tag.find_parent("div"))
                    date_el = (parent.select_one(".eventToolTipDates, .date")
                               if parent else None)
                    time_el = (parent.select_one(".eventToolTipDuration, .time")
                               if parent else None)
                    try:
                        event_date = _dt.strptime(
                            date_el.get_text(strip=True), "%B %d, %Y"
                        ).date() if date_el else None
                    except Exception:
                        event_date = None
                    event_time = time_el.get_text(strip=True) if time_el else "TBA"
                    if event_date and start_date.date() <= event_date <= end_date.date():
                        results.append({
                            "Venue": venue_name,
                            "Title": title,
                            "Date": event_date.strftime("%Y-%m-%d"),
                            "Time": event_time,
                            "Link": href,
                        })
            except Exception as e:
                print(f"BS4 fetch failed for {month_url}: {e}")

    print(f"United Center: collected {len(results)} events")
    return results


In [114]:
# Find the row for United Center in the venues DataFrame
# (done before launching Chrome so a failed lookup does not leave a browser running)
united_center_venue = venues[venues['Venue'] == 'United Center'].iloc[0]
united_center_name = united_center_venue['Venue']
united_center_url = united_center_venue['Website']

driver = setup_driver()
try:
    united_center_events_data = scrape_united_center_events(united_center_name, united_center_url, start_date, end_date, driver)
finally:
    # Always shut the headless Chrome process down, even if scraping raised
    driver.quit()

Opening https://www.unitedcenter.com/events/month/
Switched to calendar iframe.
Error getting calendar month/year: Message: 
Stacktrace:
	GetHandleVerifier [0x0x7ff637b03d85+79397]
	GetHandleVerifier [0x0x7ff637b03de0+79488]
	(No symbol) [0x0x7ff6378ac0fa]
	(No symbol) [0x0x7ff637902fd6]
	(No symbol) [0x0x7ff63790328c]
	(No symbol) [0x0x7ff637956537]
	(No symbol) [0x0x7ff63792b1df]
	(No symbol) [0x0x7ff637953344]
	(No symbol) [0x0x7ff63792af73]
	(No symbol) [0x0x7ff6378f41b1]
	(No symbol) [0x0x7ff6378f4f43]
	GetHandleVerifier [0x0x7ff637dce1ed+3005069]
	GetHandleVerifier [0x0x7ff637dc831d+2980797]
	GetHandleVerifier [0x0x7ff637de7e0d+3110573]
	GetHandleVerifier [0x0x7ff637b1d6de+184190]
	GetHandleVerifier [0x0x7ff637b2516f+215567]
	GetHandleVerifier [0x0x7ff637b0c974+115220]
	GetHandleVerifier [0x0x7ff637b0cb29+115657]
	GetHandleVerifier [0x0x7ff637af3268+11016]
	BaseThreadInitThunk [0x0x7ff9f792e8d7+23]
	RtlUserThreadStart [0x0x7ff9f98fc34c+44]


Scraping complete. Collected 0 events.

In [None]:
# Rebind united_center_events_data from the list of event dicts returned by
# scrape_united_center_events to a DataFrame and display it. If the scrape
# returned an empty list, the resulting frame has no rows and no columns.
united_center_events_data = pd.DataFrame(united_center_events_data)
united_center_events_data

In [None]:
# Combine all scraped results and save to playoff CSV
scraped_dfs = []
for df_name in [
    "state_farm_events_data",
    "td_garden_events_data",
    "barclays_center_events_data",
    "spectrum_center_events_data",
    "united_center_events_data",
]:
    # Look the variable up by name without eval(); None means its cell was not run
    scraped = globals().get(df_name)
    if scraped is None:
        print(f"{df_name} not available, skipping")
        continue
    # A venue whose "convert to DataFrame" cell was skipped is still a list of
    # dicts; pd.DataFrame() accepts both a list and an existing DataFrame.
    scraped_df = pd.DataFrame(scraped)
    if scraped_df.empty:
        print(f"{df_name} is empty, skipping")
        continue
    scraped_dfs.append(scraped_df)

if scraped_dfs:
    all_scraped = pd.concat(scraped_dfs, ignore_index=True)
else:
    # pd.concat([]) raises; fall back to an empty table with the expected columns
    all_scraped = pd.DataFrame(columns=["Venue", "Title", "Date", "Time", "Link"])

# Filter to playoff window (same bounds the scrapers used, taken from the config cell)
all_scraped["Date"] = pd.to_datetime(all_scraped["Date"])
playoff_start = pd.Timestamp(start_date)
playoff_end   = pd.Timestamp(end_date)
all_scraped = all_scraped[
    (all_scraped["Date"] >= playoff_start) &
    (all_scraped["Date"] <= playoff_end)
].drop_duplicates(subset=["Venue", "Title", "Date"]).reset_index(drop=True)

all_scraped.to_csv("nba_playoff_scraped_2026.csv", index=False)
print(f"Saved {len(all_scraped)} events to nba_playoff_scraped_2026.csv")
all_scraped
