## Install libraries and packages, API Key

In [51]:
!pip install requests
!pip -q install tqdm
!pip install python-dotenv

In [52]:
import pandas as pd
import requests
import json
import re
import time
from datetime import datetime, timedelta
from tqdm import tqdm

In [3]:
import os
from dotenv import load_dotenv

# Load variables from a .env file into the process environment, then read the
# Ticketmaster key from there so it never has to be hard-coded in the notebook.
load_dotenv()
api_key = os.environ.get("TICKETMASTER_API_KEY")
if not api_key:
    raise ValueError("TICKETMASTER_API_KEY not found in environment")
# Confirm the key is present without echoing any part of it: cell output is
# saved with the notebook, so printing a prefix would leak part of the secret.
print("API key loaded.")

## API Call

### NBA Arena Coordinates

In [18]:
# Hand-maintained arena table: one row per team as
# (team name, venue name, latitude, longitude).
# The team name is the key used by the 'Team' column of the venue DataFrame.
_arena_rows = [
    ('Atlanta Hawks', 'State Farm Arena', 33.7574, -84.3962),
    ('Boston Celtics', 'TD Garden', 42.3663, -71.0636),
    ('Brooklyn Nets', 'Barclays Center', 40.6782, -73.9442),
    ('Charlotte Hornets', 'Spectrum Center', 35.2232, -80.8422),
    ('Chicago Bulls', 'United Center', 41.8819, -87.6232),
    ('Cleveland Cavaliers', 'Rocket Arena', 41.4967, -81.6954),
    ('Dallas Mavericks', 'American Airlines Center', 32.7767, -96.8032),
    ('Denver Nuggets', 'Ball Arena', 39.7422, -104.9923),
    ('Detroit Pistons', 'Little Caesars Arena', 42.3314, -83.04),
    ('Golden State Warriors', 'Chase Center', 37.7749, -122.4194),
    ('Houston Rockets', 'Toyota Center', 29.7419, -95.3),
    ('Indiana Pacers', 'Gainbridge Fieldhouse', 39.7684, -86.1581),
    ('Los Angeles Clippers', 'Intuit Dome', 33.9438, -118.3422),
    ('Los Angeles Lakers', 'Crypto.com Arena', 34.0489, -118.2673),
    ('Memphis Grizzlies', 'FedExForum', 35.1495, -90.049),
    ('Miami Heat', 'Kaseya Center', 25.7743, -80.1937),
    ('Milwaukee Bucks', 'Fiserv Forum', 43.0389, -87.9068),
    ('Minnesota Timberwolves', 'Target Center', 44.9793, -93.2644),
    ('New Orleans Pelicans', 'Smoothie King Center', 29.9498, -90.0816),
    ('New York Knicks', 'Madison Square Garden', 40.7537, -73.9857),
    ('Oklahoma City Thunder', 'Paycom Center', 35.4676, -97.5164),
    ('Orlando Magic', 'Kia Center', 28.5383, -81.3792),
    ('Philadelphia 76ers', 'Xfinity Mobile Arena', 39.9042, -75.165),
    ('Phoenix Suns', 'Mortgage Matchup Center', 33.4484, -112.074),
    ('Portland Trail Blazers', 'Moda Center', 45.5152, -122.685),
    ('Sacramento Kings', 'Golden 1 Center', 38.5766, -121.491),
    ('San Antonio Spurs', 'Frost Bank Center', 29.4241, -98.4953),
    ('Toronto Raptors', 'Scotiabank Arena', 43.6618, -79.391),
    ('Utah Jazz', 'Delta Center', 40.77, -111.9),
    ('Washington Wizards', 'Capital One Arena', 38.8951, -77.0364),
]

# Per-team lookups derived from the table above (team name -> value).
venue_names = {team: venue for team, venue, _lat, _long in _arena_rows}
lat = {team: latitude for team, _venue, latitude, _long in _arena_rows}
long = {team: longitude for team, _venue, _lat, longitude in _arena_rows}

In [20]:
# Join the three per-team lookups into one record per team, in the order the
# teams appear in venue_names.
venue_data_list = [
    {
        'Team': team_name,
        'Venue Name': venue_names.get(team_name),
        'Lat': lat.get(team_name),
        'Long': long.get(team_name),
    }
    for team_name in venue_names
]

# One row per team: Team, Venue Name, Lat, Long
venue_info_df = pd.DataFrame(venue_data_list)

# Show the assembled table
venue_info_df

Unnamed: 0,Team,Venue Name,Lat,Long
0,Atlanta Hawks,State Farm Arena,33.7574,-84.3962
1,Boston Celtics,TD Garden,42.3663,-71.0636
2,Brooklyn Nets,Barclays Center,40.6782,-73.9442
3,Charlotte Hornets,Spectrum Center,35.2232,-80.8422
4,Chicago Bulls,United Center,41.8819,-87.6232
5,Cleveland Cavaliers,Rocket Mortgage Fieldhouse,41.4967,-81.6954
6,Dallas Mavericks,American Airlines Center,32.7767,-96.8032
7,Denver Nuggets,Ball Arena,39.7422,-104.9923
8,Detroit Pistons,Little Caesars Arena,42.3314,-83.04
9,Golden State Warriors,Chase Center,37.7749,-122.4194


### Fetch Events (Ticketmaster Discovery API)

In [53]:
def safe_request(url, params, retry_delay=1.5, max_retries=6):
    """GET ``url`` with ``params``, retrying rate limits and transient server errors.

    A 429 (rate limit) or 502/503/504 response is retried after sleeping
    ``retry_delay`` seconds. Both kinds of failure draw from the same budget of
    ``max_retries`` retries, so a persistent rate limit can no longer
    loop forever. Any other non-200 status is logged and not retried.

    Args:
        url: Endpoint to request.
        params: Query parameters passed to ``requests.get``.
        retry_delay: Seconds to sleep before each retry.
        max_retries: Maximum number of retries before giving up.

    Returns:
        The response object for a 200 reply, or ``None`` when the request
        failed or the retry budget ran out. Exceptions raised by
        ``requests.get`` itself (e.g. a timeout) are not caught here.
    """
    retryable_statuses = (429, 502, 503, 504)
    retries = 0
    while True:
        r = requests.get(url, params=params, timeout=30)
        status = r.status_code
        if status == 200:
            return r
        if status not in retryable_statuses:
            tqdm.write(f"Error {status}: {r.text[:300]}")
            return None

        if retries >= max_retries:
            label = "429 rate limit" if status == 429 else f"Server error {status}"
            tqdm.write(f"{label} — giving up after {max_retries} retries.")
            return None

        if status == 429:
            tqdm.write("429 rate limit — sleeping %.1fs..." % retry_delay)
        else:
            tqdm.write(f"Server error {status} — retrying in {retry_delay}s...")
        time.sleep(retry_delay)
        retries += 1

def split_date_range(start_date, end_date, step_days=7):
    """Split an inclusive date range into consecutive, non-overlapping chunks.

    Args:
        start_date: First day of the range, formatted ``YYYY-MM-DD``.
        end_date: Last day of the range (inclusive), formatted ``YYYY-MM-DD``.
        step_days: Maximum number of days per chunk; must be at least 1.

    Returns:
        A list of ``(chunk_start, chunk_end)`` string pairs in ``YYYY-MM-DD``
        format covering the range in order. The last chunk is shortened to end
        on ``end_date``. The list is empty when ``start_date`` is after
        ``end_date``.

    Raises:
        ValueError: If ``step_days`` is less than 1 (the cursor would never
            advance), or if a date string does not match ``YYYY-MM-DD``.
    """
    if step_days < 1:
        raise ValueError("step_days must be at least 1")

    start = datetime.strptime(start_date, "%Y-%m-%d")
    end = datetime.strptime(end_date, "%Y-%m-%d")
    ranges = []
    cur = start
    while cur <= end:
        # A chunk of step_days days ends step_days - 1 days after it starts.
        nxt = min(cur + timedelta(days=step_days - 1), end)
        ranges.append((cur.strftime("%Y-%m-%d"), nxt.strftime("%Y-%m-%d")))
        cur = nxt + timedelta(days=1)
    return ranges


def fetch_all_events_for_venue(api_key, lat, lon, start_date, end_date, radius=5, size=50, show_pages=True):
    """
    Fetch all events within `radius` miles of (lat, lon) between start_date and
    end_date (YYYY-MM-DD, both inclusive), following pagination.

    - Ticketmaster only serves pages with page * size < 1000. When the first
      response shows the range is too large for that cap, the range is split
      into weekly chunks (or daily chunks if it already spans a week or less)
      and each chunk is fetched recursively.
    - A single day cannot be split further; in that case only the pages that
      fit under the cap are fetched and a warning is written.
    - Shows a page progress bar if show_pages=True and there is more than one page.
    - If a request fails (safe_request returns None), paging stops and the
      events collected so far are returned.

    Returns a list of dicts with keys "name", "date", "time", "venue"
    (values are None when the API response omits them).
    """
    url = "https://app.ticketmaster.com/discovery/v2/events.json"
    # Number of pages reachable under the cap: page indexes with page * size < 1000.
    max_pages = -(-1000 // max(size, 1))
    all_events = []
    page = 0
    pbar = None

    try:
        while True:
            params = {
                "apikey": api_key,
                "latlong": f"{lat},{lon}",
                "radius": radius,
                "unit": "miles",
                "startDateTime": f"{start_date}T00:00:00Z",
                "endDateTime": f"{end_date}T23:59:59Z",
                "size": size,
                "page": page
            }
            r = safe_request(url, params)
            if r is None:
                break

            data = r.json()
            events = data.get("_embedded", {}).get("events", [])
            total_pages = data.get("page", {}).get("totalPages", 1)

            # Decide once, on the first response, whether the range must be split.
            if page == 0 and total_pages * size >= 1000:
                span_days = (
                    datetime.strptime(end_date, "%Y-%m-%d")
                    - datetime.strptime(start_date, "%Y-%m-%d")
                ).days + 1
                if span_days > 1:
                    # Always split into strictly smaller chunks; re-requesting the
                    # same range would recurse without end.
                    step_days, label = (7, "weekly") if span_days > 7 else (1, "daily")
                    tqdm.write(f"Splitting {start_date} → {end_date} into {label} chunks...")
                    chunk_events = []
                    for s, e in split_date_range(start_date, end_date, step_days=step_days):
                        chunk_events.extend(
                            fetch_all_events_for_venue(api_key, lat, lon, s, e, radius, size, show_pages=False)
                        )
                    return chunk_events
                if total_pages > max_pages:
                    tqdm.write(
                        f"{start_date} exceeds the paging cap and cannot be split further — "
                        f"fetching only the first {max_pages} pages."
                    )

            # Never request a page beyond the cap.
            total_pages = min(total_pages, max_pages)

            # Create the page bar after the first response, once totalPages is known.
            if show_pages and pbar is None and total_pages > 1:
                pbar = tqdm(total=total_pages, desc=f"Pages {start_date}→{end_date}", leave=False)

            # Collect events
            for e in events:
                start_info = e.get("dates", {}).get("start", {})
                # "venues" may be missing or an empty list; fall back to an empty record.
                venues = e.get("_embedded", {}).get("venues") or [{}]
                all_events.append({
                    "name": e.get("name"),
                    "date": start_info.get("localDate"),
                    "time": start_info.get("localTime"),
                    "venue": venues[0].get("name")
                })

            if pbar is not None:
                pbar.update(1)
            if page >= total_pages - 1:
                break
            page += 1
            time.sleep(1.2)  # respect spike arrest (one+ second between calls)
    finally:
        # Close the bar on every exit path, including failed requests and errors.
        if pbar is not None:
            pbar.close()

    return all_events

In [54]:
playoff_start = datetime(2026, 4, 14)  # Play-In begins
playoff_end   = datetime(2026, 6, 19)  # Potential Game 7 of Finals


def _calendar_month_ranges(start, end):
    """Split [start, end] into (YYYY-MM-DD, YYYY-MM-DD) pairs, one per calendar month."""
    ranges = []
    cur = start
    while cur <= end:
        # The 1st of the current month plus 32 days always lands in the next
        # month; snapping back to day 1 gives the first day of that month.
        next_month_first = (cur.replace(day=1) + timedelta(days=32)).replace(day=1)
        chunk_end = min(next_month_first - timedelta(days=1), end)
        ranges.append((cur.strftime("%Y-%m-%d"), chunk_end.strftime("%Y-%m-%d")))
        cur = chunk_end + timedelta(days=1)
    return ranges


# Derive the monthly request windows from the playoff bounds so the dates are
# defined in exactly one place.
date_ranges = _calendar_month_ranges(playoff_start, playoff_end)

date_ranges

In [55]:
all_events = []

# Outer bar: one tick per venue. Inner bar: one tick per date range for the
# current venue, removed from the display once that venue is finished.
for _, venue_row in tqdm(venue_info_df.iterrows(), total=len(venue_info_df), desc="Venues"):
    for range_start, range_end in tqdm(date_ranges, desc=venue_row['Venue Name'], leave=False):
        # Monthly request; the fetcher splits the range itself when it is too large.
        venue_events = fetch_all_events_for_venue(
            api_key,
            venue_row['Lat'],
            venue_row['Long'],
            range_start,
            range_end,
            size=50,
            show_pages=True,
        )
        # Tag every event with the team whose coordinates produced it.
        for event in venue_events:
            event['team'] = venue_row['Team']
        all_events += venue_events
        time.sleep(1.2)  # small gap between month calls

len(all_events)

### Cleaning

In [62]:
# Make sure 'date' is datetime
events_df['date'] = pd.to_datetime(events_df['date'])

# Define playoff range
playoff_start = pd.Timestamp("2026-04-14")
playoff_end   = pd.Timestamp("2026-06-19")

# Filter to events strictly inside the playoff window
events_df = events_df[(events_df["date"] >= playoff_start) & (events_df["date"] <= playoff_end)]
events_df = events_df.reset_index(drop=True)

events_df.head()

In [71]:
events_df = pd.DataFrame(all_events)

# Filter out add-ons / packages
exclude_keywords = ["voucher", "suite pass", "post game", "item", "educator", "access only", "gift", "discount pass", "tour experience", ]
mask = ~events_df['name'].str.lower().apply(lambda x: any(kw in x for kw in exclude_keywords))
clean_events_df = (
    events_df[mask]
    .drop_duplicates(subset=["team", "date", "time", "venue"])
    .sort_values(["date", "time", "venue"])
    .reset_index(drop=True)
)

clean_events_df = clean_events_df[clean_events_df['venue'].isin(venue_info_df['Venue Name'])]
clean_events_df

Unnamed: 0,name,date,time,venue,team
0,Flyers 2024 ROFR,2025-04-15,19:00:00,Xfinity Mobile Arena,Philadelphia 76ers
1,WWE Monday Night RAW,2025-10-20,16:30:00,Golden 1 Center,Sacramento Kings
3,Playboi Carti - Suites,2025-10-20,19:00:00,Ball Arena,Denver Nuggets
12,Boardroom Access,2025-10-20,19:00:00,Madison Square Garden,New York Knicks
14,Billie Eilish Luxury,2025-10-20,19:00:00,Spectrum Center,Charlotte Hornets
...,...,...,...,...,...
28019,HEAT vs. Atlanta Hawks,2026-04-12,18:00:00,Kaseya Center,Miami Heat
28021,New York Knicks vs. Charlotte Hornets,2026-04-12,18:00:00,Madison Square Garden,New York Knicks
28023,Toronto Raptors vs Brooklyn Nets,2026-04-12,18:00:00,Scotiabank Arena,Toronto Raptors
28024,Boston Celtics vs. Orlando Magic,2026-04-12,18:00:00,TD Garden,Boston Celtics


In [72]:
# Write the cleaned events to disk; the DataFrame index is left out of the file.
output_csv_path = 'nba_playoff_events_2026.csv'
clean_events_df.to_csv(output_csv_path, index=False)