In [2]:
# ESPN Current Season Scraper - 2025-26 Season
# Use this to get current season data when NBA API is stale

# Installs the scraper's dependencies into the kernel's environment.
# NOTE(review): lxml is not referenced by the scraper cell visible in this
# notebook - confirm it is needed elsewhere before keeping it in this list.
%pip install requests pandas numpy lxml



In [None]:
# ========================================================
# ESPN NBA 2025-26 Season Scraper (Cleaned & Error-Proofed)
# ========================================================

import requests
import pandas as pd
import numpy as np
import time
from datetime import datetime, timedelta

print("✓ Imports successful")

# -----------------------------
# CONFIGURATION
# -----------------------------
# Season label stamped on every output row, and the CSV the results go to.
CURRENT_SEASON = "2025-26"
OUTPUT_FILE = "nba_current_season_2025_26.csv"

# Browser-like User-Agent sent with every ESPN request.
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"

# ESPN endpoints: the per-day scoreboard and the per-game summary.
ESPN_SCOREBOARD_API = (
    "https://site.api.espn.com/apis/site/v2"
    "/sports/basketball/nba/scoreboard"
)
ESPN_SUMMARY_API = (
    "https://site.api.espn.com/apis/site/v2"
    "/sports/basketball/nba/summary"
)

# Players whose box-score lines are kept (exact name matches).
TARGET_PLAYERS = [
    "LeBron James",
    "Stephen Curry",
    "Kevin Durant",
    "Giannis Antetokounmpo",
    "Luka Doncic",
    "Nikola Jokic",
    "Joel Embiid",
    "Jayson Tatum",
    "Damian Lillard",
    "Anthony Davis",
    "Devin Booker",
    "Donovan Mitchell",
    "Jaylen Brown",
    "Trae Young",
    "Anthony Edwards",
    "Shai Gilgeous-Alexander",
    "Jimmy Butler",
    "Paul George",
    "Tyrese Haliburton",
    "De'Aaron Fox",
    "Domantas Sabonis",
    "Bam Adebayo",
    "Julius Randle",
    "DeMar DeRozan",
    "Pascal Siakam",
    "LaMelo Ball",
    "James Harden",
    "Karl-Anthony Towns",
    "Nikola Vucevic",
    "Jalen Brunson",
    "Fred VanVleet",
    "Tyler Herro",
    "Victor Wembanyama",
    "Paolo Banchero",
    "Franz Wagner",
    "Scottie Barnes",
    "Cade Cunningham",
    "Alperen Sengun",
    "Jaren Jackson Jr",
    "Mikal Bridges",
    "Darius Garland",
    "Lauri Markkanen",
    "Desmond Bane",
    "Jalen Williams",
    "OG Anunoby",
    "Jarrett Allen",
    "Kristaps Porzingis",
    "CJ McCollum",
]

# Lower-cased copy of the target list, used for case-insensitive lookups.
TARGET_PLAYERS_LOWER = list(map(str.lower, TARGET_PLAYERS))


# -----------------------------
# SAFE PARSERS
# -----------------------------
def safe_parse(value):
    """
    Converts ESPN fields safely into floats.
    Handles:
        - "0"
        - 0
        - None
        - "0-0"
        - "5-12"

    For "made-attempted" strings only the part before the first hyphen is
    used. Anything that is not a plain non-negative number (including signed
    values such as "+5" or "-7", and malformed text such as "1.2.3" or "--")
    yields 0.0 instead of raising.

    Args:
        value: Raw field; any object is accepted and stringified.

    Returns:
        The parsed number as a float, or 0.0 when nothing usable is found.
    """
    if value is None:
        return 0.0

    text = str(value).strip()

    # Values like "5-12" → only take the first part
    candidate = text.split("-", 1)[0].strip() if "-" in text else text

    # Allow at most ONE decimal point; stripping every "." (as a plain
    # replace would) lets strings like "1.2.3" through to float().
    if not candidate.replace(".", "", 1).isdigit():
        return 0.0

    try:
        return float(candidate)
    except ValueError:
        # str.isdigit() accepts a few characters float() rejects (e.g. "²").
        return 0.0


def safe_split(value):
    """
    Converts "5-12" → (5.0, 12.0)

    The value is split on its FIRST hyphen only, so strings with extra
    hyphens (e.g. "--") no longer raise on tuple unpacking; each side is
    then converted with safe_parse.

    Args:
        value: Raw "made-attempted" field; None and hyphen-less values are
            treated as empty.

    Returns:
        A (made, attempted) tuple of floats; (0.0, 0.0) when value is None
        or contains no hyphen.
    """
    if value is None:
        return 0.0, 0.0

    made, hyphen, attempted = str(value).partition("-")
    if not hyphen:
        return 0.0, 0.0

    return safe_parse(made), safe_parse(attempted)


# -----------------------------
# ESPN API HELPERS
# -----------------------------
def get_games_for_date(date_str, timeout=15):
    """
    Fetch the ESPN scoreboard events for one day.

    Args:
        date_str: Value sent as the scoreboard `dates` query parameter
            (the scrape loop supplies YYYYMMDD strings).
        timeout: Seconds to wait on the request before giving up, so a
            stalled connection cannot hang the whole scrape.

    Returns:
        The list stored under "events" in the JSON response, or [] when the
        request raises, returns a non-200 status, or has no events.
    """
    headers = {"User-Agent": USER_AGENT}

    try:
        time.sleep(0.25)  # short pause before every request to pace the calls
        resp = requests.get(
            ESPN_SCOREBOARD_API,
            params={"dates": date_str},
            headers=headers,
            timeout=timeout,
        )
        if resp.status_code == 200:
            # `or []` also covers a payload where "events" is present but null.
            return resp.json().get("events") or []
        print(f"Scoreboard for {date_str} returned HTTP {resp.status_code}")
    except Exception as e:
        # Best-effort: one failed day must not abort the scrape.
        print(f"Error reading scoreboard for {date_str}: {e}")

    return []


def get_game_box_score(game_id, timeout=15):
    """
    Fetch the ESPN summary payload for a single game.

    Args:
        game_id: Event id sent as the summary `event` query parameter.
        timeout: Seconds to wait on the request before giving up, so a
            stalled connection cannot hang the whole scrape.

    Returns:
        The decoded JSON response, or None when the request raises or
        returns a non-200 status.
    """
    headers = {"User-Agent": USER_AGENT}

    try:
        time.sleep(0.25)  # short pause before every request to pace the calls
        resp = requests.get(
            ESPN_SUMMARY_API,
            params={"event": game_id},
            headers=headers,
            timeout=timeout,
        )
        if resp.status_code == 200:
            return resp.json()
        print(f"Summary for game {game_id} returned HTTP {resp.status_code}")
    except Exception as e:
        # Best-effort: one failed game must not abort the scrape.
        print(f"Error reading game {game_id}: {e}")

    return None


# -----------------------------
# MAIN PARSER
# -----------------------------
# Column label -> position used when a stat group does not describe its own
# columns. This is the fixed layout the parser has always assumed.
_FALLBACK_STAT_INDEX = {
    "MIN": 0, "FG": 1, "3PT": 2, "FT": 3, "OREB": 4, "DREB": 5, "REB": 6,
    "AST": 7, "STL": 8, "BLK": 9, "TO": 10, "PF": 11, "PTS": 12,
}


def _normalize_player_name(name):
    """Lower-case a name and drop accents, periods and repeated spaces."""
    import unicodedata  # local import keeps this block self-contained

    decomposed = unicodedata.normalize("NFKD", str(name))
    without_accents = "".join(
        ch for ch in decomposed if not unicodedata.combining(ch)
    )
    return " ".join(without_accents.replace(".", "").lower().split())


def _resolve_stat_index(stat_group):
    """Return {column label: position} for one stat group's athlete rows."""
    # NOTE(review): assumes the group lists its column labels under "names"
    # (or "labels") using MIN/FG/3PT/... style codes - confirm against a live
    # payload. When they are absent or incomplete the fixed layout is used.
    labels = stat_group.get("names") or stat_group.get("labels") or []
    position_of = {}
    for position, label in enumerate(labels):
        position_of.setdefault(str(label).strip().upper(), position)

    if all(label in position_of for label in _FALLBACK_STAT_INDEX):
        return {label: position_of[label] for label in _FALLBACK_STAT_INDEX}
    return _FALLBACK_STAT_INDEX


def _game_date_from_header(game_data):
    """Return the game's calendar date as 'YYYY-MM-DD' ('' when unknown)."""
    competitions = (game_data.get("header") or {}).get("competitions") or [{}]
    raw = (competitions[0] or {}).get("date") or ""
    raw = str(raw)

    # NOTE(review): assumes a timestamp with a time part is UTC, so an evening
    # US tip-off would otherwise be filed under the following day. Convert to
    # US Eastern before taking the date; fall back to the raw prefix on error.
    if "T" in raw:
        try:
            eastern = pd.to_datetime(raw, utc=True).tz_convert("America/New_York")
            return eastern.strftime("%Y-%m-%d")
        except (ValueError, TypeError, KeyError):
            pass
    return raw[:10]


def extract_player_stats_from_game(game_data, game_id):
    """
    Build one stat row per tracked player from an ESPN game summary.

    Only athletes whose display name matches an entry of TARGET_PLAYERS_LOWER
    are kept; matching ignores case, accents and periods (so "Jr." matches
    "Jr"). Each stat is located through the stat group's own column labels
    when available, otherwise through the fixed positional layout.

    Args:
        game_data: Decoded summary payload (dict) or None.
        game_id: Event id copied into every row as GAME_ID.

    Returns:
        A list of row dicts (PLAYER_NAME, GAME_ID, GAME_DATE, TEAM, SEASON,
        MIN and the shooting/counting stats). Empty when the payload has no
        box score or no tracked player has a complete stat line.
    """
    player_stats = []

    if not game_data:
        return player_stats

    team_blocks = (game_data.get("boxscore") or {}).get("players") or []
    if not team_blocks:
        return player_stats

    game_date = _game_date_from_header(game_data)
    wanted_names = {_normalize_player_name(p) for p in TARGET_PLAYERS_LOWER}

    # Iterate both teams
    for team_block in team_blocks:

        team_abbrev = (team_block.get("team") or {}).get("abbreviation", "")

        for stat_group in team_block.get("statistics") or []:
            index = _resolve_stat_index(stat_group)
            min_length = max(index.values()) + 1

            for athlete in stat_group.get("athletes") or []:
                player_name = (athlete.get("athlete") or {}).get("displayName") or ""

                # Only track target players
                if _normalize_player_name(player_name) not in wanted_names:
                    continue

                raw_stats = athlete.get("stats") or []

                if len(raw_stats) < min_length:
                    continue  # skip rows without a full stat line

                fgm, fga = safe_split(raw_stats[index["FG"]])
                fg3m, fg3a = safe_split(raw_stats[index["3PT"]])
                ftm, fta = safe_split(raw_stats[index["FT"]])

                row = {
                    "PLAYER_NAME": player_name,
                    "GAME_ID": game_id,
                    "GAME_DATE": game_date,
                    "TEAM": team_abbrev,
                    "SEASON": CURRENT_SEASON,
                    "MIN": raw_stats[index["MIN"]],
                    "FGM": fgm,
                    "FGA": fga,
                    "FG3M": fg3m,
                    "FG3A": fg3a,
                    "FTM": ftm,
                    "FTA": fta,
                    "OREB": safe_parse(raw_stats[index["OREB"]]),
                    "DREB": safe_parse(raw_stats[index["DREB"]]),
                    "REB": safe_parse(raw_stats[index["REB"]]),
                    "AST": safe_parse(raw_stats[index["AST"]]),
                    "STL": safe_parse(raw_stats[index["STL"]]),
                    "BLK": safe_parse(raw_stats[index["BLK"]]),
                    "TOV": safe_parse(raw_stats[index["TO"]]),
                    "PF": safe_parse(raw_stats[index["PF"]]),
                    "PTS": safe_parse(raw_stats[index["PTS"]]),
                }

                player_stats.append(row)

    return player_stats


# ====================================================
# MAIN SCRAPE LOOP
# ====================================================

print("Collecting NBA 2025-26 season data from ESPN...\n")

# Scan window: the 60 days up to and including today.
end_date = datetime.now()
start_date = end_date - timedelta(days=60)

all_stats = []
games_found = 0
dates_scanned = 0
seen_game_ids = set()  # guards against the same game being returned twice

current_date = start_date

while current_date <= end_date:

    date_str = current_date.strftime("%Y%m%d")
    dates_scanned += 1

    if dates_scanned % 10 == 0:
        print(f"→ Scanned {dates_scanned} days, {games_found} games, {len(all_stats)} rows...")

    games = get_games_for_date(date_str)

    for game in games:
        game_id = game.get("id")
        if not game_id or game_id in seen_game_ids:
            continue

        # Skip games flagged as not completed (scheduled or still in progress):
        # their box scores are empty or partial and would be saved as final.
        # NOTE(review): relies on the scoreboard event exposing
        # status.type.completed; events without that flag are still processed.
        status_type = (game.get("status") or {}).get("type") or {}
        if not status_type.get("completed", True):
            continue

        seen_game_ids.add(game_id)
        games_found += 1

        game_data = get_game_box_score(game_id)
        if game_data:
            stats = extract_player_stats_from_game(game_data, game_id)
            all_stats.extend(stats)

    current_date += timedelta(days=1)

print("\n============================")
print(" SCRAPE COMPLETE ")
print("============================")
print(f"Dates scanned: {dates_scanned}")
print(f"Games found: {games_found}")
print(f"Player stat rows: {len(all_stats)}")

# -----------------------------
# SAVE RESULTS
# -----------------------------
if all_stats:

    df = pd.DataFrame(all_stats)

    # Convert date
    df["GAME_DATE"] = pd.to_datetime(df["GAME_DATE"])

    # Convert MIN format -> decimal
    def convert_minutes(m):
        """
        Convert a minutes value to decimal minutes.

        Accepts "MM:SS" strings or anything float() can read; values that
        cannot be converted become 0.0.
        """
        try:
            m = str(m)
            if ":" in m:
                a, b = m.split(":")
                return float(a) + float(b) / 60
            return float(m)
        except (ValueError, TypeError):
            # Only conversion failures are swallowed; a bare `except` would
            # also hide KeyboardInterrupt / SystemExit.
            return 0.0

    df["MIN"] = df["MIN"].apply(convert_minutes)

    # Percentages (zero attempts -> NaN rather than a division by zero)
    df["FG_PCT"] = df["FGM"] / df["FGA"].replace(0, np.nan)
    df["FG3_PCT"] = df["FG3M"] / df["FG3A"].replace(0, np.nan)
    df["FT_PCT"] = df["FTM"] / df["FTA"].replace(0, np.nan)

    df = df.sort_values(["PLAYER_NAME", "GAME_DATE"])

    df.to_csv(OUTPUT_FILE, index=False)

    print(f"\n✓ Saved: {OUTPUT_FILE}")
    print(f"Players collected: {df['PLAYER_NAME'].nunique()}")
    print(f"Date range: {df['GAME_DATE'].min().date()} → {df['GAME_DATE'].max().date()}")

else:
    print("\n✗ ERROR: No data collected.")


✓ Imports successful

Collecting 2025-26 season data from ESPN...
Target players: 48

[1/48] LeBron James... ✗ (player not found)
[2/48] Stephen Curry... ✗ (player not found)
[3/48] Kevin Durant... ✗ (player not found)
[4/48] Giannis Antetokounmpo... ✗ (player not found)
[5/48] Luka Doncic... ✗ (player not found)
[6/48] Nikola Jokic... ✗ (player not found)
[7/48] Joel Embiid... ✗ (player not found)
[8/48] Jayson Tatum... ✗ (player not found)
[9/48] Damian Lillard... ✗ (player not found)
[10/48] Anthony Davis... ✗ (player not found)
[11/48] Devin Booker... ✗ (player not found)
[12/48] Donovan Mitchell... ✗ (player not found)
[13/48] Jaylen Brown... ✗ (player not found)
[14/48] Trae Young... ✗ (player not found)
[15/48] Anthony Edwards... ✗ (player not found)
[16/48] Shai Gilgeous-Alexander... ✗ (player not found)
[17/48] Jimmy Butler... ✗ (player not found)
[18/48] Paul George... ✗ (player not found)
[19/48] Tyrese Haliburton... ✗ (player not found)
[20/48] De'Aaron Fox... ✗ (player not