# Tests

In [None]:
from nba_api.stats.endpoints import playbyplayv2
pbp = playbyplayv2.PlayByPlayV2(game_id='0022401194')  # Example Game ID
df = pbp.get_data_frames()[0]
print(df)

In [None]:
from nba_api.stats.endpoints import boxscoreadvancedv2 as bsav2
pbs = bsav2.BoxScoreAdvancedV2(game_id='0022200001')
df = pbs.get_data_frames()[0]
print(df)

In [None]:
from nba_api.stats.endpoints import boxscoreadvancedv2 as bs
from nba_api.stats.endpoints import playbyplayv2 as pbp
from collections import defaultdict
import pandas as pd

game_id = '0022200001'
bs_df = bsav2.BoxScoreAdvancedV2(game_id=game_id).get_data_frames()[0]
pbp_df = playbyplayv2.PlayByPlayV2(game_id=game_id).get_data_frames()[0]

players_df = bs_df[["PLAYER_NAME"]].copy()
players_df["ACTIVE"] = bs_df["START_POSITION"] != ""
players_df["IOC%"] = 0.0

print(players_df)

name_to_idx = {name: i for i, name in enumerate(players_df['PLAYER_NAME'])}

# filter to only subs
subs = pbp_df[pbp_df['EVENTMSGTYPE'] == 8]

for _, row in subs.iterrows():
    out_player = row['PLAYER1_NAME']
    in_player  = row['PLAYER2_NAME']
    
    # flip flags
    players_df.loc[players_df['PLAYER_NAME'] == out_player, 'ACTIVE'] = False
    players_df.loc[players_df['PLAYER_NAME'] == in_player,  'ACTIVE'] = True
    
    # print what happened
    print(f"Play {_}: 🔁 Substitution: {in_player} in, {out_player} out")
    print(players_df[['PLAYER_NAME','ACTIVE']])

print(players_df)

# Code

In [8]:
# Import Statements
from nba_api.stats.endpoints import boxscoreadvancedv2 as bs
from nba_api.stats.endpoints import boxscoretraditionalv2 as bstrad
from nba_api.stats.endpoints import playbyplayv2 as pbp
from nba_api.stats.endpoints import leaguegamefinder as lgf
from collections import defaultdict
import pandas as pd
import re
import unicodedata
import time
from requests.exceptions import ReadTimeout

# Function to reurn a list of game ids being used. 
def get_game_ids(season, season_type = 'Regular Season', league = '00'):
    # 1) Request every game in 2024‑25 regular season
    finder = lgf.LeagueGameFinder(
        season_nullable       = season,
        season_type_nullable  = season_type,
        league_id_nullable    = league
    )
    
    # 2) Pull the DataFrame
    games = finder.get_data_frames()[0]
    
    # 3) Extract unique game IDs
    game_ids = games['GAME_ID'].unique().tolist()
    return game_ids

def get_game_preview(game_id, league = '00'):
    # Query any NBA game matching that ID
    finder = lgf.LeagueGameFinder(
        league_id_nullable      = league,       # NBA
        season_type_nullable   = 'Regular Season',   # restrict to reg‑season
        game_id_nullable        = game_id
    )
    games = finder.get_data_frames()[0]
    
    if games.empty:
        print(f"No game found for ID {game_id}")
        return
    
    # In case of duplicate rows (one per team), just take the first unique game
    game = games.drop_duplicates(subset=['GAME_ID']).iloc[0]

    print(f"Game ID : {game['GAME_ID']}")
    print(f"Date    : {game['GAME_DATE']}")
    print(f"Matchup : {game['MATCHUP']}")

def get_pbp_df(game_id, max_retries=3, timeout=60):
    """
    Fetch play‐by‐play for `game_id`, retrying up to `max_retries`
    if a ReadTimeout occurs. Uses a `timeout` (in seconds) per try.
    """
    for attempt in range(1, max_retries + 1):
        try:
            df = pbp.PlayByPlayV2(
                game_id=game_id,
                timeout=timeout
            ).get_data_frames()[0]
            return df
        except ReadTimeout:
            print(f"⚠️  Timeout on PBP fetch (attempt {attempt}/{max_retries}); retrying…")
            time.sleep(1)
    # All retries failed
    raise ReadTimeout(f"PlayByPlayV2 timed out {max_retries} times for game {game_id}")
    
# Function to get the player list used to create custom statistics
def get_players_df(game_id, max_retries=3, timeout=60):
    bs_df = bs.BoxScoreAdvancedV2(game_id=game_id).get_data_frames()[0]
    players_df = bs_df[["PLAYER_NAME"]].copy()

    # 1) Pull in TEAM_ABBREVIATION if it exists, else make the column
    if "TEAM_ABBREVIATION" in bs_df.columns:
        players_df["TEAM_ABBREVIATION"] = bs_df["TEAM_ABBREVIATION"]
    else:
        players_df["TEAM_ABBREVIATION"] = pd.NA

    # 2) If all of them are NA or blank, assign home/away by row order
    abv = players_df["TEAM_ABBREVIATION"]
    if abv.dropna().empty or (abv.fillna("").astype(str).str.strip() == "").all():
        n   = len(players_df)
        mid = n // 2
        # first half → home, second half → away
        players_df.loc[players_df.index[:mid], "TEAM_ABBREVIATION"] = "home"
        players_df.loc[players_df.index[mid:], "TEAM_ABBREVIATION"] = "away"

    # 3) initialize your active flag
    players_df["ACTIVE"] = bs_df.get("START_POSITION", pd.Series()).fillna("").astype(str) != ""

    return players_df

# Prefixes and suffixes you already have
prefixes = {'da','de','del','di','du','la','le','van','von','st','mac','mc'}
suffixes = {'jr','jr.','sr','sr.','ii','iii','iv','v'}

def strip_accents(text: str) -> str:
    nfkd = unicodedata.normalize('NFKD', text)
    return ''.join(c for c in nfkd if not unicodedata.combining(c))

# single‑letter initial, with optional dot
_initial_re = re.compile(r'^[A-Za-z]\.?$')

def desc_last_key(desc: str) -> str:
    # 1) normalize accents & strip whitespace
    text = strip_accents(desc).strip()
    if not text:
        return ''
    # 2) split into words
    tokens = text.split()
    if not tokens:
        return ''

    # 3) INITIAL check: e.g. "K.", "K"
    first = tokens[0]
    if _initial_re.match(first) and len(tokens) >= 2:
        initial = first[0].lower()
        last    = tokens[1].rstrip(".,")
        name    = f"{initial}. {last}"
        next_idx = 2
    else:
        # 4) PREFIX check (your existing logic)
        if first.lower() in prefixes and len(tokens) >= 2:
            name     = f"{first} {tokens[1]}"
            next_idx = 2
        else:
            name     = first
            next_idx = 1

    # 5) SUFFIX check (your existing logic)
    if next_idx < len(tokens):
        candidate = tokens[next_idx].rstrip(".,")
        if candidate.lower() in suffixes:
            name = f"{name} {candidate}"

    # 6) return lowercase for lookup
    return name.lower()


# Helper to strip accents
def strip_accents(text: str) -> str:
    # Normalize to decomposed form (NFKD), then drop non‑spacing marks
    nfkd = unicodedata.normalize('NFKD', text)
    return ''.join(c for c in nfkd if not unicodedata.combining(c))

def make_key(full_name: str) -> str:
    """
    Build the PBP lookup key for a roster name:
    - strip accents
    - separate suffix (if any) and keep it
    - detect prefix before last name
    - return 'last[ prefix][ suffix]' in lowercase
    """
    name = strip_accents(full_name).strip()
    # 1) pull off suffix
    m = suffix_re.search(name)
    if m:
        suffix = m.group(1).replace('.', '')
        base = name[:m.start()].strip()
    else:
        suffix = ''
        base = name

    parts = base.split()
    # 2) detect prefix → join with last token
    if len(parts) >= 2 and parts[-2].lower() in prefixes:
        last = f"{parts[-2]} {parts[-1]}"
    else:
        last = parts[-1]

    # 3) re‑attach suffix if present
    key = f"{last} {suffix}" if suffix else last
    return key.lower()
    
# Calculate the stats of the game
def calc_stats(pbp, players, game_id):
    # … inside calc_stats(pbp, players, game_id): …
    team_stats = bstrad.BoxScoreTraditionalV2(game_id=game_id) \
                     .get_data_frames()[1]
    home_abbr = team_stats.loc[
        team_stats['HOME_TEAM_FLAG'] == 1, 'TEAM_ABBREVIATION'
    ].iloc[0]
    away_abbr = team_stats.loc[
        team_stats['HOME_TEAM_FLAG'] == 0, 'TEAM_ABBREVIATION'
    ].iloc[0]
    pbp[['HOMEDESCRIPTION','VISITORDESCRIPTION','NEUTRALDESCRIPTION']] = \
        pbp[['HOMEDESCRIPTION','VISITORDESCRIPTION','NEUTRALDESCRIPTION']].fillna('')
    
    # 1) Compute every player’s base key
    players['BASE_KEY'] = players['PLAYER_NAME'].apply(make_key)
    
    # 2) Find which base_keys are duplicated
    dupe_bases = set(
        players['BASE_KEY'][players['BASE_KEY'].duplicated(keep=False)]
    )
    
    # 3) Decide each player’s initial LAST key and populate full_to_key
    full_to_key = {}
    for idx, full in players['PLAYER_NAME'].items():
        base = players.at[idx, 'BASE_KEY']
        if base in dupe_bases:
            initial = strip_accents(full).split()[0][0].lower()
            key = f"{initial}. {base}"
        else:
            key = base
        players.at[idx, 'LAST']    = key
        full_to_key[full]          = key
    
    # 3b) **Post‑process**: if any LASTs still collide, append initials and update full_to_key
    dupe_keys = players['LAST'][players['LAST'].duplicated(keep=False)].unique()
    for dup in dupe_keys:
        for idx in players.index[players['LAST'] == dup]:
            full = players.at[idx, 'PLAYER_NAME']
            initial = strip_accents(full).split()[0][0].lower()
            new_key = f"{initial}. {dup}"
            players.at[idx, 'LAST']      = new_key
            full_to_key[full]            = new_key
    
    # 4) Build your reverse lookup from the **final** LAST keys
    last_to_full = { key: full for full, key in full_to_key.items() }



    # Set custom stats
    players["IOC"] = 0
    players["POS"] = 0
    players["AIOC"] = 0.0

    players["POINTS"] = 0
    players["ASSISTS"] = 0
     # Counters for “excluded” possessions
    poss_count  = defaultdict(int)
    poss_points = defaultdict(int)

    # Possession‐level state
    current_team    = None    # 'home' or 'away'
    current_points  = 0
    current_scorers = set()
    current_assists = set()

    print(players)
    
    # Loop though plays of the game for analysis
    for _, row in pbp.iterrows(): 
        # 1) SUBSTITUTIONS: flip ACTIVE flags
        if row['EVENTMSGTYPE'] == 8:
            out_player = row['PLAYER1_NAME']
            in_player  = row['PLAYER2_NAME']
            
            # flip flags
            players.loc[players['PLAYER_NAME'] == out_player, 'ACTIVE'] = False
            players.loc[players['PLAYER_NAME'] == in_player,  'ACTIVE'] = True
            #print("Substitution")
            #print(players)
            continue
            
        # Merge descriptions
        desc = row['HOMEDESCRIPTION'] + row['VISITORDESCRIPTION'] + row['NEUTRALDESCRIPTION']
        scorer = None
        assister = None
        # ——— Field Goals Made ———
        if row['EVENTMSGTYPE'] == 1:
            print(desc)

            # 4a) Extract scorer’s last name (first token)
            last = desc_last_key(desc)
            scorer = last_to_full.get(last)
            print(scorer)
            idx = players.index[players['PLAYER_NAME'] == scorer][0]
            play_pts = players.at[idx, "POINTS"]
            if scorer:
                # 4b) Extract points from "(n PTS)"
                m_pts = re.search(r'\((\d+)\s+PTS\)', desc)
                if m_pts:
                    pts = int(m_pts.group(1))
                    #players.loc[players['PLAYER_NAME'] == scorer, "POINTS"] = pts
                    players.at[idx, "POINTS"] = pts
                    play_pts = pts - play_pts
                #print(players.loc[scorer])
    
            # 4c) Extract assist if present: "(Lastname n AST)"
            assister = None
            m_ast = re.search(r'\((\w+)\s+(\d+)\s+AST\)', desc)
            if m_ast:
                last_a  = m_ast.group(1)
                count_a = int(m_ast.group(2))
                assister = last_to_full.get(last_a)
                if assister:
                    idx = players.index[players['PLAYER_NAME'] == assister][0]
                    players.at[idx, 'ASSISTS'] += 1

            # Add IOC to players who were on the team that scored and active but weren't the scorer or assister
            if scorer:
                team_series = players.loc[
                    players.PLAYER_NAME == scorer, 
                    'TEAM_ABBREVIATION'
                ]
                if team_series.empty:
                    # no matching row — skip IOC for this play (or assign default)
                    print(desc)
                    print(f"Scorer {scorer!r} not found in players; skipping IOC increment")
                    continue
            
                team = team_series.iloc[0]

                mask = (
                    (players['TEAM_ABBREVIATION'] == team) &
                    (players['ACTIVE']) &
                    (players['PLAYER_NAME'] != scorer)
                )
                if assister:
                    mask &= (players['PLAYER_NAME'] != assister)
                players.loc[mask, 'IOC'] += play_pts

        # ——— Free Throws Made ———
        if row['EVENTMSGTYPE'] == 3 and 'PTS' in desc:
            #print("FT:", desc)
        
            # 1) Extract shooter’s last name & full name
            last = desc_last_key(desc)
            shooter = last_to_full.get(last)
            if not shooter:
                continue
        
            # 2) Compute how many new points this FT added
            idx = players.index[players['PLAYER_NAME'] == shooter][0]
            m_pts = re.search(r'\((\d+)\s+PTS\)', desc)
            if not m_pts:
                continue
            total_pts = int(m_pts.group(1))
            players.at[idx, "POINTS"] = total_pts

        is_def_reb = (row['EVENTMSGTYPE'] == 4 and 'Defensive' in desc)
        if row['EVENTMSGTYPE'] in (1, 5, 6) or is_def_reb:
            scorer = None
            assister = None
            # 1) figure out which team just had the ball
            offense_abbr = None
    
            if row['EVENTMSGTYPE'] == 1:   # made FG
                last = desc_last_key(desc)
                scorer = last_to_full.get(last)
                if scorer:
                    offense_abbr = players.loc[
                        players.PLAYER_NAME == scorer, 'TEAM_ABBREVIATION'
                    ].iloc[0]
    
            elif row['EVENTMSGTYPE'] == 5: # turnover
                last = desc_last_key(desc)
                turner = last_to_full.get(last)
                if turner:
                    offense_abbr = players.loc[
                        players.PLAYER_NAME == turner, 'TEAM_ABBREVIATION'
                    ].iloc[0]
    
            elif is_def_reb:             # defensive rebound
                last = desc_last_key(desc)
                rebounder = last_to_full.get(last)
                if rebounder:
                    # possession ended for the *other* team, so rebounder’s team is new offense
                    offense_abbr = players.loc[
                        players.PLAYER_NAME == rebounder, 'TEAM_ABBREVIATION'
                    ].iloc[0]
    
            # 2) now credit POS and IOC
            if offense_abbr:
                # mask of teammates on court
                base_mask = (
                    (players['TEAM_ABBREVIATION'] == offense_abbr) &
                    (players['ACTIVE'])
                )
                
                # Build exclusion list
                excl = {scorer}
                if assister:
                    excl.add(assister)
                
                # Final mask: exclude scorer and assister
                mask = base_mask & (~players['PLAYER_NAME'].isin(excl))
                
                # Now increment POS and IOC only for those players
                players.loc[mask, 'POS'] += 1
                                
            # 3) reset for next possession
            current_points = 0
            current_scorers.clear()
            current_assists.clear()

            #print(f"Event: {row['EVENTMSGTYPE']}")
            #print(f"Possession Ended!")
            #print(f"    Scorer:   {scorer}")
            #print(f"    Assister: {assister}")


    # Remove the helper column
    players.drop(columns='LAST', inplace=True)
    # Calculate AIOC
    players['AIOC'] = players['IOC'] / players['POS']
    return players

def main():
    game_ids = get_game_ids(season = '2024-25', season_type = 'Regular Season', league = '00')
    #print(game_ids)
    for game_id in game_ids: # Loop though game ids to make an analysis for each game
        print(game_id)
        preview = get_game_preview(game_id = game_id)
        #print(bs.BoxScoreAdvancedV2(game_id=game_id).get_data_frames()[0])
        players = get_players_df(game_id = game_id)
        #print(players)
        pbp = get_pbp_df(game_id = game_id)
        calc_players = calc_stats(pbp = pbp, players = players, game_id = game_id)
        print("Game Calculated:")
        print(calc_players)
        

if __name__ == "__main__":
    main()

0022401186
Game ID : 0022401194
Date    : 2025-04-13
Matchup : MEM vs. DAL
                 PLAYER_NAME TEAM_ABBREVIATION  ACTIVE       BASE_KEY  \
0           Tristan da Silva               ORL    True       da silva   
1             Jonathan Isaac               ORL    True          isaac   
2         Wendell Carter Jr.               ORL    True      carter jr   
3   Kentavious Caldwell-Pope               ORL    True  caldwell-pope   
4              Anthony Black               ORL    True          black   
5                Jett Howard               ORL   False         howard   
6               Cole Anthony               ORL   False        anthony   
7                Gary Harris               ORL   False         harris   
8               Goga Bitadze               ORL   False        bitadze   
9              Caleb Houstan               ORL   False        houstan   
10               Cory Joseph               ORL   False         joseph   
11        Zaccharie Risacher               ATL   

IndexError: index 0 is out of bounds for axis 0 with size 0