# Tests

In [10]:
from nba_api.stats.endpoints import playbyplayv2
pbp = playbyplayv2.PlayByPlayV2(game_id='0022401194')  # Example Game ID
df = pbp.get_data_frames()[0]
print(df)

        GAME_ID  EVENTNUM  EVENTMSGTYPE  EVENTMSGACTIONTYPE  PERIOD  \
0    0022401194         2            12                   0       1   
1    0022401194         4            10                   0       1   
2    0022401194         7             1                  52       1   
3    0022401194         9             1                  78       1   
4    0022401194        10             1                  75       1   
..          ...       ...           ...                 ...     ...   
460  0022401194       663             2                   1       4   
461  0022401194       664             4                   0       4   
462  0022401194       666             1                  86       4   
463  0022401194       667             5                  11       4   
464  0022401194       668            13                   0       4   

    WCTIMESTRING PCTIMESTRING  \
0        3:40 PM        12:00   
1        3:40 PM        12:00   
2        3:40 PM        11:48   
3        3:41 P

In [None]:
from nba_api.stats.endpoints import boxscoreadvancedv2 as bsav2
pbs = bsav2.BoxScoreAdvancedV2(game_id='0022200001')
df = pbs.get_data_frames()[0]
print(df)

In [None]:
from nba_api.stats.endpoints import boxscoreadvancedv2 as bs
from nba_api.stats.endpoints import playbyplayv2 as pbp
from collections import defaultdict
import pandas as pd

game_id = '0022200001'
bs_df = bsav2.BoxScoreAdvancedV2(game_id=game_id).get_data_frames()[0]
pbp_df = playbyplayv2.PlayByPlayV2(game_id=game_id).get_data_frames()[0]

players_df = bs_df[["PLAYER_NAME"]].copy()
players_df["ACTIVE"] = bs_df["START_POSITION"] != ""
players_df["IOC%"] = 0.0

print(players_df)

name_to_idx = {name: i for i, name in enumerate(players_df['PLAYER_NAME'])}

# filter to only subs
subs = pbp_df[pbp_df['EVENTMSGTYPE'] == 8]

for _, row in subs.iterrows():
    out_player = row['PLAYER1_NAME']
    in_player  = row['PLAYER2_NAME']
    
    # flip flags
    players_df.loc[players_df['PLAYER_NAME'] == out_player, 'ACTIVE'] = False
    players_df.loc[players_df['PLAYER_NAME'] == in_player,  'ACTIVE'] = True
    
    # print what happened
    print(f"Play {_}: 🔁 Substitution: {in_player} in, {out_player} out")
    print(players_df[['PLAYER_NAME','ACTIVE']])

print(players_df)

# Code

In [25]:
# Import Statements
from nba_api.stats.endpoints import boxscoreadvancedv2 as bs
from nba_api.stats.endpoints import playbyplayv2 as pbp
from nba_api.stats.endpoints import leaguegamefinder as lgf
from collections import defaultdict
import pandas as pd
import re
import unicodedata
import time
from requests.exceptions import ReadTimeout

# Function to reurn a list of game ids being used. 
def get_game_ids(season, season_type = 'Regular Season', league = '00'):
    # 1) Request every game in 2024‑25 regular season
    finder = lgf.LeagueGameFinder(
        season_nullable       = season,
        season_type_nullable  = season_type,
        league_id_nullable    = league
    )
    
    # 2) Pull the DataFrame
    games = finder.get_data_frames()[0]
    
    # 3) Extract unique game IDs
    game_ids = games['GAME_ID'].unique().tolist()
    return game_ids

def get_game_preview(game_id, league = '00'):
    # Query any NBA game matching that ID
    finder = lgf.LeagueGameFinder(
        league_id_nullable      = league,       # NBA
        season_type_nullable   = 'Regular Season',   # restrict to reg‑season
        game_id_nullable        = game_id
    )
    games = finder.get_data_frames()[0]
    
    if games.empty:
        print(f"No game found for ID {game_id}")
        return
    
    # In case of duplicate rows (one per team), just take the first unique game
    game = games.drop_duplicates(subset=['GAME_ID']).iloc[0]

    print(f"Game ID : {game['GAME_ID']}")
    print(f"Date    : {game['GAME_DATE']}")
    print(f"Matchup : {game['MATCHUP']}")

def get_pbp_df(game_id, max_retries=3, timeout=60):
    """
    Fetch play‐by‐play for `game_id`, retrying up to `max_retries`
    if a ReadTimeout occurs. Uses a `timeout` (in seconds) per try.
    """
    for attempt in range(1, max_retries + 1):
        try:
            df = pbp.PlayByPlayV2(
                game_id=game_id,
                timeout=timeout
            ).get_data_frames()[0]
            return df
        except ReadTimeout:
            print(f"⚠️  Timeout on PBP fetch (attempt {attempt}/{max_retries}); retrying…")
            time.sleep(1)
    # All retries failed
    raise ReadTimeout(f"PlayByPlayV2 timed out {max_retries} times for game {game_id}")
    
# Function to get the player list used to create custom statistics
def get_players_df(game_id, max_retries=3, timeout=60):
    bs_df = bs.BoxScoreAdvancedV2(game_id=game_id).get_data_frames()[0]
    players_df = bs_df[["PLAYER_NAME"]].copy()

    # 1) Pull in TEAM_ABBREVIATION if it exists, else make the column
    if "TEAM_ABBREVIATION" in bs_df.columns:
        players_df["TEAM_ABBREVIATION"] = bs_df["TEAM_ABBREVIATION"]
    else:
        players_df["TEAM_ABBREVIATION"] = pd.NA

    # 2) If all of them are NA or blank, assign home/away by row order
    abv = players_df["TEAM_ABBREVIATION"]
    if abv.dropna().empty or (abv.fillna("").astype(str).str.strip() == "").all():
        n   = len(players_df)
        mid = n // 2
        # first half → home, second half → away
        players_df.loc[players_df.index[:mid], "TEAM_ABBREVIATION"] = "home"
        players_df.loc[players_df.index[mid:], "TEAM_ABBREVIATION"] = "away"

    # 3) initialize your active flag
    players_df["ACTIVE"] = bs_df.get("START_POSITION", pd.Series()).fillna("").astype(str) != ""

    return players_df


# the same prefixes set you used for extract_last()
prefixes = {
    'da', 'de', 'del', 'di', 'du',
    'la', 'le', 'van', 'von', 'st', 'mac', 'mc'
}

def desc_last_key(desc):
    parts = desc.split()
    if len(parts) >= 2 and parts[0].lower() in prefixes:
        # first token is a prefix → combine with next token
        return f"{parts[0]} {parts[1]}"
    # otherwise just the first token
    return parts[0]

# Helper to strip accents
def strip_accents(text: str) -> str:
    # Normalize to decomposed form (NFKD), then drop non‑spacing marks
    nfkd = unicodedata.normalize('NFKD', text)
    return ''.join(c for c in nfkd if not unicodedata.combining(c))

# Calculate the stats of the game
def calc_stats(pbp, players, game_id):
    pbp[['HOMEDESCRIPTION','VISITORDESCRIPTION','NEUTRALDESCRIPTION']] = \
        pbp[['HOMEDESCRIPTION','VISITORDESCRIPTION','NEUTRALDESCRIPTION']].fillna('')

    # 1) Suffixes to strip off the end
    suffix_pattern = r'\s+(?:Jr\.?|Sr\.?|I{2,3}|IV|V)$'
    
    # 2) Common lowercase prefixes to treat as part of the last name
    prefixes = {
        'da', 'de', 'del', 'di', 'du',
        'la', 'le', 'van', 'von', 'st', 'mac', 'mc'
    }
    
    def extract_last(full_name):
        name = strip_accents(full_name)
        # a) remove suffix
        clean = re.sub(suffix_pattern, '', name, flags=re.IGNORECASE).strip()
        parts = clean.split()
        # b) if the token before last is a known prefix, include it
        if len(parts) >= 2 and parts[-2].lower() in prefixes:
            return f"{parts[-2]} {parts[-1]}"
        # c) otherwise just the final word
        return parts[-1]
    
    # Build the LAST column and map
    players['LAST'] = players['PLAYER_NAME'].apply(extract_last)
    last_to_full = dict(zip(players['LAST'], players['PLAYER_NAME']))

    # 1) Identify which last names are duplicated
    dupes = players['LAST'][players['LAST'].duplicated(keep=False)].unique()
    
    # 2) For each duplicated last, update players['LAST'] to include the first‑initial
    for idx, row in players.iterrows():
        last = row['LAST']
        if last in dupes:
            initial = row['PLAYER_NAME'].split()[0][0]   # first letter of first name
            players.at[idx, 'LAST'] = f"{initial}. {last}"
    
    # 3) Rebuild your lookup
    last_to_full = dict(zip(players['LAST'], players['PLAYER_NAME']))

    # Set custom stats
    players["IOC"] = 0
    players["POS"] = 0
    players["AIOC"] = 0.0

    players["POINTS"] = 0
    players["ASSISTS"] = 0
     # Counters for “excluded” possessions
    poss_count  = defaultdict(int)
    poss_points = defaultdict(int)

    # Possession‐level state
    current_team    = None    # 'home' or 'away'
    current_points  = 0
    current_scorers = set()
    current_assists = set()

    print(players)
    
    # Loop though plays of the game for analysis
    for _, row in pbp.iterrows(): 
        # 1) SUBSTITUTIONS: flip ACTIVE flags
        if row['EVENTMSGTYPE'] == 8:
            out_player = row['PLAYER1_NAME']
            in_player  = row['PLAYER2_NAME']
            
            # flip flags
            players.loc[players['PLAYER_NAME'] == out_player, 'ACTIVE'] = False
            players.loc[players['PLAYER_NAME'] == in_player,  'ACTIVE'] = True
            #print("Substitution")
            #print(players)
            continue
            
        # Merge descriptions
        desc = row['HOMEDESCRIPTION'] + row['VISITORDESCRIPTION'] + row['NEUTRALDESCRIPTION']
        scorer = None
        assister = None
        # ——— Field Goals Made ———
        if row['EVENTMSGTYPE'] == 1:
            print(desc)

            # 4a) Extract scorer’s last name (first token)
            last = desc_last_key(desc)
            scorer = last_to_full.get(last)
            print(scorer)
            idx = players.index[players['PLAYER_NAME'] == scorer][0]
            play_pts = players.at[idx, "POINTS"]
            if scorer:
                # 4b) Extract points from "(n PTS)"
                m_pts = re.search(r'\((\d+)\s+PTS\)', desc)
                if m_pts:
                    pts = int(m_pts.group(1))
                    #players.loc[players['PLAYER_NAME'] == scorer, "POINTS"] = pts
                    players.at[idx, "POINTS"] = pts
                    play_pts = pts - play_pts
                #print(players.loc[scorer])
    
            # 4c) Extract assist if present: "(Lastname n AST)"
            assister = None
            m_ast = re.search(r'\((\w+)\s+(\d+)\s+AST\)', desc)
            if m_ast:
                last_a  = m_ast.group(1)
                count_a = int(m_ast.group(2))
                assister = last_to_full.get(last_a)
                if assister:
                    idx = players.index[players['PLAYER_NAME'] == assister][0]
                    players.at[idx, 'ASSISTS'] += 1

            # Add IOC to players who were on the team that scored and active but weren't the scorer or assister
            if scorer:
                team_series = players.loc[
                    players.PLAYER_NAME == scorer, 
                    'TEAM_ABBREVIATION'
                ]
                if team_series.empty:
                    # no matching row — skip IOC for this play (or assign default)
                    print(desc)
                    print(f"Scorer {scorer!r} not found in players; skipping IOC increment")
                    continue
            
                team = team_series.iloc[0]

                mask = (
                    (players['TEAM_ABBREVIATION'] == team) &
                    (players['ACTIVE']) &
                    (players['PLAYER_NAME'] != scorer)
                )
                if assister:
                    mask &= (players['PLAYER_NAME'] != assister)
                players.loc[mask, 'IOC'] += play_pts

        # ——— Free Throws Made ———
        if row['EVENTMSGTYPE'] == 3 and 'PTS' in desc:
            #print("FT:", desc)
        
            # 1) Extract shooter’s last name & full name
            last = desc_last_key(desc)
            shooter = last_to_full.get(last)
            if not shooter:
                continue
        
            # 2) Compute how many new points this FT added
            idx = players.index[players['PLAYER_NAME'] == shooter][0]
            m_pts = re.search(r'\((\d+)\s+PTS\)', desc)
            if not m_pts:
                continue
            total_pts = int(m_pts.group(1))
            players.at[idx, "POINTS"] = total_pts

        is_def_reb = (row['EVENTMSGTYPE'] == 4 and 'Defensive' in desc)
        if row['EVENTMSGTYPE'] in (1, 5, 6) or is_def_reb:
            scorer = None
            assister = None
            # 1) figure out which team just had the ball
            offense_abbr = None
    
            if row['EVENTMSGTYPE'] == 1:   # made FG
                last = desc_last_key(desc)
                scorer = last_to_full.get(last)
                if scorer:
                    offense_abbr = players.loc[
                        players.PLAYER_NAME == scorer, 'TEAM_ABBREVIATION'
                    ].iloc[0]
    
            elif row['EVENTMSGTYPE'] == 5: # turnover
                last = desc_last_key(desc)
                turner = last_to_full.get(last)
                if turner:
                    offense_abbr = players.loc[
                        players.PLAYER_NAME == turner, 'TEAM_ABBREVIATION'
                    ].iloc[0]
    
            elif is_def_reb:             # defensive rebound
                last = desc_last_key(desc)
                rebounder = last_to_full.get(last)
                if rebounder:
                    # possession ended for the *other* team, so rebounder’s team is new offense
                    offense_abbr = players.loc[
                        players.PLAYER_NAME == rebounder, 'TEAM_ABBREVIATION'
                    ].iloc[0]
    
            # 2) now credit POS and IOC
            if offense_abbr:
                # mask of teammates on court
                base_mask = (
                    (players['TEAM_ABBREVIATION'] == offense_abbr) &
                    (players['ACTIVE'])
                )
                
                # Build exclusion list
                excl = {scorer}
                if assister:
                    excl.add(assister)
                
                # Final mask: exclude scorer and assister
                mask = base_mask & (~players['PLAYER_NAME'].isin(excl))
                
                # Now increment POS and IOC only for those players
                players.loc[mask, 'POS'] += 1
                                
            # 3) reset for next possession
            current_points = 0
            current_scorers.clear()
            current_assists.clear()

            #print(f"Event: {row['EVENTMSGTYPE']}")
            #print(f"Possession Ended!")
            #print(f"    Scorer:   {scorer}")
            #print(f"    Assister: {assister}")


    # Remove the helper column
    players.drop(columns='LAST', inplace=True)
    # Calculate AIOC
    players['AIOC'] = players['IOC'] / players['POS']
    return players

def main():
    game_ids = get_game_ids(season = '2024-25', season_type = 'Regular Season', league = '00')
    #print(game_ids)
    for game_id in game_ids: # Loop though game ids to make an analysis for each game
        print(game_id)
        preview = get_game_preview(game_id = game_id)
        #print(bs.BoxScoreAdvancedV2(game_id=game_id).get_data_frames()[0])
        players = get_players_df(game_id = game_id)
        #print(players)
        pbp = get_pbp_df(game_id = game_id)
        calc_players = calc_stats(pbp = pbp, players = players, game_id = game_id)
        print("Game Calculated:")
        print(calc_players)
        

if __name__ == "__main__":
    main()

0022401186
Game ID : 0022401187
Date    : 2025-04-13
Matchup : CHA @ BOS
                 PLAYER_NAME TEAM_ABBREVIATION  ACTIVE           LAST  IOC  \
0           Tristan da Silva               ORL    True       da Silva    0   
1             Jonathan Isaac               ORL    True          Isaac    0   
2         Wendell Carter Jr.               ORL    True         Carter    0   
3   Kentavious Caldwell-Pope               ORL    True  Caldwell-Pope    0   
4              Anthony Black               ORL    True          Black    0   
5                Jett Howard               ORL   False         Howard    0   
6               Cole Anthony               ORL   False        Anthony    0   
7                Gary Harris               ORL   False         Harris    0   
8               Goga Bitadze               ORL   False        Bitadze    0   
9              Caleb Houstan               ORL   False        Houstan    0   
10               Cory Joseph               ORL   False         Joseph

IndexError: index 0 is out of bounds for axis 0 with size 0