In [None]:
from nba_api.stats.endpoints import playbyplayv3
import pandas as pd
from nba_api.stats.endpoints import leaguegamelog
import numpy as np
import scipy 
import matplotlib.pyplot as plt 
from tqdm import tqdm
import time 
import os 

In [None]:
def classify_shot_from_text(row):
    """Turn a raw shot-event string into a '<result> <shot type>' label.

    Matching is case-insensitive substring matching on the whole string.

    Parameters:
        row (str): Event text to classify.

    Returns:
        str | None: 'Made'/'Missed' followed by 'Layup', 'Midrange' or 'Three'.
        None when the text contains neither 'made shot' nor 'missed shot'.
        'Three' is the fallback when no layup or midrange keyword matches.
    """
    text = row.lower()

    # 'made shot' is tested before 'missed shot', so it wins if both appear.
    outcome_markers = (('made shot', 'Made'), ('missed shot', 'Missed'))
    outcome = next((label for marker, label in outcome_markers if marker in text), None)
    if outcome is None:
        return None  # Not a shot

    # Categories are tried in order; the first one with a matching keyword wins.
    keyword_table = (
        ('Layup', ('layup', 'dunk', 'tip', 'putback', 'alley oop', 'finger roll', 'hook shot')),
        ('Midrange', ('jump shot', 'pullup', 'fadeaway', 'turnaround', 'step back', 'floating')),
    )
    category = 'Three'
    for candidate, keywords in keyword_table:
        if any(keyword in text for keyword in keywords):
            category = candidate
            break

    return f"{outcome} {category}"

def get_game_transitions(game_id):
    """
    Fetch play-by-play data for a single game and extract (state, next_state) transitions.

    Each event's state starts as '<actionType> - <subType>'. Shot events are
    relabelled by classify_shot_from_text, and the other common event families
    are collapsed to a single label each (e.g. every foul becomes 'Foul').

    Parameters:
        game_id (str): The NBA game ID.

    Returns:
        pd.DataFrame: The play-by-play rows (all fetched columns) with 'state',
        'next_state' and 'game_id' added. The last event has no successor and
        is dropped. An empty DataFrame is returned if anything fails.
    """
    # Applied in this order: when a raw string mentions more than one of these
    # labels, the earlier one wins, because a relabelled row no longer contains
    # any of the later patterns.
    event_labels = [
        'Turnover', 'Foul', 'Substitution', 'Free Throw',
        'Rebound', 'Timeout', 'Violation', 'Jump Ball',
    ]

    try:
        # Fetch play-by-play data
        log = playbyplayv3.PlayByPlayV3(game_id=game_id)
        gamelog = log.get_data_frames()[0]

        # Build raw state column
        gamelog['state'] = gamelog['actionType'].fillna('') + ' - ' + gamelog['subType'].fillna('')

        # Drop Instant Replay rows early so they never appear as a state
        gamelog = gamelog[~gamelog['state'].str.contains('Instant Replay', na=False)].copy()

        # Classify shots using text analysis
        shot_mask = gamelog['state'].str.contains('Made Shot|Missed Shot', na=False)
        gamelog.loc[shot_mask, 'state'] = gamelog.loc[shot_mask, 'state'].apply(classify_shot_from_text)

        # Normalize other event types
        for label in event_labels:
            gamelog.loc[gamelog['state'].str.contains(label, na=False), 'state'] = label

        # Compute next state
        gamelog['next_state'] = gamelog['state'].shift(-1)

        # Drop rows where next_state is NaN, and make a safe copy
        transitions = gamelog.dropna(subset=['next_state']).copy()

        # Tag every row with its game so frames from several games can be concatenated
        transitions['game_id'] = game_id

        return transitions

    except Exception as e:
        # Best effort: report the failure and let the caller skip this game.
        print(f"[ERROR] Game ID {game_id} failed: {e}")
        return pd.DataFrame()  # return empty DataFrame on error
from nba_api.stats.endpoints import leaguegamelog

# Get all game IDs from the 2024-25 regular season
log = leaguegamelog.LeagueGameLog(season='2024-25', season_type_all_star='Regular Season')
game_ids = log.get_data_frames()[0]['GAME_ID'].unique()

# Collect all transitions
all_transitions = []

for gid in tqdm(game_ids[:1]):  # Use a smaller slice to avoid rate-limiting initially
    df = get_game_transitions(gid)
    if not df.empty:
        all_transitions.append(df)
    time.sleep(0.6)  # Be kind to the API

# get_game_transitions returns an empty frame on failure, so the list can end up
# empty; fail with an actionable message instead of pandas' "No objects to concatenate".
if not all_transitions:
    raise ValueError(
        "No play-by-play transitions were collected; every requested game failed or was empty."
    )

# Combine into one big DataFrame
df_all = pd.concat(all_transitions, ignore_index=True)

# Discard transitions that touch an event with neither an actionType nor a subType
df_all = df_all[~((df_all['state'] == ' - ') | (df_all['next_state'] == ' - '))].copy()


In [None]:
# Distinct state labels seen in the data (set-based, so the order is arbitrary)
states = list(set(df_all['state']))

In [None]:
# Tally every observed (state, next_state) pair, then pivot to a matrix:
# one row per state, one column per next_state, 0 where a pair never occurred.
pair_counts = df_all.groupby(['state', 'next_state']).size()
transition_counts = pair_counts.unstack(fill_value=0)

# Divide each row by its total so the row holds transition probabilities
row_totals = transition_counts.sum(axis=1)
transition_probs = transition_counts.div(row_totals, axis=0)


import matplotlib.pyplot as plt

# Rows of transition_probs are the current state and columns the next state,
# so both axes are labelled explicitly to make the direction unambiguous.
fig, ax = plt.subplots(figsize=(12, 10))
heatmap = ax.imshow(transition_probs, cmap='Blues')
fig.colorbar(heatmap, ax=ax, label='Transition Probability')

ax.set_xticks(range(len(transition_probs.columns)))
ax.set_xticklabels(transition_probs.columns, rotation=90)
ax.set_yticks(range(len(transition_probs.index)))
ax.set_yticklabels(transition_probs.index)

ax.set_xlabel('Next state')
ax.set_ylabel('Current state')
ax.set_title('Transition Matrix Heatmap')
fig.tight_layout()
plt.show()


In [None]:
# Preview the first 20 transition rows
df_all.head(20)

In [None]:
# Show the row index and the column index of the combined frame
[df_all.index, df_all.columns]

In [None]:
from nba_api.stats.endpoints import playbyplayv2, boxscoretraditionalv2
import pandas as pd

def _classify_play_description(description):
    """Return the state label for one play-by-play description, or None if unrecognised."""
    text = description.lower()

    # A miss is flagged by a stand-alone "miss"/"misses"/"missed" word. Matching
    # whole words keeps names that merely start with "miss" from counting as misses.
    missed = any(word in ('miss', 'misses', 'missed') for word in text.split())
    outcome = 'Missed' if missed else 'Made'

    # Shot types are checked first, in this order.
    if '3pt' in text or '3-point' in text:
        return f'{outcome} 3PT'
    if 'layup' in text:
        return f'{outcome} Layup'
    # 'midrange'/'mid-range' rather than a bare 'mid', which also matches
    # unrelated words and player names containing those letters.
    if 'midrange' in text or 'mid-range' in text or 'pullup' in text or 'jump shot' in text:
        return f'{outcome} Midrange'

    # Remaining event families; the first entry with a matching keyword wins.
    # NOTE(review): 'sub:' covers descriptions of the form "SUB: A FOR B" — confirm
    # against the descriptions the endpoint actually returns.
    event_keywords = (
        ('Foul', ('foul',)),
        ('Freethrow', ('free throw',)),
        ('Jump ball', ('jump ball',)),
        ('Rebound', ('rebound',)),
        ('Substitution', ('substitution', 'sub:')),
        ('Timeout', ('timeout',)),
        ('Turnover', ('turnover',)),
        ('Violation', ('violation',)),
    )
    for label, keywords in event_keywords:
        if any(keyword in text for keyword in keywords):
            return label

    return None  # Unknown action


def extract_game_states(game_id):
    """
    Fetch one game's play-by-play and reduce it to a sequence of labelled states.

    For every event the home description is tried first and the visitor
    description second; the first one that maps to a known state is used and
    determines the team side. Events with no recognisable description are skipped.

    Parameters:
        game_id (str): The NBA game ID.

    Returns:
        pd.DataFrame: One row per recognised event with columns 'Time', 'Period',
        'Team' ('Home'/'Away'), 'State' ('<label> (<side>)') and 'Possession'
        (abbreviation of the team the description belongs to). The columns are
        present even when no event was recognised.
    """
    # Get play-by-play and team info
    pbp = playbyplayv2.PlayByPlayV2(game_id=game_id)
    df = pbp.get_data_frames()[0]

    boxscore = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
    teams = boxscore.get_data_frames()[1]
    # NOTE(review): assumes the first team row is the home team and the second
    # the visitor — confirm against the endpoint's row ordering.
    home_team = teams.iloc[0]['TEAM_ABBREVIATION']
    away_team = teams.iloc[1]['TEAM_ABBREVIATION']
    team_abbr_by_side = {'Home': home_team, 'Away': away_team}

    state_data = []

    for _, row in df.iterrows():
        candidates = (
            ('Home', row['HOMEDESCRIPTION']),
            ('Away', row['VISITORDESCRIPTION']),
        )
        for team_side, description in candidates:
            # Missing descriptions may be None or NaN; only real text is classified.
            if not isinstance(description, str) or not description:
                continue

            state = _classify_play_description(description)
            if state is None:
                continue  # e.g. a block/steal note; the other side may hold the event

            state_data.append({
                'Time': row['PCTIMESTRING'],
                'Period': row['PERIOD'],
                'Team': team_side,
                'State': f"{state} ({team_side})",
                'Possession': team_abbr_by_side[team_side]
            })
            break  # at most one state per play-by-play row

    # Explicit columns keep the schema stable when no event was recognised.
    df_states = pd.DataFrame(
        state_data,
        columns=['Time', 'Period', 'Team', 'State', 'Possession'],
    )

    return df_states


In [None]:
# Run the extractor on one hard-coded game and preview its first 25 states.
game_id = '0022300001'
q1_df = extract_game_states(game_id)

q1_df.head(25)


In [None]:
from nba_api.stats.endpoints import leaguegamefinder
import pandas as pd

# Look up the games recorded for GSW (team id 1610612744) in 2024-25
gamefinder = leaguegamefinder.LeagueGameFinder(
    season_nullable='2024-25',
    team_id_nullable='1610612744',
)
games = gamefinder.get_data_frames()[0]

# De-duplicated game IDs as a plain list, in order of first appearance
game_ids = pd.unique(games['GAME_ID']).tolist()


In [None]:
import numpy as np

def build_transition_matrix(states):
    """
    Build a row-normalised first-order transition matrix from a state sequence.

    Parameters:
        states (list): Ordered sequence of hashable state labels.

    Returns:
        pd.DataFrame: Square frame indexed and labelled by the distinct states in
        order of first appearance. Entry [a, b] is the fraction of transitions
        leaving a that go to b. A state with no outgoing transition gets an
        all-zero row. An empty sequence yields an empty frame.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order, so the label
    # order is reproducible (a set would give an arbitrary order).
    unique_states = list(dict.fromkeys(states))
    state_indices = {state: i for i, state in enumerate(unique_states)}
    n_states = len(unique_states)
    counts = np.zeros((n_states, n_states))

    for current, following in zip(states[:-1], states[1:]):
        counts[state_indices[current], state_indices[following]] += 1

    # Normalize rows to get probabilities. `out` must be pre-filled: with `where`,
    # positions that are skipped keep whatever `out` already holds, and without
    # an explicit `out` that is uninitialised memory.
    row_sums = counts.sum(axis=1, keepdims=True)
    transition_matrix = np.divide(
        counts,
        row_sums,
        out=np.zeros_like(counts),
        where=row_sums != 0,
    )

    return pd.DataFrame(transition_matrix, index=unique_states, columns=unique_states)


In [None]:
# Running per-quarter sums of the per-game transition matrices
aggregated_matrices = {
    'Q1': None,
    'Q2': None,
    'Q3': None,
    'Q4_OT': None
}

for game_id in tqdm(game_ids):
    try:
        # extract_game_states returns ONE frame for the whole game; split it by
        # its 'Period' column here (period 4 and every overtime share a bucket).
        game_states = extract_game_states(game_id)
        if game_states.empty:
            continue

        period = game_states['Period']
        quarter_frames = {
            'Q1': game_states[period == 1],
            'Q2': game_states[period == 2],
            'Q3': game_states[period == 3],
            'Q4_OT': game_states[period >= 4],
        }

        for quarter, df in quarter_frames.items():
            states = df['State'].tolist()
            if len(states) < 2:
                continue  # Skip if not enough data to build transitions

            # Each per-game matrix is already row-normalised, so this adds up
            # per-game probabilities rather than raw transition counts.
            tm = build_transition_matrix(states)

            if aggregated_matrices[quarter] is None:
                aggregated_matrices[quarter] = tm
            else:
                # Align on state labels; a label missing from one side counts as 0
                aggregated_matrices[quarter] = aggregated_matrices[quarter].add(tm, fill_value=0)

    except Exception as e:
        # Best effort: report the game and carry on with the rest.
        print(f"Error processing game {game_id}: {e}")
        continue
    finally:
        time.sleep(0.6)  # Be kind to the API


In [None]:
# Rescale every aggregated matrix row by its own total; quarters that never
# received data stay None, and undefined cells (e.g. 0/0) are set to 0.
for quarter, matrix in aggregated_matrices.items():
    if matrix is None:
        continue
    row_sums = matrix.sum(axis=1)
    aggregated_matrices[quarter] = matrix.div(row_sums, axis=0).fillna(0)
