In [None]:
from nba_api.stats.static import teams
from nba_api.stats.endpoints import teamgamelog
import pandas as pd

# Pull the static team directory and pick out the Golden State Warriors entry.
# Indexing with [0] raises IndexError if no team matches the full name.
nba_teams = teams.get_teams()
warriors = list(
    filter(lambda entry: entry['full_name'] == 'Golden State Warriors', nba_teams)
)[0]
warriors_id = warriors['id']
print(f"Warriors Team ID: {warriors_id}")

# Query parameters for the team game log endpoint.
season = '2023-24'  # Format: 'YYYY-YY'
season_type = 'Regular Season'  # Options: 'Regular Season', 'Playoffs', etc.

# Fetch the team's game log; the first returned frame is the one used here.
game_log = teamgamelog.TeamGameLog(
    team_id=warriors_id,
    season=season,
    season_type_all_star=season_type,
)
games_df = game_log.get_data_frames()[0]

# Collect the game identifiers as a plain Python list for the later cells.
game_ids = games_df['Game_ID'].tolist()

# Show the identifiers that will be fetched.
print("Warriors Game ID's", game_ids)


In [None]:
from nba_api.stats.endpoints import playbyplayv3
import time

def get_game_log(game_id):
    """Fetch the play-by-play table for a single game.

    Queries the ``PlayByPlayV3`` endpoint for ``game_id`` and returns the
    first of the data frames the endpoint object exposes.
    """
    return playbyplayv3.PlayByPlayV3(game_id=game_id).get_data_frames()[0]

def file_exists(filename):
    """Return True if ``filename`` names an existing regular file.

    Parameters
    ----------
    filename : str or path-like
        Path to check.

    Returns
    -------
    bool
        True when the path exists and is a regular file; False for a missing
        path or a directory.
    """
    # Local import keeps this helper self-contained within the notebook cell.
    import os

    # isfile() answers the question directly: no file handle is opened, and a
    # directory yields False instead of raising IsADirectoryError.
    return os.path.isfile(filename)

# Local cache of the combined play-by-play data, so the API is only hit once.
csv = 'warriorsplaybyplay.csv'

if file_exists(csv):
    gamelog = pd.read_csv(csv)
else:
    # Collect one frame per game and concatenate once at the end; calling
    # pd.concat inside the loop re-copies the accumulated frame every iteration.
    per_game_logs = []
    for gameid in game_ids:
        time.sleep(1)  # Sleep for 1 second to avoid hitting the API too quickly
        per_game_logs.append(get_game_log(gameid))

    # ignore_index gives the fresh frame a clean 0..n-1 index, matching what
    # read_csv produces on the cached path (the CSV is written without an index).
    # pd.concat raises ValueError if game_ids was empty.
    gamelog = pd.concat(per_game_logs, ignore_index=True)
    gamelog.to_csv(csv, index=False)

# Collapse every non-Warriors team code (including missing ones) into "OPP".
gamelog.loc[gamelog['teamTricode'] != 'GSW', 'teamTricode'] = 'OPP'

# Drop events that are not modelled as states. A cached CSV round-trips empty
# action types as NaN, so missing values are treated like '' here; otherwise
# the cached path would keep them and produce a spurious "-OPP" state.
excluded_actions = ['period', 'Instant Replay', 'Substitution', '']
action_type = gamelog['actionType'].fillna('')
# reset_index returns a fresh frame, so the column assignments below do not
# write into a filtered slice (avoids SettingWithCopyWarning).
gamelog = gamelog[~action_type.isin(excluded_actions)].reset_index(drop=True)

# A state is "<action type>-<GSW|OPP>".
gamelog['state'] = gamelog['actionType'] + "-" + gamelog['teamTricode']

# The successor of an event is the next event in the SAME game. A plain
# shift(-1) would link the last event of one game to the first event of the next.
# NOTE(review): assumes the play-by-play frame carries a 'gameId' column —
# confirm against the endpoint schema; falls back to a global shift otherwise.
if 'gameId' in gamelog.columns:
    gamelog['next_state'] = gamelog.groupby('gameId', sort=False)['state'].shift(-1)
else:
    gamelog['next_state'] = gamelog['state'].shift(-1)

# Count observed (state -> next_state) pairs; rows without a successor are dropped.
transitions = gamelog.dropna(subset=['next_state'])
transition_counts = transitions.groupby(['state', 'next_state']).size().unstack(fill_value=0)

# Force a square matrix over the union of source and destination states so
# rows and columns line up for the eigen-analysis and simulation cells.
all_states = transition_counts.index.union(transition_counts.columns)
transition_counts = (
    transition_counts
    .reindex(index=all_states, columns=all_states, fill_value=0)
    .rename_axis(index='state', columns='next_state')
)

# Row-normalise counts into probabilities. A state that never occurs as a
# source has a zero row total and therefore ends up as a NaN row.
transition_matrix = transition_counts.div(transition_counts.sum(axis=1), axis=0)
transition_matrix.to_csv('transition_matrix.csv')

print(transition_matrix)


In [None]:
import numpy as np

# Work on the plain ndarray so the same object feeds both the eigen-solver
# and the row-sum check.
tm_np = transition_matrix.to_numpy()
eigvals, eigvectors = np.linalg.eig(tm_np)

# Sanity check: each row of a transition matrix should sum to 1.
print(tm_np.sum(axis=1))

print(eigvals)
print(transition_matrix.index)

# np.linalg.eig stores eigenvectors as COLUMNS: eigvectors[:, i] belongs to
# eigvals[i]. Indexing eigvectors[0] would instead show the first component
# of every eigenvector.
eigvectors[:, 0]

In [None]:
import numpy as np
import pandas as pd

# Stationary distribution: the left eigenvector of P for the eigenvalue closest
# to 1, i.e. an ordinary (right) eigenvector of P transposed.
P = transition_matrix.values
eigvals, eigvecs = np.linalg.eig(P.T)

# Position of the eigenvalue nearest to 1.
idx = np.abs(eigvals - 1).argmin()

# Keep the real part and rescale so the entries sum to 1.
stationary = eigvecs[:, idx].real
stationary = np.divide(stationary, stationary.sum())

# Label with state names and order from most to least probable.
stationary_distribution = pd.Series(
    stationary, index=transition_matrix.index
).sort_values(ascending=False)

print(stationary_distribution)


Rebounds and missed shots dominate the stationary distribution.

In [None]:
import numpy as np
import pandas as pd

def simulate_game_flow(transition_matrix, steps=100, start_state=None, rng=None):
    """Simulate a sequence of states from a first-order Markov chain.

    Parameters
    ----------
    transition_matrix : pandas.DataFrame
        Square matrix whose index holds the state labels. Row ``i`` gives the
        probabilities of moving from state ``i`` to each state, with columns
        in the same order as the index.
    steps : int, default 100
        Total number of states in the returned sequence, including the
        starting state. A non-positive value yields an empty list.
    start_state : label, optional
        State to start from. When omitted, the start is drawn from the
        stationary distribution of the chain.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. Defaults to NumPy's global random state, so
        ``np.random.seed`` still controls the output when ``rng`` is omitted.

    Returns
    -------
    list
        State labels in simulated order.

    Raises
    ------
    KeyError
        If ``start_state`` is given but is not in ``transition_matrix.index``.
    """
    if steps <= 0:
        return []

    states = list(transition_matrix.index)
    P = transition_matrix.to_numpy(dtype=float)
    n_states = len(states)
    # Both the np.random module and Generator/RandomState expose .choice(a, p=...).
    sampler = np.random if rng is None else rng

    if start_state is None:
        # Left eigenvector for the eigenvalue closest to 1, from a single
        # decomposition so eigenvalues and eigenvectors are guaranteed to align.
        eigvals, eigvecs = np.linalg.eig(P.T)
        idx = np.argmin(np.abs(eigvals - 1))
        stationary = np.real(eigvecs[:, idx])
        stationary = stationary / stationary.sum()
        # Round-off can leave tiny negative entries, which choice() rejects.
        stationary = np.clip(stationary, 0.0, None)
        stationary = stationary / stationary.sum()
        current_index = sampler.choice(n_states, p=stationary)
    else:
        state_to_index = {state: i for i, state in enumerate(states)}
        current_index = state_to_index[start_state]

    sequence = [states[current_index]]

    # Each further step samples the next state from the current state's row.
    for _ in range(steps - 1):
        current_index = sampler.choice(n_states, p=P[current_index])
        sequence.append(states[current_index])

    return sequence

# Example usage: simulate 50 events starting from a Warriors jump ball and
# print them as a numbered list (numbering starts at 1).
simulated_sequence = simulate_game_flow(
    transition_matrix,
    steps=50,
    start_state='Jump Ball-GSW',
)
for position, action in enumerate(simulated_sequence, start=1):
    print(f"{position}. {action}")


In [None]:
# Top-1 accuracy: how often the single most likely successor of a state is the
# state that actually followed it. This is measured on the same game log that
# produced transition_matrix, so it is an in-sample figure.
transitions = gamelog.dropna(subset=['next_state'])
valid_transitions = transitions[transitions['state'].isin(transition_matrix.index)]

# Look up the most likely successor once per distinct state instead of running
# a .loc/.idxmax lookup for every observed transition.
observed_states = valid_transitions['state'].unique()
most_likely_next = transition_matrix.loc[observed_states].idxmax(axis=1)

predicted_next_state = valid_transitions['state'].map(most_likely_next).reset_index(drop=True)
actual_next_state = valid_transitions['next_state'].reset_index(drop=True)

accuracy = (predicted_next_state == actual_next_state).mean()
print(f"Top-1 Accuracy of Markov Model: {accuracy:.4f}")
