In [10]:
from nba_api.stats.static import teams
import pandas as pd
import numpy as np
from nba_api.stats.endpoints import *
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
import time
import warnings

In [3]:
OKC_data  = teams.find_teams_by_full_name("Oklahoma City Thunder")
OKC_id = OKC_data[0]['id'] # 1610612760 team id 

PAC_data = teams.find_teams_by_full_name("Indiana Pacers")
PAC_id = PAC_data[0]['id'] # 1610612754 team id 

In [12]:
from nba_api.stats.endpoints import leaguegamefinder
import pandas as pd

finder = leaguegamefinder.LeagueGameFinder(
    player_or_team_abbreviation="T",
    team_id_nullable=OKC_id,
    date_from_nullable="06/08/2025",
    date_to_nullable="06/22/2025"
)

games_df = finder.get_data_frames()[0]
games_df = games_df.sort_values(by='GAME_DATE')
game_ids = games_df['GAME_ID'].unique().tolist()
game_ids


['0042400402',
 '0042400403',
 '0042400404',
 '0042400405',
 '0042400406',
 '0042400407']

In [13]:
def safe_api_call(api_function, max_retries=3, delay=1):
    """
    Safely make API calls with retry logic
    """
    for attempt in range(max_retries):
        try:
            time.sleep(delay)  # Rate limiting
            result = api_function()
            return result
        except Exception as e:
            print(f"API call failed (attempt {attempt + 1}): {e}")
            if attempt == max_retries - 1:
                raise e
            time.sleep(delay * 2)  # Exponential backoff

In [19]:

def debug_dataframe_columns(df, df_name):
    """Debug function to print column names"""
    print(f"\n{df_name} columns:")
    print(df.columns.tolist())
    if len(df) > 0:
        print(f"Sample row:")
        print(df.iloc[0])

def collect_game_data(game_ids):
    """
    Step 2a: Pull basic game stats for all games - FIXED VERSION
    """
    games_summary = []
    
    for i, game_id in enumerate(game_ids, 1):
        print(f"Collecting data for Game {i} (ID: {game_id})...")
        try:
            # Basic box score
            boxscore = safe_api_call(
                lambda: boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
            )
            
            # Get all dataframes
            dataframes = boxscore.get_data_frames()
            team_stats = dataframes[1]  # Team stats dataframe
            
            # Debug - print column names for first game
            if i == 1:
                debug_dataframe_columns(team_stats, "Team Stats")
            
            # Extract team performance - more robust approach
            thunder_stats = team_stats[team_stats['TEAM_ID'] == OKC_id].iloc[0]
            pacers_stats = team_stats[team_stats['TEAM_ID'] == PAC_id].iloc[0]
            
            # Determine home team using different method
            thunder_home = None
            if 'MATCHUP' in thunder_stats:
                thunder_home = thunder_stats['MATCHUP'].find('@') == -1
            elif 'HOME' in thunder_stats:
                thunder_home = thunder_stats['HOME'] == 1
            else:
                # Fallback - check game finder data
                game_row = games_df[games_df['GAME_ID'] == game_id].iloc[0]
                thunder_home = 'vs.' in game_row['MATCHUP'] if 'MATCHUP' in game_row else True
            
            game_info = {
                'Game_Number': i,
                'Game_ID': game_id,
                'Date': games_df[games_df['GAME_ID'] == game_id]['GAME_DATE'].iloc[0],
                'Thunder_Score': thunder_stats['PTS'],
                'Pacers_Score': pacers_stats['PTS'],
                'Thunder_Home': thunder_home,
                'Winner': 'Thunder' if thunder_stats['PTS'] > pacers_stats['PTS'] else 'Pacers',
                'Point_Differential': abs(thunder_stats['PTS'] - pacers_stats['PTS']),
                'Thunder_FG_PCT': thunder_stats.get('FG_PCT', 0),
                'Pacers_FG_PCT': pacers_stats.get('FG_PCT', 0),
                'Thunder_FG3_PCT': thunder_stats.get('FG3_PCT', 0),
                'Pacers_FG3_PCT': pacers_stats.get('FG3_PCT', 0),
                'Thunder_FT_PCT': thunder_stats.get('FT_PCT', 0),
                'Pacers_FT_PCT': pacers_stats.get('FT_PCT', 0),
                'Thunder_REB': thunder_stats.get('REB', 0),
                'Pacers_REB': pacers_stats.get('REB', 0),
                'Thunder_AST': thunder_stats.get('AST', 0),
                'Pacers_AST': pacers_stats.get('AST', 0),
                'Thunder_TOV': thunder_stats.get('TOV', thunder_stats.get('TURNOVER', 0)),
                'Pacers_TOV': pacers_stats.get('TOV', pacers_stats.get('TURNOVER', 0))
            }
            
            games_summary.append(game_info)
            
        except Exception as e:
            print(f"Error collecting data for Game {i}: {e}")
            # Print available columns for debugging
            try:
                boxscore = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
                team_stats = boxscore.get_data_frames()[1]
                print("Available columns:", team_stats.columns.tolist())
            except:
                pass
            continue
    
    return pd.DataFrame(games_summary)

In [20]:
def collect_pbp_data(game_ids):
    """
    Step 2b: Get play-by-play for momentum analysis
    """
    all_pbp_data = []
    
    for i, game_id in enumerate(game_ids, 1):
        print(f"Collecting play-by-play for Game {i}...")
        try:
            pbp = safe_api_call(
                lambda: playbyplayv2.PlayByPlayV2(game_id=game_id)
            )
            pbp_df = pbp.get_data_frames()[0]
            pbp_df['Game_Number'] = i
            pbp_df['Game_ID'] = game_id
            all_pbp_data.append(pbp_df)
            
        except Exception as e:
            print(f"Error collecting PBP for Game {i}: {e}")
            continue
    
    return pd.concat(all_pbp_data, ignore_index=True) if all_pbp_data else pd.DataFrame()


In [21]:
def collect_four_factors(game_ids):
    """
    Step 2c: Collect four factors data - FIXED VERSION
    """
    four_factors_data = []
    
    for i, game_id in enumerate(game_ids, 1):
        print(f"Collecting four factors for Game {i}...")
        try:
            four_factors = safe_api_call(
                lambda: boxscorefourfactorsv2.BoxScoreFourFactorsV2(game_id=game_id)
            )
            
            dataframes = four_factors.get_data_frames()
            team_ff = dataframes[1]  # Team four factors
            
            # Debug - print column names for first game
            if i == 1:
                debug_dataframe_columns(team_ff, "Four Factors")
            
            thunder_ff = team_ff[team_ff['TEAM_ID'] == OKC_id].iloc[0]
            pacers_ff = team_ff[team_ff['TEAM_ID'] == PAC_id].iloc[0]
            
            ff_info = {
                'Game_Number': i,
                'Game_ID': game_id,
                'Thunder_EFG_PCT': thunder_ff.get('EFG_PCT', 0),
                'Pacers_EFG_PCT': pacers_ff.get('EFG_PCT', 0),
                'Thunder_FTA_RATE': thunder_ff.get('FTA_RATE', 0),
                'Pacers_FTA_RATE': pacers_ff.get('FTA_RATE', 0),
                'Thunder_TOV_PCT': thunder_ff.get('TOV_PCT', thunder_ff.get('TURNOVER_PCT', 0)),
                'Pacers_TOV_PCT': pacers_ff.get('TOV_PCT', pacers_ff.get('TURNOVER_PCT', 0)),
                'Thunder_OREB_PCT': thunder_ff.get('OREB_PCT', 0),
                'Pacers_OREB_PCT': pacers_ff.get('OREB_PCT', 0)
            }
            
            # Calculate differentials
            ff_info['EFG_Differential'] = ff_info['Thunder_EFG_PCT'] - ff_info['Pacers_EFG_PCT']
            ff_info['FTA_Differential'] = ff_info['Thunder_FTA_RATE'] - ff_info['Pacers_FTA_RATE']
            ff_info['TOV_Differential'] = ff_info['Pacers_TOV_PCT'] - ff_info['Thunder_TOV_PCT']  # Lower is better
            ff_info['OREB_Differential'] = ff_info['Thunder_OREB_PCT'] - ff_info['Pacers_OREB_PCT']
            
            four_factors_data.append(ff_info)
            
        except Exception as e:
            print(f"Error collecting four factors for Game {i}: {e}")
            # Print available columns for debugging
            try:
                four_factors = boxscorefourfactorsv2.BoxScoreFourFactorsV2(game_id=game_id)
                team_ff = four_factors.get_data_frames()[1]
                print("Available four factors columns:", team_ff.columns.tolist())
            except:
                pass
            continue
    
    return pd.DataFrame(four_factors_data)

In [22]:
def collect_player_data(game_ids):
    """
    Step 2d: Individual player performance - FIXED VERSION
    """
    all_player_data = []
    key_players = {
        'Shai Gilgeous-Alexander': 'SGA',
        'Jalen Williams': 'J_Williams',
        'Tyrese Haliburton': 'Haliburton',
        'Pascal Siakam': 'Siakam',
        'Myles Turner': 'Turner'
    }
    
    for i, game_id in enumerate(game_ids, 1):
        print(f"Collecting player data for Game {i}...")
        try:
            boxscore = safe_api_call(
                lambda: boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
            )
            
            dataframes = boxscore.get_data_frames()
            player_stats = dataframes[0]  # Player stats
            
            # Debug - print column names for first game
            if i == 1:
                debug_dataframe_columns(player_stats, "Player Stats")
            
            game_player_data = []
            
            for _, player in player_stats.iterrows():
                if player['PLAYER_NAME'] in key_players:
                    player_info = {
                        'Game_Number': i,
                        'Game_ID': game_id,
                        'Player': player['PLAYER_NAME'],
                        'Team_ID': player['TEAM_ID'],
                        'Team': 'Thunder' if player['TEAM_ID'] == OKC_id else 'Pacers',
                        'MIN': player.get('MIN', 0),
                        'PTS': player.get('PTS', 0),
                        'REB': player.get('REB', 0),
                        'AST': player.get('AST', 0),
                        'STL': player.get('STL', 0),
                        'BLK': player.get('BLK', 0),
                        'TOV': player.get('TOV', player.get('TURNOVER', 0)),
                        'FG_PCT': player.get('FG_PCT', 0),
                        'FG3_PCT': player.get('FG3_PCT', 0),
                        'FT_PCT': player.get('FT_PCT', 0),
                        'PLUS_MINUS': player.get('PLUS_MINUS', 0)
                    }
                    game_player_data.append(player_info)
            
            all_player_data.extend(game_player_data)
            
        except Exception as e:
            print(f"Error collecting player data for Game {i}: {e}")
            # Print available columns for debugging
            try:
                boxscore = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
                player_stats = boxscore.get_data_frames()[0]
                print("Available player stats columns:", player_stats.columns.tolist())
            except:
                pass
            continue
    
    return pd.DataFrame(all_player_data)

In [23]:
print("=" * 50)
print("STARTING DATA COLLECTION")
print("=" * 50)

# Collect all data
games_summary = collect_game_data(game_ids)
print(f"\nCollected basic stats for {len(games_summary)} games")

pbp_data = collect_pbp_data(game_ids)
print(f"Collected play-by-play data: {len(pbp_data)} plays")

four_factors = collect_four_factors(game_ids)
print(f"Collected four factors for {len(four_factors)} games")

player_stats = collect_player_data(game_ids)
print(f"Collected stats for {len(player_stats)} player-game combinations")

STARTING DATA COLLECTION
Collecting data for Game 1 (ID: 0042400402)...

Team Stats columns:
['GAME_ID', 'TEAM_ID', 'TEAM_NAME', 'TEAM_ABBREVIATION', 'TEAM_CITY', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TO', 'PF', 'PTS', 'PLUS_MINUS']
Sample row:
GAME_ID                 0042400402
TEAM_ID                 1610612760
TEAM_NAME                  Thunder
TEAM_ABBREVIATION              OKC
TEAM_CITY            Oklahoma City
MIN                  240.000000:00
FGM                             40
FGA                             82
FG_PCT                       0.488
FG3M                            14
FG3A                            36
FG3_PCT                      0.389
FTM                             29
FTA                             33
FT_PCT                       0.879
OREB                            11
DREB                            32
REB                             43
AST                             25
S

In [24]:
def calculate_running_score(pbp_df):
    """
    Calculate running score throughout the game
    """
    pbp_df = pbp_df.copy()
    pbp_df['Thunder_Score'] = 0
    pbp_df['Pacers_Score'] = 0
    
    thunder_score = 0
    pacers_score = 0
    
    for idx, play in pbp_df.iterrows():
        # Update scores based on scoring plays
        if pd.notna(play['SCORE']) and play['SCORE'] != '':
            try:
                # Parse score string (format: "XX - YY")
                scores = play['SCORE'].split(' - ')
                if len(scores) == 2:
                    # Determine which team scored based on the play description
                    if play['PLAYER1_TEAM_ID'] == OKC_id:
                        thunder_score = int(scores[0])
                        pacers_score = int(scores[1])
                    else:
                        pacers_score = int(scores[0])
                        thunder_score = int(scores[1])
            except:
                pass
        
        pbp_df.at[idx, 'Thunder_Score'] = thunder_score
        pbp_df.at[idx, 'Pacers_Score'] = pacers_score
        pbp_df.at[idx, 'Score_Differential'] = thunder_score - pacers_score
    
    return pbp_df

In [25]:
def calculate_momentum_swings(pbp_df, game_id):
    """
    Step 5: Calculate momentum shifts based on scoring runs
    """
    pbp_game = pbp_df[pbp_df['GAME_ID'] == game_id].copy()
    pbp_game = calculate_running_score(pbp_game)
    
    momentum_data = []
    current_run = 0
    run_team = None
    last_score_diff = 0
    
    for idx, play in pbp_game.iterrows():
        score_diff = play['Score_Differential']
        
        # Detect scoring plays
        if abs(score_diff) != abs(last_score_diff):
            points_scored = abs(score_diff - last_score_diff)
            scoring_team = 'Thunder' if score_diff > last_score_diff else 'Pacers'
            
            # Track runs
            if run_team == scoring_team:
                current_run += points_scored
            else:
                # Run ended, start new run
                if current_run >= 6:  # Significant run
                    momentum_data.append({
                        'Game_ID': game_id,
                        'Period': play['PERIOD'],
                        'Time': play['PCTIMESTRING'],
                        'Team': run_team,
                        'Run_Points': current_run,
                        'Type': 'Momentum_Swing'
                    })
                current_run = points_scored
                run_team = scoring_team
        
        last_score_diff = score_diff
        
        # Track lead changes
        if idx > 0:
            prev_diff = pbp_game.iloc[idx-1]['Score_Differential']
            if (prev_diff > 0 and score_diff < 0) or (prev_diff < 0 and score_diff > 0):
                momentum_data.append({
                    'Game_ID': game_id,
                    'Period': play['PERIOD'],
                    'Time': play['PCTIMESTRING'],
                    'Team': 'Thunder' if score_diff > 0 else 'Pacers',
                    'Run_Points': 0,
                    'Type': 'Lead_Change'
                })
    
    return momentum_data

In [26]:
def calculate_win_probability(pbp_df):
    """
    Step 5: Simple win probability model
    """
    pbp_df = pbp_df.copy()
    
    for idx, play in pbp_df.iterrows():
        # Calculate time remaining in seconds
        period = play['PERIOD']
        time_str = play['PCTIMESTRING']
        try:
            mins, secs = map(int, time_str.split(':'))
            time_remaining = (4 - period) * 12 * 60 + mins * 60 + secs
            if period > 4:  # Overtime
                time_remaining = mins * 60 + secs
        except:
            time_remaining = 0
        
        score_diff = play['Score_Differential']
        
        # Simple win probability model based on score differential and time
        if time_remaining == 0:
            win_prob = 1.0 if score_diff > 0 else 0.0
        else:
            # Logistic model approximation
            x = score_diff / (np.sqrt(time_remaining / 60) + 1)
            win_prob = 1 / (1 + np.exp(-x * 0.8))
        
        pbp_df.at[idx, 'Thunder_Win_Prob'] = win_prob
        pbp_df.at[idx, 'Time_Remaining_Sec'] = time_remaining
    
    return pbp_df


In [27]:
def identify_turning_points(pbp_df, game_id):
    """
    Step 5: Find moments where win probability shifted significantly
    """
    game_pbp = pbp_df[pbp_df['GAME_ID'] == game_id].copy()
    game_pbp = calculate_win_probability(game_pbp)
    
    turning_points = []
    
    for i in range(1, len(game_pbp)):
        current_prob = game_pbp.iloc[i]['Thunder_Win_Prob']
        prev_prob = game_pbp.iloc[i-1]['Thunder_Win_Prob']
        prob_change = abs(current_prob - prev_prob)
        
        if prob_change > 0.15:  # 15% swing threshold
            turning_points.append({
                'Game_ID': game_id,
                'Period': game_pbp.iloc[i]['PERIOD'],
                'Time': game_pbp.iloc[i]['PCTIMESTRING'],
                'Play_Description': game_pbp.iloc[i]['HOMEDESCRIPTION'] or game_pbp.iloc[i]['VISITORDESCRIPTION'],
                'Win_Prob_Change': prob_change,
                'Thunder_Win_Prob_Before': prev_prob,
                'Thunder_Win_Prob_After': current_prob,
                'Swing_Favored': 'Thunder' if current_prob > prev_prob else 'Pacers'
            })
    
    return turning_points

In [28]:
print("\n" + "=" * 50)
print("ANALYZING GAME FLOW AND MOMENTUM")
print("=" * 50)

all_momentum_data = []
all_turning_points = []
win_prob_data = []

for game_id in game_ids:
    game_num = game_ids.index(game_id) + 1
    print(f"Analyzing Game {game_num} momentum...")
    
    try:
        # Calculate momentum swings
        momentum = calculate_momentum_swings(pbp_data, game_id)
        all_momentum_data.extend(momentum)
        
        # Identify turning points
        turning_points = identify_turning_points(pbp_data, game_id)
        all_turning_points.extend(turning_points)
        
        # Store win probability data for this game
        game_pbp = pbp_data[pbp_data['GAME_ID'] == game_id].copy()
        game_pbp = calculate_win_probability(game_pbp)
        win_prob_data.append(game_pbp)
        
    except Exception as e:
        print(f"Error analyzing Game {game_num}: {e}")

momentum_df = pd.DataFrame(all_momentum_data)
turning_points_df = pd.DataFrame(all_turning_points)
win_probability_df = pd.concat(win_prob_data, ignore_index=True) if win_prob_data else pd.DataFrame()

print(f"Found {len(momentum_df)} momentum events")
print(f"Found {len(turning_points_df)} turning points")


ANALYZING GAME FLOW AND MOMENTUM
Analyzing Game 1 momentum...
Error analyzing Game 1: 'Score_Differential'
Analyzing Game 2 momentum...
Error analyzing Game 2: single positional indexer is out-of-bounds
Analyzing Game 3 momentum...
Error analyzing Game 3: single positional indexer is out-of-bounds
Analyzing Game 4 momentum...
Error analyzing Game 4: single positional indexer is out-of-bounds
Analyzing Game 5 momentum...
Error analyzing Game 5: single positional indexer is out-of-bounds
Analyzing Game 6 momentum...
Error analyzing Game 6: single positional indexer is out-of-bounds
Found 135 momentum events
Found 0 turning points


In [29]:
def track_four_factors_evolution(four_factors_df):
    """
    Track how four factors changed game-by-game
    """
    evolution_summary = {
        'Thunder_Wins': [],
        'Pacers_Wins': [],
        'Close_Games': [],
        'Four_Factors_Winners': []
    }
    
    for _, game in four_factors_df.iterrows():
        game_num = game['Game_Number']
        
        # Determine four factors winner
        ff_score = 0
        if game['EFG_Differential'] > 0: ff_score += 1
        if game['FTA_Differential'] > 0: ff_score += 1
        if game['TOV_Differential'] > 0: ff_score += 1
        if game['OREB_Differential'] > 0: ff_score += 1
        
        ff_winner = 'Thunder' if ff_score >= 2 else 'Pacers'
        
        evolution_summary['Four_Factors_Winners'].append({
            'Game_Number': game_num,
            'Winner': ff_winner,
            'Score': f"{ff_score}-{4-ff_score}",
            'EFG_Diff': game['EFG_Differential'],
            'FTA_Diff': game['FTA_Differential'],
            'TOV_Diff': game['TOV_Differential'],
            'OREB_Diff': game['OREB_Differential']
        })
    
    return evolution_summary

if len(four_factors) > 0:
    four_factors_evolution = track_four_factors_evolution(four_factors)
else:
    four_factors_evolution = {'Four_Factors_Winners': []}

In [30]:
# Step 7: Save all data to CSV files
print("\n" + "=" * 50)
print("SAVING DATA TO CSV FILES")
print("=" * 50)

# Save datasets
games_summary.to_csv('games_summary.csv', index=False)
four_factors.to_csv('four_factors_by_game.csv', index=False)
player_stats.to_csv('player_performance.csv', index=False)
momentum_df.to_csv('momentum_analysis.csv', index=False)
turning_points_df.to_csv('turning_points.csv', index=False)

if not win_probability_df.empty:
    win_probability_df.to_csv('win_probability_data.csv', index=False)

print("Data collection and analysis complete!")
print("\nFiles created:")
print("- games_summary.csv")
print("- four_factors_by_game.csv")
print("- player_performance.csv")
print("- momentum_analysis.csv")
print("- turning_points.csv")
print("- win_probability_data.csv")


SAVING DATA TO CSV FILES
Data collection and analysis complete!

Files created:
- games_summary.csv
- four_factors_by_game.csv
- player_performance.csv
- momentum_analysis.csv
- turning_points.csv
- win_probability_data.csv


In [32]:
print("\n" + "=" * 50)
print("QUICK SUMMARY STATISTICS")
print("=" * 50)

if len(games_summary) > 0:
    thunder_wins = games_summary[games_summary['Winner'] == 'Thunder'].shape[0]
    pacers_wins = games_summary[games_summary['Winner'] == 'Pacers'].shape[0]
    print(f"Series Result: Thunder won {thunder_wins}-{pacers_wins}")
    print(f"Average Point Differential: {games_summary['Point_Differential'].mean():.1f}")
    
    print("\nGame-by-Game Results:")
    for _, game in games_summary.iterrows():
        home_team = "Thunder" if game['Thunder_Home'] else "Pacers"
        print(f"Game {game['Game_Number']}: {game['Winner']} {max(game['Thunder_Score'], game['Pacers_Score'])}-{min(game['Thunder_Score'], game['Pacers_Score'])} (Home: {home_team})")

if len(momentum_df) > 0:
    print(f"Total Momentum Swings: {len(momentum_df[momentum_df['Type'] == 'Momentum_Swing'])}")
    print(f"Total Lead Changes: {len(momentum_df[momentum_df['Type'] == 'Lead_Change'])}")

if len(turning_points_df) > 0:
    print(f"Total Turning Points: {len(turning_points_df)}")

if len(four_factors_evolution['Four_Factors_Winners']) > 0:
    print("\nFour Factors Winners by Game:")
    for ff_game in four_factors_evolution['Four_Factors_Winners']:
        print(f"Game {ff_game['Game_Number']}: {ff_game['Winner']} ({ff_game['Score']})")


QUICK SUMMARY STATISTICS
Series Result: Thunder won 4-2
Average Point Differential: 12.0

Game-by-Game Results:
Game 1: Thunder 123-107 (Home: Thunder)
Game 2: Pacers 116-107 (Home: Pacers)
Game 3: Thunder 111-104 (Home: Pacers)
Game 4: Thunder 120-109 (Home: Thunder)
Game 5: Pacers 108-91 (Home: Pacers)
Game 6: Thunder 103-91 (Home: Thunder)
Total Momentum Swings: 64
Total Lead Changes: 71

Four Factors Winners by Game:
Game 1: Thunder (3-1)
Game 2: Thunder (2-2)
Game 3: Thunder (2-2)
Game 4: Pacers (0-4)
Game 5: Pacers (1-3)
Game 6: Pacers (0-4)
