In [None]:
import os
import pandas as pd
from nba_api.stats.static import teams
from pathlib import Path

# Project root is taken to be the parent of the current working directory
# (i.e. the kernel is expected to run from the notebooks folder); all outputs
# for this game go under <root>/data/game7.
BASE_DIR = Path.cwd().parent
DATA_DIR = BASE_DIR.joinpath('data', 'game7')
DATA_DIR.mkdir(parents=True, exist_ok=True)

# Quick sanity report so a wrong launch directory is noticed before any download
print(f"Working directory: {BASE_DIR}")
print(f"Data directory: {DATA_DIR}")
print(f"Data directory exists: {DATA_DIR.exists()}")
print(f"Data directory is writable: {os.access(DATA_DIR, os.W_OK)}")

# Team IDs: look both teams up by full name, then take the id of the first match
OKC_data = teams.find_teams_by_full_name("Oklahoma City Thunder")
PAC_data = teams.find_teams_by_full_name("Indiana Pacers")

OKC_id = OKC_data[0]['id'] # 1610612760
PAC_id = PAC_data[0]['id'] # 1610612754


In [None]:
# Get Game ID
from nba_api.stats.endpoints import leaguegamefinder

# Search for the Thunder's game(s) on the single date 06/22/2025
finder = leaguegamefinder.LeagueGameFinder(
    player_or_team_abbreviation="T",
    team_id_nullable=OKC_id,
    date_from_nullable="06/22/2025",
    date_to_nullable="06/22/2025"
)

g7 = finder.get_data_frames()[0]

# Fail with a readable message instead of pandas' "positional indexer is
# out-of-bounds" when the search comes back empty. IndexError is kept so the
# exception type is the same one `.iloc[0]` would have raised.
if g7.empty:
    raise IndexError(
        f"LeagueGameFinder returned no games for team {OKC_id} on 06/22/2025; "
        "cannot determine the game ID"
    )

g7_id = g7["GAME_ID"].iloc[0] # Should be 0042400407

print("Game Data:")
print(g7)


In [None]:
# Import required endpoints
from nba_api.stats.endpoints import (
    boxscoretraditionalv2,
    boxscoreadvancedv2,
    playbyplayv2,
    hustlestatsboxscore,
    winprobabilitypbp,
    boxscorefourfactorsv2,
    shotchartdetail
)
import pandas as pd

def _first_frame_to_csv(endpoint, csv_name):
    """Save the endpoint's first result frame under DATA_DIR and return it.

    Args:
        endpoint: An already-constructed nba_api endpoint object.
        csv_name: File name (relative to DATA_DIR) for the CSV output.

    Returns:
        The first DataFrame from ``endpoint.get_data_frames()``.
    """
    frame = endpoint.get_data_frames()[0]
    frame.to_csv(DATA_DIR / csv_name, index=False)
    return frame


# Traditional box score
trad_stats = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=g7_id)
trad_stats_df = _first_frame_to_csv(trad_stats, 'traditional_stats.csv')

# Per-team player ID lists, read from the traditional box score rows
okc_players = trad_stats_df.loc[trad_stats_df['TEAM_ID'] == OKC_id, 'PLAYER_ID'].tolist()
pac_players = trad_stats_df.loc[trad_stats_df['TEAM_ID'] == PAC_id, 'PLAYER_ID'].tolist()

# Advanced box score
adv_stats = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id=g7_id)
adv_stats_df = _first_frame_to_csv(adv_stats, 'advanced_stats.csv')

# Play-by-play log
pbp = playbyplayv2.PlayByPlayV2(game_id=g7_id)
pbp_df = _first_frame_to_csv(pbp, 'play_by_play.csv')

# Hustle box score
hustle = hustlestatsboxscore.HustleStatsBoxScore(game_id=g7_id)
hustle_df = _first_frame_to_csv(hustle, 'hustle_stats.csv')

# Win probability
wp = winprobabilitypbp.WinProbabilityPBP(game_id=g7_id)
wp_df = _first_frame_to_csv(wp, 'win_probability.csv')

# Four factors box score
ff = boxscorefourfactorsv2.BoxScoreFourFactorsV2(game_id=g7_id)
ff_df = _first_frame_to_csv(ff, 'four_factors.csv')

# Import required for handling timeouts and delays
import time
from requests.exceptions import ReadTimeout, ConnectionError
import random

def get_shot_chart_with_retries(team_id, player_id, game_id, max_retries=3, base_timeout=60,
                                context_measure_simple="FGA"):
    """Fetch one player's shot chart for a game, retrying on network failures.

    Each attempt uses a longer request timeout (``base_timeout * attempt_number``).
    Between failed attempts the function sleeps ``2 ** attempt`` seconds plus up
    to one second of random jitter (exponential backoff).

    Args:
        team_id: Team identifier passed to the ShotChartDetail endpoint.
        player_id: Player identifier passed to the ShotChartDetail endpoint.
        game_id: Game identifier used to restrict the shot chart to one game.
        max_retries: Total number of attempts; must be at least 1.
        base_timeout: Timeout in seconds for the first attempt.
        context_measure_simple: Context measure forwarded to the endpoint.
            Defaults to "FGA" so that missed attempts are included; the
            endpoint's own default ("PTS") yields made shots only.

    Returns:
        The first DataFrame returned by the endpoint.

    Raises:
        ValueError: If ``max_retries`` is smaller than 1.
        ReadTimeout, ConnectionError: Re-raised from the last attempt when
            every attempt failed with one of these errors.
    """
    # Without this guard the loop body never runs and the function would
    # silently return None, which callers would then treat as a result.
    if max_retries < 1:
        raise ValueError(f"max_retries must be at least 1, got {max_retries}")

    # NOTE(review): season_type_all_star is left at the endpoint default; for a
    # playoff game this may need to be "Playoffs" to return rows - confirm.
    for attempt in range(max_retries):
        try:
            # Increase timeout for each retry attempt
            timeout = base_timeout * (attempt + 1)
            shots = shotchartdetail.ShotChartDetail(
                team_id=team_id,
                player_id=player_id,
                game_id_nullable=game_id,
                context_measure_simple=context_measure_simple,
                timeout=timeout
            )
            return shots.get_data_frames()[0]
        except (ReadTimeout, ConnectionError):
            if attempt == max_retries - 1:  # Last attempt
                print(f"Failed to get shot chart data for player {player_id} after {max_retries} attempts")
                # Bare raise keeps the original traceback intact
                raise
            # Wait with exponential backoff (plus jitter) before retrying
            wait_time = (2 ** attempt) + random.uniform(0, 1)
            print(f"Attempt {attempt + 1} failed. Waiting {wait_time:.2f} seconds before retry...")
            time.sleep(wait_time)

# Shot Charts - collect for all players on each team with error handling
def _collect_team_shots(team_id, player_ids, team_label):
    """Fetch one shot-chart frame per player, tolerating per-player failures.

    Args:
        team_id: Team identifier forwarded to get_shot_chart_with_retries.
        player_ids: Iterable of player identifiers to query.
        team_label: Short team name used in the error messages.

    Returns:
        Tuple ``(frames, failed_ids)``: the frames fetched successfully and the
        ids of the players whose request raised.
    """
    frames, failed_ids = [], []
    for player_id in player_ids:
        try:
            frames.append(get_shot_chart_with_retries(team_id, player_id, g7_id))
        except Exception as e:
            # Deliberately best-effort: one failing player must not abort the
            # whole team, but the failure is recorded for the final summary.
            print(f"Error collecting shots for {team_label} player {player_id}: {str(e)}")
            failed_ids.append(player_id)
        # Add a small random delay between requests; it also runs after a
        # failure so an error is not followed by an immediate new request.
        time.sleep(random.uniform(1, 2))
    return frames, failed_ids


print("Collecting OKC shot charts...")
okc_shots_list, okc_failed_players = _collect_team_shots(OKC_id, okc_players, "OKC")

print("\nCollecting Pacers shot charts...")
pac_shots_list, pac_failed_players = _collect_team_shots(PAC_id, pac_players, "Pacers")

# Combine all shots for each team
if okc_shots_list:
    print("\nCombining OKC shots...")
    okc_shots_df = pd.concat(okc_shots_list, ignore_index=True)
    okc_shots_df.to_csv(DATA_DIR / 'okc_shots.csv', index=False)
    print(f"Saved {len(okc_shots_df)} OKC shots")
else:
    print("\nNo OKC shot data was collected; okc_shots.csv was not written")

if pac_shots_list:
    print("\nCombining Pacers shots...")
    pac_shots_df = pd.concat(pac_shots_list, ignore_index=True)
    pac_shots_df.to_csv(DATA_DIR / 'pac_shots.csv', index=False)
    print(f"Saved {len(pac_shots_df)} Pacers shots")
else:
    print("\nNo Pacers shot data was collected; pac_shots.csv was not written")

# Only claim full success when every player was fetched and both files exist
failed_players = okc_failed_players + pac_failed_players
if failed_players or not okc_shots_list or not pac_shots_list:
    print(
        "Data collection finished with gaps: "
        f"{len(failed_players)} player shot chart(s) failed {failed_players}"
    )
else:
    print("All data has been collected and saved to the data/game7 directory.")
