## Import Required Libraries

In [1]:
from nba_api.stats.endpoints import teamdetails, commonteamyears, teamgamelogs,playergamelogs, boxscoreadvancedv3, scoreboardv2
import pandas as pd
from tqdm import tqdm
import os
import pandavro as pdx
import time
from datetime import datetime
from warnings import filterwarnings
import json
filterwarnings('ignore')

## Get All Team IDs

In [2]:
# Build the static list of teams once; later runs reuse the cached parquet file.
if 'all_teams.parquet' not in os.listdir('Static Files'):
    teams = commonteamyears.CommonTeamYears()
    print('Established connection')
    teams = teams.get_dict()
    print('Retrieved list of dicts')
    # The whole result set comes from the single request above, so the rows are
    # copied without any per-row delay.
    # Positions 1-4 of every row are kept, in order, under the four column
    # labels given below.
    # NOTE(review): the labels are taken as authoritative for what each
    # position holds -- confirm against the endpoint's headers.
    team_ids = [
        (row[1], row[2], row[3], row[4])
        for row in tqdm(teams['resultSets'][0]['rowSet'])
    ]
    print('Established list of tuples')
    teams = pd.DataFrame(team_ids, columns=['team_id', 'year_founded', 'year_depreciated', 'abbreviation'])
    print('Created DataFrame')
    teams.to_parquet('Static Files/all_teams.parquet', index=False)

## Get All Team Details

In [3]:
# Collect the first TeamDetails table for every cached team id and store the
# combined frame; skipped entirely when the parquet file already exists.
if 'team_details.parquet' not in os.listdir('Static Files'):
    teams = pd.read_parquet('Static Files/all_teams.parquet')
    detail_frames = []
    for current_team_id in tqdm(teams['team_id']):
        details_endpoint = teamdetails.TeamDetails(team_id=current_team_id, timeout=60)
        detail_frames.append(details_endpoint.get_data_frames()[0])
        # Pause between requests (presumably to stay under the API rate limit).
        time.sleep(0.5)
    output_df = pd.concat(detail_frames)
    output_df.to_parquet('Static Files/team_details.parquet')

## Get All Team Game Logs

In [4]:
# Download team game logs for the 24 seasons 2000-01 through 2023-24 and cache
# them in one parquet file; skipped when the file already exists.
if 'team_game_logs.parquet' not in os.listdir('Static Files'):
    # A season label is the start year plus the zero-padded two-digit end year,
    # e.g. '2009-10'. Generating the labels directly avoids the counter
    # bookkeeping that previously jumped from '2008-09' straight to '2010-11'.
    seasons = [f'{year}-{(year + 1) % 100:02d}' for year in range(2000, 2024)]
    initial_df = []
    for season in tqdm(seasons):
        get_game_stats = teamgamelogs.TeamGameLogs(season_nullable=season)
        initial_df.append(get_game_stats.get_data_frames()[0])
        # Pause between requests (presumably to stay under the API rate limit).
        time.sleep(0.5)

    output_df = pd.concat(initial_df)
    output_df.to_parquet('Static Files/team_game_logs.parquet')

## Get All Current Player Injury Reports

In [5]:
# Keep a local copy of the ESPN injury tables and re-download it only when the
# stored copy was not written today.
refresh_injuries = True
if 'player_injury_reports.avro' in os.listdir('Static Files'):
    player_injuries = pdx.read_avro('Static Files/player_injury_reports.avro')
    saved_stamps = pd.to_datetime(player_injuries['date_updated'])
    # Compare calendar dates only: the stored value carries a time of day, so a
    # full timestamp comparison against "now" could never be equal.
    refresh_injuries = (
        saved_stamps.empty
        or saved_stamps.max().date() != datetime.today().date()
    )

if refresh_injuries:
    player_injuries = pd.concat(pd.read_html('https://www.espn.com/nba/injuries')).fillna('No Info Available')
    player_injuries['date_updated'] = datetime.today()
    pdx.to_avro('Static Files/player_injury_reports.avro', player_injuries)

## Get All Player Game Logs

In [6]:
# Download player game logs for the 24 seasons 2000-01 through 2023-24 and
# cache them in one parquet file; skipped when the file already exists.
if 'player_game_logs.parquet' not in os.listdir('Static Files'):
    # A season label is the start year plus the zero-padded two-digit end year,
    # e.g. '2009-10'. Generating the labels directly avoids the counter
    # bookkeeping that previously jumped from '2008-09' straight to '2010-11'.
    seasons = [f'{year}-{(year + 1) % 100:02d}' for year in range(2000, 2024)]
    initial_df = []
    for season in tqdm(seasons):
        get_game_stats = playergamelogs.PlayerGameLogs(season_nullable=season)
        initial_df.append(get_game_stats.get_data_frames()[0])
        # Pause between requests (presumably to stay under the API rate limit).
        time.sleep(0.5)

    output_df = pd.concat(initial_df)
    output_df.to_parquet('Static Files/player_game_logs.parquet')

## Get All Games Next Season

In [7]:
# Query the scoreboard for every day between 2024-10-04 and 2025-04-15 and
# store one (game_date, game_id) row per game; skipped when already cached.
if '2024-25_game_ids.parquet' not in os.listdir('Static Files'):
    start = datetime.strptime("2024-10-04", "%Y-%m-%d")
    end = datetime.strptime("2025-04-15", "%Y-%m-%d")

    date_range = pd.date_range(start=start, end=end).to_list()

    # Maps each date that has at least one game to its list of game ids;
    # dates without games never get a key.
    games = {}
    for date in tqdm(date_range):
        scoreboard = json.loads(scoreboardv2.ScoreboardV2(game_date=date).get_json())

        # The first result set is read as the 'GameHeader' table and position 2
        # of each of its rows as the 'GAME_ID'.
        for game in scoreboard['resultSets'][0]['rowSet']:
            games.setdefault(date, []).append(game[2])

        time.sleep(0.5)  # Delay to avoid rate limits
    games = {key.strftime('%Y-%m-%d'): value for key, value in games.items()}
    games_2025 = pd.DataFrame(list(games.items()), columns=['game_date', 'game_id'])
    # One row per game instead of one list-valued row per date.
    games_2025 = games_2025.explode(column='game_id')
    games_2025.to_parquet('Static Files/2024-25_game_ids.parquet')

## Get Advanced Player Game Stats

In [8]:
# Build per-player advanced box score rows for every 2023-24 game and cache
# them; skipped when the parquet file already exists.
if 'current_player_advanced_game_stats.parquet' not in os.listdir('Static Files'):
    # `game_log_df` is not created anywhere in this file. When the session does
    # not already provide it, fall back to the cached player game logs.
    # NOTE(review): assumes player_game_logs.parquet is the intended source --
    # confirm.
    try:
        game_log_df
    except NameError:
        game_log_df = pd.read_parquet('Static Files/player_game_logs.parquet')

    # Filter games for the 2023-24 season
    games = game_log_df[game_log_df['SEASON_YEAR'] == '2023-24']['GAME_ID'].unique()
    rows = []

    # Iterate over each game
    for game_id in tqdm(games):
        try:
            # First data frame of the advanced box score for this game
            game = boxscoreadvancedv3.BoxScoreAdvancedV3(game_id).get_data_frames()[0]

            # Keep one flat record per row of that frame
            rows.extend([{
                'game_id': game_id,
                'team_id': player_stats.teamId,
                'team_name': f"{player_stats.teamCity} {player_stats.teamName}",
                'player_id': player_stats.personId,
                'player_name': f"{player_stats.firstName} {player_stats.familyName}",
                'position': player_stats.position,
                'comment': player_stats.comment,
                'offensive_rating': player_stats.offensiveRating,
                'defensive_rating': player_stats.defensiveRating,
                'net_rating': player_stats.netRating,
                'assist_percentage': player_stats.assistPercentage,
                'assist_turnover_ratio': player_stats.assistToTurnover,
                'assist_ratio': player_stats.assistRatio,
                'usage_percentage': player_stats.usagePercentage,
                'possessions': player_stats.possessions,
                'effective_fg_percentage': player_stats.effectiveFieldGoalPercentage,
                'true_shooting_percentage': player_stats.trueShootingPercentage,
                'player_impact_score': player_stats.PIE
            } for player_stats in game.itertuples()])
        except Exception as e:
            # Best effort: report the failing game and move on to the next one.
            print(f"Error processing game {game_id}: {e}")
        finally:
            # Sleep after every request, failed ones included, to avoid
            # hitting the rate limit
            time.sleep(0.5)

    # Convert list of rows to DataFrame
    df = pd.DataFrame(rows)

    # Save to Parquet
    df.to_parquet('Static Files/current_player_advanced_game_stats.parquet')


## Get Advanced Team Game Stats

In [9]:
def fetch_team_advanced_stats():
    """Build and cache per-team advanced box score rows for 2023-24 games.

    Does nothing when 'current_team_advanced_game_stats.parquet' is already
    listed in 'Static Files'. Otherwise requests the advanced box score of
    every 2023-24 game id, keeps one record per row of the second returned
    data frame, and writes the records to that parquet file.

    A game whose request or parsing fails is reported, the rows gathered so
    far are written out as a checkpoint, and processing continues with the
    next game. Only ``Exception`` subclasses are handled, so a keyboard
    interrupt stops the run.

    Returns:
        None
    """
    output_path = 'Static Files/current_team_advanced_game_stats.parquet'
    if 'current_team_advanced_game_stats.parquet' in os.listdir('Static Files'):
        return

    # `game_log_df2` is not created anywhere in this file. When the session
    # does not already provide it, fall back to the cached team game logs.
    # NOTE(review): assumes team_game_logs.parquet is the intended source --
    # confirm.
    try:
        game_logs = game_log_df2
    except NameError:
        game_logs = pd.read_parquet('Static Files/team_game_logs.parquet')

    # Filter games for the 2023-24 season
    games = game_logs[game_logs['SEASON_YEAR'] == '2023-24']['GAME_ID'].unique()
    rows = []

    # Iterate over each game
    for game_id in tqdm(games):
        try:
            # Second data frame of the advanced box score for this game
            game = boxscoreadvancedv3.BoxScoreAdvancedV3(game_id).get_data_frames()[1]

            # Keep one flat record per row of that frame
            rows.extend([{
                'game_id': game_id,
                'team_id': team_stats.teamId,
                'team_name': f"{team_stats.teamCity} {team_stats.teamName}",
                'offensive_rating': team_stats.offensiveRating,
                'defensive_rating': team_stats.defensiveRating,
                'net_rating': team_stats.netRating,
                'assist_percentage': team_stats.assistPercentage,
                'assist_turnover_ratio': team_stats.assistToTurnover,
                'assist_ratio': team_stats.assistRatio,
                'possessions': team_stats.possessions,
                'effective_fg_percentage': team_stats.effectiveFieldGoalPercentage,
                'true_shooting_percentage': team_stats.trueShootingPercentage
            } for team_stats in game.itertuples()])
        except Exception as error:
            print(f"last processed game: {game_id}")
            print(f"Error processing game {game_id}: {error}")
            # Checkpoint what has been collected so far.
            pd.DataFrame(rows).to_parquet(output_path)
        finally:
            # Sleep after every request, failed ones included, to avoid
            # hitting the rate limit
            time.sleep(1)

    # Convert list of rows to DataFrame
    df = pd.DataFrame(rows)

    # Save to Parquet
    df.to_parquet(output_path)

# Run the function: it returns without doing work when the team advanced-stats
# parquet file is already listed in 'Static Files'.
fetch_team_advanced_stats()


## Get Player Stats

In [10]:
from nba_api.live.nba.endpoints import boxscore
import json
import pandas as pd

# Columns kept from each player's statistics, in display order.
_BOX_SCORE_COLUMNS = ['team_name', 'player_name', 'minutes', 'points', 'reboundsTotal', 'assists', 'steals', 'blocks', 'blocksReceived', 'turnovers', 'foulsPersonal', 'foulsDrawn', 'foulsTechnical', 'fieldGoalsAttempted', 'fieldGoalsMade', 'fieldGoalsPercentage', 'freeThrowsMade', 'freeThrowsAttempted', 'freeThrowsPercentage', 'threePointersAttempted', 'threePointersMade', 'threePointersPercentage', 'pointsFastBreak', 'pointsInThePaint', 'pointsSecondChance']

# camelCase API field -> snake_case column label.
_BOX_SCORE_RENAMES = {'reboundsTotal': 'rebounds', 'blocksReceived': 'blocks_against', 'fieldGoalsAttempted': 'field_goals_attempted', 'fieldGoalsMade': 'field_goals_made', 'freeThrowsAttempted': 'free_throws_attempted', 'threePointersAttempted': 'three_pointers_attempted', 'foulsPersonal': 'personal_fouls', 'foulsDrawn': 'fouls_drawn', 'foulsTechnical': 'technical_fouls', 'fieldGoalsPercentage': 'field_goals_percentage', 'freeThrowsPercentage': 'free_throws_percentage', 'threePointersPercentage': 'three_pointers_percentage', 'freeThrowsMade': 'free_throws_made', 'threePointersMade': 'three_pointers_made', 'pointsFastBreak': 'fast_break_points', 'pointsInThePaint': 'points_in_the_paint', 'pointsSecondChance': 'second_chance_points'}


def _build_player_stats(players, team_code):
    """Return one team's player statistics as a frame sorted by points.

    Args:
        players: list of player dicts, each with 'statistics', 'firstName'
            and 'familyName' entries.
        team_code: value stored in the 'team_name' column of every row.

    Returns:
        DataFrame limited to the selected box score columns, renamed to
        snake_case, with 'minutes' shortened and rows ordered by 'points'
        descending.
    """
    records = []
    for player in players:
        stats = player['statistics']
        stats['player_name'] = f"{player['firstName']} {player['familyName']}"
        stats['team_name'] = team_code  # Include team name
        records.append(stats)

    frame = pd.DataFrame(records)[_BOX_SCORE_COLUMNS].rename(columns=_BOX_SCORE_RENAMES)
    # Drop the 'PT' marker, turn the 'M' separator into ':' and cut the last
    # four characters, e.g. 'PT24M29.00S' -> '24:29'.
    frame['minutes'] = (
        frame['minutes']
        .str.replace('PT', '')
        .str.replace('M', ':')
        .str[:-4]
    )
    return frame.sort_values(by='points', ascending=False)


# Download the box score once and reuse it for both teams.
game = json.loads(boxscore.BoxScore(game_id='0012400027').get_json())['game']

# get home player stats
home_players = game['homeTeam']['players']
home_team = game['homeTeam']['teamTricode']
home_player_stats = _build_player_stats(home_players, home_team)

# get away player stats
away_players = game['awayTeam']['players']
away_team = game['awayTeam']['teamTricode']
away_player_stats = _build_player_stats(away_players, away_team)



In [60]:
# Advanced box score for one game: first data frame returned by the endpoint.
player_stats = boxscoreadvancedv3.BoxScoreAdvancedV3(game_id='0012400027').get_data_frames()[0]

# Readable name columns assembled from their two-part source fields.
player_stats['player_name'] = player_stats['firstName'] + ' ' + player_stats['familyName']
player_stats['team_name'] = player_stats['teamCity'] + ' ' + player_stats['teamName']

# Fields removed before display.
unwanted_columns = [
    'teamId', 'teamTricode', 'teamSlug', 'personId', 'playerSlug',
    'jerseyNum', 'firstName', 'familyName', 'teamCity', 'teamName', 'nameI',
    'estimatedOffensiveRating', 'estimatedDefensiveRating',
    'estimatedNetRating', 'estimatedUsagePercentage',
]
# Fields shown, in display order.
display_columns = [
    'gameId', 'team_name', 'player_name', 'minutes', 'PIE',
    'offensiveRating', 'defensiveRating', 'netRating', 'assistPercentage',
    'assistRatio', 'possessions', 'effectiveFieldGoalPercentage',
    'assistToTurnover', 'trueShootingPercentage',
]

# Left as a bare expression so the notebook displays the table, highest PIE first.
player_stats.drop(columns=unwanted_columns)[display_columns].sort_values(by='PIE', ascending=False)


Unnamed: 0,gameId,team_name,player_name,minutes,PIE,offensiveRating,defensiveRating,netRating,assistPercentage,assistRatio,possessions,effectiveFieldGoalPercentage,assistToTurnover,trueShootingPercentage
11,12400027,Charlotte Hornets,Moussa Diabaté,12:11,0.4,117.9,88.9,29.0,0.111,16.7,28.0,1.0,0.0,1.025
3,12400027,Charlotte Hornets,Brandon Miller,24:29,0.216,111.8,105.7,6.1,0.143,12.5,51.0,0.846,2.0,0.846
5,12400027,Charlotte Hornets,Taj Gibson,10:26,0.193,119.0,85.7,33.3,0.0,0.0,21.0,1.0,0.0,1.0
12,12400027,Charlotte Hornets,Charlie Brown Jr.,11:33,0.191,137.0,76.9,60.1,0.083,20.0,27.0,1.0,0.0,1.0
23,12400027,Memphis Grizzlies,Desmond Bane,24:47,0.19,89.1,100.0,-10.9,0.273,16.7,55.0,0.654,1.5,0.654
7,12400027,Charlotte Hornets,Tre Mann,22:18,0.188,120.8,87.5,33.3,0.154,10.5,48.0,0.692,0.5,0.707
26,12400027,Memphis Grizzlies,Jay Huff,23:11,0.186,110.4,110.0,0.4,0.143,15.4,48.0,0.7,0.0,0.689
2,12400027,Charlotte Hornets,Nick Richards,17:27,0.142,92.1,89.5,2.6,0.083,12.5,38.0,0.4,0.5,0.4
0,12400027,Charlotte Hornets,Seth Curry,21:12,0.138,93.6,78.7,14.9,0.143,20.0,47.0,0.571,2.0,0.571
10,12400027,Charlotte Hornets,Keyontae Johnson,20:59,0.138,123.4,71.1,52.3,0.105,22.2,47.0,0.583,2.0,0.583
