## Import Required Libraries

In [None]:
%pip install --q -r requirements.txt

In [None]:
from nba_api.stats.endpoints import teamdetails, commonteamyears, teamgamelogs,playergamelogs, commonplayerinfo, boxscoreadvancedv3, boxscoretraditionalv3, boxscoremiscv3, scoreboardv2
import pandas as pd
from tqdm import tqdm
import os
import pandavro as pdx
import numpy as np
import time
from datetime import datetime, date
from warnings import filterwarnings
import json
import multiprocessing as mp
filterwarnings('ignore')

## Get All Team IDs

In [None]:
# Build the static team lookup table once; the API is only queried when the
# cached Parquet file is missing from 'Static Files'.
if 'all_teams.parquet' not in os.listdir('Static Files'):
    teams = commonteamyears.CommonTeamYears()
    print('Established connection')
    teams = teams.get_dict()
    print('Retrieved list of dicts')
    # Rows are positional. Positions 1-4 are kept, in order, and labelled by
    # the column list below; position 0 is dropped.
    # NOTE(review): the labels assume the endpoint's column order is
    # TEAM_ID, MIN_YEAR, MAX_YEAR, ABBREVIATION after position 0 — confirm
    # against the nba_api CommonTeamYears documentation.
    # No sleep is needed here: the loop only reshapes data already in memory.
    team_ids = [
        (team[1], team[2], team[3], team[4])
        for team in tqdm(teams['resultSets'][0]['rowSet'])
    ]
    print('Established list of tuples')
    teams = pd.DataFrame(team_ids, columns=['team_id', 'year_founded', 'year_depreciated', 'abbreviation'])
    print('Created DataFrame')
    teams.to_parquet('Static Files/all_teams.parquet', index=False)

## Get All Team Details

In [None]:
# Cache the first TeamDetails table for every team id in all_teams.parquet.
if 'team_details.parquet' not in os.listdir('Static Files'):
    teams = pd.read_parquet('Static Files/all_teams.parquet')
    detail_frames = []
    for current_team_id in tqdm(teams['team_id']):
        details_endpoint = teamdetails.TeamDetails(team_id=current_team_id, timeout=60)
        detail_frames.append(details_endpoint.get_data_frames()[0])
        # Pause between requests.
        time.sleep(0.5)
    output_df = pd.concat(detail_frames)
    output_df.to_parquet('Static Files/team_details.parquet')

## Get All Team Game Logs

In [None]:
# Download team game logs for every season from 2000-01 through 2024-25 and
# cache the concatenated result; skipped when the Parquet file already exists.
if 'team_game_logs.parquet' not in os.listdir('Static Files'):
    # Season labels have the form 'YYYY-YY'. Deriving the label from the
    # starting year keeps the two-digit suffix zero-padded and covers every
    # season in the range, including '2009-10', which separate start/end
    # counters are prone to skipping at the one-digit/two-digit boundary.
    seasons = [f'{year}-{(year + 1) % 100:02d}' for year in range(2000, 2025)]

    initial_df = []
    for season in tqdm(seasons):
        get_game_stats = teamgamelogs.TeamGameLogs(season_nullable=season)
        # Only the first table returned by the endpoint is kept.
        initial_df.append(get_game_stats.get_data_frames()[0])
        # Pause between requests.
        time.sleep(0.5)

    output_df = pd.concat(initial_df)
    output_df.to_parquet('Static Files/team_game_logs.parquet')

## Get All Current Player Injury Reports

In [None]:
# Refresh the cached ESPN injury report at most once per calendar day.
#
# The cache is considered fresh when its newest 'date_updated' value falls on
# today's date. Comparing calendar dates (rather than full timestamps) is what
# makes the cache usable: a stored timestamp can never equal "now" to the
# microsecond, so a timestamp comparison would refetch on every run.
injury_report_file = 'player_injury_reports.avro'
injury_report_path = f'Static Files/{injury_report_file}'

needs_refresh = True
if injury_report_file in os.listdir('Static Files'):
    player_injuries = pdx.read_avro(injury_report_path)
    last_updated = pd.to_datetime(player_injuries['date_updated']).max()
    # An empty cache yields NaT, which is treated as stale.
    # NOTE(review): if the Avro round-trip returns timezone-aware values, the
    # date below is in that timezone while `today` is local — confirm this is
    # acceptable around midnight.
    today = datetime.today().date()
    needs_refresh = pd.isna(last_updated) or last_updated.date() != today

if needs_refresh:
    player_injuries = pd.concat(pd.read_html('https://www.espn.com/nba/injuries')).fillna('No Info Available')
    player_injuries['date_updated'] = datetime.today()
    pdx.to_avro(injury_report_path, player_injuries)

## Get All Player Game Logs

In [None]:
# Download player game logs for every season from 2000-01 through 2024-25 and
# cache the concatenated result; skipped when the Parquet file already exists.
if 'player_game_logs.parquet' not in os.listdir('Static Files'):
    # Season labels have the form 'YYYY-YY'. Deriving the label from the
    # starting year keeps the two-digit suffix zero-padded and covers every
    # season in the range, including '2009-10', which separate start/end
    # counters are prone to skipping at the one-digit/two-digit boundary.
    seasons = [f'{year}-{(year + 1) % 100:02d}' for year in range(2000, 2025)]

    initial_df = []
    for season in tqdm(seasons):
        get_game_stats = playergamelogs.PlayerGameLogs(season_nullable=season)
        # Only the first table returned by the endpoint is kept.
        initial_df.append(get_game_stats.get_data_frames()[0])
        # Pause between requests.
        time.sleep(0.5)

    output_df = pd.concat(initial_df)
    output_df.to_parquet('Static Files/player_game_logs.parquet')

## Get All Games Next Season

In [None]:
# Collect the game ids listed on the scoreboard for each day between
# 2024-10-04 and 2025-04-15 and cache them as one row per (date, game id).
if '2024-25_game_ids.parquet' not in os.listdir('Static Files'):
    start = datetime.strptime("2024-10-04", "%Y-%m-%d")
    end = datetime.strptime("2025-04-15", "%Y-%m-%d")

    games = {}
    # The loop variable is intentionally not named `date`: that name is
    # imported from `datetime` at the top of the notebook, and rebinding it
    # here would break any later cell that uses the class.
    for game_day in tqdm(pd.date_range(start=start, end=end)):
        day_label = game_day.strftime('%Y-%m-%d')
        # Pass the date as an explicit 'YYYY-MM-DD' string instead of relying
        # on how a Timestamp happens to be stringified in the request.
        scoreboard = json.loads(scoreboardv2.ScoreboardV2(game_date=day_label).get_json())

        # The first result set is used as the game list; position 2 of each
        # row is read as the game id.
        # NOTE(review): assumes resultSets[0] is 'GameHeader' with GAME_ID at
        # index 2 — confirm against the ScoreboardV2 documentation.
        for game in scoreboard['resultSets'][0]['rowSet']:
            games.setdefault(day_label, []).append(game[2])

        time.sleep(0.5)  # Delay to avoid rate limits

    # One row per date with a list of ids, then one row per id.
    games_2025 = pd.DataFrame(list(games.items()), columns=['game_date', 'game_id'])
    games_2025 = games_2025.explode(column='game_id')
    games_2025.to_parquet('Static Files/2024-25_game_ids.parquet')

## Get Advanced Player Game Stats

In [None]:
# Download per-player advanced box score rows for every 2023-24 game id found
# in `game_log_df` and cache them; skipped when the Parquet file exists.
if 'current_player_advanced_game_stats.parquet' not in os.listdir('Static Files'):
    # NOTE(review): `game_log_df` must already exist in the notebook namespace
    # with 'SEASON_YEAR' and 'GAME_ID' columns — it is not created in this cell.
    games = game_log_df[game_log_df['SEASON_YEAR'] == '2023-24']['GAME_ID'].unique()
    rows = []

    for game_id in tqdm(games):
        try:
            # First table returned by the endpoint; the fields read below
            # (personId, position, ...) are per-player attributes.
            game = boxscoreadvancedv3.BoxScoreAdvancedV3(game_id).get_data_frames()[0]

            # Flatten each row into a dict with snake_case keys.
            rows.extend([{
                'game_id': game_id,
                'team_id': player_stats.teamId,
                'team_name': f"{player_stats.teamCity} {player_stats.teamName}",
                'player_id': player_stats.personId,
                'player_name': f"{player_stats.firstName} {player_stats.familyName}",
                'position': player_stats.position,
                'comment': player_stats.comment,
                'offensive_rating': player_stats.offensiveRating,
                'defensive_rating': player_stats.defensiveRating,
                'net_rating': player_stats.netRating,
                'assist_percentage': player_stats.assistPercentage,
                'assist_turnover_ratio': player_stats.assistToTurnover,
                'assist_ratio': player_stats.assistRatio,
                'usage_percentage': player_stats.usagePercentage,
                'possessions': player_stats.possessions,
                'effective_fg_percentage': player_stats.effectiveFieldGoalPercentage,
                'true_shooting_percentage': player_stats.trueShootingPercentage,
                'player_impact_score': player_stats.PIE
            } for player_stats in game.itertuples()])
        except Exception as e:
            # Best effort: report the failing game and move on to the next one.
            print(f"Error processing game {game_id}: {e}")

        # Sleep after every request, successful or not, so a failed call is
        # not immediately followed by another one.
        time.sleep(0.5)

    # Convert list of rows to DataFrame
    df = pd.DataFrame(rows)

    # Save to Parquet
    df.to_parquet('Static Files/current_player_advanced_game_stats.parquet')


## Get Advanced Team Game Stats

In [None]:
def fetch_team_advanced_stats():
    """Download team-level advanced box score rows for 2023-24 games.

    Does nothing when 'current_team_advanced_game_stats.parquet' is already
    present in 'Static Files'. Otherwise every unique 2023-24 GAME_ID in the
    notebook-level `game_log_df2` frame is requested from BoxScoreAdvancedV3,
    the second returned table is flattened into one dict per row, and the
    result is written to Parquet.

    A failed request does not stop the run: the error is printed, the rows
    gathered so far are written out as a checkpoint, and the loop continues.
    Only `Exception` subclasses are handled, so Ctrl-C still interrupts.
    """
    output_path = 'Static Files/current_team_advanced_game_stats.parquet'

    if 'current_team_advanced_game_stats.parquet' in os.listdir('Static Files'):
        return

    # NOTE(review): `game_log_df2` must already exist in the notebook
    # namespace with 'SEASON_YEAR' and 'GAME_ID' columns.
    games = game_log_df2[game_log_df2['SEASON_YEAR'] == '2023-24']['GAME_ID'].unique()
    rows = []

    for game_id in tqdm(games):
        try:
            # Second table returned by the endpoint; the fields read below
            # carry no player attributes, only team ones.
            game = boxscoreadvancedv3.BoxScoreAdvancedV3(game_id).get_data_frames()[1]

            rows.extend([{
                'game_id': game_id,
                'team_id': team_stats.teamId,
                'team_name': f"{team_stats.teamCity} {team_stats.teamName}",
                'offensive_rating': team_stats.offensiveRating,
                'defensive_rating': team_stats.defensiveRating,
                'net_rating': team_stats.netRating,
                'assist_percentage': team_stats.assistPercentage,
                'assist_turnover_ratio': team_stats.assistToTurnover,
                'assist_ratio': team_stats.assistRatio,
                'possessions': team_stats.possessions,
                'effective_fg_percentage': team_stats.effectiveFieldGoalPercentage,
                'true_shooting_percentage': team_stats.trueShootingPercentage
            } for team_stats in game.itertuples()])
        except Exception as e:
            # Checkpoint what has been collected so far, then keep going.
            print(f"last processed game: {game_id} (error: {e})")
            pd.DataFrame(rows).to_parquet(output_path)

        # Sleep after every request, successful or not, to stay under the
        # rate limit.
        time.sleep(1)

    # Final write with every collected row.
    df = pd.DataFrame(rows)
    df.to_parquet(output_path)

# Run the function
fetch_team_advanced_stats()


In [36]:
from nba_api.live.nba.endpoints import boxscore
import duckdb
import json
import pandas as pd
# Pull the live box score for a single game and flatten it into one row.
json_obj = json.loads(boxscore.BoxScore(game_id='0012400029').get_json())

# Both teams live under the top-level 'game' object.
game_section = json_obj['game']
game_id = game_section['gameId']
home_team_stats = game_section['homeTeam']
away_team_stats = game_section['awayTeam']
sides = (('home', home_team_stats), ('away', away_team_stats))

# Identity and score columns first (home, then away) ...
game_data = {'game_id': game_id}
for side, team_block in sides:
    game_data[f'{side}_team_id'] = team_block['teamId']
    game_data[f'{side}_team_name'] = f"{team_block['teamCity']} {team_block['teamName']}"
    game_data[f'{side}_team_score'] = team_block['score']

# ... followed by every entry of each team's 'statistics' mapping,
# prefixed with the side it belongs to.
for side, team_block in sides:
    game_data.update({
        f'{side}_{stat}': value
        for stat, value in team_block['statistics'].items()
    })

# Single-row frame; the trailing expression shows columns 48-52 in the notebook.
df = pd.DataFrame([game_data])
df.iloc[:, 48:53]

Unnamed: 0,home_reboundsTeamDefensive,home_reboundsTeamOffensive,home_reboundsTotal,home_secondChancePointsAttempted,home_secondChancePointsMade
0,3,3,53,11,6
