In [11]:
from nba_api.stats.endpoints import playercareerstats, leaguegamefinder, teamgamelog, playergamelog
import requests
import numpy
import pandas as pd

In [50]:
from nba_api.stats.static import teams

# Get all teams
all_teams = teams.get_teams()

# Create a list to store team data as dictionaries
team_data = []
for team in all_teams:
    team_data.append({'team_name': team['full_name'], 'team_id': team['id']})

# Create the DataFrame
teams_df = pd.DataFrame(team_data)

# Sort the DataFrame by team_name
teams_df.sort_values(by='team_name', inplace=True)

# Remove the default index
teams_df.set_index('team_name', inplace=True)
teams_df = teams_df.reset_index()  # Reset the index to remove the name

# Print the DataFrame without the index
teams_df
    

Unnamed: 0,team_name,team_id
0,Atlanta Hawks,1610612737
1,Boston Celtics,1610612738
2,Brooklyn Nets,1610612751
3,Charlotte Hornets,1610612766
4,Chicago Bulls,1610612741
5,Cleveland Cavaliers,1610612739
6,Dallas Mavericks,1610612742
7,Denver Nuggets,1610612743
8,Detroit Pistons,1610612765
9,Golden State Warriors,1610612744


In [43]:
# Export the DataFrame to an Excel file named 'nba_teams.xlsx'
teams_df.to_excel('../Data/nba_teams.xlsx', index=False) 

In [59]:
import time
import requests
import pandas as pd
from nba_api.stats.endpoints import commonteamroster

# Assuming you have the teams_df DataFrame from the previous examples
# If not, here's how to create it:
from nba_api.stats.static import teams
all_teams = teams.get_teams()
team_data = []
for team in all_teams:
    team_data.append({'team_name': team['full_name'], 'team_id': team['id']})
teams_df = pd.DataFrame(team_data)


def get_players_by_team_id(team_id, season='2024-25'):
    """
    Retrieves a list of players and their IDs for a given NBA team ID and season.

    Args:
      team_id: The NBA team ID.
      season: The NBA season (e.g., '2023-24'). Defaults to the current season.

    Returns:
      A DataFrame with columns 'player_name' and 'player_id'.
    """

    retries = 3
    for i in range(retries):
        try:
            roster = commonteamroster.CommonTeamRoster(season=season, team_id=team_id, timeout=60)
            roster_df = roster.get_data_frames()[0]
            break  # Break out of the retry loop if successful
        except requests.exceptions.ReadTimeout:
            if i < retries - 1:
                time.sleep(5)
                print(f"ReadTimeout error for team_id {team_id}. Retrying ({i+1}/{retries})...")
            else:
                print(f"Failed to retrieve data for team_id {team_id} after multiple retries.")
                return pd.DataFrame(columns=['player_name', 'player_id'])  # Return empty DataFrame on failure

    # Extract player names and IDs
    player_names = roster_df['PLAYER'].tolist()
    player_ids = roster_df['PLAYER_ID'].tolist()

    # Create a DataFrame
    players_df = pd.DataFrame({'player_name': player_names, 'player_id': player_ids})
    return players_df


all_player_data = []
for index, row in teams_df.iterrows():
    team_name = row['team_name']
    team_id = row['team_id']
    
    players_df = get_players_by_team_id(team_id)
    players_df['team_name'] = team_name  # Add team_name column

    all_player_data.append(players_df)

    time.sleep(1)  # Wait for 1 second

all_players_df = pd.concat(all_player_data)


In [61]:
# Export the DataFrame to an Excel file named 'all_players.xlsx'
all_players_df.to_excel('../Data/all_players.xlsx', index=False) 

In [60]:
raptors_df = all_players_df[all_players_df['team_name']=='Toronto Raptors'] 
raptors_df

Unnamed: 0,player_name,player_id,team_name
0,A.J. Lawson,1630639,Toronto Raptors
1,Gradey Dick,1641711,Toronto Raptors
2,Jonathan Mogbo,1642367,Toronto Raptors
3,Scottie Barnes,1630567,Toronto Raptors
4,Immanuel Quickley,1630193,Toronto Raptors
5,RJ Barrett,1629628,Toronto Raptors
6,Bruce Brown,1628971,Toronto Raptors
7,Ja'Kobe Walter,1642266,Toronto Raptors
8,Garrett Temple,202066,Toronto Raptors
9,Jakob Poeltl,1627751,Toronto Raptors


In [92]:

def get_season_averages(player_id):
    """
    Retrieves season averages for a given player ID.

    Args:
      player_id: The NBA player ID.

    Returns:
      A DataFrame with season averages and a 'season' column.
    """

    retries = 3
    for i in range(retries):
        try:
            career = playercareerstats.PlayerCareerStats(player_id=player_id, timeout=60)
            career_df = career.get_data_frames()[0]
            break
        except requests.exceptions.ReadTimeout:
            if i < retries - 1:
                time.sleep(5)
                print(f"ReadTimeout error for player_id {player_id}. Retrying ({i+1}/{retries})...")
            else:
                print(f"Failed to retrieve data for player_id {player_id} after multiple retries.")
                return pd.DataFrame()  # Return empty DataFrame on failure

    # Select relevant columns and calculate season averages
    season_averages_df = career_df[['SEASON_ID', 'PLAYER_AGE',
        'GP', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT',
        'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS']]
    season_averages_df = season_averages_df.groupby('SEASON_ID').mean().reset_index()
    season_averages_df.rename(columns={'SEASON_ID': 'season'}, inplace=True)

    return season_averages_df

all_season_averages = []
for index, row in all_players_df.iterrows():
    player_id = row['player_id']
    
    season_averages_df = get_season_averages(player_id)
    season_averages_df['player_id'] = player_id  # Add player_id column
    season_averages_df['player_name'] = row['player_name']  # Add player_name column

    all_season_averages.append(season_averages_df)

    time.sleep(1)  # Wait for 1 second

all_seasons_averages_df = pd.concat(all_season_averages)

# Round all numeric columns to 1 decimal place
all_seasons_averages_df = all_seasons_averages_df.round(1)

In [93]:

# Calculate 'PTS_AVG' and round to 1 decimal place
all_seasons_averages_df['PTS_AVG'] = (all_seasons_averages_df['PTS'] / all_seasons_averages_df['GP']).round(1)
all_seasons_averages_df['AST_AVG'] = (all_seasons_averages_df['AST'] / all_seasons_averages_df['GP']).round(1)
all_seasons_averages_df['REB_AVG'] = (all_seasons_averages_df['REB'] / all_seasons_averages_df['GP']).round(1)
all_seasons_averages_df['STL_AVG'] = (all_seasons_averages_df['STL'] / all_seasons_averages_df['GP']).round(1)
all_seasons_averages_df['BLK_AVG'] = (all_seasons_averages_df['BLK'] / all_seasons_averages_df['GP']).round(1)
all_seasons_averages_df['TOV_AVG'] = (all_seasons_averages_df['TOV'] / all_seasons_averages_df['GP']).round(1)
all_seasons_averages_df['PF_AVG'] = (all_seasons_averages_df['PF'] / all_seasons_averages_df['GP']).round(1)
all_seasons_averages_df['MIN_AVG'] = (all_seasons_averages_df['MIN'] / all_seasons_averages_df['GP']).round(1)

new_column_order = ['season','player_id','player_name','PLAYER_AGE', 
                    'MIN_AVG','PTS_AVG', 'AST_AVG','REB_AVG','STL_AVG','BLK_AVG','TOV_AVG','PF_AVG',
                    'GP', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 
                    'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 
                    'BLK', 'TOV', 'PF', 'PTS']

all_seasons_averages_df=all_seasons_averages_df[new_column_order] 

all_seasons_averages_df

Unnamed: 0,season,player_id,player_name,PLAYER_AGE,MIN_AVG,PTS_AVG,AST_AVG,REB_AVG,STL_AVG,BLK_AVG,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,2021-22,1630552,Jalen Johnson,20.0,5.5,2.4,0.1,1.2,0.1,0.1,...,0.7,2.0,24.0,26.0,3.0,3.0,2.0,9.0,9.0,52.0
1,2022-23,1630552,Jalen Johnson,21.0,14.9,5.6,1.2,4.0,0.5,0.5,...,0.6,51.0,231.0,282.0,83.0,38.0,34.0,41.0,113.0,395.0
2,2023-24,1630552,Jalen Johnson,22.0,33.7,16.0,3.6,8.7,1.2,0.8,...,0.7,74.0,414.0,488.0,203.0,67.0,47.0,101.0,133.0,896.0
3,2024-25,1630552,Jalen Johnson,23.0,36.3,19.8,5.3,10.1,1.5,1.1,...,0.8,58.0,266.0,324.0,169.0,48.0,34.0,97.0,64.0,635.0
0,2024-25,1630811,Keaton Wallace,25.0,15.5,4.7,1.9,1.4,0.8,0.4,...,1.0,2.0,20.0,22.0,30.0,13.0,6.0,17.0,15.0,75.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11,2020-21,201959,Taj Gibson,36.0,20.8,5.4,0.8,5.6,0.7,1.1,...,0.7,99.0,151.0,250.0,36.0,31.0,49.0,22.0,99.0,241.0
12,2021-22,201959,Taj Gibson,37.0,18.2,4.4,0.6,4.4,0.4,0.8,...,0.8,88.0,141.0,229.0,32.0,22.0,40.0,27.0,137.0,229.0
13,2022-23,201959,Taj Gibson,38.0,9.8,3.4,0.7,1.9,0.3,0.2,...,0.7,32.0,61.0,93.0,34.0,15.0,12.0,26.0,84.0,168.0
14,2023-24,201959,Taj Gibson,39.0,10.2,1.7,0.5,1.9,0.2,0.4,...,0.7,8.0,16.7,24.7,7.3,2.0,4.7,3.3,20.7,22.7


In [95]:
# Export the DataFrame to an Excel file named 'all_players.xlsx'
all_seasons_averages_df.to_excel('../Data/player_season_stats.xlsx', index=False) 