In [1]:
from nba_api.stats.endpoints import playercareerstats
import time
import requests
import pandas as pd

In [2]:
# Team Name and Team Id
from nba_api.stats.static import teams

# Get all teams
all_teams = teams.get_teams()

# Create a list to store team data as dictionaries
team_data = []
for team in all_teams:
    team_data.append({'team_name': team['full_name'], 'team_id': team['id']})

# Create the DataFrame
teams_df = pd.DataFrame(team_data)

# Sort the DataFrame by team_name
teams_df.sort_values(by='team_name', inplace=True)

# Remove the default index and Reset the index to remove the name
teams_df.set_index('team_name', inplace=True)
teams_df = teams_df.reset_index() 

# Print the DataFrame
teams_df
    

Unnamed: 0,team_name,team_id
0,Atlanta Hawks,1610612737
1,Boston Celtics,1610612738
2,Brooklyn Nets,1610612751
3,Charlotte Hornets,1610612766
4,Chicago Bulls,1610612741
5,Cleveland Cavaliers,1610612739
6,Dallas Mavericks,1610612742
7,Denver Nuggets,1610612743
8,Detroit Pistons,1610612765
9,Golden State Warriors,1610612744


In [3]:
teams_df['logo_url'] = teams_df['team_id'].apply(lambda x:f'https://cdn.nba.com/logos/nba/{x}/global/L/logo.svg')
teams_df

Unnamed: 0,team_name,team_id,logo_url
0,Atlanta Hawks,1610612737,https://cdn.nba.com/logos/nba/1610612737/globa...
1,Boston Celtics,1610612738,https://cdn.nba.com/logos/nba/1610612738/globa...
2,Brooklyn Nets,1610612751,https://cdn.nba.com/logos/nba/1610612751/globa...
3,Charlotte Hornets,1610612766,https://cdn.nba.com/logos/nba/1610612766/globa...
4,Chicago Bulls,1610612741,https://cdn.nba.com/logos/nba/1610612741/globa...
5,Cleveland Cavaliers,1610612739,https://cdn.nba.com/logos/nba/1610612739/globa...
6,Dallas Mavericks,1610612742,https://cdn.nba.com/logos/nba/1610612742/globa...
7,Denver Nuggets,1610612743,https://cdn.nba.com/logos/nba/1610612743/globa...
8,Detroit Pistons,1610612765,https://cdn.nba.com/logos/nba/1610612765/globa...
9,Golden State Warriors,1610612744,https://cdn.nba.com/logos/nba/1610612744/globa...


In [4]:
# Export the DataFrame to an Excel file named 'nba_teams.xlsx'
teams_df.to_csv('../Data/nba_teams.csv', index=False) 

In [6]:
from nba_api.stats.endpoints import commonteamroster


def get_players_by_team_id(team_id, season='2024-25'):
    retries = 3
    for i in range(retries):
        try:
            # Timeout set to 60s to give API time to respond
            roster = commonteamroster.CommonTeamRoster(season=season, team_id=team_id, timeout=60)
            roster_df = roster.get_data_frames()[0]
            break  # Break out of the retry loop if successful
        except requests.exceptions.ReadTimeout:
            if i < retries - 1:
                time.sleep(5)
                print(f"ReadTimeout error for team_id {team_id}. Retrying ({i+1}/{retries})...")
            else:
                print(f"Failed to retrieve data for team_id {team_id} after multiple retries.")
                return pd.DataFrame(columns=['player_name', 'player_id'])  # Return empty DataFrame on failure

    # Extract player names and IDs
    player_names = roster_df['PLAYER'].tolist()
    player_ids = roster_df['PLAYER_ID'].tolist()

    # Create a DataFrame
    players_df = pd.DataFrame({'player_name': player_names, 'player_id': player_ids})
    return players_df


# Get the players for the Toronto Raptors (team_id = 1610612761)
raptors_players_df = get_players_by_team_id(team_id='1610612761')

# Print the DataFrame
print(raptors_players_df.to_markdown(index=False, numalign="left", stralign="left"))

| player_name       | player_id   |
|:------------------|:------------|
| A.J. Lawson       | 1630639     |
| Gradey Dick       | 1641711     |
| Jonathan Mogbo    | 1642367     |
| Scottie Barnes    | 1630567     |
| Immanuel Quickley | 1630193     |
| RJ Barrett        | 1629628     |
| Bruce Brown       | 1628971     |
| Ja'Kobe Walter    | 1642266     |
| Garrett Temple    | 202066      |
| Jakob Poeltl      | 1627751     |
| Ulrich Chomche    | 1642279     |
| Jamal Shead       | 1642347     |
| Chris Boucher     | 1628449     |
| Ochai Agbaji      | 1630534     |
| Kelly Olynyk      | 203482      |
| Davion Mitchell   | 1630558     |
| Jamison Battle    | 1642419     |
| Eugene Omoruyi    | 1630647     |


In [None]:

from nba_api.stats.endpoints import commonplayerinfo

def get_player_info(player_id):
    retries = 3
    for i in range(retries):
        try:
            # Fetch player info with a 60-second timeout
            player_info = commonplayerinfo.CommonPlayerInfo(player_id=player_id, timeout=60)
            player_info_df = player_info.get_data_frames()[0]
            return player_info_df
        except requests.exceptions.ReadTimeout:
            if i < retries - 1:
                time.sleep(5)
                print(f"ReadTimeout error for player_id {player_id}. Retrying ({i+1}/{retries})...")
            else:
                print(f"Failed to retrieve data for player_id {player_id} after multiple retries.")
                return pd.DataFrame()  # Return empty DataFrame on failure

# Assuming you have the all_players_df DataFrame with a 'player_id' column

raptors_player_info = []
for index, row in raptors_players_df.iterrows():
    player_id = row['player_id']
    
    player_info_df = get_player_info(player_id)
    raptors_player_info.append(player_info_df)

    time.sleep(1)  # Wait for 1 second

raptors_player_info_df = pd.concat(raptors_player_info)

In [30]:

columns_to_keep = ['PERSON_ID', 'FIRST_NAME', 'LAST_NAME','DISPLAY_FIRST_LAST', 'BIRTHDATE', 
                   'SCHOOL','COUNTRY', 'HEIGHT', 'WEIGHT','SEASON_EXP','JERSEY', 'POSITION', 
                   'FROM_YEAR','TO_YEAR','FROM_YEAR','DRAFT_YEAR','DRAFT_ROUND','DRAFT_NUMBER']  


all_raptors_player_info_df = raptors_player_info_df[columns_to_keep].copy()
all_raptors_player_info_df

Unnamed: 0,PERSON_ID,FIRST_NAME,LAST_NAME,DISPLAY_FIRST_LAST,BIRTHDATE,SCHOOL,COUNTRY,HEIGHT,WEIGHT,SEASON_EXP,JERSEY,POSITION,FROM_YEAR,TO_YEAR,FROM_YEAR.1,DRAFT_YEAR,DRAFT_ROUND,DRAFT_NUMBER
0,1630639,A.J.,Lawson,A.J. Lawson,2000-07-15T00:00:00,South Carolina,Canada,6-6,179,2,0,Guard,2022,2024,2022,Undrafted,Undrafted,Undrafted
0,1641711,Gradey,Dick,Gradey Dick,2003-11-20T00:00:00,Kansas,USA,6-6,200,1,1,Guard-Forward,2023,2024,2023,2023,1,13
0,1642367,Jonathan,Mogbo,Jonathan Mogbo,2001-10-29T00:00:00,San Francisco,USA,6-9,225,0,2,Forward,2024,2024,2024,2024,2,31
0,1630567,Scottie,Barnes,Scottie Barnes,2001-08-01T00:00:00,Florida State,USA,6-7,237,3,4,Forward-Guard,2021,2024,2021,2021,1,4
0,1630193,Immanuel,Quickley,Immanuel Quickley,1999-06-17T00:00:00,Kentucky,USA,6-2,190,4,5,Guard,2020,2024,2020,2020,1,25
0,1629628,RJ,Barrett,RJ Barrett,2000-06-14T00:00:00,Duke,Canada,6-6,214,5,9,Forward-Guard,2019,2024,2019,2019,1,3
0,1628971,Bruce,Brown,Bruce Brown,1996-08-15T00:00:00,Miami,USA,6-4,202,6,11,Guard-Forward,2018,2024,2018,2018,2,42
0,1642266,Ja'Kobe,Walter,Ja'Kobe Walter,2004-09-04T00:00:00,Baylor,USA,6-5,180,0,14,Guard,2024,2024,2024,2024,1,19
0,202066,Garrett,Temple,Garrett Temple,1986-05-08T00:00:00,Louisiana State,USA,6-5,195,14,17,Guard-Forward,2009,2024,2009,Undrafted,Undrafted,Undrafted
0,1627751,Jakob,Poeltl,Jakob Poeltl,1995-10-15T00:00:00,Utah,Austria,7-0,260,8,19,Center,2016,2024,2016,2016,1,9


In [34]:
# Export the DataFrame to an Excel file named 'all_raptors_player_info.csv'
all_raptors_player_info_df.to_csv('../Data/raptors_player_info.csv', index=False) 

In [31]:
from nba_api.stats.endpoints import playercareerstats

all_regular_season_stats = []
for index, row in raptors_players_df.iterrows():
    player_id = row['player_id']

    retries = 3  # Number of retries
    for i in range(retries):
        try:
            # Get the player's regular season stats with a 60-second timeout
            career = playercareerstats.PlayerCareerStats(player_id=player_id, timeout=60)
            regular_season_stats_df = career.get_data_frames()[0]  # Get the first DataFrame (Regular Season Totals)

            # Add player_id to the DataFrame
            regular_season_stats_df['player_id'] = player_id

            all_regular_season_stats.append(regular_season_stats_df)

            time.sleep(1)  # Add a delay to avoid rate limiting
            break  # Exit the retry loop if successful
        except requests.exceptions.ReadTimeout:
            if i < retries - 1:
                time.sleep(2 ** i)  # Exponential backoff (1, 2, 4, ... seconds)
                print(f"ReadTimeout error for player_id {player_id}. Retrying ({i+1}/{retries})...")
            else:
                print(f"Failed to retrieve data for player_id {player_id} after multiple retries.")

# Concatenate all the DataFrames into one
all_regular_season_stats_df = pd.concat(all_regular_season_stats)

# Print the combined DataFrame (or do further analysis/processing)
print(all_regular_season_stats_df.to_markdown(index=False, numalign="left", stralign="left"))

| PLAYER_ID   | SEASON_ID   | LEAGUE_ID   | TEAM_ID    | TEAM_ABBREVIATION   | PLAYER_AGE   | GP   | GS   | MIN   | FGM   | FGA   | FG_PCT   | FG3M   | FG3A   | FG3_PCT   | FTM   | FTA   | FT_PCT   | OREB   | DREB   | REB   | AST   | STL   | BLK   | TOV   | PF   | PTS   | player_id   |
|:------------|:------------|:------------|:-----------|:--------------------|:-------------|:-----|:-----|:------|:------|:------|:---------|:-------|:-------|:----------|:------|:------|:---------|:-------|:-------|:------|:------|:------|:------|:------|:-----|:------|:------------|
| 1630639     | 2022-23     | 00          | 1610612750 | MIN                 | 22           | 1    | 0    | 2     | 1     | 1     | 1        | 0      | 0      | 0         | 0     | 0     | 0        | 0      | 1      | 1     | 0     | 0     | 0     | 0     | 1    | 2     | 1630639     |
| 1630639     | 2022-23     | 00          | 1610612742 | DAL                 | 22           | 14   | 0    | 107   | 21    | 43    | 0.488  

In [35]:
# Export the DataFrame to an Excel file named 'all_regular_season_stats.csv'
all_regular_season_stats_df.to_csv('../Data/raptors_regular_season_stats.csv', index=False) 

In [36]:

all_post_season_stats = []
for index, row in raptors_players_df.iterrows():
    player_id = row['player_id']

    retries = 3  # Number of retries
    for i in range(retries):
        try:
            # Get the player's post season stats with a 60-second timeout
            career = playercareerstats.PlayerCareerStats(player_id=player_id, timeout=60)
            post_season_stats_df = career.get_data_frames()[2]  # Get the first DataFrame (Post Season Totals)

            # Add player_id to the DataFrame
            post_season_stats_df['player_id'] = player_id

            all_post_season_stats.append(post_season_stats_df)

            time.sleep(1)  # Add a delay to avoid rate limiting
            break  # Exit the retry loop if successful
        except requests.exceptions.ReadTimeout:
            if i < retries - 1:
                time.sleep(2 ** i)  # Exponential backoff (1, 2, 4, ... seconds)
                print(f"ReadTimeout error for player_id {player_id}. Retrying ({i+1}/{retries})...")
            else:
                print(f"Failed to retrieve data for player_id {player_id} after multiple retries.")

# Concatenate all the DataFrames into one
all_post_season_stats_df = pd.concat(all_post_season_stats)

# Print the combined DataFrame (or do further analysis/processing)
print(all_post_season_stats_df.to_markdown(index=False, numalign="left", stralign="left"))

| PLAYER_ID   | SEASON_ID   | LEAGUE_ID   | TEAM_ID    | TEAM_ABBREVIATION   | PLAYER_AGE   | GP   | GS   | MIN   | FGM   | FGA   | FG_PCT   | FG3M   | FG3A   | FG3_PCT   | FTM   | FTA   | FT_PCT   | OREB   | DREB   | REB   | AST   | STL   | BLK   | TOV   | PF   | PTS   | player_id   |
|:------------|:------------|:------------|:-----------|:--------------------|:-------------|:-----|:-----|:------|:------|:------|:---------|:-------|:-------|:----------|:------|:------|:---------|:-------|:-------|:------|:------|:------|:------|:------|:-----|:------|:------------|
| 1630639     | 2023-24     | 00          | 1610612742 | DAL                 | 23           | 10   | 0    | 30    | 4     | 9     | 0.444    | 2      | 6      | 0.333     | 1     | 2     | 0.5      | 1      | 2      | 3     | 0     | 0     | 1     | 0     | 1    | 11    | 1630639     |
| 1630567     | 2021-22     | 00          | 1610612761 | TOR                 | 20           | 4    | 3    | 133   | 18    | 42    | 0.429  

In [37]:
# Export the DataFrame to an Excel file named 'raptors_post_season_stats.csv'
all_post_season_stats_df.to_csv('../Data/raptors_post_season_stats.csv', index=False) 

In [40]:
from nba_api.stats.endpoints import playergamelog


all_gamelogs = []
for index, row in all_regular_season_stats_df.iterrows():
    player_id = row['PLAYER_ID']
    season_id = row['SEASON_ID']

    try:
        # Fetch the game log with a 60-second timeout
        gamelog = playergamelog.PlayerGameLog(player_id=player_id, season=season_id, timeout=60)
        gamelog_df = gamelog.get_data_frames()[0]

        # Add player_id and season_id to the DataFrame
        gamelog_df['PLAYER_ID'] = player_id
        gamelog_df['SEASON_ID'] = season_id

        all_gamelogs.append(gamelog_df)

        # Print completion message
        print(f"Finished for player_id: {player_id} in season {season_id}")  

        time.sleep(5)  # Wait for 5 seconds between API calls

    except IndexError:
        print(f"No game log data found for player ID: {player_id} in season {season_id}")

# Concatenate all the DataFrames into one
all_gamelogs_df = pd.concat(all_gamelogs)

# Print the combined DataFrame (or do further analysis/processing)
print(all_gamelogs_df.to_markdown(index=False, numalign="left", stralign="left"))

Finished for player_id: 1630639 in season 2022-23
Finished for player_id: 1630639 in season 2022-23
Finished for player_id: 1630639 in season 2022-23
Finished for player_id: 1630639 in season 2023-24
Finished for player_id: 1630639 in season 2024-25
Finished for player_id: 1641711 in season 2023-24
Finished for player_id: 1641711 in season 2024-25
Finished for player_id: 1642367 in season 2024-25
Finished for player_id: 1630567 in season 2021-22
Finished for player_id: 1630567 in season 2022-23
Finished for player_id: 1630567 in season 2023-24
Finished for player_id: 1630567 in season 2024-25
Finished for player_id: 1630193 in season 2020-21
Finished for player_id: 1630193 in season 2021-22
Finished for player_id: 1630193 in season 2022-23
Finished for player_id: 1630193 in season 2023-24
Finished for player_id: 1630193 in season 2023-24
Finished for player_id: 1630193 in season 2023-24
Finished for player_id: 1630193 in season 2024-25
Finished for player_id: 1629628 in season 2019-20


In [55]:
all_gamelogs_df['MATCHUP'] = all_gamelogs_df['MATCHUP'].astype(str)
all_gamelogs_df['TEAM'] = all_gamelogs_df['MATCHUP'].str[:3]
all_gamelogs_df['OPPONENT'] = all_gamelogs_df['MATCHUP'].str[-3:]
all_gamelogs_df

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,PLAYER_ID,TEAM,OPPONENT
0,2022-23,1630639,22201224,"APR 09, 2023",DAL vs. SAS,L,23,4,9,0.444,...,1,0,0,3,10,-13,1,1630639,DAL,SAS
1,2022-23,1630639,22201210,"APR 07, 2023",DAL vs. CHI,L,23,5,9,0.556,...,0,0,2,0,12,-19,1,1630639,DAL,CHI
2,2022-23,1630639,22200881,"FEB 15, 2023",DAL @ DEN,L,12,4,6,0.667,...,1,0,0,1,10,-9,1,1630639,DAL,DEN
3,2022-23,1630639,22200813,"FEB 06, 2023",DAL @ UTA,W,12,1,4,0.250,...,0,0,1,3,3,1,1,1630639,DAL,UTA
4,2022-23,1630639,22200800,"FEB 04, 2023",DAL @ GSW,L,11,2,4,0.500,...,0,0,0,1,5,4,1,1630639,DAL,GSW
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6014,2023-24,1630647,22300148,"NOV 06, 2023",WAS @ PHI,L,11,1,2,0.500,...,0,0,3,4,4,-3,1,1630647,WAS,PHI
6015,2023-24,1630647,22300003,"NOV 03, 2023",WAS @ MIA,L,7,2,2,1.000,...,1,0,1,1,4,18,1,1630647,WAS,MIA
6016,2023-24,1630647,22300117,"NOV 01, 2023",WAS @ ATL,L,8,3,6,0.500,...,0,0,1,1,7,-3,1,1630647,WAS,ATL
6017,2023-24,1630647,22300103,"OCT 30, 2023",WAS vs. BOS,L,12,5,5,1.000,...,1,1,0,0,14,14,1,1630647,WAS,BOS


In [57]:
# Export the DataFrame to an Excel file named 'raptors_gamelogs.csv'
all_gamelogs_df.to_csv('../Data/raptors_gamelogs.csv', index=False) 