In [18]:
import pandas as pd
from nba_api.stats.static import teams
from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.endpoints import boxscoretraditionalv2

In [19]:
# Static team metadata as returned by nba_api.
nba_teams = teams.get_teams()

# Pull the 'id' field out of every team record.
team_id_list = [team_record.get('id') for team_record in nba_teams]

# Show how many ids were collected.
len(team_id_list)

30

In [20]:
# Issue one LeagueGameFinder query per team id and keep the first data frame
# of each response.
games_list_by_team = [
    leaguegamefinder.LeagueGameFinder(team_id_nullable=current_team_id).get_data_frames()[0]
    for current_team_id in team_id_list
]

# Stack the per-team frames into one table. Row labels from each per-team frame
# are kept as-is (ignore_index is not used).
games_dataframe = pd.concat(games_list_by_team)
games_dataframe.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22023,1610612737,ATL,Atlanta Hawks,1522300074,2023-07-16,ATL @ DAL,L,201,80,...,0.773,13.0,28.0,41.0,16,5.0,6,17,18,-23.0
1,22023,1610612737,ATL,Atlanta Hawks,1522300046,2023-07-13,ATL vs. PHI,W,199,99,...,0.75,15.0,29.0,44.0,25,2.0,5,11,23,-3.4
2,22023,1610612737,ATL,Atlanta Hawks,1522300038,2023-07-12,ATL vs. MIN,W,201,99,...,0.788,15.0,31.0,46.0,19,4.0,7,19,22,10.6
3,22023,1610612737,ATL,Atlanta Hawks,1522300023,2023-07-09,ATL @ DEN,W,199,98,...,0.529,11.0,26.0,37.0,24,4.0,8,12,20,5.4
4,22023,1610612737,ATL,Atlanta Hawks,1522300007,2023-07-07,ATL vs. SAC,L,200,76,...,0.833,21.0,26.0,47.0,16,12.0,2,21,25,-13.8


In [21]:
# Keep the rows whose SEASON_ID string contains '2022'. The intent is to cover
# pre-season, regular season and finals of that season in a single frame.
is_2022_season = games_dataframe['SEASON_ID'].str.contains('2022')
games_last_season = games_dataframe[is_2022_season]
games_last_season.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
5,42022,1610612737,ATL,Atlanta Hawks,42200116,2023-04-27,ATL vs. BOS,L,241,120,...,0.773,12.0,33.0,45.0,28,5.0,7,10,15,-8.0
6,42022,1610612737,ATL,Atlanta Hawks,42200115,2023-04-25,ATL @ BOS,W,242,119,...,1.0,6.0,28.0,34.0,26,5.0,4,8,16,2.0
7,42022,1610612737,ATL,Atlanta Hawks,42200114,2023-04-23,ATL vs. BOS,L,240,121,...,0.875,11.0,31.0,42.0,25,8.0,4,12,24,-8.0
8,42022,1610612737,ATL,Atlanta Hawks,42200113,2023-04-21,ATL vs. BOS,W,240,130,...,0.813,11.0,37.0,48.0,24,5.0,6,18,15,8.0
9,42022,1610612737,ATL,Atlanta Hawks,42200112,2023-04-18,ATL @ BOS,L,241,106,...,0.5,19.0,30.0,49.0,21,10.0,4,15,11,-13.0


In [22]:
# Distinct SEASON_ID values that survived the '2022' filter.
games_last_season['SEASON_ID'].unique()

array(['42022', '52022', '22022', '12022'], dtype=object)

In [23]:
# Sanity check on the number of distinct games: 30 teams each playing 82
# regular-season games gives 30 * 82 / 2 = 1230; the remainder is expected to
# be pre-season, playoff and finals games.
games_last_season['GAME_ID'].nunique()

1477

In [29]:
# Persist the 2022 game log as both CSV and Parquet.
# The row labels are leftovers of the per-team frames stacked by pd.concat, so
# they are dropped from both files. Without index=False, to_parquet would store
# them as an extra index column that the CSV does not contain.
games_last_season.to_csv('../../data/game_2022.csv', index=False)
games_last_season.to_parquet('../../data/game_2022.parquet', index=False)

In [24]:
# Collect the box score of every distinct game and combine them into one
# player-level stat table: each player ends up with one row per game they
# were available for.
# NOTE(review): this sends one request per game id with no pacing or retry --
# confirm that is acceptable for the API before re-running the cell.
unique_game_ids = games_last_season.GAME_ID.unique()

# Keep only the first data frame of each box-score response.
total_box_score_list_by_game = [
    boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id).get_data_frames()[0]
    for game_id in unique_game_ids
]

total_player_stat_df = pd.concat(total_box_score_list_by_game)
total_player_stat_df.head()

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,NICKNAME,START_POSITION,COMMENT,MIN,...,OREB,DREB,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS
0,42200116,1610612738,BOS,Boston,1627759,Jaylen Brown,Jaylen,F,,39.000000:35,...,0.0,5.0,5.0,2.0,0.0,1.0,2.0,3.0,32.0,7.0
1,42200116,1610612738,BOS,Boston,1628369,Jayson Tatum,Jayson,F,,39.000000:52,...,2.0,12.0,14.0,7.0,1.0,2.0,0.0,1.0,30.0,11.0
2,42200116,1610612738,BOS,Boston,201143,Al Horford,Al,C,,29.000000:54,...,3.0,9.0,12.0,4.0,2.0,3.0,0.0,4.0,10.0,9.0
3,42200116,1610612738,BOS,Boston,1628401,Derrick White,Derrick,G,,29.000000:46,...,1.0,1.0,2.0,2.0,0.0,1.0,1.0,3.0,7.0,-1.0
4,42200116,1610612738,BOS,Boston,203935,Marcus Smart,Marcus,G,,32.000000:33,...,3.0,1.0,4.0,4.0,1.0,0.0,2.0,0.0,22.0,11.0


In [25]:
# Save the player-level box scores as CSV, leaving out the row labels.
player_stat_csv = '../../data/player_stat_2022.csv'
total_player_stat_df.to_csv(player_stat_csv, index=False)

In [27]:
# Save the player-level box scores as Parquet.
# The row labels come from the per-game frames stacked by pd.concat and the CSV
# export of this frame omits them; index=False keeps the Parquet file
# consistent with it instead of storing the labels as an extra index column.
total_player_stat_df.to_parquet('../../data/player_stat_2022.parquet', index=False)