## Data Collection

In [2]:
# import nba_api
#!pip install nba_api
from nba_api.stats.endpoints import playercareerstats, DraftHistory, commonallplayers, leaguegamefinder, boxscoretraditionalv2, teamgamelog
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.static import teams, players
from nba_api.stats.endpoints import playerdashboardbyyearoveryear
import requests
import numpy as np
import pandas as pd
import time
from requests.exceptions import ReadTimeout

## Draft

In [None]:
# Download the NBA draft history for every draft year from 2003 through 2024
# and stack the yearly results into one frame.
# TODO: second-round picks (presumably the rows with ROUND_NUMBER == 2) are
# still included here; dropping them is deferred to the data-cleaning stage.
draft_frames = []

for year in range(2003, 2025):
    # Pause between requests, matching the throttling used by the
    # player-stats loop later in this notebook.
    time.sleep(0.6)
    draft_history = DraftHistory(league_id='00', season_year_nullable=str(year))
    draft_df = draft_history.get_data_frames()[0]
    draft_frames.append(draft_df)

# Concatenate once instead of re-copying the accumulated frame on every
# iteration; this also avoids mixing a column-less empty frame into the concat.
all_draft_picks = pd.concat(draft_frames, ignore_index=True)

# Per-season lookup: maps each SEASON value to an independent copy of its rows.
grouped_draft_picks = all_draft_picks.groupby('SEASON')
draft_picks_by_year = {year: group.copy() for year, group in grouped_draft_picks}

# NOTE: this lifts the row-display limit for the whole session, not just this cell.
pd.set_option('display.max_rows', None)
display(all_draft_picks.head())

Unnamed: 0,PERSON_ID,PLAYER_NAME,SEASON,ROUND_NUMBER,ROUND_PICK,OVERALL_PICK,DRAFT_TYPE,TEAM_ID,TEAM_CITY,TEAM_NAME,TEAM_ABBREVIATION,ORGANIZATION,ORGANIZATION_TYPE,PLAYER_PROFILE_FLAG
0,2544,LeBron James,2003,1,1,1,Draft,1610612739,Cleveland,Cavaliers,CLE,Saint Vincent-Saint Mary,High School,1
1,2545,Darko Milicic,2003,1,2,2,Draft,1610612765,Detroit,Pistons,DET,KK Vrsac (Serbia),Other Team/Club,1
2,2546,Carmelo Anthony,2003,1,3,3,Draft,1610612743,Denver,Nuggets,DEN,Syracuse,College/University,1
3,2547,Chris Bosh,2003,1,4,4,Draft,1610612761,Toronto,Raptors,TOR,Georgia Tech,College/University,1
4,2548,Dwyane Wade,2003,1,5,5,Draft,1610612748,Miami,Heat,MIA,Marquette,College/University,1


In [None]:
# Persist the combined draft history; the row index is not written.
all_draft_picks.to_csv(path_or_buf='Data/all_draft_picks.csv', index=False)

## Current player stats

In [None]:
# Query the player listing restricted to the current season.
all_players = commonallplayers.CommonAllPlayers(is_only_current_season=1)

# Take the first result set and discard OTHERLEAGUE_EXPERIENCE_CH,
# a column whose meaning is unknown to this analysis.
players_df = (
    all_players
    .get_data_frames()[0]
    .drop(columns=['OTHERLEAGUE_EXPERIENCE_CH'])
)

# Preview the first rows, then save the listing without the row index.
display(players_df.head())
players_df.to_csv(path_or_buf='Data/current_players.csv', index=False)

Unnamed: 0,PERSON_ID,DISPLAY_LAST_COMMA_FIRST,DISPLAY_FIRST_LAST,ROSTERSTATUS,FROM_YEAR,TO_YEAR,PLAYERCODE,PLAYER_SLUG,TEAM_ID,TEAM_CITY,TEAM_NAME,TEAM_ABBREVIATION,TEAM_SLUG,TEAM_CODE,GAMES_PLAYED_FLAG
0,1630173,"Achiuwa, Precious",Precious Achiuwa,1,2020,2023,precious_achiuwa,precious_achiuwa,1610612752,New York,Knicks,NYK,knicks,knicks,Y
1,203500,"Adams, Steven",Steven Adams,1,2013,2023,steven_adams,steven_adams,1610612745,Houston,Rockets,HOU,rockets,rockets,Y
2,1628389,"Adebayo, Bam",Bam Adebayo,1,2017,2023,bam_adebayo,bam_adebayo,1610612748,Miami,Heat,MIA,heat,heat,Y
3,1630534,"Agbaji, Ochai",Ochai Agbaji,1,2022,2023,ochai_agbaji,ochai_agbaji,1610612761,Toronto,Raptors,TOR,raptors,raptors,Y
4,1630583,"Aldama, Santi",Santi Aldama,1,2021,2023,santi_aldama,santi_aldama,1610612763,Memphis,Grizzlies,MEM,grizzlies,grizzlies,Y


In [None]:
# Collect the 2023-24 season row(s) of every active player into one frame
# and export it to CSV.

SEASON_LABEL = '2023-24'       # GROUP_VALUE to keep from each player's dashboard
REQUEST_PAUSE_SECONDS = 0.6    # base pause before every request
MAX_ATTEMPTS = 3               # tries per player before a timeout propagates

active_players = players.get_active_players()
season_frames = []

for player in active_players:
    player_id = player['id']
    player_name = player['full_name']

    # Retry on read timeouts so a single slow response does not abort the
    # whole crawl. The pause grows with each attempt; the final failure is
    # re-raised unchanged.
    for attempt in range(1, MAX_ATTEMPTS + 1):
        time.sleep(REQUEST_PAUSE_SECONDS * attempt)
        try:
            player_season_stats = playerdashboardbyyearoveryear.PlayerDashboardByYearOverYear(player_id=player_id)
            break
        except ReadTimeout:
            if attempt == MAX_ATTEMPTS:
                raise

    season_stats_df = player_season_stats.get_data_frames()[1]  # Index 1 is usually the season totals

    # Keep only the target season and tag the rows with the player's name.
    # .assign builds a new frame, so nothing is written into a filtered slice.
    # All columns returned by the endpoint are kept.
    season_stats_df = (
        season_stats_df[season_stats_df['GROUP_VALUE'] == SEASON_LABEL]
        .assign(PLAYER_NAME=player_name)
    )

    season_frames.append(season_stats_df)

all_players_season_stats = pd.concat(season_frames, ignore_index=True)

# Drop the grouping columns; after the season filter they carry no information.
all_players_season_stats = all_players_season_stats.drop(columns=['GROUP_SET', 'GROUP_VALUE'])

display(all_players_season_stats.head(5))
display(all_players_season_stats.tail(5))

# Output to csv
all_players_season_stats.to_csv('Data/all_players_season_stats_2023_24.csv', index=False)

In [None]:
# Write the season stats to CSV without the row index.
all_players_season_stats.to_csv(path_or_buf='Data/all_players_season_stats_2023_24.csv', index=False)