## Scraping bio data (height, weight, nationality) from NBA Stats  

In [1]:
import pandas as pd
import requests
pd.set_option('display.max_columns', None) # so we can see all columns in a wide DataFrame
import time
import numpy as np

In [2]:
raw_api_url ='https://stats.nba.com/stats/leaguedashplayerbiostats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&ISTRound=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&Season=2023-24&SeasonSegment=&SeasonType=Regular Season&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='

In [3]:
headers = {
    'Accept': '*/*',
    'Accept-Encoding': 'gzip, deflate, br, zstd',
    'Accept-Language': 'de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7',
    'Connection': 'keep-alive',
    'Host': 'stats.nba.com',
    'If-Modified-Since': 'Thu, 20 Oct 2022 08:06:46 GMT',
    'Origin': 'https://www.nba.com',
    'Referer': 'https://www.nba.com/',
    'Sec-Ch-Ua': '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
    'Sec-Ch-Ua-Mobile': '?0',
    'Sec-Ch-Ua-Platform': '"macOS"',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36'
}

In [4]:
r = requests.get(url=raw_api_url, headers=headers).json() #parses return as JSON and returns a Python object(dictionary) 
r

{'resource': 'leaguedashplayerbiostats',
 'parameters': {'PerMode': 'Totals',
  'LeagueID': '00',
  'Season': '2023-24',
  'SeasonType': 'Regular Season',
  'PORound': 0,
  'Outcome': None,
  'Location': None,
  'Month': 0,
  'SeasonSegment': None,
  'DateFrom': None,
  'DateTo': None,
  'OpponentTeamID': 0,
  'VsConference': None,
  'VsDivision': None,
  'TeamID': 0,
  'Conference': None,
  'Division': None,
  'GameSegment': None,
  'Period': 0,
  'ShotClockRange': None,
  'LastNGames': 0,
  'GameScope': None,
  'PlayerExperience': None,
  'PlayerPosition': None,
  'StarterBench': None,
  'DraftYear': None,
  'DraftPick': None,
  'College': None,
  'Country': None,
  'Height': None,
  'Weight': None,
  'ISTRound': None},
 'resultSets': [{'name': 'LeagueDashPlayerBioStats',
   'headers': ['PLAYER_ID',
    'PLAYER_NAME',
    'TEAM_ID',
    'TEAM_ABBREVIATION',
    'AGE',
    'PLAYER_HEIGHT',
    'PLAYER_HEIGHT_INCHES',
    'PLAYER_WEIGHT',
    'COLLEGE',
    'COUNTRY',
    'DRAFT_YEAR',

In [5]:
# Accessing the headers and resultSets in dict.

df_cols_short = r['resultSets'][0]['headers']

df_cols_short

['PLAYER_ID',
 'PLAYER_NAME',
 'TEAM_ID',
 'TEAM_ABBREVIATION',
 'AGE',
 'PLAYER_HEIGHT',
 'PLAYER_HEIGHT_INCHES',
 'PLAYER_WEIGHT',
 'COLLEGE',
 'COUNTRY',
 'DRAFT_YEAR',
 'DRAFT_ROUND',
 'DRAFT_NUMBER',
 'GP',
 'PTS',
 'REB',
 'AST',
 'NET_RATING',
 'OREB_PCT',
 'DREB_PCT',
 'USG_PCT',
 'TS_PCT',
 'AST_PCT']

In [6]:
data = r['resultSets'][0]['rowSet']
data

[[1630639,
  'A.J. Lawson',
  1610612742,
  'DAL',
  23.0,
  '6-6',
  78,
  '179',
  'South Carolina',
  'Canada',
  'Undrafted',
  'Undrafted',
  'Undrafted',
  42,
  136,
  50,
  20,
  1.2,
  0.039,
  0.105,
  0.18,
  0.519,
  0.089],
 [1631260,
  'AJ Green',
  1610612749,
  'MIL',
  24.0,
  '6-4',
  76,
  '190',
  'Northern Iowa',
  'USA',
  'Undrafted',
  'Undrafted',
  'Undrafted',
  56,
  252,
  64,
  30,
  3.5,
  0.015,
  0.089,
  0.15,
  0.617,
  0.068],
 [1631100,
  'AJ Griffin',
  1610612737,
  'ATL',
  20.0,
  '6-6',
  78,
  '220',
  'Duke',
  'USA',
  '2022',
  '1',
  '16',
  20,
  48,
  18,
  5,
  -14.0,
  0.011,
  0.108,
  0.167,
  0.382,
  0.041],
 [203932,
  'Aaron Gordon',
  1610612743,
  'DEN',
  28.0,
  '6-8',
  80,
  '235',
  'Arizona',
  'USA',
  '2014',
  '1',
  '4',
  73,
  1013,
  471,
  259,
  8.7,
  0.078,
  0.129,
  0.174,
  0.607,
  0.147],
 [1628988,
  'Aaron Holiday',
  1610612745,
  'HOU',
  27.0,
  '6-0',
  72,
  '185',
  'UCLA',
  'USA',
  '2018',
  '1'

In [7]:
df_cols = df_cols_short

years = ['1996-97', '1997-98', '1998-99', '1999-00', '2000-01', '2001-02', '2002-03', '2003-04', '2004-05', '2005-06', '2006-07', '2007-08', '2008-09', '2009-10', '2010-11', '2011-12',
         '2012-13','2013-14','2014-15','2015-16','2016-17','2017-18','2018-19','2019-20','2020-21','2021-22', '2022-23']
season_types = ['Regular%20Season','Playoffs']

In [8]:
df_bio = pd.DataFrame(columns=df_cols)

for y in years:
    for s in season_types:
        api_url = "https://stats.nba.com/stats/leaguedashplayerbiostats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&ISTRound=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&Season="+y+"&SeasonSegment=&SeasonType="+s+"&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight="
        r = requests.get(url=api_url, headers=headers).json()
        data = r['resultSets'][0]['rowSet']
        columns = r['resultSets'][0]['headers']
        temp_df1 = pd.DataFrame(data, columns=columns)
        temp_df1['Year'] = y
        temp_df1['Season'] = s
        df_bio = pd.concat([df_bio, temp_df1])

  df_bio = pd.concat([df_bio, temp_df1])


In [None]:
df_bio


In [None]:
#Export dataframe 

df_bio.to_csv('df_bio.csv', index=False)