In [1]:
from unidecode import unidecode
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [68]:
def extract_player_stats_pg(year, timeout=30):
    """Scrape the Basketball-Reference per-game player table for one season.

    Parameters
    ----------
    year : int or str
        Value interpolated into the ``NBA_{year}_per_game.html`` URL.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection raises instead
        of blocking the kernel indefinitely. Defaults to 30 seconds.

    Returns
    -------
    pandas.DataFrame or int
        On HTTP 200: one row per selected ``<tr>`` of the ``per_game_stats``
        table, using the ``<th>`` texts of its ``<thead>`` as column names.
        Empty cells are replaced by ``'0'``, columns that parse entirely as
        numbers are converted to numeric dtypes, and asterisks are stripped
        from both ends of ``Player``.
        On any other status: the integer status code (kept for backward
        compatibility with existing callers).

    Raises
    ------
    ValueError
        If the page contains no table with id ``per_game_stats``.
    """
    url = f'https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html'
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return response.status_code

    # Name the parser explicitly: leaving it out makes BeautifulSoup pick
    # whichever parser is installed and emit GuessedAtParserWarning.
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'id': 'per_game_stats'})
    if table is None:
        raise ValueError(f"No table with id 'per_game_stats' found at {url}")

    header = [cell.text for cell in table.find('thead').find_all('th')]

    # Only rows carrying one of these class values are kept; any other <tr>
    # in the body is skipped (presumably repeated header rows -- confirm
    # against the page markup).
    rows = table.find('tbody').find_all(
        'tr', {'class': ['full_table', 'italic_text partial_table']}
    )
    players = [[cell.text for cell in row.find_all(['td', 'th'])] for row in rows]

    df_player_stats_pg = pd.DataFrame(players, columns=header).replace('', '0')

    def _to_numeric_if_possible(column):
        # Explicit replacement for the deprecated errors='ignore': a column
        # that does not parse as numbers is returned unchanged.
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column

    df_player_stats_pg = df_player_stats_pg.apply(_to_numeric_if_possible)
    df_player_stats_pg['Player'] = df_player_stats_pg['Player'].str.strip('*')

    return df_player_stats_pg
        
    

In [75]:
# Run the per-game scraper for year 1984; the return value is this cell's displayed output.
extract_player_stats_pg(1984)

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1,Kareem Abdul-Jabbar,C,36,LAL,80,80,32.8,9.0,15.5,...,0.723,2.1,5.2,7.3,2.6,0.7,1.8,2.8,2.6,21.5
1,2,Alvan Adams,C,29,PHO,70,13,20.7,3.8,8.3,...,0.825,1.7,2.9,4.6,3.1,1.0,0.4,1.7,2.8,9.6
2,3,Mark Aguirre,SF,24,DAL,79,79,36.7,11.7,22.3,...,0.749,2.0,3.9,5.9,4.5,1.0,0.3,3.6,3.1,29.5
3,4,Danny Ainge,SG,24,BOS,71,3,16.3,2.3,5.1,...,0.821,0.4,1.2,1.6,2.3,0.6,0.1,1.0,2.0,5.4
4,5,J.J. Anderson,SF,23,UTA,48,0,6.5,1.1,2.7,...,0.414,0.8,0.5,1.3,0.5,0.3,0.2,0.4,0.6,2.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
338,306,Randy Wittman,SG,24,ATL,78,1,13.7,2.1,4.1,...,0.609,0.2,0.7,0.9,0.9,0.2,0.0,0.4,1.1,4.5
339,307,Al Wood,SG,25,SEA,81,81,27.6,5.8,11.7,...,0.823,1.2,2.2,3.4,2.0,0.8,0.4,1.6,2.6,14.3
340,308,Mike Woodson,SG,25,KCK,71,12,25.9,5.5,11.5,...,0.818,0.9,1.6,2.5,2.5,1.2,0.4,1.6,2.5,14.5
341,309,Orlando Woolridge,SF,24,CHI,75,74,33.9,7.6,14.5,...,0.715,1.7,3.2,4.9,1.8,0.9,0.8,2.5,3.4,19.3


In [71]:
def extract_player_stats_totals(year, timeout=30):
    """Scrape the Basketball-Reference season-totals player table for one season.

    Parameters
    ----------
    year : int or str
        Value interpolated into the ``NBA_{year}_totals.html`` URL.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection raises instead
        of blocking the kernel indefinitely. Defaults to 30 seconds.

    Returns
    -------
    pandas.DataFrame or int
        On HTTP 200: one row per selected ``<tr>`` of the ``totals_stats``
        table, using the ``<th>`` texts of its ``<thead>`` as column names.
        Empty cells are replaced by ``'0'``, columns that parse entirely as
        numbers are converted to numeric dtypes, and asterisks are stripped
        from both ends of ``Player``.
        On any other status: the integer status code (kept for backward
        compatibility with existing callers).

    Raises
    ------
    ValueError
        If the page contains no table with id ``totals_stats``.
    """
    url = f'https://www.basketball-reference.com/leagues/NBA_{year}_totals.html'
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return response.status_code

    # Name the parser explicitly: leaving it out makes BeautifulSoup pick
    # whichever parser is installed and emit GuessedAtParserWarning.
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'id': 'totals_stats'})
    if table is None:
        raise ValueError(f"No table with id 'totals_stats' found at {url}")

    header = [cell.text for cell in table.find('thead').find_all('th')]

    # Only rows carrying one of these class values are kept; any other <tr>
    # in the body is skipped (presumably repeated header rows -- confirm
    # against the page markup).
    rows = table.find('tbody').find_all(
        'tr', {'class': ['full_table', 'italic_text partial_table']}
    )
    players = [[cell.text for cell in row.find_all(['td', 'th'])] for row in rows]

    df_player_stats_totals = pd.DataFrame(players, columns=header).replace('', '0')

    def _to_numeric_if_possible(column):
        # Explicit replacement for the deprecated errors='ignore': a column
        # that does not parse as numbers is returned unchanged.
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column

    df_player_stats_totals = df_player_stats_totals.apply(_to_numeric_if_possible)
    df_player_stats_totals['Player'] = df_player_stats_totals['Player'].str.strip('*')

    return df_player_stats_totals

In [72]:
# Run the totals scraper for year 1984; the return value is this cell's displayed output.
extract_player_stats_totals(1984)

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1,Kareem Abdul-Jabbar,C,36,LAL,80,80,2622,716,1238,...,0.723,169,418,587,211,55,143,221,211,1717
1,2,Alvan Adams,C,29,PHO,70,13,1452,269,582,...,0.825,118,201,319,219,73,31,117,195,670
2,3,Mark Aguirre,SF,24,DAL,79,79,2900,925,1765,...,0.749,161,308,469,358,80,22,285,246,2330
3,4,Danny Ainge,SG,24,BOS,71,3,1154,166,361,...,0.821,29,87,116,162,41,4,70,143,384
4,5,J.J. Anderson,SF,23,UTA,48,0,311,55,130,...,0.414,38,25,63,22,15,9,20,28,122
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
338,306,Randy Wittman,SG,24,ATL,78,1,1071,160,318,...,0.609,14,57,71,71,17,0,32,82,350
339,307,Al Wood,SG,25,SEA,81,81,2236,467,945,...,0.823,94,181,275,166,64,32,126,207,1160
340,308,Mike Woodson,SG,25,KCK,71,12,1838,389,816,...,0.818,62,113,175,175,83,28,115,174,1027
341,309,Orlando Woolridge,SF,24,CHI,75,74,2544,570,1086,...,0.715,130,239,369,136,71,60,188,253,1444


In [106]:
def extract_player_stats_advanced(year, timeout=30):
    """Scrape the Basketball-Reference advanced player table for one season.

    Parameters
    ----------
    year : int or str
        Value interpolated into the ``NBA_{year}_advanced.html`` URL.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection raises instead
        of blocking the kernel indefinitely. Defaults to 30 seconds.

    Returns
    -------
    pandas.DataFrame or int
        On HTTP 200: one row per selected ``<tr>`` of the ``advanced_stats``
        table, using the ``<th>`` texts of its ``<thead>`` as column names.
        Columns whose header is a lone non-breaking space are dropped, empty
        cells are replaced by ``'0'``, columns that parse entirely as numbers
        are converted to numeric dtypes, and asterisks are stripped from both
        ends of ``Player``.
        On any other status: the integer status code (kept for backward
        compatibility with existing callers).

    Raises
    ------
    ValueError
        If the page contains no table with id ``advanced_stats``.
    """
    url = f'https://www.basketball-reference.com/leagues/NBA_{year}_advanced.html'
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return response.status_code

    # Name the parser explicitly: leaving it out makes BeautifulSoup pick
    # whichever parser is installed and emit GuessedAtParserWarning.
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'id': 'advanced_stats'})
    if table is None:
        raise ValueError(f"No table with id 'advanced_stats' found at {url}")

    header = [cell.text for cell in table.find('thead').find_all('th')]

    # Only rows carrying one of these class values are kept; any other <tr>
    # in the body is skipped (presumably repeated header rows -- confirm
    # against the page markup).
    rows = table.find('tbody').find_all(
        'tr', {'class': ['full_table', 'italic_text partial_table']}
    )
    players = [[cell.text for cell in row.find_all(['td', 'th'])] for row in rows]

    df_player_stats_advanced = (
        pd.DataFrame(players, columns=header)
        # Drop every column headed by a lone non-breaking space;
        # errors='ignore' keeps this from raising KeyError when the page has
        # no such column.
        .drop(columns='\xa0', errors='ignore')
        .replace('', '0')
    )

    def _to_numeric_if_possible(column):
        # Explicit replacement for the deprecated errors='ignore': a column
        # that does not parse as numbers is returned unchanged.
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column

    df_player_stats_advanced = df_player_stats_advanced.apply(_to_numeric_if_possible)
    df_player_stats_advanced['Player'] = df_player_stats_advanced['Player'].str.strip('*')

    return df_player_stats_advanced

In [110]:
# Run the advanced-stats scraper for year 1984; the return value is this cell's displayed output.
extract_player_stats_advanced(1984)

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,MP,PER,TS%,3PAr,...,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP
0,1,Kareem Abdul-Jabbar,C,36,LAL,80,2622,21.3,0.608,0.001,...,13.5,25.1,5.9,3.1,8.9,0.163,2.8,0.2,3.0,3.3
1,2,Alvan Adams,C,29,PHO,70,1452,16.6,0.513,0.007,...,15.2,21.7,1.5,1.8,3.3,0.109,0.7,0.9,1.6,1.3
2,3,Mark Aguirre,SF,24,DAL,79,2900,23.5,0.572,0.032,...,12.3,33.4,7.0,2.0,9.0,0.149,5.0,-1.2,3.7,4.1
3,4,Danny Ainge,SG,24,BOS,71,1154,10.4,0.498,0.061,...,15.4,16.2,0.5,1.3,1.8,0.076,-2.2,0.4,-1.8,0.0
4,5,J.J. Anderson,SF,23,UTA,48,311,11.6,0.427,0.023,...,12.3,20.8,-0.2,0.3,0.1,0.013,-2.1,-1.0,-3.1,-0.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
338,306,Randy Wittman,SG,24,ATL,78,1071,9.3,0.517,0.016,...,8.6,14.9,0.6,0.6,1.2,0.054,-2.0,-0.9,-3.0,-0.3
339,307,Al Wood,SG,25,SEA,81,2236,14.9,0.545,0.022,...,10.6,22.1,2.7,1.7,4.4,0.094,-0.2,-0.7,-0.9,0.6
340,308,Mike Woodson,SG,25,KCK,71,1838,15.9,0.541,0.010,...,10.8,23.3,2.6,1.6,4.2,0.110,0.4,0.1,0.5,1.2
341,309,Orlando Woolridge,SF,24,CHI,75,2544,15.9,0.567,0.002,...,12.9,23.8,3.0,2.3,5.3,0.100,0.7,-0.6,0.1,1.4
