In [20]:
!pip install nba_api
from nba_api.stats.endpoints import leaguedashplayerstats
from nba_api.stats.endpoints import leaguestandings
import pandas as pd
import numpy as np

def get_player_stats(season):
  """Fetch regular-season, per-game player statistics for one season.

  Queries nba_api's ``LeagueDashPlayerStats`` endpoint, takes the first
  data frame it returns, renames ``PLAYER_NAME`` to ``PLAYER`` and keeps a
  fixed subset of columns.

  Args:
    season: Season identifier forwarded unchanged to the endpoint
      (the notebook calls this with ``'2020-21'``).

  Returns:
    DataFrame restricted to the selected columns and sorted by ``PTS``
    from highest to lowest. The original row index is preserved.
  """
  wanted_columns = [
      'PLAYER_ID', 'PLAYER', 'TEAM_ID', 'GP', 'W_PCT',
      'FGA', 'FG_PCT', 'FG3A', 'FG3_PCT',
      'PTS', 'REB', 'AST', 'TOV', 'STL', 'BLK',
  ]
  endpoint = leaguedashplayerstats.LeagueDashPlayerStats(
      season=season,
      season_type_all_star='Regular Season',
      per_mode_detailed='PerGame',
  )
  # The first frame of the response carries the per-player rows used here.
  per_game = endpoint.get_data_frames()[0].rename(columns={'PLAYER_NAME': 'PLAYER'})
  return per_game[wanted_columns].sort_values(by='PTS', ascending=False)



In [21]:
# Preview the top ten rows for 2020-21; get_player_stats returns rows sorted by PTS, descending.
get_player_stats('2020-21').head(10)

Unnamed: 0,PLAYER_ID,PLAYER,TEAM_ID,GP,W_PCT,FGA,FG_PCT,FG3A,FG3_PCT,PTS,REB,AST,TOV,STL,BLK
470,201939,Stephen Curry,1610612744,63,0.587,21.7,0.482,12.7,0.421,32.0,5.5,5.8,3.4,1.2,0.1
45,203078,Bradley Beal,1610612764,60,0.533,23.0,0.485,6.2,0.349,31.3,4.7,4.4,3.1,1.2,0.4
95,203081,Damian Lillard,1610612757,67,0.582,19.9,0.451,10.5,0.391,28.8,4.2,7.5,3.0,0.9,0.3
260,203954,Joel Embiid,1610612755,51,0.765,17.6,0.513,3.0,0.377,28.5,10.6,2.8,3.1,1.0,1.4
184,203507,Giannis Antetokounmpo,1610612749,61,0.656,18.0,0.569,3.6,0.303,28.1,11.0,5.9,3.4,1.2,1.2
339,1629029,Luka Dončić,1610612742,66,0.606,20.5,0.479,8.3,0.35,27.7,8.0,8.6,4.3,1.0,0.5
537,203897,Zach LaVine,1610612741,58,0.448,19.4,0.507,8.2,0.419,27.4,5.0,4.9,3.5,0.8,0.5
539,1629627,Zion Williamson,1610612740,61,0.475,17.0,0.611,0.6,0.294,27.0,7.2,3.7,2.7,0.9,0.6
306,201142,Kevin Durant,1610612751,35,0.657,17.2,0.537,5.4,0.45,26.9,7.1,5.6,3.4,0.7,1.3
325,202681,Kyrie Irving,1610612751,54,0.667,20.1,0.506,7.0,0.402,26.9,4.8,6.0,2.4,1.4,0.7


In [22]:
import requests
from bs4 import BeautifulSoup

def get_offensive_stats(season):
    """Scrape offensive advanced metrics from Basketball Reference.

    Downloads the league "advanced" page for ``season``, parses the table
    whose id is ``advanced`` and keeps the offensive columns.

    Args:
        season: Value interpolated into the page URL
            (``.../leagues/NBA_{season}_advanced.html``), e.g. ``2021``.

    Returns:
        DataFrame with columns ``PLAYER``, ``TS%``, ``USG%``, ``OWS`` and
        ``OBPM``. Rows that repeat the header are removed, only the first
        row per player name is kept, and the index is reset to 0..n-1.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page contains no table with id ``advanced``.
    """
    from io import StringIO

    url = f"https://www.basketball-reference.com/leagues/NBA_{season}_advanced.html"
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', {'id': 'advanced'})
    if table is None:
        # Without this guard str(None) would be handed to pandas and fail obscurely.
        raise ValueError(f"No table with id 'advanced' found at {url}")
    # pandas deprecated passing literal HTML to read_html; wrap it in StringIO.
    df = pd.read_html(StringIO(str(table)))[0]
    # Drop rows whose Player cell is the literal header text.
    df = df[df['Player'] != 'Player']
    # Keep only the first row seen for each player name.
    df = df[~df.duplicated(subset=['Player'], keep='first')]
    # Normalise headers: strip spaces, then upper-case ("Player" -> "PLAYER").
    df.columns = df.columns.str.replace(' ', '').str.upper()
    columns_to_keep = ['PLAYER', 'TS%', 'USG%', 'OWS', 'OBPM']
    return df[columns_to_keep].reset_index(drop=True)

In [23]:
# Scrape the offensive metrics from the NBA_2021 advanced page and preview up to 20 rows.
offensive_2021 = get_offensive_stats(2021)
offensive_2021.head(20)

  df = pd.read_html(str(table))[0]


Unnamed: 0,PLAYER,TS%,USG%,OWS,OBPM
0,Julius Randle,0.567,29.3,3.4,2.9
1,RJ Barrett,0.535,23.4,0.9,-0.9
2,Nikola Jokić,0.647,29.6,12.2,9.1
3,Buddy Hield,0.567,20.7,1.5,1.0
4,Damian Lillard,0.623,31.4,9.6,7.5
5,Terry Rozier,0.575,24.4,3.3,2.2
6,Russell Westbrook,0.509,30.2,0.5,2.6
7,Andrew Wiggins,0.568,23.3,1.2,0.0
8,Mikal Bridges,0.667,14.9,5.4,2.4
9,Nikola Vučević,0.56,29.3,3.3,4.9


In [24]:
def get_defensive_stats(season) :
    """Scrape defensive advanced metrics from Basketball Reference.

    Downloads the league "advanced" page for ``season``, parses the table
    whose id is ``advanced`` and keeps the defensive columns.

    Args:
        season: Value interpolated into the page URL
            (``.../leagues/NBA_{season}_advanced.html``), e.g. ``2021``.

    Returns:
        DataFrame with columns ``PLAYER``, ``DWS`` and ``DBPM``. Rows that
        repeat the header are removed, only the first row per player name
        is kept, and the index is reset to 0..n-1.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page contains no table with id ``advanced``.
    """
    from io import StringIO

    url = f"https://www.basketball-reference.com/leagues/NBA_{season}_advanced.html"
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', {'id': 'advanced'})
    if table is None:
        # Without this guard str(None) would be handed to pandas and fail obscurely.
        raise ValueError(f"No table with id 'advanced' found at {url}")
    # pandas deprecated passing literal HTML to read_html; wrap it in StringIO.
    df = pd.read_html(StringIO(str(table)))[0]
    # Drop rows whose Player cell is the literal header text.
    df = df[df['Player'] != 'Player']
    # Keep only the first row seen for each player name.
    df = df[~df.duplicated(subset=['Player'], keep='first')]
    # Normalise headers: strip spaces, then upper-case ("Player" -> "PLAYER").
    df.columns = df.columns.str.replace(' ', '').str.upper()
    columns_to_keep = ['PLAYER', 'DWS', 'DBPM']
    return df[columns_to_keep].reset_index(drop=True)

In [42]:
# Scrape the defensive metrics from the NBA_2021 advanced page, then spot-check one player's row.
defensive_2021 = get_defensive_stats(2021)
defensive_2021[defensive_2021['PLAYER'] == 'Jimmy Butler']

  df = pd.read_html(str(table))[0]


Unnamed: 0,PLAYER,DWS,DBPM
98,Jimmy Butler,2.7,2.3


In [26]:
def get_advanced_stats(season) :
  """Scrape overall advanced metrics and awards from Basketball Reference.

  Downloads the league "advanced" page for ``season``, parses the table
  whose id is ``advanced`` and keeps the summary columns.

  Args:
    season: Value interpolated into the page URL
      (``.../leagues/NBA_{season}_advanced.html``), e.g. ``2021``.

  Returns:
    DataFrame with columns ``PLAYER``, ``PER``, ``VORP`` and ``AWARDS``.
    Rows that repeat the header are removed, only the first row per player
    name is kept, and the index is reset to 0..n-1.

  Raises:
    requests.HTTPError: If the server answers with an error status.
    ValueError: If the page contains no table with id ``advanced``.
  """
  from io import StringIO

  url = f"https://www.basketball-reference.com/leagues/NBA_{season}_advanced.html"
  # A timeout keeps a stalled connection from hanging the notebook forever.
  response = requests.get(url, timeout=30)
  # Fail loudly on 4xx/5xx instead of trying to parse an error page.
  response.raise_for_status()
  soup = BeautifulSoup(response.content, 'html.parser')
  table = soup.find('table', {'id': 'advanced'})
  if table is None:
    # Without this guard str(None) would be handed to pandas and fail obscurely.
    raise ValueError(f"No table with id 'advanced' found at {url}")
  # pandas deprecated passing literal HTML to read_html; wrap it in StringIO.
  df = pd.read_html(StringIO(str(table)))[0]
  # Drop rows whose Player cell is the literal header text.
  df = df[df['Player'] != 'Player']
  # Keep only the first row seen for each player name.
  df = df[~df.duplicated(subset=['Player'], keep='first')]
  # Normalise headers: strip spaces, then upper-case ("Awards" -> "AWARDS").
  df.columns = df.columns.str.replace(' ', '').str.upper()
  columns_to_keep = ['PLAYER', 'PER', 'VORP', 'AWARDS']
  return df[columns_to_keep].reset_index(drop=True)

In [40]:
# Keep only the first 20 rows of the advanced metrics scraped from the NBA_2021 page and display them.
# NOTE(review): the saved output for this cell shows a single row at index 98, which
# .head(20) on a reset index cannot produce -- it looks stale; re-run to confirm.
advanced_2021 = get_advanced_stats(2021).head(20)
advanced_2021

  df = pd.read_html(str(table))[0]


Unnamed: 0,PLAYER,PER,VORP,AWARDS
98,Jimmy Butler,26.5,4.3,"DPOY-10,NBA3,DEF2"


In [28]:
def get_team_record(season):
  """Return team id, team name and record for every team in a season.

  Args:
    season: Season identifier forwarded unchanged to nba_api's
      ``LeagueStandings`` endpoint (the notebook uses ``'2020-21'``).

  Returns:
    DataFrame with the columns ``TeamID``, ``TeamName`` and ``Record``,
    taken from the first data frame the endpoint returns.
  """
  record_columns = ['TeamID', 'TeamName', 'Record']
  endpoint = leaguestandings.LeagueStandings(season=season)
  standings_table = endpoint.get_data_frames()[0]
  return standings_table[record_columns]

In [29]:
# Display the TeamID / TeamName / Record table for 2020-21.
get_team_record('2020-21')

Unnamed: 0,TeamID,TeamName,Record
0,1610612755,76ers,49-23
1,1610612762,Jazz,52-20
2,1610612751,Nets,48-24
3,1610612756,Suns,51-21
4,1610612743,Nuggets,47-25
5,1610612749,Bucks,46-26
6,1610612746,Clippers,47-25
7,1610612752,Knicks,41-31
8,1610612737,Hawks,41-31
9,1610612742,Mavericks,42-30


In [34]:
def is_all_nba(awards):
  """Map an awards string to the All-NBA team number it mentions.

  Args:
    awards: Awards text such as ``'DPOY-10,NBA3,DEF2'``, or a missing value.

  Returns:
    ``1``, ``2`` or ``3`` when the text contains ``'NBA1'``, ``'NBA2'`` or
    ``'NBA3'`` (checked in that order, so the lowest team number wins);
    ``np.nan`` when the value is missing or contains none of those tags.
  """
  if not pd.isna(awards):
    # Tags are tested in order; the first substring hit decides the result.
    for team_number, tag in enumerate(('NBA1', 'NBA2', 'NBA3'), start=1):
      if tag in awards:
        return team_number
  return np.nan
def create_final_data(season):
  """Build one combined per-player table for a season.

  Left-joins the box-score stats with the offensive, defensive and
  advanced tables on ``PLAYER``, then attaches the team name and record
  by matching ``TEAM_ID`` against ``TeamID``.

  Args:
    season: Season label in ``'YYYY-YY'`` form, e.g. ``'2020-21'``. It is
      passed unchanged to ``get_player_stats`` and ``get_team_record``;
      the season's end year is passed to the three scraping helpers.

  Returns:
    DataFrame with every row of ``get_player_stats(season)`` plus the
    merged metric columns, ``TeamName``, ``Record`` and an ``All_NBA``
    column (nullable ``Int64``: 1, 2, 3 or missing). The helper columns
    ``TeamID`` and ``AWARDS`` are dropped.
  """
  # The end year is start year + 1. Prefixing '20' to the two-digit suffix
  # was wrong for seasons before 2000 (e.g. '1998-99' became '2099').
  end_season = str(int(season.split('-')[0]) + 1)
  player_stats = get_player_stats(season)
  offensive_stats = get_offensive_stats(end_season)
  defensive_stats = get_defensive_stats(end_season)
  advanced_stats = get_advanced_stats(end_season)
  team_record = get_team_record(season)
  # Left joins keep every player from the box-score table even without a match.
  merge_first = player_stats.merge(offensive_stats, on = 'PLAYER', how = 'left')
  merge_second = merge_first.merge(defensive_stats, on = 'PLAYER', how = 'left')
  merge_third = merge_second.merge(advanced_stats, on = 'PLAYER', how = 'left')
  final_data = merge_third.merge(team_record, left_on = 'TEAM_ID', right_on = 'TeamID', how = 'left')
  final_data = final_data.drop(columns = ['TeamID'])
  # Nullable Int64 keeps team numbers integral while allowing missing values.
  final_data['All_NBA'] = final_data['AWARDS'].apply(is_all_nba).astype('Int64')
  final_data = final_data.drop(columns = ['AWARDS'])
  return final_data

In [35]:
# Display the merged 2020-21 table. The result is not stored, so the following cell
# calls create_final_data again and repeats every download.
create_final_data('2020-21')

  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


Unnamed: 0,PLAYER_ID,PLAYER,TEAM_ID,GP,W_PCT,FGA,FG_PCT,FG3A,FG3_PCT,PTS,...,USG%,OWS,OBPM,DWS,DBPM,PER,VORP,TeamName,Record,All_NBA
0,201939,Stephen Curry,1610612744,63,0.587,21.7,0.482,12.7,0.421,32.0,...,34.8,6.5,8.3,2.5,0.4,26.3,5.8,Warriors,39-33,1
1,203078,Bradley Beal,1610612764,60,0.533,23.0,0.485,6.2,0.349,31.3,...,34.1,4.2,4.8,1.7,-1.4,22.7,2.9,Wizards,34-38,3
2,203081,Damian Lillard,1610612757,67,0.582,19.9,0.451,10.5,0.391,28.8,...,31.4,9.6,7.5,0.8,-1.3,25.6,5.0,Trail Blazers,42-30,2
3,203954,Joel Embiid,1610612755,51,0.765,17.6,0.513,3.0,0.377,28.5,...,35.3,5.6,6.3,3.2,1.2,30.3,3.8,76ers,49-23,2
4,203507,Giannis Antetokounmpo,1610612749,61,0.656,18.0,0.569,3.6,0.303,28.1,...,32.5,6.9,6.2,3.3,2.8,29.2,5.6,Bucks,46-26,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
535,204222,Greg Whittington,1610612743,4,0.750,0.8,0.000,0.5,0.000,0.0,...,10.9,-0.1,-11.4,0.0,-5.7,-10.2,0.0,Nuggets,47-25,
536,203943,Noah Vonleh,1610612751,4,0.750,0.8,0.000,0.5,0.000,0.0,...,19.8,-0.1,-21.3,0.0,-5.0,-19.0,-0.1,Nets,48-24,
537,1630266,Will Magnay,1610612740,1,1.000,1.0,0.000,1.0,0.000,0.0,...,28.0,0.0,-30.7,0.0,-8.6,-35.1,0.0,Pelicans,31-41,
538,1628394,Anžejs Pasečņiks,1610612764,1,1.000,1.0,0.000,1.0,0.000,0.0,...,41.4,-0.1,-40.7,0.0,-5.9,-40.6,-0.1,Wizards,34-38,


In [36]:
# Build the merged 2020-21 table and write it to data_2021.xlsx without the row index.
data_2021 = create_final_data('2020-21')
data_2021.to_excel('data_2021.xlsx', index = False)

  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
