In [1]:
import pandas as pd

In [3]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

In [4]:
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.get("https://www.google.com")

[WDM] - Downloading: 100%|██████████| 6.58M/6.58M [00:02<00:00, 2.53MB/s]


In [5]:
import time

player_stats_url = "https://www.basketball-reference.com/leagues/NBA_{}_per_game.html"
year = 2023
url = player_stats_url.format(year)

driver.get(url)
driver.execute_script("window.scrollTo(1,10000)")
time.sleep(2)

html = driver.page_source

In [6]:
with open("player/{}.html".format(year), "w+", encoding="utf-8") as f:
    f.write(html)

In [7]:
years = list(range(1991,2024))

In [9]:
from bs4 import BeautifulSoup

In [10]:
# save table while getting rid of all header rows
dfs = []
for year in years:
    with open("player/{}.html".format(year), encoding="utf-8") as f:
        page = f.read()
        
    soup = BeautifulSoup(page, "html.parser")
    tHeads = soup.find_all('tr', class_="thead")
    for thead in tHeads:
        thead.decompose()
    player_table = soup.find(id="per_game_stats")
    player = pd.read_html(str(player_table))[0]
    player["Year"] = year
    dfs.append(player)

In [11]:
players = pd.concat(dfs)

In [12]:
players

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year
0,1,Alaa Abdelnaby,PF,22,POR,43,0,6.7,1.3,2.7,...,0.6,1.4,2.1,0.3,0.1,0.3,0.5,0.9,3.1,1991
1,2,Mahmoud Abdul-Rauf,PG,21,DEN,67,19,22.5,6.2,15.1,...,0.5,1.3,1.8,3.1,0.8,0.1,1.6,2.2,14.1,1991
2,3,Mark Acres,C,28,ORL,68,0,19.3,1.6,3.1,...,2.1,3.2,5.3,0.4,0.4,0.4,0.6,3.2,4.2,1991
3,4,Michael Adams,PG,28,DEN,66,66,35.5,8.5,21.5,...,0.9,3.0,3.9,10.5,2.2,0.1,3.6,2.5,26.5,1991
4,5,Mark Aguirre,SF,31,DET,78,13,25.7,5.4,11.7,...,1.7,3.1,4.8,1.8,0.6,0.3,1.6,2.7,14.2,1991
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
486,485,Delon Wright,PG,30,WAS,4,0,22.8,2.5,6.0,...,1.0,1.5,2.5,3.3,2.8,1.0,1.0,1.3,6.5,2023
487,486,McKinley Wright IV,PG,24,DAL,2,0,4.0,0.5,2.0,...,1.0,0.5,1.5,1.5,0.5,0.0,0.0,0.0,1.0,2023
488,487,Thaddeus Young,PF,34,TOR,29,9,17.5,2.3,4.2,...,1.8,2.1,3.9,1.8,1.1,0.1,0.8,1.9,5.1,2023
489,488,Trae Young,PG,24,ATL,29,29,35.8,8.8,21.3,...,0.8,2.1,2.9,9.8,0.7,0.1,3.9,1.4,27.6,2023


In [13]:
players.to_csv("players.csv")

In [14]:
team_stats_url = "https://www.basketball-reference.com/leagues/NBA_{}_standings.html"

In [16]:
import requests
year = 2023
url = team_stats_url.format(year)
data = requests.get(url)

with open("team/{}.html".format(year), "w+", encoding="utf-8") as f:
    f.write(data.text)

In [17]:
dfs = []
for year in years:
    with open("team/{}.html".format(year), encoding="utf-8") as f:
        page = f.read()

    soup = BeautifulSoup(page, "html.parser")
    tHeads = soup.find_all('tr', class_="thead")
    for thead in tHeads:
        thead.decompose()
    team_table = soup.find(id="divs_standings_E")
    team = pd.read_html(str(team_table))[0]
    team["Year"] = year
    team["Team"] = team["Eastern Conference"]
    del team["Eastern Conference"]
    dfs.append(team)

    soup = BeautifulSoup(page, "html.parser")
    tHeads = soup.find_all('tr', class_="thead")
    for thead in tHeads:
        thead.decompose()
    team_table = soup.find(id="divs_standings_W")
    team = pd.read_html(str(team_table))[0]
    team["Year"] = year
    team["Team"] = team["Western Conference"]
    del team["Western Conference"]
    dfs.append(team)

In [18]:
teams = pd.concat(dfs)

In [19]:
teams

Unnamed: 0,W,L,W/L%,GB,PS/G,PA/G,SRS,Year,Team
0,56,26,0.683,—,111.5,105.7,5.22,1991,Boston Celtics*
1,44,38,0.537,12.0,105.4,105.6,-0.39,1991,Philadelphia 76ers*
2,39,43,0.476,17.0,103.1,103.3,-0.43,1991,New York Knicks*
3,30,52,0.366,26.0,101.4,106.4,-4.84,1991,Washington Bullets
4,26,56,0.317,30.0,102.9,107.5,-4.53,1991,New Jersey Nets
...,...,...,...,...,...,...,...,...,...
10,19,11,0.633,—,115.4,111.2,4.00,2023,Memphis Grizzlies (2)
11,18,12,0.600,1.0,117.3,112.5,4.82,2023,New Orleans Pelicans (3)
12,16,16,0.500,4.0,110.9,109.5,2.38,2023,Dallas Mavericks (9)
13,10,20,0.333,9.0,110.3,119.5,-9.24,2023,San Antonio Spurs (14)


In [20]:
teams.to_csv("teams.csv")

In [21]:
driver.quit()

In [22]:
players = pd.read_csv("players.csv")

In [23]:
players

Unnamed: 0.1,Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year
0,0,1,Alaa Abdelnaby,PF,22,POR,43,0,6.7,1.3,...,0.6,1.4,2.1,0.3,0.1,0.3,0.5,0.9,3.1,1991
1,1,2,Mahmoud Abdul-Rauf,PG,21,DEN,67,19,22.5,6.2,...,0.5,1.3,1.8,3.1,0.8,0.1,1.6,2.2,14.1,1991
2,2,3,Mark Acres,C,28,ORL,68,0,19.3,1.6,...,2.1,3.2,5.3,0.4,0.4,0.4,0.6,3.2,4.2,1991
3,3,4,Michael Adams,PG,28,DEN,66,66,35.5,8.5,...,0.9,3.0,3.9,10.5,2.2,0.1,3.6,2.5,26.5,1991
4,4,5,Mark Aguirre,SF,31,DET,78,13,25.7,5.4,...,1.7,3.1,4.8,1.8,0.6,0.3,1.6,2.7,14.2,1991
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18684,486,485,Delon Wright,PG,30,WAS,4,0,22.8,2.5,...,1.0,1.5,2.5,3.3,2.8,1.0,1.0,1.3,6.5,2023
18685,487,486,McKinley Wright IV,PG,24,DAL,2,0,4.0,0.5,...,1.0,0.5,1.5,1.5,0.5,0.0,0.0,0.0,1.0,2023
18686,488,487,Thaddeus Young,PF,34,TOR,29,9,17.5,2.3,...,1.8,2.1,3.9,1.8,1.1,0.1,0.8,1.9,5.1,2023
18687,489,488,Trae Young,PG,24,ATL,29,29,35.8,8.8,...,0.8,2.1,2.9,9.8,0.7,0.1,3.9,1.4,27.6,2023


In [24]:
del players["Unnamed: 0"]
del players["Rk"]

In [25]:
players.head()

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year
0,Alaa Abdelnaby,PF,22,POR,43,0,6.7,1.3,2.7,0.474,...,0.6,1.4,2.1,0.3,0.1,0.3,0.5,0.9,3.1,1991
1,Mahmoud Abdul-Rauf,PG,21,DEN,67,19,22.5,6.2,15.1,0.413,...,0.5,1.3,1.8,3.1,0.8,0.1,1.6,2.2,14.1,1991
2,Mark Acres,C,28,ORL,68,0,19.3,1.6,3.1,0.509,...,2.1,3.2,5.3,0.4,0.4,0.4,0.6,3.2,4.2,1991
3,Michael Adams,PG,28,DEN,66,66,35.5,8.5,21.5,0.394,...,0.9,3.0,3.9,10.5,2.2,0.1,3.6,2.5,26.5,1991
4,Mark Aguirre,SF,31,DET,78,13,25.7,5.4,11.7,0.462,...,1.7,3.1,4.8,1.8,0.6,0.3,1.6,2.7,14.2,1991


In [26]:
players["Player"] = players["Player"].str.replace("*","", regex=False)

In [29]:
def single_row(df):
    if df.shape[0]==1:
        return df
    else:
        row = df[df["Tm"] == "TOT"]
        row["Tm"] = df.iloc[-1,:]["Tm"]
        return row
    
players = players.groupby(["Player", "Year"]).apply(single_row)

In [31]:
players.index = players.index.droplevel()
players.index = players.index.droplevel()

In [32]:
players

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year
159,A.C. Green,PF,27,LAL,82,21,26.4,3.1,6.6,0.476,...,2.5,3.8,6.3,0.9,0.7,0.3,1.2,1.4,9.1,1991
609,A.C. Green,PF,28,LAL,82,53,35.4,4.7,9.8,0.476,...,3.7,5.6,9.3,1.4,1.1,0.4,1.4,1.7,13.6,1992
1051,A.C. Green,PF,29,LAL,82,55,34.4,4.6,8.6,0.537,...,3.5,5.2,8.7,1.4,1.1,0.5,1.4,1.8,12.8,1993
1519,A.C. Green,PF,30,PHO,82,55,34.5,5.7,11.3,0.502,...,3.4,5.8,9.2,1.7,0.9,0.5,1.2,1.7,14.7,1994
1988,A.C. Green,SF,31,PHO,82,52,32.8,3.8,7.5,0.504,...,2.4,5.8,8.2,1.5,0.7,0.4,1.4,1.8,11.2,1995
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5866,Željko Rebrača,C,29,DET,74,4,15.9,2.6,5.1,0.505,...,1.1,2.8,3.9,0.5,0.4,1.0,1.1,2.6,6.9,2002
6347,Željko Rebrača,C,30,DET,30,12,16.3,2.7,4.8,0.552,...,0.9,2.2,3.1,0.3,0.2,0.6,1.0,2.6,6.6,2003
6907,Željko Rebrača,C,31,ATL,24,2,11.4,1.4,3.2,0.442,...,1.0,1.5,2.4,0.3,0.2,0.5,0.7,2.2,3.8,2004
7485,Željko Rebrača,C,32,LAC,58,2,16.0,2.3,4.0,0.568,...,0.8,2.3,3.2,0.4,0.2,0.7,0.8,2.2,5.8,2005


In [33]:
teams = pd.read_csv("teams.csv")

In [34]:
teams.head(30)

Unnamed: 0.1,Unnamed: 0,W,L,W/L%,GB,PS/G,PA/G,SRS,Year,Team
0,0,56,26,0.683,—,111.5,105.7,5.22,1991,Boston Celtics*
1,1,44,38,0.537,12.0,105.4,105.6,-0.39,1991,Philadelphia 76ers*
2,2,39,43,0.476,17.0,103.1,103.3,-0.43,1991,New York Knicks*
3,3,30,52,0.366,26.0,101.4,106.4,-4.84,1991,Washington Bullets
4,4,26,56,0.317,30.0,102.9,107.5,-4.53,1991,New Jersey Nets
5,5,24,58,0.293,32.0,101.8,107.8,-5.91,1991,Miami Heat
6,6,61,21,0.744,—,110.0,101.0,8.57,1991,Chicago Bulls*
7,7,50,32,0.61,11.0,100.1,96.8,3.08,1991,Detroit Pistons*
8,8,48,34,0.585,13.0,106.4,104.0,2.33,1991,Milwaukee Bucks*
9,9,43,39,0.524,18.0,109.8,109.0,0.72,1991,Atlanta Hawks*


In [35]:
teams["Team"] = teams["Team"].str.replace("*", "", regex=False)

In [36]:
teams["Team"].unique()

array(['Boston Celtics', 'Philadelphia 76ers', 'New York Knicks',
       'Washington Bullets', 'New Jersey Nets', 'Miami Heat',
       'Chicago Bulls', 'Detroit Pistons', 'Milwaukee Bucks',
       'Atlanta Hawks', 'Indiana Pacers', 'Cleveland Cavaliers',
       'Charlotte Hornets', 'San Antonio Spurs', 'Utah Jazz',
       'Houston Rockets', 'Orlando Magic', 'Minnesota Timberwolves',
       'Dallas Mavericks', 'Denver Nuggets', 'Portland Trail Blazers',
       'Los Angeles Lakers', 'Phoenix Suns', 'Golden State Warriors',
       'Seattle SuperSonics', 'Los Angeles Clippers', 'Sacramento Kings',
       'Toronto Raptors', 'Vancouver Grizzlies', 'Washington Wizards',
       'Memphis Grizzlies', 'New Orleans Hornets', 'Charlotte Bobcats',
       'New Orleans/Oklahoma City Hornets', 'Oklahoma City Thunder',
       'Brooklyn Nets', 'New Orleans Pelicans', 'Boston Celtics\xa0(2)',
       'Brooklyn Nets\xa0(4)', 'Philadelphia 76ers\xa0(5)',
       'New York Knicks\xa0(6)', 'Toronto Raptors\xa0(

In [56]:
teams["Team"] = teams["Team"].str.replace("\xa0([^)]*)", "", regex=True)

In [57]:
teams

Unnamed: 0.1,Unnamed: 0,W,L,W/L%,GB,PS/G,PA/G,SRS,Year,Team
0,0,56,26,0.683,—,111.5,105.7,5.22,1991,Boston Celtics
1,1,44,38,0.537,12.0,105.4,105.6,-0.39,1991,Philadelphia 76ers
2,2,39,43,0.476,17.0,103.1,103.3,-0.43,1991,New York Knicks
3,3,30,52,0.366,26.0,101.4,106.4,-4.84,1991,Washington Bullets
4,4,26,56,0.317,30.0,102.9,107.5,-4.53,1991,New Jersey Nets
...,...,...,...,...,...,...,...,...,...,...
961,10,19,11,0.633,—,115.4,111.2,4.00,2023,Memphis Grizzlies)
962,11,18,12,0.600,1.0,117.3,112.5,4.82,2023,New Orleans Pelicans)
963,12,16,16,0.500,4.0,110.9,109.5,2.38,2023,Dallas Mavericks)
964,13,10,20,0.333,9.0,110.3,119.5,-9.24,2023,San Antonio Spurs)


In [58]:
teams["Team"].unique()

array(['Boston Celtics', 'Philadelphia 76ers', 'New York Knicks',
       'Washington Bullets', 'New Jersey Nets', 'Miami Heat',
       'Chicago Bulls', 'Detroit Pistons', 'Milwaukee Bucks',
       'Atlanta Hawks', 'Indiana Pacers', 'Cleveland Cavaliers',
       'Charlotte Hornets', 'San Antonio Spurs', 'Utah Jazz',
       'Houston Rockets', 'Orlando Magic', 'Minnesota Timberwolves',
       'Dallas Mavericks', 'Denver Nuggets', 'Portland Trail Blazers',
       'Los Angeles Lakers', 'Phoenix Suns', 'Golden State Warriors',
       'Seattle SuperSonics', 'Los Angeles Clippers', 'Sacramento Kings',
       'Toronto Raptors', 'Vancouver Grizzlies', 'Washington Wizards',
       'Memphis Grizzlies', 'New Orleans Hornets', 'Charlotte Bobcats',
       'New Orleans/Oklahoma City Hornets', 'Oklahoma City Thunder',
       'Brooklyn Nets', 'New Orleans Pelicans', 'Boston Celtics)',
       'Brooklyn Nets)', 'Philadelphia 76ers)', 'New York Knicks)',
       'Toronto Raptors)', 'Milwaukee Bucks)', 'Clev

In [62]:
teams["Team"] = teams["Team"].str.replace(")", "", regex=False)

In [63]:
teams

Unnamed: 0.1,Unnamed: 0,W,L,W/L%,GB,PS/G,PA/G,SRS,Year,Team
0,0,56,26,0.683,—,111.5,105.7,5.22,1991,Boston Celtics
1,1,44,38,0.537,12.0,105.4,105.6,-0.39,1991,Philadelphia 76ers
2,2,39,43,0.476,17.0,103.1,103.3,-0.43,1991,New York Knicks
3,3,30,52,0.366,26.0,101.4,106.4,-4.84,1991,Washington Bullets
4,4,26,56,0.317,30.0,102.9,107.5,-4.53,1991,New Jersey Nets
...,...,...,...,...,...,...,...,...,...,...
961,10,19,11,0.633,—,115.4,111.2,4.00,2023,Memphis Grizzlies
962,11,18,12,0.600,1.0,117.3,112.5,4.82,2023,New Orleans Pelicans
963,12,16,16,0.500,4.0,110.9,109.5,2.38,2023,Dallas Mavericks
964,13,10,20,0.333,9.0,110.3,119.5,-9.24,2023,San Antonio Spurs


In [67]:
teams["Team"].unique()

array(['Boston Celtics', 'Philadelphia 76ers', 'New York Knicks',
       'Washington Bullets', 'New Jersey Nets', 'Miami Heat',
       'Chicago Bulls', 'Detroit Pistons', 'Milwaukee Bucks',
       'Atlanta Hawks', 'Indiana Pacers', 'Cleveland Cavaliers',
       'Charlotte Hornets', 'San Antonio Spurs', 'Utah Jazz',
       'Houston Rockets', 'Orlando Magic', 'Minnesota Timberwolves',
       'Dallas Mavericks', 'Denver Nuggets', 'Portland Trail Blazers',
       'Los Angeles Lakers', 'Phoenix Suns', 'Golden State Warriors',
       'Seattle SuperSonics', 'Los Angeles Clippers', 'Sacramento Kings',
       'Toronto Raptors', 'Vancouver Grizzlies', 'Washington Wizards',
       'Memphis Grizzlies', 'New Orleans Hornets', 'Charlotte Bobcats',
       'New Orleans/Oklahoma City Hornets', 'Oklahoma City Thunder',
       'Brooklyn Nets', 'New Orleans Pelicans'], dtype=object)

In [70]:
teams["GB"] = teams["GB"].str.replace("—", "0")

In [71]:
teams

Unnamed: 0.1,Unnamed: 0,W,L,W/L%,GB,PS/G,PA/G,SRS,Year,Team
0,0,56,26,0.683,0,111.5,105.7,5.22,1991,Boston Celtics
1,1,44,38,0.537,12.0,105.4,105.6,-0.39,1991,Philadelphia 76ers
2,2,39,43,0.476,17.0,103.1,103.3,-0.43,1991,New York Knicks
3,3,30,52,0.366,26.0,101.4,106.4,-4.84,1991,Washington Bullets
4,4,26,56,0.317,30.0,102.9,107.5,-4.53,1991,New Jersey Nets
...,...,...,...,...,...,...,...,...,...,...
961,10,19,11,0.633,0,115.4,111.2,4.00,2023,Memphis Grizzlies
962,11,18,12,0.600,1.0,117.3,112.5,4.82,2023,New Orleans Pelicans
963,12,16,16,0.500,4.0,110.9,109.5,2.38,2023,Dallas Mavericks
964,13,10,20,0.333,9.0,110.3,119.5,-9.24,2023,San Antonio Spurs


In [75]:
read_file = pd.read_csv(r'C:\Users\brian\nicknames.txt')
read_file.to_csv (r'C:\Users\brian\nicknames.csv', index=None)

In [76]:
nicknames = {}

with open("nicknames.csv") as f:
    lines = f.readlines()
    for line in lines[1:]:
        abbrev, name = line.replace("\n", "").split(",")
        nicknames[abbrev] = name

In [77]:
players["Team"] = players["Tm"].map(nicknames)

In [78]:
players

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year,Team
159,A.C. Green,PF,27,LAL,82,21,26.4,3.1,6.6,0.476,...,3.8,6.3,0.9,0.7,0.3,1.2,1.4,9.1,1991,Los Angeles Lakers
609,A.C. Green,PF,28,LAL,82,53,35.4,4.7,9.8,0.476,...,5.6,9.3,1.4,1.1,0.4,1.4,1.7,13.6,1992,Los Angeles Lakers
1051,A.C. Green,PF,29,LAL,82,55,34.4,4.6,8.6,0.537,...,5.2,8.7,1.4,1.1,0.5,1.4,1.8,12.8,1993,Los Angeles Lakers
1519,A.C. Green,PF,30,PHO,82,55,34.5,5.7,11.3,0.502,...,5.8,9.2,1.7,0.9,0.5,1.2,1.7,14.7,1994,Phoenix Suns
1988,A.C. Green,SF,31,PHO,82,52,32.8,3.8,7.5,0.504,...,5.8,8.2,1.5,0.7,0.4,1.4,1.8,11.2,1995,Phoenix Suns
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5866,Željko Rebrača,C,29,DET,74,4,15.9,2.6,5.1,0.505,...,2.8,3.9,0.5,0.4,1.0,1.1,2.6,6.9,2002,Detroit Pistons
6347,Željko Rebrača,C,30,DET,30,12,16.3,2.7,4.8,0.552,...,2.2,3.1,0.3,0.2,0.6,1.0,2.6,6.6,2003,Detroit Pistons
6907,Željko Rebrača,C,31,ATL,24,2,11.4,1.4,3.2,0.442,...,1.5,2.4,0.3,0.2,0.5,0.7,2.2,3.8,2004,Atlanta Hawks
7485,Željko Rebrača,C,32,LAC,58,2,16.0,2.3,4.0,0.568,...,2.3,3.2,0.4,0.2,0.7,0.8,2.2,5.8,2005,Los Angeles Clippers


In [79]:
players.to_csv("players.csv")

In [80]:
teams.to_csv("teams.csv")