In [18]:
#import required libraries
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup


In [20]:
#request html content, both for regular season and playoffs
url_base_reg = 'https://www.basketball-reference.com/leagues/NBA_{}.html'
url_base_play = 'https://www.basketball-reference.com/playoffs/NBA_{}.html'

years = [2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013]

teams = ["Boston Celtics", "Brooklyn Nets", "New York Knicks", "Philadelphia 76ers", "Toronto Raptors", "Chicago Bulls",
"Cleveland Cavaliers", "Detroit Pistons", "Indiana Pacers", "Milwaukee Bucks", "Atlanta Hawks", "Charlotte Hornets",
"Miami Heat", "Orlando Magic", "Washington Wizards", "Denver Nuggets", "Minnesota Timberwolves", "Oklahoma City Thunder",
"Portland Trail Blazers", "Utah Jazz", "Golden State Warriors", "Los Angeles Clippers", "Los Angeles Lakers", "Phoenix Suns",
"Sacramento Kings", "Dallas Mavericks", "Houston Rockets", "Memphis Grizzlies", "New Orleans Pelicans", "San Antonio Spurs", "Charlotte Bobcats", "New Orleans Hornets"]

#seconds to wait for a response before giving up on a request
REQUEST_TIMEOUT = 30


def fetch_soup(url):
    """Download ``url`` and return its content parsed by BeautifulSoup.

    Raises:
        requests.HTTPError: if the server answers with an error status, so a
            blocked or missing page fails here instead of being parsed as data.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')


def read_table(soup, table_id):
    """Return the ``<table>`` element with id ``table_id`` as a DataFrame.

    Raises:
        ValueError: if ``soup`` contains no table with that id.
    """
    table = soup.find('table', id=table_id)
    if table is None:
        raise ValueError("table '{}' not found in page".format(table_id))
    #read_html is given a file-like object; passing the literal HTML string
    #is deprecated in recent pandas releases
    return pd.read_html(StringIO(str(table)))[0]


#one merged DataFrame per season, concatenated once after the loop
season_frames = []

for year in years:

    #the per-game and advanced tables are both read from the regular season page
    soup = fetch_soup(url_base_reg.format(year))

    #create DataFrame
    df = read_table(soup, "per_game-team")

    #clean DataFrame
    #row 30 comes after the team rows; presumably a league-wide summary row - TODO confirm
    df = df.drop(index=30)
    #regex=False: '*' is removed as a literal character, not treated as a pattern
    df['Team'] = df['Team'].str.replace('*', '', regex=False)
    df.insert(2, "Year", year, allow_duplicates=True)
    df = df.drop(columns=["G","Rk"])
    df = df.reset_index(drop=True)

    #add advanced stats to DataFrame
    df_adv = read_table(soup, "advanced-team")
    #the advanced table has a two-level header; keep only the lower level
    df_adv.columns = df_adv.columns.droplevel()
    df_adv = df_adv.drop(index=30)
    df_adv['Team'] = df_adv['Team'].str.replace('*', '', regex=False)
    df_adv = df_adv.drop(columns=["Rk", "L", "PW", "PL", "Unnamed: 17_level_1", "Unnamed: 22_level_1", "Unnamed: 27_level_1", "Arena"])
    df_adv = df_adv.reset_index(drop=True)

    #add playoff wins to DataFrame
    soup = fetch_soup(url_base_play.format(year))
    df_play = read_table(soup, "advanced-team")
    df_play.columns = df_play.columns.droplevel()
    #row 16 comes after the playoff team rows; presumably the summary row - TODO confirm
    df_play = df_play.drop(index=16)
    df_play = df_play.rename(columns={"Tm": "Team", "W" : "Playoff_W"})
    df_play = df_play[["Team", "Playoff_W"]]

    #in order to prevent non-playoff team rows from being deleted from df during the merge, we must append non-playoff teams to df_play
    #with non-playoff teams treated as having 0 playoff wins
    playoff_teams = set(df_play["Team"])
    missed_playoffs = [team for team in teams if team not in playoff_teams]
    if missed_playoffs:
        #a single concat replaces the row-by-row DataFrame.append, which pandas 2.0 removed
        df_zero = pd.DataFrame({"Team": missed_playoffs, "Playoff_W": 0})
        df_play = pd.concat([df_play, df_zero], ignore_index=True)

    df = pd.merge(df, df_adv, on='Team')
    df = pd.merge(df, df_play, on='Team')

    season_frames.append(df)

#each season keeps its own 0-based row index, so index values repeat across seasons
df_main = pd.concat(season_frames) if season_frames else pd.DataFrame()


  df['Team'] = df['Team'].str.replace('*', '')
  df_adv['Team'] = df_adv['Team'].str.replace('*', '')


Unnamed: 0,Team,Year,MP,FG,FGA,FG%,3P,3PA,3P%,2P,...,TOV%,ORB%,FT/FGA,eFG%,TOV%.1,DRB%,FT/FGA.1,Attend.,Attend./G,Playoff_W
0,Minnesota Timberwolves,2022,241.2,41.6,91.0,0.457,14.8,41.3,0.358,26.8,...,12.4,24.4,0.198,0.535,14.2,74.9,0.227,657148.0,16028.0,2.0
1,Memphis Grizzlies,2022,241.2,43.5,94.4,0.461,11.5,32.7,0.353,32.0,...,11.2,30.0,0.180,0.523,13.3,77.8,0.195,646785.0,15775.0,6.0
2,Milwaukee Bucks,2022,240.9,41.8,89.4,0.468,14.1,38.4,0.366,27.8,...,11.9,23.0,0.199,0.536,11.6,78.6,0.165,715581.0,17453.0,7.0
3,Charlotte Hornets,2022,242.4,42.8,91.4,0.468,13.9,38.2,0.365,28.8,...,11.6,23.3,0.173,0.544,13.1,74.8,0.187,700755.0,17092.0,0.0
4,Phoenix Suns,2022,240.6,43.7,90.1,0.485,11.6,31.9,0.364,32.1,...,11.6,22.3,0.176,0.510,13.0,77.1,0.195,663171.0,16175.0,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23,Orlando Magic,2013,241.2,37.7,84.2,0.448,6.2,18.7,0.329,31.5,...,13.7,25.4,0.149,0.508,11.8,74.6,0.195,722716.0,17627.0,0.0
24,Memphis Grizzlies,2013,241.5,36.1,81.5,0.444,4.7,13.5,0.345,31.5,...,13.3,31.0,0.202,0.475,15.2,74.3,0.209,681613.0,16625.0,8.0
25,Philadelphia 76ers,2013,240.9,37.3,84.1,0.444,6.3,17.5,0.360,31.0,...,12.5,24.9,0.146,0.495,13.7,73.4,0.200,685412.0,16717.0,0.0
26,Chicago Bulls,2013,241.8,35.7,81.7,0.437,5.4,15.4,0.353,30.2,...,13.6,29.4,0.201,0.477,13.2,73.6,0.206,896944.0,21877.0,5.0


In [21]:
#save the combined table to disk as a CSV file
#(to_csv writes the DataFrame index as well, as the first column)
output_path = 'team_per_game_2013-22.csv'
df_main.to_csv(output_path)