In [1]:
from bs4 import BeautifulSoup
import requests as rq
from datetime import datetime
import pandas as pd
import numpy as np
import re

In [2]:
# Team season page that every table in this notebook is scraped from.
url = 'https://www.basketball-reference.com/teams/HOU/2020.html'

# Request headers: a browser-like User-Agent is sent with the GET.
agent = {'User-Agent': 'Mozilla/5.0'}

# requests applies no timeout unless one is given, so bound the wait, and raise
# on an HTTP error status rather than parsing an error page as the team page.
response = rq.get(url, headers=agent, timeout=30)
response.raise_for_status()

url_text = response.text

# Parsed document used by the table-scraping cells.
soup = BeautifulSoup(url_text, 'html.parser')

In [3]:
##################################################################################
# Roster Table
##################################################################################

# Scrape the table with id 'roster' into one record (dict) per body row, keyed
# by each cell's `data-stat` attribute, plus a 'link' entry holding the href
# found in the player cell.
roster_table = soup.find('table', attrs={'id': 'roster'})

roster_info = roster_table.find('tbody')

roster_records = []

for tr in roster_info.find_all('tr'):

    roster_dict = {}

    # Row header cell; only stored when it is the 'number' cell. A row without
    # a <th> is tolerated instead of raising on None.
    nmbr = tr.find('th')

    if nmbr is not None and nmbr.get('data-stat') == 'number':
        roster_dict[nmbr['data-stat']] = nmbr.text

    for td in tr.find_all('td'):

        if td['data-stat'] == 'player':
            # Look the anchor up explicitly rather than assuming it is the
            # cell's first child; skip the link when the cell has no anchor.
            anchor = td.find('a')
            if anchor is not None:
                roster_dict['link'] = anchor.get('href')

        roster_dict[td['data-stat']] = td.text

    roster_records.append(roster_dict)

# Build the frame once from the collected records: DataFrame.append was removed
# in pandas 2.0, and appending inside a loop re-copies the frame on every row.
# Columns are sorted by name to keep the layout shown in this cell's output.
roster_df = pd.DataFrame(roster_records).sort_index(axis=1)

roster_df.head()

Unnamed: 0,birth_country,birth_date,college,height,link,number,player,pos,weight,years_experience
0,us,"May 5, 1985",Texas,6-5,/players/t/tuckepj01.html,17,P.J. Tucker,PF,245,8
1,us,"February 11, 1993",Kansas,6-3,/players/m/mclembe01.html,16,Ben McLemore,SG,195,6
2,us,"August 26, 1989",Arizona State,6-5,/players/h/hardeja01.html,13,James Harden,SG,220,10
3,us,"August 1, 1992",Duke,6-3,/players/r/riverau01.html,25,Austin Rivers,SG,200,7
4,us,"June 7, 1993",Texas A&M,6-6,/players/h/houseda01.html,4,Danuel House,SF,215,3


In [33]:
##################################################################################################
# All Roster Table
##################################################################################################

# Scrape the table with id 'roster' using the header text as column labels.
all_roster_table = soup.find('table', attrs={'id': 'roster'})

# Column labels come from the <thead> cells; a header whose text is a lone
# non-breaking space is labelled 'Country'.
all_roster_head = all_roster_table.find('thead')

all_roster_df_index = [
    'Country' if th.text == '\xa0' else th.text
    for th in all_roster_head.find_all('th')
]

# One list per body row: the row-header cell's text followed by the text of
# every data cell, in document order.
all_roster_data = all_roster_table.find('tbody')

all_roster_rows = [
    [tr.find('th').text] + [td.text for td in tr.find_all('td')]
    for tr in all_roster_data.find_all('tr')
]

# Construct the frame in one step instead of enlarging it one row at a time
# with .loc, which re-allocates the frame for every row.
all_roster_df = pd.DataFrame(all_roster_rows, columns=all_roster_df_index)

all_roster_df.head()

Unnamed: 0,No.,Player,Pos,Ht,Wt,Birth Date,Country,Exp,College
0,17,P.J. Tucker,PF,6-5,245,"May 5, 1985",us,8,Texas
1,16,Ben McLemore,SG,6-3,195,"February 11, 1993",us,6,Kansas
2,13,James Harden,SG,6-5,220,"August 26, 1989",us,10,Arizona State
3,25,Austin Rivers,SG,6-3,200,"August 1, 1992",us,7,Duke
4,4,Danuel House,SF,6-6,215,"June 7, 1993",us,3,Texas A&M


In [34]:
##################################################################################################
# All Per Game Table
##################################################################################################

# Wrapper <div> for the per-game table.
all_per_game_table = soup.find('div', attrs={'id': 'all_per_game'})

# The wrapper's child at index 5 is re-parsed as markup of its own --
# presumably the HTML comment in which the site ships this table; TODO confirm
# the index is stable across pages.
all_per_game_info = BeautifulSoup(all_per_game_table.contents[5], 'lxml')

# Column labels come from the <thead> cells; a header whose text is a lone
# non-breaking space is labelled 'Player'.
all_per_game_head = all_per_game_info.find('thead')

all_per_game_df_index = [
    'Player' if th.text == '\xa0' else th.text
    for th in all_per_game_head.find_all('th')
]

# One list per body row: the row-header cell's text followed by the text of
# every data cell, in document order.
all_per_game_data = all_per_game_info.find('tbody')

all_per_game_rows = [
    [tr.find('th').text] + [td.text for td in tr.find_all('td')]
    for tr in all_per_game_data.find_all('tr')
]

# Construct the frame in one step instead of enlarging it one row at a time
# with .loc, which re-allocates the frame for every row.
all_per_game_df = pd.DataFrame(all_per_game_rows, columns=all_per_game_df_index)

all_per_game_df.head()

Unnamed: 0,Rk,Player,Age,G,GS,MP,FG,FGA,FG%,3P,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS/G
0,1,James Harden,30,68,68,36.5,9.9,22.3,0.444,4.4,...,0.865,1.0,5.5,6.6,7.5,1.8,0.9,4.5,3.3,34.3
1,2,Russell Westbrook,31,57,57,35.9,10.6,22.5,0.472,1.0,...,0.763,1.8,6.2,7.9,7.0,1.6,0.4,4.5,3.5,27.2
2,3,P.J. Tucker,34,72,72,34.3,2.5,5.9,0.415,1.5,...,0.813,1.6,5.1,6.6,1.6,1.1,0.5,1.0,3.2,6.9
3,4,Robert Covington,29,22,21,33.0,4.0,10.3,0.392,2.4,...,0.8,1.2,6.8,8.0,1.5,1.6,2.2,1.4,3.3,11.6
4,5,Clint Capela,25,39,39,32.8,6.3,9.9,0.629,0.0,...,0.529,4.3,9.5,13.8,1.2,0.8,1.8,1.6,2.6,13.9


In [35]:
##################################################################################################
# All Totals Table
##################################################################################################

# Wrapper <div> for the totals table.
all_totals_table = soup.find('div', attrs={'id': 'all_totals'})

# The wrapper's child at index 5 is re-parsed as markup of its own --
# presumably the HTML comment in which the site ships this table; TODO confirm
# the index is stable across pages.
all_totals_info = BeautifulSoup(all_totals_table.contents[5], 'lxml')

# Column labels come from the <thead> cells; a header whose text is a lone
# non-breaking space is labelled 'Player'.
all_totals_head = all_totals_info.find('thead')

all_totals_df_index = [
    'Player' if th.text == '\xa0' else th.text
    for th in all_totals_head.find_all('th')
]

# One list per body row: the row-header cell's text followed by the text of
# every data cell, in document order.
all_totals_data = all_totals_info.find('tbody')

all_totals_rows = [
    [tr.find('th').text] + [td.text for td in tr.find_all('td')]
    for tr in all_totals_data.find_all('tr')
]

# Construct the frame in one step instead of enlarging it one row at a time
# with .loc, which re-allocates the frame for every row.
all_totals_df = pd.DataFrame(all_totals_rows, columns=all_totals_df_index)

all_totals_df.head()

Unnamed: 0,Rk,Player,Age,G,GS,MP,FG,FGA,FG%,3P,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1,James Harden,30,68,68,2483,672,1514,0.444,299,...,0.865,70,376,446,512,125,60,308,227,2335
1,2,P.J. Tucker,34,72,72,2467,177,426,0.415,107,...,0.813,112,364,476,115,77,36,72,229,500
2,3,Russell Westbrook,31,57,57,2049,604,1281,0.472,55,...,0.763,100,351,451,401,93,20,255,200,1553
3,4,Danuel House,26,63,52,1913,230,539,0.427,129,...,0.811,57,210,267,85,67,34,54,132,662
4,5,Ben McLemore,26,71,23,1619,242,545,0.444,181,...,0.746,21,136,157,58,45,16,50,155,715


In [36]:
##################################################################################################
# All Per Minute Table
##################################################################################################

# Wrapper <div> for the per-minute table.
all_per_minute_table = soup.find('div', attrs={'id': 'all_per_minute'})

# The wrapper's child at index 5 is re-parsed as markup of its own --
# presumably the HTML comment in which the site ships this table; TODO confirm
# the index is stable across pages.
all_per_minute_info = BeautifulSoup(all_per_minute_table.contents[5], 'lxml')

# Column labels come from the <thead> cells; a header whose text is a lone
# non-breaking space is labelled 'Player'.
all_per_minute_head = all_per_minute_info.find('thead')

all_per_minute_df_index = [
    'Player' if th.text == '\xa0' else th.text
    for th in all_per_minute_head.find_all('th')
]

# One list per body row: the row-header cell's text followed by the text of
# every data cell, in document order.
all_per_minute_data = all_per_minute_info.find('tbody')

all_per_minute_rows = [
    [tr.find('th').text] + [td.text for td in tr.find_all('td')]
    for tr in all_per_minute_data.find_all('tr')
]

# Construct the frame in one step instead of enlarging it one row at a time
# with .loc, which re-allocates the frame for every row.
all_per_minute_df = pd.DataFrame(all_per_minute_rows, columns=all_per_minute_df_index)

all_per_minute_df.head()

Unnamed: 0,Rk,Player,Age,G,GS,MP,FG,FGA,FG%,3P,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1,James Harden,30,68,68,2483,9.7,22.0,0.444,4.3,...,0.865,1.0,5.5,6.5,7.4,1.8,0.9,4.5,3.3,33.9
1,2,P.J. Tucker,34,72,72,2467,2.6,6.2,0.415,1.6,...,0.813,1.6,5.3,6.9,1.7,1.1,0.5,1.1,3.3,7.3
2,3,Russell Westbrook,31,57,57,2049,10.6,22.5,0.472,1.0,...,0.763,1.8,6.2,7.9,7.0,1.6,0.4,4.5,3.5,27.3
3,4,Danuel House,26,63,52,1913,4.3,10.1,0.427,2.4,...,0.811,1.1,4.0,5.0,1.6,1.3,0.6,1.0,2.5,12.5
4,5,Ben McLemore,26,71,23,1619,5.4,12.1,0.444,4.0,...,0.746,0.5,3.0,3.5,1.3,1.0,0.4,1.1,3.4,15.9


In [37]:
##################################################################################################
# All Per Poss Table
##################################################################################################

# Wrapper <div> for the per-possession table.
all_per_poss_table = soup.find('div', attrs={'id': 'all_per_poss'})

# The wrapper's child at index 5 is re-parsed as markup of its own --
# presumably the HTML comment in which the site ships this table; TODO confirm
# the index is stable across pages.
all_per_poss_info = BeautifulSoup(all_per_poss_table.contents[5], 'lxml')

# Column labels come from the <thead> cells; a header whose text is a lone
# non-breaking space is labelled 'Player'. Any other header text, including an
# empty string, is used as-is.
all_per_poss_head = all_per_poss_info.find('thead')

all_per_poss_df_index = [
    'Player' if th.text == '\xa0' else th.text
    for th in all_per_poss_head.find_all('th')
]

# One list per body row: the row-header cell's text followed by the text of
# every data cell, in document order.
all_per_poss_data = all_per_poss_info.find('tbody')

all_per_poss_rows = [
    [tr.find('th').text] + [td.text for td in tr.find_all('td')]
    for tr in all_per_poss_data.find_all('tr')
]

# Construct the frame in one step instead of enlarging it one row at a time
# with .loc, which re-allocates the frame for every row.
all_per_poss_df = pd.DataFrame(all_per_poss_rows, columns=all_per_poss_df_index)

all_per_poss_df.head()

Unnamed: 0,Rk,Player,Age,G,GS,MP,FG,FGA,FG%,3P,...,TRB,AST,STL,BLK,TOV,PF,PTS,Unnamed: 19,ORtg,DRtg
0,1,James Harden,30,68,68,2483,12.5,28.2,0.444,5.6,...,8.3,9.5,2.3,1.1,5.7,4.2,43.5,,120,108
1,2,P.J. Tucker,34,72,72,2467,3.3,8.0,0.415,2.0,...,8.9,2.2,1.4,0.7,1.4,4.3,9.4,,114,111
2,3,Russell Westbrook,31,57,57,2049,13.6,28.9,0.472,1.2,...,10.2,9.1,2.1,0.5,5.8,4.5,35.1,,106,109
3,4,Danuel House,26,63,52,1913,5.6,13.0,0.427,3.1,...,6.5,2.1,1.6,0.8,1.3,3.2,16.0,,114,112
4,5,Ben McLemore,26,71,23,1619,6.9,15.6,0.444,5.2,...,4.5,1.7,1.3,0.5,1.4,4.4,20.4,,118,113


In [38]:
##################################################################################################
# All Advanced Table
##################################################################################################

# Wrapper <div> for the advanced-stats table.
all_advanced_table = soup.find('div', attrs={'id': 'all_advanced'})

# The wrapper's child at index 5 is re-parsed as markup of its own --
# presumably the HTML comment in which the site ships this table; TODO confirm
# the index is stable across pages.
all_advanced_info = BeautifulSoup(all_advanced_table.contents[5], 'lxml')

# Column labels come from the <thead> cells; a header whose text is a lone
# non-breaking space is labelled 'Player'.
# NOTE(review): the recorded output shows extra, empty 'Player' columns, so
# blank spacer headers appear to hit this rule too -- the label is not unique.
all_advanced_head = all_advanced_info.find('thead')

all_advanced_df_index = [
    'Player' if th.text == '\xa0' else th.text
    for th in all_advanced_head.find_all('th')
]

# One list per body row: the row-header cell's text followed by the text of
# every data cell, in document order.
all_advanced_data = all_advanced_info.find('tbody')

all_advanced_rows = [
    [tr.find('th').text] + [td.text for td in tr.find_all('td')]
    for tr in all_advanced_data.find_all('tr')
]

# Construct the frame in one step instead of enlarging it one row at a time
# with .loc, which re-allocates the frame for every row.
all_advanced_df = pd.DataFrame(all_advanced_rows, columns=all_advanced_df_index)

all_advanced_df.head()

Unnamed: 0,Rk,Player,Age,G,MP,PER,TS%,3PAr,FTr,ORB%,...,Player.1,OWS,DWS,WS,WS/48,Player.2,OBPM,DBPM,BPM,VORP
0,1,James Harden,30,68,2483,29.1,0.626,0.557,0.528,2.9,...,,9.9,3.2,13.1,0.254,,8.2,1.5,9.7,7.3
1,2,P.J. Tucker,34,72,2467,8.3,0.559,0.702,0.113,4.7,...,,1.6,2.4,4.0,0.078,,-2.3,0.5,-1.8,0.1
2,3,Russell Westbrook,31,57,2049,21.0,0.536,0.166,0.297,5.1,...,,1.7,2.5,4.2,0.098,,1.7,-0.2,1.5,1.8
3,4,Danuel House,26,63,1913,11.3,0.572,0.659,0.167,3.1,...,,1.9,1.8,3.6,0.091,,-0.7,0.0,-0.6,0.7
4,5,Ben McLemore,26,71,1619,11.8,0.622,0.829,0.123,1.3,...,,2.2,1.1,3.3,0.099,,0.1,-0.6,-0.5,0.6


In [39]:
##################################################################################################
# All Adj Shooting Table RETRY
##################################################################################################

# Wrapper <div> for the adjusted-shooting table.
all_adj_shooting_table = soup.find('div', attrs={'id': 'all_adj-shooting'})

# The wrapper's child at index 5 is re-parsed as markup of its own --
# presumably the HTML comment in which the site ships this table; TODO confirm
# the index is stable across pages.
all_adj_shooting_info = BeautifulSoup(all_adj_shooting_table.contents[5], 'lxml')

# Column labels are (group, stat) tuples collected from the header row(s).
all_adj_shooting_df_index = []

all_adj_shooting_head = all_adj_shooting_info.find('thead')

for tr in all_adj_shooting_head.find_all('tr'):

    # Skip the grouping row explicitly. This replaces a bare `except:` that was
    # used as control flow and skipped every row carrying any class at all.
    if 'over_header' in (tr.get('class') or []):
        continue

    for th in tr.find_all('th'):
        stat = th['data-stat']

        if stat == 'ranker':
            all_adj_shooting_df_index.append(('Player', 'Rank'))
        elif stat == 'player':
            all_adj_shooting_df_index.append(('Player', 'Player'))
        elif re.search(r'DUMMY[\d]?[\s]?', stat):
            # Columns whose data-stat contains 'DUMMY' get no label; the
            # matching body cells are dropped below.
            continue
        elif th.text in ['FG Add', 'TS Add']:
            all_adj_shooting_df_index.append(('Extra', th.text))
        elif th['data-over-header'] == '\xa0':
            # A non-breaking-space group name is filed under 'Player'.
            all_adj_shooting_df_index.append(('Player', th.text))
        else:
            all_adj_shooting_df_index.append((th['data-over-header'], th.text))

all_adj_shooting_columns = pd.MultiIndex.from_tuples(all_adj_shooting_df_index, names=['Header', 'Stat'])

# One list per body row: the row-header cell's text followed by the text of
# every data cell whose data-stat does not contain 'DUMMY'.
all_adj_shooting_data = all_adj_shooting_info.find('tbody')

all_adj_shooting_rows = []

for tr in all_adj_shooting_data.find_all('tr'):
    all_adj_shooting_list = [tr.find('th').text]
    all_adj_shooting_list.extend(
        td.text
        for td in tr.find_all('td')
        if not re.search(r'DUMMY[\d]?[\s]?', td['data-stat'])
    )
    all_adj_shooting_rows.append(all_adj_shooting_list)

# Construct the frame in one step instead of enlarging it one row at a time
# with .loc, which re-allocates the frame for every row.
all_adj_shooting_df = pd.DataFrame(all_adj_shooting_rows, columns=all_adj_shooting_columns)

all_adj_shooting_df.head()

Header,Player,Player,Player,Player,Player,Player Shooting %,Player Shooting %,Player Shooting %,Player Shooting %,Player Shooting %,...,League-Adjusted,League-Adjusted,League-Adjusted,League-Adjusted,League-Adjusted,League-Adjusted,League-Adjusted,League-Adjusted,Extra,Extra
Stat,Rank,Player,Age,G,MP,FG,2P,3P,eFG,FT,...,FG+,2P+,3P+,eFG+,FT+,TS+,FTr+,3PAr+,FG Add,TS Add
0,1,James Harden,30,68,2483,0.444,0.556,0.355,0.543,0.865,...,96,106,99,103,112,111,203,145,41.7,227.5
1,2,Russell Westbrook,31,57,2049,0.471,0.514,0.258,0.493,0.763,...,102,98,72,93,99,95,114,43,-91.9,-82.7
2,3,Ben McLemore,26,71,1619,0.444,0.656,0.4,0.61,0.746,...,97,125,112,115,97,110,47,216,88.6,66.2
3,4,Danuel House,26,63,1913,0.427,0.549,0.363,0.546,0.811,...,93,105,102,103,105,101,64,171,18.9,8.5
4,5,Austin Rivers,27,68,1594,0.421,0.498,0.356,0.517,0.703,...,92,95,99,98,91,96,85,140,-11.9,-25.0


In [40]:
##################################################################################################
# All Shooting Table RETRY
##################################################################################################

# Wrapper <div> for the shooting table.
all_shooting_table = soup.find('div', attrs={'id': 'all_shooting'})

# The wrapper's child at index 5 is re-parsed as markup of its own --
# presumably the HTML comment in which the site ships this table; TODO confirm
# the index is stable across pages.
all_shooting_info = BeautifulSoup(all_shooting_table.contents[5], 'lxml')

# Column labels are (group, stat) tuples collected from the header row(s).
all_shooting_df_index = []

all_shooting_head = all_shooting_info.find('thead')

for tr in all_shooting_head.find_all('tr'):

    # Skip the grouping row explicitly. This replaces a bare `except:` that was
    # used as control flow and skipped every row carrying any class at all.
    if 'over_header' in (tr.get('class') or []):
        continue

    for th in tr.find_all('th'):
        stat = th['data-stat']

        if stat == 'ranker':
            all_shooting_df_index.append(('Player', 'Rank'))
        elif stat == 'player':
            all_shooting_df_index.append(('Player', 'Player'))
        elif re.search(r'DUMMY[\d]?[\s]?', stat):
            # Columns whose data-stat contains 'DUMMY' get no label; the
            # matching body cells are dropped below.
            continue
        elif th.text in ['FG Add', 'TS Add']:
            all_shooting_df_index.append(('Extra', th.text))
        elif th['data-over-header'] == '\xa0':
            # A non-breaking-space group name is filed under 'Player'.
            all_shooting_df_index.append(('Player', th.text))
        else:
            all_shooting_df_index.append((th['data-over-header'], th.text))

all_shooting_columns = pd.MultiIndex.from_tuples(all_shooting_df_index, names=['Header', 'Stat'])

# One list per body row: the row-header cell's text followed by the text of
# every data cell whose data-stat does not contain 'DUMMY'.
all_shooting_data = all_shooting_info.find('tbody')

all_shooting_rows = []

for tr in all_shooting_data.find_all('tr'):
    all_shooting_list = [tr.find('th').text]
    all_shooting_list.extend(
        td.text
        for td in tr.find_all('td')
        if not re.search(r'DUMMY[\d]?[\s]?', td['data-stat'])
    )
    all_shooting_rows.append(all_shooting_list)

# Construct the frame in one step instead of enlarging it one row at a time
# with .loc, which re-allocates the frame for every row.
all_shooting_df = pd.DataFrame(all_shooting_rows, columns=all_shooting_columns)

all_shooting_df.head()

Header,Player,Player,Player,Player,Player,Player,Player,% of FGA by Distance,% of FGA by Distance,% of FGA by Distance,...,FG% by Distance,FG% by Distance,% of FG Ast'd,% of FG Ast'd,Dunks,Dunks,Corner 3s,Corner 3s,Heaves,Heaves
Stat,Rank,Player,Age,G,MP,FG%,Dist.,2P,0-3,3-10,...,16-3P,3P,2P,3P,%FGA,#,%3PA,3P%,Att.,#
0,1,James Harden,30,68,2483,0.444,16.6,0.443,0.243,0.177,...,0.5,0.355,0.139,0.171,0.015,20,0.045,0.342,1,0
1,2,P.J. Tucker,34,72,2467,0.415,18.4,0.298,0.148,0.089,...,0.545,0.358,0.729,0.991,0.0,0,0.766,0.384,0,0
2,3,Russell Westbrook,31,57,2049,0.472,10.1,0.834,0.415,0.142,...,0.402,0.258,0.235,0.655,0.041,40,0.192,0.22,0,0
3,4,Danuel House,26,63,1913,0.427,17.6,0.341,0.243,0.065,...,0.2,0.363,0.733,0.946,0.072,31,0.414,0.354,2,0
4,5,Ben McLemore,26,71,1619,0.444,21.5,0.171,0.136,0.031,...,,0.4,0.869,0.967,0.04,20,0.294,0.466,2,0


In [41]:
##################################################################################################
# Play-by-Play Table
##################################################################################################

# Wrapper <div> for the play-by-play table.
all_pbp_table = soup.find('div', attrs={'id': 'all_pbp'})

# The wrapper's child at index 5 is re-parsed as markup of its own --
# presumably the HTML comment in which the site ships this table; TODO confirm
# the index is stable across pages.
all_pbp_info = BeautifulSoup(all_pbp_table.contents[5], 'lxml')

# Column labels are (group, stat) tuples collected from the header row(s).
# The list keeps its original name, `df_index`.
df_index = []

all_pbp_head = all_pbp_info.find('thead')

for tr in all_pbp_head.find_all('tr'):

    # Skip the grouping row explicitly. This replaces a bare `except:` that was
    # used as control flow and skipped every row carrying any class at all.
    if 'over_header' in (tr.get('class') or []):
        continue

    for th in tr.find_all('th'):
        # The rank cell, and any cell whose group name is a plain or
        # non-breaking space, are filed under 'Player'; every other cell keeps
        # its own group name. The rank test comes first so that cell's
        # data-over-header attribute is never read.
        if th['data-stat'] == 'ranker' or th['data-over-header'] in (' ', '\xa0'):
            df_index.append(('Player', th.text))
        else:
            df_index.append((th['data-over-header'], th.text))

all_pbp_columns = pd.MultiIndex.from_tuples(df_index, names=['Header', 'Stat'])

# One list per body row: the row-header cell's text followed by the text of
# every data cell, in document order.
all_pbp_data = all_pbp_info.find('tbody')

all_pbp_rows = [
    [tr.find('th').text] + [td.text for td in tr.find_all('td')]
    for tr in all_pbp_data.find_all('tr')
]

# Construct the frame in one step instead of enlarging it one row at a time
# with .loc, which re-allocates the frame for every row.
all_pbp_df = pd.DataFrame(all_pbp_rows, columns=all_pbp_columns)

all_pbp_df.head()

Header,Player,Player,Player,Player,Player,Position Estimate,Position Estimate,Position Estimate,Position Estimate,Position Estimate,...,+/- Per 100 Poss.,Turnovers,Turnovers,Fouls Committed,Fouls Committed,Fouls Drawn,Fouls Drawn,Misc.,Misc.,Misc.
Stat,Rk,Player,Age,G,MP,PG%,SG%,SF%,PF%,C%,...,On-Off,BadPass,LostBall,Shoot,Off.,Shoot,Off.,PGA,And1,Blkd
0,1,James Harden,30,68,2483,1%,43%,52%,4%,,...,9.1,145,95,80,51,326,12,1226,75,92
1,2,P.J. Tucker,34,72,2467,,,3%,71%,26%,...,1.0,35,9,121,15,11,42,286,2,13
2,3,Russell Westbrook,31,57,2049,100%,,,,,...,-0.5,131,80,68,25,187,5,1014,46,81
3,4,Danuel House,26,63,1913,,1%,63%,36%,1%,...,0.5,28,10,68,6,41,21,210,8,19
4,5,Ben McLemore,26,71,1619,47%,52%,1%,,,...,4.4,22,10,79,9,26,8,155,3,21


In [43]:
##################################################################################################
# All Playoffs Totals Table
##################################################################################################

# Wrapper <div> for the playoffs totals table.
all_playoffs_totals_table = soup.find('div', attrs={'id': 'all_playoffs_totals'})

# The wrapper's child at index 5 is re-parsed as markup of its own --
# presumably the HTML comment in which the site ships this table; TODO confirm
# the index is stable across pages.
all_playoffs_totals_info = BeautifulSoup(all_playoffs_totals_table.contents[5], 'lxml')

# Column labels come from the <thead> cells; a header whose text is a lone
# non-breaking space is labelled 'Player'.
all_playoffs_totals_head = all_playoffs_totals_info.find('thead')

all_playoffs_totals_df_index = [
    'Player' if th.text == '\xa0' else th.text
    for th in all_playoffs_totals_head.find_all('th')
]

# One list per body row: the row-header cell's text followed by the text of
# every data cell, in document order.
all_playoffs_totals_data = all_playoffs_totals_info.find('tbody')

all_playoffs_totals_rows = [
    [tr.find('th').text] + [td.text for td in tr.find_all('td')]
    for tr in all_playoffs_totals_data.find_all('tr')
]

# Construct the frame in one step instead of enlarging it one row at a time
# with .loc, which re-allocates the frame for every row.
all_playoffs_totals_df = pd.DataFrame(all_playoffs_totals_rows, columns=all_playoffs_totals_df_index)

all_playoffs_totals_df.head()

Unnamed: 0,Rk,Player,Age,G,GS,MP,FG,FGA,FG%,3P,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1,James Harden,30,1,1,34,12,22,0.545,6,...,0.875,2,9,11,3,0,0,3,4,37
1,2,P.J. Tucker,34,1,1,34,4,9,0.444,3,...,,2,2,4,4,0,0,0,4,11
2,3,Jeff Green,33,1,0,32,8,12,0.667,3,...,0.75,0,6,6,4,1,0,1,3,22
3,4,Robert Covington,29,1,1,31,1,6,0.167,1,...,,0,5,5,3,1,1,1,5,3
4,5,Eric Gordon,31,1,1,30,7,17,0.412,2,...,1.0,0,3,3,4,0,0,2,3,21


In [45]:
##################################################################################################
# All Playoffs Per Game Table
##################################################################################################

# Wrapper <div> for the playoffs per-game table.
all_playoffs_per_game_table = soup.find('div', attrs={'id': 'all_playoffs_per_game'})

# The wrapper's child at index 5 is re-parsed as markup of its own --
# presumably the HTML comment in which the site ships this table; TODO confirm
# the index is stable across pages.
all_playoffs_per_game_info = BeautifulSoup(all_playoffs_per_game_table.contents[5], 'lxml')

# Column labels come from the <thead> cells; a header whose text is a lone
# non-breaking space is labelled 'Player'.
all_playoffs_per_game_head = all_playoffs_per_game_info.find('thead')

all_playoffs_per_game_df_index = [
    'Player' if th.text == '\xa0' else th.text
    for th in all_playoffs_per_game_head.find_all('th')
]

# One list per body row: the row-header cell's text followed by the text of
# every data cell, in document order.
all_playoffs_per_game_data = all_playoffs_per_game_info.find('tbody')

all_playoffs_per_game_rows = [
    [tr.find('th').text] + [td.text for td in tr.find_all('td')]
    for tr in all_playoffs_per_game_data.find_all('tr')
]

# Construct the frame in one step instead of enlarging it one row at a time
# with .loc, which re-allocates the frame for every row.
all_playoffs_per_game_df = pd.DataFrame(all_playoffs_per_game_rows, columns=all_playoffs_per_game_df_index)

all_playoffs_per_game_df.head()

Unnamed: 0,Rk,Player,Age,G,GS,MP,FG,FGA,FG%,3P,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1,James Harden,30,1,1,34.0,12.0,22.0,0.545,6.0,...,0.875,2.0,9.0,11.0,3.0,0.0,0.0,3.0,4.0,37.0
1,2,P.J. Tucker,34,1,1,34.0,4.0,9.0,0.444,3.0,...,,2.0,2.0,4.0,4.0,0.0,0.0,0.0,4.0,11.0
2,3,Jeff Green,33,1,0,32.0,8.0,12.0,0.667,3.0,...,0.75,0.0,6.0,6.0,4.0,1.0,0.0,1.0,3.0,22.0
3,4,Robert Covington,29,1,1,31.0,1.0,6.0,0.167,1.0,...,,0.0,5.0,5.0,3.0,1.0,1.0,1.0,5.0,3.0
4,5,Eric Gordon,31,1,1,30.0,7.0,17.0,0.412,2.0,...,1.0,0.0,3.0,3.0,4.0,0.0,0.0,2.0,3.0,21.0


In [46]:
##################################################################################################
# All Playoffs Per Minute Table
##################################################################################################

# Wrapper <div> for the playoffs per-minute table.
all_playoffs_per_minute_table = soup.find('div', attrs={'id': 'all_playoffs_per_minute'})

# The wrapper's child at index 5 is re-parsed as markup of its own --
# presumably the HTML comment in which the site ships this table; TODO confirm
# the index is stable across pages.
all_playoffs_per_minute_info = BeautifulSoup(all_playoffs_per_minute_table.contents[5], 'lxml')

# Column labels come from the <thead> cells; a header whose text is a lone
# non-breaking space is labelled 'Player'.
all_playoffs_per_minute_head = all_playoffs_per_minute_info.find('thead')

all_playoffs_per_minute_df_index = [
    'Player' if th.text == '\xa0' else th.text
    for th in all_playoffs_per_minute_head.find_all('th')
]

# One list per body row: the row-header cell's text followed by the text of
# every data cell, in document order.
all_playoffs_per_minute_data = all_playoffs_per_minute_info.find('tbody')

all_playoffs_per_minute_rows = [
    [tr.find('th').text] + [td.text for td in tr.find_all('td')]
    for tr in all_playoffs_per_minute_data.find_all('tr')
]

# Construct the frame in one step instead of enlarging it one row at a time
# with .loc, which re-allocates the frame for every row.
all_playoffs_per_minute_df = pd.DataFrame(all_playoffs_per_minute_rows, columns=all_playoffs_per_minute_df_index)

all_playoffs_per_minute_df.head()

Unnamed: 0,Rk,Player,Age,G,GS,MP,FG,FGA,FG%,3P,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1,James Harden,30,1,1,34,12.7,23.3,0.545,6.4,...,0.875,2.1,9.5,11.6,3.2,0.0,0.0,3.2,4.2,39.2
1,2,P.J. Tucker,34,1,1,34,4.2,9.5,0.444,3.2,...,,2.1,2.1,4.2,4.2,0.0,0.0,0.0,4.2,11.6
2,3,Jeff Green,33,1,0,32,9.0,13.5,0.667,3.4,...,0.75,0.0,6.8,6.8,4.5,1.1,0.0,1.1,3.4,24.8
3,4,Robert Covington,29,1,1,31,1.2,7.0,0.167,1.2,...,,0.0,5.8,5.8,3.5,1.2,1.2,1.2,5.8,3.5
4,5,Eric Gordon,31,1,1,30,8.4,20.4,0.412,2.4,...,1.0,0.0,3.6,3.6,4.8,0.0,0.0,2.4,3.6,25.2


In [47]:
##################################################################################################
# All Playoffs Per Poss Table
##################################################################################################

# Wrapper <div> for the playoffs per-possession table.
all_playoffs_per_poss_table = soup.find('div', attrs={'id': 'all_playoffs_per_poss'})

# The wrapper's child at index 5 is re-parsed as markup of its own --
# presumably the HTML comment in which the site ships this table; TODO confirm
# the index is stable across pages.
all_playoffs_per_poss_info = BeautifulSoup(all_playoffs_per_poss_table.contents[5], 'lxml')

# Column labels come from the <thead> cells; a header whose text is a lone
# non-breaking space is labelled 'Player'. Any other header text, including an
# empty string, is used as-is.
all_playoffs_per_poss_head = all_playoffs_per_poss_info.find('thead')

all_playoffs_per_poss_df_index = [
    'Player' if th.text == '\xa0' else th.text
    for th in all_playoffs_per_poss_head.find_all('th')
]

# One list per body row: the row-header cell's text followed by the text of
# every data cell, in document order.
all_playoffs_per_poss_data = all_playoffs_per_poss_info.find('tbody')

all_playoffs_per_poss_rows = [
    [tr.find('th').text] + [td.text for td in tr.find_all('td')]
    for tr in all_playoffs_per_poss_data.find_all('tr')
]

# Construct the frame in one step instead of enlarging it one row at a time
# with .loc, which re-allocates the frame for every row.
all_playoffs_per_poss_df = pd.DataFrame(all_playoffs_per_poss_rows, columns=all_playoffs_per_poss_df_index)

all_playoffs_per_poss_df.head()

Unnamed: 0,Rk,Player,Age,G,GS,MP,FG,FGA,FG%,3P,...,TRB,AST,STL,BLK,TOV,PF,PTS,Unnamed: 19,ORtg,DRtg
0,1,James Harden,30,1,1,34,17.2,31.5,0.545,8.6,...,15.8,4.3,0.0,0.0,4.3,5.7,53.0,,130,109
1,2,P.J. Tucker,34,1,1,34,5.7,12.9,0.444,4.3,...,5.7,5.7,0.0,0.0,0.0,5.7,15.8,,141,114
2,3,Jeff Green,33,1,0,32,12.2,18.3,0.667,4.6,...,9.1,6.1,1.5,0.0,1.5,4.6,33.5,,150,108
3,4,Robert Covington,29,1,1,31,1.6,9.4,0.167,1.6,...,7.9,4.7,1.6,1.6,1.6,7.9,4.7,,67,106
4,5,Eric Gordon,31,1,1,30,11.4,27.6,0.412,3.2,...,4.9,6.5,0.0,0.0,3.2,4.9,34.1,,106,113


In [48]:
##################################################################################################
# All Playoffs Advanced Table
##################################################################################################

# Wrapper <div> for the playoffs advanced-stats table.
all_playoffs_advanced_table = soup.find('div', attrs={'id': 'all_playoffs_advanced'})

# The wrapper's child at index 5 is re-parsed as markup of its own --
# presumably the HTML comment in which the site ships this table; TODO confirm
# the index is stable across pages.
all_playoffs_advanced_info = BeautifulSoup(all_playoffs_advanced_table.contents[5], 'lxml')

# Column labels come from the <thead> cells; a header whose text is a lone
# non-breaking space is labelled 'Player'.
# NOTE(review): the recorded output shows extra, empty 'Player' columns, so
# blank spacer headers appear to hit this rule too -- the label is not unique.
all_playoffs_advanced_head = all_playoffs_advanced_info.find('thead')

all_playoffs_advanced_df_index = [
    'Player' if th.text == '\xa0' else th.text
    for th in all_playoffs_advanced_head.find_all('th')
]

# One list per body row: the row-header cell's text followed by the text of
# every data cell, in document order.
all_playoffs_advanced_data = all_playoffs_advanced_info.find('tbody')

all_playoffs_advanced_rows = [
    [tr.find('th').text] + [td.text for td in tr.find_all('td')]
    for tr in all_playoffs_advanced_data.find_all('tr')
]

# Construct the frame in one step instead of enlarging it one row at a time
# with .loc, which re-allocates the frame for every row.
all_playoffs_advanced_df = pd.DataFrame(all_playoffs_advanced_rows, columns=all_playoffs_advanced_df_index)

all_playoffs_advanced_df.head()

Unnamed: 0,Rk,Player,Age,G,MP,PER,TS%,3PAr,FTr,ORB%,...,Player.1,OWS,DWS,WS,WS/48,Player.2,OBPM,DBPM,BPM,VORP
0,1,James Harden,30,1,34,34.4,0.725,0.591,0.364,6.6,...,,0.2,0.1,0.2,0.326,,,,,
1,2,P.J. Tucker,34,1,34,12.5,0.611,0.889,0.0,6.6,...,,0.1,0.0,0.1,0.185,,,,,
2,3,Jeff Green,33,1,32,27.3,0.799,0.583,0.333,0.0,...,,0.2,0.1,0.2,0.328,,,,,
3,4,Robert Covington,29,1,31,1.0,0.25,0.833,0.0,0.0,...,,-0.1,0.1,0.0,-0.026,,,,,
4,5,Eric Gordon,31,1,30,15.3,0.547,0.353,0.294,0.0,...,,0.0,0.0,0.0,0.065,,,,,


In [49]:
##################################################################################################
# All Playoffs Shooting Table RETRY
##################################################################################################

# Wrapper <div> for the playoffs shooting table.
all_playoffs_shooting_table = soup.find('div', attrs={'id': 'all_playoffs_shooting'})

# The wrapper's child at index 5 is re-parsed as markup of its own --
# presumably the HTML comment in which the site ships this table; TODO confirm
# the index is stable across pages.
all_playoffs_shooting_info = BeautifulSoup(all_playoffs_shooting_table.contents[5], 'lxml')

# Column labels are (group, stat) tuples collected from the header row(s).
all_playoffs_shooting_df_index = []

all_playoffs_shooting_head = all_playoffs_shooting_info.find('thead')

for tr in all_playoffs_shooting_head.find_all('tr'):

    # Skip the grouping row explicitly. This replaces a bare `except:` that was
    # used as control flow and skipped every row carrying any class at all.
    if 'over_header' in (tr.get('class') or []):
        continue

    for th in tr.find_all('th'):
        stat = th['data-stat']

        if stat == 'ranker':
            all_playoffs_shooting_df_index.append(('Player', 'Rank'))
        elif stat == 'player':
            all_playoffs_shooting_df_index.append(('Player', 'Player'))
        elif re.search(r'DUMMY[\d]?[\s]?', stat):
            # Columns whose data-stat contains 'DUMMY' get no label; the
            # matching body cells are dropped below.
            continue
        elif th.text in ['FG Add', 'TS Add']:
            all_playoffs_shooting_df_index.append(('Extra', th.text))
        elif th['data-over-header'] == '\xa0':
            # A non-breaking-space group name is filed under 'Player'.
            all_playoffs_shooting_df_index.append(('Player', th.text))
        else:
            all_playoffs_shooting_df_index.append((th['data-over-header'], th.text))

all_playoffs_shooting_columns = pd.MultiIndex.from_tuples(all_playoffs_shooting_df_index, names=['Header', 'Stat'])

# One list per body row: the row-header cell's text followed by the text of
# every data cell whose data-stat does not contain 'DUMMY'.
all_playoffs_shooting_data = all_playoffs_shooting_info.find('tbody')

all_playoffs_shooting_rows = []

for tr in all_playoffs_shooting_data.find_all('tr'):
    all_playoffs_shooting_list = [tr.find('th').text]
    all_playoffs_shooting_list.extend(
        td.text
        for td in tr.find_all('td')
        if not re.search(r'DUMMY[\d]?[\s]?', td['data-stat'])
    )
    all_playoffs_shooting_rows.append(all_playoffs_shooting_list)

# Construct the frame in one step instead of enlarging it one row at a time
# with .loc, which re-allocates the frame for every row.
all_playoffs_shooting_df = pd.DataFrame(all_playoffs_shooting_rows, columns=all_playoffs_shooting_columns)

all_playoffs_shooting_df.head()

Header,Player,Player,Player,Player,Player,Player,Player,% of FGA by Distance,% of FGA by Distance,% of FGA by Distance,...,FG% by Distance,FG% by Distance,% of FG Ast'd,% of FG Ast'd,Dunks,Dunks,Corner 3s,Corner 3s,Heaves,Heaves
Stat,Rank,Player,Age,G,MP,FG%,Dist.,2P,0-3,3-10,...,16-3P,3P,2P,3P,%FGA,#,%3PA,3P%,Att.,#
0,1,James Harden,30,1,34,0.545,17.9,0.409,0.091,0.273,...,,0.462,0.333,0.333,0.0,0,0.077,1.0,0,0
1,2,P.J. Tucker,34,1,34,0.444,21.0,0.111,0.0,0.111,...,,0.375,1.0,1.0,0.0,0,0.875,0.429,0,0
2,3,Jeff Green,33,1,32,0.667,16.6,0.417,0.083,0.25,...,,0.429,0.0,0.667,0.0,0,0.429,0.667,0,0
3,4,Robert Covington,29,1,31,0.167,21.7,0.167,0.0,0.167,...,,0.2,,1.0,0.0,0,0.4,0.5,0,0
4,5,Eric Gordon,31,1,30,0.412,13.2,0.647,0.294,0.294,...,0.0,0.333,0.4,1.0,0.0,0,0.0,,0,0


In [50]:
##################################################################################################
# All Playoffs Play-by-Play Table
##################################################################################################

# Wrapper <div> for the playoffs play-by-play table.
all_playoffs_pbp_table = soup.find('div', attrs={'id': 'all_playoffs_pbp'})

# The wrapper's child at index 5 is re-parsed as markup of its own --
# presumably the HTML comment in which the site ships this table; TODO confirm
# the index is stable across pages.
all_playoffs_pbp_info = BeautifulSoup(all_playoffs_pbp_table.contents[5], 'lxml')

# Column labels are (group, stat) tuples collected from the header row(s).
all_playoffs_pbp_df_index = []

all_playoffs_pbp_head = all_playoffs_pbp_info.find('thead')

for tr in all_playoffs_pbp_head.find_all('tr'):

    # Skip the grouping row explicitly. This replaces a bare `except:` that was
    # used as control flow and skipped every row carrying any class at all.
    if 'over_header' in (tr.get('class') or []):
        continue

    for th in tr.find_all('th'):
        # The rank cell, and any cell whose group name is a plain or
        # non-breaking space, are filed under 'Player'; every other cell keeps
        # its own group name. The rank test comes first so that cell's
        # data-over-header attribute is never read.
        if th['data-stat'] == 'ranker' or th['data-over-header'] in (' ', '\xa0'):
            all_playoffs_pbp_df_index.append(('Player', th.text))
        else:
            all_playoffs_pbp_df_index.append((th['data-over-header'], th.text))

all_playoffs_pbp_columns = pd.MultiIndex.from_tuples(all_playoffs_pbp_df_index, names=['Header', 'Stat'])

# One list per body row: the row-header cell's text followed by the text of
# every data cell, in document order.
all_playoffs_pbp_data = all_playoffs_pbp_info.find('tbody')

all_playoffs_pbp_rows = [
    [tr.find('th').text] + [td.text for td in tr.find_all('td')]
    for tr in all_playoffs_pbp_data.find_all('tr')
]

# Construct the frame in one step instead of enlarging it one row at a time
# with .loc, which re-allocates the frame for every row.
all_playoffs_pbp_df = pd.DataFrame(all_playoffs_pbp_rows, columns=all_playoffs_pbp_columns)

all_playoffs_pbp_df.head()

Header,Player,Player,Player,Player,Player,Position Estimate,Position Estimate,Position Estimate,Position Estimate,Position Estimate,...,+/- Per 100 Poss.,Turnovers,Turnovers,Fouls Committed,Fouls Committed,Fouls Drawn,Fouls Drawn,Misc.,Misc.,Misc.
Stat,Rk,Player,Age,G,MP,PG%,SG%,SF%,PF%,C%,...,On-Off,BadPass,LostBall,Shoot,Off.,Shoot,Off.,PGA,And1,Blkd
0,1,James Harden,30,1,34,,86%,14%,,,...,-4.1,2,0,0,1,4,0,8,2,0
1,2,P.J. Tucker,34,1,34,,,11%,89%,,...,2.0,0,0,1,0,0,1,11,0,0
2,3,Jeff Green,33,1,32,,,,,100%,...,72.7,0,0,1,1,3,2,9,2,0
3,4,Robert Covington,29,1,31,,,,56%,44%,...,-60.9,0,0,4,1,0,0,7,0,0
4,5,Eric Gordon,31,1,30,62%,11%,27%,,,...,-61.5,1,1,2,0,2,1,12,1,1
