In [1]:
from bs4 import BeautifulSoup
import requests as rq
from datetime import datetime
import pandas as pd
import numpy as np
import re


In [2]:
url = 'https://www.basketball-reference.com/teams/HOU/2020.html'

agent = {'User-Agent': 'Mozilla/5.0'}

url_text = rq.get(url, headers=agent).text

soup = BeautifulSoup(url_text, 'html.parser')

In [6]:
##################################################################################
# Roster Table
##################################################################################

roster_df = pd.DataFrame()

roster_table = soup.find('table', attrs={'id': 'roster'})

roster_info = roster_table.find('tbody')

for tr in roster_info.find_all('tr'):

        nmbr = tr.find('th')

        roster_dict = {}
        
        if nmbr['data-stat'] == 'number':
            
            roster_dict[nmbr['data-stat']] = nmbr.text

        
        for td in tr.find_all('td'):
            
            if td['data-stat'] == 'player':
                roster_dict['link'] = td.contents[0]['href'] 
            
            roster_dict[td['data-stat']] = td.text

        roster_df = roster_df.append(roster_dict, ignore_index=True)

roster_df.head()

Unnamed: 0,birth_country,birth_date,college,height,link,number,player,pos,weight,years_experience
0,us,"May 5, 1985",Texas,6-5,/players/t/tuckepj01.html,17,P.J. Tucker,PF,245,8
1,us,"February 11, 1993",Kansas,6-3,/players/m/mclembe01.html,16,Ben McLemore,SG,195,6
2,us,"August 26, 1989",Arizona State,6-5,/players/h/hardeja01.html,13,James Harden,SG,220,10
3,us,"August 1, 1992",Duke,6-3,/players/r/riverau01.html,25,Austin Rivers,SG,200,7
4,us,"June 7, 1993",Texas A&M,6-6,/players/h/houseda01.html,4,Danuel House,SF,215,3


In [7]:
##################################################################################
# Per Game Table
##################################################################################
pg_df = pd.DataFrame()

pg_table = soup.find('div', attrs={'id': 'all_per_game'})

pg_info = BeautifulSoup(pg_table.contents[5], 'lxml')

pg_data = pg_info.find('tbody')

for tr in pg_data.find_all('tr'):
    
    rnk = tr.find('th')
    
    pg_dict = {}
    
    pg_dict['rank'] = rnk.text
     

    for td in tr.find_all('td'):
        
        if td['data-stat'] == 'player':
            pg_dict['link'] = td.contents[0]['href']  
        
        pg_dict[td['data-stat']] = td.text

    pg_df = pg_df.append(pg_dict, ignore_index=True)

pg_df.head()

Unnamed: 0,age,ast_per_g,blk_per_g,drb_per_g,efg_pct,fg2_pct,fg2_per_g,fg2a_per_g,fg3_pct,fg3_per_g,...,link,mp_per_g,orb_per_g,pf_per_g,player,pts_per_g,rank,stl_per_g,tov_per_g,trb_per_g
0,30,7.5,0.9,5.5,0.543,0.556,5.5,9.9,0.355,4.4,...,/players/h/hardeja01.html,36.5,1.0,3.3,James Harden,34.3,1,1.8,4.5,6.6
1,31,7.0,0.4,6.2,0.493,0.514,9.6,18.7,0.258,1.0,...,/players/w/westbru01.html,35.9,1.8,3.5,Russell Westbrook,27.2,2,1.6,4.5,7.9
2,34,1.6,0.5,5.1,0.541,0.551,1.0,1.8,0.358,1.5,...,/players/t/tuckepj01.html,34.3,1.6,3.2,P.J. Tucker,6.9,3,1.1,1.0,6.6
3,29,1.5,2.2,6.8,0.509,0.61,1.6,2.7,0.315,2.4,...,/players/c/covinro01.html,33.0,1.2,3.3,Robert Covington,11.6,4,1.6,1.4,8.0
4,25,1.2,1.8,9.5,0.629,0.629,6.3,9.9,,0.0,...,/players/c/capelca01.html,32.8,4.3,2.6,Clint Capela,13.9,5,0.8,1.6,13.8


In [8]:
##################################################################################
# All Totals Table
##################################################################################

at_df = pd.DataFrame()

at_table = soup.find('div', attrs={'id': 'all_totals'})

at_info = BeautifulSoup(at_table.contents[5], 'lxml')

at_data = at_info.find('tbody')

for tr in at_data.find_all('tr'):
    
    rnk = tr.find('th')
    
    at_dict = {}
    
    at_dict['rank'] = rnk.text

    for td in tr.find_all('td'):
        
        if td['data-stat'] == 'player':
            at_dict['link'] = td.contents[0]['href'] 
            
        at_dict[td['data-stat']] = td.text

    at_df = at_df.append(at_dict, ignore_index=True)

at_df.head()

Unnamed: 0,age,ast,blk,drb,efg_pct,fg,fg2,fg2_pct,fg2a,fg3,...,link,mp,orb,pf,player,pts,rank,stl,tov,trb
0,30,512,60,376,0.543,672,373,0.556,671,299,...,/players/h/hardeja01.html,2483,70,227,James Harden,2335,1,125,308,446
1,34,115,36,364,0.541,177,70,0.551,127,107,...,/players/t/tuckepj01.html,2467,112,229,P.J. Tucker,500,2,77,72,476
2,31,401,20,351,0.493,604,549,0.514,1068,55,...,/players/w/westbru01.html,2049,100,200,Russell Westbrook,1553,3,93,255,451
3,26,85,34,210,0.546,230,101,0.549,184,129,...,/players/h/houseda01.html,1913,57,132,Danuel House,662,4,67,54,267
4,26,58,16,136,0.61,242,61,0.656,93,181,...,/players/m/mclembe01.html,1619,21,155,Ben McLemore,715,5,45,50,157


In [9]:
##################################################################################
# Per 36 Minutes Table
##################################################################################

per_mp_df = pd.DataFrame()

per_mp_table = soup.find('div', attrs={'id': 'all_per_minute'})

per_mp_info = BeautifulSoup(per_mp_table.contents[5], 'lxml')

per_mp_data = per_mp_info.find('tbody')

for tr in per_mp_data.find_all('tr'):
    
    rnk = tr.find('th')
    
    per_mp_dict = {}
    
    per_mp_dict['rank'] = rnk.text

    for td in tr.find_all('td'):
        
        if td['data-stat'] == 'player':
            per_mp_dict['link'] = td.contents[0]['href'] 

        per_mp_dict[td['data-stat']] = td.text

    per_mp_df = per_mp_df.append(per_mp_dict, ignore_index=True)

per_mp_df.head()

Unnamed: 0,age,ast_per_mp,blk_per_mp,drb_per_mp,fg2_pct,fg2_per_mp,fg2a_per_mp,fg3_pct,fg3_per_mp,fg3a_per_mp,...,link,mp,orb_per_mp,pf_per_mp,player,pts_per_mp,rank,stl_per_mp,tov_per_mp,trb_per_mp
0,30,7.4,0.9,5.5,0.556,5.4,9.7,0.355,4.3,12.2,...,/players/h/hardeja01.html,2483,1.0,3.3,James Harden,33.9,1,1.8,4.5,6.5
1,34,1.7,0.5,5.3,0.551,1.0,1.9,0.358,1.6,4.4,...,/players/t/tuckepj01.html,2467,1.6,3.3,P.J. Tucker,7.3,2,1.1,1.1,6.9
2,31,7.0,0.4,6.2,0.514,9.6,18.8,0.258,1.0,3.7,...,/players/w/westbru01.html,2049,1.8,3.5,Russell Westbrook,27.3,3,1.6,4.5,7.9
3,26,1.6,0.6,4.0,0.549,1.9,3.5,0.363,2.4,6.7,...,/players/h/houseda01.html,1913,1.1,2.5,Danuel House,12.5,4,1.3,1.0,5.0
4,26,1.3,0.4,3.0,0.656,1.4,2.1,0.4,4.0,10.1,...,/players/m/mclembe01.html,1619,0.5,3.4,Ben McLemore,15.9,5,1.0,1.1,3.5


In [10]:
##################################################################################################
# Per 100 Poss Table
##################################################################################################

per_poss_df = pd.DataFrame()

per_poss_table = soup.find('div', attrs={'id': 'all_per_poss'})

per_poss_info = BeautifulSoup(per_poss_table.contents[5], 'lxml')

per_poss_data = per_poss_info.find('tbody')

for tr in per_poss_data.find_all('tr'):
    
    rnk = tr.find('th')
    
    per_poss_dict = {}
    
    per_poss_dict['rank'] = rnk.text

    for td in tr.find_all('td'):
        
        if td['data-stat'] == 'player':
            per_poss_dict['link'] = td.contents[0]['href'] 
        
        per_poss_dict[td['data-stat']] = td.text

    per_poss_df = per_poss_df.append(per_poss_dict, ignore_index=True)

per_poss_df.head()

Unnamed: 0,Unnamed: 1,age,ast_per_poss,blk_per_poss,def_rtg,drb_per_poss,fg2_pct,fg2_per_poss,fg2a_per_poss,fg3_pct,...,mp,off_rtg,orb_per_poss,pf_per_poss,player,pts_per_poss,rank,stl_per_poss,tov_per_poss,trb_per_poss
0,,30,9.5,1.1,108,7.0,0.556,7.0,12.5,0.355,...,2483,120,1.3,4.2,James Harden,43.5,1,2.3,5.7,8.3
1,,34,2.2,0.7,111,6.8,0.551,1.3,2.4,0.358,...,2467,114,2.1,4.3,P.J. Tucker,9.4,2,1.4,1.4,8.9
2,,31,9.1,0.5,109,7.9,0.514,12.4,24.1,0.258,...,2049,106,2.3,4.5,Russell Westbrook,35.1,3,2.1,5.8,10.2
3,,26,2.1,0.8,112,5.1,0.549,2.4,4.5,0.363,...,1913,114,1.4,3.2,Danuel House,16.0,4,1.6,1.3,6.5
4,,26,1.7,0.5,113,3.9,0.656,1.7,2.7,0.4,...,1619,118,0.6,4.4,Ben McLemore,20.4,5,1.3,1.4,4.5


In [11]:
##################################################################################################
# Advanced Table
##################################################################################################

adv_df = pd.DataFrame()

adv_table = soup.find('div', attrs={'id': 'all_advanced'})

adv_info = BeautifulSoup(adv_table.contents[5], 'lxml')

adv_data = adv_info.find('tbody')

for tr in adv_data.find_all('tr'):
    
    rnk = tr.find('th')
    
    adv_dict = {}
    
    adv_dict['rank'] = rnk.text

    for td in tr.find_all('td'):
        
        if td['data-stat'] == 'player':
            adv_dict['link'] = td.contents[0]['href'] 
        
        adv_dict[td['data-stat']] = td.text

    adv_df = adv_df.append(adv_dict, ignore_index=True)

adv_df.head()

Unnamed: 0,age,ast_pct,blk_pct,bpm,bpm-dum,dbpm,drb_pct,dws,fg3a_per_fga_pct,fta_per_fga_pct,...,rank,stl_pct,tov_pct,trb_pct,ts_pct,usg_pct,vorp,ws,ws-dum,ws_per_48
0,30,35.9,2.1,9.7,,1.5,16.0,3.2,0.557,0.528,...,1,2.3,14.2,9.4,0.626,36.3,7.3,13.1,,0.254
1,34,6.0,1.2,-1.8,,0.5,15.6,2.4,0.702,0.113,...,2,1.4,13.9,10.1,0.559,8.7,0.1,4.0,,0.078
2,31,35.6,0.8,1.5,,-0.2,18.1,2.5,0.166,0.297,...,3,2.1,15.0,11.5,0.536,34.4,1.8,4.2,,0.098
3,26,6.1,1.5,-0.6,,0.0,11.6,1.8,0.659,0.167,...,4,1.6,8.5,7.3,0.572,13.7,0.7,3.6,,0.091
4,26,5.2,0.8,-0.5,,-0.6,8.9,1.1,0.829,0.123,...,5,1.3,8.0,5.1,0.622,16.0,0.6,3.3,,0.099


In [12]:
##################################################################################################
# Adjusted Shooting Table
##################################################################################################

adj_shooting_df = pd.DataFrame()

adj_shooting_table = soup.find('div', attrs={'id': 'all_adj-shooting'})

adj_shooting_info = BeautifulSoup(adj_shooting_table.contents[5], 'lxml')

adj_shooting_data = adj_shooting_info.find('tbody')

for tr in adj_shooting_data.find_all('tr'):
    
    rnk = tr.find('th')
    
    adj_shooting_dict = {}
    
    adj_shooting_dict['rank'] = rnk.text

    for td in tr.find_all('td'):
        
        if td['data-stat'] == 'player':
            adj_shooting_dict['link'] = td.contents[0]['href'] 
        
        adj_shooting_dict[td['data-stat']] = td.text

    adj_shooting_df = adj_shooting_df.append(adj_shooting_dict, ignore_index=True)

names = adj_shooting_df.columns

for n in names:
    dummy = re.match('DUMMY[\d]?[\s]?', n)
    if dummy:
        adj_shooting_df = adj_shooting_df.drop(dummy.group(), axis=1)
        
adj_shooting_df.head()

Unnamed: 0,adj_efg_pct,adj_fg2_pct,adj_fg3_pct,adj_fg3a_rate,adj_fg_pct,adj_ft_pct,adj_ft_rate,adj_ts_pct,age,efg_pct,...,fg_pts_added,ft_pct,ft_rate,g,link,mp,player,rank,ts_pct,ts_pts_added
0,103,106,99,145,96,112,203,111,30,0.543,...,41.7,0.865,0.528,68,/players/h/hardeja01.html,2483,James Harden,1,0.626,227.5
1,93,98,72,43,102,99,114,95,31,0.493,...,-91.9,0.763,0.297,57,/players/w/westbru01.html,2049,Russell Westbrook,2,0.536,-82.6
2,115,125,112,216,97,97,47,110,26,0.61,...,88.6,0.746,0.123,71,/players/m/mclembe01.html,1619,Ben McLemore,3,0.622,66.2
3,103,105,102,171,93,105,64,101,26,0.546,...,18.9,0.811,0.167,63,/players/h/houseda01.html,1913,Danuel House,4,0.572,8.5
4,98,95,99,140,92,91,85,96,27,0.517,...,-11.9,0.703,0.222,68,/players/r/riverau01.html,1594,Austin Rivers,5,0.542,-25.0


In [13]:
##################################################################################################
# All Shooting Table
##################################################################################################

all_shooting_df = pd.DataFrame()

all_shooting_table = soup.find('div', attrs={'id': 'all_shooting'})

all_shooting_info = BeautifulSoup(all_shooting_table.contents[5], 'lxml')

all_shooting_data = all_shooting_info.find('tbody')

for tr in all_shooting_data.find_all('tr'):
    
    rnk = tr.find('th')
    
    all_shooting_dict = {}
    
    all_shooting_dict['rank'] = rnk.text

    for td in tr.find_all('td'):
        
        if td['data-stat'] == 'player':
            all_shooting_dict['link'] = td.contents[0]['href'] 
        
        all_shooting_dict[td['data-stat']] = td.text

    all_shooting_df = all_shooting_df.append(all_shooting_dict, ignore_index=True)

    
names = all_shooting_df.columns

for n in names:
    dummy = re.match('DUMMY[\d]?[\s]?', n)
    if dummy:
        all_shooting_df = all_shooting_df.drop(dummy.group(), axis=1)
        
all_shooting_df.head()

Unnamed: 0,age,avg_dist,fg3_heave,fg3a_heave,fg_dunk,fg_pct,fg_pct_00_03,fg_pct_03_10,fg_pct_10_16,fg_pct_16_xx,...,pct_fg3a_corner3,pct_fga_00_03,pct_fga_03_10,pct_fga_10_16,pct_fga_16_xx,pct_fga_dunk,pct_fga_fg2a,pct_fga_fg3a,player,rank
0,30,16.6,0,1,20,0.444,0.668,0.414,0.444,0.5,...,0.045,0.243,0.177,0.018,0.005,0.015,0.443,0.557,James Harden,1
1,34,18.4,0,0,0,0.415,0.619,0.474,0.467,0.545,...,0.766,0.148,0.089,0.035,0.026,0.0,0.298,0.702,P.J. Tucker,2
2,31,10.1,0,0,40,0.472,0.64,0.363,0.404,0.402,...,0.192,0.415,0.142,0.13,0.148,0.041,0.834,0.166,Russell Westbrook,3
3,26,17.6,0,2,31,0.427,0.679,0.2,0.308,0.2,...,0.414,0.243,0.065,0.024,0.009,0.072,0.341,0.659,Danuel House,4
4,26,21.5,0,2,20,0.444,0.743,0.235,1.0,,...,0.294,0.136,0.031,0.004,0.0,0.04,0.171,0.829,Ben McLemore,5


In [14]:
##################################################################################################
# Play-by-Play Table
##################################################################################################

all_pbp_df = pd.DataFrame()

all_pbp_table = soup.find('div', attrs={'id': 'all_pbp'})

all_pbp_info = BeautifulSoup(all_pbp_table.contents[5], 'lxml')

all_pbp_data = all_pbp_info.find('tbody')

for tr in all_pbp_data.find_all('tr'):
    
    rnk = tr.find('th')
    
    all_pbp_dict = {}
    
    all_pbp_dict['rank'] = rnk.text

    for td in tr.find_all('td'):
        
        if td['data-stat'] == 'player':
            all_pbp_dict['link'] = td.contents[0]['href'] 
        
        all_pbp_dict[td['data-stat']] = td.text

    all_pbp_df = all_pbp_df.append(all_pbp_dict, ignore_index=True)

    
names = all_pbp_df.columns

for n in names:
    dummy = re.match('DUMMY[\d]?[\s]?', n)
    if dummy:
        all_pbp_df = all_pbp_df.drop(dummy.group(), axis=1)
        
all_pbp_df.head()

Unnamed: 0,age,and1s,astd_pts,drawn_offensive,drawn_shooting,fouls_offensive,fouls_shooting,g,link,mp,...,pct_2,pct_3,pct_4,pct_5,player,plus_minus_net,plus_minus_on,rank,tov_bad_pass,tov_lost_ball
0,30,75,1226,12,326,51,80,68,/players/h/hardeja01.html,2483,...,43%,52%,4%,,James Harden,9.1,5.6,1,145,95
1,34,2,286,42,11,15,121,72,/players/t/tuckepj01.html,2467,...,,3%,71%,26%,P.J. Tucker,1.0,3.3,2,35,9
2,31,46,1014,5,187,25,68,57,/players/w/westbru01.html,2049,...,,,,,Russell Westbrook,-0.5,2.8,3,131,80
3,26,8,210,21,41,6,68,63,/players/h/houseda01.html,1913,...,1%,63%,36%,1%,Danuel House,0.5,3.2,4,28,10
4,26,3,155,8,26,9,79,71,/players/m/mclembe01.html,1619,...,52%,1%,,,Ben McLemore,4.4,5.3,5,22,10


In [65]:
##################################################################################################
# Play-by-Play Table
##################################################################################################

all_pbp_df = pd.DataFrame()

all_pbp_table = soup.find('div', attrs={'id': 'all_pbp'})

all_pbp_info = BeautifulSoup(all_pbp_table.contents[5], 'lxml')

df_index = []

all_pbp_head = all_pbp_info.find('thead')

for tr in all_pbp_head.find_all('tr'):
    try:
        if tr['class'][0]=='over_header':
            pass
    except:
        #df_index.append(tr.text)
        for th in tr.find_all('th'):
            if th['data-stat'] == 'ranker' or th['data-over-header'] == ' ':
                df_index.append(('Player', th.text))
            else:
                if th['data-over-header'] == '\xa0':
                    df_index.append(('Player', th.text))
                else:
                    df_index.append((th['data-over-header'], th.text))

df_index

[('Player', 'Rk'),
 ('Player', 'Player'),
 ('Player', 'Age'),
 ('Player', 'G'),
 ('Player', 'MP'),
 ('Position Estimate', 'PG%'),
 ('Position Estimate', 'SG%'),
 ('Position Estimate', 'SF%'),
 ('Position Estimate', 'PF%'),
 ('Position Estimate', 'C%'),
 ('+/- Per 100 Poss.', 'OnCourt'),
 ('+/- Per 100 Poss.', 'On-Off'),
 ('Turnovers', 'BadPass'),
 ('Turnovers', 'LostBall'),
 ('Fouls Committed', 'Shoot'),
 ('Fouls Committed', 'Off.'),
 ('Fouls Drawn', 'Shoot'),
 ('Fouls Drawn', 'Off.'),
 ('Misc.', 'PGA'),
 ('Misc.', 'And1'),
 ('Misc.', 'Blkd')]

In [15]:
##################################################################################################
# Play-by-Play Table
##################################################################################################

all_pbp_df = pd.DataFrame()

all_pbp_table = soup.find('div', attrs={'id': 'all_pbp'})

all_pbp_info = BeautifulSoup(all_pbp_table.contents[5], 'lxml')

all_pbp_data = all_pbp_info.find('tbody')

all_pbp_head = all_pbp_info.find('thead')

for th in all_pbp_head.find_all('th', attrs={'class': 'over_header'}):
    i = 0
#    try:
#         if th.has_attr('colspan'):
#             print(th.attrs)

    for tr in all_pbp_data.find_all('tr'):

        i += 1

        rnk = tr.find('th')

        all_pbp_dict = {}

        for td in tr.find_all('td'):
            
            all_pbp_dict[('Player', 'rank')] = rnk.text
            
            if i < 8:
                
                if td['data-stat'] == 'player':
                    all_pbp_dict[('Player', 'link')] = td.contents[0]['href'] 
                #print(td)
                all_pbp_dict[('Player', td['data-stat'])] = td.text

            elif i >= 8 & i < 14:
                
                all_pbp_dict[('% of FGA by Distance', td['data-stat'])] = td.text

            elif i >= 14 & i < 20:

                all_pbp_dict[('FG% by Distance', td['data-stat'])] = td.text

            elif i >= 20 & i < 22:

                all_pbp_dict[('% of FG Ast\'d', td['data-stat'])] = td.text

            elif i >= 22 & i < 24:
                
                all_pbp_dict[('Dunks', td['data-stat'])] = td.text

            elif i >= 24 & i < 26:

                all_pbp_dict[('Corner 3s', td['data-stat'])] = td.text

            else:
                all_pbp_dict[('Heaves', td['data-stat'])] = td.text

        all_pbp_df = all_pbp_df.append(all_pbp_dict, ignore_index=True)
#pd.DataFrame(all_pbp_dict, index=['Michael', 'Joey'])
    
# names = all_pbp_df.columns

# for n in names:
#     dummy = re.match('DUMMY[\d]?[\s]?', n)
#     if dummy:
#         all_pbp_df = all_pbp_df.drop(dummy.group(), axis=1)
        
# all_pbp_df.head()

In [16]:
all_pbp_df

Unnamed: 0,"(Player, rank)","(Player, link)","(Player, player)","(Player, age)","(Player, g)","(Player, mp)","(Player, pct_1)","(Player, pct_2)","(Player, pct_3)","(Player, pct_4)",...,"(% of FGA by Distance, pct_1)","(% of FGA by Distance, pct_2)","(% of FGA by Distance, pct_3)","(% of FGA by Distance, pct_4)","(% of FGA by Distance, pct_5)","(% of FGA by Distance, player)","(% of FGA by Distance, plus_minus_net)","(% of FGA by Distance, plus_minus_on)","(% of FGA by Distance, tov_bad_pass)","(% of FGA by Distance, tov_lost_ball)"
0,1,/players/h/hardeja01.html,James Harden,30,68,2483,1%,43%,52%,4%,...,,,,,,,,,,
1,2,/players/t/tuckepj01.html,P.J. Tucker,34,72,2467,,,3%,71%,...,,,,,,,,,,
2,3,/players/w/westbru01.html,Russell Westbrook,31,57,2049,100%,,,,...,,,,,,,,,,
3,4,/players/h/houseda01.html,Danuel House,26,63,1913,,1%,63%,36%,...,,,,,,,,,,
4,5,/players/m/mclembe01.html,Ben McLemore,26,71,1619,47%,52%,1%,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
142,17,,,,,,,,,,...,4%,46%,47%,4%,,Michael Frazier,-18.8,-15.1,4,0
143,18,,,,,,,,,,...,,,,,100%,Bruno Caboclo,-13.4,-10.2,4,2
144,19,,,,,,,,,,...,,,,100%,,Luc Mbah a Moute,-14.8,-11.7,1,0
145,20,,,,,,,,,,...,,,,,100%,Ryan Anderson,+19.0,+21.9,1,0


In [17]:
##################################################################################################
# Play-by-Play Table
##################################################################################################

all_pbp_df = pd.DataFrame()

all_pbp_table = soup.find('div', attrs={'id': 'all_pbp'})

all_pbp_info = BeautifulSoup(all_pbp_table.contents[5], 'lxml')

all_pbp_data = all_pbp_info.find('tbody')

all_pbp_head = all_pbp_info.find('thead')

for tr in all_pbp_head.find_all('tr', attrs={'class': 'over_header'}):
    for th in tr.find_all('th'):
        print(th.text)
        
        fo
        




NameError: name 'fo' is not defined

In [298]:
dic =  {('candy', 'popsicle'): ['chocolate', 'purple'], ('candy', 'cake'): ['vanilla', 'pink'], ('healthy', 'fruit'): ['carrot', 'orange'], ('healthy', 'veggie'): ['peas', 'green']}
#dic = pd.Series(dic).reset_index()
# pd.DataFrame.from_dict(dic, orient='column')
pd.DataFrame(dic)

Unnamed: 0_level_0,candy,candy,healthy,healthy
Unnamed: 0_level_1,popsicle,cake,fruit,veggie
0,chocolate,vanilla,carrot,peas
1,purple,pink,orange,green


In [264]:
index = [np.array(['Player'] * 5, ['Position Estimate'] * 5, ['+/- Per 100 Poss.'] * 2, ''),]
index

[array(['Player', 'Player', 'Player', 'Player', 'Player'], dtype='<U6')]

In [257]:
all_pbp_df.columns = pd.MultiIndex().to_frame()

TypeError: Must pass both levels and codes

In [278]:
dictionary = {'A' : {'a': [1,2,3,4,5],
                     'b': [6,7,8,9,1]},

              'B' : {'a': [2,3,4,5,6],
                     'b': [7,8,9,1,2]}}
reform = {(outerKey, innerKey): values for outerKey, innerDict in dictionary.iteritems() for innerKey, values in innerDict.iteritems()}
reform

AttributeError: 'dict' object has no attribute 'iteritems'