# NBA Playoff Code
## Dan McDonough
## June 14th, 2021

This is the first notebook in a project to better understand what contributes to postseason success in the NBA. All of the data for this project was scraped from either Basketball Reference or Wikipedia.

Note: some of the sections below read from the same webpage, so a page that has already been downloaded does not need to be re-scraped.

## 1. Scrape Annual Standings

In [2]:
import requests
import urllib.request
import time
from bs4 import BeautifulSoup
import pandas as pd
import random
import timeit
import numpy as np

In [2]:
# Scrape the end-of-season conference standings for 1980-2019 from
# Basketball Reference and stack them into `total_standings`
# (columns: Year, Conference, Seed, Team, Win_pct; all scraped values are strings).
years = list(range(1980,2020))

# Index of the one non-team row to skip inside each conference table, one entry
# per season. Both lists hold 25 entries, and zip() stops at the shorter input,
# so the dicts only get keys for 1980..2004 -- exactly the seasons that use them.
# NOTE(review): the skipped row is presumably a division header -- confirm on the page.
east_skip = [7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9]
west_skip = [7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9]
east_dict = dict(zip(years,east_skip))
west_dict = dict(zip(years,west_skip))

STANDINGS_COLUMNS = ['Year', 'Conference', 'Seed', 'Team', 'Win_pct']


def _parse_conference(datarows, year, conference, skip_rows):
    """Build one conference's standings DataFrame from its table rows.

    datarows   -- the <tr> elements of the conference table
    year       -- season written into the 'Year' column
    conference -- label written into the 'Conference' column ('East' / 'West')
    skip_rows  -- row indices that are not team rows and must be ignored

    Rows 0 and 1 are never read (the loop starts at index 2). Values are
    pulled out of the raw HTML by position, so a layout change on the site
    surfaces as an IndexError here.
    """
    records = []
    for i in range(2, len(datarows)):
        if i in skip_rows:
            continue
        pieces = str(datarows[i]).split('>')
        records.append({
            'Year': year,
            'Conference': conference,
            # the seed is the text inside the parentheses that follow the team link
            'Seed': pieces[5].split('(')[1].split(')')[0],
            'Team': pieces[3].split('<')[0],
            'Win_pct': pieces[12].split('<')[0],
        })
    return pd.DataFrame(records, columns=STANDINGS_COLUMNS)


season_frames = []

for year in years:
    url = 'https://www.basketball-reference.com/leagues/NBA_'+str(year)+'.html'
    response = requests.get(url)
    # Fail loudly on a blocked / rate-limited request instead of hitting a
    # confusing IndexError further down when the tables are missing.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    tables = soup.findAll('table')

    # Through 2015 the conference tables are the first two tables on the
    # page; from 2016 on they are the third and fourth.
    east_idx = 0 if year <= 2015 else 2
    west_idx = east_idx + 1

    # Through 2004 a single, season-specific row is skipped per conference;
    # afterwards rows 7 and 13 are skipped in both conferences.
    if year <= 2004:
        east_rows_to_skip = {east_dict[year]}
        west_rows_to_skip = {west_dict[year]}
    else:
        east_rows_to_skip = {7, 13}
        west_rows_to_skip = {7, 13}

    east_standings = _parse_conference(tables[east_idx].findAll('tr'), year, 'East', east_rows_to_skip)
    west_standings = _parse_conference(tables[west_idx].findAll('tr'), year, 'West', west_rows_to_skip)

    annual_standings = pd.concat([east_standings, west_standings])
    season_frames.append(annual_standings)

    # be polite to the server between requests
    time.sleep(1)

# One concat at the end (seasons in chronological order) instead of growing
# the frame on every iteration.
total_standings = pd.concat(season_frames)

KeyboardInterrupt: 

In [None]:
# Preview the last 30 rows of the accumulated standings.
total_standings.tail(30)

In [87]:
# Save the standings to disk. No index argument is passed, so the DataFrame
# index is written as well (it appears as an unnamed first column in the CSV).
total_standings.to_csv('standings.csv')

## 2. Playoff Outcomes

In [95]:
# Scrape each season's playoff page and label every playoff team with how far
# it got. Teams are taken in the order their links first appear in the first
# table on the page and are paired positionally with the result labels.
years = range(1980,2020)

# Pieces of the stringified link list whose first four characters match one of
# these are not team names and are ignored.
exclude = ['[<a ', ', <a', 'Seri','Game', ']']

for year in years:
    url = 'https://www.basketball-reference.com/playoffs/NBA_'+str(year)+'.html'
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    text = str(soup.findAll('table')[0].findAll('a')).split('>')

    # Unique team names, in first-seen order.
    keep = []
    for line in text:
        if line[0:4] in exclude:
            continue
        team = line.split('<')[0]
        if team not in keep:
            keep.append(team)

    # From 1984 on, 8 teams get the plain 'Playoffs' label; before that, 4.
    plain_playoff_count = 8 if year >= 1984 else 4
    result_options = (
        ['Champ']
        + ['Finals']
        + ['Conference Finals'] * 2
        + ['2nd Round'] * 4
        + ['Playoffs'] * plain_playoff_count
    )

    # pandas raises ValueError here if the number of teams found does not
    # match the number of labels.
    result_df = pd.DataFrame({'Year':[year] * len(keep),'Team':keep,'Result':result_options})

    # Each new season is placed in front of the earlier ones, so the final
    # frame runs from the latest season down to 1980.
    total_df = result_df.copy() if year == 1980 else pd.concat([result_df,total_df])

    time.sleep(1)

In [98]:
# Show the last 12 rows. Seasons are prepended while scraping, so the tail
# holds the earliest season (1980).
total_df.tail(12)

Unnamed: 0,Year,Team,Result
0,1980,Los Angeles Lakers,Champ
1,1980,Philadelphia 76ers,Finals
2,1980,Boston Celtics,Conference Finals
3,1980,Seattle SuperSonics,Conference Finals
4,1980,Houston Rockets,2nd Round
5,1980,Atlanta Hawks,2nd Round
6,1980,Phoenix Suns,2nd Round
7,1980,Milwaukee Bucks,2nd Round
8,1980,San Antonio Spurs,Playoffs
9,1980,Washington Bullets,Playoffs


In [99]:
# Save the playoff results; the DataFrame index is written too (default behaviour).
total_df.to_csv('playoff_outcomes.csv')

## 3. Season Data

In [3]:
# Scrape the "Per 100 Possessions" team and opponent tables for 1980-2019 from
# the Basketball Reference season pages into `total_team_df` / `total_opp_df`.
# All scraped stat values are kept as strings; later cells cast them to float.
years = range(1980,2020)

# Output column for each <td> that follows the team-name cell, in page order.
PER100_STAT_COLUMNS = ['Games', 'Minutes', 'FG', 'FGA', 'FGA_pct',
                       'FG3', 'FG3A', 'FG3_pct', 'FG2', 'FG2A', 'FG2_pct',
                       'FT', 'FTA', 'FT_pct', 'ORB', 'DRB', 'TRB', 'AST',
                       'STL', 'BLK', 'TOV', 'PF', 'PTS']


def _per100_frame(chunks, year, n_rows):
    """Turn the raw HTML chunks of one per-100-possessions table into a DataFrame.

    chunks -- the table HTML split on '.html">'; chunk 0 is whatever precedes
              the first team link and is ignored, each later chunk starts
              with a team name followed by that team's <td> cells
    year   -- season written into the 'Year' column
    n_rows -- exclusive upper bound on the chunk index to read

    Both the team and the opponent table go through this one function so the
    two frames cannot drift apart. The previous copy-pasted version filled the
    opponent 'FG2A' column with the 3-point-attempt values.
    """
    records = []
    for i in range(1, n_rows):
        cells = chunks[i].split('</td>')
        record = {'Year': year, 'Team': cells[0].split('<')[0]}
        # cells[1..23] each look like '<td ...>value'; keep the value part
        for position, column in enumerate(PER100_STAT_COLUMNS, start=1):
            record[column] = cells[position].split(">")[1]
        records.append(record)
    return pd.DataFrame(records, columns=['Year', 'Team'] + PER100_STAT_COLUMNS)


team_frames = []
opp_frames = []

for year in years:
    url = 'https://www.basketball-reference.com/leagues/NBA_'+str(year)+'.html'
    response = requests.get(url)
    # Surface blocked / rate-limited requests as an HTTP error rather than an
    # IndexError from the string splitting below.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Everything after the third occurrence of the team-table caption; the
    # team and opponent tables are then cut out relative to the opponent caption.
    per100_section = str(soup).split('Team Per 100 Poss Stats')[3]
    team_table = per100_section.split('Opponent Per 100 Poss Stats')[0].split(".html\">")
    opp_table = per100_section.split('Opponent Per 100 Poss Stats')[3].split(".html\">")

    # The opponent table is read with the team table's row count, as before.
    rows = len(team_table)

    team_df = _per100_frame(team_table, year, rows)
    opp_df = _per100_frame(opp_table, year, rows)

    team_frames.append(team_df)
    opp_frames.append(opp_df)

    time.sleep(1)

# Latest season first, matching the order produced by the original
# prepend-as-you-go accumulation.
total_team_df = pd.concat(team_frames[::-1])
total_opp_df = pd.concat(opp_frames[::-1])

In [4]:
# Add derived rate stats to both the team table and the opponent table.
# The scraped columns hold strings, so each one is cast to float where it is used.
for stat_frame in (total_team_df, total_opp_df):
    attempts = stat_frame['FGA'].astype(float)
    made_threes = stat_frame['FG3'].astype(float)
    made_twos = stat_frame['FG2'].astype(float)

    # effective FG%: a made three counts 1.5 times a made two
    stat_frame['EFG_pct'] = (made_threes * 1.5 + made_twos) / attempts
    # free throws made per field-goal attempt
    stat_frame['FTR'] = stat_frame['FT'].astype(float)/attempts
    # assists per made field goal
    stat_frame['AST_pct'] = stat_frame['AST'].astype(float)/stat_frame['FG'].astype(float)
    # share of field-goal attempts that are threes
    stat_frame['Mix3'] = stat_frame['FG3A'].astype(float)/attempts

# One row per (Year, Team); opponent columns get the '_opp' suffix.
total_stat_df = total_team_df.merge(
    total_opp_df,
    left_on=['Year','Team'],
    right_on=['Year','Team'],
    suffixes=('','_opp'),
)

points_for = total_stat_df['PTS'].astype(float)
points_against = total_stat_df['PTS_opp'].astype(float)
total_stat_df['Net_Rtg'] = points_for - points_against

# Offensive rebound rate: own offensive boards over all boards available at
# that end (own ORB + the other side's DRB), for the team and for its opponents.
own_orb = total_stat_df['ORB'].astype(float)
opp_drb = total_stat_df['DRB_opp'].astype(float)
total_stat_df['ORB_pct'] = own_orb / (own_orb + opp_drb)

opp_orb = total_stat_df['ORB_opp'].astype(float)
own_drb = total_stat_df['DRB'].astype(float)
total_stat_df['ORB_pct_opp'] = opp_orb / (opp_orb + own_drb)

In [5]:
# Display the merged team/opponent stats table.
total_stat_df

Unnamed: 0,Year,Team,Games,Minutes,FG,FGA,FGA_pct,FG3,FG3A,FG3_pct,...,TOV_opp,PF_opp,PTS_opp,EFG_pct_opp,FTR_opp,AST_pct_opp,Mix3_opp,Net_Rtg,ORB_pct,ORB_pct_opp
0,2019,Milwaukee Bucks,82,19780,41.8,87.8,.476,13.0,36.8,.353,...,12.9,19.4,105.2,0.502775,0.162042,0.612821,0.388457,8.6,0.208817,0.197531
1,2019,Golden State Warriors,82,19805,43.4,88.4,.491,13.1,33.9,.385,...,13.3,19.2,109.5,0.507804,0.205128,0.600503,0.369008,6.4,0.225352,0.227957
2,2019,New Orleans Pelicans,82,19755,42.1,89.0,.473,9.9,28.8,.344,...,13.1,20.3,112.6,0.531844,0.193296,0.613909,0.363128,-1.2,0.241535,0.232967
3,2019,Philadelphia 76ers,82,19805,40.6,86.2,.471,10.6,29.5,.359,...,12.5,21.7,110.0,0.512291,0.205587,0.561275,0.328492,2.6,0.244804,0.213508
4,2019,Los Angeles Clippers,82,19830,40.3,85.4,.471,9.8,25.2,.388,...,12.9,23.4,111.5,0.513468,0.224467,0.577396,0.331089,0.9,0.220418,0.239651
5,2019,Portland Trail Blazers,82,19855,42.3,90.6,.467,11.0,30.7,.359,...,12.4,20.7,110.5,0.515538,0.194229,0.564477,0.330744,4.2,0.265766,0.221505
6,2019,Oklahoma City Thunder,82,19855,41.1,90.6,.454,11.0,31.5,.348,...,16.1,21.7,107.0,0.522807,0.205848,0.610687,0.356725,3.3,0.259657,0.217391
7,2019,Toronto Raptors,82,19880,41.7,88.1,.474,12.2,33.4,.366,...,14.8,20.3,107.1,0.509029,0.190745,0.610553,0.347630,6.0,0.219400,0.229759
8,2019,Sacramento Kings,82,19730,41.8,90.1,.464,10.9,29.0,.378,...,15.4,20.7,111.5,0.531963,0.207763,0.584352,0.382420,-1.1,0.231102,0.245455
9,2019,Washington Wizards,82,19930,41.1,87.8,.468,11.0,32.4,.341,...,15.3,20.7,113.9,0.546485,0.198413,0.605201,0.361678,-2.8,0.212190,0.258741


In [6]:
# Save the merged stats; the DataFrame index is written too (default behaviour).
total_stat_df.to_csv('team_stats.csv')

## 4. All Star Selections

In [447]:
# Fetch and parse the 1980 All-Star page on its own.
# NOTE(review): presumably kept for inspecting the page layout; the loop in the
# next cell fetches 1980 again and overwrites `url`, `response` and `soup`.
url = 'https://www.basketball-reference.com/allstar/NBA_1980.html'
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")

In [506]:
# Collect one (Year, Team) row per All-Star selection for 1980-2019.
# 1999 is skipped explicitly.
years = range(1980,2020)

for year in years:
    if year == 1999:
        continue
    url = 'https://www.basketball-reference.com/allstar/NBA_'+str(year)+'.html'
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")

    # Tables 1 and 2 on the page are read in turn (East first, then West).
    # Within each, every second link starting at position 1 supplies the
    # team abbreviation.
    roster_frames = []
    for table_position in (1, 2):
        links = soup.findAll('table')[table_position].findAll('a')
        abbreviations = [str(link).split('>')[1].split('<')[0] for link in links[1::2]]
        roster_frames.append(
            pd.DataFrame({'Year':[year] * len(abbreviations),'Team':abbreviations})
        )

    combined_frame = pd.concat(roster_frames)

    # New seasons go in front, so the final frame is latest-season-first.
    if year == 1980:
        total_star_df = combined_frame.copy()
    else:
        total_star_df = pd.concat([combined_frame,total_star_df])
    time.sleep(1)

(22, 2)
(44, 2)
(68, 2)
(92, 2)
(116, 2)
(140, 2)
(163, 2)
(187, 2)
(211, 2)
(235, 2)
(259, 2)
(283, 2)
(308, 2)
(332, 2)
(356, 2)
(380, 2)
(404, 2)
(428, 2)
(452, 2)
(476, 2)
(500, 2)
(523, 2)
(547, 2)
(571, 2)
(595, 2)
(619, 2)
(643, 2)
(667, 2)
(691, 2)
(715, 2)
(739, 2)
(763, 2)
(787, 2)
(811, 2)
(835, 2)
(859, 2)
(883, 2)
(906, 2)
(932, 2)


In [507]:
# Display the All-Star selections (one row per selected player's team).
total_star_df

Unnamed: 0,Year,Team
0,2019,LAL
1,2019,HOU
2,2019,GSW
3,2019,BOS
4,2019,TOR
5,2019,POR
6,2019,GSW
7,2019,WAS
8,2019,PHI
9,2019,MIN


In [508]:
# Save the All-Star selections; the DataFrame index is written too (default behaviour).
total_star_df.to_csv('all_stars.csv')

## 5. All NBA

In [2]:
# Download the Wikipedia All-NBA Team article once; the following cells read
# tables 5, 6 and 7 out of this `soup`.
url = 'https://en.wikipedia.org/wiki/All-NBA_Team'
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")

Table 5 - Rows 2 to 47 - findAll a - check value of first - then either take 2 and 4 or 1 and 3

1947 - 1955

Need special handling for tie in 1952

In [3]:
# First/second All-NBA teams from table 5, rows 2..47.
# A row whose first link mentions 'season' opens a new season; its team links
# sit one position further right (2 and 4) than on the other rows (1 and 3).
table = soup.findAll('table')[5].findAll('tr')
year = 1946
yearlist = []
first = []
second = []


def _link_text(anchor):
    """Return the visible text of an <a> tag via its string form."""
    return str(anchor).split('>')[1].split('<')[0]


for i in range(2, 48):
    links = table[i].findAll('a')
    if 'season' in str(links[0]):
        year += 1
        yearlist.append(year)
        first.append(_link_text(links[2]))
        second.append(_link_text(links[4]))
        continue

    yearlist.append(year)
    first.append(_link_text(links[1]))
    # Row 32 gets an empty second-team entry instead of a parsed one
    # (the 1952 tie mentioned in the note above this cell).
    second.append('' if i == 32 else _link_text(links[3]))

all_nba_1 = pd.DataFrame({'Year':yearlist,'First':first,'Second':second})

In [4]:
# Quick look at the first rows of the earliest All-NBA table.
all_nba_1.head()

Unnamed: 0,Year,First,Second
0,1947,Philadelphia Warriors,Providence Steamrollers
1,1947,Washington Capitols,Cleveland Rebels
2,1947,Detroit Falcons,St. Louis Bombers
3,1947,Washington Capitols,Chicago Stags
4,1947,Chicago Stags,Washington Capitols


Table 6 - Rows 2 to 166 - findAll a - check value of first - then either take 2 and 4 or 1 and 3

1956 - 1988

Add special handling to look for a bracket as the first character of the team name; if found, move on one spot

In [5]:
# First/second All-NBA teams from table 6, rows 2..166.
# Same layout as the earlier table, plus footnote links: when the text of an
# expected team link starts with '[', the team is read from a later link instead.
table = soup.findAll('table')[6].findAll('tr')
year = 1955
yearlist = []
first = []
second = []


def _link_text(anchor):
    """Return the visible text of an <a> tag via its string form."""
    return str(anchor).split('>')[1].split('<')[0]


for i in range(2, 167):

    # Row 154 is not parsed at all; its two teams are filled in by hand.
    if i == 154:
        yearlist.append(year)
        first.append('Los Angeles Lakers')
        second.append('Houston Rockets')
        continue

    links = table[i].findAll('a')

    # Season-opening row: bump the year, teams sit at links 2 and 4.
    if 'season' in str(links[0]):
        year += 1
        yearlist.append(year)
        first.append(_link_text(links[2]))
        second.append(_link_text(links[4]))
        continue

    yearlist.append(year)

    temp1 = _link_text(links[1])
    if temp1[0] == '[':
        # A bracketed link sits where the first team was expected: both teams
        # are read from links 2 and 4.
        first.append(_link_text(links[2]))
        second.append(_link_text(links[4]))
        continue
    first.append(temp1)

    temp2 = _link_text(links[3])
    # Bracketed link where the second team was expected: take link 4 instead.
    second.append(_link_text(links[4]) if temp2[0] == '[' else temp2)

all_nba_2 = pd.DataFrame({'Year':yearlist,'First':first,'Second':second})

Table 7 - Rows 2 to 156 - findAll a - check value of first - then either take 2, 4 and 6 or 1, 3 and 5

1989 - 2019

Add special handling to look for a bracket as the first character of the team name; if found, move on one spot

In [36]:
# First/second/third All-NBA teams from table 7, rows 2..156.
# Works like the two-team parser, extended to a third team column; a link
# whose text starts with '[' pushes the remaining teams to later links.
table = soup.findAll('table')[7].findAll('tr')
year = 1988
yearlist = []
first = []
second = []
third = []


def _link_text(anchor):
    """Return the visible text of an <a> tag via its string form."""
    return str(anchor).split('>')[1].split('<')[0]


for i in range(2, 157):
    links = table[i].findAll('a')

    # Season-opening row: bump the year, teams sit at links 2, 4 and 6.
    if 'season' in str(links[0]):
        year += 1
        yearlist.append(year)
        first.append(_link_text(links[2]))
        second.append(_link_text(links[4]))
        third.append(_link_text(links[6]))
        continue

    yearlist.append(year)

    temp1 = _link_text(links[1])
    if temp1[0] == '[':
        # Bracketed link in the first-team slot: all three teams shift right.
        first.append(_link_text(links[2]))
        second.append(_link_text(links[4]))
        third.append(_link_text(links[6]))
        continue
    first.append(temp1)

    temp2 = _link_text(links[3])
    if temp2[0] == '[':
        # Bracketed link in the second-team slot: second and third shift right.
        second.append(_link_text(links[4]))
        third.append(_link_text(links[6]))
        continue
    second.append(temp2)

    temp3 = _link_text(links[5])
    # Bracketed link in the third-team slot: take link 6 instead.
    third.append(_link_text(links[6]) if temp3[0] == '[' else temp3)

all_nba_3 = pd.DataFrame({'Year':yearlist,'First':first,'Second':second,'Third':third})

In [38]:
# Stack the three eras into one table. all_nba_1 and all_nba_2 have no 'Third'
# column, so their rows get NaN there. Because the column sets differ, older
# pandas versions emit a FutureWarning about column sorting on this call.
all_nba_final = pd.concat([all_nba_1,all_nba_2,all_nba_3])
# Save the combined table; the DataFrame index is written too (default behaviour).
all_nba_final.to_csv('all_nba.csv')

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.


  """Entry point for launching an IPython kernel.


## 6. MVP

In [133]:
# MVP voting per season: one row per table entry with the player's team and
# a vote figure taken from the fifth <td> of the row.
years = list(range(1980,2020))

# (season, table row index) -> (team, votes) entered by hand. These rows are
# not parsed from the page.
# NOTE(review): presumably rows where the generic link/cell positions do not
# line up -- confirm against the pages.
manual_rows = {
    (1994, 17): ('LAC', '1'),
    (1995, 15): ('HOU', '3'),
    (2005, 14): ('NJN', '3'),
    (2009, 7): ('DEN', '33'),
    (2010, 15): ('SAS', '1'),
}

for year in years:
    url = 'https://www.basketball-reference.com/awards/awards_'+str(year)+'.html'
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")

    # First table on the page; the first two rows are skipped.
    table = soup.findAll('table')[0].findAll('tr')
    teams = []
    votes = []

    for i in range(2, len(table)):
        if (year, i) in manual_rows:
            team_abbr, vote_total = manual_rows[(year, i)]
        else:
            team_abbr = str(table[i].findAll('a')[1]).split('>')[1].split('<')[0]
            vote_total = str(table[i].findAll('td')[4]).split('>')[1].split('<')[0]
        teams.append(team_abbr)
        votes.append(vote_total)

    mvp = pd.DataFrame({'Year':[year] * len(teams),'Team':teams,'Votes':votes})

    # New seasons go in front, so the final frame is latest-season-first.
    total_mvp_df = mvp.copy() if year == 1980 else pd.concat([mvp,total_mvp_df])
    time.sleep(1)

In [191]:
# Bare expression: a notebook only renders the value of a cell's last
# statement, so this line has no visible effect here.
total_mvp_df
# Save the MVP voting table; the DataFrame index is written too (default behaviour).
total_mvp_df.to_csv('mvp.csv')

## 7. DPOY

In [135]:
# Download the Defensive Player of the Year award page; the next cell reads
# the first table out of this `soup`.
url = 'https://www.basketball-reference.com/awards/dpoy.html'
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")

In [188]:
# Defensive Player of the Year: the winner's team for each season, 2019 back
# to 1983. Table row i (starting at row 2) is paired with years[i - 2].
years = list(range(2019,1982, -1))
teams = []

# Look the table rows up once; re-running the two findAll calls inside the
# loop re-searched the whole document on every iteration.
award_rows = soup.findAll('table')[0].findAll('tr')

for i in range(2, len(years)+2):
    if i == 20:
        # Row 20 (season years[18] == 2001) is filled in by hand rather than parsed.
        # NOTE(review): presumably the page does not show a single team for
        # that winner -- confirm against the page.
        teams.append('PHI')
    else:
        # the fifth link in the row carries the team abbreviation
        teams.append(str(award_rows[i].findAll('a')[4]).split('>')[1].split('<')[0])

dpoy = pd.DataFrame({'Year':years,'Team':teams})

In [192]:
# Bare expression: a notebook only renders the value of a cell's last
# statement, so this line has no visible effect here.
dpoy
# Save the DPOY table; the DataFrame index is written too (default behaviour).
dpoy.to_csv('dpoy.csv')

## 8. All Defense

In [193]:
# Download the Wikipedia NBA All-Defensive Team article; the next cell reads
# table 5 out of this `soup`.
url = 'https://en.wikipedia.org/wiki/NBA_All-Defensive_Team'
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")

Types of exception handling: season starter, brackets (now needed in both), uneven numbers

In [311]:
# First/second All-Defensive teams from table 5 of the Wikipedia article,
# rows 2..267. One output row is produced per table row, with the season
# year, the first-team member's team and the second-team member's team.
#
# Three kinds of special handling are interleaved below:
#   * season rows   - the first link mentions 'season'; the year is bumped and
#                     the team links sit one position further right
#   * bracket links - a link whose text starts with '[' sits where a team was
#                     expected, so the team is read from a later link
#   * fixed rows    - specific row indices are hard-coded because one of the
#                     two columns has no parsable entry there
#     NOTE(review): the hard-coded indices are tied to the page layout at
#     scrape time (presumably ties / uneven team sizes) - re-verify before re-running.
table = soup.findAll('table')[5].findAll('tr')
year = 1968  # bumped to 1969 by the first season row
yearlist = []
first = []
second = []

temp1 = ''
temp2 = ''

for i in range (2,268):
    if 'season' in str(table[i].findAll('a')[0]):
        # Season-opening row: teams are normally at links 2 and 4.
        year += 1
        yearlist.append(year)
        
        temp1 = str(table[i].findAll('a')[2]).split('>')[1].split('<')[0]
        if temp1[0]=='[':
            # bracket link in the first-team slot: both teams shift right by one
            first.append(str(table[i].findAll('a')[3]).split('>')[1].split('<')[0])
            second.append(str(table[i].findAll('a')[5]).split('>')[1].split('<')[0])
            continue
        else:
            first.append(temp1)
            
        temp2 = str(table[i].findAll('a')[4]).split('>')[1].split('<')[0]    
        if temp2[0]=='[':
            # bracket link in the second-team slot: take the next link
            second.append(str(table[i].findAll('a')[5]).split('>')[1].split('<')[0])
        else:
            second.append(temp2)
            
    else:
        # Non-season row: teams are normally at links 1 and 3.
        yearlist.append(year)
        
        # Rows 52, 106 and 195: first-team entry left empty, second-team
        # entry filled in by hand.
        if i == 52:
            first.append('')
            second.append('Chicago Bulls')
            continue 
            
        if i == 106:
            first.append('')
            second.append('New York Knicks')
            continue 

        if i == 195:
            first.append('')
            second.append('Miami Heat')
            continue 
        
        temp1 = str(table[i].findAll('a')[1]).split('>')[1].split('<')[0]
        if temp1[0]=='[':
            # bracket link in the first-team slot: first team from link 2
            first.append(str(table[i].findAll('a')[2]).split('>')[1].split('<')[0])
            if i == 22:
                # row 22 has no second-team entry in this case
                second.append('')
                continue
            second.append(str(table[i].findAll('a')[4]).split('>')[1].split('<')[0])
            continue
        else:
            first.append(temp1)
        
        # Rows that get an empty second-team entry instead of a parsed one.
        if i in [22,65, 71, 82, 164, 201, 235]:
            second.append('')
            continue
        
        temp2 = str(table[i].findAll('a')[3]).split('>')[1].split('<')[0]
        if temp2[0]=='[':
            # bracket link in the second-team slot: take the next link
            second.append(str(table[i].findAll('a')[4]).split('>')[1].split('<')[0])
        else:
            second.append(temp2)
all_defense = pd.DataFrame({'Year':yearlist,'First':first,'Second':second})

In [None]:
# Bare expression: a notebook only renders the value of a cell's last
# statement, so this line has no visible effect here.
all_defense
# Save the All-Defensive table; the DataFrame index is written too (default behaviour).
all_defense.to_csv('all_defense.csv')