In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import time
import numpy as np
from collections import Counter

# Project Objective

My ultimate goal in this project, which will be spread over multiple notebooks, is to predict with some degree of accuracy which draft picks are likely to be selected for an all star game during the early part (first five years) of their careers. In order to make this determination, I want to use only data that would be available shortly after the draft was completed and that is readily available for players going back to the 1976-77 season, the year that the NBA and ABA merged into a single league. 

### Data Scraping

The first step in completing this project is, obviously, to obtain the necessary data. I have chosen to use the following data sets from the specified sources:
1. NBA drafts (Wikipedia)
2. NBA all star selections (Wikipedia)
3. NCAA tournament teams (sports reference)
4. NCAA division assignments (ncaa)

6. NBA standings (basketball reference)
5. NBA rosters (basketball reference)

### Scraping the NBA drafts

Because I have chosen to use Wikipedia as the source for this information (largely because their charts contain information that is missing from most other readily available sources), there is considerable variation in the layout of the pages. As a result, it was necessary for me to use multiple variations of the scraping function in order to obtain what was needed.

In [2]:
years = [str(1976 + i) for i in range(45)]

In [48]:
def NBA_draft(year):
        
    group_1 = ['1976', '1977', '1978', '1979', '1980', '1981']
    group_2 = ['1984', '1996', '1997']
    group_3 = ['1982', '1983', '1988', '1991', '1995', '2009']
    group_4 = ['1985', '1986', '1987', '1989', '1990', '1992', 
               '1993', '1994', '1998', '1999', '2000', '2001', 
               '2002', '2003', '2004', '2005', '2006', '2007', 
               '2008', '2010', '2011', '2012', '2013', '2014',
               '2015', '2016', '2017', '2018', '2019']
        
    url = 'https://en.wikipedia.org/wiki/{}_NBA_draft'.format(year)  
        
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser').prettify()
        
    if (year in group_1) or (year in group_3):
        start_marker = '<span class="mw-headline" id="Draft">'
    else:
        start_marker = '<span class="mw-headline" id="Draft_select'
        
    start_index = soup.find(start_marker)
    
    soup = soup[start_index:]
    
    my_soup = soup.split('\n')

    table_starts = [i for i in range(len(my_soup)) if '<table class' in my_soup[i]]
    
    if (year in group_1) or (year in group_2):
        table_ends = [i for i in range(table_starts[0], table_starts[1]) if '</table>' in my_soup[i]]
        to_use = my_soup[table_starts[0]: table_ends[0]]
        
    else:
        table_ends = [i for i in range(table_starts[2], len(my_soup)) if '</table>' in my_soup[i]]
        to_use = my_soup[table_starts[2]: table_ends[0]]

    to_use = [item.lstrip() for item in to_use]

    row_indices = [i for i in range(len(to_use)) if to_use[i] == '<tr>']
    row_indices.append(-1)
    
    cols = to_use[row_indices[0]:row_indices[1]]
    col_div = [i for i in range(len(cols)) if '<th' in cols[i]]
    col_div.append(-1)
    
    col_names = []
    for i in range(len(col_div) - 1):
        col = cols[col_div[i]:col_div[i+1]]
        col = [item for item in col if ('<' not in item) & ('>' not in item)]
        col = ' '.join(col)
        col_names.append(col)

    rows = []

    for i in range(1, len(row_indices) - 1):
        row = to_use[row_indices[i]:row_indices[i+1]]
        row_div = [i for i in range(len(row)) if ('<td' in row[i]) or ('<th' in row[i])]
        row_div.append(-1)
        
        new_row = []
        for i in range(len(row_div)-1):
            entry = row[row_div[i]: row_div[i+1]]
            entry = [item for item in entry if ('<' not in item) & ('>' not in item)]
            entry = ' '.join(entry)
            new_row.append(entry)
            
        rows.append(new_row)
    
    data = pd.DataFrame(rows)
    
    data.dropna(how = 'any', axis = 1, inplace = True)
    data.dropna(how = 'any', axis = 0, inplace = True)
    data.columns = col_names


    return data

In [49]:
for year in years[:-1]:
    df = NBA_draft(year)
    
    df.to_csv("NBA_draft_{}.csv".format(year))
    
    print("There were {} draft picks in ".format(len(df)), year)

There were 34 draft picks in  1976
There were 44 draft picks in  1977
There were 44 draft picks in  1978
There were 44 draft picks in  1979
There were 46 draft picks in  1980
There were 46 draft picks in  1981
There were 46 draft picks in  1982
There were 47 draft picks in  1983
There were 47 draft picks in  1984
There were 47 draft picks in  1985
There were 47 draft picks in  1986
There were 46 draft picks in  1987
There were 75 draft picks in  1988
There were 54 draft picks in  1989
There were 54 draft picks in  1990
There were 54 draft picks in  1991
There were 54 draft picks in  1992
There were 54 draft picks in  1993
There were 54 draft picks in  1994
There were 58 draft picks in  1995
There were 58 draft picks in  1996
There were 57 draft picks in  1997
There were 58 draft picks in  1998
There were 58 draft picks in  1999
There were 58 draft picks in  2000
There were 58 draft picks in  2001
There were 58 draft picks in  2002
There were 58 draft picks in  2003
There were 59 draft 

### Scraping the NBA All Star selections

In [5]:
def NBA_All_Stars(year):
    
    if year == '1999':
        return None
    
    format_1 = ['1977']
    format_2 = ['1978']
    format_3 = ['1979', '1980', '1981']
    format_4 = ['1982', '1985']
    format_5 = ['2006']
    
    url = 'https://en.wikipedia.org/wiki/{}_NBA_All-Star_Game'.format(year) 
        
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser').prettify()
    
    if year in format_1:
        start_marker = '<span class="mw-headline" id="Eastern'
    elif year in format_2:
        start_marker = '<span class="mw-headline" id="Teams"'
    elif year in format_3:
        start_marker = '<span class="mw-headline" id="Team_rost'
    elif year in format_4:
        start_marker = '<span class="mw-headline" id="Western'
    elif year in format_5:
        start_marker = '<span class="mw-headline" id="Players"'
    else:
        start_marker = '<span class="mw-headline" id="Roster'
        
    start_index = soup.find(start_marker)
    
    my_soup = soup[start_index:].split('\n')
    my_soup = [item.lstrip() for item in my_soup]
    
    table_starts = [i for i in range(len(my_soup)) if '<table' in my_soup[i]]
    table_ends = [i for i in range(table_starts[0], len(my_soup)) if '</table>' in my_soup[i]]
    
    players = []
    
    for count in range(2):
        to_use = my_soup[table_starts[count]:table_ends[count]]
        row_starts = [i for i in range(len(to_use)) if to_use[i] == '<tr>']
        row_starts.append(-1)
        
        for i in range(len(row_starts)-1):
            
            row = to_use[row_starts[i]:row_starts[i+1]]
            
            player = []
            
            player_breaks = [j for j in range(len(row)) if '<t' in row[j] ]
            player_breaks.append(-1)
            
            for k in range(len(player_breaks)-1):
                
                datum = row[player_breaks[k]:player_breaks[k+1]]
                datum = [item for item in datum if '<' not in item]
                player.append(' '.join(datum))
        
            player = [item for item in player if item != '']
            
    
            if len(player) >= 4:
                players.append(player)

    players = pd.DataFrame(players)
    players.drop_duplicates([1, 2], inplace = True)
    players.reset_index(inplace = True, drop = True)
    
    return players

In [6]:
for year in years[1:23]:
    df = NBA_All_Stars(year)
    df.to_csv("NBA_AllStars_{}.csv".format(year))
    
    print("There were {} all star selections in ".format(len(df)), year)
    
for year in years[24:]:
    df = NBA_All_Stars(year)
    df.to_csv("NBA_AllStars_{}.csv".format(year))
    
    print("There were {} all star selections in ".format(len(df)), year) 

There were 24 all star selections in  1977
There were 23 all star selections in  1978
There were 21 all star selections in  1979
There were 23 all star selections in  1980
There were 21 all star selections in  1981
There were 24 all star selections in  1982
There were 27 all star selections in  1983
There were 27 all star selections in  1984
There were 25 all star selections in  1985
There were 28 all star selections in  1986
There were 26 all star selections in  1987
There were 26 all star selections in  1988
There were 26 all star selections in  1989
There were 26 all star selections in  1990
There were 27 all star selections in  1991
There were 30 all star selections in  1992
There were 29 all star selections in  1993
There were 27 all star selections in  1994
There were 26 all star selections in  1995
There were 25 all star selections in  1996
There were 30 all star selections in  1997
There were 25 all star selections in  1998
There were 25 all star selections in  2000
There were 

### Scraping the NCAA tournament teams

In [7]:
def NCAA_Tournament(year):
    
    url = 'https://www.sports-reference.com/cbb/postseason/{}-ncaa.html'.format(year) 
        
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser').prettify()
    
    #return soup
    
    start = soup.find('<div id="brackets">')
    end = soup.find('All-Tournament Team')
    
    my_soup = soup[start:end].split('\n')

    my_soup = [item.lstrip() for item in my_soup if '<' not in item]
    my_soup = [item for item in my_soup if item != '']

    # Get rid of the locations

    my_soup = [item for item in my_soup if not item.isdigit()]

    # Get rid of the scores

    my_soup = [item for item in my_soup if 'at ' not in item]
    
    # Get rid of 'First Four'
    
    my_soup = [item for item in my_soup if ('First Four' not in item) &('Play-In' not in item)]

    cnt = Counter()
    for word in my_soup:
        cnt[word] +=1
    
    teams = pd.DataFrame.from_dict(cnt, orient = 'index')
    teams.reset_index(inplace = True)
    teams.columns = ['Team', 'Round']
    teams = teams.loc[teams['Team'] !='Third Place', :]
    teams = teams.loc[teams['Team'] !=',', :]

    
    return teams

In [8]:
for year in years[:-1]:
    df = NCAA_Tournament(year)
    
    df.to_csv("NCAA_tournament_{}.csv".format(year))
    
    print("There were {} tournament teams in ".format(len(df)), year)

There were 32 tournament teams in  1976
There were 32 tournament teams in  1977
There were 32 tournament teams in  1978
There were 40 tournament teams in  1979
There were 48 tournament teams in  1980
There were 48 tournament teams in  1981
There were 48 tournament teams in  1982
There were 52 tournament teams in  1983
There were 53 tournament teams in  1984
There were 64 tournament teams in  1985
There were 64 tournament teams in  1986
There were 64 tournament teams in  1987
There were 64 tournament teams in  1988
There were 64 tournament teams in  1989
There were 64 tournament teams in  1990
There were 64 tournament teams in  1991
There were 64 tournament teams in  1992
There were 64 tournament teams in  1993
There were 64 tournament teams in  1994
There were 64 tournament teams in  1995
There were 64 tournament teams in  1996
There were 64 tournament teams in  1997
There were 64 tournament teams in  1998
There were 64 tournament teams in  1999
There were 64 tournament teams in  2000


### Scraping the NCAA division memberships

In [9]:
def NCAA_division(div):
    
    url = 'https://www.ncsasports.org/mens-basketball/division-{}-colleges'.format(div)
    
    page = requests.get(url)

    soup = BeautifulSoup(page.content, 'html.parser').prettify()
    
    start = soup.find('Full list of ')
    end = soup.find(' <div class="right-sidebar pull-sidebar layout-main-sidebar">')
    
    my_soup = soup[start:end].split('\n')
    my_soup = [item.lstrip() for item in my_soup]
    
    univ_keys = [i for i in range(len(my_soup)) if my_soup[i] == '<span itemprop="name">']
    univ_keys.append(-1)
    
    universities = []
    
    for key1, key2 in zip(univ_keys[:-1], univ_keys[1:]):

        university = [item for item in my_soup[key1 : key2] if '<' not in item]
        university = university[0:1] + [''.join(university[1:4])] + university[4:]
        universities.append(university)
        
    cols = ['School', 'City and State', 'Region', 'Conference', 'Division']
    
    universities = pd.DataFrame(universities, columns = cols)
    
    return universities

In [11]:
for div in ['1', '2', '3']:

    df = NCAA_division(div)
    
    df.to_csv('NCAA_division_{}'.format(div))
    
    print("There are {} teams in NCAA division ".format(len(df)), div)

There are 355 teams in NCAA division  1
There are 304 teams in NCAA division  2
There are 420 teams in NCAA division  3


### Scraping NBA standings

In [12]:
def NBA_Standings(year):
    
    url = 'https://www.basketball-reference.com/leagues/NBA_{}.html'.format(year)

    page = requests.get(url)

    soup = BeautifulSoup(page.content, 'html.parser').prettify()
    
    start = soup.find('Division Standings')
    end = soup.find('Playoff Series')
    
    my_soup = soup[start:end].split('\n')
    my_soup = [item.lstrip() for item in my_soup]

    table_starts = [i for i in range(len(my_soup)) if '<thead' in my_soup[i]]
    table_ends = [i for i in range(len(my_soup)) if '</table>' in my_soup[i]]

    standings = []
    
    for i in range(2):
        table_info = my_soup[table_starts[i]: table_ends[i]]
        table_rows = [i for i in range(len(table_info)) if '<tr' in table_info[i]]
        table_rows.append(-1)
        
        for key1, key2 in zip(table_rows[:-1], table_rows[1:]):
            table_row = table_info[key1:key2]
            team_ind = [i for i in range(len(table_row)) if ('<td' in table_row[i]) or ('<th' in table_row[i])]
            team_ind.append(-1)
            
            team = []
            for ind1, ind2 in zip(team_ind[:-1], team_ind[1:]):
                dat = table_row[ind1:ind2]
                dat = [item for item in dat if '<' not in item]
                dat = "".join(dat)
    
                team.append(dat)
        
            standings.append(team)
    
    standings = pd.DataFrame(standings)
    
    cols = standings.loc[0, :].copy()
    cols[0] = 'Teams'
    standings.columns = cols
    
    to_drop = [i for i in range(len(standings)) if 
               ('Division' in standings.loc[i, 'Teams']) or 
               ('Conference' in standings.loc[i, 'Teams'])]
    
    standings.drop(to_drop, axis = 0, inplace = True)
    standings.reset_index(drop = True, inplace = True)
    
    return standings

In [13]:
for year in years[:-1]:
    df = NBA_Standings(year)
    
    df.to_csv("NBA_Standings_{}.csv".format(year))
    
    print("There were {} NBA teams in ".format(len(df)), year)

There were 18 NBA teams in  1976
There were 22 NBA teams in  1977
There were 22 NBA teams in  1978
There were 22 NBA teams in  1979
There were 22 NBA teams in  1980
There were 23 NBA teams in  1981
There were 23 NBA teams in  1982
There were 23 NBA teams in  1983
There were 23 NBA teams in  1984
There were 23 NBA teams in  1985
There were 23 NBA teams in  1986
There were 23 NBA teams in  1987
There were 23 NBA teams in  1988
There were 25 NBA teams in  1989
There were 27 NBA teams in  1990
There were 27 NBA teams in  1991
There were 27 NBA teams in  1992
There were 27 NBA teams in  1993
There were 27 NBA teams in  1994
There were 27 NBA teams in  1995
There were 29 NBA teams in  1996
There were 29 NBA teams in  1997
There were 29 NBA teams in  1998
There were 29 NBA teams in  1999
There were 29 NBA teams in  2000
There were 29 NBA teams in  2001
There were 29 NBA teams in  2002
There were 29 NBA teams in  2003
There were 29 NBA teams in  2004
There were 30 NBA teams in  2005
There were

### Scraping NBA Rosters

Here I'm going to scrape each team in a separate cell, in part because some teams have changed locations and/or names, and as a result their urls are not consistant.

In [16]:
def NBA_Rosters(url):
    
    page = requests.get(url)

    soup = BeautifulSoup(page.content, 'html.parser').prettify()
    
    start = soup.find('<span class="section_anchor" data-label="Roster" id="roster_link">')
    end = start + soup[start:].find('</table>')
    
    my_soup = soup[start:end].split('\n')
    my_soup = [item.lstrip() for item in my_soup]
    
    row_indices = [i for i in range(len(my_soup)) if my_soup[i] == '<tr>']
    row_indices.append(-1)

    roster = []
    
    for ind1, ind2 in zip(row_indices[:-1], row_indices[1:]):
        player_info = my_soup[ind1 : ind2]
        
        player_index = [i for i in range(len(player_info)) if 
                        ('<td' in player_info[i]) or ('<th' in player_info[i])]
        player_index.append(-1)
        
        player = []

        for key1, key2 in zip(player_index[:-1], player_index[1:]):
            dat = player_info[key1:key2]
            dat = [item for item in dat if '<' not in item]
            player.append("".join(dat))
    
        roster.append(player)
        
    cols = ['No','Player','Pos','Ht','Wt','Birth Date','Flag','Exp','College']
        
    roster = pd.DataFrame(roster, columns = cols)
    
    
    
    return roster

In [17]:
Chicago_Bulls = pd.DataFrame()
for year in years:
    url = "https://www.basketball-reference.com/teams/CHI/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Chicago_Bulls = pd.concat([Chicago_Bulls, players], axis = 0)
    
Chicago_Bulls.to_csv('Chicago_Bulls.csv')

print("There are a total of {} players on these Chicago_Bulls rosters".format(len(Chicago_Bulls)))

There are a total of 776 players on these Chicago_Bulls rosters


In [19]:
Detroit_Pistons = pd.DataFrame()
for year in years:
    url = "https://www.basketball-reference.com/teams/DET/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Detroit_Pistons = pd.concat([Detroit_Pistons, players], axis = 0)
    
Detroit_Pistons.to_csv('Detroit_Pistons.csv')

print("There are a total of {} players Detroit_Pistons on these rosters".format(len(Detroit_Pistons)))

There are a total of 774 players Detroit_Pistons on these rosters


In [20]:
Cleveland_Cavaliers = pd.DataFrame()
for year in years:
    url = "https://www.basketball-reference.com/teams/CLE/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Cleveland_Cavaliers = pd.concat([Cleveland_Cavaliers, players], axis = 0)
    
Cleveland_Cavaliers.to_csv('Cleveland_Cavaliers.csv')

print("There are a total of {} players on these Cleveland_Cavaliers rosters".format(len(Cleveland_Cavaliers)))

There are a total of 824 players on these Cleveland_Cavaliers rosters


In [21]:
Denver_Nuggets = pd.DataFrame()
for year in years:
    url = "https://www.basketball-reference.com/teams/DEN/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Denver_Nuggets = pd.concat([Denver_Nuggets, players], axis = 0)
    
Denver_Nuggets.to_csv('Denver_Nuggets.csv')

print("There are a total of {} players on these Denver_Nuggets rosters".format(len(Denver_Nuggets)))

There are a total of 768 players on these Denver_Nuggets rosters


In [22]:
Indiana_Pacers = pd.DataFrame()
for year in years:
    url = "https://www.basketball-reference.com/teams/IND/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Indiana_Pacers = pd.concat([Indiana_Pacers, players], axis = 0)
    
Indiana_Pacers.to_csv('Indiana_Pacers.csv')

print("There are a total of {} players on these Indiana_Pacers rosters".format(len(Indiana_Pacers)))

There are a total of 722 players on these Indiana_Pacers rosters


In [23]:
Milwaukee_Bucks = pd.DataFrame()
for year in years:
    url = "https://www.basketball-reference.com/teams/MIL/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Milwaukee_Bucks = pd.concat([Milwaukee_Bucks, players], axis = 0)
    
Milwaukee_Bucks.to_csv('Milwaukee_Bucks.csv')

print("There are a total of {} players on these Milwaukee_Bucks rosters".format(len(Milwaukee_Bucks)))

There are a total of 803 players on these Milwaukee_Bucks rosters


In [24]:
Houston_Rockets = pd.DataFrame()
for year in years:
    url = "https://www.basketball-reference.com/teams/HOU/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Houston_Rockets = pd.concat([Houston_Rockets, players], axis = 0)
    
Houston_Rockets.to_csv('Houston_Rockets.csv')

print("There are a total of {} players on these Houston_Rockets rosters".format(len(Houston_Rockets)))

There are a total of 795 players on these Houston_Rockets rosters


In [25]:
SanAntonio_Spurs = pd.DataFrame()
for year in years:
    url = "https://www.basketball-reference.com/teams/SAS/{}.html".format(year)
    players = NBA_Rosters(url)
    
    SanAntonio_Spurs = pd.concat([SanAntonio_Spurs, players], axis = 0)
    
SanAntonio_Spurs.to_csv('SanAntonio_Spurs.csv')

print("There are a total of {} players on these SanAntonio_Spurs rosters".format(len(SanAntonio_Spurs)))

There are a total of 784 players on these SanAntonio_Spurs rosters


In [26]:
Atlanta_Hawks = pd.DataFrame()
for year in years:
    url = "https://www.basketball-reference.com/teams/ATL/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Atlanta_Hawks = pd.concat([Atlanta_Hawks, players], axis = 0)
    
Atlanta_Hawks.to_csv('Atlanta_Hawks.csv')

print("There are a total of {} players on these Atlanta_Hawks rosters".format(len(Atlanta_Hawks)))

There are a total of 799 players on these Atlanta_Hawks rosters


In [27]:
Boston_Celtics = pd.DataFrame()
for year in years:
    url = "https://www.basketball-reference.com/teams/BOS/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Boston_Celtics = pd.concat([Boston_Celtics, players], axis = 0)
    
Boston_Celtics.to_csv('Boston_Celtics.csv')

print("There are a total of {} players on these Boston_Celtics rosters".format(len(Boston_Celtics)))

There are a total of 778 players on these Boston_Celtics rosters


In [28]:
Philadelphia_76ers = pd.DataFrame()
for year in years:
    url = "https://www.basketball-reference.com/teams/PHI/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Philadelphia_76ers = pd.concat([Philadelphia_76ers, players], axis = 0)
    
Philadelphia_76ers.to_csv('Philadelphia_76ers.csv')

print("There are a total of {} players on these Philadelphia_76ers rosters".format(len(Philadelphia_76ers)))

There are a total of 815 players on these Philadelphia_76ers rosters


In [29]:
NewYork_Knicks = pd.DataFrame()
for year in years:
    url = "https://www.basketball-reference.com/teams/NYK/{}.html".format(year)
    players = NBA_Rosters(url)
    
    NewYork_Knicks = pd.concat([NewYork_Knicks, players], axis = 0)
    
NewYork_Knicks.to_csv('NewYork_Knicks.csv')

print("There are a total of {} players on these NewYork_Knicks rosters".format(len(NewYork_Knicks)))

There are a total of 778 players on these NewYork_Knicks rosters


In [30]:
Phoenix_Suns = pd.DataFrame()
for year in years:
    url = "https://www.basketball-reference.com/teams/PHO/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Phoenix_Suns = pd.concat([Phoenix_Suns, players], axis = 0)
    
Phoenix_Suns.to_csv('Phoenix_Suns.csv')

print("There are a total of {} players on these Phoenix_Suns rosters".format(len(Phoenix_Suns)))

There are a total of 781 players on these Phoenix_Suns rosters


In [31]:
Portland_TrailBlazers = pd.DataFrame()
for year in years:
    url = "https://www.basketball-reference.com/teams/POR/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Portland_TrailBlazers = pd.concat([Portland_TrailBlazers, players], axis = 0)
    
Portland_TrailBlazers.to_csv('Portland_TrailBlazers.csv')

print("There are a total of {} players on these Portland_TrailBlazers rosters".format(len(Portland_TrailBlazers)))

There are a total of 751 players on these Portland_TrailBlazers rosters


In [32]:
GoldenState_Warriors = pd.DataFrame()
for year in years:
    url = "https://www.basketball-reference.com/teams/GSW/{}.html".format(year)
    players = NBA_Rosters(url)
    
    GoldenState_Warriors = pd.concat([GoldenState_Warriors, players], axis = 0)
    
GoldenState_Warriors.to_csv('GoldenState_Warriors.csv')

print("There are a total of {} players on these GoldenState_Warriors rosters".format(len(GoldenState_Warriors)))

There are a total of 805 players on these GoldenState_Warriors rosters


In [33]:
LosAngeles_Lakers = pd.DataFrame()
for year in years:
    url = "https://www.basketball-reference.com/teams/LAL/{}.html".format(year)
    players = NBA_Rosters(url)
    
    LosAngeles_Lakers = pd.concat([LosAngeles_Lakers, players], axis = 0)
    
LosAngeles_Lakers.to_csv('LosAngeles_Lakers.csv')

print("There are a total of {} players on these LosAngeles_Lakers rosters".format(len(LosAngeles_Lakers)))

There are a total of 751 players on these LosAngeles_Lakers rosters


In [34]:
LosAngeles_Clippers = pd.DataFrame()

for year in ['1976', '1977', '1978']:
    url = "https://www.basketball-reference.com/teams/BUF/{}.html".format(year)
    players = NBA_Rosters(url)
    
    LosAngeles_Clippers = pd.concat([LosAngeles_Clippers, players], axis = 0)
    
for year in ['1979', '1980', '1981', '1982', '1983', '1984']:
    url = "https://www.basketball-reference.com/teams/SDC/{}.html".format(year)
    players = NBA_Rosters(url)
    
    LosAngeles_Clippers = pd.concat([LosAngeles_Clippers, players], axis = 0)
    
for year in years[9:]:
    url = "https://www.basketball-reference.com/teams/LAC/{}.html".format(year)
    players = NBA_Rosters(url)
    
    LosAngeles_Clippers = pd.concat([LosAngeles_Clippers, players], axis = 0)
    
LosAngeles_Clippers.to_csv('LosAngeles_Clippers.csv')

print("There are a total of {} players on these LosAngeles_Clippers rosters".format(len(LosAngeles_Clippers)))

There are a total of 819 players on these LosAngeles_Clippers rosters


In [35]:
Brooklyn_Nets = pd.DataFrame()

for year in ['1977']:
    url = "https://www.basketball-reference.com/teams/NYY/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Brooklyn_Nets = pd.concat([Brooklyn_Nets, players], axis = 0)
    
for year in years[2: 37]:
    url = "https://www.basketball-reference.com/teams/NJN/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Brooklyn_Nets = pd.concat([Brooklyn_Nets, players], axis = 0)
    
for year in years[37:]:
    url = "https://www.basketball-reference.com/teams/BRK/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Brooklyn_Nets = pd.concat([Brooklyn_Nets, players], axis = 0)
    
Brooklyn_Nets.to_csv('Brooklyn_Nets.csv')

print("There are a total of {} players on these Brooklyn_Nets rosters".format(len(Brooklyn_Nets)))

There are a total of 801 players on these Brooklyn_Nets rosters


In [36]:
Utah_Jazz = pd.DataFrame()

for year in years[: 4]:
    url = "https://www.basketball-reference.com/teams/NOJ/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Utah_Jazz = pd.concat([Utah_Jazz, players], axis = 0)
    
for year in years[4:]:
    url = "https://www.basketball-reference.com/teams/UTA/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Utah_Jazz = pd.concat([Utah_Jazz, players], axis = 0)
    
Utah_Jazz.to_csv('Utah_Jazz.csv')

print("There are a total of {} players on these Utah_Jazz rosters".format(len(Utah_Jazz)))

There are a total of 733 players on these Utah_Jazz rosters


In [37]:
Washington_Wizards = pd.DataFrame()

for year in years[: 23]:
    url = "https://www.basketball-reference.com/teams/WSB/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Washington_Wizards = pd.concat([Washington_Wizards, players], axis = 0)
    
for year in years[23:]:
    url = "https://www.basketball-reference.com/teams/WAS/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Washington_Wizards = pd.concat([Washington_Wizards, players], axis = 0)
    
Washington_Wizards.to_csv('Washington_Wizards.csv')

print("There are a total of {} players on these Washington_Wizards rosters".format(len(Washington_Wizards)))

There are a total of 779 players on these Washington_Wizards rosters


In [38]:
OklahomaCity_Thunder = pd.DataFrame()

for year in years[: 33]:
    url = "https://www.basketball-reference.com/teams/SEA/{}.html".format(year)
    players = NBA_Rosters(url)
    
    OklahomaCity_Thunder = pd.concat([OklahomaCity_Thunder, players], axis = 0)
    
for year in years[33:]:
    url = "https://www.basketball-reference.com/teams/OKC/{}.html".format(year)
    players = NBA_Rosters(url)
    
    OklahomaCity_Thunder = pd.concat([OklahomaCity_Thunder, players], axis = 0)
    
OklahomaCity_Thunder.to_csv('OklahomaCity_Thunder.csv')

print("There are a total of {} players on these OklahomaCity_Thunder rosters".format(len(OklahomaCity_Thunder)))

There are a total of 755 players on these OklahomaCity_Thunder rosters


In [39]:
Sacramento_Kings = pd.DataFrame()

for year in years[: 10]:
    url = "https://www.basketball-reference.com/teams/KCK/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Sacramento_Kings = pd.concat([Sacramento_Kings, players], axis = 0)
    
for year in years[10:]:
    url = "https://www.basketball-reference.com/teams/SAC/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Sacramento_Kings = pd.concat([Sacramento_Kings, players], axis = 0)
    
Sacramento_Kings.to_csv('Sacramento_Kings.csv')

print("There are a total of {} players on these Sacramento_Kings rosters".format(len(Sacramento_Kings)))

There are a total of 770 players on these Sacramento_Kings rosters


In [40]:
Toronto_Raptors = pd.DataFrame()

for year in years[20:]:
    url = "https://www.basketball-reference.com/teams/TOR/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Toronto_Raptors = pd.concat([Toronto_Raptors, players], axis = 0)
    
Toronto_Raptors.to_csv('Toronto_Raptors.csv')

print("There are a total of {} players on these Toronto_Raptors rosters".format(len(Toronto_Raptors)))

There are a total of 484 players on these Toronto_Raptors rosters


In [41]:
Orlando_Magic = pd.DataFrame()

for year in years[14:]:
    url = "https://www.basketball-reference.com/teams/ORL/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Orlando_Magic = pd.concat([Orlando_Magic, players], axis = 0)
    
Orlando_Magic.to_csv('Orlando_Magic.csv')

print("There are a total of {} players on these Orlando_Magic rosters".format(len(Orlando_Magic)))

There are a total of 553 players on these Orlando_Magic rosters


In [42]:
Minnesota_Timberwolves = pd.DataFrame()

for year in years[14:]:
    url = "https://www.basketball-reference.com/teams/MIN/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Minnesota_Timberwolves = pd.concat([Minnesota_Timberwolves, players], axis = 0)
    
Minnesota_Timberwolves.to_csv('Minnesota_Timberwolves.csv')

print("There are a total of {} players on these Minnesota_Timberwolves rosters".format(len(Minnesota_Timberwolves)))

There are a total of 539 players on these Minnesota_Timberwolves rosters


In [43]:
Miami_Heat = pd.DataFrame()

for year in years[13:]:
    url = "https://www.basketball-reference.com/teams/MIA/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Miami_Heat = pd.concat([Miami_Heat, players], axis = 0)
    
Miami_Heat.to_csv('Miami_Heat.csv')

print("There are a total of {} players on these Miami_Heat rosters".format(len(Miami_Heat)))

There are a total of 575 players on these Miami_Heat rosters


In [44]:
Dallas_Mavericks = pd.DataFrame()

for year in years[5:]:
    url = "https://www.basketball-reference.com/teams/DAL/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Dallas_Mavericks = pd.concat([Dallas_Mavericks, players], axis = 0)
    
Dallas_Mavericks.to_csv('Dallas_Mavericks.csv')

print("There are a total of {} players on these Dallas_Mavericks rosters".format(len(Dallas_Mavericks)))

There are a total of 720 players on these Dallas_Mavericks rosters


In [45]:
Charlotte_Hornets = pd.DataFrame()
    
for year in years[29: 39]:
    url = "https://www.basketball-reference.com/teams/CHA/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Charlotte_Hornets = pd.concat([Charlotte_Hornets, players], axis = 0)
    
for year in years[39:]:
    url = "https://www.basketball-reference.com/teams/CHO/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Charlotte_Hornets = pd.concat([Charlotte_Hornets, players], axis = 0)
    
Charlotte_Hornets.to_csv('Charlotte_Hornets.csv')

print("There are a total of {} players on these Charlotte_Hornets rosters".format(len(Charlotte_Hornets)))

There are a total of 302 players on these Charlotte_Hornets rosters


In [46]:
Memphis_Grizzlies = pd.DataFrame()

for year in years[20: 26]:
    url = "https://www.basketball-reference.com/teams/VAN/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Memphis_Grizzlies = pd.concat([Memphis_Grizzlies, players], axis = 0)
    
for year in years[26:]:
    url = "https://www.basketball-reference.com/teams/MEM/{}.html".format(year)
    players = NBA_Rosters(url)
    
    Memphis_Grizzlies = pd.concat([Memphis_Grizzlies, players], axis = 0)
    
Memphis_Grizzlies.to_csv('Memphis_Grizzlies.csv')

print("There are a total of {} players on these Memphis_Grizzlies rosters".format(len(Memphis_Grizzlies)))

There are a total of 479 players on these Memphis_Grizzlies rosters


In [47]:
NewOrleans_Pelicans = pd.DataFrame()

for year in years[13: 27]:
    url = "https://www.basketball-reference.com/teams/CHH/{}.html".format(year)
    players = NBA_Rosters(url)
    
    NewOrleans_Pelicans = pd.concat([NewOrleans_Pelicans, players], axis = 0)
    
for year in years[27: 30]:
    url = "https://www.basketball-reference.com/teams/NOH/{}.html".format(year)
    players = NBA_Rosters(url)
    
    NewOrleans_Pelicans = pd.concat([NewOrleans_Pelicans, players], axis = 0)
    
for year in years[30: 32]:
    url = "https://www.basketball-reference.com/teams/NOK/{}.html".format(year)
    players = NBA_Rosters(url)
    
    NewOrleans_Pelicans = pd.concat([NewOrleans_Pelicans, players], axis = 0)
    
for year in years[32: 38]:
    url = "https://www.basketball-reference.com/teams/NOH/{}.html".format(year)
    players = NBA_Rosters(url)
    
    NewOrleans_Pelicans = pd.concat([NewOrleans_Pelicans, players], axis = 0)
    
for year in years[38:]:
    url = "https://www.basketball-reference.com/teams/NOP/{}.html".format(year)
    players = NBA_Rosters(url)
    
    NewOrleans_Pelicans = pd.concat([NewOrleans_Pelicans, players], axis = 0)
    
NewOrleans_Pelicans.to_csv('NewOrleans_Pelicans.csv')

print("There are a total of {} players on these NewOrleans_Pelicans rosters".format(len(NewOrleans_Pelicans)))

There are a total of 619 players on these NewOrleans_Pelicans rosters
