In [55]:
import pandas as pd
from bs4 import BeautifulSoup
from urllib.request import urlopen

In [56]:
import re
import unicodedata
import warnings
from datetime import datetime
def clean_names(df):
    """Return the ``Player`` column as a list of strings with every ``*`` removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that has a ``Player`` column.

    Returns
    -------
    list of str
        One entry per row, in row order. Each value is passed through
        ``str()`` first, so non-string entries (e.g. ``None``) come back as
        their string form (``'None'``).
    """
    # A literal replace avoids the regex entirely; the previous "\*" pattern was
    # an invalid escape sequence in a non-raw string literal.
    return [str(player).replace("*", "") for player in df['Player']]


def _extract_draft_rows(soup):
    """Return the first four ``<td>`` texts of every data row in the first ``<tbody>``.

    Rows that contain no ``<td>`` cell at all are skipped: they carry no pick
    data and would otherwise end up as all-``None`` rows in the DataFrame
    (presumably these are repeated header rows inside the table body -- confirm
    against the live page).

    Raises
    ------
    ValueError
        If the parsed page has no ``<tbody>`` element.
    """
    tbody = soup.find('tbody')
    if tbody is None:
        raise ValueError("no <tbody> element found in page")

    rows = []
    for tr in tbody.find_all('tr'):
        cells = tr.find_all('td')
        if not cells:
            continue
        rows.append([cell.text for cell in cells[:4]])
    return rows


def _to_numeric_where_possible(frame):
    """Convert each column to a numeric dtype, leaving unparseable columns as they are."""
    converted = frame.copy()
    for col in converted.columns:
        try:
            converted[col] = pd.to_numeric(converted[col])
        except (ValueError, TypeError):
            # Deliberate: a column that is not fully numeric keeps its original
            # values, matching the old ``errors="ignore"`` behaviour without
            # relying on that deprecated option.
            pass
    return converted


def get_drafts(start_year=1970, end_year=None):
    """Scrape the draft tables from basketball-reference.com into one DataFrame.

    One page is fetched per year from
    ``https://www.basketball-reference.com/draft/NBA_{year}.html`` and the
    first four ``<td>`` cells of every table row are kept.

    Parameters
    ----------
    start_year : int, default 1970
        First draft year to fetch (inclusive).
    end_year : int or None, default None
        Last draft year to fetch (inclusive). ``None`` means the current
        calendar year.

    Returns
    -------
    pandas.DataFrame
        Columns ``Pick``, ``Player``, ``School/Country`` and ``Year``. Columns
        whose values all parse as numbers are converted to a numeric dtype;
        the others are left unchanged. If no year could be scraped (or the
        year range is empty) an empty frame with those columns is returned.

    Notes
    -----
    Scraping is best-effort: a year whose page cannot be fetched or parsed is
    skipped, and one warning per failed URL is emitted after the loop so the
    failures are visible instead of being silently discarded.
    """
    if end_year is None:
        end_year = datetime.now().year

    url_template = 'https://www.basketball-reference.com/draft/NBA_{year}.html'
    scraped_columns = ['Pick', 'Tm', 'Player', 'School/Country']
    draft_list = []
    errors_list = []

    for year in range(start_year, end_year + 1):
        url = url_template.format(year=year)
        try:
            # Close the HTTP response as soon as the page has been parsed.
            with urlopen(url) as response:
                soup = BeautifulSoup(response, "lxml")

            df = pd.DataFrame(_extract_draft_rows(soup), columns=scraped_columns)
            df['Year'] = year
            draft_list.append(df)
        except Exception as e:
            # Keep going with the remaining years; remember what failed and why.
            errors_list.append([url, e])

    for failed_url, error in errors_list:
        warnings.warn(
            "get_drafts: skipped {url} ({error!r})".format(url=failed_url, error=error),
            stacklevel=2,
        )

    if not draft_list:
        # pd.concat([]) would raise "No objects to concatenate".
        return pd.DataFrame(columns=['Pick', 'Player', 'School/Country', 'Year'])

    draft_df = pd.concat(draft_list, ignore_index=True)
    draft_df = _to_numeric_where_possible(draft_df)
    return draft_df.drop(columns=['Tm'])


def strip_accents(s):
    """Return ``s`` with combining accent marks removed.

    The string is decomposed with Unicode NFD normalisation, then every
    character whose Unicode category is ``Mn`` is dropped and the remaining
    characters are joined back together.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = []
    for ch in decomposed:
        if unicodedata.category(ch) == 'Mn':
            continue
        kept.append(ch)
    return ''.join(kept)

In [57]:
# Scrape the draft tables, then normalise the player names in two passes:
# first drop '*' characters, then strip accent marks.
df = get_drafts()
df['Player'] = clean_names(df)
df['Player'] = df['Player'].apply(strip_accents)

In [58]:
# Show any rows whose Pick equals 1997.
# NOTE(review): presumably a check for year-like values in the Pick column -- confirm intent.
df.loc[df['Pick'] == 1997]

Unnamed: 0,Pick,Player,School/Country,Year


In [59]:
# Force Pick to a numeric dtype; values that cannot be parsed become NaN.
df['Pick'] = pd.to_numeric(df['Pick'], errors='coerce')

In [60]:
# Pick can hold NaN after the errors='coerce' conversion. The builtin max()
# compares element by element, so its result depends on where the NaNs sit;
# Series.max() skips NaN and always returns the largest real pick number.
df['Pick'].max()

239.0

In [61]:
# Persist the cleaned draft table to disk.
# NOTE(review): with the default index=True the row index is written as an
# unnamed first column -- pass index=False if readers should not see it.
df.to_csv(path_or_buf='Draft.csv')