In [1]:
# Notes

# Other database option - except it doesn't have a table, so we would have to read the raw HTML
# Football DB Link - https://www.footballdb.com/draft/draft.html?lg=NFL&yr=2021&rnd=1

In [2]:
from io import StringIO

from requests import get
from bs4 import BeautifulSoup
import pandas as pd

In [3]:
# Draft year to scrape; also used to build the output CSV file names.
CURRENT_YEAR = 2021
# ROUNDS = [1]  (disabled round filter, currently unused)

# Pro Football Reference draft page for CURRENT_YEAR.
URL = f'https://www.pro-football-reference.com/years/{CURRENT_YEAR}/draft.htm'

# Team nickname (as it appears in the draft table) -> team id used in
# Pro Football Reference team-page URLs.
TEAM_DICT = {
    'Bengals': 'cin',
    'Redskins': 'was',
    'Lions': 'det',
    'Giants': 'nyg',
    'Dolphins': 'mia',
    'Chargers': 'sdg',
    'Panthers': 'car',
    'Cardinals': 'crd',
    'Jaguars': 'jax',
    'Browns': 'cle',
    'Jets': 'nyj',
    'Raiders': 'rai',
    'Buccaneers': 'tam',
    '49ers': 'sfo',
    'Broncos': 'den',
    'Falcons': 'atl',
    'Cowboys': 'dal',
    'Eagles': 'phi',
    'Vikings': 'min',
    'Saints': 'nor',
    'Packers': 'gnb',
    'Seahawks': 'sea',
    'Ravens': 'rav',
    'Titans': 'oti',
    'Chiefs': 'kan',
    'Colts': 'clt',
    'Texans': 'htx',
    'Bears': 'chi',
    'Steelers': 'pit',
    'Rams': 'ram',
    'Bills': 'buf',
    'Patriots': 'nwe',
    'Football Team': 'was',
}

In [4]:
# Download the draft page. Fail right away on an HTTP error status so a
# blocked or missing page is not reported later as "no tables found".
response = get(URL)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
table = soup.find_all('table')
# Hand read_html a file-like object: passing literal HTML as a plain string
# is deprecated in recent pandas. The first table on the page is the draft order.
df = pd.read_html(StringIO(str(table)))[0]

In [5]:
# Preview the draft table exactly as scraped (columns Rnd, Pick, Tm, Notes).
df

Unnamed: 0,Rnd,Pick,Tm,Notes
0,1,1,Jaguars,
1,1,2,Jets,
2,1,3,49ers,
3,1,4,Falcons,
4,1,5,Bengals,
...,...,...,...,...
254,7,255,Saints,
255,7,256,Packers,
256,7,257,Browns,
257,7,258,Dolphins,


In [6]:
# Drop the notes column and give the remaining columns readable names.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps
# this cell safe to re-run: a second run no longer fails on the missing column.
df = (
    df.drop(columns='Notes', errors='ignore')
      .rename(columns={'Rnd': 'Round', 'Tm': 'Team'})
)
# Remove '*' markers from the team names in one vectorised pass. regex=False
# treats '*' literally, and missing values are left as-is instead of raising.
df['Team'] = df['Team'].str.replace('*', '', regex=False)

In [7]:
# Preview the cleaned table (columns Round, Pick, Team).
df

Unnamed: 0,Round,Pick,Team
0,1,1,Jaguars
1,1,2,Jets
2,1,3,49ers
3,1,4,Falcons
4,1,5,Bengals
...,...,...,...
254,7,255,Saints
255,7,256,Packers
256,7,257,Browns
257,7,258,Dolphins


In [8]:
# Save the cleaned draft order as '<CURRENT_YEAR> Draft Order.csv'
# (UTF-8 with BOM, index column omitted).
df.to_csv(f'{CURRENT_YEAR} Draft Order.csv', index=False, encoding='utf-8-sig')

In [9]:
# Keep only the first-round picks. df is rebound here, so every later cell
# that reads df sees round 1 only. Then display just the team column.
df = df.loc[df['Round'].eq(1)]
df.loc[:, ['Team']]

Unnamed: 0,Team
0,Jaguars
1,Jets
2,49ers
3,Falcons
4,Bengals
5,Dolphins
6,Lions
7,Panthers
8,Broncos
9,Cowboys


In [10]:
def _get_url(team_name: str, year) -> str:
    base_url = 'https://www.pro-football-reference.com/teams/'
    end_url = '.htm'
    try: 
        team_id = TEAM_DICT[team_name]
    except:
        team_id = team_name
    return base_url + team_id + '/' + str(year) + end_url

def _get_team_data(url):
    """Download a team page and flatten its first table into a single row.

    Parameters
    ----------
    url : str
        Address of the team page to download.

    Returns
    -------
    pandas.DataFrame
        One-row frame. The first table on the page is expected to have a
        two-level header and four rows; the rows are labelled 'Team', 'Opp.',
        'Off. Rank' and 'Def. Rank' and stacked side by side, giving columns
        named '<row label> <column name>'.
    pandas.Series
        An empty Series when the response has an error status or no table can
        be parsed from the page.

    Raises
    ------
    ValueError
        If the first table does not have exactly four rows (the fixed row
        labels cannot be assigned).
    """
    empty_result = pd.Series(dtype=object)  # explicit dtype: bare pd.Series() is deprecated

    # Get Table DF
    response = get(url)
    if not response.ok:
        # Error page (unknown team id, rate limiting, ...): nothing to parse.
        return empty_result
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find_all('table')
    if not table:
        return empty_result
    try:
        # File-like input: literal HTML strings are deprecated in recent pandas.
        df = pd.read_html(StringIO(str(table)))[0]
    except ValueError:
        # read_html raises ValueError when it cannot extract any table. Other
        # errors (e.g. a missing parser library) are left to propagate.
        return empty_result

    # Fix Multi Columns: join the two header levels; when the joined name
    # contains 'Unnamed' keep only the second level, otherwise concatenate both.
    joined_names = df.columns.map('{0[0]}|{0[1]}'.format)
    df.columns = [
        name.split('|')[1] if 'Unnamed' in name else name.replace('|', '')
        for name in joined_names
    ]

    # Convert Multi Rows To One Row
    df.index = ['Team', 'Opp.', 'Off. Rank', 'Def. Rank']
    df = df.drop(columns='Player')
    df = df.stack().to_frame().T
    df.columns = df.columns.map('{0[0]} {0[1]}'.format)

    return df

def make_stats_df(picks_df):
    """Scrape one row of team stats for every pick in ``picks_df``.

    Each pick's 'Team' value is turned into a team-page URL for the year
    before CURRENT_YEAR and scraped with ``_get_team_data``. Progress is
    printed after every request.

    Parameters
    ----------
    picks_df : pandas.DataFrame
        Draft picks; must contain a 'Team' column.

    Returns
    -------
    pandas.DataFrame
        One row per pick, in the order of ``picks_df``, with a fresh
        0..n-1 index. A pick whose page could not be scraped contributes a row
        of missing values so the rows stay aligned with the picks.
    """
    draft_year = CURRENT_YEAR - 1  # identical for every pick, so computed once
    total = len(picks_df.index)
    frames = []

    # Count by position so the progress message is right for any index labels.
    for done, label in enumerate(picks_df.index, start=1):
        team_name = picks_df.loc[label, 'Team']
        url = _get_url(team_name, draft_year)

        team_stats = _get_team_data(url)
        if not isinstance(team_stats, pd.DataFrame) or team_stats.empty:
            # Failed scrape: a one-row, zero-column frame becomes an
            # all-missing row once concatenated with the real results.
            team_stats = pd.DataFrame(index=[0])
        frames.append(team_stats)

        print('Scraped ' + str(done) + '/' + str(total))

    if not frames:
        return pd.DataFrame([])
    # Single concat at the end: DataFrame.append was removed in pandas 2.0 and
    # re-copied the whole frame on every iteration.
    return pd.concat(frames, ignore_index=True)

In [11]:
# Scrape the team page of every pick currently in df (one request per pick;
# progress is printed as 'Scraped k/total').
stats_df = make_stats_df(df)

Scraped 1/32
Scraped 2/32
Scraped 3/32
Scraped 4/32
Scraped 5/32
Scraped 6/32
Scraped 7/32
Scraped 8/32
Scraped 9/32
Scraped 10/32
Scraped 11/32
Scraped 12/32
Scraped 13/32
Scraped 14/32
Scraped 15/32
Scraped 16/32
Scraped 17/32
Scraped 18/32
Scraped 19/32
Scraped 20/32
Scraped 21/32
Scraped 22/32
Scraped 23/32
Scraped 24/32
Scraped 25/32
Scraped 26/32
Scraped 27/32
Scraped 28/32
Scraped 29/32
Scraped 30/32
Scraped 31/32
Scraped 32/32


In [12]:
# Put picks and scraped stats side by side. concat(axis=1) matches rows by
# index label, and stats_df is numbered 0..n-1 in pick order, so both sides
# are renumbered first. This keeps the join positional for any filtered df.
final = pd.concat(
    [df.reset_index(drop=True), stats_df.reset_index(drop=True)],
    axis=1,
)
final

Unnamed: 0,Round,Pick,Team,Team PF,Team Yds,Team Tot Yds & TOPly,Team Tot Yds & TOY/P,Team Tot Yds & TOTO,Team FL,Team 1stD,...,Def. Rank RushingYds,Def. Rank RushingTD,Def. Rank RushingY/A,Def. Rank Sc%,Def. Rank TO%,Def. Rank Average DriveStart,Def. Rank Average DriveTime,Def. Rank Average DrivePlays,Def. Rank Average DriveYds,Def. Rank Average DrivePts
0,1,1,Jaguars,306,5218,997,5.2,25,9,310,...,30,29,27,4,26,29,22,13,26,29
1,1,2,Jets,243,4479,948,4.7,19,5,269,...,12,14,7,8,20,26,26,21,23,24
2,1,3,49ers,376,5922,1046,5.7,31,14,350,...,7,4,6,20,23,25,3,5,4,8
3,1,4,Falcons,396,5895,1078,5.5,18,7,366,...,6,13,14,14,16,15,11,16,27,21
4,1,5,Bengals,311,5116,1040,4.9,24,13,318,...,29,7,31,11,25,18,7,8,20,20
5,1,6,Dolphins,404,5424,1021,5.3,20,7,345,...,16,18,18,29,1,6,4,6,14,5
6,1,7,Lions,377,5603,991,5.7,21,8,350,...,28,32,15,3,31,11,28,26,32,32
7,1,8,Panthers,350,5592,993,5.6,21,5,335,...,20,18,28,6,11,23,24,28,24,26
8,1,9,Broncos,323,5369,1030,5.2,32,9,308,...,25,28,29,9,30,30,11,11,11,19
9,1,10,Cowboys,395,5949,1113,5.3,26,13,371,...,31,25,30,12,15,28,17,12,19,25


In [13]:
# Save the combined picks + stats table as '<CURRENT_YEAR> Raw Test Data.csv'
# (UTF-8 with BOM, index column omitted).
final.to_csv(f'{CURRENT_YEAR} Raw Test Data.csv', index=False, encoding='utf-8-sig')