In [1]:
from io import StringIO
from pprint import pprint

import pandas as pd
from bs4 import BeautifulSoup
from requests import get

In [2]:
# The input and output CSV file names span FIRST_YEAR to CURRENT_YEAR - 1.
FIRST_YEAR = 2000
CURRENT_YEAR = 2021

# First-round draft picks, loaded from '<FIRST_YEAR>-<CURRENT_YEAR - 1> 1st Round Draft Picks.csv'.
DRAFT_PICKS = pd.read_csv(
    '-'.join([str(FIRST_YEAR), str(CURRENT_YEAR - 1)]) + ' 1st Round Draft Picks.csv',
    encoding='utf-8',
)

# Team nickname (as it appears in the 'Team' column) -> team id used in
# pro-football-reference.com team URLs (see get_url).
TEAM_DICT = {
    'Bengals': 'cin', 'Redskins': 'was', 'Lions': 'det', 'Giants': 'nyg',
    'Dolphins': 'mia', 'Chargers': 'sdg', 'Panthers': 'car', 'Cardinals': 'crd',
    'Jaguars': 'jax', 'Browns': 'cle', 'Jets': 'nyj', 'Raiders': 'rai',
    'Buccaneers': 'tam', '49ers': 'sfo', 'Broncos': 'den', 'Falcons': 'atl',
    'Cowboys': 'dal', 'Eagles': 'phi', 'Vikings': 'min', 'Saints': 'nor',
    'Packers': 'gnb', 'Seahawks': 'sea', 'Ravens': 'rav', 'Titans': 'oti',
    'Chiefs': 'kan', 'Colts': 'clt', 'Texans': 'htx', 'Bears': 'chi',
    'Steelers': 'pit', 'Rams': 'ram', 'Bills': 'buf', 'Patriots': 'nwe',
}

In [3]:
# Preview the loaded draft picks (the rendered output below shows only the first and last rows).
DRAFT_PICKS 

Unnamed: 0,Year,Round,Pick,Name,Team,Position
0,2020,1,1,Joe Burrow,Bengals,QB
1,2020,1,2,Chase Young,Redskins,DE
2,2020,1,3,Jeff Okudah,Lions,DB
3,2020,1,4,Andrew Thomas,Giants,T
4,2020,1,5,Tua Tagovailoa,Dolphins,QB
...,...,...,...,...,...,...
663,2000,1,27,Anthony Becht,Jets,TE
664,2000,1,28,Rob Morris,Colts,LB
665,2000,1,29,R. Jay Soward,Jaguars,WR
666,2000,1,30,Keith Bulluck,Titans,LB


In [4]:
def get_url(team_name: str, year) -> str:
    """Build the pro-football-reference.com season page URL for a team.

    Parameters
    ----------
    team_name : str
        Team nickname as keyed in ``TEAM_DICT`` (e.g. ``'Bengals'``). A value
        that is not a key of ``TEAM_DICT`` is used verbatim as the team id,
        so an id such as ``'cin'`` may be passed directly.
    year
        Season to link to; converted with ``str()``.

    Returns
    -------
    str
        ``https://www.pro-football-reference.com/teams/<team_id>/<year>.htm``
    """
    base_url = 'https://www.pro-football-reference.com/teams/'
    end_url = '.htm'
    # dict.get gives the same "unknown name -> use it as the id" fallback as
    # the former bare ``except:``, without also swallowing unrelated errors
    # (e.g. a NameError when the config cell defining TEAM_DICT was not run).
    team_id = TEAM_DICT.get(team_name, team_name)
    return base_url + team_id + '/' + str(year) + end_url

def get_team_data(url):
    """Scrape the first HTML table on a team-season page into one wide row.

    The page is downloaded, its ``<table>`` elements are parsed with
    ``pandas.read_html`` and the first resulting table is reshaped: its rows
    are labelled 'Team', 'Opp.', 'Off. Rank' and 'Def. Rank', the 'Player'
    column is dropped, and the remaining cells are flattened into a single
    row whose column names are '<row label> <stat name>'.

    Parameters
    ----------
    url : str
        Address of the page to scrape (see ``get_url``).

    Returns
    -------
    pandas.DataFrame or pandas.Series
        A one-row DataFrame on success. An empty Series when the page has no
        table or the table cannot be parsed; appending that Series to a
        DataFrame yields an all-NaN row, which keeps the scraped rows aligned
        with the draft picks.

    Raises
    ------
    ValueError
        If the parsed table does not have exactly four rows.
    """
    # Get Table DF
    response = get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.find_all('table')
    if not tables:
        # Nothing to parse (e.g. the team has no page for that season).
        return pd.Series(dtype=object)
    try:
        # Hand read_html a file-like object: passing literal HTML as a plain
        # string is deprecated in recent pandas releases.
        html = ''.join(str(t) for t in tables)
        df = pd.read_html(StringIO(html))[0]
    except (ValueError, IndexError):
        # read_html raises ValueError when it finds no usable table. Other
        # errors (such as a missing parser library) are deliberately not
        # caught, so they surface instead of producing silent empty rows.
        return pd.Series(dtype=object)

    # Fix Multi Columns: join the two header levels, keeping only the lower
    # level when the upper one is an auto-generated 'Unnamed: ...' label.
    joined = df.columns.map('{0[0]}|{0[1]}'.format)
    df.columns = [
        name.split('|')[1] if 'Unnamed' in name else name.replace('|', '')
        for name in joined
    ]

    # Convert Multi Rows To One Row
    df.index = ['Team', 'Opp.', 'Off. Rank', 'Def. Rank']
    df = df.drop(columns='Player')
    wide = pd.DataFrame(df.stack()).T
    wide.columns = wide.columns.map('{0[0]} {0[1]}'.format)

    return wide

def make_stats_df():
    """Scrape the previous-season team stats for every row of ``DRAFT_PICKS``.

    For each pick, the page of the drafting team for the season before the
    draft (``Year - 1``) is fetched with ``get_team_data``. Progress is
    printed as 'Scraped <n>/<total>' after each pick.

    Returns
    -------
    pandas.DataFrame
        One row per draft pick, in ``DRAFT_PICKS`` order, with a fresh
        0..n-1 index. Picks whose page could not be scraped (``get_team_data``
        returned an empty Series) become all-NaN rows so the result stays
        aligned with ``DRAFT_PICKS``.
    """
    total = len(DRAFT_PICKS.index)
    scraped = {}  # url -> one-row frame; a team with several picks in a year is fetched once
    rows = []

    picks = zip(DRAFT_PICKS['Year'], DRAFT_PICKS['Team'])
    for done, (year, team_name) in enumerate(picks, start=1):
        url = get_url(team_name, year - 1)

        if url not in scraped:
            new = get_team_data(url)
            if isinstance(new, pd.Series):
                # Same conversion DataFrame.append applied to a Series: it
                # becomes a single row (all-NaN once combined, if empty).
                new = new.to_frame().T
            scraped[url] = new
        rows.append(scraped[url])

        print('Scraped ' + str(done) + '/' + str(total))

    if not rows:
        # pd.concat rejects an empty list.
        return pd.DataFrame([])
    # A single concat replaces the removed DataFrame.append, which also
    # re-copied the whole frame on every iteration.
    return pd.concat(rows, ignore_index=True, sort=False)

In [5]:
# Scrapes the team-season pages for every draft pick over the network,
# printing progress as it goes. Takes about 5 minutes to run.
df = make_stats_df()

Scraped 0/668
Scraped 1/668
Scraped 2/668
Scraped 3/668
Scraped 4/668
Scraped 5/668
Scraped 6/668
Scraped 7/668
Scraped 8/668
Scraped 9/668
Scraped 10/668
Scraped 11/668
Scraped 12/668
Scraped 13/668
Scraped 14/668
Scraped 15/668
Scraped 16/668
Scraped 17/668
Scraped 18/668
Scraped 19/668
Scraped 20/668
Scraped 21/668
Scraped 22/668
Scraped 23/668
Scraped 24/668
Scraped 25/668
Scraped 26/668
Scraped 27/668
Scraped 28/668
Scraped 29/668
Scraped 30/668
Scraped 31/668
Scraped 32/668
Scraped 33/668
Scraped 34/668
Scraped 35/668
Scraped 36/668
Scraped 37/668
Scraped 38/668
Scraped 39/668
Scraped 40/668
Scraped 41/668
Scraped 42/668
Scraped 43/668
Scraped 44/668
Scraped 45/668
Scraped 46/668
Scraped 47/668
Scraped 48/668
Scraped 49/668
Scraped 50/668
Scraped 51/668
Scraped 52/668
Scraped 53/668
Scraped 54/668
Scraped 55/668
Scraped 56/668
Scraped 57/668
Scraped 58/668
Scraped 59/668
Scraped 60/668
Scraped 61/668
Scraped 62/668
Scraped 63/668
Scraped 64/668
Scraped 65/668
Scraped 66/668
Scrap

Scraped 519/668
Scraped 520/668
Scraped 521/668
Scraped 522/668
Scraped 523/668
Scraped 524/668
Scraped 525/668
Scraped 526/668
Scraped 527/668
Scraped 528/668
Scraped 529/668
Scraped 530/668
Scraped 531/668
Scraped 532/668
Scraped 533/668
Scraped 534/668
Scraped 535/668
Scraped 536/668
Scraped 537/668
Scraped 538/668
Scraped 539/668
Scraped 540/668
Scraped 541/668
Scraped 542/668
Scraped 543/668
Scraped 544/668
Scraped 545/668
Scraped 546/668
Scraped 547/668
Scraped 548/668
Scraped 549/668
Scraped 550/668
Scraped 551/668
Scraped 552/668
Scraped 553/668
Scraped 554/668
Scraped 555/668
Scraped 556/668
Scraped 557/668
Scraped 558/668
Scraped 559/668
Scraped 560/668
Scraped 561/668
Scraped 562/668
Scraped 563/668
Scraped 564/668
Scraped 565/668
Scraped 566/668
Scraped 567/668
Scraped 568/668
Scraped 569/668
Scraped 570/668
Scraped 571/668
Scraped 572/668
Scraped 573/668


  return pd.Series()


Scraped 574/668
Scraped 575/668
Scraped 576/668
Scraped 577/668
Scraped 578/668
Scraped 579/668
Scraped 580/668
Scraped 581/668
Scraped 582/668
Scraped 583/668
Scraped 584/668
Scraped 585/668
Scraped 586/668
Scraped 587/668
Scraped 588/668
Scraped 589/668
Scraped 590/668
Scraped 591/668
Scraped 592/668
Scraped 593/668
Scraped 594/668
Scraped 595/668
Scraped 596/668
Scraped 597/668
Scraped 598/668
Scraped 599/668
Scraped 600/668
Scraped 601/668
Scraped 602/668
Scraped 603/668
Scraped 604/668
Scraped 605/668
Scraped 606/668
Scraped 607/668
Scraped 608/668
Scraped 609/668
Scraped 610/668
Scraped 611/668
Scraped 612/668
Scraped 613/668
Scraped 614/668
Scraped 615/668
Scraped 616/668
Scraped 617/668
Scraped 618/668
Scraped 619/668
Scraped 620/668
Scraped 621/668
Scraped 622/668
Scraped 623/668
Scraped 624/668
Scraped 625/668
Scraped 626/668
Scraped 627/668
Scraped 628/668
Scraped 629/668
Scraped 630/668
Scraped 631/668
Scraped 632/668
Scraped 633/668
Scraped 634/668
Scraped 635/668
Scraped 

In [6]:
# Preview the scraped team stats: one row per draft pick, one column per '<row label> <stat>'.
df

Unnamed: 0,Team PF,Team Yds,Team Tot Yds & TOPly,Team Tot Yds & TOY/P,Team Tot Yds & TOTO,Team FL,Team 1stD,Team PassingCmp,Team PassingAtt,Team PassingYds,...,Def. Rank RushingYds,Def. Rank RushingTD,Def. Rank RushingY/A,Def. Rank Sc%,Def. Rank TO%,Def. Rank Average DriveStart,Def. Rank Average DriveTime,Def. Rank Average DrivePlays,Def. Rank Average DriveYds,Def. Rank Average DrivePts
0,279,5169,1049,4.9,30,14,312,356,616,3652,...,32,27,25,9,27,12,18,15,28,22
1,266,4395,885,5,21,8,248,298,479,2812,...,31,17,26,4,15,31,31,29,29,30
2,341,5549,1021,5.4,23,8,313,344,571,3900,...,21,14,10,7,25,3,21,28,30,24
3,341,5416,1012,5.4,33,16,311,376,607,3731,...,20,28,4,8,29,25,19,17,20,25
4,306,4960,1022,4.9,26,8,315,371,615,3804,...,27,21,22,1,28,27,20,20,26,32
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
663,308,4752,999,4.8,22,6,268,272,476,2791,...,17,25,20,12,6,4,13,23,22,17
664,423,5726,979,5.8,28,11,327,338,546,4066,...,18,18,26,8,27,14,24,28,25,23
665,396,5586,1085,5.1,18,7,331,320,535,3495,...,7,2,14,30,14,1,9,6,5,4
666,392,5296,1011,5.2,22,9,294,304,527,3485,...,10,7,22,26,4,2,6,11,17,12


In [7]:
# Place each pick's scraped team stats beside its draft record.
# The two frames are matched row by row on their index.
final = pd.concat(objs=[DRAFT_PICKS, df], axis='columns')
final

Unnamed: 0,Year,Round,Pick,Name,Team,Position,Team PF,Team Yds,Team Tot Yds & TOPly,Team Tot Yds & TOY/P,...,Def. Rank RushingYds,Def. Rank RushingTD,Def. Rank RushingY/A,Def. Rank Sc%,Def. Rank TO%,Def. Rank Average DriveStart,Def. Rank Average DriveTime,Def. Rank Average DrivePlays,Def. Rank Average DriveYds,Def. Rank Average DrivePts
0,2020,1,1,Joe Burrow,Bengals,QB,279,5169,1049,4.9,...,32,27,25,9,27,12,18,15,28,22
1,2020,1,2,Chase Young,Redskins,DE,266,4395,885,5,...,31,17,26,4,15,31,31,29,29,30
2,2020,1,3,Jeff Okudah,Lions,DB,341,5549,1021,5.4,...,21,14,10,7,25,3,21,28,30,24
3,2020,1,4,Andrew Thomas,Giants,T,341,5416,1012,5.4,...,20,28,4,8,29,25,19,17,20,25
4,2020,1,5,Tua Tagovailoa,Dolphins,QB,306,4960,1022,4.9,...,27,21,22,1,28,27,20,20,26,32
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
663,2000,1,27,Anthony Becht,Jets,TE,308,4752,999,4.8,...,17,25,20,12,6,4,13,23,22,17
664,2000,1,28,Rob Morris,Colts,LB,423,5726,979,5.8,...,18,18,26,8,27,14,24,28,25,23
665,2000,1,29,R. Jay Soward,Jaguars,WR,396,5586,1085,5.1,...,7,2,14,30,14,1,9,6,5,4
666,2000,1,30,Keith Bulluck,Titans,LB,392,5296,1011,5.2,...,10,7,22,26,4,2,6,11,17,12


In [8]:
# Save the combined table as '<FIRST_YEAR>-<CURRENT_YEAR - 1> Raw Data.csv',
# encoded as UTF-8 with a BOM and without the index column.
final.to_csv(
    '-'.join([str(FIRST_YEAR), str(CURRENT_YEAR - 1)]) + ' Raw Data.csv',
    encoding='utf-8-sig',
    index=False,
)