In [1]:
import pandas as pd
import time
import requests

### Importing Yearly Standard Deviations

In [2]:
pd.set_option('display.max_columns', None)

# Only set to True if 'passing_distribution.csv' does not exist, or is not up to date
first_time = False

# Scrape configuration (tune here rather than inside the loop)
CACHE_PATH = 'passing_distribution.csv'
FIRST_SEASON = 1960
LAST_SEASON = 2023        # inclusive
MIN_ATT_PER_GAME = 14     # a passer qualifies with at least this many attempts per game
REQUEST_DELAY = 3.1       # seconds between requests; presumably keeps us under the site's rate limit


def games_per_season(year):
    """Return the number of games used to scale the attempt threshold for `year`.

    The special cases (1960, 1982, 1987) are the values the analysis relies on;
    NOTE(review): 1982/1987 presumably reflect shortened seasons -- confirm.
    """
    if year == 1960:
        return 12
    if 1961 <= year <= 1977:
        return 14
    if year == 1982:
        return 9
    if year == 1987:
        return 15
    if year >= 2021:
        return 17
    return 16


if first_time:
    dics = []

    # League-wide yearly averages; header=1 selects the second header row as column names
    avgs_url = 'https://www.pro-football-reference.com/years/NFL/passing.htm'
    means = pd.read_html(avgs_url, header=1)[0]
    # Coerce once so the per-year match works whether 'Year' was parsed as text or
    # as integers (non-year cells become NaN and never match).
    means_year = pd.to_numeric(means['Year'], errors='coerce')

    time.sleep(REQUEST_DELAY)

    for i in range(FIRST_SEASON, LAST_SEASON + 1):
        url = f'https://www.pro-football-reference.com/years/{i}/passing.htm'
        df = pd.read_html(url)[0]

        gp = games_per_season(i)
        min_att = MIN_ATT_PER_GAME * gp

        # Non-numeric cells (if any) become NaN: they fail the threshold below and
        # are ignored by .std(), instead of raising on a str/int comparison.
        att = pd.to_numeric(df['Att'], errors='coerce')
        df = df[att >= min_att]
        any_a = pd.to_numeric(df['ANY/A'], errors='coerce')

        season_mean = means.loc[means_year == i, 'ANY/A']
        if season_mean.empty:
            raise IndexError(f"No league-average row found for season {i} at {avgs_url}")

        year_dict = {"Year": i, "Mean": float(season_mean.iloc[0]), "StD": any_a.std()}

        dics.append(year_dict)
        print(f"Year {i} Complete!")

        time.sleep(REQUEST_DELAY)

    year_by_year = pd.DataFrame(dics)

    # index=False avoids writing a stray 'Unnamed: 0' column into the cache
    year_by_year.to_csv(CACHE_PATH, index=False)
else:
    # Selecting the columns explicitly also tolerates older cache files that
    # still contain the saved index column.
    year_by_year = pd.read_csv(CACHE_PATH)[['Year', 'Mean', 'StD']]

# Adding Coefficient of Variation (standard deviation relative to the mean)
year_by_year['CV'] = year_by_year["StD"] / year_by_year['Mean'].astype(float)

year_by_year

Unnamed: 0,Year,Mean,StD,CV
0,1960,4.5,2.390281,0.531174
1,1961,4.9,1.582216,0.322901
2,1962,5.5,1.191261,0.216593
3,1963,4.9,1.204944,0.245907
4,1964,4.6,1.510419,0.328352
...,...,...,...,...
59,2019,6.2,1.122476,0.181045
60,2020,6.4,1.268942,0.198272
61,2021,6.1,1.072539,0.175826
62,2022,5.9,0.975593,0.165355


### List of Quarterbacks with ≥2000 Pass Attempts

In [8]:
# Career pass-attempt leaderboard, reduced to passers with at least 2000 attempts.
url = 'https://www.pro-football-reference.com/leaders/pass_att_career.htm'
df_att = pd.read_html(url)[0]

has_enough_attempts = df_att['Att'] >= 2000
df_qual = (
    df_att[has_enough_attempts]
    .assign(
        # Drop every character that is not an ASCII letter or whitespace
        Player=lambda frame: frame['Player'].str.replace(r'[^a-zA-Z\s]', '', regex=True),
        # First and last four characters of the 'Years' text
        Start=lambda frame: frame['Years'].str[:4],
        End=lambda frame: frame['Years'].str[-4:],
    )
    .loc[:, ['Player', 'Start', 'End']]
)
df_qual

Unnamed: 0,Player,Start,End
0,Tom Brady,2000,2022
1,Drew Brees,2001,2020
2,Brett Favre,1991,2010
3,Peyton Manning,1998,2015
4,Matt Ryan,2008,2022
...,...,...,...
169,Teddy Bridgewater,2014,2023
170,Josh Freeman,2009,2015
171,Rick Mirer,1993,2003
172,Trevor Lawrence,2021,2024


In [27]:
# Spot check: the player name stored under index label 0
df_qual.loc[0, 'Player']

'Tom Brady'

In [28]:
# Spot check: filter the qualifying list down to a single player by exact name
df_qual.loc[df_qual['Player'].eq('Tom Brady')]

Unnamed: 0,Player,Start,End
0,Tom Brady,2000,2022


In [43]:
name_str = "Joe Montana"
split = name_str.split()

# First season recorded for this player in df_qual; looked up once, outside the loop.
start = df_qual[df_qual['Player'] == name_str].iloc[0]['Start']

# Try candidate page IDs <Last[:4]><First[:2]>00, ...01, ... until the first
# 'Season' on the page equals the player's starting year.
for val in range(100):
    if val:
        # Pause between successive page requests, matching the delay used by the
        # yearly scrape loop.
        time.sleep(3.1)

    # Two-digit, zero-padded suffix: val=1 -> "01", val=10 -> "10"
    # (plain "0" + str(val) would yield "010" once val reaches 10).
    lookup = f"{split[1][:4]}{split[0][:2]}{val:02d}"

    url = f'https://www.pro-football-reference.com/players/{split[1][0]}/{lookup}.htm'
    df_test = pd.read_html(url)[0]
    if 'Season' not in df_test.columns.tolist():
        # Column names were not found on the first header row; re-read using the second.
        time.sleep(3.1)
        df_test = pd.read_html(url, header=1)[0]

    # Compare as text so the match does not depend on how pandas typed 'Season'.
    if str(df_test.loc[0, "Season"]) == str(start):
        break
else:
    raise LookupError(f"No player page found for {name_str!r} with first season {start}")

df_test

Unnamed: 0,Season,Age,Team,Lg,Pos,G,GS,QBrec,Cmp,Att,Cmp%,Yds,TD,TD%,Int,Int%,1D,Succ%,Lng,Y/A,AY/A,Y/C,Y/G,Rate,Sk,Yds.1,Sk%,NY/A,ANY/A,4QC,GWD,AV,Awards
0,1979,23,SFO,NFL,QB,16,1,0-1-0,13,23,56.5,96,1,4.3,0,0.0,,,18,4.2,5.04,7.4,6.0,81.1,0,0,0.00,4.17,5.04,0,0,0,
1,1980,24,SFO,NFL,QB,15,7,2-5-0,176,273,64.5,1795,15,5.5,9,3.3,,,71,6.6,6.19,10.2,119.7,87.8,15,100,5.21,5.89,5.52,1,1,6,
2,1981,25,SFO,NFL,QB,16,16,13-3-0,311,488,63.7,3565,19,3.9,12,2.5,,,78,7.3,6.98,11.5,222.8,88.4,26,193,5.06,6.56,6.25,2,2,12,"PB,AP-2,AP MVP-2,AP OPoY-3"
3,1982,26,SFO,NFL,QB,9,9,3-6-0,213,346,61.6,2613,17,4.9,11,3.2,,,55,7.6,7.10,12.3,290.3,88.0,20,166,5.46,6.69,6.26,2,2,16,
4,1983,27,SFO,NFL,QB,16,16,10-6-0,332,515,64.5,3910,26,5.0,12,2.3,,,77,7.6,7.55,11.8,244.4,94.6,33,224,6.02,6.73,6.69,2,1,17,"PB,AP MVP-5,AP OPoY-5"
5,1984,28,SFO,NFL,QB,16,15,14-1-0,279,432,64.6,3630,28,6.5,10,2.3,,,80,8.4,8.66,13.0,226.9,102.9,22,138,4.85,7.69,7.93,2,2,16,"PB,AP-2"
6,1985,29,SFO,NFL,QB,15,15,9-6-0,303,494,61.3,3653,27,5.5,13,2.6,,,73,7.4,7.30,12.1,243.5,91.3,35,246,6.62,6.44,6.36,1,1,13,PB
7,1986,30,SFO,NFL,QB,8,8,6-2-0,191,307,62.2,2236,8,2.6,9,2.9,,,48,7.3,6.49,11.7,279.5,80.7,12,95,3.76,6.71,5.94,1,1,7,AP MVP-6
8,1987,31,SFO,NFL,QB,13,11,10-1-0,266,398,66.8,3054,31,7.8,13,3.3,,,57,7.7,7.76,11.5,234.9,102.1,22,158,5.24,6.90,6.98,3,3,16,"PB,AP-1,AP MVP-3,AP OPoY-2"
9,1988,32,SFO,NFL,QB,14,13,8-5-0,238,397,59.9,2981,18,4.5,10,2.5,,,96,7.5,7.28,12.5,212.9,87.9,34,223,7.89,6.40,6.19,2,2,13,


In [41]:
# Read one player page with header=1 and list the resulting column labels,
# to see which row pandas ends up using as the header for this page.
url = 'https://www.pro-football-reference.com/players/M/MontJo01.htm'
player_tables = pd.read_html(url, header=1)
df_test = player_tables[0]

list(df_test.columns)

['1979',
 '23',
 'SFO',
 'NFL',
 'QB',
 '16',
 '1',
 '0-1-0',
 '13',
 '23.1',
 '56.5',
 '96',
 '1.1',
 '4.3',
 '0',
 '0.0',
 'Unnamed: 16',
 'Unnamed: 17',
 '18',
 '4.2',
 '5.04',
 '7.4',
 '6.0',
 '81.1',
 '0.1',
 '0.2',
 '0.00',
 '4.17',
 '5.04.1',
 '0.3',
 '0.4',
 '0.5',
 'Unnamed: 32']