### Scrape pro-football-reference.com for total stats in 2021

In [1]:
import pandas as pd
import numpy as np
from urllib.request import urlopen
from bs4 import BeautifulSoup
from datetime import datetime

In [2]:
year = 2021
url = "https://www.pro-football-reference.com/years/{}/fantasy.htm#".format(year)
html = urlopen(url)
soup = BeautifulSoup(html)

In [3]:
headers = [th.getText() for th in soup.findAll('tr')[1].findAll('th')]
headers = headers[1:]

In [4]:
rows = soup.findAll('tr', class_ = lambda table_rows: table_rows != "thead")
player_stats = [[td.getText() for td in rows[i].findAll('td')]
               for i in range(len(rows))]
player_stats = player_stats[2:]

In [5]:
stats = pd.DataFrame(player_stats, columns = headers)

In [6]:
stats = stats.replace(r'', 0, regex = True)
stats = stats.rename(columns={'FantPos': 'Pos', 'Tm': 'Team', 'PPR': 'Points'})
stats.columns.values[7] = 'Pass Att'
stats.columns.values[8] = 'Pass Y'
stats.columns.values[9] = 'Pass TD'
stats.columns.values[11] = 'Rush Att'
stats.columns.values[12] = 'Rush Y'
stats.columns.values[14] = 'Rush TD'
stats.columns.values[17] = 'Rec Y'
stats.columns.values[19] = 'Rec TD'

In [7]:
stats = stats[~stats['Int'].isna()]
stats = stats.drop(columns = ['DKPt', 'FantPt', 'FDPt', 'VBD'])

In [8]:
conv_dict = {'Age': int,
             'G': int,
             'GS': int,
             'Cmp': int,
             'Pass Att': int,
             'Pass Y': int,
             'Pass TD': int,
             'Int': int,
             'Rush Att': int,
             'Rush Y': int,
             'Y/A': float,
             'Rush TD': int,
             'Tgt': int,
             'Rec': int,
             'Rec Y': int,
             'Y/R': float,
             'Rec TD': int,
             'Fmb': int,
             'FL': int,
             'TD': int,
             '2PM': int,
             '2PP': int,
             'Points': float,
             'PosRank': int,
             'OvRank': int
             }

In [9]:
stats = stats.astype(conv_dict)

In [10]:
stats['Pts/G'] = (stats['Points'] / stats['G']).round(1)
stats['Tgt/G'] = (stats['Tgt'] / stats['G']).round(1)
stats['Rec/Tgt'] = (stats['Rec'] / stats['Tgt']).round(1)
stats['Pts/Tgt'] = (stats['Points'] / stats['Tgt']).round(1)
stats['Touches'] = (stats['Rush Att'] + stats['Rec'])
stats['Player'] = stats['Player'].str.rstrip('*+')

In [11]:
stats

Unnamed: 0,Player,Team,Pos,Age,G,GS,Cmp,Pass Att,Pass Y,Pass TD,...,2PM,2PP,Points,PosRank,OvRank,Pts/G,Tgt/G,Rec/Tgt,Pts/Tgt,Touches
0,Jonathan Taylor,IND,RB,22,17,17,0,0,0,0,...,0,0,373.1,1,1,21.9,3.0,0.8,7.3,372
1,Cooper Kupp,LAR,WR,28,17,17,0,1,0,0,...,1,0,439.5,1,2,25.9,11.2,0.8,2.3,149
2,Deebo Samuel,SFO,WR,25,16,15,1,2,24,1,...,0,0,339.0,2,3,21.2,7.6,0.6,2.8,136
3,Josh Allen,BUF,QB,25,17,17,409,646,4407,36,...,2,1,402.6,1,4,23.7,0.0,,inf,122
4,Austin Ekeler,LAC,RB,26,16,16,0,0,0,0,...,2,0,343.8,2,5,21.5,5.9,0.7,3.7,276
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
666,Travis Benjamin,SFO,0,32,10,0,0,0,0,0,...,0,0,-2.0,260,0,-0.2,0.5,0.0,-0.4,0
667,Trenton Cannon,2TM,RB,27,12,0,0,0,0,0,...,0,0,-1.6,179,0,-0.1,0.0,,-inf,3
668,John Wolford,LAR,QB,26,3,0,1,4,5,0,...,0,0,-1.9,84,0,-0.6,0.0,,-inf,2
669,Josh Rosen,ATL,QB,24,4,0,2,11,19,0,...,0,0,-3.2,85,0,-0.8,0.0,,-inf,0


In [12]:
stats.to_csv('2021PlayerStats.csv')

In [13]:
def player_csv(year):
    
    url = url = "https://www.pro-football-reference.com/years/{}/fantasy.htm#".format(year)
    html = urlopen(url)
    soup = BeautifulSoup(html)
    
    headers = [th.getText() for th in soup.findAll('tr')[1].findAll('th')]
    headers = headers[1:]
    
    rows = soup.findAll('tr', class_ = lambda table_rows: table_rows != "thead")
    player_stats = [[td.getText() for td in rows[i].findAll('td')]
                   for i in range(len(rows))]
    player_stats = player_stats[2:]
    
    stats = pd.DataFrame(player_stats, columns = headers)
    
    stats = stats.replace(r'', 0, regex = True) # Replace empty values with 0 & Rename Columns
    stats = stats.rename(columns={'FantPos': 'Pos', 'Tm': 'Team', 'PPR': 'Points'}) 
    stats.columns.values[7] = 'Pass Att'
    stats.columns.values[8] = 'Pass Y'
    stats.columns.values[9] = 'Pass TD'
    stats.columns.values[11] = 'Rush Att'
    stats.columns.values[12] = 'Rush Y'
    stats.columns.values[14] = 'Rush TD'
    stats.columns.values[17] = 'Rec Y'
    stats.columns.values[19] = 'Rec TD'
    
    stats = stats[~stats['Int'].isna()] #Remove all NULL values
    stats = stats.drop(columns = ['DKPt', 'FantPt', 'FDPt', 'VBD'])
    
    conv_dict = {'Age': int,
             'G': int,
             'GS': int,
             'Cmp': int,
             'Pass Att': int,
             'Pass Y': int,
             'Pass TD': int,
             'Int': int,
             'Rush Att': int,
             'Rush Y': int,
             'Y/A': float,
             'Rush TD': int,
             'Tgt': int,
             'Rec': int,
             'Rec Y': int,
             'Y/R': float,
             'Rec TD': int,
             'Fmb': int,
             'FL': int,
             'TD': int,
             '2PM': int,
             '2PP': int,
             'Standard': int,
             'PPR': float,
             'PosRank': int,
             'OvRank': int
             }
    stats = stats.astype(conv_dict) # Change data types of columns    
    
    stats['Pts/G'] = (stats['Points'] / stats['G']).round(1) # Create new columns
    stats['Year'] = year
    
    
    stats.to_csv('{}playerstats.csv'.format(year))

In [14]:
#player_csv(2020)
#player_csv(2019)
#player_csv(2018)
#player_csv(2017)
#player_csv(2016)

### Aggregate play-by-play data to find statistics for each player on a weekly basis

In [15]:
#YEAR = 2021

#data = pd.read_csv('https://github.com/guga31bb/nflfastR-data/blob/master/data/' \
#                         'play_by_play_' + str(YEAR) + '.csv.gz?raw=True',
#                         compression='gzip', low_memory=False)

In [16]:
#data

In [17]:
#for col_name in data.columns:
#    print(col_name)

In [18]:
#passer = data.groupby(['passer', 'week'], as_index=False).agg({'passing_yards':'sum',
#                                                                'pass_touchdown':'sum',
#                                                                'interception':'sum',
#                                                                })
                                                    

In [19]:
#rusher = data.groupby(['rusher', 'fantasy_player_id', 'week'], as_index=False).agg({'rushing_yards':'sum',
#                                                                                    'rush_touchdown':'sum',
#                                                                                    'touchdown':'sum',
#                                                                                    'fumble':'sum'})
                                                                  
                                                                  
                                                                  

In [20]:
#receiver = data.groupby(['receiver', 'fantasy_player_id', 'week'], as_index=False).agg({'receiver_player_name':'count',
#                                                                                        'receiving_yards':'sum',
#                                                                                        })
                                                        
                                                        
                                                                  
                                                                  
                                                                  

In [21]:
#receiver

## Read in game logs using CSV file from advancedsportsanalytics.com/nfl-raw-data

In [22]:
games = pd.read_csv('2021GameLogs.csv')

In [23]:
games['game_date'] = pd.to_datetime(games['game_date'])

In [24]:
games = games.drop(columns = ['game_id', 'rush_scrambles', 'designed_rush_att', 'comb_pass_rush_play', 'comb_pass_play', 'comb_rush_play',
                             'total_ret_td', 'pass_yds_bonus', 'rush_yds_bonus', 'rec_yds_bonus', 'Total_DKP', 'Off_DKP', 'Total_FDP',
                             'Off_FDP', 'Total_SDP', 'Off_SDP', 'pass_target_yds', 'pass_poor_throws', 'pass_blitzed', 'pass_hurried',
                             'rush_yds_before_contact', 'rush_yac', 'rec_air_yds', 'rec_yac', 'rec_drops', 'offense', 'off_pct', 'vis_team',
                             'player_id', 'home_team', 'OT', 'Temperature', 'Humidity', 'Wind_Speed', 'Vegas_Line', 'Vegas_Favorite', 
                              'Over_Under'])

In [25]:
games['week'] = ''

In [26]:
for ind, row in games.iterrows():
    if row['game_date'] >= datetime.strptime('2021-09-09', '%Y-%m-%d') and row['game_date'] <= datetime.strptime('2021-09-13', '%Y-%m-%d'):
        games.at[ind, 'week'] = 1
    elif row['game_date'] >= datetime.strptime('2021-09-16', '%Y-%m-%d') and row['game_date'] <= datetime.strptime('2021-09-20', '%Y-%m-%d'):
        games.at[ind, 'week'] = 2
    elif row['game_date'] >= datetime.strptime('2021-09-23', '%Y-%m-%d') and row['game_date'] <= datetime.strptime('2021-09-27', '%Y-%m-%d'):
        games.at[ind, 'week'] = 3
    elif row['game_date'] >= datetime.strptime('2021-09-30', '%Y-%m-%d') and row['game_date'] <= datetime.strptime('2021-10-04', '%Y-%m-%d'):
        games.at[ind, 'week'] = 4
    elif row['game_date'] >= datetime.strptime('2021-10-07', '%Y-%m-%d') and row['game_date'] <= datetime.strptime('2021-10-11', '%Y-%m-%d'):
        games.at[ind, 'week'] = 5
    elif row['game_date'] >= datetime.strptime('2021-10-14', '%Y-%m-%d') and row['game_date'] <= datetime.strptime('2021-10-18', '%Y-%m-%d'):
        games.at[ind, 'week'] = 6
    elif row['game_date'] >= datetime.strptime('2021-10-21', '%Y-%m-%d') and row['game_date'] <= datetime.strptime('2021-10-25', '%Y-%m-%d'):
        games.at[ind, 'week'] = 7
    elif row['game_date'] >= datetime.strptime('2021-10-28', '%Y-%m-%d') and row['game_date'] <= datetime.strptime('2021-11-01', '%Y-%m-%d'):
        games.at[ind, 'week'] = 8
    elif row['game_date'] >= datetime.strptime('2021-11-04', '%Y-%m-%d') and row['game_date'] <= datetime.strptime('2021-11-08', '%Y-%m-%d'):
        games.at[ind, 'week'] = 9
    elif row['game_date'] >= datetime.strptime('2021-11-11', '%Y-%m-%d') and row['game_date'] <= datetime.strptime('2021-11-15', '%Y-%m-%d'):
        games.at[ind, 'week'] = 10
    elif row['game_date'] >= datetime.strptime('2021-11-18', '%Y-%m-%d') and row['game_date'] <= datetime.strptime('2021-11-22', '%Y-%m-%d'):
        games.at[ind, 'week'] = 11
    elif row['game_date'] >= datetime.strptime('2021-11-25', '%Y-%m-%d') and row['game_date'] <= datetime.strptime('2021-11-29', '%Y-%m-%d'):
        games.at[ind, 'week'] = 12
    elif row['game_date'] >= datetime.strptime('2021-12-02', '%Y-%m-%d') and row['game_date'] <= datetime.strptime('2021-12-06', '%Y-%m-%d'):
        games.at[ind, 'week'] = 13
    elif row['game_date'] >= datetime.strptime('2021-12-09', '%Y-%m-%d') and row['game_date'] <= datetime.strptime('2021-12-13', '%Y-%m-%d'):
        games.at[ind, 'week'] = 14
    elif row['game_date'] >= datetime.strptime('2021-12-16', '%Y-%m-%d') and row['game_date'] <= datetime.strptime('2021-12-21', '%Y-%m-%d'):
        games.at[ind, 'week'] = 15
    elif row['game_date'] >= datetime.strptime('2021-12-23', '%Y-%m-%d') and row['game_date'] <= datetime.strptime('2021-12-27', '%Y-%m-%d'):
        games.at[ind, 'week'] = 16
    elif row['game_date'] >= datetime.strptime('2022-01-02', '%Y-%m-%d') and row['game_date'] <= datetime.strptime('2022-01-03', '%Y-%m-%d'):
        games.at[ind, 'week'] = 17
    elif row['game_date'] >= datetime.strptime('2022-01-08', '%Y-%m-%d') and row['game_date'] <= datetime.strptime('2022-01-09', '%Y-%m-%d'):
        games.at[ind, 'week'] = 18

In [27]:
games['PPR Pts'] = ((games['pass_td'] * 4) + (games['pass_yds'] / 25) + (games['two_point_conv'] * 2) + 
                   (games['rush_td'] * 6) + (games['rush_yds'] / 10) + (games['rec_td'] * 6) + (games['rec_yds'] / 10) + 
                   (games['rec']) + (games['pass_int'] * -2) + (games['fumbles_lost'] * -2)).round(1)

In [28]:
weekly = games[['player', 'team', 'pos', 'week', 'PPR Pts']]
weekly = weekly.pivot(index=['player', 'team', 'pos'], columns='week')
weekly.columns = weekly.columns.droplevel(0)

In [29]:
weekly['Std'] = weekly[[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18]].std(axis=1)

In [30]:
weekly.reset_index(inplace=True)
weekly = weekly.rename(columns = {'player':'Player', 'team':'Team', 'pos':'Pos'})

In [31]:
total = stats.merge(weekly, how = 'inner', on = ['Player', 'Team', 'Pos'])

In [37]:
# Calculate Coefficient of Variation
total['CV'] = (total['Std'] / total['Pts/G']) * 100
total

Unnamed: 0,Player,Team,Pos,Age,G,GS,Cmp,Pass Att,Pass Y,Pass TD,...,11,12,13,14,15,16,17,18,Std,CV
0,Jonathan Taylor,IND,RB,22,17,17,0,0,0,0,...,53.4,19.7,24.3,,23.0,10.8,18.4,12.5,11.170951,51.008909
1,Cooper Kupp,LAR,WR,28,17,17,0,1,0,0,...,,18.6,26.9,31.3,34.7,21.3,21.5,26.6,7.468360,28.835366
2,Deebo Samuel,SFO,WR,25,16,15,1,2,24,1,...,16.4,20.8,,12.9,18.9,28.1,17.2,29.0,7.763408,36.619848
3,Josh Allen,BUF,QB,25,17,17,409,646,4407,36,...,16.2,26.7,11.7,35.2,20.8,31.0,20.9,23.9,8.394186,35.418505
4,Austin Ekeler,LAC,RB,26,16,16,0,0,0,0,...,41.5,21.9,17.4,16.4,18.2,,19.8,28.9,8.586802,39.938613
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
550,Malik Taylor,GNB,WR,26,10,0,0,0,0,0,...,,,,0.0,0.0,,,,1.202082,1202.081528
551,Logan Woodside,TEN,QB,26,5,0,0,0,0,0,...,,,,-0.3,,,,,0.100000,-100.000000
552,John Wolford,LAR,QB,26,3,0,1,4,5,0,...,,,,,,,,,1.626346,-271.057599
553,Josh Rosen,ATL,QB,24,4,0,2,11,19,0,...,-1.8,,,,,,,,0.945163,-118.145391


## Most consistent players with at least 120 targets

In [40]:
consistent_tgts = total[total['Tgt'] >= 120].sort_values(by = 'CV')
consistent_tgts

Unnamed: 0,Player,Team,Pos,Age,G,GS,Cmp,Pass Att,Pass Y,Pass TD,...,11,12,13,14,15,16,17,18,Std,CV
42,Keenan Allen,LAC,WR,29,16,16,0,1,0,0,...,20.2,15.5,22.4,,19.8,7.5,14.4,11.2,4.616474,28.673752
1,Cooper Kupp,LAR,WR,28,17,17,0,1,0,0,...,,18.6,26.9,31.3,34.7,21.3,21.5,26.6,7.46836,28.835366
29,Diontae Johnson,PIT,WR,25,16,14,0,0,0,0,...,23.1,18.5,30.5,15.3,9.8,15.1,17.1,12.1,5.269915,30.639038
20,Stefon Diggs,BUF,WR,28,17,17,0,0,0,0,...,18.3,20.4,9.1,14.4,13.5,21.5,10.2,23.1,5.714944,34.017526
2,Deebo Samuel,SFO,WR,25,16,15,1,2,24,1,...,16.4,20.8,,12.9,18.9,28.1,17.2,29.0,7.763408,36.619848
37,Hunter Renfrow,LVR,WR,26,17,9,0,0,0,0,...,7.5,21.6,19.2,28.7,6.2,13.0,20.6,16.9,5.921999,38.96052
9,Justin Jefferson,MIN,WR,22,17,17,2,4,35,0,...,37.2,13.3,35.6,20.5,14.7,19.6,11.8,22.1,8.254918,42.551125
51,Chris Godwin,TAM,WR,25,14,14,0,0,0,0,...,19.2,4.7,30.2,20.5,10.9,,,,7.714807,44.594258
11,Davante Adams,GNB,WR,29,16,16,0,0,0,0,...,30.5,18.4,,34.1,16.4,33.4,30.6,11.5,9.684436,45.043886
50,D.J. Moore,CAR,WR,24,17,17,0,0,0,0,...,16.0,14.3,,14.4,12.8,10.5,5.9,17.0,6.481875,46.299108


## Most consistent players with at least 240 touches

In [42]:
consistent_touches = total[total['Touches'] >= 240].sort_values(by = 'CV')
consistent_touches

Unnamed: 0,Player,Team,Pos,Age,G,GS,Cmp,Pass Att,Pass Y,Pass TD,...,11,12,13,14,15,16,17,18,Std,CV
46,Josh Jacobs,LVR,RB,23,15,14,0,0,0,0,...,11.1,19.2,24.0,10.0,12.4,11.4,18.0,22.4,4.563311,30.220604
4,Austin Ekeler,LAC,RB,26,16,16,0,0,0,0,...,41.5,21.9,17.4,16.4,18.2,,19.8,28.9,8.586802,39.938613
13,Najee Harris,PIT,RB,23,17,17,0,0,0,0,...,16.9,6.7,15.7,25.4,4.6,16.0,29.6,9.5,7.55148,42.66373
21,Ezekiel Elliott,DAL,RB,26,17,17,1,1,4,0,...,12.8,16.9,6.7,9.0,16.2,17.2,4.0,10.0,6.567987,44.378292
67,David Montgomery,CHI,RB,24,13,13,0,1,0,0,...,7.7,10.4,28.1,14.1,11.3,23.6,20.1,9.9,6.811755,45.411697
32,Antonio Gibson,WAS,RB,23,16,14,0,0,0,0,...,7.5,21.6,22.1,4.1,18.5,13.8,,22.1,6.66748,46.625735
31,Alvin Kamara,NOR,RB,26,13,10,0,0,0,0,...,,,,24.5,5.1,7.9,21.0,18.2,8.517102,47.055812
10,Joe Mixon,CIN,RB,25,16,16,0,0,0,0,...,24.3,32.3,9.4,8.8,7.0,31.5,15.6,,9.120707,50.670593
0,Jonathan Taylor,IND,RB,22,17,17,0,0,0,0,...,53.4,19.7,24.3,,23.0,10.8,18.4,12.5,11.170951,51.008909
45,Dalvin Cook,MIN,RB,26,13,13,0,0,0,0,...,22.5,14.3,,35.2,11.1,,4.3,9.2,8.482667,53.350108


## Most consistent players with at least 260 fantasy points

In [49]:
consistent_top = total[total['Points'] >= 260].sort_values(by = 'CV')
consistent_top

Unnamed: 0,Player,Team,Pos,Age,G,GS,Cmp,Pass Att,Pass Y,Pass TD,...,11,12,13,14,15,16,17,18,Std,CV
1,Cooper Kupp,LAR,WR,28,17,17,0,1,0,0,...,,18.6,26.9,31.3,34.7,21.3,21.5,26.6,7.46836,28.835366
40,Kirk Cousins,MIN,QB,33,16,16,372,561,4221,33,...,25.2,15.5,20.2,14.0,10.5,16.3,,21.9,5.736401,30.512769
29,Diontae Johnson,PIT,WR,25,16,14,0,0,0,0,...,23.1,18.5,30.5,15.3,9.8,15.1,17.1,12.1,5.269915,30.639038
74,Ryan Tannehill,TEN,QB,33,17,17,357,531,3734,21,...,9.2,8.1,,16.5,9.9,14.6,13.5,27.3,4.921031,31.145765
19,Matthew Stafford,LAR,QB,33,17,17,404,601,4886,41,...,,22.0,24.0,23.5,15.5,6.7,14.3,17.5,6.322084,32.588063
30,Jalen Hurts,PHI,QB,23,15,15,265,432,3144,16,...,30.8,6.9,,,27.6,16.7,13.0,,6.969819,33.508747
20,Stefon Diggs,BUF,WR,28,17,17,0,0,0,0,...,18.3,20.4,9.1,14.4,13.5,21.5,10.2,23.1,5.714944,34.017526
39,Kyler Murray,ARI,QB,24,14,14,333,481,3787,24,...,,,30.8,17.4,12.6,21.9,22.9,17.1,7.484611,34.812142
3,Josh Allen,BUF,QB,25,17,17,409,646,4407,36,...,16.2,26.7,11.7,35.2,20.8,31.0,20.9,23.9,8.394186,35.418505
2,Deebo Samuel,SFO,WR,25,16,15,1,2,24,1,...,16.4,20.8,,12.9,18.9,28.1,17.2,29.0,7.763408,36.619848
