In [1]:
from io import StringIO

import numpy as np
import pandas as pd
import requests

# Maps each position's display name to the short code used in the stats page URL.
positions = {
    'Quarterback': 'qb',
    'Running Back': 'rb',
    'Wide Receiver': 'wr',
    'Tight End': 'te',
}

In [2]:
# Collecting weekly data for Quarterbacks
# One request per (season, week); the first HTML table on each page is used.
weekly_frames = []

for year in range(2016, 2021):
    for week in range(1, 18):
        url = f'https://www.fantasypros.com/nfl/stats/{positions["Quarterback"]}.php?year={year}&range=week&week={week}'
        response = requests.get(url, timeout=30)
        # Fail on an HTTP error here instead of trying to parse an error page.
        response.raise_for_status()
        # Wrapping the markup in StringIO avoids the deprecated "literal HTML string" input.
        temp_df = pd.read_html(StringIO(response.text))[0]

        temp_df.loc[:, 'Year'] = year
        temp_df.loc[:, 'Week'] = week

        weekly_frames.append(temp_df)

# DataFrame.append no longer exists in pandas 2.x; concatenating once also
# avoids re-copying the accumulated rows on every iteration.
quarterbacks_df = pd.concat(weekly_frames)

# Assumes the scraped table has a two-level header: keep the lower level and
# label the two columns added above (always the last two) explicitly, rather
# than mutating the Index's underlying array in place.
flat_columns = quarterbacks_df.columns.droplevel(0)
quarterbacks_df.columns = [*flat_columns[:-2], 'Year', 'Week']

In [10]:
# Descriptive column names for the quarterback table, applied positionally
# (left to right) in place of the abbreviated scraped headers.
quarterback_columns = [
    "Rank", "Player",
    "Passing_Completions", "Passing_Attempts", "Passing_Percentage",
    "Passing_Yards", "Passing_Yards_per_Attempt", "Passing_Touchdowns",
    "Passing_Interceptions", "Passing_Sacks",
    "Rushing_Attempts", "Rushing_Yards", "Rushing_Touchdowns",
    "Fumbles", "Games",
    "Fantasy_Points", "Fantasy_Points_per_Game",
    "OWN", "Year", "Week",
]
quarterbacks_df = quarterbacks_df.set_axis(quarterback_columns, axis='columns')
quarterbacks_df

Unnamed: 0,Rank,Player,Passing_Completions,Passing_Attempts,Passing_Percentage,Passing_Yards,Passing_Yards_per_Attempt,Passing_Touchdowns,Passing_Interceptions,Passing_Sacks,Rushing_Attempts,Rushing_Yards,Rushing_Touchdowns,Fumbles,Games,Fantasy_Points,Fantasy_Points_per_Game,OWN,Year,Week
0,1,Andrew Luck (FA),31,47,66.0,385,8.2,4,0,2,3,21,0,0,1,35.5,35.5,2.2%,2016,1
1,2,Drew Brees (NO),28,42,66.7,423,10.1,4,0,1,2,5,0,1,1,31.4,31.4,62.0%,2016,1
2,3,Alex Smith (FA),34,48,70.8,363,7.6,2,1,3,4,15,1,0,1,28.0,28.0,2.8%,2016,1
3,4,Matthew Stafford (LAR),31,39,79.5,340,8.7,3,0,1,2,5,0,0,1,26.1,26.1,48.5%,2016,1
4,5,Jameis Winston (NO),23,32,71.9,281,8.8,4,1,0,4,3,0,0,1,25.5,25.5,4.0%,2016,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129,130,Brett Smith (TB),0,0,0.0,0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.0%,2020,17
130,131,David Fales (NYJ),0,0,0.0,0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.0%,2020,17
131,132,Garrett Gilbert (DAL),0,0,0.0,0,0.0,0,0,0,0,0,0,0,1,0.0,0.0,0.2%,2020,17
132,133,Jameis Winston (NO),0,0,0.0,0,0.0,0,0,0,1,-1,0,0,1,-0.1,-0.1,4.0%,2020,17


In [11]:
# Collecting weekly data for Running Backs
# One request per (season, week); the first HTML table on each page is used.
weekly_frames = []

for year in range(2016, 2021):
    for week in range(1, 18):
        url = f'https://www.fantasypros.com/nfl/stats/{positions["Running Back"]}.php?year={year}&range=week&week={week}'
        response = requests.get(url, timeout=30)
        # Fail on an HTTP error here instead of trying to parse an error page.
        response.raise_for_status()
        # Wrapping the markup in StringIO avoids the deprecated "literal HTML string" input.
        temp_df = pd.read_html(StringIO(response.text))[0]

        temp_df.loc[:, 'Year'] = year
        temp_df.loc[:, 'Week'] = week

        weekly_frames.append(temp_df)

# DataFrame.append no longer exists in pandas 2.x; concatenating once also
# avoids re-copying the accumulated rows on every iteration.
running_backs_df = pd.concat(weekly_frames)

# Assumes the scraped table has a two-level header: keep the lower level and
# label the two columns added above (always the last two) explicitly, rather
# than mutating the Index's underlying array in place.
flat_columns = running_backs_df.columns.droplevel(0)
running_backs_df.columns = [*flat_columns[:-2], 'Year', 'Week']
running_backs_df

Unnamed: 0,Rank,Player,ATT,YDS,Y/A,LG,20+,TD,REC,TGT,YDS.1,Y/R,TD.1,FL,G,FPTS,FPTS/G,OWN,Year,Week
0,1,DeAngelo Williams (FA),26,143,5.5,17,0,2,6,9,28,4.7,0,0,1,29.1,29.1,0.1%,2016,1
1,2,C.J. Anderson (FA),20,92,4.6,28,1,1,4,5,47,11.8,1,0,1,25.9,25.9,1.0%,2016,1
2,3,Spencer Ware (CHI),11,70,6.4,14,0,1,7,8,129,18.4,0,0,1,25.9,25.9,0.0%,2016,1
3,4,Theo Riddick (LV),7,45,6.4,21,1,1,5,5,63,12.6,1,0,1,22.8,22.8,0.0%,2016,1
4,5,Carlos Hyde (JAC),23,88,3.8,18,0,2,2,3,5,2.5,0,0,1,21.3,21.3,12.7%,2016,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
240,241,Brennan Clay (DEN),0,0,0.0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0,0.0%,2020,17
241,242,Tim Flanders (NO),0,0,0.0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0,0.0%,2020,17
242,243,Damien Williams (CHI),0,0,0.0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0,3.0%,2020,17
243,244,Darius Clark (CAR),0,0,0.0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0,0.0%,2020,17


In [12]:
# Descriptive column names for the running back table, applied positionally
# (left to right) in place of the abbreviated scraped headers.
# NOTE(review): "Receiving_Target" is singular here, while the wide receiver
# and tight end tables use "Receiving_Targets" -- confirm whether that is intended.
running_back_columns = [
    "Rank", "Player",
    "Rushing_Attempts", "Rushing_Yards", "Rushing_Yards_per_Attempt",
    "Rushing_Longest", "Rushing_20+", "Rushing_Touchdowns",
    "Receiving_Receptions", "Receiving_Target", "Receiving_Yards",
    "Receiving_Yards_per_Reception", "Receiving_Touchdowns",
    "Fumbles", "Games",
    "Fantasy_Points", "Fantasy_Points_per_Game",
    "OWN", "Year", "Week",
]
running_backs_df = running_backs_df.set_axis(running_back_columns, axis='columns')
running_backs_df

Unnamed: 0,Rank,Player,Rushing_Attempts,Rushing_Yards,Rushing_Yards_per_Attempt,Rushing_Longest,Rushing_20+,Rushing_Touchdowns,Receiving_Receptions,Receiving_Target,Receiving_Yards,Receiving_Yards_per_Reception,Receiving_Touchdowns,Fumbles,Games,Fantasy_Points,Fantasy_Points_per_Game,OWN,Year,Week
0,1,DeAngelo Williams (FA),26,143,5.5,17,0,2,6,9,28,4.7,0,0,1,29.1,29.1,0.1%,2016,1
1,2,C.J. Anderson (FA),20,92,4.6,28,1,1,4,5,47,11.8,1,0,1,25.9,25.9,1.0%,2016,1
2,3,Spencer Ware (CHI),11,70,6.4,14,0,1,7,8,129,18.4,0,0,1,25.9,25.9,0.0%,2016,1
3,4,Theo Riddick (LV),7,45,6.4,21,1,1,5,5,63,12.6,1,0,1,22.8,22.8,0.0%,2016,1
4,5,Carlos Hyde (JAC),23,88,3.8,18,0,2,2,3,5,2.5,0,0,1,21.3,21.3,12.7%,2016,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
240,241,Brennan Clay (DEN),0,0,0.0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0,0.0%,2020,17
241,242,Tim Flanders (NO),0,0,0.0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0,0.0%,2020,17
242,243,Damien Williams (CHI),0,0,0.0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0,3.0%,2020,17
243,244,Darius Clark (CAR),0,0,0.0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0,0.0%,2020,17


In [13]:
# Collecting weekly data for Wide Receivers
# One request per (season, week); the first HTML table on each page is used.
weekly_frames = []

for year in range(2016, 2021):
    for week in range(1, 18):
        url = f'https://www.fantasypros.com/nfl/stats/{positions["Wide Receiver"]}.php?year={year}&range=week&week={week}'
        response = requests.get(url, timeout=30)
        # Fail on an HTTP error here instead of trying to parse an error page.
        response.raise_for_status()
        # Wrapping the markup in StringIO avoids the deprecated "literal HTML string" input.
        temp_df = pd.read_html(StringIO(response.text))[0]

        temp_df.loc[:, 'Year'] = year
        temp_df.loc[:, 'Week'] = week

        weekly_frames.append(temp_df)

# DataFrame.append no longer exists in pandas 2.x; concatenating once also
# avoids re-copying the accumulated rows on every iteration.
wide_receivers_df = pd.concat(weekly_frames)

# Assumes the scraped table has a two-level header: keep the lower level and
# label the two columns added above (always the last two) explicitly, rather
# than mutating the Index's underlying array in place.
flat_columns = wide_receivers_df.columns.droplevel(0)
wide_receivers_df.columns = [*flat_columns[:-2], 'Year', 'Week']
wide_receivers_df

Unnamed: 0,Rank,Player,REC,TGT,YDS,Y/R,LG,20+,TD,ATT,YDS.1,TD.1,FL,G,FPTS,FPTS/G,OWN,Year,Week
0,1,Brandin Cooks (HOU),6,9,143,23.8,98,1,2,1,11,0,0,1,27.4,27.4,88.8%,2016,1
1,2,Antonio Brown (TB),8,11,126,15.8,29,3,2,0,0,0,0,1,24.6,24.6,55.7%,2016,1
2,3,A.J. Green (ARI),12,13,180,15.0,54,3,1,0,0,0,0,1,24.0,24.0,37.8%,2016,1
3,4,Willie Snead IV (BAL),9,9,172,19.1,49,3,1,0,0,0,0,1,23.2,23.2,1.7%,2016,1
4,5,Larry Fitzgerald (ARI),8,10,81,10.1,21,1,2,0,0,0,0,1,20.1,20.1,4.5%,2016,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
382,383,Tommylee Lewis (NO),0,0,0,0.0,0,0,0,1,-2,0,0,1,-0.2,-0.2,0.1%,2020,17
383,384,Ray-Ray McCloud (PIT),2,2,-3,-1.5,0,0,0,0,0,0,0,1,-0.3,-0.3,0.2%,2020,17
384,385,Tavon Austin (GB),1,1,7,7.0,7,0,0,0,0,0,1,1,-1.3,-1.3,0.1%,2020,17
385,386,Mack Hollins (MIA),1,1,3,3.0,3,0,0,0,0,0,1,1,-1.7,-1.7,0.1%,2020,17


In [14]:
# Descriptive column names for the wide receiver table, applied positionally
# (left to right) in place of the abbreviated scraped headers.
wide_receiver_columns = [
    "Rank", "Player",
    "Receiving_Receptions", "Receiving_Targets", "Receiving_Yards",
    "Receiving_Yards_per_Reception", "Receiving_Longest", "Receiving_20+",
    "Receiving_Touchdowns",
    "Rushing_Attempts", "Rushing_Yards", "Rushing_Touchdowns",
    "Fumbles", "Games",
    "Fantasy_Points", "Fantasy_Points_per_Game",
    "OWN", "Year", "Week",
]
wide_receivers_df = wide_receivers_df.set_axis(wide_receiver_columns, axis='columns')
wide_receivers_df

Unnamed: 0,Rank,Player,Receiving_Receptions,Receiving_Targets,Receiving_Yards,Receiving_Yards_per_Reception,Receiving_Longest,Receiving_20+,Receiving_Touchdowns,Rushing_Attempts,Rushing_Yards,Rushing_Touchdowns,Fumbles,Games,Fantasy_Points,Fantasy_Points_per_Game,OWN,Year,Week
0,1,Brandin Cooks (HOU),6,9,143,23.8,98,1,2,1,11,0,0,1,27.4,27.4,88.8%,2016,1
1,2,Antonio Brown (TB),8,11,126,15.8,29,3,2,0,0,0,0,1,24.6,24.6,55.7%,2016,1
2,3,A.J. Green (ARI),12,13,180,15.0,54,3,1,0,0,0,0,1,24.0,24.0,37.8%,2016,1
3,4,Willie Snead IV (BAL),9,9,172,19.1,49,3,1,0,0,0,0,1,23.2,23.2,1.7%,2016,1
4,5,Larry Fitzgerald (ARI),8,10,81,10.1,21,1,2,0,0,0,0,1,20.1,20.1,4.5%,2016,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
382,383,Tommylee Lewis (NO),0,0,0,0.0,0,0,0,1,-2,0,0,1,-0.2,-0.2,0.1%,2020,17
383,384,Ray-Ray McCloud (PIT),2,2,-3,-1.5,0,0,0,0,0,0,0,1,-0.3,-0.3,0.2%,2020,17
384,385,Tavon Austin (GB),1,1,7,7.0,7,0,0,0,0,0,1,1,-1.3,-1.3,0.1%,2020,17
385,386,Mack Hollins (MIA),1,1,3,3.0,3,0,0,0,0,0,1,1,-1.7,-1.7,0.1%,2020,17


In [15]:
# Collecting weekly data for Tight Ends
# One request per (season, week); the first HTML table on each page is used.
weekly_frames = []

for year in range(2016, 2021):
    for week in range(1, 18):
        url = f'https://www.fantasypros.com/nfl/stats/{positions["Tight End"]}.php?year={year}&range=week&week={week}'
        response = requests.get(url, timeout=30)
        # Fail on an HTTP error here instead of trying to parse an error page.
        response.raise_for_status()
        # Wrapping the markup in StringIO avoids the deprecated "literal HTML string" input.
        temp_df = pd.read_html(StringIO(response.text))[0]

        temp_df.loc[:, 'Year'] = year
        temp_df.loc[:, 'Week'] = week

        weekly_frames.append(temp_df)

# DataFrame.append no longer exists in pandas 2.x; concatenating once also
# avoids re-copying the accumulated rows on every iteration.
tight_ends_df = pd.concat(weekly_frames)

# Assumes the scraped table has a two-level header: keep the lower level and
# label the two columns added above (always the last two) explicitly, rather
# than mutating the Index's underlying array in place.
flat_columns = tight_ends_df.columns.droplevel(0)
tight_ends_df.columns = [*flat_columns[:-2], 'Year', 'Week']
tight_ends_df

Unnamed: 0,Rank,Player,REC,TGT,YDS,Y/R,LG,20+,TD,ATT,YDS.1,TD.1,FL,G,FPTS,FPTS/G,OWN,Year,Week
0,1,Jack Doyle (IND),3,4,35,11.7,16,0,2,0,0,0,0,1,15.5,15.5,3.6%,2016,1
1,2,Dwayne Allen (FA),4,6,53,13.3,19,0,1,0,0,0,0,1,13.3,13.3,0.0%,2016,1
2,3,Julius Thomas (FA),5,5,64,12.8,22,1,1,0,0,0,0,1,12.4,12.4,0.0%,2016,1
3,4,Eric Ebron (PIT),5,5,46,9.2,13,0,1,0,0,0,0,1,10.6,10.6,56.5%,2016,1
4,5,Austin Seferian-Jenkins (FA),1,1,30,30.0,30,1,1,0,0,0,0,1,9.0,9.0,0.0%,2016,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211,212,Maxx Williams (ARI),0,0,0,0.0,0,0,0,0,0,0,0,1,0.0,0.0,0.2%,2020,17
212,213,Nick O'Leary (LV),0,0,0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.0%,2020,17
213,214,Tyler Kroft (NYJ),0,0,0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.1%,2020,17
214,215,Sammis Reyes (WAS),0,0,0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.0%,2020,17


In [16]:
# Descriptive column names for the tight end table, applied positionally
# (left to right) in place of the abbreviated scraped headers.
tight_end_columns = [
    "Rank", "Player",
    "Receiving_Receptions", "Receiving_Targets", "Receiving_Yards",
    "Receiving_Yards_per_Reception", "Receiving_Longest", "Receiving_20+",
    "Receiving_Touchdowns",
    "Rushing_Attempts", "Rushing_Yards", "Rushing_Touchdowns",
    "Fumbles", "Games",
    "Fantasy_Points", "Fantasy_Points_per_Game",
    "OWN", "Year", "Week",
]
tight_ends_df = tight_ends_df.set_axis(tight_end_columns, axis='columns')
tight_ends_df

Unnamed: 0,Rank,Player,Receiving_Receptions,Receiving_Targets,Receiving_Yards,Receiving_Yards_per_Reception,Receiving_Longest,Receiving_20+,Receiving_Touchdowns,Rushing_Attempts,Rushing_Yards,Rushing_Touchdowns,Fumbles,Games,Fantasy_Points,Fantasy_Points_per_Game,OWN,Year,Week
0,1,Jack Doyle (IND),3,4,35,11.7,16,0,2,0,0,0,0,1,15.5,15.5,3.6%,2016,1
1,2,Dwayne Allen (FA),4,6,53,13.3,19,0,1,0,0,0,0,1,13.3,13.3,0.0%,2016,1
2,3,Julius Thomas (FA),5,5,64,12.8,22,1,1,0,0,0,0,1,12.4,12.4,0.0%,2016,1
3,4,Eric Ebron (PIT),5,5,46,9.2,13,0,1,0,0,0,0,1,10.6,10.6,56.5%,2016,1
4,5,Austin Seferian-Jenkins (FA),1,1,30,30.0,30,1,1,0,0,0,0,1,9.0,9.0,0.0%,2016,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211,212,Maxx Williams (ARI),0,0,0,0.0,0,0,0,0,0,0,0,1,0.0,0.0,0.2%,2020,17
212,213,Nick O'Leary (LV),0,0,0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.0%,2020,17
213,214,Tyler Kroft (NYJ),0,0,0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.1%,2020,17
214,215,Sammis Reyes (WAS),0,0,0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.0%,2020,17


In [None]:
# Collecting weekly data for Snap Counts
# One request per season; the first HTML table on each page is used.
season_frames = []

for year in range(2016, 2021):
    url = f'https://www.fantasypros.com/nfl/reports/snap-counts/?year={year}'
    response = requests.get(url, timeout=30)
    # Fail on an HTTP error here instead of trying to parse an error page.
    response.raise_for_status()
    # Wrapping the markup in StringIO avoids the deprecated "literal HTML string" input.
    temp_df = pd.read_html(StringIO(response.text))[0]

    temp_df.loc[:, 'Year'] = year

    season_frames.append(temp_df)

# DataFrame.append no longer exists in pandas 2.x; concatenating once also
# avoids re-copying the accumulated rows on every iteration.
snap_counts_df = pd.concat(season_frames)

snap_counts_df

In [None]:
# Adding Team name to player name column to match other dataframes
# Each value becomes "<Player> (<Team>)", built pairwise from the two columns.
snap_counts_df['Player'] = [
    f"{player_name} ({team_name})"
    for player_name, team_name in zip(snap_counts_df['Player'], snap_counts_df['Team'])
]
snap_counts_df

In [None]:
# Creating new snap counts dataframe for weekly data to be in one column
# Wide -> long: every season row contributes one record per week column
# ('1' .. '17'), in the same order as a nested row/week loop would produce.
snap_records = [
    {
        "Player": season_row['Player'],
        "Position": season_row['Pos'],
        "Week": week,
        "Year": season_row['Year'],
        "Snapcount": season_row[f'{week}'],
    }
    for _, season_row in snap_counts_df.iterrows()
    for week in range(1, 18)
]

# Building the frame once from the collected records avoids growing it one
# row at a time, and lets pandas infer integer dtypes for Week and Year
# instead of leaving every column as generic objects.
new_snap_counts_df = pd.DataFrame(
    snap_records, columns=["Player", "Position", "Week", "Year", "Snapcount"]
)
new_snap_counts_df

In [None]:
# Merging snapcount data into position dataframes
# With no `on`/`how` given, pandas inner-joins on every column name the two
# frames have in common.
quarterbacks_df, running_backs_df, wide_receivers_df, tight_ends_df = (
    position_df.merge(new_snap_counts_df)
    for position_df in (quarterbacks_df, running_backs_df, wide_receivers_df, tight_ends_df)
)

In [None]:
# Formatting datatypes into numerical values
# The same clean-up applies to all four position tables, so do it in one loop.
for position_df in (quarterbacks_df, running_backs_df, wide_receivers_df, tight_ends_df):
    # 'bye' entries (presumably the team's bye week) carry no snap count;
    # store them as missing values so the column can be made numeric later.
    position_df.loc[position_df['Snapcount'] == 'bye', 'Snapcount'] = np.nan

    # OWN is scraped as text such as "62.0%"; convert it to a 0-1 fraction.
    # Checking for "not numeric yet" (rather than dtype == object) keeps the
    # cell safe to re-run and also covers pandas' dedicated string dtypes.
    if not pd.api.types.is_numeric_dtype(position_df['OWN']):
        position_df['OWN'] = (
            position_df['OWN'].str.replace('%', '', regex=False).astype('float') / 100.0
        )

In [None]:
# Coerce the quarterback columns that must be numeric for the analysis below.
for column in ('OWN', 'Year', 'Week', 'Snapcount'):
    quarterbacks_df[column] = pd.to_numeric(quarterbacks_df[column])

In [None]:
# Coerce the running back columns that must be numeric for the analysis below.
for column in ('OWN', 'Year', 'Week', 'Snapcount'):
    running_backs_df[column] = pd.to_numeric(running_backs_df[column])

In [None]:
# Coerce the wide receiver columns that must be numeric for the analysis below.
for column in ('OWN', 'Year', 'Week', 'Snapcount'):
    wide_receivers_df[column] = pd.to_numeric(wide_receivers_df[column])

In [None]:
# Coerce the tight end columns that must be numeric for the analysis below.
for column in ('OWN', 'Year', 'Week', 'Snapcount'):
    tight_ends_df[column] = pd.to_numeric(tight_ends_df[column])

In [None]:
pd.set_option('display.max_rows', None)
# Correlation of every numeric quarterback column with fantasy points,
# strongest positive correlation first.
# The scraped 'FPTS' header was renamed to 'Fantasy_Points' earlier in the
# notebook, so that is the column to select. Non-numeric columns are dropped
# explicitly because DataFrame.corr() raises on them in pandas 2.x.
quarterbacks_df.select_dtypes('number').corr()[['Fantasy_Points']].sort_values(by='Fantasy_Points', ascending=False)

In [None]:
# Correlation of every numeric running back column with fantasy points,
# strongest positive correlation first.
# The scraped 'FPTS' header was renamed to 'Fantasy_Points' earlier in the
# notebook, so that is the column to select. Non-numeric columns are dropped
# explicitly because DataFrame.corr() raises on them in pandas 2.x.
running_backs_df.select_dtypes('number').corr()[['Fantasy_Points']].sort_values(by='Fantasy_Points', ascending=False)

In [None]:
# Correlation of every numeric wide receiver column with fantasy points,
# strongest positive correlation first.
# The scraped 'FPTS' header was renamed to 'Fantasy_Points' earlier in the
# notebook, so that is the column to select. Non-numeric columns are dropped
# explicitly because DataFrame.corr() raises on them in pandas 2.x.
wide_receivers_df.select_dtypes('number').corr()[['Fantasy_Points']].sort_values(by='Fantasy_Points', ascending=False)

In [None]:
# Correlation of every numeric tight end column with fantasy points,
# strongest positive correlation first.
# The scraped 'FPTS' header was renamed to 'Fantasy_Points' earlier in the
# notebook, so that is the column to select. Non-numeric columns are dropped
# explicitly because DataFrame.corr() raises on them in pandas 2.x.
tight_ends_df.select_dtypes('number').corr()[['Fantasy_Points']].sort_values(by='Fantasy_Points', ascending=False)