In [1]:
# This produces the dataframe for WR

In [2]:
## Notes on the NFL Library ##
# the NFL python library seems not to work on Tuesdays, probably due to data updates (not confirmed)
# unbalanced dataframe - pfr stats start at 2018; all other stats go back to 2017

In [3]:
## REQUIRED ACTIONS - Include in a README doc ## 
# modify the season start date in the 'get_current_week' function
# modify the number of weeks if the NFL adds regular season games to the schedule

In [4]:
# import the libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json
import glob
from IPython.display import display, HTML
from datetime import datetime
import nfl_data_py as nfl
import os
import re

In [5]:
# Notebook-wide display settings: show every column (no column limit) and allow
# up to 1000 characters per line so wide frames are not wrapped.
for _option_name, _option_value in (
    ('display.max_columns', None),
    ('display.width', 1000),
):
    pd.set_option(_option_name, _option_value)

In [6]:
# Function to calculate the current week of the NFL season
def get_current_week(season_start_date=None, current_date=None):
    """Return the 1-based NFL week number for a date.

    Week 1 is the seven-day span that begins on ``season_start_date``; every
    further full seven days adds one to the result.

    Args:
        season_start_date: ``datetime`` of the first day of week 1. Defaults to
            2024-09-04 and has to be updated (or passed in) for each new season.
        current_date: ``datetime`` to evaluate. Defaults to ``datetime.now()``.

    Returns:
        int: The week number. There is no upper cap, so values above the
        regular-season length are returned once that many weeks have elapsed.
        Dates before ``season_start_date`` yield 0 or a negative number.
    """
    if season_start_date is None:
        season_start_date = datetime(2024, 9, 4)  # Update for the season start
    if current_date is None:
        current_date = datetime.now()

    # Whole weeks elapsed since kickoff, shifted so the first week is week 1.
    elapsed_days = (current_date - season_start_date).days
    return (elapsed_days // 7) + 1

# Define the current NFL year, week, and season type
# Read the clock once so the year and month below come from the same instant.
_today = datetime.now()
current_week = get_current_week()

# The last regular-season weeks can be played in early January, when the
# calendar year is already one ahead of the season in progress.
# get_year_range() includes `current_year` whenever current_week <= 18, so the
# raw calendar year would request a season that has not started yet.
# A regular-season week number seen in January/February is therefore mapped
# back to the previous calendar year.
if current_week <= 18 and _today.month <= 2:
    current_year = _today.year - 1
else:
    current_year = _today.year

seasontype = 2 if current_week <= 18 else 3  # 2 = regular season, 3 = playoffs

In [7]:
# define the years to pull
# nfl.import_weekly_data(years, columns, downcast)
def get_year_range(current_year, current_week, start_year=2017, regular_season_weeks=18):
    """Build the list of seasons to request from the NFL data library.

    Args:
        current_year: Year used as the upper bound of the range.
        current_week: Week number, e.g. the value from ``get_current_week()``.
        start_year: First season to include (default 2017).
        regular_season_weeks: Number of regular-season weeks (default 18).
            Raise this if the NFL adds regular-season games to the schedule.

    Returns:
        list[int]: ``start_year`` through ``current_year`` inclusive while
        ``current_week`` is within the regular season; otherwise
        ``start_year`` through ``current_year - 1``. The list is empty when
        the upper bound falls below ``start_year``.
    """
    in_regular_season = current_week <= regular_season_weeks
    # Past the regular season the calendar has typically rolled into the next
    # year, so the latest season to pull is the previous year.
    last_year = current_year if in_regular_season else current_year - 1
    return list(range(start_year, last_year + 1))

# Seasons to request in the data pulls, using the default 2017 start year
years = get_year_range(current_year=current_year, current_week=current_week)

In [8]:
# Identification columns requested from the weekly feed, grouped by meaning.
base_columns = (
    ['season', 'season_type', 'week']                  # when the game took place
    + ['player_id', 'player_name']                     # who the row is about
    + ['position', 'position_group', 'recent_team']    # role and team
)

In [9]:
# Columns of the ID table that this notebook does not use
columns_to_drop = [
    'position', 'team', 'birthdate', 'age',
    'draft_year', 'draft_round', 'draft_pick', 'draft_ovr',
    'twitter_username', 'height', 'weight', 'college', 'db_season',
]

# Load the player ID table (nfl.import_ids() takes no parameters here) and
# drop the unused columns; errors='ignore' skips any that are absent.
ids_data = nfl.import_ids().drop(columns=columns_to_drop, errors='ignore')

# Display the resulting dataframe for review
# print(f"Columns after dropping unnecessary ones: {ids_data.columns.tolist()}")
# display(ids_data)

In [10]:
# Weekly player rows for the selected seasons, limited to the base columns.
# Positional order follows nfl.import_weekly_data(years, columns, downcast).
weekly_data = nfl.import_weekly_data(years, base_columns)

# display(weekly_data)

Downcasting floats.


In [11]:
## Output: a dataframe of ALL NFL athletes info and ids since 2017

# The ID table names the join key 'gsis_id' while the weekly data names it
# 'player_id'; rename so both frames share one key column.
ids_data = ids_data.rename(columns={'gsis_id': 'player_id'})

# Inner join: keep only weekly rows whose player_id also appears in the ID table.
# Both names refer to the same merged frame.
all_players_id_data = id_dataframe = weekly_data.merge(
    ids_data,
    how='inner',
    on='player_id',
)

# Display the resulting ID dataframe
# display(all_players_id_data)

In [12]:
## Output: a dataframe of NFL WR info and ids since 2017
# Keep only the rows whose position is 'WR' (wide receiver)
wide_receiver_ids = all_players_id_data.loc[all_players_id_data['position'].eq('WR')]

# Display the resulting dataframe for review
# display(wide_receiver_ids)

In [13]:
## TEST ##
# Spot-check window: season plus first and last week (edit to inspect another slice)
test_season, week_start, week_end = 2022, 8, 13

# Rows of that season whose week lies in [week_start, week_end], both ends included
test_data = wide_receiver_ids[
    wide_receiver_ids['season'].eq(test_season)
    & wide_receiver_ids['week'].between(week_start, week_end)
]

# Display the filtered dataframe
# display(test_data)

In [14]:
## TEST ##
# Compare player_id to gsis_id from import_ngs_data()

wr_id = '00-0038976'  # player_id to inspect; replace as needed

# Every weekly row in the WR frame that belongs to this player
player_data = wide_receiver_ids.loc[wide_receiver_ids['player_id'].eq(wr_id)]

# Display the player's weekly data
# display(player_data)

In [15]:
## Output: a dataframe of NFL WR info, ids, and stats since 2017
# Receiving-related stat columns to request for wide receivers
wr_columns = [
    # volume
    'receptions', 'targets', 'receiving_yards', 'receiving_tds',
    # ball security
    'receiving_fumbles', 'receiving_fumbles_lost',
    # yardage breakdown
    'receiving_air_yards', 'receiving_yards_after_catch',
    # situational / value
    'receiving_first_downs', 'receiving_epa', 'receiving_2pt_conversions',
    # ratio and share metrics
    'racr', 'target_share', 'air_yards_share', 'wopr',
]

# Second pull of the weekly feed: the three merge keys plus the receiving stats
wr_stats = nfl.import_weekly_data(
    years,
    ['player_id', 'season', 'week', *wr_columns],
)

# Inner join on player/season/week attaches each stat line to its WR id row
wr_ids_weekly_stats = wide_receiver_ids.merge(
    wr_stats,
    how='inner',
    on=['player_id', 'season', 'week'],
)

# Report the merged size, then render the frame for review
print(f"Shape of merged dataframe: {wr_ids_weekly_stats.shape}")
display(wr_ids_weekly_stats)

Downcasting floats.
Shape of merged dataframe: (17362, 44)


Unnamed: 0,season,season_type,week,player_id,player_name,position,position_group,recent_team,mfl_id,sportradar_id,fantasypros_id,pff_id,sleeper_id,nfl_id,espn_id,yahoo_id,fleaflicker_id,cbs_id,pfr_id,cfbref_id,rotowire_id,rotoworld_id,ktc_id,stats_id,stats_global_id,fantasy_data_id,swish_id,name,merge_name,receptions,targets,receiving_yards,receiving_tds,receiving_fumbles,receiving_fumbles_lost,receiving_air_yards,receiving_yards_after_catch,receiving_first_downs,receiving_epa,receiving_2pt_conversions,racr,target_share,air_yards_share,wopr
0,2017,REG,1,00-0022921,L.Fitzgerald,WR,WR,ARI,7393,b6a61b38-5cfa-46eb-b1c5-b0255d7ebaf5,9383.0,1724.0,223.0,larryfitzgerald/2506106,5528.0,6762.0,1732.0,492934.0,FitzLa00,larry-fitzgerald-1,3730.0,1661.0,,6762.0,246053.0,5571.0,,Larry Fitzgerald,larry fitzgerald,6,13,74.0,0,0.0,0.0,144.0,44.0,4.0,0.997088,0,0.513889,0.276596,0.342043,0.654324
1,2017,REG,2,00-0022921,L.Fitzgerald,WR,WR,ARI,7393,b6a61b38-5cfa-46eb-b1c5-b0255d7ebaf5,9383.0,1724.0,223.0,larryfitzgerald/2506106,5528.0,6762.0,1732.0,492934.0,FitzLa00,larry-fitzgerald-1,3730.0,1661.0,,6762.0,246053.0,5571.0,,Larry Fitzgerald,larry fitzgerald,3,6,21.0,0,0.0,0.0,29.0,17.0,2.0,-3.455533,0,0.724138,0.166667,0.069378,0.298565
2,2017,REG,3,00-0022921,L.Fitzgerald,WR,WR,ARI,7393,b6a61b38-5cfa-46eb-b1c5-b0255d7ebaf5,9383.0,1724.0,223.0,larryfitzgerald/2506106,5528.0,6762.0,1732.0,492934.0,FitzLa00,larry-fitzgerald-1,3730.0,1661.0,,6762.0,246053.0,5571.0,,Larry Fitzgerald,larry fitzgerald,13,15,149.0,1,0.0,0.0,138.0,45.0,6.0,7.632769,0,1.079710,0.312500,0.369973,0.727731
3,2017,REG,4,00-0022921,L.Fitzgerald,WR,WR,ARI,7393,b6a61b38-5cfa-46eb-b1c5-b0255d7ebaf5,9383.0,1724.0,223.0,larryfitzgerald/2506106,5528.0,6762.0,1732.0,492934.0,FitzLa00,larry-fitzgerald-1,3730.0,1661.0,,6762.0,246053.0,5571.0,,Larry Fitzgerald,larry fitzgerald,4,7,32.0,1,0.0,0.0,31.0,18.0,1.0,0.162141,0,1.032258,0.137255,0.070938,0.255539
4,2017,REG,5,00-0022921,L.Fitzgerald,WR,WR,ARI,7393,b6a61b38-5cfa-46eb-b1c5-b0255d7ebaf5,9383.0,1724.0,223.0,larryfitzgerald/2506106,5528.0,6762.0,1732.0,492934.0,FitzLa00,larry-fitzgerald-1,3730.0,1661.0,,6762.0,246053.0,5571.0,,Larry Fitzgerald,larry fitzgerald,6,10,51.0,0,0.0,0.0,44.0,29.0,5.0,2.428232,0,1.159091,0.227273,0.105516,0.414770
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17357,2024,REG,2,00-0039920,M.Corley,WR,WR,NYJ,16636,bae59933-8b94-4837-990e-f0a4ced3cdbb,26023.0,,11617.0,,4613104.0,40944.0,,3162613.0,CorlMa00,malachi-corley-1,17777.0,,1607.0,40944.0,0.0,,1215291.0,Malachi Corley,malachi corley,1,1,4.0,0,0.0,0.0,-1.0,5.0,0.0,-0.475780,0,0.000000,0.034483,-0.006579,0.047119
17358,2024,REG,9,00-0039920,M.Corley,WR,WR,NYJ,16636,bae59933-8b94-4837-990e-f0a4ced3cdbb,26023.0,,11617.0,,4613104.0,40944.0,,3162613.0,CorlMa00,malachi-corley-1,17777.0,,1607.0,40944.0,0.0,,1215291.0,Malachi Corley,malachi corley,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,,0,,,,
17359,2024,REG,10,00-0039920,M.Corley,WR,WR,NYJ,16636,bae59933-8b94-4837-990e-f0a4ced3cdbb,26023.0,,11617.0,,4613104.0,40944.0,,3162613.0,CorlMa00,malachi-corley-1,17777.0,,1607.0,40944.0,0.0,,1215291.0,Malachi Corley,malachi corley,1,2,2.0,0,0.0,0.0,12.0,0.0,1.0,-2.246118,0,0.166667,0.060606,0.057692,0.131294
17360,2024,REG,11,00-0039920,M.Corley,WR,WR,NYJ,16636,bae59933-8b94-4837-990e-f0a4ced3cdbb,26023.0,,11617.0,,4613104.0,40944.0,,3162613.0,CorlMa00,malachi-corley-1,17777.0,,1607.0,40944.0,0.0,,1215291.0,Malachi Corley,malachi corley,1,1,10.0,0,0.0,0.0,10.0,0.0,0.0,0.563583,0,1.000000,0.034483,0.080645,0.108176


In [16]:
# Output: imports the NFL next-generation stats from the nfl python library

# import the next generation stats (NGS) from nfl.import_ngs_data()
# note: ngs starts at week 0 (previous season totals) - not needed so drop those rows

# One pipeline: pull NGS receiving data, keep weekly WR rows, drop unused columns.
# Week 0 rows are season totals rather than weekly games, so they are excluded.
ngs_wr_df = (
    nfl.import_ngs_data('receiving', years)
    .loc[lambda ngs: ngs['week'].ne(0) & ngs['player_position'].eq('WR')]
    .drop(
        columns=['season_type', 'player_position', 'receptions', 'targets', 'player_jersey_number'],
        errors='ignore',
    )
)

# Report the size and render the filtered frame
print(f"Shape of NGS WR DataFrame after dropping columns: {ngs_wr_df.shape}")
display(ngs_wr_df)

# csv file
# ngs_wr_df.to_csv('ngs_wr_df.csv', index=False)

Shape of NGS WR DataFrame after dropping columns: (8231, 18)


Unnamed: 0,season,week,player_display_name,team_abbr,avg_cushion,avg_separation,avg_intended_air_yards,percent_share_of_intended_air_yards,catch_percentage,yards,rec_touchdowns,avg_yac,avg_expected_yac,avg_yac_above_expectation,player_gsis_id,player_first_name,player_last_name,player_short_name
1725,2017,1,Ryan Grant,WAS,9.936667,2.894592,4.410000,7.154639,66.666667,61.0,0,11.232500,10.072361,1.160139,00-0031068,Ryan,Grant,R.Grant
1726,2017,1,Martavis Bryant,PIT,8.300000,4.122054,12.688333,33.327496,33.333333,14.0,0,0.155000,4.098278,-3.943278,00-0031373,Martavis,Bryant,M.Bryant
1729,2017,1,Jamison Crowder,WAS,7.655000,3.177793,10.540000,19.949707,42.857143,14.0,0,1.450000,1.631897,-0.181897,00-0031941,Jamison,Crowder,J.Crowder
1732,2017,1,Nelson Agholor,PHI,7.423750,2.462620,10.463750,20.274656,75.000000,86.0,1,5.611667,3.262470,2.349197,00-0031549,Nelson,Agholor,N.Agholor
1733,2017,1,John Brown,ARI,7.360000,2.751526,13.422222,28.208481,44.444444,32.0,0,-0.377500,0.961993,-1.339493,00-0031051,John,Brown,J.Brown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13298,2024,20,Terry McLaurin,WAS,7.463333,5.494066,1.058333,3.880944,66.666667,87.0,1,20.250000,7.479416,12.770584,00-0035659,Terry,McLaurin,T.McLaurin
13301,2024,20,Amon-Ra St. Brown,DET,5.754000,3.568523,8.846000,24.598871,80.000000,137.0,0,7.613750,4.600767,3.012983,00-0036963,Amon-Ra,St. Brown,A.St. Brown
13302,2024,20,Nico Collins,HOU,5.117500,1.892568,12.631250,39.495798,62.500000,81.0,0,0.686000,0.953086,-0.267086,00-0036554,Nico,Collins,N.Collins
13303,2024,20,Dyami Brown,WAS,4.230000,2.878581,11.838750,57.884122,75.000000,98.0,0,5.431667,5.221507,0.210160,00-0036626,Dyami,Brown,D.Brown


In [17]:
# Output: a dataframe of NFL WR info, ids, weekly stats, and next-gen stats since 2017

# Left join of the NGS metrics onto the WR weekly stats. The player key is
# 'player_id' on the left and 'player_gsis_id' on the right; season and week
# are shared. Every row of wr_ids_weekly_stats is retained.
wr_ids_ngs_weekly_stats = wr_ids_weekly_stats.merge(
    ngs_wr_df,
    how='left',
    left_on=['player_id', 'season', 'week'],
    right_on=['player_gsis_id', 'season', 'week'],
)

# Report the merged shape and whether the join left the row count unchanged
print(f"Shape of merged dataframe: {wr_ids_ngs_weekly_stats.shape}")
print(f"Row count matches: {len(wr_ids_weekly_stats) == len(wr_ids_ngs_weekly_stats)}")

# Render the merged frame
display(wr_ids_ngs_weekly_stats)

# csv file
# wr_ids_ngs_weekly_stats.to_csv('wr_ids_ngs_weekly_stats.csv', index=False)

Shape of merged dataframe: (17362, 60)
Row count matches: True


Unnamed: 0,season,season_type,week,player_id,player_name,position,position_group,recent_team,mfl_id,sportradar_id,fantasypros_id,pff_id,sleeper_id,nfl_id,espn_id,yahoo_id,fleaflicker_id,cbs_id,pfr_id,cfbref_id,rotowire_id,rotoworld_id,ktc_id,stats_id,stats_global_id,fantasy_data_id,swish_id,name,merge_name,receptions,targets,receiving_yards,receiving_tds,receiving_fumbles,receiving_fumbles_lost,receiving_air_yards,receiving_yards_after_catch,receiving_first_downs,receiving_epa,receiving_2pt_conversions,racr,target_share,air_yards_share,wopr,player_display_name,team_abbr,avg_cushion,avg_separation,avg_intended_air_yards,percent_share_of_intended_air_yards,catch_percentage,yards,rec_touchdowns,avg_yac,avg_expected_yac,avg_yac_above_expectation,player_gsis_id,player_first_name,player_last_name,player_short_name
0,2017,REG,1,00-0022921,L.Fitzgerald,WR,WR,ARI,7393,b6a61b38-5cfa-46eb-b1c5-b0255d7ebaf5,9383.0,1724.0,223.0,larryfitzgerald/2506106,5528.0,6762.0,1732.0,492934.0,FitzLa00,larry-fitzgerald-1,3730.0,1661.0,,6762.0,246053.0,5571.0,,Larry Fitzgerald,larry fitzgerald,6,13,74.0,0,0.0,0.0,144.0,44.0,4.0,0.997088,0,0.513889,0.276596,0.342043,0.654324,Larry Fitzgerald,ARI,5.936667,2.293974,10.764615,32.677938,46.153846,74.0,0.0,7.375000,8.630824,-1.255824,00-0022921,Larry,Fitzgerald,L.Fitzgerald
1,2017,REG,2,00-0022921,L.Fitzgerald,WR,WR,ARI,7393,b6a61b38-5cfa-46eb-b1c5-b0255d7ebaf5,9383.0,1724.0,223.0,larryfitzgerald/2506106,5528.0,6762.0,1732.0,492934.0,FitzLa00,larry-fitzgerald-1,3730.0,1661.0,,6762.0,246053.0,5571.0,,Larry Fitzgerald,larry fitzgerald,3,6,21.0,0,0.0,0.0,29.0,17.0,2.0,-3.455533,0,0.724138,0.166667,0.069378,0.298565,Larry Fitzgerald,ARI,4.746667,2.808189,5.010000,7.075605,50.000000,21.0,0.0,5.873333,6.784866,-0.911533,00-0022921,Larry,Fitzgerald,L.Fitzgerald
2,2017,REG,3,00-0022921,L.Fitzgerald,WR,WR,ARI,7393,b6a61b38-5cfa-46eb-b1c5-b0255d7ebaf5,9383.0,1724.0,223.0,larryfitzgerald/2506106,5528.0,6762.0,1732.0,492934.0,FitzLa00,larry-fitzgerald-1,3730.0,1661.0,,6762.0,246053.0,5571.0,,Larry Fitzgerald,larry fitzgerald,13,15,149.0,1,0.0,0.0,138.0,45.0,6.0,7.632769,0,1.079710,0.312500,0.369973,0.727731,Larry Fitzgerald,ARI,6.556000,3.289943,9.675333,33.997048,86.666667,149.0,1.0,3.536154,3.383512,0.152642,00-0022921,Larry,Fitzgerald,L.Fitzgerald
3,2017,REG,4,00-0022921,L.Fitzgerald,WR,WR,ARI,7393,b6a61b38-5cfa-46eb-b1c5-b0255d7ebaf5,9383.0,1724.0,223.0,larryfitzgerald/2506106,5528.0,6762.0,1732.0,492934.0,FitzLa00,larry-fitzgerald-1,3730.0,1661.0,,6762.0,246053.0,5571.0,,Larry Fitzgerald,larry fitzgerald,4,7,32.0,1,0.0,0.0,31.0,18.0,1.0,0.162141,0,1.032258,0.137255,0.070938,0.255539,Larry Fitzgerald,ARI,8.400000,2.609960,5.661429,8.939165,57.142857,32.0,1.0,4.125000,3.548166,0.576834,00-0022921,Larry,Fitzgerald,L.Fitzgerald
4,2017,REG,5,00-0022921,L.Fitzgerald,WR,WR,ARI,7393,b6a61b38-5cfa-46eb-b1c5-b0255d7ebaf5,9383.0,1724.0,223.0,larryfitzgerald/2506106,5528.0,6762.0,1732.0,492934.0,FitzLa00,larry-fitzgerald-1,3730.0,1661.0,,6762.0,246053.0,5571.0,,Larry Fitzgerald,larry fitzgerald,6,10,51.0,0,0.0,0.0,44.0,29.0,5.0,2.428232,0,1.159091,0.227273,0.105516,0.414770,Larry Fitzgerald,ARI,6.392222,2.449503,4.217000,11.136346,60.000000,51.0,0.0,6.521667,5.652730,0.868937,00-0022921,Larry,Fitzgerald,L.Fitzgerald
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17357,2024,REG,2,00-0039920,M.Corley,WR,WR,NYJ,16636,bae59933-8b94-4837-990e-f0a4ced3cdbb,26023.0,,11617.0,,4613104.0,40944.0,,3162613.0,CorlMa00,malachi-corley-1,17777.0,,1607.0,40944.0,0.0,,1215291.0,Malachi Corley,malachi corley,1,1,4.0,0,0.0,0.0,-1.0,5.0,0.0,-0.475780,0,0.000000,0.034483,-0.006579,0.047119,,,,,,,,,,,,,,,,
17358,2024,REG,9,00-0039920,M.Corley,WR,WR,NYJ,16636,bae59933-8b94-4837-990e-f0a4ced3cdbb,26023.0,,11617.0,,4613104.0,40944.0,,3162613.0,CorlMa00,malachi-corley-1,17777.0,,1607.0,40944.0,0.0,,1215291.0,Malachi Corley,malachi corley,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,,0,,,,,,,,,,,,,,,,,,,,
17359,2024,REG,10,00-0039920,M.Corley,WR,WR,NYJ,16636,bae59933-8b94-4837-990e-f0a4ced3cdbb,26023.0,,11617.0,,4613104.0,40944.0,,3162613.0,CorlMa00,malachi-corley-1,17777.0,,1607.0,40944.0,0.0,,1215291.0,Malachi Corley,malachi corley,1,2,2.0,0,0.0,0.0,12.0,0.0,1.0,-2.246118,0,0.166667,0.060606,0.057692,0.131294,,,,,,,,,,,,,,,,
17360,2024,REG,11,00-0039920,M.Corley,WR,WR,NYJ,16636,bae59933-8b94-4837-990e-f0a4ced3cdbb,26023.0,,11617.0,,4613104.0,40944.0,,3162613.0,CorlMa00,malachi-corley-1,17777.0,,1607.0,40944.0,0.0,,1215291.0,Malachi Corley,malachi corley,1,1,10.0,0,0.0,0.0,10.0,0.0,0.0,0.563583,0,1.000000,0.034483,0.080645,0.108176,,,,,,,,,,,,,,,,


In [18]:
## TEST ##
# Spot-check window: season plus first and last week (edit to inspect another slice)
test_season, week_start, week_end = 2024, 1, 18

# Rows of that season whose week lies in [week_start, week_end], both ends included
test_data = wr_ids_ngs_weekly_stats[
    wr_ids_ngs_weekly_stats['season'].eq(test_season)
    & wr_ids_ngs_weekly_stats['week'].between(week_start, week_end)
]

# Label for the (optional) display that follows
print(f"Filtered data for season {test_season}, weeks {week_start}-{week_end}:")
# display(test_data)

Filtered data for season 2024, weeks 1-18:


In [19]:
## TEST ##
# Look up one player in the merged frame so player_id can be compared with player_gsis_id
wr_id = '00-0038976'  # player_id to inspect; replace as needed

# Every weekly row in the merged frame that belongs to this player
player_data = wr_ids_ngs_weekly_stats.loc[wr_ids_ngs_weekly_stats['player_id'].eq(wr_id)]

# Label for the (optional) display that follows
print(f"Weekly data for player_id {wr_id}:")
# display(player_data)

Weekly data for player_id 00-0038976:


In [20]:
# Output: pro-football reference dataframe that contains WR data from the python nfl library
# note: PFR data not available before 2018
# there is no position info so the data will pull WR, TE, and RB receiving data

# Seasons to request from Pro Football Reference (PFR), starting at 2018.
# Reuse get_year_range() so PFR follows the same in-season rule as `years`:
# the current year is included during the regular season and excluded after.
# The previous range(2018, current_year) always dropped current_year, so
# the in-progress season was missing from PFR whenever current_week <= 18.
pfr_years = get_year_range(current_year, current_week, start_year=2018)

# Pull the PFR weekly receiving ('rec') stats for those seasons
pfr_rec_df = nfl.import_weekly_pfr('rec', pfr_years)

# Drop columns this notebook does not use; errors='ignore' skips absent ones
pfr_rec_df = pfr_rec_df.drop(
    columns=['game_id', 'pfr_game_id', 'receiving_int', 'rushing_broken_tackles',
             'passing_drops', 'passing_drop_pct'],
    errors='ignore',
)

# Report the size; the bare expression renders the frame as the cell output
print(f"Shape of PFR dataframe: {pfr_rec_df.shape}")
pfr_rec_df

# csv file
# pfr_rec_df.to_csv('pfr_rec_df.csv', index=False)

Shape of PFR dataframe: (31083, 11)


Unnamed: 0,season,week,game_type,team,opponent,pfr_player_name,pfr_player_id,receiving_broken_tackles,receiving_drop,receiving_drop_pct,receiving_rat
0,2018,1,REG,PHI,ATL,Nelson Agholor,AghoNe00,0.0,0.0,0.000,80.4
1,2018,1,REG,PHI,ATL,Zach Ertz,ErtzZa00,0.0,2.0,0.200,63.7
2,2018,1,REG,PHI,ATL,Darren Sproles,SproDa00,1.0,1.0,0.143,62.8
3,2018,1,REG,PHI,ATL,DeAndre Carter,CartDe02,0.0,0.0,0.000,108.3
4,2018,1,REG,PHI,ATL,Nick Foles,FoleNi00,0.0,0.0,0.000,118.7
...,...,...,...,...,...,...,...,...,...,...,...
4340,2024,19,WC,MIN,LA,Jalen Nailor,NailJa00,0.0,0.0,0.000,79.6
4341,2024,19,WC,MIN,LA,Trent Sherfield,SherTr00,0.0,0.0,0.000,70.1
4342,2024,19,WC,MIN,LA,Johnny Mundt,MundJo00,0.0,0.0,0.000,96.5
4343,2024,19,WC,MIN,LA,C.J. Ham,HamxC.00,0.0,0.0,0.000,100.0


In [21]:
# Output: a dataframe of NFL WR info, ids, weekly stats, next-gen stats, and Pro Football Reference (PFR) data
# NOTE: unbalanced dataframe - pfr stats start at 2018

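# merge the pfr_rec_df with the wr_ids_ngs_weekly_stats dataframe
# match on PFR id, season, and week; PFR rows with no WR match (likely TE/RB) are left out by the left join, while WR rows without PFR data are kept with NaN in the PFR columns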
# Left join on PFR id + season + week. The full PFR frame is used because it
# has no position column to pre-filter on. Every WR row is retained.
wr_ids_ngs_pfr_stats = wr_ids_ngs_weekly_stats.merge(
    pfr_rec_df,
    how='left',
    left_on=['pfr_id', 'season', 'week'],
    right_on=['pfr_player_id', 'season', 'week'],
)

# Report the merged shape
print(f"Shape of merged dataframe: {wr_ids_ngs_pfr_stats.shape}")

# Row integrity check: print whether the row count is the same across all
# three merge stages (a left join adds rows only when right-side keys repeat).
print(
    f"Row count matches: {len(wr_ids_weekly_stats) == len(wr_ids_ngs_weekly_stats) == len(wr_ids_ngs_pfr_stats)}"
)

# Render the merged frame for review
display(wr_ids_ngs_pfr_stats)

# csv file
# wr_ids_ngs_pfr_stats.to_csv('wr_ids_ngs_pfr_stats.csv', index=False)

Shape of merged dataframe: (17362, 69)
Row count matches: True


Unnamed: 0,season,season_type,week,player_id,player_name,position,position_group,recent_team,mfl_id,sportradar_id,fantasypros_id,pff_id,sleeper_id,nfl_id,espn_id,yahoo_id,fleaflicker_id,cbs_id,pfr_id,cfbref_id,rotowire_id,rotoworld_id,ktc_id,stats_id,stats_global_id,fantasy_data_id,swish_id,name,merge_name,receptions,targets,receiving_yards,receiving_tds,receiving_fumbles,receiving_fumbles_lost,receiving_air_yards,receiving_yards_after_catch,receiving_first_downs,receiving_epa,receiving_2pt_conversions,racr,target_share,air_yards_share,wopr,player_display_name,team_abbr,avg_cushion,avg_separation,avg_intended_air_yards,percent_share_of_intended_air_yards,catch_percentage,yards,rec_touchdowns,avg_yac,avg_expected_yac,avg_yac_above_expectation,player_gsis_id,player_first_name,player_last_name,player_short_name,game_type,team,opponent,pfr_player_name,pfr_player_id,receiving_broken_tackles,receiving_drop,receiving_drop_pct,receiving_rat
0,2017,REG,1,00-0022921,L.Fitzgerald,WR,WR,ARI,7393,b6a61b38-5cfa-46eb-b1c5-b0255d7ebaf5,9383.0,1724.0,223.0,larryfitzgerald/2506106,5528.0,6762.0,1732.0,492934.0,FitzLa00,larry-fitzgerald-1,3730.0,1661.0,,6762.0,246053.0,5571.0,,Larry Fitzgerald,larry fitzgerald,6,13,74.0,0,0.0,0.0,144.0,44.0,4.0,0.997088,0,0.513889,0.276596,0.342043,0.654324,Larry Fitzgerald,ARI,5.936667,2.293974,10.764615,32.677938,46.153846,74.0,0.0,7.375000,8.630824,-1.255824,00-0022921,Larry,Fitzgerald,L.Fitzgerald,,,,,,,,,
1,2017,REG,2,00-0022921,L.Fitzgerald,WR,WR,ARI,7393,b6a61b38-5cfa-46eb-b1c5-b0255d7ebaf5,9383.0,1724.0,223.0,larryfitzgerald/2506106,5528.0,6762.0,1732.0,492934.0,FitzLa00,larry-fitzgerald-1,3730.0,1661.0,,6762.0,246053.0,5571.0,,Larry Fitzgerald,larry fitzgerald,3,6,21.0,0,0.0,0.0,29.0,17.0,2.0,-3.455533,0,0.724138,0.166667,0.069378,0.298565,Larry Fitzgerald,ARI,4.746667,2.808189,5.010000,7.075605,50.000000,21.0,0.0,5.873333,6.784866,-0.911533,00-0022921,Larry,Fitzgerald,L.Fitzgerald,,,,,,,,,
2,2017,REG,3,00-0022921,L.Fitzgerald,WR,WR,ARI,7393,b6a61b38-5cfa-46eb-b1c5-b0255d7ebaf5,9383.0,1724.0,223.0,larryfitzgerald/2506106,5528.0,6762.0,1732.0,492934.0,FitzLa00,larry-fitzgerald-1,3730.0,1661.0,,6762.0,246053.0,5571.0,,Larry Fitzgerald,larry fitzgerald,13,15,149.0,1,0.0,0.0,138.0,45.0,6.0,7.632769,0,1.079710,0.312500,0.369973,0.727731,Larry Fitzgerald,ARI,6.556000,3.289943,9.675333,33.997048,86.666667,149.0,1.0,3.536154,3.383512,0.152642,00-0022921,Larry,Fitzgerald,L.Fitzgerald,,,,,,,,,
3,2017,REG,4,00-0022921,L.Fitzgerald,WR,WR,ARI,7393,b6a61b38-5cfa-46eb-b1c5-b0255d7ebaf5,9383.0,1724.0,223.0,larryfitzgerald/2506106,5528.0,6762.0,1732.0,492934.0,FitzLa00,larry-fitzgerald-1,3730.0,1661.0,,6762.0,246053.0,5571.0,,Larry Fitzgerald,larry fitzgerald,4,7,32.0,1,0.0,0.0,31.0,18.0,1.0,0.162141,0,1.032258,0.137255,0.070938,0.255539,Larry Fitzgerald,ARI,8.400000,2.609960,5.661429,8.939165,57.142857,32.0,1.0,4.125000,3.548166,0.576834,00-0022921,Larry,Fitzgerald,L.Fitzgerald,,,,,,,,,
4,2017,REG,5,00-0022921,L.Fitzgerald,WR,WR,ARI,7393,b6a61b38-5cfa-46eb-b1c5-b0255d7ebaf5,9383.0,1724.0,223.0,larryfitzgerald/2506106,5528.0,6762.0,1732.0,492934.0,FitzLa00,larry-fitzgerald-1,3730.0,1661.0,,6762.0,246053.0,5571.0,,Larry Fitzgerald,larry fitzgerald,6,10,51.0,0,0.0,0.0,44.0,29.0,5.0,2.428232,0,1.159091,0.227273,0.105516,0.414770,Larry Fitzgerald,ARI,6.392222,2.449503,4.217000,11.136346,60.000000,51.0,0.0,6.521667,5.652730,0.868937,00-0022921,Larry,Fitzgerald,L.Fitzgerald,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17357,2024,REG,2,00-0039920,M.Corley,WR,WR,NYJ,16636,bae59933-8b94-4837-990e-f0a4ced3cdbb,26023.0,,11617.0,,4613104.0,40944.0,,3162613.0,CorlMa00,malachi-corley-1,17777.0,,1607.0,40944.0,0.0,,1215291.0,Malachi Corley,malachi corley,1,1,4.0,0,0.0,0.0,-1.0,5.0,0.0,-0.475780,0,0.000000,0.034483,-0.006579,0.047119,,,,,,,,,,,,,,,,,REG,NYJ,TEN,Malachi Corley,CorlMa00,0.0,0.0,0.0,83.3
17358,2024,REG,9,00-0039920,M.Corley,WR,WR,NYJ,16636,bae59933-8b94-4837-990e-f0a4ced3cdbb,26023.0,,11617.0,,4613104.0,40944.0,,3162613.0,CorlMa00,malachi-corley-1,17777.0,,1607.0,40944.0,0.0,,1215291.0,Malachi Corley,malachi corley,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
17359,2024,REG,10,00-0039920,M.Corley,WR,WR,NYJ,16636,bae59933-8b94-4837-990e-f0a4ced3cdbb,26023.0,,11617.0,,4613104.0,40944.0,,3162613.0,CorlMa00,malachi-corley-1,17777.0,,1607.0,40944.0,0.0,,1215291.0,Malachi Corley,malachi corley,1,2,2.0,0,0.0,0.0,12.0,0.0,1.0,-2.246118,0,0.166667,0.060606,0.057692,0.131294,,,,,,,,,,,,,,,,,REG,NYJ,ARI,Malachi Corley,CorlMa00,0.0,0.0,0.0,56.2
17360,2024,REG,11,00-0039920,M.Corley,WR,WR,NYJ,16636,bae59933-8b94-4837-990e-f0a4ced3cdbb,26023.0,,11617.0,,4613104.0,40944.0,,3162613.0,CorlMa00,malachi-corley-1,17777.0,,1607.0,40944.0,0.0,,1215291.0,Malachi Corley,malachi corley,1,1,10.0,0,0.0,0.0,10.0,0.0,0.0,0.563583,0,1.000000,0.034483,0.080645,0.108176,,,,,,,,,,,,,,,,,REG,NYJ,IND,Malachi Corley,CorlMa00,0.0,0.0,0.0,108.3


In [22]:
# Output: a dataframe of NFL WR info, ids, weekly stats, next-gen stats, and Pro Football Reference (PFR) data
# Output: Ordered the df by year, week, and receiving yards
# NOTE: unbalanced dataframe - pfr stats start at 2018

# Sort chronologically (season, then week, both ascending) and, within each
# week, by receiving_yards from highest to lowest.
wr_ids_ngs_pfr_stats_sorted = wr_ids_ngs_pfr_stats.sort_values(
    ['season', 'week', 'receiving_yards'],
    ascending=[True, True, False],
)

# Report the shape (sorting does not change it), then render the sorted frame
print(f"Shape of merged dataframe: {wr_ids_ngs_pfr_stats_sorted.shape}")
print("Dataframe sorted by season, week, and receiving_yards:")
display(wr_ids_ngs_pfr_stats_sorted)


# Save the sorted dataframe to a csv
# wr_ids_ngs_pfr_stats_sorted.to_csv('wr_ids_ngs_pfr_stats_sorted.csv', index=False)

Shape of merged dataframe: (17362, 69)
Dataframe sorted by season, week, and receiving_yards:


Unnamed: 0,season,season_type,week,player_id,player_name,position,position_group,recent_team,mfl_id,sportradar_id,fantasypros_id,pff_id,sleeper_id,nfl_id,espn_id,yahoo_id,fleaflicker_id,cbs_id,pfr_id,cfbref_id,rotowire_id,rotoworld_id,ktc_id,stats_id,stats_global_id,fantasy_data_id,swish_id,name,merge_name,receptions,targets,receiving_yards,receiving_tds,receiving_fumbles,receiving_fumbles_lost,receiving_air_yards,receiving_yards_after_catch,receiving_first_downs,receiving_epa,receiving_2pt_conversions,racr,target_share,air_yards_share,wopr,player_display_name,team_abbr,avg_cushion,avg_separation,avg_intended_air_yards,percent_share_of_intended_air_yards,catch_percentage,yards,rec_touchdowns,avg_yac,avg_expected_yac,avg_yac_above_expectation,player_gsis_id,player_first_name,player_last_name,player_short_name,game_type,team,opponent,pfr_player_name,pfr_player_id,receiving_broken_tackles,receiving_drop,receiving_drop_pct,receiving_rat
214,2017,REG,1,00-0027793,A.Brown,WR,WR,PIT,9988,16e33176-b73e-49b7-b0aa-c405b47a706e,9808.0,5718.0,536.0,antoniobrown/2508061,13934.0,24171.0,,1272852.0,BrowAn04,antonio-brown-1,6454.0,5698.0,,24171.0,406214.0,11056.0,406214.0,Antonio Brown,antonio brown,11,11,182.0,0,1.0,0.0,90.0,92.0,8.0,10.870283,0,2.022222,0.305556,0.400000,0.738333,Antonio Brown,PIT,4.442727,4.311392,7.329091,35.293088,100.000000,182.0,0.0,9.137273,6.636465,2.500807,00-0027793,Antonio,Brown,A.Brown,,,,,,,,,
645,2017,REG,1,00-0030035,A.Thielen,WR,WR,MIN,11938,2fa2b2da-4aa9-44b5-b27e-56876dfe2ad4,13429.0,8288.0,1689.0,,16460.0,27277.0,,2059362.0,ThieAd00,,8986.0,9054.0,308.0,27277.0,733643.0,15534.0,733643.0,Adam Thielen,adam thielen,9,10,157.0,0,0.0,0.0,105.0,59.0,4.0,6.655833,0,1.495238,0.312500,0.439331,0.776281,Adam Thielen,MIN,3.920000,2.277252,10.673000,42.259265,90.000000,157.0,0.0,6.504444,7.789293,-1.284848,00-0030035,Adam,Thielen,A.Thielen,,,,,,,,,
1673,2017,REG,1,00-0033040,T.Hill,WR,WR,KC,12801,01d8aee3-e1c4-4988-970a-8c0c2d08bd83,15802.0,10799.0,3321.0,,3116406.0,29399.0,,2131163.0,HillTy00,,11222.0,11458.0,286.0,29399.0,823156.0,18082.0,823156.0,Tyreek Hill,tyreek hill,7,8,133.0,1,0.0,0.0,94.0,55.0,5.0,9.070634,0,1.414894,0.235294,0.361538,0.606018,Tyreek Hill,KC,7.078571,5.210156,11.551250,35.725055,87.500000,133.0,1.0,7.945714,10.144182,-2.198467,00-0033040,Tyreek,Hill,T.Hill,,,,,,,,,
245,2017,REG,1,00-0027891,G.Tate,WR,WR,DET,9831,c88d9352-b835-45ed-a909-1cfec09a58bc,9683.0,5585.0,642.0,goldentate/497326,13217.0,24035.0,,1265470.0,TateGo00,golden-tate-1,6389.0,5583.0,,24035.0,400490.0,11611.0,400490.0,Golden Tate,golden tate,10,12,107.0,0,0.0,0.0,68.0,43.0,5.0,-5.569108,0,1.573529,0.307692,0.232877,0.624552,Golden Tate,DET,5.639167,2.270264,5.055833,20.000659,83.333333,107.0,0.0,4.657000,4.412235,0.244765,00-0027891,Golden,Tate,G.Tate,,,,,,,,,
38,2017,REG,1,00-0026035,D.Amendola,WR,WR,NE,9308,973bfe3c-6d0d-4130-a79c-f860650b1da6,9146.0,4717.0,491.0,dannyamendola/2649,11674.0,9037.0,5595.0,516968.0,AmenDa00,,5813.0,4991.0,,9037.0,263758.0,9906.0,263758.0,Danny Amendola,danny amendola,6,7,100.0,0,1.0,0.0,54.0,49.0,5.0,6.142825,0,1.851852,0.194444,0.095238,0.358333,Danny Amendola,NE,2.763333,3.564135,7.581429,9.771861,85.714286,100.0,0.0,8.360000,8.952487,-0.592487,00-0026035,Daniel,Amendola,D.Amendola,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16428,2024,POST,20,00-0037240,J.Williams,WR,WR,DET,15756,590adb00-5598-47ad-b6d4-16a6c5519847,23677.0,,8148.0,,4426388.0,33967.0,,3125968.0,WillJa11,jameson-williams-1,15849.0,,1273.0,33967.0,0.0,,1175184.0,Jameson Williams,jameson williams,1,4,19.0,0,0.0,0.0,111.0,0.0,1.0,-3.616179,0,0.171171,0.102564,0.318052,0.376482,,,,,,,,,,,,,,,,,,,,,,,,,
15246,2024,POST,20,00-0030564,D.Hopkins,WR,WR,KC,11232,5c48ade7-4b9a-4757-9643-87a6e3839e2b,11606.0,7808.0,1426.0,deandrehopkins/2540165,15795.0,26650.0,,1737078.0,HopkDe00,deandre-hopkins-1,8619.0,8404.0,,26650.0,560241.0,14986.0,560241.0,DeAndre Hopkins,deandre hopkins,0,1,0.0,0,0.0,0.0,5.0,0.0,0.0,-0.441731,0,0.000000,0.041667,0.032468,0.085227,,,,,,,,,,,,,,,,,,,,,,,,,
15557,2024,POST,20,00-0033857,J.Smith-Schuster,WR,WR,KC,13156,9547fbb1-0d4f-4d9e-83b9-e2fa30463bb9,16427.0,11817.0,4040.0,,3120348.0,30175.0,,2139620.0,SmitJu00,juju-smith-1,11877.0,12184.0,,30175.0,835909.0,18883.0,835909.0,JuJu Smith-Schuster,juju smith-schuster,0,1,0.0,0,0.0,0.0,7.0,0.0,0.0,-1.662924,0,0.000000,0.041667,0.045455,0.094318,,,,,,,,,,,,,,,,,,,,,,,,,
15811,2024,POST,20,00-0035208,O.Zaccheaus,WR,WR,WAS,14592,d8281390-f081-41e5-b55e-75779536fe94,18864.0,44920.0,6271.0,,3917914.0,32123.0,,2186266.0,ZaccOl01,,13833.0,,609.0,32123.0,883976.0,21142.0,883976.0,Olamide Zaccheaus,olamide zaccheaus,0,2,0.0,0,0.0,0.0,31.0,0.0,0.0,-2.204745,0,0.000000,0.068966,0.183432,0.231851,,,,,,,,,,,,,,,,,,,,,,,,,
