In [1]:
# This notebook produces the dataframe for tight ends (TE)

In [2]:
## Notes on the NFL Library ##
# the NFL python library seems not to work on Tuesdays, probably due to updates (not confirmed)
# unbalanced dataframe - pfr stats start at 2018; all other stats go back to 2017

In [3]:
## REQUIRED ACTIONS - Include in a README doc ## 
# modify the season start date in the 'get_current_week' function
# modify the number of weeks if the NFL adds regular season games to the schedule

In [4]:
# import the libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json
import glob
from IPython.display import display, HTML
from datetime import datetime
import nfl_data_py as nfl
import os
import re

In [5]:
# Wide dataframes: show every column and allow 1000 characters per row so nothing wraps
pd.set_option('display.max_columns', None, 'display.width', 1000)

In [6]:
# Function to calculate the current week of the NFL season
def get_current_week(season_start_date=None, current_date=None):
    """Return the week of the NFL season that a date falls in.

    Weeks are counted in 7-day steps from ``season_start_date``: the first
    seven days are week 1, the next seven are week 2, and so on. A date that
    lies before the start date yields 0 or a negative number.

    Parameters
    ----------
    season_start_date : datetime, optional
        First day of the season. Defaults to 4 September 2024; pass a new
        value (or update the default below) for each season.
    current_date : datetime, optional
        The date to evaluate. Defaults to ``datetime.now()``.

    Returns
    -------
    int
        Week number, 1-based from the start date.
    """
    if season_start_date is None:
        season_start_date = datetime(2024, 9, 4)  # Update for the season start
    if current_date is None:
        current_date = datetime.now()

    # Whole days elapsed since the start; floor division groups them into 7-day weeks
    days_since_start = (current_date - season_start_date).days
    return days_since_start // 7 + 1

# Where "now" falls in the NFL calendar: season week, calendar year, and season type
# NOTE(review): current_year is the calendar year, which is not the season year in
# January/February — confirm the downstream cells expect that.
current_week = get_current_week()
current_year = datetime.now().year
# Season type code: 2 through week 18 (regular season), 3 afterwards (playoffs)
seasontype = 3 if current_week > 18 else 2

In [7]:
# define the years to pull
# nfl.import_weekly_data(years, columns, downcast)
def get_year_range(current_year, current_week, start_year=2017, regular_season_weeks=18):
    """Return the list of seasons to pull, from ``start_year`` onwards.

    ``current_year`` is included only while the regular season is in progress,
    i.e. when ``1 <= current_week <= regular_season_weeks``. In every other
    case the list stops at ``current_year - 1``: a week past the regular
    season means the playoffs (or offseason) of the season that started the
    previous year, and a week below 1 means the new season has not started
    yet, so there is nothing to pull for it.

    Parameters
    ----------
    current_year : int
        Calendar year at the time of the run.
    current_week : int
        Week of the season; values below 1 are treated as pre-season.
    start_year : int, optional
        First season in the returned list (default 2017).
    regular_season_weeks : int, optional
        Number of regular-season weeks (default 18). Raise this if the NFL
        adds regular-season games to the schedule.

    Returns
    -------
    list of int
        Consecutive seasons in ascending order; empty if the last season is
        earlier than ``start_year``.
    """
    season_in_progress = 1 <= current_week <= regular_season_weeks
    last_season = current_year if season_in_progress else current_year - 1
    return list(range(start_year, last_season + 1))

# Seasons to request from the data sources, based on today's year and week
years = get_year_range(current_year=current_year, current_week=current_week)

In [8]:
# Identifying columns requested for every player-week row
base_columns = [
    'season',
    'season_type',
    'week',
    'player_id',
    'player_name',
    'position',
    'position_group',
    'recent_team',
]

In [9]:
# Columns of the player-ID table that are not needed downstream
columns_to_drop = [
    'position',
    'team',
    'birthdate',
    'age',
    'draft_year',
    'draft_round',
    'draft_pick',
    'draft_ovr',
    'twitter_username',
    'height',
    'weight',
    'college',
    'db_season',
]

# Fetch the player IDs with nfl.import_ids() (no parameters) and discard those columns;
# errors='ignore' means a listed column that is absent is simply skipped
ids_data = nfl.import_ids().drop(columns=columns_to_drop, errors='ignore')

# Display the resulting dataframe for review
# print(f"Columns after dropping unnecessary ones: {ids_data.columns.tolist()}")
# display(ids_data)

In [10]:
# Weekly data restricted to the base columns, for every season in `years`
# (signature: nfl.import_weekly_data(years, columns, downcast))
weekly_data = nfl.import_weekly_data(years=years, columns=base_columns)

# display(weekly_data)

Downcasting floats.


In [11]:
## Output: a dataframe of ALL NFL athletes info and ids since 2017

# The ID table's 'gsis_id' column is renamed to 'player_id' so both frames share one join key
ids_data = ids_data.rename(columns={'gsis_id': 'player_id'})

# Inner join: only weekly rows whose player_id also appears in the ID table are kept
id_dataframe = weekly_data.merge(ids_data, how='inner', on='player_id')

# Second name bound to the same merged dataframe
all_players_id_data = id_dataframe

# Display the resulting ID dataframe
# display(all_players_id_data)

In [12]:
## Output: a dataframe of NFL TE info and ids since 2017
# Keep only the rows whose 'position' is tight end (TE)
tight_end_ids = all_players_id_data.loc[all_players_id_data['position'].eq('TE')]

# Report the size of the TE-only dataframe
print(f"Shape of merged dataframe: {tight_end_ids.shape}")

# Display the resulting dataframe for review
# display(tight_end_ids)

Shape of merged dataframe: (8710, 29)


In [13]:
## Output: a dataframe of NFL TE info, ids, and stats since 2017
# Receiving-related stat columns pulled for tight ends
te_columns = [
    'receptions',
    'targets',
    'receiving_yards',
    'receiving_tds',
    'receiving_fumbles',
    'receiving_fumbles_lost',
    'receiving_air_yards',
    'receiving_yards_after_catch',
    'receiving_first_downs',
    'receiving_epa',
    'receiving_2pt_conversions',
    'racr',
    'target_share',
    'air_yards_share',
    'wopr',
]

# Weekly receiving stats for the requested seasons, plus the three keys needed for merging
te_stats = nfl.import_weekly_data(
    years=years,
    columns=['player_id', 'season', 'week', *te_columns],
)

# Attach the stats to the TE rows; the inner join requires player, season and week to agree
te_ids_weekly_stats = tight_end_ids.merge(
    te_stats,
    how='inner',
    on=['player_id', 'season', 'week'],
)

# Report the size of the merged dataframe
print(f"Shape of merged dataframe: {te_ids_weekly_stats.shape}")

# Row integrity check: the merge should neither add nor drop TE rows
print(
    f"Row count matches: {len(te_ids_weekly_stats) == len(tight_end_ids)}"
)

# display the df
# display(te_ids_weekly_stats)

Downcasting floats.
Shape of merged dataframe: (8710, 44)
Row count matches: True


In [14]:
# Output: imports the NFL next-generation stats (NGS) from the nfl python library

# Import the next-generation stats (NGS) from nfl.import_ngs_data()
# Note: NGS starts at week 0 (previous season totals) - not needed so drop those rows

# One pipeline: pull the NGS receiving data for the requested seasons, keep only
# tight-end rows from weeks other than 0, then remove the columns that are not needed
ngs_te_df = (
    nfl.import_ngs_data('receiving', years)
    .loc[lambda ngs: (ngs['week'] != 0) & (ngs['player_position'] == 'TE')]
    .drop(
        columns=['season_type', 'player_position', 'receptions', 'targets', 'player_jersey_number'],
        errors='ignore',  # a listed column that is absent is skipped rather than raising
    )
)

# Report the size of the filtered NGS dataframe
print(f"Shape of NGS TE DataFrame after dropping columns: {ngs_te_df.shape}")
# display(ngs_te_df)

# Optional: Save the resulting dataframe to a CSV file
# ngs_te_df.to_csv('ngs_te_df.csv', index=False)

Shape of NGS TE DataFrame after dropping columns: (2474, 18)


In [15]:
# Output: a dataframe of NFL TE info, ids, weekly stats, and next-gen stats since 2017

# Left-join the NGS rows onto the weekly TE stats. The player key is 'player_id' on the
# left and 'player_gsis_id' on the right; 'season' and 'week' have the same name on both sides.
te_ids_ngs_weekly_stats = te_ids_weekly_stats.merge(
    ngs_te_df,
    how='left',  # every row of te_ids_weekly_stats is retained
    left_on=['player_id', 'season', 'week'],
    right_on=['player_gsis_id', 'season', 'week'],
)

# Report the size of the result and whether the join left the row count unchanged
print(f"Shape of merged dataframe: {te_ids_ngs_weekly_stats.shape}")
print(f"Row count matches: {len(te_ids_weekly_stats) == len(te_ids_ngs_weekly_stats)}")

# Display a sample of the merged dataframe
# display(te_ids_ngs_weekly_stats)

# Optional: Save the merged dataframe to a CSV file
# te_ids_ngs_weekly_stats.to_csv('te_ids_ngs_weekly_stats.csv', index=False)

Shape of merged dataframe: (8710, 60)
Row count matches: True


In [16]:
# Output: pro-football reference dataframe for receiving data from the python nfl library
# note: PFR data not available before 2018
# there is no position info so the data will pull WR, TE, and RB receiving data

# Seasons to request from PFR: the same seasons as every other source, minus anything
# before 2018 (PFR data is not available earlier). Deriving the list from `years`
# keeps the season in progress during the regular season, which
# range(2018, current_year) always left out.
pfr_years = [season for season in years if season >= 2018]

# import pro-football reference data ('rec' = receiving)
pfr_rec_df = nfl.import_weekly_pfr('rec', pfr_years)

# Drop unnecessary columns; errors='ignore' skips any that are absent
pfr_rec_df = pfr_rec_df.drop(
    columns=['game_id', 'pfr_game_id', 'receiving_int', 'rushing_broken_tackles',
             'passing_drops', 'passing_drop_pct'],
    errors='ignore',
)

# Report the size of the PFR dataframe
print(f"Shape of PFR dataframe: {pfr_rec_df.shape}")
# display(pfr_rec_df)

# csv file
# pfr_rec_df.to_csv('pfr_rec_df.csv', index=False)

Shape of PFR dataframe: (31083, 11)


In [17]:
# Output: a dataframe of NFL TE info, ids, weekly stats, next-gen stats, and pro-football reference data
# NOTE: unbalanced dataframe - PFR stats start at 2018

# Left-join the PFR receiving rows onto the TE dataframe. The full PFR dataframe is used
# because position data is unavailable there; the player key is 'pfr_id' on the left and
# 'pfr_player_id' on the right, while 'season' and 'week' are named the same on both sides.
te_ids_ngs_pfr_stats = te_ids_ngs_weekly_stats.merge(
    pfr_rec_df,
    how='left',  # every row of te_ids_ngs_weekly_stats is retained
    left_on=['pfr_id', 'season', 'week'],
    right_on=['pfr_player_id', 'season', 'week'],
)

# Report the size of the merged dataframe
print(f"Shape of merged dataframe: {te_ids_ngs_pfr_stats.shape}")

# Row integrity check across every merge stage so far
print(
    f"Row count matches: {len(te_ids_weekly_stats) == len(te_ids_ngs_weekly_stats) == len(te_ids_ngs_pfr_stats)}"
)

# Display the first few rows of the merged dataframe for review
# display(te_ids_ngs_pfr_stats)

# Optional: Save the merged dataframe to a CSV file
# te_ids_ngs_pfr_stats.to_csv('te_ids_ngs_pfr_stats.csv', index=False)

Shape of merged dataframe: (8710, 69)
Row count matches: True


In [18]:
# Output: a dataframe of NFL TE info, ids, weekly stats, next-gen stats, and pro-football reference data
# Output: Ordered the df by year, week, and receiving yards
# NOTE: unbalanced dataframe - PFR stats start at 2018

# Sort oldest season and week first; within a week, highest receiving_yards first
te_ids_ngs_pfr_stats_sorted = te_ids_ngs_pfr_stats.sort_values(
    ['season', 'week', 'receiving_yards'],
    ascending=[True, True, False],
)

# Report the size of the sorted dataframe
print(f"Shape of merged dataframe: {te_ids_ngs_pfr_stats_sorted.shape}")

# Row integrity check: every stage, including the sort, should have the same number of rows
print(
    f"Row count matches: {len(te_ids_weekly_stats) == len(te_ids_ngs_weekly_stats) == len(te_ids_ngs_pfr_stats) == len(te_ids_ngs_pfr_stats_sorted)}"
)

# Show the sorted dataframe in the notebook
print("Dataframe sorted by season, week, and receiving_yards:")
display(te_ids_ngs_pfr_stats_sorted)

# Write the sorted dataframe to CSV without the index column
te_ids_ngs_pfr_stats_sorted.to_csv('te_ids_ngs_pfr_stats_sorted.csv', index=False)

Shape of merged dataframe: (8710, 69)
Row count matches: True
Dataframe sorted by season, week, and receiving_yards:


Unnamed: 0,season,season_type,week,player_id,player_name,position,position_group,recent_team,mfl_id,sportradar_id,fantasypros_id,pff_id,sleeper_id,nfl_id,espn_id,yahoo_id,fleaflicker_id,cbs_id,pfr_id,cfbref_id,rotowire_id,rotoworld_id,ktc_id,stats_id,stats_global_id,fantasy_data_id,swish_id,name,merge_name,receptions,targets,receiving_yards,receiving_tds,receiving_fumbles,receiving_fumbles_lost,receiving_air_yards,receiving_yards_after_catch,receiving_first_downs,receiving_epa,receiving_2pt_conversions,racr,target_share,air_yards_share,wopr,player_display_name,team_abbr,avg_cushion,avg_separation,avg_intended_air_yards,percent_share_of_intended_air_yards,catch_percentage,yards,rec_touchdowns,avg_yac,avg_expected_yac,avg_yac_above_expectation,player_gsis_id,player_first_name,player_last_name,player_short_name,game_type,team,opponent,pfr_player_name,pfr_player_id,receiving_broken_tackles,receiving_drop,receiving_drop_pct,receiving_rat
833,2017,REG,1,00-0032392,A.Hooper,TE,TE,ATL,12677,90c2a93f-d837-4e1b-b57c-56648903a8db,15581.0,10715.0,3202.0,,3043275.0,29315.0,,2067004.0,HoopAu00,austin-hooper-1,10748.0,11301.0,384.0,29315.0,739424.0,17963.0,739424.0,Austin Hooper,austin hooper,2,2,128.0,1,0.0,0.0,45.0,83.0,2.0,11.335708,0,2.844445,0.066667,0.208333,0.245833,,,,,,,,,,,,,,,,,,,,,,,,,
403,2017,REG,1,00-0030061,Z.Ertz,TE,TE,PHI,11247,de3421f7-2147-4835-89a5-724e87bad463,11689.0,7816.0,1339.0,zachertz/2540158,15835.0,26658.0,,1685963.0,ErtzZa00,zach-ertz-1,8781.0,8409.0,305.0,26658.0,503177.0,14856.0,503177.0,Zach Ertz,zach ertz,8,8,93.0,0,0.0,0.0,78.0,15.0,6.0,7.477927,0,1.192308,0.210526,0.194514,0.451949,Zach Ertz,PHI,4.500000,2.710968,9.355000,18.126332,100.000000,93.0,0.0,2.270000,2.177001,0.092999,00-0030061,Zachary,Ertz,Z.Ertz,,,,,,,,,
91,2017,REG,1,00-0024389,De.Walker,TE,TE,TEN,8416,ccce5e8e-52ca-4f0f-a40f-fe5e7227d156,9172.0,3120.0,284.0,delaniewalker/2495966,9761.0,7924.0,4353.0,1109396.0,WalkHu00,,4888.0,3976.0,,7924.0,218943.0,7175.0,,Delanie Walker,delanie walker,7,9,76.0,0,0.0,0.0,104.0,12.0,5.0,5.525133,0,0.730769,0.219512,0.279570,0.524967,Delanie Walker,TEN,7.572500,3.097148,10.901111,26.669023,77.777778,76.0,0.0,2.358571,3.025790,-0.667219,00-0024389,Delanie,Walker,D.Walker,,,,,,,,,
592,2017,REG,1,00-0031299,J.Matthews,TE,TE,BUF,11676,7b96a836-666b-47b6-a0a7-9dbb0b4c53e8,12124.0,8677.0,1800.0,jordanmatthews/2543500,16763.0,27570.0,,1759816.0,MattJo00,jordan-matthews-1,9273.0,9420.0,,27570.0,555648.0,15974.0,555648.0,Jordan Matthews,jordan matthews,2,3,61.0,0,0.0,0.0,56.0,42.0,2.0,5.050018,0,1.089286,0.111111,0.306011,0.380874,,,,,,,,,,,,,,,,,,,,,,,,,
16,2017,REG,1,00-0022127,J.Witten,TE,TE,DAL,6997,e38c9b1b-7c51-48a2-ac1d-a752502e8930,9275.0,1384.0,23.0,jasonwitten/2505629,4527.0,6405.0,,396134.0,WittJa00,jason-witten-1,3086.0,1990.0,,6405.0,184571.0,722.0,,Jason Witten,jason witten,7,9,59.0,1,0.0,0.0,61.0,12.0,1.0,2.561272,0,0.967213,0.236842,0.205387,0.499034,Jason Witten,DAL,4.082000,1.552612,6.372222,18.549065,77.777778,59.0,1.0,1.725714,1.558290,0.167425,00-0022127,Christopher,Witten,J.Witten,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8581,2024,POST,20,00-0039065,S.LaPorta,TE,TE,DET,16214,3059f1e5-1871-4458-9e90-9b9917874c61,22978.0,,10859.0,,4430027.0,40064.0,,3124197.0,LaPoSa01,sam-laporta-1,17033.0,,1429.0,40064.0,0.0,,1168926.0,Sam LaPorta,sam laporta,6,7,51.0,1,0.0,0.0,29.0,22.0,3.0,1.582399,0,1.758621,0.179487,0.083095,0.327397,Sam LaPorta,DET,7.562500,3.203121,5.065714,9.860682,85.714286,51.0,1.0,3.826667,3.596636,0.230030,00-0039065,Sam,LaPorta,S.LaPorta,,,,,,,,,
7633,2024,POST,20,00-0030061,Z.Ertz,TE,TE,WAS,11247,de3421f7-2147-4835-89a5-724e87bad463,11689.0,7816.0,1339.0,zachertz/2540158,15835.0,26658.0,,1685963.0,ErtzZa00,zach-ertz-1,8781.0,8409.0,305.0,26658.0,503177.0,14856.0,503177.0,Zach Ertz,zach ertz,5,5,28.0,1,0.0,0.0,22.0,6.0,2.0,5.264380,0,1.272727,0.172414,0.130178,0.349745,Zach Ertz,WAS,5.986667,2.741199,4.400000,13.445789,100.000000,28.0,1.0,1.366000,1.937940,-0.571940,00-0030061,Zachary,Ertz,Z.Ertz,,,,,,,,,
8155,2024,POST,20,00-0036628,J.Bates,TE,TE,WAS,15344,3208880b-949f-4926-85eb-1940770550ea,22833.0,42275.0,7716.0,,4048228.0,33512.0,,2257291.0,BateJo00,john-bates-1,15289.0,,,33512.0,0.0,,944038.0,John Bates,john bates,1,2,20.0,0,0.0,0.0,17.0,3.0,1.0,1.821314,0,1.176471,0.068966,0.100592,0.173862,,,,,,,,,,,,,,,,,,,,,,,,,
8172,2024,POST,20,00-0036637,N.Gray,TE,TE,KC,15337,c97dfcb6-b999-476f-8487-f3848e92df17,19539.0,60878.0,7828.0,,4240472.0,33550.0,,2866903.0,GrayNo00,noah-gray-1,15459.0,,1059.0,33550.0,0.0,,1056981.0,Noah Gray,noah gray,3,3,13.0,0,0.0,0.0,10.0,3.0,0.0,-1.140443,0,1.300000,0.125000,0.064935,0.232955,,,,,,,,,,,,,,,,,,,,,,,,,
