In [1]:
# This produces the dataframe for RB

In [2]:
## Notes on the NFL Library ##
# the NFL Python library seems not to work on Tuesdays, probably due to data updates (not confirmed)
# unbalanced dataframe - pfr stats start at 2018; all other stats go back to 2017

In [3]:
## REQUIRED ACTIONS - Include in a README doc ## 
# modify the season start date in the 'get_current_week' function
# modify the number of weeks if the NFL adds regular season games to the schedule

In [4]:
# import the libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json
import glob
from IPython.display import display, HTML
from datetime import datetime
import nfl_data_py as nfl
import os
import re

In [5]:
# Show every column of a wide dataframe on a single line: no column limit,
# and a 1000-character display width so rows are not wrapped
pd.set_option('display.max_columns', None, 'display.width', 1000)

In [6]:
# Function to calculate the current week of the NFL season
def get_current_week(current_date=None, season_start_date=None):
    """Return the NFL week number that ``current_date`` falls in.

    Weeks are counted in whole 7-day steps from ``season_start_date``: the
    first seven days are week 1, the next seven are week 2, and so on. No
    upper bound is applied, so the value keeps growing after the season ends.

    Args:
        current_date: ``datetime`` to evaluate. Defaults to ``datetime.now()``.
        season_start_date: ``datetime`` of the first day of week 1. Defaults
            to 2024-09-04; update this default (or pass the argument) for
            each new season.

    Returns:
        int: the week number. A value of 0 or below means ``current_date``
        is earlier than ``season_start_date``.
    """
    if current_date is None:
        current_date = datetime.now()
    if season_start_date is None:
        season_start_date = datetime(2024, 9, 4)  # Update for the season start

    # Floor division keeps dates before the start at week 0 or below
    # instead of rounding them up into week 1.
    elapsed_days = (current_date - season_start_date).days
    return (elapsed_days // 7) + 1

# Season context shared by the rest of the notebook: calendar year, NFL week,
# and season type
current_year = datetime.now().year
current_week = get_current_week()
# Season type is 3 (playoffs) once the week passes 18, otherwise 2 (regular season)
seasontype = 3 if current_week > 18 else 2

In [7]:
# define the years to pull
# nfl.import_weekly_data(years, columns, downcast)
def get_year_range(current_year, current_week, start_year=2017):
    """Return the consecutive list of years to download, starting at ``start_year``.

    Args:
        current_year: year used as the upper end of the range.
        current_week: NFL week number; decides whether ``current_year`` is included.
        start_year: first year in the returned list (default 2017).

    Returns:
        list[int]: ``start_year`` through ``current_year`` when ``current_week``
        is 18 or lower (regular season), otherwise ``start_year`` through
        ``current_year - 1`` (playoffs).
    """
    # NOTE(review): the notebook passes the calendar year here. If week 18
    # falls in January, the calendar year is already the next year, so the
    # range would include a season that has not been played yet - confirm.
    final_year = current_year if current_week <= 18 else current_year - 1
    return list(range(start_year, final_year + 1))

# Years to download, derived from today's year and week
years = get_year_range(current_year=current_year, current_week=current_week)

In [8]:
# Identifier and context columns requested from the weekly data pull
base_columns = [
    # when the game was played
    'season',
    'season_type',
    'week',
    # who the row belongs to
    'player_id',
    'player_name',
    'position',
    'position_group',
    'recent_team',
]

In [9]:
# Columns of the player-ID table that this notebook does not use
columns_to_drop = [
    'position',
    'team',
    'birthdate',
    'age',
    'draft_year',
    'draft_round',
    'draft_pick',
    'draft_ovr',
    'twitter_username',
    'height',
    'weight',
    'college',
    'db_season',
]

# Download the player-ID table (nfl.import_ids() takes no parameters here) and
# remove the columns above; errors='ignore' skips any listed column that is absent
ids_data = nfl.import_ids().drop(columns=columns_to_drop, errors='ignore')

# Optional review of the result:
# print(f"Columns after dropping unnecessary ones: {ids_data.columns.tolist()}")
# display(ids_data)

In [10]:
# Weekly player rows for every year in `years`, limited to the base columns.
# Call shape: nfl.import_weekly_data(years, columns, downcast)
weekly_data = nfl.import_weekly_data(years, base_columns)

# Optional review of the result:
# display(weekly_data)

Downcasting floats.


In [11]:
## Output: a dataframe of ALL NFL athletes info and ids since 2017

# The ID table names its key 'gsis_id'; rename it to 'player_id' so both
# frames share the join column
ids_data = ids_data.rename(columns={'gsis_id': 'player_id'})

# Inner join: keep only weekly rows whose player_id also appears in the ID table
id_dataframe = weekly_data.merge(ids_data, how='inner', on='player_id')

# Descriptive alias used by the following cells
all_players_id_data = id_dataframe

# Optional review of the result:
# display(all_players_id_data)

In [12]:
## Output: a dataframe of NFL RB info and ids since 2017

# Keep only the rows whose position is exactly 'RB'
running_back_ids = all_players_id_data.loc[all_players_id_data['position'].eq('RB')]

# Report the size of the RB subset
print(f"Shape of merged dataframe: {running_back_ids.shape}")

# Optional review of the result:
# display(running_back_ids)

Shape of merged dataframe: (11058, 29)


In [13]:
## Output: a dataframe of NFL RB info, ids, and stats since 2017

# Rushing and receiving stat columns to attach to each RB row
rb_columns = [
    # volume
    'carries', 'attempts', 'targets', 'target_share', 'receptions',
    # yardage
    'rushing_yards', 'receiving_yards', 'receiving_yards_after_catch',
    # first downs and touchdowns
    'rushing_first_downs', 'receiving_first_downs', 'rushing_tds', 'receiving_tds',
    # EPA and two-point conversions
    'rushing_epa', 'receiving_epa', 'rushing_2pt_conversions', 'receiving_2pt_conversions',
]

# Second pull of the weekly data: the three join keys plus the stat columns above
rb_stats = nfl.import_weekly_data(years, ['player_id', 'season', 'week', *rb_columns])

# Attach the stats to the RB rows, matching on player, season, and week
rb_ids_weekly_stats = running_back_ids.merge(
    rb_stats,
    how='inner',
    on=['player_id', 'season', 'week'],
)

# Report the size of the result
print(f"Shape of merged dataframe: {rb_ids_weekly_stats.shape}")

# Row integrity check: the merge should neither add nor drop RB rows
print(
    f"Row count matches: {rb_ids_weekly_stats.shape[0] == running_back_ids.shape[0]}"
)

# Optional review of the result:
# display(rb_ids_weekly_stats)

Downcasting floats.
Shape of merged dataframe: (11058, 45)
Row count matches: True


In [14]:
# Output: imports the NFL next-generation stats (NGS) for RBs from the NFL Python library
# Note: NGS starts at week 0 (previous season totals) - these rows are dropped

# One pipeline: download NGS rushing data for `years`, keep RB rows outside
# week 0, then remove columns that are not needed downstream
ngs_rb_df = (
    nfl.import_ngs_data('rushing', years)
    .loc[lambda ngs: (ngs['week'] != 0) & (ngs['player_position'] == 'RB')]
    .drop(
        columns=['season_type', 'player_position', 'player_jersey_number'],
        errors='ignore',  # skip any listed column that is absent
    )
)

# Report the size of the result and show it
print(f"Shape of NGS RB DataFrame after dropping columns: {ngs_rb_df.shape}")
display(ngs_rb_df)

# Save the dataframe to a CSV file (optional)
# ngs_rb_df.to_csv('ngs_rb_df.csv', index=False)

Shape of NGS RB DataFrame after dropping columns: (4373, 19)


Unnamed: 0,season,week,player_display_name,team_abbr,efficiency,percent_attempts_gte_eight_defenders,avg_time_to_los,rush_attempts,rush_yards,avg_rush_yards,rush_touchdowns,player_gsis_id,player_first_name,player_last_name,player_short_name,expected_rush_yards,rush_yards_over_expected,rush_yards_over_expected_per_att,rush_pct_over_expected
629,2017,1,Devonta Freeman,ATL,6.343514,8.333333,3.019091,12,37,3.083333,1,00-0031285,Devonta,Freeman,D.Freeman,,,,
630,2017,1,Todd Gurley,LAR,8.196500,10.526316,3.127438,19,40,2.105263,1,00-0032241,Todd,Gurley,T.Gurley,,,,
631,2017,1,Jamaal Charles,DEN,5.660000,20.000000,3.204667,10,40,4.000000,0,00-0026213,Jamaal,Charles,J.Charles,,,,
632,2017,1,Lamar Miller,HOU,3.960000,11.764706,2.483200,17,65,3.823529,0,00-0029615,Lamar,Miller,L.Miller,,,,
633,2017,1,Le'Veon Bell,PIT,4.232187,0.000000,3.067900,10,32,3.200000,0,00-0030496,Le'Veon,Bell,L.Bell,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5397,2024,19,Joe Mixon,HOU,4.147925,36.000000,2.823143,25,106,4.240000,1,00-0033897,Joe,Mixon,J.Mixon,90.417809,10.582191,0.440925,0.500000
5398,2024,19,Bucky Irving,TB,3.796234,17.647059,2.772857,17,77,4.529412,0,00-0039361,Mar'Keise,Irving,B.Irving,73.614280,3.385720,0.199160,0.588235
5399,2024,20,Jahmyr Gibbs,DET,3.183714,7.142857,3.038250,14,105,7.500000,2,00-0039139,Jahmyr,Gibbs,J.Gibbs,71.417790,33.582210,2.398729,0.571429
5400,2024,20,Joe Mixon,HOU,3.381818,11.111111,2.669778,18,88,4.888889,1,00-0033897,Joe,Mixon,J.Mixon,92.995584,-4.995584,-0.277532,0.333333


In [15]:
# Output: a dataframe of NFL RB info, ids, weekly stats, and next-gen stats since 2017

# Left join the NGS rows onto the RB weekly rows. The player key is named
# 'player_id' on the left and 'player_gsis_id' on the right; season and week
# have the same name on both sides. how='left' keeps every RB weekly row,
# leaving the NGS columns empty where no NGS row matches.
rb_ids_ngs_weekly_stats = rb_ids_weekly_stats.merge(
    ngs_rb_df,
    how='left',
    left_on=['player_id', 'season', 'week'],
    right_on=['player_gsis_id', 'season', 'week'],
)

# Report the size of the result and confirm the join added no rows
print(f"Shape of merged dataframe: {rb_ids_ngs_weekly_stats.shape}")
print(f"Row count matches: {rb_ids_weekly_stats.shape[0] == rb_ids_ngs_weekly_stats.shape[0]}")

# Show the merged dataframe
display(rb_ids_ngs_weekly_stats)

# Save the merged dataframe to a CSV file (optional)
# rb_ids_ngs_weekly_stats.to_csv('rb_ids_ngs_weekly_stats.csv', index=False)

Shape of merged dataframe: (11058, 62)
Row count matches: True


Unnamed: 0,season,season_type,week,player_id,player_name,position,position_group,recent_team,mfl_id,sportradar_id,fantasypros_id,pff_id,sleeper_id,nfl_id,espn_id,yahoo_id,fleaflicker_id,cbs_id,pfr_id,cfbref_id,rotowire_id,rotoworld_id,ktc_id,stats_id,stats_global_id,fantasy_data_id,swish_id,name,merge_name,carries,attempts,targets,target_share,receptions,rushing_yards,receiving_yards,receiving_yards_after_catch,rushing_first_downs,receiving_first_downs,rushing_tds,receiving_tds,rushing_epa,receiving_epa,rushing_2pt_conversions,receiving_2pt_conversions,player_display_name,team_abbr,efficiency,percent_attempts_gte_eight_defenders,avg_time_to_los,rush_attempts,rush_yards,avg_rush_yards,rush_touchdowns,player_gsis_id,player_first_name,player_last_name,player_short_name,expected_rush_yards,rush_yards_over_expected,rush_yards_over_expected_per_att,rush_pct_over_expected
0,2017,REG,1,00-0023500,F.Gore,RB,RB,IND,7877,6a2b129d-a9e5-4131-b491-82269b323f77,9218.0,2282.0,232.0,frankgore/2506404,8479.0,7241.0,2848.0,411568.0,GoreFr00,frank-gore-1,4400.0,3205.0,,7241.0,157341.0,5820.0,,Frank Gore,frank gore,10,0,1,0.047619,1,42.0,10.0,14.0,1.0,0.0,0,0,-0.449218,0.374096,0,0,Frank Gore,IND,3.610952,10.000000,2.435667,10.0,42.0,4.200000,0.0,00-0023500,Frank,Gore,F.Gore,,,,
1,2017,REG,2,00-0023500,F.Gore,RB,RB,IND,7877,6a2b129d-a9e5-4131-b491-82269b323f77,9218.0,2282.0,232.0,frankgore/2506404,8479.0,7241.0,2848.0,411568.0,GoreFr00,frank-gore-1,4400.0,3205.0,,7241.0,157341.0,5820.0,,Frank Gore,frank gore,14,0,2,0.055556,0,46.0,0.0,0.0,2.0,0.0,1,0,-0.984420,-1.496430,0,0,Frank Gore,IND,4.080435,50.000000,2.736917,14.0,46.0,3.285714,1.0,00-0023500,Frank,Gore,F.Gore,,,,
2,2017,REG,3,00-0023500,F.Gore,RB,RB,IND,7877,6a2b129d-a9e5-4131-b491-82269b323f77,9218.0,2282.0,232.0,frankgore/2506404,8479.0,7241.0,2848.0,411568.0,GoreFr00,frank-gore-1,4400.0,3205.0,,7241.0,157341.0,5820.0,,Frank Gore,frank gore,25,0,1,0.041667,1,57.0,0.0,5.0,2.0,0.0,1,0,-5.832108,-0.788598,0,0,Frank Gore,IND,5.425789,24.000000,2.572100,25.0,57.0,2.280000,1.0,00-0023500,Frank,Gore,F.Gore,,,,
3,2017,REG,4,00-0023500,F.Gore,RB,RB,IND,7877,6a2b129d-a9e5-4131-b491-82269b323f77,9218.0,2282.0,232.0,frankgore/2506404,8479.0,7241.0,2848.0,411568.0,GoreFr00,frank-gore-1,4400.0,3205.0,,7241.0,157341.0,5820.0,,Frank Gore,frank gore,12,0,3,0.107143,3,46.0,34.0,43.0,1.0,2.0,0,0,-1.547445,1.718400,0,0,Frank Gore,IND,3.221957,41.666667,2.545750,12.0,46.0,3.833333,0.0,00-0023500,Frank,Gore,F.Gore,,,,
4,2017,REG,5,00-0023500,F.Gore,RB,RB,IND,7877,6a2b129d-a9e5-4131-b491-82269b323f77,9218.0,2282.0,232.0,frankgore/2506404,8479.0,7241.0,2848.0,411568.0,GoreFr00,frank-gore-1,4400.0,3205.0,,7241.0,157341.0,5820.0,,Frank Gore,frank gore,14,0,4,0.121212,3,48.0,38.0,40.0,2.0,1.0,0,0,-4.457448,1.223682,0,0,Frank Gore,IND,4.459583,28.571429,2.452167,14.0,48.0,3.428571,0.0,00-0023500,Frank,Gore,F.Gore,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11053,2024,REG,10,00-0039921,T.Benson,RB,RB,ARI,16594,0c1253f2-1822-445d-8590-fea6fd32703f,25388.0,,11589.0,,4429275.0,40958.0,,3176915.0,BensTr01,trey-benson-2,17767.0,,1565.0,40958.0,0.0,,1248031.0,Trey Benson,trey benson,10,0,2,0.083333,2,62.0,25.0,33.0,3.0,2.0,0,0,1.512646,1.987017,0,0,Trey Benson,ARI,3.232581,10.000000,2.690778,10.0,62.0,6.200000,0.0,00-0039921,Trey,Benson,T.Benson,43.82976,18.17024,1.817024,0.6
11054,2024,REG,12,00-0039921,T.Benson,RB,RB,ARI,16594,0c1253f2-1822-445d-8590-fea6fd32703f,25388.0,,11589.0,,4429275.0,40958.0,,3176915.0,BensTr01,trey-benson-2,17767.0,,1565.0,40958.0,0.0,,1248031.0,Trey Benson,trey benson,4,0,0,,0,18.0,0.0,0.0,0.0,0.0,0,0,-0.326119,,0,0,,,,,,,,,,,,,,,,,
11055,2024,REG,13,00-0039921,T.Benson,RB,RB,ARI,16594,0c1253f2-1822-445d-8590-fea6fd32703f,25388.0,,11589.0,,4429275.0,40958.0,,3176915.0,BensTr01,trey-benson-2,17767.0,,1565.0,40958.0,0.0,,1248031.0,Trey Benson,trey benson,3,0,0,,0,20.0,0.0,0.0,1.0,0.0,0,0,0.515926,,0,0,,,,,,,,,,,,,,,,,
11056,2024,REG,14,00-0039921,T.Benson,RB,RB,ARI,16594,0c1253f2-1822-445d-8590-fea6fd32703f,25388.0,,11589.0,,4429275.0,40958.0,,3176915.0,BensTr01,trey-benson-2,17767.0,,1565.0,40958.0,0.0,,1248031.0,Trey Benson,trey benson,2,0,1,0.026316,1,15.0,4.0,9.0,1.0,0.0,0,0,0.053110,-0.144496,0,0,,,,,,,,,,,,,,,,,


In [16]:
# Output: pro-football reference dataframe for rushing data from the NFL Python library
# Note: PFR data not available before 2018
# There is no position info, so the data will pull WR, TE, and RB rushing data

# PFR years: every year already selected in `years`, from 2018 onward.
# Deriving the list from `years` keeps the PFR pull aligned with the weekly and
# NGS pulls. The previous range(2018, current_year) always stopped one year
# short, so the in-progress season was left out during the regular season.
pfr_years = [season for season in years if season >= 2018]

# Import pro-football reference data for rushing stats
pfr_rush_df = nfl.import_weekly_pfr('rush', pfr_years)

# Drop unnecessary columns; errors='ignore' skips any listed column that is absent
pfr_rush_df = pfr_rush_df.drop(
    columns=['game_id', 'pfr_game_id', 'rushing_int',
             'passing_drops', 'passing_drop_pct'], errors='ignore'
)

# Display the dataframe
print(f"Shape of PFR dataframe: {pfr_rush_df.shape}")
display(pfr_rush_df)

# Save the dataframe to a CSV file (optional)
# pfr_rush_df.to_csv('pfr_rush_df.csv', index=False)

Shape of PFR dataframe: (16050, 14)


Unnamed: 0,season,week,game_type,team,opponent,pfr_player_name,pfr_player_id,carries,rushing_yards_before_contact,rushing_yards_before_contact_avg,rushing_yards_after_contact,rushing_yards_after_contact_avg,rushing_broken_tackles,receiving_broken_tackles
0,2018,1,REG,PHI,ATL,Jay Ajayi,AjayJa00,15.0,19.0,1.3,43.0,2.9,3.0,
1,2018,1,REG,PHI,ATL,Corey Clement,ClemCo00,5.0,18.0,3.6,8.0,1.6,0.0,
2,2018,1,REG,PHI,ATL,Darren Sproles,SproDa00,5.0,2.0,0.4,8.0,1.6,0.0,
3,2018,1,REG,PHI,ATL,Nelson Agholor,AghoNe00,1.0,1.0,1.0,15.0,15.0,0.0,
4,2018,1,REG,PHI,ATL,Nick Foles,FoleNi00,1.0,-1.0,-1.0,0.0,0.0,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2298,2024,19,WC,LA,MIN,Matthew Stafford,StafMa00,2.0,-1.0,-0.5,0.0,0.0,0.0,
2299,2024,19,WC,LA,MIN,Puka Nacua,NacuPu00,1.0,0.0,0.0,7.0,7.0,0.0,
2300,2024,19,WC,MIN,LA,Aaron Jones,JoneAa00,13.0,19.0,1.5,29.0,2.2,0.0,
2301,2024,19,WC,MIN,LA,Cam Akers,AkerCa00,5.0,32.0,6.4,7.0,1.4,0.0,


In [17]:
# Output: a dataframe of NFL RB info, ids, weekly stats, next-gen stats, and pro-football reference data
# NOTE: unbalanced dataframe - PFR stats start at 2018

# Left join the full PFR rushing frame (it has no position column to filter on)
# onto the RB rows. The player key is 'pfr_id' on the left and 'pfr_player_id'
# on the right; season and week have the same name on both sides.
# Both inputs contain a 'carries' column, so the result holds 'carries_x'
# (weekly stats) and 'carries_y' (PFR).
rb_ids_ngs_pfr_stats = rb_ids_ngs_weekly_stats.merge(
    pfr_rush_df,
    how='left',  # keep every row of rb_ids_ngs_weekly_stats
    left_on=['pfr_id', 'season', 'week'],
    right_on=['pfr_player_id', 'season', 'week'],
)

# Report the size of the result
print(f"Shape of merged dataframe: {rb_ids_ngs_pfr_stats.shape}")

# Row integrity check: none of the left joins so far should have changed the row count
print(
    f"Row count matches: {rb_ids_weekly_stats.shape[0] == rb_ids_ngs_weekly_stats.shape[0] == rb_ids_ngs_pfr_stats.shape[0]}"
)

# Show the merged dataframe
display(rb_ids_ngs_pfr_stats)

# Save the merged dataframe to a CSV file (optional)
# rb_ids_ngs_pfr_stats.to_csv('rb_ids_ngs_pfr_stats.csv', index=False)

Shape of merged dataframe: (11058, 74)
Row count matches: True


Unnamed: 0,season,season_type,week,player_id,player_name,position,position_group,recent_team,mfl_id,sportradar_id,fantasypros_id,pff_id,sleeper_id,nfl_id,espn_id,yahoo_id,fleaflicker_id,cbs_id,pfr_id,cfbref_id,rotowire_id,rotoworld_id,ktc_id,stats_id,stats_global_id,fantasy_data_id,swish_id,name,merge_name,carries_x,attempts,targets,target_share,receptions,rushing_yards,receiving_yards,receiving_yards_after_catch,rushing_first_downs,receiving_first_downs,rushing_tds,receiving_tds,rushing_epa,receiving_epa,rushing_2pt_conversions,receiving_2pt_conversions,player_display_name,team_abbr,efficiency,percent_attempts_gte_eight_defenders,avg_time_to_los,rush_attempts,rush_yards,avg_rush_yards,rush_touchdowns,player_gsis_id,player_first_name,player_last_name,player_short_name,expected_rush_yards,rush_yards_over_expected,rush_yards_over_expected_per_att,rush_pct_over_expected,game_type,team,opponent,pfr_player_name,pfr_player_id,carries_y,rushing_yards_before_contact,rushing_yards_before_contact_avg,rushing_yards_after_contact,rushing_yards_after_contact_avg,rushing_broken_tackles,receiving_broken_tackles
0,2017,REG,1,00-0023500,F.Gore,RB,RB,IND,7877,6a2b129d-a9e5-4131-b491-82269b323f77,9218.0,2282.0,232.0,frankgore/2506404,8479.0,7241.0,2848.0,411568.0,GoreFr00,frank-gore-1,4400.0,3205.0,,7241.0,157341.0,5820.0,,Frank Gore,frank gore,10,0,1,0.047619,1,42.0,10.0,14.0,1.0,0.0,0,0,-0.449218,0.374096,0,0,Frank Gore,IND,3.610952,10.000000,2.435667,10.0,42.0,4.200000,0.0,00-0023500,Frank,Gore,F.Gore,,,,,,,,,,,,,,,,
1,2017,REG,2,00-0023500,F.Gore,RB,RB,IND,7877,6a2b129d-a9e5-4131-b491-82269b323f77,9218.0,2282.0,232.0,frankgore/2506404,8479.0,7241.0,2848.0,411568.0,GoreFr00,frank-gore-1,4400.0,3205.0,,7241.0,157341.0,5820.0,,Frank Gore,frank gore,14,0,2,0.055556,0,46.0,0.0,0.0,2.0,0.0,1,0,-0.984420,-1.496430,0,0,Frank Gore,IND,4.080435,50.000000,2.736917,14.0,46.0,3.285714,1.0,00-0023500,Frank,Gore,F.Gore,,,,,,,,,,,,,,,,
2,2017,REG,3,00-0023500,F.Gore,RB,RB,IND,7877,6a2b129d-a9e5-4131-b491-82269b323f77,9218.0,2282.0,232.0,frankgore/2506404,8479.0,7241.0,2848.0,411568.0,GoreFr00,frank-gore-1,4400.0,3205.0,,7241.0,157341.0,5820.0,,Frank Gore,frank gore,25,0,1,0.041667,1,57.0,0.0,5.0,2.0,0.0,1,0,-5.832108,-0.788598,0,0,Frank Gore,IND,5.425789,24.000000,2.572100,25.0,57.0,2.280000,1.0,00-0023500,Frank,Gore,F.Gore,,,,,,,,,,,,,,,,
3,2017,REG,4,00-0023500,F.Gore,RB,RB,IND,7877,6a2b129d-a9e5-4131-b491-82269b323f77,9218.0,2282.0,232.0,frankgore/2506404,8479.0,7241.0,2848.0,411568.0,GoreFr00,frank-gore-1,4400.0,3205.0,,7241.0,157341.0,5820.0,,Frank Gore,frank gore,12,0,3,0.107143,3,46.0,34.0,43.0,1.0,2.0,0,0,-1.547445,1.718400,0,0,Frank Gore,IND,3.221957,41.666667,2.545750,12.0,46.0,3.833333,0.0,00-0023500,Frank,Gore,F.Gore,,,,,,,,,,,,,,,,
4,2017,REG,5,00-0023500,F.Gore,RB,RB,IND,7877,6a2b129d-a9e5-4131-b491-82269b323f77,9218.0,2282.0,232.0,frankgore/2506404,8479.0,7241.0,2848.0,411568.0,GoreFr00,frank-gore-1,4400.0,3205.0,,7241.0,157341.0,5820.0,,Frank Gore,frank gore,14,0,4,0.121212,3,48.0,38.0,40.0,2.0,1.0,0,0,-4.457448,1.223682,0,0,Frank Gore,IND,4.459583,28.571429,2.452167,14.0,48.0,3.428571,0.0,00-0023500,Frank,Gore,F.Gore,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11053,2024,REG,10,00-0039921,T.Benson,RB,RB,ARI,16594,0c1253f2-1822-445d-8590-fea6fd32703f,25388.0,,11589.0,,4429275.0,40958.0,,3176915.0,BensTr01,trey-benson-2,17767.0,,1565.0,40958.0,0.0,,1248031.0,Trey Benson,trey benson,10,0,2,0.083333,2,62.0,25.0,33.0,3.0,2.0,0,0,1.512646,1.987017,0,0,Trey Benson,ARI,3.232581,10.000000,2.690778,10.0,62.0,6.200000,0.0,00-0039921,Trey,Benson,T.Benson,43.82976,18.17024,1.817024,0.6,REG,ARI,NYJ,Trey Benson,BensTr01,10.0,18.0,1.8,44.0,4.4,2.0,
11054,2024,REG,12,00-0039921,T.Benson,RB,RB,ARI,16594,0c1253f2-1822-445d-8590-fea6fd32703f,25388.0,,11589.0,,4429275.0,40958.0,,3176915.0,BensTr01,trey-benson-2,17767.0,,1565.0,40958.0,0.0,,1248031.0,Trey Benson,trey benson,4,0,0,,0,18.0,0.0,0.0,0.0,0.0,0,0,-0.326119,,0,0,,,,,,,,,,,,,,,,,,REG,ARI,SEA,Trey Benson,BensTr01,4.0,11.0,2.8,7.0,1.8,0.0,
11055,2024,REG,13,00-0039921,T.Benson,RB,RB,ARI,16594,0c1253f2-1822-445d-8590-fea6fd32703f,25388.0,,11589.0,,4429275.0,40958.0,,3176915.0,BensTr01,trey-benson-2,17767.0,,1565.0,40958.0,0.0,,1248031.0,Trey Benson,trey benson,3,0,0,,0,20.0,0.0,0.0,1.0,0.0,0,0,0.515926,,0,0,,,,,,,,,,,,,,,,,,REG,ARI,MIN,Trey Benson,BensTr01,3.0,17.0,5.7,3.0,1.0,0.0,
11056,2024,REG,14,00-0039921,T.Benson,RB,RB,ARI,16594,0c1253f2-1822-445d-8590-fea6fd32703f,25388.0,,11589.0,,4429275.0,40958.0,,3176915.0,BensTr01,trey-benson-2,17767.0,,1565.0,40958.0,0.0,,1248031.0,Trey Benson,trey benson,2,0,1,0.026316,1,15.0,4.0,9.0,1.0,0.0,0,0,0.053110,-0.144496,0,0,,,,,,,,,,,,,,,,,,REG,ARI,SEA,Trey Benson,BensTr01,2.0,1.0,0.5,14.0,7.0,0.0,


In [18]:
# Output: a dataframe of NFL RB info, ids, weekly stats, next-gen stats, and pro-football reference data
# Output: Ordered the dataframe by year, week, and rushing yards
# NOTE: unbalanced dataframe - PFR stats start at 2018

# Sort oldest season and week first; within each week, highest rushing_yards first
rb_ids_ngs_pfr_stats_sorted = rb_ids_ngs_pfr_stats.sort_values(
    ['season', 'week', 'rushing_yards'],
    ascending=[True, True, False],
)

# Report the size of the result
print(f"Shape of merged dataframe: {rb_ids_ngs_pfr_stats_sorted.shape}")

# Row integrity check: every stage since the weekly-stats merge should have the same row count
print(
    f"Row count matches: {rb_ids_weekly_stats.shape[0] == rb_ids_ngs_weekly_stats.shape[0] == rb_ids_ngs_pfr_stats.shape[0] == rb_ids_ngs_pfr_stats_sorted.shape[0]}"
)

# Show the sorted dataframe
print("Dataframe sorted by season, week, and rushing_yards:")
display(rb_ids_ngs_pfr_stats_sorted)

# Write the sorted dataframe to CSV in the working directory, without the index column
rb_ids_ngs_pfr_stats_sorted.to_csv('rb_ids_ngs_pfr_stats_sorted.csv', index=False)

Shape of merged dataframe: (11058, 74)
Row count matches: True
Dataframe sorted by season, week, and rushing_yards:


Unnamed: 0,season,season_type,week,player_id,player_name,position,position_group,recent_team,mfl_id,sportradar_id,fantasypros_id,pff_id,sleeper_id,nfl_id,espn_id,yahoo_id,fleaflicker_id,cbs_id,pfr_id,cfbref_id,rotowire_id,rotoworld_id,ktc_id,stats_id,stats_global_id,fantasy_data_id,swish_id,name,merge_name,carries_x,attempts,targets,target_share,receptions,rushing_yards,receiving_yards,receiving_yards_after_catch,rushing_first_downs,receiving_first_downs,rushing_tds,receiving_tds,rushing_epa,receiving_epa,rushing_2pt_conversions,receiving_2pt_conversions,player_display_name,team_abbr,efficiency,percent_attempts_gte_eight_defenders,avg_time_to_los,rush_attempts,rush_yards,avg_rush_yards,rush_touchdowns,player_gsis_id,player_first_name,player_last_name,player_short_name,expected_rush_yards,rush_yards_over_expected,rush_yards_over_expected_per_att,rush_pct_over_expected,game_type,team,opponent,pfr_player_name,pfr_player_id,carries_y,rushing_yards_before_contact,rushing_yards_before_contact_avg,rushing_yards_after_contact,rushing_yards_after_contact_avg,rushing_broken_tackles,receiving_broken_tackles
1295,2017,REG,1,00-0033923,K.Hunt,RB,RB,KC,13138,0ef0d0ca-2d2d-455b-ab63-a20c01303e37,16425.0,11841.0,4098.0,,3059915.0,30199.0,,2079567.0,HuntKa00,kareem-hunt-1,11739.0,12295.0,,30199.0,746613.0,18944.0,746613.0,Kareem Hunt,kareem hunt,17,0,5,0.147059,5,148.0,98.0,76.0,6.0,3.0,1,2,3.455881,9.309378,0,0,Kareem Hunt,KC,2.821014,35.294118,2.920235,17.0,148.0,8.705882,1.0,00-0033923,Kareem,Hunt,K.Hunt,,,,,,,,,,,,,,,,
1259,2017,REG,1,00-0033893,D.Cook,RB,RB,MIN,13128,8960d61e-433b-41ea-a7ad-4e76be87b582,16374.0,11796.0,4029.0,,3116593.0,30154.0,,2130893.0,CookDa01,dalvin-cook-1,11700.0,12138.0,,30154.0,824080.0,18872.0,824080.0,Dalvin Cook,dalvin cook,22,0,5,0.156250,3,127.0,10.0,12.0,4.0,0.0,0,0,-0.011037,-3.058858,0,0,Dalvin Cook,MIN,3.939449,40.909091,2.865950,22.0,127.0,5.772727,0.0,00-0033893,Dalvin,Cook,D.Cook,,,,,,,,,,,,,,,,
98,2017,REG,1,00-0027029,L.McCoy,RB,RB,BUF,9448,166292fc-629e-4c7b-b7bf-f572ca9eeb43,9398.0,4976.0,676.0,leseanmccoy/79607,12514.0,9317.0,,1243187.0,McCoLe01,lesean-mccoy-1,5970.0,5168.0,,9317.0,397945.0,11932.0,,LeSean McCoy,lesean mccoy,22,0,6,0.222222,5,110.0,49.0,39.0,4.0,2.0,0,0,-0.711850,1.988743,0,0,LeSean McCoy,BUF,3.686273,4.545455,2.676778,22.0,110.0,5.000000,0.0,00-0027029,LeSean,McCoy,L.McCoy,,,,,,,,,,,,,,,,
1063,2017,REG,1,00-0033045,E.Elliott,RB,RB,DAL,12625,bef8b2b4-78bd-4a4d-bb5d-6b55ada9ef6a,15498.0,10638.0,3164.0,,3051392.0,29238.0,,2060769.0,ElliEz00,ezekiel-elliott-1,10736.0,11265.0,,29238.0,728338.0,17923.0,728338.0,Ezekiel Elliott,ezekiel elliott,24,0,5,0.131579,5,104.0,36.0,42.0,6.0,1.0,0,0,0.570525,-0.114128,0,0,Ezekiel Elliott,DAL,4.143077,20.833333,2.896261,24.0,104.0,4.333333,0.0,00-0033045,Ezekiel,Elliott,E.Elliott,,,,,,,,,,,,,,,,
1243,2017,REG,1,00-0033856,L.Fournette,RB,RB,JAX,13129,7f46a7be-286e-4bfe-8778-d03dbe600ce9,16378.0,11759.0,3969.0,,3115364.0,30117.0,,2131693.0,FourLe00,leonard-fournette-1,11687.0,12132.0,,30117.0,822013.0,18803.0,822013.0,Leonard Fournette,leonard fournette,26,0,3,0.142857,3,100.0,24.0,25.0,5.0,1.0,1,0,3.412744,1.308894,0,0,Leonard Fournette,JAX,3.781000,57.692308,2.325435,26.0,100.0,3.846154,1.0,00-0033856,Leonard,Fournette,L.Fournette,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10413,2024,POST,20,00-0037197,I.Pacheco,RB,RB,KC,15749,3a25c037-fa30-4740-b742-ec8e4f1a2a25,24333.0,,8205.0,,4361529.0,34207.0,,2966551.0,PachIs00,isaih-pacheco-1,16104.0,,1266.0,34207.0,0.0,,1106832.0,Isiah Pacheco,isiah pacheco,5,0,1,0.041667,0,18.0,0.0,0.0,0.0,0.0,0,0,-0.804974,-0.807685,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
10473,2024,POST,20,00-0037258,D.Pierce,RB,RB,HOU,15717,5d4ef6da-a6b5-4d9c-adc0-28dfdbe9315e,22947.0,,8129.0,,4360238.0,34063.0,,2964236.0,PierDa01,dameon-pierce-1,15875.0,,1207.0,34063.0,0.0,,1108998.0,Dameon Pierce,dameon pierce,4,0,1,0.035714,0,10.0,0.0,0.0,0.0,0.0,0,0,-0.708495,-1.823628,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9850,2024,POST,20,00-0033854,D.Ogunbowale,RB,RB,HOU,13148,42b57148-fc06-4aee-b40b-bb941271b5b7,16879.0,12329.0,4718.0,,2983509.0,30860.0,,2010409.0,OgunDa00,,11768.0,12943.0,,30860.0,703015.0,19626.0,703015.0,Dare Ogunbowale,dare ogunbowale,1,0,1,0.035714,1,9.0,0.0,6.0,0.0,0.0,0,0,0.414379,-0.359967,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9935,2024,POST,20,00-0033955,J.McNichols,RB,RB,WAS,13143,25cc3585-6194-4786-968a-2600db46b6c6,16445.0,11917.0,4219.0,,3127586.0,30275.0,,2139987.0,McNiJe00,jeremy-mcnichols-1,11767.0,12174.0,,30275.0,837594.0,19065.0,837594.0,Jeremy McNichols,jeremy mcnichols,4,0,0,,0,8.0,0.0,0.0,1.0,0.0,1,0,-0.353687,,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
