# Examining RB Health in the Proceeding Season Based on Workload

In [2]:
import nfl_data_py as nfl

In [3]:
years = range(1999, 2024 + 1)
s_type = "ALL"
df_seasons_all = nfl.import_seasonal_data(years, s_type)

In [22]:
df_roster = nfl.import_seasonal_rosters(years)

In [23]:
df_roster.columns

Index(['season', 'team', 'position', 'depth_chart_position', 'jersey_number',
       'status', 'player_name', 'first_name', 'last_name', 'birth_date',
       'height', 'weight', 'college', 'player_id', 'espn_id', 'sportradar_id',
       'yahoo_id', 'rotowire_id', 'pff_id', 'pfr_id', 'fantasy_data_id',
       'sleeper_id', 'years_exp', 'headshot_url', 'esb_id', 'gsis_it_id',
       'smart_id', 'entry_year', 'rookie_year', 'draft_club', 'ngs_position',
       'week', 'game_type', 'status_description_abbr', 'football_name',
       'draft_number', 'age'],
      dtype='object')

In [43]:
df_roster_clean = df_roster[["season", "player_id", "player_name", "age"]]

In [44]:
df_roster_clean.query("player_id == '00-0034844'")

Unnamed: 0,season,player_id,player_name,age
44701,2018,00-0034844,Saquon Barkley,21.0
47076,2019,00-0034844,Saquon Barkley,22.0
49813,2020,00-0034844,Saquon Barkley,23.0
52439,2021,00-0034844,Saquon Barkley,24.0
55076,2022,00-0034844,Saquon Barkley,25.0
57904,2023,00-0034844,Saquon Barkley,26.0
60810,2024,00-0034844,Saquon Barkley,27.0


In [53]:
df_rush_all = df_seasons_all[["player_id", "season", "season_type", "carries", "rushing_yards", "rushing_tds", 
                              "rushing_first_downs", "rushing_epa", "games"]]

In [54]:
df_rush_all.season_type.unique()

array(['REG', 'POST'], dtype=object)

In [55]:
df_combined = df_rush_all.groupby(['player_id', 'season'], as_index=False).agg({
    'carries': 'sum',
    'rushing_yards': 'sum',
    'rushing_tds': 'sum',
    'rushing_first_downs': 'sum',
    'rushing_epa': 'sum',
    'games': 'max'
})

In [56]:
df_merged = df_combined.merge(
    df_roster_clean[['player_id', 'season', 'player_name', 'age']],
    on=['player_id', 'season'],
    how='left'
)

In [57]:
df_over_400 = df_merged.query("carries >= 400")
df_over_400

Unnamed: 0,player_id,season,carries,rushing_yards,rushing_tds,rushing_first_downs,rushing_epa,games,player_name,age
656,00-0004304,2004,410,1927.0,14,95.0,12.208601,18,Corey Dillon,29.0
945,00-0005883,1999,424,1726.0,12,83.0,-36.163311,20,Eddie George,25.0
946,00-0005883,2000,425,1546.0,15,83.0,-31.726274,17,Eddie George,26.0
1011,00-0006305,2003,403,2105.0,17,109.0,25.236686,18,Ahman Green,26.0
1303,00-0008241,2000,404,1797.0,13,105.0,-5.608213,17,Edgerrin James,22.0
1630,00-0010442,2004,408,1840.0,12,93.0,14.796085,18,Curtis Martin,31.0
3220,00-0019647,2005,430,2116.0,29,118.0,45.660339,19,Shaun Alexander,28.0
3281,00-0019693,2000,409,1673.0,10,76.0,-17.577455,20,Jamal Lewis,21.0
3283,00-0019693,2003,402,2098.0,14,84.0,-10.020831,17,Jamal Lewis,24.0
4731,00-0021976,2006,429,1821.0,17,93.0,0.013186,17,Larry Johnson,26.0


In [69]:
rb_dict = {}
for index, row in df_over_400.iterrows():
    season = row["season"]
    player_id = row["player_id"]
    player = row["player_name"]
    games = row["games"]
    age = row["age"]

    rb_dict[player] = {}

    temp_df = df_rush_all \
        .query(f"player_id == '{player_id}' and season_type == 'REG' and season <= {season + 1} and season >= {season}") \
        .reset_index(drop=True)
    if len(temp_df) == 2:
        games_0 = temp_df.at[0, 'games']
        games_1 = temp_df.at[1, 'games']

        games_diff = games_1 - games_0
        rb_dict[player]["games_diff"] = games_diff

        yards_0 = temp_df.at[0, 'rushing_yards']
        yards_1 = temp_df.at[1, 'rushing_yards']

        yards_diff = yards_1 - yards_0
        rb_dict[player]["yards_diff"] = yards_diff

        rb_dict[player]["seasons"] = (season, season + 1)
        rb_dict[player]["age"] = (age, age + 1)

In [70]:
rb_dict

{'Corey Dillon': {'games_diff': -5,
  'yards_diff': -902.0,
  'seasons': (2004, 2005),
  'age': (29.0, 30.0)},
 'Eddie George': {'games_diff': -1,
  'yards_diff': -516.0,
  'seasons': (2000, 2001),
  'age': (26.0, 27.0)},
 'Ahman Green': {'games_diff': -2,
  'yards_diff': -720.0,
  'seasons': (2003, 2004),
  'age': (26.0, 27.0)},
 'Edgerrin James': {'games_diff': -11,
  'yards_diff': -1028.0,
  'seasons': (2000, 2001),
  'age': (22.0, 23.0)},
 'Curtis Martin': {'games_diff': -6,
  'yards_diff': -962.0,
  'seasons': (2004, 2005),
  'age': (31.0, 32.0)},
 'Shaun Alexander': {'games_diff': -7,
  'yards_diff': -984.0,
  'seasons': (2005, 2006),
  'age': (28.0, 29.0)},
 'Jamal Lewis': {'games_diff': -5,
  'yards_diff': -1057.0,
  'seasons': (2003, 2004),
  'age': (24.0, 25.0)},
 'Larry Johnson': {'games_diff': -9,
  'yards_diff': -1230.0,
  'seasons': (2006, 2007),
  'age': (26.0, 27.0)},
 'Arian Foster': {'games_diff': -10,
  'yards_diff': -882.0,
  'seasons': (2012, 2013),
  'age': (26.0,

In [72]:
df_over_350 = df_merged.query("carries >= 350")
df_over_350.head()

Unnamed: 0,player_id,season,carries,rushing_yards,rushing_tds,rushing_first_downs,rushing_epa,games,player_name,age
97,00-0000745,2005,370,1901.0,9,73.0,6.232753,17,Tiki Barber,30.0
98,00-0000745,2006,353,1799.0,5,84.0,12.261546,17,Tiki Barber,31.0
171,00-0001215,2000,352,1302.0,8,70.0,-8.35501,16,Jerome Bettis,28.0
583,00-0004052,2001,356,1432.0,5,74.0,-29.115399,16,Stephen Davis,27.0
585,00-0004052,2003,382,1759.0,9,80.0,-16.359918,18,Stephen Davis,29.0


In [73]:
rb_dict_350 = {}
for index, row in df_over_350.iterrows():
    season = row["season"]
    player_id = row["player_id"]
    player = row["player_name"]
    games = row["games"]
    age = row["age"]

    rb_dict_350[player] = {}

    temp_df = df_rush_all \
        .query(f"player_id == '{player_id}' and season_type == 'REG' and season <= {season + 1} and season >= {season}") \
        .reset_index(drop=True)
    if len(temp_df) == 2:
        games_0 = temp_df.at[0, 'games']
        games_1 = temp_df.at[1, 'games']

        games_diff = games_1 - games_0
        rb_dict_350[player]["games_diff"] = games_diff

        yards_0 = temp_df.at[0, 'rushing_yards']
        yards_1 = temp_df.at[1, 'rushing_yards']

        yards_diff = yards_1 - yards_0
        rb_dict_350[player]["yards_diff"] = yards_diff

        rb_dict_350[player]["seasons"] = (season, season + 1)
        rb_dict_350[player]["age"] = (age, age + 1)

In [74]:
rb_dict_350

{'Tiki Barber': {},
 'Jerome Bettis': {'games_diff': -4,
  'yards_diff': -230.0,
  'seasons': (2000, 2001),
  'age': (28.0, 29.0)},
 'Stephen Davis': {'games_diff': -16,
  'yards_diff': -1352.0,
  'seasons': (2003, 2004),
  'age': (29.0, 30.0)},
 'Corey Dillon': {'games_diff': -5,
  'yards_diff': -902.0,
  'seasons': (2004, 2005),
  'age': (29.0, 30.0)},
 'Eddie George': {'games_diff': -5,
  'yards_diff': -599.0,
  'seasons': (2003, 2004),
  'age': (29.0, 30.0)},
 'Ahman Green': {'games_diff': -2,
  'yards_diff': -720.0,
  'seasons': (2003, 2004),
  'age': (26.0, 27.0)},
 'Edgerrin James': {'games_diff': 0,
  'yards_diff': -347.0,
  'seasons': (2005, 2006),
  'age': (27.0, 28.0)},
 'Curtis Martin': {'games_diff': -6,
  'yards_diff': -962.0,
  'seasons': (2004, 2005),
  'age': (31.0, 32.0)},
 'Ricky Williams': {},
 'Thomas Jones': {'games_diff': -2,
  'yards_diff': -506.0,
  'seasons': (2009, 2010),
  'age': (31.0, 32.0)},
 'Shaun Alexander': {'games_diff': -7,
  'yards_diff': -984.0,
 

In [76]:
for player in rb_dict_350.keys():
    if rb_dict_350[player]:
        yards_diff = rb_dict_350[player]["yards_diff"]
        if yards_diff >= 0:
            print(player)
            print(yards_diff)
            print(rb_dict_350[player]["seasons"])
            print("-" * 50)

Marshawn Lynch
49.0
(2013, 2014)
--------------------------------------------------


In [78]:
print(len(rb_dict_350.keys()))

30
