# Examining RB Health in the Proceeding Season Based on Workload

## Get Data

In [1]:
import pandas as pd
import nfl_data_py as nfl

In [2]:
years = range(1999, 2024 + 1)
s_type = "ALL"
df_seasons_all = nfl.import_seasonal_data(years, s_type)

In [3]:
df_roster = nfl.import_seasonal_rosters(years)

In [4]:
df_roster.columns

Index(['season', 'team', 'position', 'depth_chart_position', 'jersey_number',
       'status', 'player_name', 'first_name', 'last_name', 'birth_date',
       'height', 'weight', 'college', 'player_id', 'espn_id', 'sportradar_id',
       'yahoo_id', 'rotowire_id', 'pff_id', 'pfr_id', 'fantasy_data_id',
       'sleeper_id', 'years_exp', 'headshot_url', 'esb_id', 'gsis_it_id',
       'smart_id', 'entry_year', 'rookie_year', 'draft_club', 'ngs_position',
       'week', 'game_type', 'status_description_abbr', 'football_name',
       'draft_number', 'age'],
      dtype='object')

In [5]:
df_roster_clean = df_roster[["season", "player_id", "player_name", "age"]]

In [6]:
df_roster_clean.query("player_id == '00-0034844'")

Unnamed: 0,season,player_id,player_name,age
3083,2018,00-0034844,Saquon Barkley,21.0
2316,2019,00-0034844,Saquon Barkley,22.0
1939,2020,00-0034844,Saquon Barkley,23.0
1497,2021,00-0034844,Saquon Barkley,24.0
1173,2022,00-0034844,Saquon Barkley,25.0
867,2023,00-0034844,Saquon Barkley,26.0
683,2024,00-0034844,Saquon Barkley,27.0


## Create DFs

In [7]:
df_carries = df_seasons_all.groupby(['player_id', 'season'], as_index=False).agg({
    'carries': 'sum',
    'receptions': 'sum',
    'games': 'max'
})
df_carries["touches"] = df_carries["carries"] + df_carries["receptions"]

In [8]:
df_carries_over_400 = df_carries.query("carries >= 400")
df_carries_over_400.head()

Unnamed: 0,player_id,season,carries,receptions,games,touches
645,00-0004304,2004,410,24,18,434
927,00-0005883,1999,424,55,20,479
928,00-0005883,2000,425,56,17,481
993,00-0006305,2003,403,58,18,461
1279,00-0008241,2000,404,64,17,468


In [9]:
df_rush_all = df_seasons_all.query("season_type == 'REG'")[["player_id", "season", "season_type", "carries", 
                                                            "rushing_yards", "rushing_tds", "rushing_first_downs", 
                                                            "rushing_epa", "games", "fantasy_points_ppr", "fantasy_points",
                                                            "receptions", "receiving_yards", "receiving_tds"
                                                           ]]

In [10]:
df_merged_one = df_rush_all.merge(
    df_roster_clean[['player_id', 'season', 'player_name', 'age']],
    on=['player_id', 'season'],
    how='left'
)

## Over 400 Carries

In [11]:
columns = [
    "player_name",
    "prev_season",
    "next_season",
    "age_prev",
    "age_next",
    "games_diff",
    "fantasy_points_ppr_diff",
    "fantasy_points_diff",
    "carries_diff",
    "rushing_yards",
    "rushing_tds",
    "rushing_first_downs",
    "receptions",
    "receiving_yards",
    "receiving_tds"
]

df_400 = pd.DataFrame(columns=columns)

In [12]:
data = []
for index, row in df_carries_over_400.iterrows():
    season = row["season"]
    player_id = row["player_id"]

    df_one = df_merged_one.query(f"player_id == '{player_id}' and season == {season}")
    df_two = df_merged_one.query(f"player_id == '{player_id}' and season == {season + 1}")
    
    player = df_one["player_name"].iloc[0]
    age = df_one["age"].iloc[0]

    if len(df_one) == 1 and len(df_two) == 1:
        games_0 = df_one['games'].iloc[0]
        games_1 = df_two['games'].iloc[0]
        games_diff = games_1 - games_0


        yards_0 = df_one['rushing_yards'].iloc[0]
        yards_1 = df_two['rushing_yards'].iloc[0]
        yards_diff = yards_1 - yards_0


        ff_0 = df_one["fantasy_points_ppr"].iloc[0]
        ff_1 = df_two["fantasy_points_ppr"].iloc[0]
        ff_diff = ff_1 - ff_0

        ff_reg_0 = df_one["fantasy_points"].iloc[0]
        ff_reg_1 = df_two["fantasy_points"].iloc[0]
        ff_reg_diff = ff_reg_1 - ff_reg_0

        carries_0 = df_one["carries"].iloc[0]
        carries_1 = df_two["carries"].iloc[0]
        carries_diff = carries_1 - carries_0

        rushing_tds_0 = df_one["rushing_tds"].iloc[0]
        rushing_tds_1 = df_two["rushing_tds"].iloc[0]
        rushing_tds_diff = rushing_tds_1 - rushing_tds_0
        
        rushing_first_downs_0 = df_one["rushing_first_downs"].iloc[0]
        rushing_first_downs_1 = df_two["rushing_first_downs"].iloc[0]
        rushing_first_downs_diff = rushing_first_downs_1 - rushing_first_downs_0
        
        receptions_0 = df_one["receptions"].iloc[0]
        receptions_1 = df_two["receptions"].iloc[0]
        receptions_diff = receptions_1 - receptions_0
        
        receiving_yards_0 = df_one["receiving_yards"].iloc[0]
        receiving_yards_1 = df_two["receiving_yards"].iloc[0]
        receiving_yards_diff = receiving_yards_1 - receiving_yards_0
        
        receiving_tds_0 = df_one["receiving_tds"].iloc[0]
        receiving_tds_1 = df_two["receiving_tds"].iloc[0]
        receiving_tds_diff = receiving_tds_1 - receiving_tds_0

        data_to_add = {
            "player_name": player,
            "prev_season": season,
            "next_season": season + 1,
            "age_prev": age,
            "age_next": age + 1,
            "games_diff": games_diff,
            "fantasy_points_ppr_diff": round(ff_diff, 2),
            "fantasy_points_diff": round(ff_reg_diff, 2),
            "carries_diff": carries_diff,
            "rushing_yards": yards_diff,
            "rushing_tds": rushing_tds_diff,
            "rushing_first_downs": rushing_first_downs_diff,
            "receptions": receptions_diff,
            "receiving_yards": receiving_yards_diff,
            "receiving_tds": receiving_tds_diff
        }
        data.append(data_to_add)

In [13]:
for row in data:
    df_400 = pd.concat([df_400, pd.DataFrame([row])], ignore_index=True)
df_400.head()

  df_400 = pd.concat([df_400, pd.DataFrame([row])], ignore_index=True)


Unnamed: 0,player_name,prev_season,next_season,age_prev,age_next,games_diff,fantasy_points_ppr_diff,fantasy_points_diff,carries_diff,rushing_yards,rushing_tds,rushing_first_downs,receptions,receiving_yards,receiving_tds
0,Corey Dillon,2004,2005,29.0,30.0,-5,-71.4,-78.4,-136,-902.0,0,-32.0,7,78.0,0
1,Eddie George,1999,2000,25.0,26.0,-3,37.3,35.3,82,178.0,5,10.0,2,-25.0,-2
2,Eddie George,2000,2001,26.0,27.0,-1,-149.6,-138.6,-83,-516.0,-9,-38.0,-11,-150.0,-2
3,Ahman Green,2003,2004,26.0,27.0,-2,-156.4,-146.4,-96,-720.0,-8,-40.0,-10,-92.0,-4
4,Edgerrin James,2000,2001,22.0,23.0,-11,-262.6,-225.6,-232,-1028.0,-10,-59.0,-37,-388.0,-5


In [14]:
df_400[["player_name", "prev_season", "age_prev", "fantasy_points_diff", "fantasy_points_ppr_diff", "carries_diff"]]

Unnamed: 0,player_name,prev_season,age_prev,fantasy_points_diff,fantasy_points_ppr_diff,carries_diff
0,Corey Dillon,2004,29.0,-78.4,-71.4,-136
1,Eddie George,1999,25.0,35.3,37.3,82
2,Eddie George,2000,26.0,-138.6,-149.6,-83
3,Ahman Green,2003,26.0,-146.4,-156.4,-96
4,Edgerrin James,2000,22.0,-225.6,-262.6,-232
5,Curtis Martin,2004,31.0,-164.9,-181.9,-151
6,Shaun Alexander,2005,28.0,-231.4,-234.4,-118
7,Jamal Lewis,2003,24.0,-142.6,-158.6,-153
8,Larry Johnson,2006,26.0,-231.4,-242.4,-258
9,Arian Foster,2012,26.0,-175.6,-193.6,-230


## Over 350 Carries

In [15]:
df_carries_over_350 = df_carries.query("carries >= 350")
df_carries_over_350.head()

Unnamed: 0,player_id,season,carries,receptions,games,touches
94,00-0000745,2005,370,57,17,427
95,00-0000745,2006,353,60,17,413
165,00-0001215,2000,352,13,16,365
573,00-0004052,2001,356,28,16,384
575,00-0004052,2003,382,16,18,398


In [16]:
columns = [
    "player_name",
    "prev_season",
    "next_season",
    "age_prev",
    "age_next",
    "games_diff",
    "fantasy_points_ppr_diff",
    "fantasy_points_diff",
    "carries_diff",
    "rushing_yards",
    "rushing_tds",
    "rushing_first_downs",
    "receptions",
    "receiving_yards",
    "receiving_tds"
]

df_350 = pd.DataFrame(columns=columns)

In [17]:
data = []
for index, row in df_carries_over_350.iterrows():
    season = row["season"]
    player_id = row["player_id"]

    df_one = df_merged_one.query(f"player_id == '{player_id}' and season == {season}")
    df_two = df_merged_one.query(f"player_id == '{player_id}' and season == {season + 1}")
    
    player = df_one["player_name"].iloc[0]
    age = df_one["age"].iloc[0]

    if len(df_one) == 1 and len(df_two) == 1:
        games_0 = df_one['games'].iloc[0]
        games_1 = df_two['games'].iloc[0]
        games_diff = games_1 - games_0


        yards_0 = df_one['rushing_yards'].iloc[0]
        yards_1 = df_two['rushing_yards'].iloc[0]
        yards_diff = yards_1 - yards_0


        ff_0 = df_one["fantasy_points_ppr"].iloc[0]
        ff_1 = df_two["fantasy_points_ppr"].iloc[0]
        ff_diff = round(ff_1 - ff_0, 2)

        ff_reg_0 = df_one["fantasy_points"].iloc[0]
        ff_reg_1 = df_two["fantasy_points"].iloc[0]
        ff_reg_diff = round(ff_reg_1 - ff_reg_0, 2)

        carries_0 = df_one["carries"].iloc[0]
        carries_1 = df_two["carries"].iloc[0]
        carries_diff = carries_1 - carries_0

        rushing_tds_0 = df_one["rushing_tds"].iloc[0]
        rushing_tds_1 = df_two["rushing_tds"].iloc[0]
        rushing_tds_diff = rushing_tds_1 - rushing_tds_0
        
        rushing_first_downs_0 = df_one["rushing_first_downs"].iloc[0]
        rushing_first_downs_1 = df_two["rushing_first_downs"].iloc[0]
        rushing_first_downs_diff = rushing_first_downs_1 - rushing_first_downs_0
        
        receptions_0 = df_one["receptions"].iloc[0]
        receptions_1 = df_two["receptions"].iloc[0]
        receptions_diff = receptions_1 - receptions_0
        
        receiving_yards_0 = df_one["receiving_yards"].iloc[0]
        receiving_yards_1 = df_two["receiving_yards"].iloc[0]
        receiving_yards_diff = receiving_yards_1 - receiving_yards_0
        
        receiving_tds_0 = df_one["receiving_tds"].iloc[0]
        receiving_tds_1 = df_two["receiving_tds"].iloc[0]
        receiving_tds_diff = receiving_tds_1 - receiving_tds_0

        data_to_add = {
            "player_name": player,
            "prev_season": season,
            "next_season": season + 1,
            "age_prev": age,
            "age_next": age + 1,
            "games_diff": games_diff,
            "fantasy_points_ppr_diff": ff_diff,
            "fantasy_points_diff": ff_reg_diff,
            "carries_diff": carries_diff,
            "rushing_yards": yards_diff,
            "rushing_tds": rushing_tds_diff,
            "rushing_first_downs": rushing_first_downs_diff,
            "receptions": receptions_diff,
            "receiving_yards": receiving_yards_diff,
            "receiving_tds": receiving_tds_diff
        }
        data.append(data_to_add)

In [18]:
for row in data:
    df_350 = pd.concat([df_350, pd.DataFrame([row])], ignore_index=True)
df_350.head()

  df_350 = pd.concat([df_350, pd.DataFrame([row])], ignore_index=True)


Unnamed: 0,player_name,prev_season,next_season,age_prev,age_next,games_diff,fantasy_points_ppr_diff,fantasy_points_diff,carries_diff,rushing_yards,rushing_tds,rushing_first_downs,receptions,receiving_yards,receiving_tds
0,Tiki Barber,2005,2006,30.0,31.0,0,-60.3,-64.3,-30,-198.0,-4,6.0,4,-65.0,-2
1,Jerome Bettis,2000,2001,28.0,29.0,-4,-49.62,-44.62,-127,-230.0,-4,-24.0,-5,-49.0,0
2,Stephen Davis,2001,2002,27.0,28.0,-4,-58.5,-53.5,-149,-612.0,2,-27.0,-5,-63.0,1
3,Stephen Davis,2003,2004,29.0,30.0,-16,-201.9,-189.9,-294,-1352.0,-8,-64.0,-12,-127.0,0
4,Corey Dillon,2004,2005,29.0,30.0,-5,-71.4,-78.4,-136,-902.0,0,-32.0,7,78.0,0


In [19]:
len(df_350)

45

In [20]:
df_350.query("fantasy_points_diff > 0 or fantasy_points_ppr_diff > 0")[["player_name", "prev_season", "age_prev", 
                                                                        "fantasy_points_diff", "fantasy_points_ppr_diff", "carries_diff"]]

Unnamed: 0,player_name,prev_season,age_prev,fantasy_points_diff,fantasy_points_ppr_diff,carries_diff
5,Eddie George,1999,25.0,35.3,37.3,82
10,Edgerrin James,1999,21.0,27.8,27.8,23
12,Edgerrin James,2003,25.0,40.2,40.2,25
13,Edgerrin James,2004,26.0,11.2,4.2,26
15,Curtis Martin,1999,26.0,32.32,56.32,-56
18,Shaun Alexander,2004,27.0,62.0,54.0,17
21,Rudi Johnson,2004,24.0,8.7,16.7,-25
24,LaDainian Tomlinson,2002,23.0,38.64,59.64,-59
25,LaDainian Tomlinson,2004,25.0,34.16,32.16,1
30,Adrian Peterson,2008,23.0,38.3,60.3,-49


In [21]:
import plotly.io as pio

In [22]:
def save_table(df, file_name):
    fig = dict(
        type='table',
        header=dict(values=list(df.columns), align='center', fill_color='lightblue'),
        cells=dict(values=[df[col] for col in df.columns], align='center', fill_color='lightgrey')
    )
    pio.write_html(fig, file=f'{file_name}.html', auto_open=True)

In [26]:
# df_400
# df_400[["player_name", "prev_season", "age_prev", "fantasy_points_diff", "fantasy_points_ppr_diff", "carries_diff"]]
save_table(df_400.sort_values("fantasy_points_diff", ascending=False), "full_400")
save_table(df_400[["player_name", "prev_season", "age_prev", 
                   "fantasy_points_diff", "fantasy_points_ppr_diff", "carries_diff"]].sort_values("fantasy_points_diff", ascending=False), "short_400")

In [27]:
save_table(df_350.sort_values("fantasy_points_diff", ascending=False), "full_350")
save_table(df_350[["player_name", "prev_season", "age_prev", 
                   "fantasy_points_diff", "fantasy_points_ppr_diff", "carries_diff"]].sort_values("fantasy_points_diff", ascending=False), "short_350")

In [42]:
def stats(df):
    total = len(df)
    positive = len(df.query("fantasy_points_diff > 0"))
    print(f"Total: {total}")
    print(f"+: {positive}")
    print(f"% pos: {round(100 * positive/total, 2)}%")
    print()

In [43]:
stats(df_400)
stats(df_350)

Total: 11
+: 1
% pos: 9.09%

Total: 45
+: 13
% pos: 28.89%

