In [None]:
!pip install nfl_data_py

In [2]:
import nfl_data_py as nfl
import pandas as pd

In [3]:
nflpbp_df = nfl.import_pbp_data([2023, 2023, 2024])

2023 done.
2023 done.
2024 done.
Downcasting floats.


In [4]:
def get_made_field_goal_data(df: pd.DataFrame, offense, defense: str) -> pd.DataFrame:
  """Returns a DataFrame of field goal stats for the given play-by-play data.

  Finds all games where a field goal was made in the 4th quater which put the
  field goal kicker's team either tied or ahead of the opponent's score and
  where the kicker's team ultimately won the game.

  Returns:
    A DataFrame with:
    - The kicker's name
    - The expected points added for the made field goal
    - The number of seconds remaining in the game
    - Kick distance
  """
  out_df = df.loc[
    # Filter events by field goals made ...
    (df['field_goal_attempt'] == 1) &
    # in the 4th quarter ...
    (df['qtr'] == 4) &
    (df['posteam_type'] == offense) &
    # where the kicker's team is trailing or tied before the field goal ...
    (df['posteam_score'] <= df[f'total_{defense}_score']) &
    # and leading or tied after the field goal is made ...
    (df['posteam_score_post'] >= df[f'total_{defense}_score']) &
    # and the kicker's team ultimately wins the game
    (df[f'{offense}_score'] > df[f'{defense}_score'])
    ][['kicker_player_name', f'{offense}_team', 'epa', 'game_seconds_remaining', 'kick_distance']]
  out_df = out_df.rename(columns={f'{offense}_team': 'team'})
  return out_df

In [5]:
def get_missed_field_goal_data(df: pd.DataFrame, offense, defense: str) -> pd.DataFrame:
  """Returns a DataFrame of missed field goal stats for the given pbp data.

  Finds all games where a field goal was made in the 4th quater which put the
  field goal kicker's team either tied or ahead of the opponent's score and
  where the kicker's team ultimately won the game.

  Returns:
    A DataFrame with:
    - The kicker's name
    - The expected points added for the made field goal
    - The number of seconds remaining in the game
    - Kick distance
  """
  out_df = df.loc[
    # Filter events by missed field goals ...
    (df['field_goal_result'].isin(['missed', 'blocked'])) &
    # in the 4th quarter ...
    (df['qtr'] == 4) &
    (df['posteam_type'] == offense) &
    # where the kicker's team is trailing ...
    (df['posteam_score'] <= df[f'total_{defense}_score']) &
    # where the field goal could have tied or taken the lead ...
    (df[f'total_{defense}_score'] - df['posteam_score'] <= 3)
    ][['kicker_player_name', f'{offense}_team']]
  out_df = out_df.rename(columns={f'{offense}_team': 'team'})
  return out_df

In [6]:
home_fgs_df = get_made_field_goal_data(nflpbp_df, 'home', 'away')
away_fgs_df = get_made_field_goal_data(nflpbp_df, 'away', 'home')
clutch_fgs_df = pd.concat([home_fgs_df, away_fgs_df]).reset_index(drop=True)
home_fgs_df = get_missed_field_goal_data(nflpbp_df, 'home', 'away')
away_fgs_df = get_missed_field_goal_data(nflpbp_df, 'away', 'home')
missed_fgs_df = pd.concat([home_fgs_df, away_fgs_df]).reset_index(drop=True)

In [10]:
# Group by kicker name
kicker_gb = clutch_fgs_df.groupby('kicker_player_name')
# The kicker's team
team_s = kicker_gb['team'].unique().apply(', '.join)
# Number of made field goals by kicker
made_fgs_s = kicker_gb.size()
made_fgs_s.name = 'num_fgs'
# Median expected points added for each made field goal
wpa_avg_s = kicker_gb['epa'].median()
wpa_avg_s.name = 'median_epa'
# Median seconds remaining in the game for each made field goal
secs_remain_s = kicker_gb['game_seconds_remaining'].median()
secs_remain_s.name = 'median_secs_remain'
# Median distance of field goal in yards
kick_dist_s = kicker_gb['kick_distance'].median()
kick_dist_s.name = 'median_kick_distance'
fgs_made_df = pd.concat([team_s, made_fgs_s, wpa_avg_s, secs_remain_s, kick_dist_s], axis=1)
# Reindex to move the kicker names from the index to its own column
fgs_made_df = fgs_made_df.reset_index(drop=False)

missed_kicker_gb = missed_fgs_df.groupby('kicker_player_name')
missed_team_s = missed_kicker_gb['team'].unique().apply(', '.join)
missed_fgs_s = missed_kicker_gb.size()
missed_fgs_s.name = 'num_missed_fgs'
fgs_missed_df = pd.concat([missed_team_s, missed_fgs_s], axis=1)

fgs_stats_df = pd.merge(fgs_made_df, fgs_missed_df, left_on=['kicker_player_name', 'team'], right_on=['kicker_player_name', 'team'], how='outer').fillna(0)
# Calculate percentage of made/missed clutch field goals
fgs_stats_df['clutch_factor'] = fgs_stats_df['num_fgs'] / (fgs_stats_df['num_fgs'] + fgs_stats_df['num_missed_fgs'])
fgs_stats_df['pct_clutch'] = fgs_stats_df['clutch_factor'].apply(lambda p: f'{p:.2%}')

In [11]:
fgs_rank_df = fgs_stats_df.sort_values(by=['num_fgs', 'clutch_factor', 'median_epa', 'median_secs_remain', 'median_kick_distance'], ascending=[False, False, False, True, False])
fgs_rank_df.index = range(1, len(fgs_rank_df) + 1)
fgs_rank_df.loc[fgs_rank_df['num_fgs'] > 1][['kicker_player_name', 'team', 'pct_clutch', 'num_fgs', 'median_epa', 'median_secs_remain', 'median_kick_distance']]


Unnamed: 0,kicker_player_name,team,pct_clutch,num_fgs,median_epa,median_secs_remain,median_kick_distance
1,H.Butker,KC,100.00%,13.0,0.075863,349.0,26.0
2,D.Hopkins,CLE,100.00%,10.0,0.259642,36.0,34.0
3,W.Lutz,DEN,88.89%,8.0,1.408313,144.0,49.0
4,Y.Koo,ATL,80.00%,8.0,0.620756,4.0,44.0
5,M.Gay,IND,100.00%,6.0,1.66333,380.5,53.0
6,T.Bass,BUF,75.00%,6.0,0.306929,74.0,34.0
7,G.Zuerlein,NYJ,75.00%,6.0,0.259642,10.0,35.0
8,J.Bates,DET,100.00%,5.0,0.713437,19.0,44.0
9,C.Ryland,"ARI, NE",100.00%,5.0,0.26514,7.0,35.0
10,J.Sanders,MIA,71.43%,5.0,0.413991,4.0,37.0
