In [None]:
import nfl_data_py as nfl
import pandas as pd
import numpy as np

In [None]:
# Never truncate the text inside a cell when a frame is rendered (useful for
# long free-text columns). 'display.width' can be lifted the same way if the
# table itself gets wrapped.
pd.options.display.max_colwidth = None

In [None]:
# Load play-by-play data for every season listed here (just 2024 for now).
seasons = [2024]
pbp = nfl.import_pbp_data(seasons)
pbp

In [None]:
# Carve one game out of the season to inspect by hand. Rows without a
# possession team or an EPA value are deliberately left in this view;
# game_epa() does its own filtering.
in_game = pbp['game_id'] == "2024_21_WAS_PHI"
game = pbp[in_game].copy()

# Subset of columns that is readable when eyeballing individual plays.
columns = [
    'posteam', 'time', 'qtr', 'down', 'ydstogo', 'yrdln',
    'play_type', 'desc', 'ep', 'epa', 'total_home_epa', 'total_away_epa',
]
game[columns]

In [None]:
# Look only at the extra-point attempts of the selected game.
is_extra_point = game['play_type'] == 'extra_point'
game.loc[is_extra_point, columns]

In [None]:
def game_epa(game, penalties=False):
    """Total one game's EPA by play type, for each team on offense and on defense.

    Parameters
    ----------
    game : pandas.DataFrame
        Play-by-play rows of a single game. The columns read are ``posteam``,
        ``epa``, ``play_type``, ``home_team``, ``away_team``, ``game_id``,
        ``game_date`` and (unless ``penalties`` is True) ``penalty``.
        The frame is not modified.
    penalties : bool, default False
        When False, plays flagged ``penalty == 1`` are left out of the totals.

    Returns
    -------
    pandas.DataFrame
        Two rows indexed by ``(game_id, team)``, home team first. Columns are
        ``game_date`` followed by a ``<play_type>`` / ``<play_type>_def`` pair
        for extra_point, field_goal, kickoff, punt, pass, run and qb_kneel.
        ``<play_type>`` is the summed ``epa`` over plays where the team is
        ``posteam``; ``<play_type>_def`` is the negated sum over plays where
        the opponent is ``posteam``. Combinations with no plays are 0.

    Raises
    ------
    IndexError
        If ``game`` has no rows at all.
    """
    play_types = ("extra_point", "field_goal", "kickoff", "punt", "pass", "run", "qb_kneel")

    # Game-level metadata is constant within a game, so read it from the first
    # row before any filtering; that way a game whose plays are all filtered
    # out still yields a (zero-filled) result.
    home_team = game['home_team'].values[0]
    away_team = game['away_team'].values[0]
    game_id = game['game_id'].values[0]
    game_date = game['game_date'].values[0]

    # Rows with no possession team or no EPA (should catch start, end and
    # timeouts) carry nothing to sum. A boolean mask is used instead of
    # drop(..., inplace=True) so the caller's frame is left untouched.
    keep = game['posteam'].notna() & game['epa'].notna()

    # We can't properly measure the impact of penalties because the expected
    # points of the counterfactual are unknown, so they are excluded unless
    # the caller asks for them.
    if not penalties:
        keep &= game['penalty'] != 1
    plays = game.loc[keep]

    # {(posteam, play_type): total epa}
    totals = plays.groupby(['posteam', 'play_type'])['epa'].sum().to_dict()

    rows = []
    for team, opponent in ((home_team, away_team), (away_team, home_team)):
        row = {'game_date': game_date}
        for play_type in play_types:
            row[play_type] = totals.get((team, play_type), 0.0)
            # The opposing team's EPA during their possessions is the inverse
            # of this team's defensive EPA. "0.0 - x" keeps the default from
            # showing up as -0.0.
            row[f'{play_type}_def'] = 0.0 - totals.get((opponent, play_type), 0.0)
        rows.append(row)

    index = pd.MultiIndex.from_tuples(
        [(game_id, home_team), (game_id, away_team)],
        names=['game_id', 'team'],
    )
    return pd.DataFrame(rows, index=index)


In [None]:
# Sanity-check the summary on the single game selected above, with penalty
# plays excluded (the function's default).
epa = game_epa(game, penalties=False)
epa

In [None]:
# Distinct game identifiers, in order of first appearance in the play-by-play.
game_ids = pd.unique(pbp['game_id'])

In [None]:
# Empty season-level table with the same (game_id, team) index and column
# layout that game_epa() returns for a single game.
epa_columns = [
    "game_date",
    "extra_point", "extra_point_def",
    "field_goal", "field_goal_def",
    "kickoff", "kickoff_def",
    "punt", "punt_def",
    "pass", "pass_def",
    "run", "run_def",
    "qb_kneel", "qb_kneel_def",
]
index = pd.MultiIndex.from_tuples([], names=['game_id', 'team'])
epa = pd.DataFrame(columns=epa_columns, index=index)

In [None]:
# Build the season-long table: one pair of rows (home, away) per game.
# The per-game summaries are collected first and concatenated once, so
# re-running this cell rebuilds `epa` from scratch instead of appending
# duplicate rows, and the growing table is not re-copied on every iteration.
game_frames = [
    game_epa(pbp.loc[pbp['game_id'] == game_id].copy())
    for game_id in game_ids
]

# pd.concat() refuses an empty list; fall back to the empty, correctly
# shaped frame when there are no games.
epa = pd.concat(game_frames) if game_frames else epa.iloc[:0]

In [None]:
# Display the per-game, per-team EPA table assembled by the loop above.
epa