# tackle aura

1. identify all instances of a defender being within `N` yards of a ball carrier
    1. need to know who the ball carrier is at frame x
    1. need to collect *just* that ball carrier's location at frame x and join it back in to all other tracking data
2. for all such instances, record the angle of attack and relative velocities
3. for all such instances, identify whether or not that instance resulted in the identified player tackling the ball carrier

From there, we have a few options:

+ anchor on a given defender and quantify / qualify their ability relative to others of their position
+ anchor on a given defender and analyze which angles of attack are most commonly successful for them
+ anchor on a given ball carrier and analyze which angles of attack are most commonly successful against them

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px

import nfl.data

In [None]:
# Column lists used as merge / group-by keys throughout the notebook.
gp = ['gameId', 'playId']      # (game, play)
gpn = [*gp, 'nflId']           # (game, play, player)
gpf = [*gp, 'frameId']         # (game, play, frame)
gpfn = [*gpf, 'nflId']         # (game, play, frame, player)

# Short alias for pandas' multi-index slicing helper.
idx = pd.IndexSlice

In [None]:
# Load tracking data for week_num_start=1 .. week_num_end=9 and replace a
# missing nflId with the sentinel -1. add_is_close later treats nflId == -1 as
# "the football" -- presumably the ball rows are the ones with no nflId; verify
# against the raw data.
t = (nfl.data.load_all_tracking(week_num_start=1, week_num_end=9)
     .fillna(value={'nflId': -1}))
# single-week variant kept for quick experiments:
# tw1 = (nfl.data.load_tracking_week(week_num=1)
#        .fillna(value={'nflId': -1}))
# # g1 = tw1[(tw1.gameId == 2022090800)]
# tw1

In [None]:
def get_ball_carrier_position(df: pd.DataFrame) -> pd.DataFrame:
    """Extract the ball carrier's own tracking rows from ``df``.

    ``df``'s ``nflId`` column is relabelled ``ballCarrierId`` and inner-joined
    against ``nfl.data.get_ballcarrier()`` on game, play and ``ballCarrierId``,
    so only rows belonging to a play's ball carrier survive.

    Returns a frame keyed by game / play / frame with ``ballCarrierId`` and
    the carrier's ``club``, ``x``, ``y``, ``s``, ``o`` and ``dir`` values, each
    suffixed with ``_ballcarrier``.
    """
    tracked_cols = ['club', 'x', 'y', 's', 'o', 'dir']
    carriers = nfl.data.get_ballcarrier()

    # treat every tracked player as a candidate carrier; the inner join then
    # discards everyone who is not the carrier of that play
    candidates = df.rename(columns={'nflId': 'ballCarrierId'})
    carrier_rows = candidates.merge(carriers, how='inner', on=gp + ['ballCarrierId'])

    selected = carrier_rows[gpf + ['ballCarrierId'] + tracked_cols]
    return selected.rename(columns={col: f"{col}_ballcarrier" for col in tracked_cols})

# Standalone ball-carrier table, handy for inspection. Note that
# add_distance_from_ballcarrier builds its own copy internally; no visible cell
# reads this global afterwards.
ball_pos = get_ball_carrier_position(t)
# ball_pos

In [None]:
def add_distance_from_ballcarrier(df: pd.DataFrame) -> pd.DataFrame:
    """Attach the ball carrier's per-frame values and the distance to him.

    The ball carrier columns produced by ``get_ball_carrier_position`` are
    left-joined onto ``df`` by game / play / frame, and a ``d_bc`` column is
    added holding the Euclidean distance between each row's ``(x, y)`` and the
    carrier's ``(x_ballcarrier, y_ballcarrier)``.

    Returns the merged frame (a new object; ``df`` itself is not modified).
    """
    carrier = get_ball_carrier_position(df=df)
    merged = df.merge(carrier, how='left', on=gpf)

    dx = merged.x - merged.x_ballcarrier
    dy = merged.y - merged.y_ballcarrier
    merged.loc[:, 'd_bc'] = (dx ** 2 + dy ** 2) ** .5
    return merged

# from here on `t` carries the *_ballcarrier columns and d_bc
t = add_distance_from_ballcarrier(t)
# t

In [None]:
def add_is_close(df: pd.DataFrame, max_dist: float = 2.0) -> pd.DataFrame:
    """Flag the ball carrier and the opposing players near him.

    Adds two boolean columns to ``df`` in place and returns the same frame:

    * ``is_ballcarrier`` -- the row's ``nflId`` equals ``ballCarrierId``.
    * ``is_close`` -- the row is on a different club than the ball carrier, is
      not the ball carrier, is not the football (``nflId == -1``) and its
      ``d_bc`` is at most ``max_dist``.

    Args:
        df: frame with ``nflId``, ``ballCarrierId``, ``club``,
            ``club_ballcarrier`` and ``d_bc`` columns.
        max_dist: the "N" of "within N yards"; defaults to the 2 that was
            previously hard-coded. Same units as ``d_bc``.

    Returns:
        ``df`` itself, with the two columns added.
    """
    df.loc[:, 'is_ballcarrier'] = df.nflId == df.ballCarrierId
    on_opposing_team = df.club != df.club_ballcarrier
    not_football = df.nflId != -1
    # a missing d_bc compares False, so rows without a ball carrier are never close
    df.loc[:, 'is_close'] = (on_opposing_team
                             & (~df.is_ballcarrier)
                             & not_football
                             & (df.d_bc <= max_dist))
    return df

# adds the is_ballcarrier / is_close flags to `t`
t = add_is_close(t)
# t

In [None]:
# sanity check: (rows, columns) of the enriched tracking frame
t.shape

In [None]:
# sanity check: largest and smallest gameId present in the loaded data
t.gameId.max(), t.gameId.min()

In [None]:
# t[t.is_close].head(100)

In [None]:
def add_tackle_info(df: pd.DataFrame) -> pd.DataFrame:
    """Left-join the tackle table onto ``df`` by game / play / player.

    Rows with no match in ``nfl.data.load_tackles()`` get 0 in the ``tackle``,
    ``assist``, ``forcedFumble`` and ``pff_missedTackle`` columns. Because the
    join key has no frame component, a player's outcome for a play is repeated
    on every frame of that play.

    Returns the merged frame (a new object).
    """
    outcome_cols = ['tackle', 'assist', 'forcedFumble', 'pff_missedTackle']
    tackles = nfl.data.load_tackles()
    joined = df.merge(tackles, how='left', on=gpn)
    return joined.fillna(value=dict.fromkeys(outcome_cols, 0))

In [None]:
# adds the play-level tackle outcome columns to every tracking row
t = add_tackle_info(t)
# t[t.is_close].head(100)

In [None]:
# distribution of distance-to-ball-carrier over "close" rows, split by the
# value of the tackle column
px.violin(data_frame=t[t.is_close],
          x='tackle',
          y='d_bc')

In [None]:
# per player: median d_bc over close rows on plays where tackle == 1,
# listed from largest median to smallest
(t
 [(t.tackle == 1) & (t.is_close)]
 .groupby('displayName')
 .d_bc
 .median()
 .sort_values(ascending=False))

In [None]:
def add_relative_motion(df: pd.DataFrame) -> pd.DataFrame:
    """Add direction and speed relative to the ball carrier, in place.

    Two columns are written to ``df``, which is also returned:

    * ``dir_rel`` -- ``dir_ballcarrier - dir`` (same angular units as the
      inputs; it is passed through ``np.radians`` below, i.e. treated as
      degrees).
    * ``s_rel`` -- the ball carrier's speed minus the component of this row's
      speed along the ball carrier's direction.
    """
    direction_gap = df.dir_ballcarrier - df.dir
    df.loc[:, 'dir_rel'] = direction_gap

    # Projection of the row's speed onto the ball carrier's direction. A proper
    # treatment would express the velocity in the carrier's moving reference
    # frame instead.
    # todo: look up formula for velocity in a moving (accelerating even) ref frame
    speed_along_carrier = df.s * np.cos(np.radians(df.dir_rel))
    df.loc[:, 's_rel'] = df.s_ballcarrier - speed_along_carrier
    return df

In [None]:
# adds dir_rel / s_rel to `t`
t = add_relative_motion(t)
# t[t.is_close]

In [None]:
# look at all defensive players on one single play
# (rows of club 'LA' for gameId 2022090800, playId 56): relative speed as a
# function of distance to the ball carrier, one line per player
z = t[(t.gameId == 2022090800) & (t.playId == 56) & (t.club == 'LA')]
px.line(data_frame=z,
        x='d_bc',
        y='s_rel',
        color='displayName')

In [None]:
def my_bin(s: pd.Series, n_bins: int, s_min: float | None = None, s_max: float | None = None) -> np.ndarray:
    s_min = s_min or s.min()
    s_max = s_max or s.max()
    bin_width = (s_max - s_min) / n_bins
    return s_min + (1 + s.floordiv(bin_width)) * bin_width

In [None]:
def get_radial_bins(r: pd.Series, n_bins_r: int = 10, s_min: float | None = None, s_max: float | None = None) -> np.ndarray:
    s_min = s_min or r.min()
    s_max = s_max or r.max()
    
    # chose n_bins_r - 1 numbers r_i such that r_{i + 1}^2 - r_{i}^2 is the same for all i
    annulus_area = (s_max ** 2 - s_min ** 2) / n_bins_r
    r_vals = [0]
    for i in range(n_bins_r):
        r_vals.append((annulus_area + r_vals[-1] ** 2) ** .5)
    
    intervals = pd.cut(r.clip(0.0, 2.0 - 1e-6), bins=r_vals, include_lowest=True)
    lft = intervals.apply(lambda interval: interval.left).astype(float)
    rgt = intervals.apply(lambda interval: interval.right).astype(float)
    return rgt - lft


def get_theta_bins(theta: pd.Series, n_bins_theta: int = 30) -> pd.Series:
    """Map each angle (degrees) to the left edge of its angular bin.

    Angles are first wrapped into ``[0, 360)`` so that equivalent directions
    (e.g. ``-360``, ``0``, ``360`` and ``720``) always share a bin, then cut
    into ``n_bins_theta`` equal sectors. Sectors are right-closed, except that
    ``0`` belongs to the first one. NaN stays NaN.

    Args:
        theta: angles in degrees; any real value is accepted.
        n_bins_theta: number of sectors; must be a positive divisor of 360.

    Returns:
        A float Series indexed like ``theta`` holding each sector's left edge.

    Raises:
        ValueError: if ``n_bins_theta`` is not a positive divisor of 360.
    """
    # validate before doing any work
    if n_bins_theta <= 0 or 360 % n_bins_theta != 0:
        raise ValueError(f"n_bins_theta must be a positive divisor of 360, got {n_bins_theta}")

    theta_bins = np.linspace(0, 360, n_bins_theta + 1)
    wrapped = theta % 360
    # label every sector with its exact left edge
    return (pd.cut(wrapped, bins=theta_bins, labels=theta_bins[:-1], include_lowest=True)
            .astype(float))


def add_radial_bins(df: pd.DataFrame, n_bins_r: int = 10, n_bins_theta: int = 30) -> pd.DataFrame:
    """Add the polar-plot bin columns to ``df`` in place and return it.

    * ``d_bc_bin`` -- ``get_radial_bins`` applied to ``d_bc`` over the fixed
      range 0.0 .. 2.0.
    * ``dir_rel_bin`` -- ``get_theta_bins`` applied to ``dir_rel``.
    """
    radial = get_radial_bins(r=df.d_bc, n_bins_r=n_bins_r, s_min=0.0, s_max=2.0)
    df.loc[:, 'd_bc_bin'] = radial

    angular = get_theta_bins(theta=df.dir_rel, n_bins_theta=n_bins_theta)
    df.loc[:, 'dir_rel_bin'] = angular
    return df

# get_radial_bins(z.d_bc, s_min=0, s_max=2.0)
# get_theta_bins(z.dir_rel).astype(float).clip(0.0)

In [None]:
import plotly.graph_objs as go


def make_radial_plot(df: pd.DataFrame, n_bins_r: int = 10, n_bins_theta: int = 30) -> go.Figure:
    """Draw a polar bar chart of the mean ``tackle`` value per polar bin.

    ``df`` is binned with ``add_radial_bins`` (which writes the ``d_bc_bin``
    and ``dir_rel_bin`` columns onto the frame that was passed in), the
    ``tackle`` column is averaged within each (radial, angular) bin, and the
    result is rendered with ``px.bar_polar``: bar length comes from
    ``d_bc_bin``, angle from ``dir_rel_bin`` and colour from the mean.

    Returns the 1000 x 1000 plotly figure.
    """
    bin_cols = ['d_bc_bin', 'dir_rel_bin']
    df = add_radial_bins(df=df, n_bins_r=n_bins_r, n_bins_theta=n_bins_theta)

    mean_tackle = df.groupby(bin_cols)['tackle'].mean()
    # widest radial bin first, angles ascending within it
    tackle_rate = mean_tackle.reset_index().sort_values(by=bin_cols, ascending=[False, True])

    fig = px.bar_polar(data_frame=tackle_rate,
                       r='d_bc_bin',
                       theta='dir_rel_bin',
                       color='tackle',
                       height=1_000,
                       width=1_000)
    fig.update_layout(polar_bargap=0)
    return fig


# every "close" row, reduced to the columns the polar plot needs and ordered
# by distance, then relative direction
z = (t
     [t.is_close]
     [gpfn + ['displayName', 'd_bc', 'tackle', 'dir_rel', 's_rel']]
     .sort_values(by=['d_bc', 'dir_rel']))

# polar chart of mean tackle value per (distance, relative direction) bin
make_radial_plot(z)

In [None]:
def add_position(df: pd.DataFrame) -> pd.DataFrame:
    """Left-join each player's ``position`` onto ``df`` by ``nflId``.

    The position comes from ``nfl.data.load_players()``. Rows whose ``nflId``
    has no match keep a missing ``position``. Returns a new frame.
    """
    players = nfl.data.load_players()
    position_lookup = players[['nflId', 'position']]
    return pd.merge(df, position_lookup, how='left', on='nflId')

# adds the `position` column to `t`
t = add_position(t)

# t.head(20)

In [None]:
# number of tracking rows per position label
t.position.value_counts()

In [None]:
# same polar plot as above, restricted to rows whose position is 'CB'
z = (t
     [t.is_close & (t.position == 'CB')]
     [gpfn + ['displayName', 'd_bc', 'tackle', 'dir_rel', 's_rel']]
     .sort_values(by=['d_bc', 'dir_rel']))

make_radial_plot(z)

In [None]:
# same polar plot as above, restricted to rows whose position is 'DE'
z = (t
     [t.is_close & (t.position == 'DE')]
     [gpfn + ['displayName', 'd_bc', 'tackle', 'dir_rel', 's_rel']]
     .sort_values(by=['d_bc', 'dir_rel']))

make_radial_plot(z)

In [None]:
def add_sideline_distance(df: pd.DataFrame, field_width: float = 53.3) -> pd.DataFrame:
    """Add ``d_sideline``, the distance from ``y`` to the nearer sideline.

    The sidelines are taken to be at ``y = 0`` and ``y = field_width``. The
    column is written to ``df`` in place and ``df`` is returned. A missing
    ``y`` yields a missing distance.

    Args:
        df: frame with a ``y`` column.
        field_width: ``y`` coordinate of the far sideline; defaults to the
            53.3 that was previously hard-coded.

    Returns:
        ``df`` itself, with ``d_sideline`` added.
    """
    # element-wise minimum in one vectorized pass instead of a per-row lambda
    df.loc[:, 'd_sideline'] = np.minimum(field_width - df.y, df.y)
    return df

In [None]:
# adds d_sideline to `t`
t = add_sideline_distance(t)
# t.head()

## establish position-dependent baseline tackle success rates

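We will do this by training logistic regression models (one for each position) on a small set of features (the model cell below additionally uses the distance to the nearest sideline and two interaction terms):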

1. distance to ball carrier
2. relative speed in the direction of motion of the ballcarrier
3. the angle of approach of the defender to the ballcarrier (represented as two features, the `sin` and `cos` of the angle between their respective heading directions)

We will use k-fold cross-validation to select the regularization strength and guard against overfitting.

In [None]:
from sklearn.linear_model import LogisticRegressionCV

pos = 'ILB'   # position whose baseline model is being fit
k = 5         # number of cross-validation folds

# Logistic regression whose regularization strength is picked from 50
# candidate values by k-fold cross-validation on log loss.
clf = LogisticRegressionCV(Cs=50,
                           cv=k,
                           scoring='neg_log_loss',
                           n_jobs=-1,
                           random_state=1337,
                           tol=1e-6)

# Training rows: every "close" row for the chosen position. The explicit
# .copy() makes z an independent frame, so the feature columns added below are
# not written onto a chained slice of t.
z = (t
     [t.is_close & (t.position == pos)]
     [gpfn + ['displayName', 'position', 'd_bc', 'tackle', 'dir_rel', 's_rel', 'd_sideline']]
     .copy())

# dir_rel is in degrees (add_relative_motion passes it through np.radians), so
# it must be converted before taking sin / cos.
dir_rel_rad = np.radians(z.dir_rel)
z.loc[:, 'sin_dir_rel'] = np.sin(dir_rel_rad)
z.loc[:, 'cos_dir_rel'] = np.cos(dir_rel_rad)
# interaction terms
z.loc[:, 'sin_x_cos'] = z.sin_dir_rel * z.cos_dir_rel
z.loc[:, 'd_bc_x_s_rel'] = z.d_bc * z.s_rel
print(f" z has {z.shape[0]:,} records")

feature_names = ['d_bc', 'sin_dir_rel', 'cos_dir_rel', 's_rel', 'd_sideline', 'sin_x_cos', 'd_bc_x_s_rel']
X = z[feature_names]
y = z.tackle

clf.fit(X, y)

# in-sample score: evaluated on the same rows the model was fit on
clf.score(X, y)

In [None]:
# fitted coefficients (first row of clf.coef_) next to their feature names
pd.DataFrame({'coef': clf.coef_.tolist()[0], 'feature_name': feature_names})

In [None]:
# predicted probability of the second class (column 1 of predict_proba) for
# the training rows, split by the observed label
df_pred = pd.DataFrame({'y_pred': clf.predict_proba(X)[:, 1],
                        'y': y})
px.violin(data_frame=df_pred, x='y', y='y_pred')

In [None]:
# look up players whose displayName matches the regex '.*Wagner'
players = nfl.data.load_players()
players[players.displayName.str.match('.*Wagner')]

In [None]:
# 38577 is presumably Bobby Wagner's nflId, taken from the player lookup --
# confirm against the players table
is_bobby_wagner = z.nflId == 38577
X = z[feature_names]
y = z.tackle
# predicted probabilities for the rows in z, split by observed label and
# coloured by whether the row belongs to nflId 38577
df_pred = pd.DataFrame({'prob': clf.predict_proba(X)[:, 1],
                        'y': y})
px.violin(data_frame=df_pred, x='y', y='prob', color=is_bobby_wagner)

In [None]:
# work on a copy so the bin columns are not added to z
df = z.copy()
df.loc[:, 'is_bobby_wagner'] = df.nflId == 38577

df = add_radial_bins(df=df, n_bins_theta=15)
# mean tackle value per (radial bin, angular bin), computed separately for
# nflId 38577 (column True) and for everyone else in df (column False)
df_avg = (df
          .groupby(['d_bc_bin', 'dir_rel_bin', 'is_bobby_wagner'])
          .tackle
          .mean()
          .reset_index()
          .sort_values(by=['d_bc_bin', 'dir_rel_bin', 'is_bobby_wagner'], ascending=[False, True, True])
          .pivot_table(values='tackle', index=['d_bc_bin', 'dir_rel_bin'], columns=['is_bobby_wagner']))
# per-bin difference between the player's mean and the mean of the rest
df_avg.loc[:, 'player_over_repl'] = df_avg[True] - df_avg[False]

# bins where either side has no rows are dropped before plotting
px.histogram(df_avg.dropna().player_over_repl)