In [170]:
from dash import Dash, html, callback, Input, Output, State, dcc, dash_table, no_update
import dash_cytoscape as cyto
import pandas as pd
import plotly.graph_objects as go
from pathlib import Path
import os
import numpy as np
import colorlover
from tqdm.notebook import tqdm
from scipy.spatial.distance import euclidean
from math import acos, sqrt, atan2, pi
from sklearn.preprocessing import LabelEncoder

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [171]:
data_dir = Path("/Users/keltim01/Documents/data/nfl-big-data-bowl-2023")

In [172]:
files = [x for x in os.listdir(data_dir) if x.startswith("week")]

In [173]:
files

['week1.csv',
 'week2.csv',
 'week3.csv',
 'week7.csv',
 'week6.csv',
 'week4.csv',
 'week5.csv',
 'week8.csv']

In [9]:
df_players = pd.read_csv(data_dir.joinpath("players.csv"))
df_plays = pd.read_csv(data_dir.joinpath("plays.csv"))
df_games = pd.read_csv(data_dir.joinpath("games.csv"))
df_pff_data = pd.read_csv(data_dir.joinpath("pffScoutingData.csv"))
df_nfl_pbp = pd.read_parquet(data_dir.joinpath("play_by_play_2021.parquet"))

In [174]:
df_weekly_data = pd.DataFrame()

# Standardize Data if you haven't written a file or don't want to
for file in tqdm(files):
    df_wk = pd.read_csv(data_dir.joinpath(file))
    
    df_wk['x'] = df_wk.apply(lambda row: 120 - row['x'] if row['playDirection'] == 'left' else row['x'], axis=1)
    df_wk['y'] = df_wk.apply(lambda row: 160 / 3 - row['y'] if row['playDirection'] == 'left' else row['y'], axis=1)
    df_wk['dir'] = df_wk.apply(lambda row: row['dir'] + 180 if row['playDirection'] == 'left' else row['dir'], axis=1)
    df_wk['dir'] = df_wk.apply(lambda row: row['dir'] - 360 if row['dir'] > 360 else row['dir'], axis=1)
    df_wk['o'] = df_wk.apply(lambda row: row['o'] + 180 if row['playDirection'] == 'left' else row['o'], axis=1)
    df_wk['o'] = df_wk.apply(lambda row: row['o'] - 360 if row['o'] > 360 else row['o'], axis=1)
    df_wk['second'] = df_wk['frameId'] / 10
    
    df_wk['week'] = int(file.split(".")[0][-1])
    df_weekly_data = pd.concat([df_weekly_data, df_wk], ignore_index=True)
df_weekly_data.rename(columns={'team': 'club'}, inplace=True)

  0%|          | 0/8 [00:00<?, ?it/s]

In [187]:
df_weekly_data = pd.merge(df_weekly_data, df_players[['nflId', 'displayName']], on='nflId', how='left')


In [188]:
os.makedirs(data_dir.joinpath("standardized"), exist_ok=True)
df_weekly_data.to_parquet(data_dir.joinpath("standardized","all_weeks.parquet"), index=False)

In [7]:
df_weekly_data = pd.read_parquet(data_dir.joinpath("standardized","all_weeks.parquet"))

In [8]:
# Define utility functions
def euclidean_distance(x1, y1, x2, y2):
    return sqrt((x1 - x2)**2 + (y1 - y2)**2)

def calculate_angle(x1, y1, x2, y2):
    dot_product = x1 * x2 + y1 * y2
    magnitude1 = sqrt(x1**2 + y1**2)
    magnitude2 = sqrt(x2**2 + y2**2)
    cos_angle = dot_product / (magnitude1 * magnitude2)
    angle = acos(np.clip(cos_angle, -1, 1)) * (180 / pi)  # Clamp value and convert to degrees
    return angle

def cosine_similarity(x1, y1, x2, y2):
    dot_product = x1 * x2 + y1 * y2
    magnitude1 = sqrt(x1**2 + y1**2)
    magnitude2 = sqrt(x2**2 + y2**2)
    return dot_product / (magnitude1 * magnitude2)

def orthogonal_distance(x1, y1, x2, y2, x3, y3):
    num = abs((y2 - y1) * x3 - (x2 - x1) * y3 + x2 * y1 - y2 * x1)
    den = sqrt((y2 - y1)**2 + (x2 - x1)**2)
    return num / den

def calculate_sideline_distance(y_position, sideline1=0, sideline2=53.3):
    distance_to_sideline1 = abs(y_position - sideline1)
    distance_to_sideline2 = abs(y_position - sideline2)
    return min(distance_to_sideline1, distance_to_sideline2)


In [10]:
# print(df_players.info())
# print(df_plays.info())
# print(df_games.info())
# print(df_pff_data.info())
# print(df_nfl_pbp.info())
# print(df_weekly_data.info())

In [164]:
def process_data_pressure_probability(
        df_weekly_data: pd.DataFrame,
        df_pff_data: pd.DataFrame,
        df_plays: pd.DataFrame,
):
    # Mutate and calculate second_since_snap
    df_weekly_data['snap_time'] = df_weekly_data.loc[df_weekly_data['event'] == 'ball_snap', 'second'].groupby([df_weekly_data['gameId'], df_weekly_data['playId']]).transform('first')
    df_weekly_data['second_since_snap'] = df_weekly_data['second'] - df_weekly_data['snap_time']
    df_weekly_data = df_weekly_data.drop(columns=['snap_time'])

    # Selecting columns from pffData
    df_pff_join = df_pff_data[['gameId', 'playId', 'nflId', 'pff_role', 'pff_hit', 'pff_hurry', 'pff_sack',
                        'pff_beatenByDefender', 'pff_hurryAllowed', 'pff_hitAllowed', 'pff_sackAllowed',
                        'pff_nflIdBlockedPlayer', 'pff_blockType']].copy()
    
    # Join and create pressure_df
    df_pressure = pd.merge(df_weekly_data, df_pff_join, on=['gameId', 'playId', 'nflId'], how='left').fillna(0)
    df_pressure['is_pressure'] = np.where((df_pressure['pff_hit'] + df_pressure['pff_hurry'] + df_pressure['pff_sack']) > 0, 1, 0)


    # Create rusher_df
    df_rusher = df_pressure[df_pressure['pff_role'] == 'Pass Rush'].drop(columns=['pff_hurryAllowed', 'pff_sackAllowed',
                                                                                'pff_hitAllowed', 'pff_beatenByDefender',
                                                                                'pff_blockType', 'pff_nflIdBlockedPlayer'])
    
    df_rush_join = df_rusher[['gameId', 'playId', 'frameId', 'nflId', 'x', 'y']].copy() 

    # Create qb_df
    df_qb = (df_pressure[df_pressure['pff_role'] == 'Pass']
            .rename(columns={col: f'qb_{col}' for col in ['x', 'y', 's', 'a', 'dis', 'o', 'dir']})
            .rename(columns={'nflId': 'qb_nflId'}))
    
    df_qb_join = (df_qb[['gameId', 'playId', 'frameId', 'qb_nflId', 'qb_x', 'qb_y', 'qb_s', 'qb_a', 'qb_o', 'qb_dir', 'qb_dis']].copy())
    
    # Create blocker_df
    df_blocker = (df_pressure[df_pressure['pff_role'] == 'Pass Block']
                .rename(columns={col: f'blocker_{col}' for col in ['x', 'y', 's', 'a', 'dis', 'o', 'dir']})
                .drop(columns=['pff_hit', 'pff_hurry', 'pff_sack', 'time']))

    df_blk_join = (df_blocker[['gameId', 'playId', 'frameId', 'nflId', 'blocker_x', 'blocker_y', 'blocker_s',
                            'blocker_a', 'blocker_o', 'blocker_dir', 'blocker_dis', 'pff_role',
                            'pff_beatenByDefender', 'pff_hurryAllowed', 'pff_hitAllowed',
                            'pff_sackAllowed', 'pff_blockType']].copy()
                .rename(columns={'nflId': 'blocker_id', 'pff_role': 'blocker_role'}))
    
    # Cross join and calculate distances
    df_cross_join = pd.merge(df_rush_join, df_blk_join, on=['gameId', 'playId', 'frameId'], how='left')
    df_cross_join['blk_rush_dist'] = df_cross_join.apply(lambda row: euclidean_distance(row['x'], row['y'], row['blocker_x'], row['blocker_y']), axis=1)


    # Find top 3 shortest distances
    df_blocker_top_3 = (df_cross_join.sort_values(by='blk_rush_dist')
                    .groupby(['gameId', 'playId', 'frameId', 'nflId'])
                    .head(3)
                    .assign(player_dist_rank=lambda x: x.groupby(['gameId', 'playId', 'frameId', 'nflId']).cumcount() + 1))


    # Merge with qb_df and rusher_df
    df_pres_long = (df_blocker_top_3.merge(df_qb_join, on=['gameId', 'playId', 'frameId'], how='left')
                    .merge(df_rusher, on=['gameId', 'playId', 'frameId', 'nflId', 'x', 'y'], how='left'))
    
        # Calculate angles and other features
    df_with_angles = df_pres_long.assign(
        vec_rusher_to_qb_x=lambda x: x['qb_x'] - x['x'],
        vec_rusher_to_qb_y=lambda x: x['qb_y'] - x['y'],
        vec_rusher_to_blocker_x=lambda x: x['blocker_x'] - x['x'],
        vec_rusher_to_blocker_y=lambda x: x['blocker_y'] - x['y'],
        leverage_angle=lambda x: x.apply(lambda row: calculate_angle(row['vec_rusher_to_qb_x'], row['vec_rusher_to_qb_y'], row['vec_rusher_to_blocker_x'], row['vec_rusher_to_blocker_y']), axis=1),
        cos_sim=lambda x: x.apply(lambda row: cosine_similarity(row['vec_rusher_to_qb_x'], row['vec_rusher_to_qb_y'], row['vec_rusher_to_blocker_x'], row['vec_rusher_to_blocker_y']), axis=1),
        ortho_dist=lambda x: x.apply(lambda row: orthogonal_distance(row['x'], row['y'], row['qb_x'], row['qb_y'], row['blocker_x'], row['blocker_y']), axis=1),
        blocker_influence=lambda x: x.apply(lambda row: np.where(row['ortho_dist'] <= 1, 1, np.exp(-row['ortho_dist'])), axis=1),
        rel_s=lambda x: x['s'] - x['qb_s'],
        approach_angle=lambda x: x.apply(lambda row: np.arctan2(row['y'] - row['qb_y'], row['x'] - row['qb_x']), axis=1),
        rel_o=lambda x: np.abs(x['o'] - x['qb_o']),
        qb_dist_near_sideline=lambda x: x['qb_y'].apply(calculate_sideline_distance),
        rush_qb_dist=lambda x: x.apply(lambda row: euclidean_distance(row['x'], row['y'], row['qb_x'], row['qb_y']), axis=1)
        )

    # Summarize blocker-interference
    df_blocker_interference = df_with_angles.groupby(['gameId', 'playId', 'frameId', 'nflId']).agg(blocker_interference=('blocker_influence', 'sum')).reset_index()

    # Pivot the dataframe to wide format
    df_wide = df_with_angles.pivot(index=['gameId', 'playId', 'frameId', 'nflId'],
                                columns='player_dist_rank',
                                values=['blocker_id', 'blocker_x', 'blocker_y', 'blocker_s', 'blocker_a',
                                        'blocker_o', 'blocker_dir', 'blocker_dis', 'pff_blockType',
                                        'pff_beatenByDefender', 'pff_hurryAllowed', 'pff_hitAllowed',
                                        'pff_sackAllowed', 'blk_rush_dist', 'vec_rusher_to_blocker_x',
                                        'vec_rusher_to_blocker_y', 'leverage_angle', 'cos_sim', 'ortho_dist',
                                        'blocker_influence'])

    df_wide.columns = ['{}_{}'.format(col[0], col[1]) for col in df_wide.columns]
    df_wide.reset_index(inplace=True)

    # Join with play features
    play_feats = df_plays[['gameId', 'playId', 'down', 'yardsToGo', 'defendersInBox', 'absoluteYardlineNumber']]
    df_pres_model = pd.merge(df_wide, play_feats, on=['gameId', 'playId'], how='left')

    return df_pres_model


In [158]:
df_blocker_test, df_qb_test, df_rusher_test = process_data_pressure_probability(df_weekly_data_tst, df_pff_data, df_plays)

In [163]:
df_qb_test.columns

Index(['gameId', 'playId', 'qb_nflId', 'frameId', 'time', 'jerseyNumber',
       'team', 'playDirection', 'qb_x', 'qb_y', 'qb_s', 'qb_a', 'qb_dis',
       'qb_o', 'qb_dir', 'event', 'second', 'week', 'second_since_snap',
       'pff_role', 'pff_hit', 'pff_hurry', 'pff_sack', 'pff_beatenByDefender',
       'pff_hurryAllowed', 'pff_hitAllowed', 'pff_sackAllowed',
       'pff_nflIdBlockedPlayer', 'pff_blockType', 'is_pressure'],
      dtype='object')

In [153]:
df_blocker_test.head()

Unnamed: 0,gameId,playId,frameId,nflId,x,y,blocker_id,blocker_x,blocker_y,blocker_s,blocker_a,blocker_o,blocker_dir,blocker_dis,blocker_role,pff_beatenByDefender,pff_hurryAllowed,pff_hitAllowed,pff_sackAllowed,pff_blockType,blk_rush_dist,player_dist_rank
10361,2021091200,843,35,43378.0,69.36,24.29,53497.0,69.31,24.31,1.84,2.49,259.82,254.66,0.18,Pass Block,0.0,1.0,0.0,0.0,PP,0.053852,1
10356,2021091200,843,34,43378.0,69.4,24.32,53497.0,69.49,24.35,2.14,2.29,261.13,256.57,0.21,Pass Block,0.0,1.0,0.0,0.0,PP,0.094868,1
9736,2021091200,843,36,35452.0,69.21,24.16,53497.0,69.17,24.27,1.5,2.64,243.78,250.69,0.15,Pass Block,0.0,1.0,0.0,0.0,PP,0.117047,1
10351,2021091200,843,33,43378.0,69.55,24.37,53497.0,69.68,24.42,2.45,1.77,282.31,262.5,0.25,Pass Block,0.0,1.0,0.0,0.0,PP,0.139284,1
10366,2021091200,843,36,43378.0,69.31,24.24,53497.0,69.17,24.27,1.5,2.64,243.78,250.69,0.15,Pass Block,0.0,1.0,0.0,0.0,PP,0.143178,1


In [160]:
df_qb_test.head()

Unnamed: 0,gameId,playId,qb_nflId,frameId,time,jerseyNumber,team,playDirection,qb_x,qb_y,qb_s,qb_a,qb_dis,qb_o,qb_dir,event,second,week,second_since_snap,pff_role,pff_hit,pff_hurry,pff_sack,pff_beatenByDefender,pff_hurryAllowed,pff_hitAllowed,pff_sackAllowed,pff_nflIdBlockedPlayer,pff_blockType,is_pressure
0,2021091200,56,33084.0,1,2021-09-12 17:06:24,2.0,ATL,right,33.29,29.91,0.0,0.0,0.01,105.19,358.76,0,0.1,1,0.0,Pass,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
1,2021091200,56,33084.0,2,2021-09-12 17:06:25,2.0,ATL,right,33.29,29.91,0.0,0.0,0.0,105.19,358.51,0,0.2,1,0.0,Pass,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
2,2021091200,56,33084.0,3,2021-09-12 17:06:25,2.0,ATL,right,33.29,29.91,0.0,0.0,0.0,103.49,356.37,0,0.3,1,0.0,Pass,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
3,2021091200,56,33084.0,4,2021-09-12 17:06:25,2.0,ATL,right,33.29,29.9,0.0,0.0,0.0,102.81,351.29,0,0.4,1,0.0,Pass,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
4,2021091200,56,33084.0,5,2021-09-12 17:06:25,2.0,ATL,right,33.28,29.9,0.0,0.0,0.0,100.7,342.64,0,0.5,1,0.0,Pass,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0


In [161]:
df_blocker_test.merge(df_qb_test, on=['gameId', 'playId', 'frameId'], how='left').head()

Unnamed: 0,gameId,playId,frameId,nflId,x,y,blocker_id,blocker_x,blocker_y,blocker_s,blocker_a,blocker_o,blocker_dir,blocker_dis,blocker_role,pff_beatenByDefender_x,pff_hurryAllowed_x,pff_hitAllowed_x,pff_sackAllowed_x,pff_blockType_x,blk_rush_dist,player_dist_rank,qb_nflId,time,jerseyNumber,team,playDirection,qb_x,qb_y,qb_s,qb_a,qb_dis,qb_o,qb_dir,event,second,week,second_since_snap,pff_role,pff_hit,pff_hurry,pff_sack,pff_beatenByDefender_y,pff_hurryAllowed_y,pff_hitAllowed_y,pff_sackAllowed_y,pff_nflIdBlockedPlayer,pff_blockType_y,is_pressure
0,2021091200,843,35,43378.0,69.36,24.29,53497.0,69.31,24.31,1.84,2.49,259.82,254.66,0.18,Pass Block,0.0,1.0,0.0,0.0,PP,0.053852,1,33084.0,2021-09-12 17:32:27,2.0,ATL,right,70.3,23.04,1.02,1.27,0.11,135.98,140.75,0,3.5,1,0.0,Pass,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
1,2021091200,843,34,43378.0,69.4,24.32,53497.0,69.49,24.35,2.14,2.29,261.13,256.57,0.21,Pass Block,0.0,1.0,0.0,0.0,PP,0.094868,1,33084.0,2021-09-12 17:32:27,2.0,ATL,right,70.24,23.13,1.19,0.52,0.12,137.24,142.16,0,3.4,1,0.0,Pass,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
2,2021091200,843,36,35452.0,69.21,24.16,53497.0,69.17,24.27,1.5,2.64,243.78,250.69,0.15,Pass Block,0.0,1.0,0.0,0.0,PP,0.117047,1,33084.0,2021-09-12 17:32:27,2.0,ATL,right,70.37,22.99,0.79,1.79,0.09,133.86,131.44,0,3.6,1,0.0,Pass,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
3,2021091200,843,33,43378.0,69.55,24.37,53497.0,69.68,24.42,2.45,1.77,282.31,262.5,0.25,Pass Block,0.0,1.0,0.0,0.0,PP,0.139284,1,33084.0,2021-09-12 17:32:27,2.0,ATL,right,70.17,23.22,1.34,0.39,0.12,134.05,141.33,0,3.3,1,0.0,Pass,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
4,2021091200,843,36,43378.0,69.31,24.24,53497.0,69.17,24.27,1.5,2.64,243.78,250.69,0.15,Pass Block,0.0,1.0,0.0,0.0,PP,0.143178,1,33084.0,2021-09-12 17:32:27,2.0,ATL,right,70.37,22.99,0.79,1.79,0.09,133.86,131.44,0,3.6,1,0.0,Pass,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0


In [None]:
    # Merge with qb_df and rusher_df
    df_pres_long = (df_blocker_top_3.merge(df_qb, on=['gameId', 'playId', 'frameId'], how='left')
                    .merge(df_rusher, on=['gameId', 'playId', 'frameId', 'nflId', 'x', 'y'], how='left'))

In [128]:
df_cross_join = pd.merge(df_rush_join, df_blk_join, on=['gameId', 'playId', 'frameId'], how='left')


In [129]:
df_cross_join.head()

Unnamed: 0,gameId,playId,frameId,nflId,x,y,blocker_id,blocker_x,blocker_y,blocker_s,blocker_a,blocker_o,blocker_dir,blocker_dis,blocker_role,pff_beatenByDefender,pff_hurryAllowed,pff_hitAllowed,pff_sackAllowed,pff_blockType
0,2021091200,56,1,35452.0,35.53,26.99,41232.0,33.78,33.0,0.0,0.0,122.65,99.55,0.0,Pass Block,0.0,0.0,0.0,0.0,PP
1,2021091200,56,1,35452.0,35.53,26.99,46094.0,33.47,25.54,0.0,0.0,73.94,182.28,0.0,Pass Block,0.0,0.0,0.0,0.0,PP
2,2021091200,56,1,35452.0,35.53,26.99,47797.0,33.91,28.33,0.0,0.0,106.9,272.93,0.0,Pass Block,0.0,0.0,0.0,0.0,SW
3,2021091200,56,1,35452.0,35.53,26.99,47814.0,33.96,26.79,0.0,0.0,136.66,152.46,0.0,Pass Block,0.0,0.0,0.0,0.0,PT
4,2021091200,56,1,35452.0,35.53,26.99,52486.0,34.45,29.73,0.0,0.0,54.03,99.31,0.0,Pass Block,0.0,0.0,0.0,0.0,PT


In [None]:
['gameId', 'playId', 'frameId', 'nflId']

In [67]:
df_weekly_data_tst = df_weekly_data[df_weekly_data['gameId'] == 2021091200].copy()

In [102]:
df_with_angles, df_blocker_interference = process_data_pressure_probability(df_weekly_data_tst, df_pff_data, df_plays)

In [103]:
df_with_angles.head()

Unnamed: 0,gameId,playId,frameId,nflId,x,y,blocker_id,blocker_x,blocker_y,blocker_s,blocker_a,blocker_o,blocker_dir,blocker_dis,blocker_role,pff_beatenByDefender_x,pff_hurryAllowed_x,pff_hitAllowed_x,pff_sackAllowed_x,pff_blockType_x,blk_rush_dist,player_dist_rank,qb_nflId,time_x,jerseyNumber_x,team_x,playDirection_x,qb_x,qb_y,qb_s,qb_a,qb_dis,qb_o,qb_dir,event_x,second_x,week_x,second_since_snap_x,pff_role_x,pff_hit_x,pff_hurry_x,pff_sack_x,pff_beatenByDefender_y,pff_hurryAllowed_y,pff_hitAllowed_y,pff_sackAllowed_y,pff_nflIdBlockedPlayer,pff_blockType_y,is_pressure_x,time_y,jerseyNumber_y,team_y,playDirection_y,s,a,dis,o,dir,event_y,second_y,week_y,second_since_snap_y,pff_role_y,pff_hit_y,pff_hurry_y,pff_sack_y,is_pressure_y,vec_rusher_to_qb_x,vec_rusher_to_qb_y,vec_rusher_to_blocker_x,vec_rusher_to_blocker_y,leverage_angle,cos_sim,ortho_dist,blocker_influence,rel_s,approach_angle,rel_o,qb_dist_near_sideline,rush_qb_dist
0,2021091200,843,35,43378.0,69.36,24.29,53497.0,69.31,24.31,1.84,2.49,259.82,254.66,0.18,Pass Block,0.0,1.0,0.0,0.0,PP,0.053852,1,33084.0,2021-09-12 17:32:27,2.0,ATL,right,70.3,23.04,1.02,1.27,0.11,135.98,140.75,0,3.5,1,0.0,Pass,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,2021-09-12 17:32:27,97.0,PHI,right,1.01,2.44,0.05,194.43,246.35,0,3.5,1,0.0,Pass Rush,0.0,1.0,0.0,1,0.94,-1.25,-0.05,0.02,148.744575,-0.854863,0.027941,1.0,-0.01,2.215576,58.45,23.04,1.564001
1,2021091200,843,34,43378.0,69.4,24.32,53497.0,69.49,24.35,2.14,2.29,261.13,256.57,0.21,Pass Block,0.0,1.0,0.0,0.0,PP,0.094868,1,33084.0,2021-09-12 17:32:27,2.0,ATL,right,70.24,23.13,1.19,0.52,0.12,137.24,142.16,0,3.4,1,0.0,Pass,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,2021-09-12 17:32:27,97.0,PHI,right,1.48,2.45,0.16,198.37,250.5,0,3.4,1,0.0,Pass Rush,0.0,1.0,0.0,1,0.84,-1.19,0.09,0.03,73.217356,0.288742,0.090828,1.0,0.29,2.185459,61.13,23.13,1.456606
2,2021091200,843,36,35452.0,69.21,24.16,53497.0,69.17,24.27,1.5,2.64,243.78,250.69,0.15,Pass Block,0.0,1.0,0.0,0.0,PP,0.117047,1,33084.0,2021-09-12 17:32:27,2.0,ATL,right,70.37,22.99,0.79,1.79,0.09,133.86,131.44,0,3.6,1,0.0,Pass,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,2021-09-12 17:32:27,55.0,PHI,right,4.22,2.27,0.44,189.17,5.71,0,3.6,1,0.0,Pass Rush,0.0,1.0,0.0,1,1.16,-1.17,-0.04,0.11,155.22901,-0.90799,0.049042,1.0,3.43,2.351903,55.31,22.99,1.647574
3,2021091200,843,33,43378.0,69.55,24.37,53497.0,69.68,24.42,2.45,1.77,282.31,262.5,0.25,Pass Block,0.0,1.0,0.0,0.0,PP,0.139284,1,33084.0,2021-09-12 17:32:27,2.0,ATL,right,70.17,23.22,1.34,0.39,0.12,134.05,141.33,0,3.3,1,0.0,Pass,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,2021-09-12 17:32:27,97.0,PHI,right,1.73,2.73,0.15,196.74,252.12,0,3.3,1,0.0,Pass Rush,0.0,1.0,0.0,1,0.62,-1.15,0.13,0.05,82.707053,0.126943,0.138157,1.0,0.39,2.065256,62.69,23.22,1.306484
4,2021091200,843,36,43378.0,69.31,24.24,53497.0,69.17,24.27,1.5,2.64,243.78,250.69,0.15,Pass Block,0.0,1.0,0.0,0.0,PP,0.143178,1,33084.0,2021-09-12 17:32:27,2.0,ATL,right,70.37,22.99,0.79,1.79,0.09,133.86,131.44,0,3.6,1,0.0,Pass,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,2021-09-12 17:32:27,97.0,PHI,right,0.69,2.63,0.07,188.71,236.46,0,3.6,1,0.0,Pass Rush,0.0,1.0,0.0,1,1.06,-1.25,-0.14,0.03,142.392702,-0.792212,0.087374,1.0,-0.1,2.274128,54.85,22.99,1.638933


In [104]:
df_blocker_interference.head()

Unnamed: 0,gameId,playId,frameId,nflId,blocker_interference
0,2021091200,56,1,35452.0,1.335463
1,2021091200,56,1,38542.0,2.189664
2,2021091200,56,1,43378.0,1.465028
3,2021091200,56,1,44826.0,2.245399
4,2021091200,56,2,35452.0,1.336106


In [165]:
df_test = process_data_pressure_probability(df_weekly_data_tst, df_pff_data, df_plays)

In [167]:
df_test.head()

Unnamed: 0,gameId,playId,frameId,nflId,blocker_id_1,blocker_id_2,blocker_id_3,blocker_x_1,blocker_x_2,blocker_x_3,blocker_y_1,blocker_y_2,blocker_y_3,blocker_s_1,blocker_s_2,blocker_s_3,blocker_a_1,blocker_a_2,blocker_a_3,blocker_o_1,blocker_o_2,blocker_o_3,blocker_dir_1,blocker_dir_2,blocker_dir_3,blocker_dis_1,blocker_dis_2,blocker_dis_3,pff_blockType_1,pff_blockType_2,pff_blockType_3,pff_beatenByDefender_1,pff_beatenByDefender_2,pff_beatenByDefender_3,pff_hurryAllowed_1,pff_hurryAllowed_2,pff_hurryAllowed_3,pff_hitAllowed_1,pff_hitAllowed_2,pff_hitAllowed_3,pff_sackAllowed_1,pff_sackAllowed_2,pff_sackAllowed_3,blk_rush_dist_1,blk_rush_dist_2,blk_rush_dist_3,vec_rusher_to_blocker_x_1,vec_rusher_to_blocker_x_2,vec_rusher_to_blocker_x_3,vec_rusher_to_blocker_y_1,vec_rusher_to_blocker_y_2,vec_rusher_to_blocker_y_3,leverage_angle_1,leverage_angle_2,leverage_angle_3,cos_sim_1,cos_sim_2,cos_sim_3,ortho_dist_1,ortho_dist_2,ortho_dist_3,blocker_influence_1,blocker_influence_2,blocker_influence_3,down,yardsToGo,defendersInBox,absoluteYardlineNumber
0,2021091200,56,1,35452.0,47814.0,47797.0,46094.0,33.96,33.91,33.47,26.79,28.33,25.54,0.0,0.0,0.0,0.0,0.0,0.0,136.66,106.9,73.94,152.46,272.93,182.28,0.0,0.0,0.0,PT,SW,PP,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.582688,2.10238,2.519147,-1.57,-1.62,-2.06,-0.2,1.34,-1.45,59.767071,12.911138,87.648429,0.503517,0.974718,0.041031,1.367419,0.469755,2.517025,0.2547635996922593,1.0,0.0806993121423392,1,10,7.0,35.0
1,2021091200,56,1,38542.0,41232.0,53497.0,52486.0,33.78,33.93,34.45,33.0,31.26,29.73,0.0,0.0,0.0,0.0,0.0,0.0,122.65,87.28,54.03,99.55,183.23,99.31,0.0,0.0,0.0,PP,PA,PT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.887326,2.144411,3.080812,-1.84,-1.69,-1.17,0.42,-1.32,-2.85,61.748215,10.897972,18.790435,0.473347,0.981965,0.946703,1.6625,0.405424,0.992353,0.1896641715884459,1.0,1.0,1,10,7.0,35.0
2,2021091200,56,1,43378.0,52486.0,47797.0,53497.0,34.45,33.91,33.93,29.73,28.33,31.26,0.0,0.0,0.0,0.0,0.0,0.0,54.03,106.9,87.28,99.31,272.93,183.23,0.0,0.0,0.0,PT,SW,PA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.110045,2.157452,2.242432,-1.11,-1.65,-1.63,0.01,-1.39,1.54,4.268365,44.896129,38.589213,0.997226,0.708388,0.781638,0.082619,1.522781,1.398677,1.0,0.2181045126990092,0.2469233211584311,1,10,7.0,35.0
3,2021091200,56,1,44826.0,46094.0,47814.0,47797.0,33.47,33.96,33.91,25.54,26.79,28.33,0.0,0.0,0.0,0.0,0.0,0.0,73.94,136.66,106.9,182.28,152.46,272.93,0.0,0.0,0.0,PP,PT,SW,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.722389,3.358869,4.776578,-2.15,-1.66,-1.71,1.67,2.92,4.46,31.067235,8.523235,0.117472,0.856562,0.988956,0.999998,1.404871,0.49782,0.009793,0.2453986635663312,1.0,1.0,1,10,7.0,35.0
4,2021091200,56,2,35452.0,47814.0,47797.0,46094.0,33.96,33.91,33.48,26.79,28.33,25.54,0.0,0.0,0.0,0.0,0.0,0.0,134.0,106.9,73.94,150.32,274.73,171.71,0.0,0.0,0.01,PT,SW,PP,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.582688,2.10238,2.510976,-1.57,-1.62,-2.05,-0.2,1.34,-1.45,59.767071,12.911138,87.779768,0.503517,0.974718,0.038741,1.367419,0.469755,2.509091,0.2547635996922593,1.0,0.0813421526944025,1,10,7.0,35.0


In [169]:
df_weekly_data_tst.head()

Unnamed: 0,gameId,playId,nflId,frameId,time,jerseyNumber,team,playDirection,x,y,s,a,dis,o,dir,event,second,week,snap_time,second_since_snap
92644,2021091200,56,33084.0,1,2021-09-12 17:06:24,2.0,ATL,right,33.29,29.91,0.0,0.0,0.01,105.19,358.76,,0.1,1,,
92645,2021091200,56,33084.0,2,2021-09-12 17:06:25,2.0,ATL,right,33.29,29.91,0.0,0.0,0.0,105.19,358.51,,0.2,1,,
92646,2021091200,56,33084.0,3,2021-09-12 17:06:25,2.0,ATL,right,33.29,29.91,0.0,0.0,0.0,103.49,356.37,,0.3,1,,
92647,2021091200,56,33084.0,4,2021-09-12 17:06:25,2.0,ATL,right,33.29,29.9,0.0,0.0,0.0,102.81,351.29,,0.4,1,,
92648,2021091200,56,33084.0,5,2021-09-12 17:06:25,2.0,ATL,right,33.28,29.9,0.0,0.0,0.0,100.7,342.64,,0.5,1,,
