# Deep Learning Preprocessing v2 

__Author:__ Jack Friedman <br>
__Date:__ 11/6/2023 <br>
__Purpose:__ Playground for converting data into position image

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from Preprocessing_v2 import *
from DataLoader import load_data

In [2]:
# Load data
[games_df, players_df, plays_df, tracking_df] = load_data()

loaded games df
shape: (136, 9)
-----
loaded players df
shape: (1683, 7)
-----
loaded plays df
shape: (12486, 35)
-----
loading tracking frames...
loaded tracking frames
shape: (12187398, 17)
returning 4 frames


In [3]:
tracking_df_clean = preprocess_all_df(plays_df, games_df, players_df, tracking_df)

cleaning plays_df
final plays data shape: (6840, 289)
-----

cleaning games_df
-----

cleaning players_df
-----

cleaning tracking_df
original tracking df shape: (12187398, 17)
unique play and game id combos: (6840, 2)
filtered df shape: (8458178, 17)
number of merge errors: 0
joined plays and tracking dataframes
original tracking shape: (8458178, 17)
merged data shape: (8458178, 304)
-------
joined players and tracking dataframes
original tracking shape: (8458178, 304)
merged data shape: (8458178, 306)
-------
joined games and tracking dataframes
original tracking shape: (8458178, 306)
merged data shape: (8458178, 307)
-------
Old df shape:(8458178, 346)
New df shape:(8458178, 346)
-----



In [None]:
# Check
for var in ['X_std', 'Y_std', 'Dir_std', 'O_std']:
    print(tracking_df_clean[var].describe())

In [None]:
tracking_df_clean[(tracking_df_clean['X_std'] > 10) & (tracking_df_clean['X_std'] < 11)]['X_std'].unique()

In [None]:
tracking_df_clean['event'].unique()

### frame cutoffs

In [None]:
tracking_df_clean[(tracking_df_clean['gameId'] == 2022090800) & (tracking_df_clean['playId'] == 80) & (tracking_df_clean['frameId'] <= tracking_df_clean[(tracking_df_clean['event'] == 'run') | (tracking_df_clean['event'] == 'handoff')]['frameId'].max())]

In [4]:
# Filter for pre-handoff events 

# Step 1: Get the frames where handoff or run occurs 
frame_cutoffs = tracking_df_clean[(tracking_df_clean['event'] == 'run') | (tracking_df_clean['event'] == 'handoff')][['gameId', 'playId', 'frameId']].drop_duplicates()
frame_cutoffs = frame_cutoffs.rename(columns = {'frameId':'frame_cutoff'})
frame_cutoffs

Unnamed: 0,gameId,playId,frame_cutoff
5,2022090800,80,6
708,2022090800,101,19
1838,2022090800,146,22
3122,2022090800,191,18
4296,2022090800,299,19
...,...,...,...
8451732,2022110700,3630,18
8453136,2022110700,3686,19
8454884,2022110700,3707,19
8456220,2022110700,3740,21


In [5]:
# Look at distribution fo cutoffs
frame_cutoffs['frame_cutoff'].describe()

count    6820.000000
mean       17.966129
std         4.242868
min         6.000000
25%        17.000000
50%        19.000000
75%        20.000000
max        49.000000
Name: frame_cutoff, dtype: float64

In [6]:
# Step 2: Merge the frame cutoffs with the original dataframe and filter the frames before the cutoff
tracking_df_clean = pd.merge(tracking_df_clean, frame_cutoffs, on=['gameId', 'playId'])

In [7]:
tracking_df_clean[tracking_df_clean['frameId'] <= tracking_df_clean['frame_cutoff']]['event'].unique()

array([nan, 'run', 'ball_snap', 'handoff', 'man_in_motion', 'play_action',
       'line_set', 'shift', 'pass_forward', 'autoevent_passforward',
       'autoevent_passinterrupted', 'run_pass_option',
       'autoevent_ballsnap', 'snap_direct', 'fumble', 'first_contact',
       'penalty_flag', 'pass_outcome_caught', 'pass_arrived'],
      dtype=object)

### check passing plays

In [12]:
tracking_df_clean['event'].unique()

array([nan, 'run', 'first_contact', 'tackle', 'ball_snap', 'handoff',
       'man_in_motion', 'fumble', 'out_of_bounds', 'play_action',
       'touchdown', 'autoevent_passforward', 'autoevent_passinterrupted',
       'line_set', 'qb_slide', 'shift', 'pass_forward',
       'pass_outcome_caught', 'run_pass_option', 'qb_sack',
       'autoevent_ballsnap', 'snap_direct', 'fumble_defense_recovered',
       'safety', 'lateral', 'pass_arrived', 'penalty_flag',
       'fumble_offense_recovered', 'penalty_accepted'], dtype=object)

In [14]:
plays_df[~plays_df['playDescription'].str.contains('pass', na=False)]['playDescription']

1        (7:38) (Shotgun) C.Claypool right end to PIT 3...
3        (13:12) M.Boone left tackle to DEN 44 for 7 ya...
4        (8:33) D.Singletary right guard to TEN 32 for ...
6        (13:31) (Shotgun) R.Tannehill scrambles up the...
7        (15:00) J.Wilson left tackle to SF 32 for 7 ya...
                               ...                        
12477    (14:18) (Shotgun) M.Gordon left tackle to NYJ ...
12481    (13:31) E.Elliott right tackle to WAS 38 for 1...
12483    (9:35) (Shotgun) J.Mixon left end to CIN 47 fo...
12484    (2:02) (Shotgun) J.Taylor up the middle to TEN...
12485    (12:51) C.Heck reported in as eligible.  D.Pie...
Name: playDescription, Length: 6840, dtype: object

In [None]:
possible_pass_plays = tracking_df_clean[tracking_df_clean['event'].str.contains('pass', na=False)][['gameId', 'playId']].drop_duplicates()

In [None]:
pd.set_option('display.max_colwidth', None)
pd.merge(plays_df, possible_pass_plays, on=['gameId', 'playId'], how='inner')

In [None]:
len(tracking_df_clean[(tracking_df_clean['event'] == 'run') | (tracking_df_clean['event'] == 'pass_forward')][['gameId', 'playId']].drop_duplicates())

In [None]:
plays_df[(plays_df['gameId'] == 2022090800) & (plays_df['playId'] == 80)]['playDescription']

### Check when 2 players are in the same matrix cell (same yard^2) in a play

In [27]:
def is_out_of_bounds(x, y):
    i = int(x)
    j = int(y)
    return i < 0 or i > 119 or j < 0 or j > 53

# Keep track of number of conflicts when 2+ players are in a cell
num_conflicts = 0
pre_cutoff_rows = tracking_df_clean[tracking_df_clean['frameId'] <= tracking_df_clean['frame_cutoff']]
for (game_id, play_id), group_df in pre_cutoff_rows.groupby(['gameId', 'playId']):
    for frame_id, frame_df in group_df.groupby(['frameId']):
        # Create a blank matrix of the field
        image = np.zeros((120, 54))

        for _, row in frame_df.iterrows():
            if pd.isnull(row['nflId']):
                continue

            # Keep track of x and y locations of player (matrix indices)
            i = int(row['X_std'])
            j = int(row['Y_std'])

            # Make sure player is in the frame 
            if is_out_of_bounds(i, j):
                continue

            image[i, j] += 1

            if image[i,j] > 2:
                num_conflicts += 1
                print("found a conflict in " + str((game_id, play_id, frame_id)) + " at " + str((i, j)))
                break

print('total conflicts: ' + str(num_conflicts))

  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):


found a conflict in (2022090800, 299, 19) at (26, 26)
found a conflict in (2022090800, 933, 18) at (73, 34)


  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']

found a conflict in (2022090800, 2712, 19) at (103, 29)
found a conflict in (2022090800, 3283, 16) at (86, 29)


  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):


found a conflict in (2022091100, 546, 15) at (41, 23)
found a conflict in (2022091100, 546, 16) at (41, 23)
found a conflict in (2022091100, 546, 17) at (41, 23)
found a conflict in (2022091100, 546, 18) at (41, 23)


  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']

found a conflict in (2022091100, 2899, 15) at (76, 30)
found a conflict in (2022091100, 3289, 17) at (35, 30)


  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']

found a conflict in (2022091101, 361, 15) at (45, 31)


  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):


found a conflict in (2022091101, 1901, 18) at (48, 24)
found a conflict in (2022091101, 1901, 19) at (48, 24)
found a conflict in (2022091101, 1901, 20) at (48, 24)


  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):


found a conflict in (2022091101, 2452, 15) at (58, 27)
found a conflict in (2022091101, 2452, 16) at (58, 27)
found a conflict in (2022091101, 2452, 17) at (55, 23)
found a conflict in (2022091101, 2478, 16) at (74, 18)
found a conflict in (2022091101, 2478, 17) at (74, 18)
found a conflict in (2022091101, 2478, 18) at (74, 18)
found a conflict in (2022091101, 2629, 15) at (27, 25)
found a conflict in (2022091101, 2629, 20) at (27, 26)


  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):


found a conflict in (2022091101, 2930, 16) at (41, 31)
found a conflict in (2022091101, 2930, 17) at (41, 31)


  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']

found a conflict in (2022091101, 3923, 15) at (94, 24)


  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):


found a conflict in (2022091102, 467, 18) at (41, 30)
found a conflict in (2022091102, 574, 16) at (78, 28)
found a conflict in (2022091102, 698, 17) at (17, 26)
found a conflict in (2022091102, 698, 18) at (18, 24)
found a conflict in (2022091102, 719, 20) at (26, 28)
found a conflict in (2022091102, 800, 18) at (36, 32)


  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']

found a conflict in (2022091102, 1794, 16) at (50, 27)
found a conflict in (2022091102, 1869, 17) at (63, 21)
found a conflict in (2022091102, 1869, 18) at (63, 21)
found a conflict in (2022091102, 1869, 19) at (64, 24)


  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):


found a conflict in (2022091102, 2373, 14) at (102, 23)
found a conflict in (2022091102, 2373, 15) at (102, 23)
found a conflict in (2022091102, 2511, 20) at (38, 26)


  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']

found a conflict in (2022091102, 3523, 16) at (106, 25)


  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):


found a conflict in (2022091103, 611, 17) at (40, 31)
found a conflict in (2022091103, 764, 12) at (108, 24)
found a conflict in (2022091103, 764, 13) at (108, 22)
found a conflict in (2022091103, 764, 14) at (108, 23)
found a conflict in (2022091103, 764, 15) at (108, 23)
found a conflict in (2022091103, 764, 16) at (108, 23)
found a conflict in (2022091103, 764, 17) at (108, 22)
found a conflict in (2022091103, 764, 18) at (108, 22)
found a conflict in (2022091103, 764, 19) at (108, 22)
found a conflict in (2022091103, 764, 20) at (108, 22)
found a conflict in (2022091103, 881, 18) at (47, 23)


  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']

found a conflict in (2022091103, 1700, 17) at (105, 21)
found a conflict in (2022091103, 2037, 19) at (18, 23)


  for frame_id, frame_df in group_df.groupby(['frameId']):
  for frame_id, frame_df in group_df.groupby(['frameId']):


KeyboardInterrupt: 

In [1]:
print(num_conflicts)

NameError: name 'num_conflicts' is not defined

In [None]:
tracking_df_clean['Y_std'].astype(int).describe()

In [None]:
# EXAMINE Conflict in 1 frame

 # Create a blank matrix of the field
image = np.zeros((120, 54))

# found a conflict at (2022090800, 146, 15)
frame = tracking_df_clean[(tracking_df_clean['gameId'] == 2022092502) & (tracking_df_clean['playId'] == 2196) & (tracking_df_clean['frameId'] == 19)]
for _, row in frame.iterrows():
    if pd.isnull(row['nflId']):
        continue

    # Keep track of x and y locations of player (matrix indices)
    i = int(row['X_std'])
    j = int(row['Y_std'])

    # Make sure player is in the frame 
    if is_out_of_bounds(i, j):
        continue

    image[i, j] += 1

    if image[i, j] > 1:
        print(i, j)

image

## Create Image of the field

Notes from soroush meeting

1. Channels - 0 to 1 - for num players
2. Ratio of teams- if all in same team (1, half-half is 0.5, 2/3 on one team) --> 0 to 1
3. Offense ratio- 0-1 ratio of offense (1 = only offense, 0 = nobody there)
4. Defense ratio- 0 -1 ratio of defense (1 = only defense, 0 = )
5. Velocity - average velocity and 
6. ariance of velocity in the cells 
6. Height - average height of players in the cell 0 - 1 (1 is tallest height)
7. Variance of the height 
8. Channel for whether the football is there


All channels should go to 0 to 1
- Every continuosu variable can be represented with 2 variables- average and variance
- 



List of Channels
1. Spatial - 0 to 1 for percentage of players on the field at this location (0 = nobody 1 = all 21 players at this spot) -- [will never reach 1 but need to scale for a ton of linemen being in one spot or a tackling dogpile with many players- can change upper bound]
2. Ball location: = 1 if ball at this location, = 0 if not
3. [DEPRECIATED] Team ratio - if all in same team (1, half-half is 0.5, 2/3 on one team) --> 0 to 1
4. Offense ratio - 0-1 ratio of offense (1 = only offense, 0 = nobody there, else = # offense / total people 
5. Defense ratio -  0 -1 ratio of defense (1 = only defense, 0 = nobody there, else = % of players in that cell on defense)
6. Velocity (average, standardized 0-1)
7. Velocity (variation, standardized 0-1)
8. Acceleration (average, standardized 0-1)
9. Acceleration (variation, standardized 0-1)
10. Net??? angle of player motion – standardization to not include 0?? 
11. Angle of player orientation??????
12. Height (average, standardized 0-1)
13. Height (variation, standardized 0-1)
14. Weight (average, standardized 0-1)
15. Weight (variation, standardized 0-1)


In [None]:
single_frame_data = tracking_df_clean[(tracking_df_clean['gameId'] == 2022090800) & (tracking_df_clean['playId'] == 80) & (tracking_df_clean['frameId'] == 2)]
single_frame_data 

Notes on update formulae: <br>

Variables: <br>
$n_o$ = total # offensive players checked so far <br>
$n_d$ = total # defensive players checked so far <br>
$p_0$ = existing % at this spot <br>

note total players checked = $n_o + n_d$

1. Spatial layer (% of players on field at this location):
    - Formula to udpate = $\frac{(p_0)(n_o + n_d) + 1}{(n_o + n_d) + 1}$ =  $\frac{\text{(existing \% at this spot)} \text{(\# players checked)} + 1}{\text{(\# players checked)} + 1}$
3. Offese ratio:
    - Formula to update = $\frac{(p_0)(n_o) + 1}{n_o + 1}$ 

### Proof of concept- build tensor with 1 frame

#### A) Get max velocity, accel, etc. for normalizing from 0-1

In [138]:
tracking_df_clean['isHomeTeam']

0          0
1          0
2          0
3          0
4          0
          ..
8438163    0
8438164    0
8438165    0
8438166    0
8438167    0
Name: isHomeTeam, Length: 8438168, dtype: int64

In [11]:
tracking_df_clean['s'].describe()

count    8.438168e+06
mean     2.423730e+00
std      1.922817e+00
min      0.000000e+00
25%      8.800000e-01
50%      2.040000e+00
75%      3.630000e+00
max      3.640000e+01
Name: s, dtype: float64

In [16]:
max_a = tracking_df_clean['a'].max()
max_s = tracking_df_clean['s'].max()
max_height = tracking_df_clean['heightInches'].max()   
max_weight = tracking_df_clean['weight'].max()  

#### B) Building single frame

In [197]:
def is_out_of_bounds(x, y):
    i = int(x)
    j = int(y)
    return i < 0 or i > 119 or j < 0 or j > 53

In [198]:
# Conflicts to test
# (2022090800, 2360, 17) at (68, 27) --> double
# (2022091100, 546, 16) at (41, 23) --> triple

In [199]:
single_frame_data = tracking_df_clean[(tracking_df_clean['gameId'] == 2022090800) & (tracking_df_clean['playId'] == 2360) & (tracking_df_clean['frameId'] == 17)]
# single_frame_data = tracking_df_clean[(tracking_df_clean['gameId'] == 2022091100) & (tracking_df_clean['playId'] == 546) & (tracking_df_clean['frameId'] == 16)]

# STEP 0: Create a blank matrix
image = np.zeros((120, 54, 15))

# STEP 1: Record ball location before looping through players in Channel 1
i = int(single_frame_data[single_frame_data['nflId'].isnull()]['X_std'])
j = int(single_frame_data[single_frame_data['nflId'].isnull()]['Y_std'])
image[i, j, 1] = 1

# STEP 2: Drop football from dataframe
single_frame_data = single_frame_data.dropna(subset=['nflId'])

# STEP 3: Populate player channels, person by person
total_num_players_checked_on_field = 0
num_players = np.zeros((120, 54, 4))  # Depth 0 is offense channel Depth 1 is defense, depth 2 is home, depth 3 is away
temp_values = {}   # holds temporary values for calculating variances (0 = s, 1 = a, 2 = h, 3 = w)

for _, row in single_frame_data.iterrows():
    # Keep track of x and y locations of player (matrix indices)
    i = int(row['X_std'])
    j = int(row['Y_std'])
    
    # Make sure player is in the frame 
    if is_out_of_bounds(i, j):
        continue

    # Update total number of players checked
    total_num_players_checked_on_field += 1

    # Channel 0: Record position of this player
    image[i, j, 0] = (image[i, j, 0] * 22 + 1) / 22

    # Keep track of people on offense and defense
    if row['isOnOffense'] == 1:
        num_players[i, j, 0] += 1  # record offensive player
    else: 
        num_players[i, j, 1] += 1  # record defensive player
    total_players_in_cell = num_players[i, j, 0] + num_players[i, j, 1]
    
    # Channel 2: % offensive players in this cell
    # Channel 3: Record defensive player ratio
    image[i, j, 2] = num_players[i, j, 0] / (num_players[i, j, 0] + num_players[i, j, 1])  # calculate % offensive players at this cell
    image[i, j, 3] = num_players[i, j, 1] / (num_players[i, j, 0] + num_players[i, j, 1])  # calculate % offensive players at this cell

    # Channel 4: Velocity (average, standardized 0-1)
    image[i, j, 4] = (image[i, j, 4] * (total_players_in_cell - 1) + row['s'] / max_s) / total_players_in_cell

    # Store continuous variables so we can calculate variances at the end
    if (i, j) not in temp_values.keys():
        temp_values[(i, j)] = {'s': [],
                               'a': [],
                               'heightInches':[],
                               'weight': []}
    temp_values[(i, j)]['s'] += [row['s'] / max_s]  # Channel 5: Velocity (variance, standardized 0-1)
    temp_values[(i, j)]['a'] += [row['a'] / max_a]  # Channel 7: Acceleration (variance, standardized 0-1)
    temp_values[(i, j)]['heightInches'] += [row['heightInches'] / max_height]  # Channel 9: Height (variance, standardized 0-1)
    temp_values[(i, j)]['weight'] += [row['weight'] / max_weight]  # Channel 11: Weight (variance, standardized 0-1)
    
    # Channel 6: Acceleration (average, standardized 0-1)
    image[i, j, 6] = (image[i, j, 6] * (total_players_in_cell - 1) + row['a'] / max_a) / total_players_in_cell
    
    # Channel 8: Height (average, standardized 0-1)
    image[i, j, 8] = (image[i, j, 8] * (total_players_in_cell - 1) + row['heightInches'] / max_height) / total_players_in_cell
    
    # Channel 10: Weight (average, standardized 0-1)
    image[i, j, 10] = (image[i, j, 10] * (total_players_in_cell - 1) + row['weight'] / max_weight) / total_players_in_cell

    # Keep track of people home and away
    if row['isHomeTeam'] == 1:
        num_players[i, j, 2] += 1  # record home team player
    else: 
        num_players[i, j, 3] += 1  # record away team player
    # Update channels
    # Channel 12: % home team in this cell
    # Channel 13: % away team in this cell
    image[i, j, 12] = num_players[i, j, 2] / total_players_in_cell  # calculate % home team players at this cell
    image[i, j, 13] = num_players[i, j, 3] / total_players_in_cell  # calculate % away team players at this cell

# Compute variances 
for (i, j) in temp_values.keys():
    # Channel 5: Velocity (variance, standardized 0-1)
    image[i, j, 5] = np.var(temp_values[(i,j)]['s'], ddof = 1)

    # Channel 7: Acceleration (variance, standardized 0-1)
    image[i, j, 7] = np.var(temp_values[(i,j)]['a'], ddof = 1)

    # Channel 9: Height (variance, standardized 0-1)
    image[i, j, 9] = np.var(temp_values[(i,j)]['heightInches'], ddof = 1)
    
    # Channel 11: Weight (variance, standardized 0-1)
    image[i, j, 11] = np.var(temp_values[(i,j)]['weight'], ddof = 1)
    

    
    # 10. Net??? angle of player motion – standardization to not include 0?? 
    # 11. Angle of player orientation??????
    # 16. PFF rating?

  return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


In [192]:
vals = single_frame_data.astype({'X_std': 'int64', 'Y_std': 'int64'})[(tracking_df_clean['X_std'].astype(int) == 41) & (tracking_df_clean['Y_std'].astype(int) ==23)]['heightInches'].values
vals

  vals = single_frame_data.astype({'X_std': 'int64', 'Y_std': 'int64'})[(tracking_df_clean['X_std'].astype(int) == 41) & (tracking_df_clean['Y_std'].astype(int) ==23)]['heightInches'].values


array([76., 77., 75.])

In [194]:
for i in vals:
    print(i / max_height)

0.9382716049382716
0.9506172839506173
0.9259259259259259


In [195]:
temp_values[(41,23)]['heightInches']

[0.9382716049382716, 0.9506172839506173, 0.9259259259259259]

In [196]:
image[41, 23, 9]

0.00015241579027587256

In [None]:
# 68, 27
# (41, 23)