In [2]:
from matplotlib import pyplot as plt
from scipy.stats import linregress
import numpy as np
from sklearn import datasets
import pandas as pd

In [3]:
# Load the week 7 tracking CSV and preview its first rows.
tracking_csv_path = '../base_datasets/tracking_week_7.csv'
week_df = pd.read_csv(tracking_csv_path)
week_df.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
0,2022102000,56,37084.0,J.J. Watt,1,2022-10-20 20:16:19.099999,99.0,ARI,left,87.64,23.49,3.95,2.68,0.4,113.01,67.5,
1,2022102000,56,37084.0,J.J. Watt,2,2022-10-20 20:16:19.200000,99.0,ARI,left,88.02,23.63,4.08,2.35,0.41,118.7,71.05,pass_arrived
2,2022102000,56,37084.0,J.J. Watt,3,2022-10-20 20:16:19.299999,99.0,ARI,left,88.44,23.74,4.21,2.07,0.44,114.82,75.53,
3,2022102000,56,37084.0,J.J. Watt,4,2022-10-20 20:16:19.400000,99.0,ARI,left,88.86,23.82,4.2,2.07,0.43,121.02,79.59,
4,2022102000,56,37084.0,J.J. Watt,5,2022-10-20 20:16:19.500000,99.0,ARI,left,89.28,23.88,4.15,2.08,0.42,124.76,82.73,


In [4]:
"""
Cell generated by Data Wrangler.
"""
def clean_data(week_df):
    """Return a cleaned copy of a tracking DataFrame.

    Steps, in order:
      1. Fill missing 'jerseyNumber' and 'nflId' values with 0.
      2. Cast 'nflId' and 'jerseyNumber' to the pandas 'string' dtype and
         drop a trailing ".0" (left over from the float representation).
      3. Cast 'club', 'playDirection', 'event' and 'displayName' to 'string'.
      4. Round 'x', 'y', 's', 'a' and 'dis' to two decimal places.
      5. Cast 'time' to datetime64[ns].

    Parameters
    ----------
    week_df : pandas.DataFrame
        Frame containing at least the columns named above.

    Returns
    -------
    pandas.DataFrame
        A new frame; the frame passed in is not modified because the first
        step (`fillna`) already produces a new object.
    """
    # Replace missing values with 0 in columns: 'jerseyNumber', 'nflId'
    week_df = week_df.fillna({'jerseyNumber': 0, 'nflId': 0})

    # Change column type to string and strip the float suffix for the id-like
    # columns. The pattern is anchored to the end of the value so that only a
    # trailing ".0" is removed; an unanchored literal replace would also eat
    # ".0" in the middle of a value (e.g. "40.05" -> "405").
    for id_col in ('nflId', 'jerseyNumber'):
        week_df[id_col] = (
            week_df[id_col].astype('string').str.replace(r'\.0$', '', regex=True)
        )

    # Change column type to string for columns: 'club', 'playDirection', 'event', 'displayName'
    week_df = week_df.astype({'club': 'string', 'playDirection': 'string','event': 'string','displayName': 'string'})

    # Reduce the floats to two decimal points
    float_cols = ['x', 'y', 's', 'a', 'dis']
    week_df[float_cols] = week_df[float_cols].round(2)

    # Change column type to datetime64[ns] for column: 'time'
    week_df = week_df.astype({'time': 'datetime64[ns]'})
    return week_df

# Run the cleaning routine on a copy of the raw frame, then preview the result.
week_df_clean = week_df.copy().pipe(clean_data)
week_df_clean.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
0,2022102000,56,37084,J.J. Watt,1,2022-10-20 20:16:19.099999,99,ARI,left,87.64,23.49,3.95,2.68,0.4,113.01,67.5,
1,2022102000,56,37084,J.J. Watt,2,2022-10-20 20:16:19.200000,99,ARI,left,88.02,23.63,4.08,2.35,0.41,118.7,71.05,pass_arrived
2,2022102000,56,37084,J.J. Watt,3,2022-10-20 20:16:19.299999,99,ARI,left,88.44,23.74,4.21,2.07,0.44,114.82,75.53,
3,2022102000,56,37084,J.J. Watt,4,2022-10-20 20:16:19.400000,99,ARI,left,88.86,23.82,4.2,2.07,0.43,121.02,79.59,
4,2022102000,56,37084,J.J. Watt,5,2022-10-20 20:16:19.500000,99,ARI,left,89.28,23.88,4.15,2.08,0.42,124.76,82.73,


In [5]:
# Print the column dtypes and non-null counts of the cleaned frame.
week_df_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1291493 entries, 0 to 1291492
Data columns (total 17 columns):
 #   Column         Non-Null Count    Dtype         
---  ------         --------------    -----         
 0   gameId         1291493 non-null  int64         
 1   playId         1291493 non-null  int64         
 2   nflId          1291493 non-null  string        
 3   displayName    1291493 non-null  string        
 4   frameId        1291493 non-null  int64         
 5   time           1291493 non-null  datetime64[ns]
 6   jerseyNumber   1291493 non-null  string        
 7   club           1291493 non-null  string        
 8   playDirection  1291493 non-null  string        
 9   x              1291493 non-null  float64       
 10  y              1291493 non-null  float64       
 11  s              1291493 non-null  float64       
 12  a              1291493 non-null  float64       
 13  dis            1291493 non-null  float64       
 14  o              1235421 non-null  f

In [5]:
# List the distinct gameId values present in the cleaned tracking data.
unique_values = pd.unique(week_df_clean['gameId'])
print(unique_values)

[2022102000 2022102300 2022102301 2022102302 2022102303 2022102304
 2022102305 2022102306 2022102307 2022102308 2022102309 2022102310
 2022102311 2022102400]


In [19]:
# Split the cleaned tracking data into one frame per gameId.
# Each frame keeps the original row index of week_df_clean.
ari_no_df = week_df_clean[week_df_clean['gameId'] == 2022102000]
cle_bal_df = week_df_clean[week_df_clean['gameId'] == 2022102300]
tb_car_df = week_df_clean[week_df_clean['gameId'] == 2022102301]
cin_atl_df = week_df_clean[week_df_clean['gameId'] == 2022102302]
det_dal_df = week_df_clean[week_df_clean['gameId'] == 2022102303]
jax_nyg_df = week_df_clean[week_df_clean['gameId'] == 2022102304]
ten_ind_df = week_df_clean[week_df_clean['gameId'] == 2022102305]
gb_was_df = week_df_clean[week_df_clean['gameId'] == 2022102306]
nyj_den_df = week_df_clean[week_df_clean['gameId'] == 2022102307]
lv_hou_df = week_df_clean[week_df_clean['gameId'] == 2022102308]
lac_sea_df = week_df_clean[week_df_clean['gameId'] == 2022102309]
sf_kc_df = week_df_clean[week_df_clean['gameId'] == 2022102310]
mia_pit_df = week_df_clean[week_df_clean['gameId'] == 2022102311]
chi_ne_df = week_df_clean[week_df_clean['gameId'] == 2022102400]

# Sanity check: show which 'club' values occur in one game's frame.
# The original cell read from `_df`, which is never defined and fails on a
# fresh kernel; the recorded output (CHI, NE, football) matches chi_ne_df.
unique_values = chi_ne_df['club'].unique()
print(unique_values)

<StringArray>
['CHI', 'NE', 'football']
Length: 3, dtype: string


In [20]:
# Preview the first rows of the gameId 2022102300 subset.
cle_bal_df.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
91655,2022102300,56,33131,Calais Campbell,1,2022-10-23 13:03:12.500000,93,BAL,right,35.5,30.47,0.04,0.04,0.02,286.13,205.8,
91656,2022102300,56,33131,Calais Campbell,2,2022-10-23 13:03:12.599999,93,BAL,right,35.5,30.47,0.04,0.04,0.01,286.13,198.59,
91657,2022102300,56,33131,Calais Campbell,3,2022-10-23 13:03:12.700000,93,BAL,right,35.48,30.46,0.04,0.04,0.02,284.42,219.35,
91658,2022102300,56,33131,Calais Campbell,4,2022-10-23 13:03:12.799999,93,BAL,right,35.48,30.44,0.06,0.08,0.02,278.52,204.61,
91659,2022102300,56,33131,Calais Campbell,5,2022-10-23 13:03:12.900000,93,BAL,right,35.48,30.42,0.1,0.32,0.02,260.78,189.6,


In [22]:
# List the distinct playId values recorded for this game.
unique_values = pd.unique(cle_bal_df['playId'])
print(unique_values)

[  56   77  101  122  146  170  191  215  244  265  288  346  367  388
  409  430  454  475  552  576  625  765  834  855  876  922  943 1020
 1041 1110 1156 1201 1251 1272 1314 1338 1364 1447 1471 1495 1634 1859
 1912 1933 1992 2018 2039 2094 2118 2171 2194 2215 2269 2290 2314 2342
 2450 2471 2506 2527 2551 2623 2644 2665 2691 2712 2795 2816 2842 2866
 2890 2916 2937 2990 3018 3044 3065 3094 3115 3136 3157 3178 3199 3220
 3248 3326 3347 3376 3410 3446 3487 3564 3592 3620 3682 3733]
