In [1]:
from matplotlib import pyplot as plt
from scipy.stats import linregress
import numpy as np
from sklearn import datasets
import pandas as pd

In [2]:
# Read the week-5 tracking CSV (path is relative to the notebook's folder)
# and preview the first rows.
week_df = pd.read_csv(filepath_or_buffer='../base_datasets/tracking_week_5.csv')
week_df.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
0,2022100600,90,33084.0,Matt Ryan,1,2022-10-06 20:17:04.799999,2.0,IND,left,90.42,23.74,0.11,0.04,0.03,271.98,257.76,
1,2022100600,90,33084.0,Matt Ryan,2,2022-10-06 20:17:04.900000,2.0,IND,left,90.39,23.74,0.14,0.06,0.03,272.84,256.68,
2,2022100600,90,33084.0,Matt Ryan,3,2022-10-06 20:17:05.000000,2.0,IND,left,90.36,23.73,0.17,0.09,0.03,272.84,254.91,
3,2022100600,90,33084.0,Matt Ryan,4,2022-10-06 20:17:05.099999,2.0,IND,left,90.32,23.73,0.19,0.11,0.04,275.8,260.06,
4,2022100600,90,33084.0,Matt Ryan,5,2022-10-06 20:17:05.200000,2.0,IND,left,90.28,23.72,0.2,0.13,0.04,275.8,257.79,


In [3]:
"""
Cell generated by Data Wrangler.
"""
def clean_data(week_df):
    """Normalise the column types of a tracking-data frame.

    Steps, in order:
      * missing 'jerseyNumber' / 'nflId' values are filled with 0;
      * 'nflId' and 'jerseyNumber' are cast to the pandas 'string' dtype and
        a trailing ".0" (left over from the float representation) is removed;
      * 'club', 'playDirection', 'event' and 'displayName' are cast to
        'string';
      * 'x', 'y', 's', 'a' and 'dis' are rounded to two decimals;
      * 'time' is cast to datetime64[ns].

    Parameters
    ----------
    week_df : pandas.DataFrame
        Frame containing at least the columns listed above.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame.
    """
    # Replace missing values with 0 in columns: 'jerseyNumber', 'nflId'
    week_df = week_df.fillna({'jerseyNumber': 0, 'nflId': 0})

    # Cast the id-like columns to string and drop the float suffix.
    # The pattern is anchored to the end of the value: an unanchored literal
    # replace of ".0" would also eat a ".0" in the middle of the text
    # (e.g. "10.05" -> "105").
    for column in ('nflId', 'jerseyNumber'):
        as_text = week_df[column].astype('string')
        week_df[column] = as_text.str.replace(r"\.0$", "", regex=True)

    # Change column type to string for the remaining text columns
    # ('time' is handled separately below and becomes a datetime).
    week_df = week_df.astype({
        'club': 'string',
        'playDirection': 'string',
        'event': 'string',
        'displayName': 'string',
    })

    # Reduce the floats to two decimal points
    rounded_columns = ['x', 'y', 's', 'a', 'dis']
    week_df[rounded_columns] = week_df[rounded_columns].round(2)

    # Change column type to datetime64[ns] for column: 'time'
    week_df = week_df.astype({'time': 'datetime64[ns]'})
    return week_df

# Run the cleaning step on a copy of week_df and preview the result.
week_df_clean = clean_data(week_df=week_df.copy())
week_df_clean.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
0,2022100600,90,33084,Matt Ryan,1,2022-10-06 20:17:04.799999,2,IND,left,90.42,23.74,0.11,0.04,0.03,271.98,257.76,
1,2022100600,90,33084,Matt Ryan,2,2022-10-06 20:17:04.900000,2,IND,left,90.39,23.74,0.14,0.06,0.03,272.84,256.68,
2,2022100600,90,33084,Matt Ryan,3,2022-10-06 20:17:05.000000,2,IND,left,90.36,23.73,0.17,0.09,0.03,272.84,254.91,
3,2022100600,90,33084,Matt Ryan,4,2022-10-06 20:17:05.099999,2,IND,left,90.32,23.73,0.19,0.11,0.04,275.8,260.06,
4,2022100600,90,33084,Matt Ryan,5,2022-10-06 20:17:05.200000,2,IND,left,90.28,23.72,0.2,0.13,0.04,275.8,257.79,


In [4]:
# Print the column dtypes and non-null counts of the cleaned frame.
week_df_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1476232 entries, 0 to 1476231
Data columns (total 17 columns):
 #   Column         Non-Null Count    Dtype         
---  ------         --------------    -----         
 0   gameId         1476232 non-null  int64         
 1   playId         1476232 non-null  int64         
 2   nflId          1476232 non-null  string        
 3   displayName    1476232 non-null  string        
 4   frameId        1476232 non-null  int64         
 5   time           1476232 non-null  datetime64[ns]
 6   jerseyNumber   1476232 non-null  string        
 7   club           1476232 non-null  string        
 8   playDirection  1476232 non-null  string        
 9   x              1476232 non-null  float64       
 10  y              1476232 non-null  float64       
 11  s              1476232 non-null  float64       
 12  a              1476232 non-null  float64       
 13  dis            1476232 non-null  float64       
 14  o              1412137 non-null  f

In [5]:
# List the distinct gameId values present in the cleaned frame.
unique_values = week_df_clean.gameId.unique()
print(unique_values)

[2022100600 2022100900 2022100901 2022100902 2022100903 2022100904
 2022100905 2022100906 2022100907 2022100908 2022100909 2022100910
 2022100911 2022100912 2022100913 2022101000]


In [23]:
def select_game(game_id):
    """Return the rows of week_df_clean whose 'gameId' equals game_id."""
    return week_df_clean[week_df_clean['gameId'] == game_id]


# One frame per game; every variable name is kept so later cells still work.
ind_den_df = select_game(2022100600)
gb_nyg_df = select_game(2022100900)
buf_pit_df = select_game(2022100901)
cle_lac_df = select_game(2022100902)
hou_jax_df = select_game(2022100903)
chi_min_df = select_game(2022100904)
det_ne_df = select_game(2022100905)
sea_no_df = select_game(2022100906)
nyj_mia_df = select_game(2022100907)
tb_atl_df = select_game(2022100908)
ten_was_df = select_game(2022100909)
car_sf_df = select_game(2022100910)
# NOTE(review): 'car' appears in both car_sf_df and phi_car_df; one of the two
# names is presumably mislabeled -- confirm the clubs for gameId 2022100911.
phi_car_df = select_game(2022100911)
la_dal_df = select_game(2022100912)
cin_bal_df = select_game(2022100913)
lv_kc_df = select_game(2022101000)

# Sanity check: list the clubs present in one game's frame.
# This previously read the undefined name `_df`, which raises NameError on a
# fresh kernel; lv_kc_df matches the recorded output (LV, KC, football).
unique_values = lv_kc_df['club'].unique()
print(unique_values)

<StringArray>
['LV', 'KC', 'football']
Length: 3, dtype: string


In [24]:
# Preview the first rows of the cle_lac_df subset.
cle_lac_df.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
265926,2022100902,57,41231,Khalil Mack,1,2022-10-09 13:03:18.000000,52,LAC,left,84.67,16.86,0.15,0.16,0.01,48.78,331.66,
265927,2022100902,57,41231,Khalil Mack,2,2022-10-09 13:03:18.099999,52,LAC,left,84.66,16.87,0.13,0.11,0.01,46.37,327.64,
265928,2022100902,57,41231,Khalil Mack,3,2022-10-09 13:03:18.200000,52,LAC,left,84.66,16.88,0.08,0.15,0.01,44.74,332.76,
265929,2022100902,57,41231,Khalil Mack,4,2022-10-09 13:03:18.299999,52,LAC,left,84.65,16.88,0.04,0.26,0.01,48.06,304.83,
265930,2022100902,57,41231,Khalil Mack,5,2022-10-09 13:03:18.400000,52,LAC,left,84.65,16.88,0.05,0.32,0.0,48.06,47.63,
