In [35]:
from matplotlib import pyplot as plt
from scipy.stats import linregress
import numpy as np
from sklearn import datasets
import pandas as pd

In [36]:
# Read the week-1 tracking CSV from the shared datasets folder into memory.
week_df = pd.read_csv(filepath_or_buffer='../base_datasets/tracking_week_1.csv')
# Show the first five rows as a quick sanity check of the raw columns.
week_df.head(n=5)

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
0,2022090800,56,35472.0,Rodger Saffold,1,2022-09-08 20:24:05.200000,76.0,BUF,left,88.37,27.27,1.62,1.15,0.16,231.74,147.9,
1,2022090800,56,35472.0,Rodger Saffold,2,2022-09-08 20:24:05.299999,76.0,BUF,left,88.47,27.13,1.67,0.61,0.17,230.98,148.53,pass_arrived
2,2022090800,56,35472.0,Rodger Saffold,3,2022-09-08 20:24:05.400000,76.0,BUF,left,88.56,27.01,1.57,0.49,0.15,230.98,147.05,
3,2022090800,56,35472.0,Rodger Saffold,4,2022-09-08 20:24:05.500000,76.0,BUF,left,88.64,26.9,1.44,0.89,0.14,232.38,145.42,
4,2022090800,56,35472.0,Rodger Saffold,5,2022-09-08 20:24:05.599999,76.0,BUF,left,88.72,26.8,1.29,1.24,0.13,233.36,141.95,


In [37]:
"""
Cell generated by Data Wrangler.
"""
def clean_data(week_df):
    """Return a cleaned version of a weekly tracking frame.

    The input frame is not modified; every step works on a new frame.

    Steps, in order:
      1. Missing 'nflId' / 'jerseyNumber' values are replaced with 0.
      2. 'nflId' and 'jerseyNumber' are cast to the pandas 'string' dtype and a
         trailing ".0" (left over from the float representation) is removed.
      3. 'club', 'playDirection', 'event' and 'displayName' are cast to 'string'.
      4. 'x', 'y', 's', 'a' and 'dis' are rounded to two decimals.
      5. 'time' is cast to datetime64[ns].

    Parameters
    ----------
    week_df : pandas.DataFrame
        Frame containing at least the columns named above.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame.
    """
    # Replace missing values with 0 in columns: 'jerseyNumber', 'nflId'
    week_df = week_df.fillna({'jerseyNumber': 0, 'nflId': 0})

    # Cast the id-like columns to string and drop the float artefact ".0".
    # The pattern is anchored to the end of the string: an unanchored literal
    # replace would also eat a ".0" in the middle (e.g. "10.05" -> "105").
    for column in ('nflId', 'jerseyNumber'):
        week_df[column] = (
            week_df[column]
            .astype('string')
            .str.replace(r'\.0$', '', regex=True)
        )

    # Change column type to string for columns: 'club', 'playDirection', 'event', 'displayName'
    week_df = week_df.astype({
        'club': 'string',
        'playDirection': 'string',
        'event': 'string',
        'displayName': 'string',
    })

    # Reduce the floats to two decimal points
    rounded_columns = ['x', 'y', 's', 'a', 'dis']
    week_df[rounded_columns] = week_df[rounded_columns].round(2)

    # Change column type to datetime64[ns] for column: 'time'
    week_df = week_df.astype({'time': 'datetime64[ns]'})
    return week_df

# Clean a copy of the raw frame; the copy is what gets handed to clean_data.
week_df_clean = clean_data(week_df=week_df.copy())
# Preview the first five cleaned rows.
week_df_clean.head(n=5)

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
0,2022090800,56,35472,Rodger Saffold,1,2022-09-08 20:24:05.200000,76,BUF,left,88.37,27.27,1.62,1.15,0.16,231.74,147.9,
1,2022090800,56,35472,Rodger Saffold,2,2022-09-08 20:24:05.299999,76,BUF,left,88.47,27.13,1.67,0.61,0.17,230.98,148.53,pass_arrived
2,2022090800,56,35472,Rodger Saffold,3,2022-09-08 20:24:05.400000,76,BUF,left,88.56,27.01,1.57,0.49,0.15,230.98,147.05,
3,2022090800,56,35472,Rodger Saffold,4,2022-09-08 20:24:05.500000,76,BUF,left,88.64,26.9,1.44,0.89,0.14,232.38,145.42,
4,2022090800,56,35472,Rodger Saffold,5,2022-09-08 20:24:05.599999,76,BUF,left,88.72,26.8,1.29,1.24,0.13,233.36,141.95,


In [38]:
# Print a per-column summary (non-null count and dtype) of the cleaned frame.
week_df_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1407439 entries, 0 to 1407438
Data columns (total 17 columns):
 #   Column         Non-Null Count    Dtype         
---  ------         --------------    -----         
 0   gameId         1407439 non-null  int64         
 1   playId         1407439 non-null  int64         
 2   nflId          1346246 non-null  string        
 3   displayName    1407439 non-null  string        
 4   frameId        1407439 non-null  int64         
 5   time           1407439 non-null  datetime64[ns]
 6   jerseyNumber   1346246 non-null  string        
 7   club           1407439 non-null  string        
 8   playDirection  1407439 non-null  string        
 9   x              1407439 non-null  float64       
 10  y              1407439 non-null  float64       
 11  s              1407439 non-null  float64       
 12  a              1407439 non-null  float64       
 13  dis            1407439 non-null  float64       
 14  o              1346397 non-null  f

In [39]:
# Collect the distinct game ids found in the cleaned tracking data and print them.
unique_values = pd.unique(week_df_clean['gameId'])
print(unique_values)

[2022090800 2022091100 2022091101 2022091102 2022091103 2022091104
 2022091105 2022091106 2022091107 2022091108 2022091109 2022091110
 2022091111 2022091112 2022091113 2022091200]


In [42]:
def select_game(tracking_df, game_id):
    """Return the rows of ``tracking_df`` whose ``gameId`` equals ``game_id``.

    Parameters
    ----------
    tracking_df : pandas.DataFrame
        Frame with a ``gameId`` column.
    game_id : int
        Game identifier to keep.

    Returns
    -------
    pandas.DataFrame
        Boolean-mask selection of ``tracking_df``. The original row index is
        kept, and the result is empty when no row matches.
    """
    return tracking_df[tracking_df['gameId'] == game_id]


# One frame per game id present in the week.
# NOTE(review): the variable names look like abbreviations of the two clubs in
# each game -- confirm against the schedule before relying on them.
buf_la_df = select_game(week_df_clean, 2022090800)
no_atl_df = select_game(week_df_clean, 2022091100)
cle_car_df = select_game(week_df_clean, 2022091101)
sf_chi_df = select_game(week_df_clean, 2022091102)
cin_pit_df = select_game(week_df_clean, 2022091103)
phi_det_df = select_game(week_df_clean, 2022091104)
ind_hou_df = select_game(week_df_clean, 2022091105)
mia_ne_df = select_game(week_df_clean, 2022091106)
nyj_bal_df = select_game(week_df_clean, 2022091107)
ten_nyg_df = select_game(week_df_clean, 2022091108)
jax_was_df = select_game(week_df_clean, 2022091109)
kc_ari_df = select_game(week_df_clean, 2022091110)
lv_lac_df = select_game(week_df_clean, 2022091111)
min_gb_df = select_game(week_df_clean, 2022091112)
tb_dal_df = select_game(week_df_clean, 2022091113)
den_sea_df = select_game(week_df_clean, 2022091200)




In [43]:
# Preview the first five rows of the frame filtered to game 2022091101.
cle_car_df.head(n=5)

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
172799,2022091101,85,41227,Jadeveon Clowney,1,2022-09-11 13:05:42.500000,90,CLE,right,26.32,17.19,4.18,2.14,0.42,14.9,258.77,
172800,2022091101,85,41227,Jadeveon Clowney,2,2022-09-11 13:05:42.599999,90,CLE,right,25.91,17.12,4.01,2.4,0.42,14.04,259.13,
172801,2022091101,85,41227,Jadeveon Clowney,3,2022-09-11 13:05:42.700000,90,CLE,right,25.55,17.05,3.66,2.82,0.37,14.04,259.46,
172802,2022091101,85,41227,Jadeveon Clowney,4,2022-09-11 13:05:42.799999,90,CLE,right,25.2,17.0,3.38,2.93,0.35,14.04,260.68,
172803,2022091101,85,41227,Jadeveon Clowney,5,2022-09-11 13:05:42.900000,90,CLE,right,24.9,16.97,3.01,3.15,0.31,11.37,262.32,pass_arrived
