In [2]:
from matplotlib import pyplot as plt
from scipy.stats import linregress
import numpy as np
from sklearn import datasets
import pandas as pd

In [3]:
week_df = pd.read_csv('../base_datasets/tracking_week_8.csv')
week_df.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
0,2022102700,68,38557.0,Kevin Zeitler,1,2022-10-27 20:16:37.099999,70.0,BAL,right,25.82,28.56,0.92,1.22,0.1,62.16,327.7,
1,2022102700,68,38557.0,Kevin Zeitler,2,2022-10-27 20:16:37.200000,70.0,BAL,right,25.78,28.64,0.87,1.12,0.09,59.23,337.03,
2,2022102700,68,38557.0,Kevin Zeitler,3,2022-10-27 20:16:37.299999,70.0,BAL,right,25.77,28.72,0.78,1.14,0.08,58.48,348.42,pass_arrived
3,2022102700,68,38557.0,Kevin Zeitler,4,2022-10-27 20:16:37.400000,70.0,BAL,right,25.77,28.79,0.72,1.23,0.07,57.03,1.0,
4,2022102700,68,38557.0,Kevin Zeitler,5,2022-10-27 20:16:37.500000,70.0,BAL,right,25.79,28.86,0.7,1.26,0.07,54.68,15.53,


In [4]:
"""
Cell generated by Data Wrangler.
"""
def clean_data(week_df):
    # Replace missing values with 0 in columns: 'jerseyNumber', 'nflId'
    week_df = week_df.fillna({'jerseyNumber': 0, 'nflId': 0})
    # Change column type to string for column: 'nflId'
    week_df = week_df.astype({'nflId': 'string'})
    # Replace all instances of ".0" with "" in column: 'nflId'
    week_df['nflId'] = week_df['nflId'].str.replace(".0", "", case=False, regex=False)
    # Change column type to string for column: 'jerseyNumber'
    week_df = week_df.astype({'jerseyNumber': 'string'})
    # Replace all instances of ".0" with "" in column: 'jerseyNumber'
    week_df['jerseyNumber'] = week_df['jerseyNumber'].str.replace(".0", "", case=False, regex=False)
    # Change column type to string for columns: 'club', 'playDirection', 'event', 'displayName', 'time'
    week_df = week_df.astype({'club': 'string', 'playDirection': 'string','event': 'string','displayName': 'string'})
    # Reduce the floats to two decimal points
    week_df[['x', 'y', 's', 'a', 'dis']] = week_df[['x', 'y', 's', 'a', 'dis']].round(2)
    # Change column type to datetime64[ns] for column: 'time'
    week_df = week_df.astype({'time': 'datetime64[ns]'})
    return week_df

week_df_clean = clean_data(week_df.copy())
week_df_clean.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
0,2022102700,68,38557,Kevin Zeitler,1,2022-10-27 20:16:37.099999,70,BAL,right,25.82,28.56,0.92,1.22,0.1,62.16,327.7,
1,2022102700,68,38557,Kevin Zeitler,2,2022-10-27 20:16:37.200000,70,BAL,right,25.78,28.64,0.87,1.12,0.09,59.23,337.03,
2,2022102700,68,38557,Kevin Zeitler,3,2022-10-27 20:16:37.299999,70,BAL,right,25.77,28.72,0.78,1.14,0.08,58.48,348.42,pass_arrived
3,2022102700,68,38557,Kevin Zeitler,4,2022-10-27 20:16:37.400000,70,BAL,right,25.77,28.79,0.72,1.23,0.07,57.03,1.0,
4,2022102700,68,38557,Kevin Zeitler,5,2022-10-27 20:16:37.500000,70,BAL,right,25.79,28.86,0.7,1.26,0.07,54.68,15.53,


In [5]:
week_df_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1406772 entries, 0 to 1406771
Data columns (total 17 columns):
 #   Column         Non-Null Count    Dtype         
---  ------         --------------    -----         
 0   gameId         1406772 non-null  int64         
 1   playId         1406772 non-null  int64         
 2   nflId          1406772 non-null  string        
 3   displayName    1406772 non-null  string        
 4   frameId        1406772 non-null  int64         
 5   time           1406772 non-null  datetime64[ns]
 6   jerseyNumber   1406772 non-null  string        
 7   club           1406772 non-null  string        
 8   playDirection  1406772 non-null  string        
 9   x              1406772 non-null  float64       
 10  y              1406772 non-null  float64       
 11  s              1406772 non-null  float64       
 12  a              1406772 non-null  float64       
 13  dis            1406772 non-null  float64       
 14  o              1345642 non-null  f

In [6]:
unique_values=week_df_clean['gameId'].unique()
print(unique_values)

[2022102700 2022103000 2022103001 2022103002 2022103003 2022103004
 2022103005 2022103006 2022103007 2022103008 2022103009 2022103010
 2022103011 2022103012 2022103100]


In [23]:
tb_bal_df = week_df_clean[week_df_clean['gameId'] ==     2022102700] 
den_jax_df = week_df_clean[week_df_clean['gameId'] == 2022103000]
atl_car_df = week_df_clean[week_df_clean['gameId'] == 2022103001]
chi_dal_df = week_df_clean[week_df_clean['gameId'] == 2022103002]
det_mia_df = week_df_clean[week_df_clean['gameId'] == 2022103003] 
hou_ten_df = week_df_clean[week_df_clean['gameId'] == 2022103004]
min_ari_df = week_df_clean[week_df_clean['gameId'] == 2022103005]
no_lv_df = week_df_clean[week_df_clean['gameId'] == 2022103006]
nyj_ne_df = week_df_clean[week_df_clean['gameId'] == 2022103007]
phi_pit_df = week_df_clean[week_df_clean['gameId'] == 2022103008]
ind_was_df = week_df_clean[week_df_clean['gameId'] == 2022103009]
la_sf_df = week_df_clean[week_df_clean['gameId'] == 2022103010]
sea_nyg_df = week_df_clean[week_df_clean['gameId'] == 2022103011]
buf_gb_df = week_df_clean[week_df_clean['gameId'] == 2022103012]
cle_cin_df = week_df_clean[week_df_clean['gameId'] == 2022103100]

unique_values=_df['club'].unique()
print(unique_values)

<StringArray>
['CIN', 'CLE', 'football']
Length: 3, dtype: string


In [24]:
cle_lac_df.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
265926,2022100902,57,41231,Khalil Mack,1,2022-10-09 13:03:18.000000,52,LAC,left,84.67,16.86,0.15,0.16,0.01,48.78,331.66,
265927,2022100902,57,41231,Khalil Mack,2,2022-10-09 13:03:18.099999,52,LAC,left,84.66,16.87,0.13,0.11,0.01,46.37,327.64,
265928,2022100902,57,41231,Khalil Mack,3,2022-10-09 13:03:18.200000,52,LAC,left,84.66,16.88,0.08,0.15,0.01,44.74,332.76,
265929,2022100902,57,41231,Khalil Mack,4,2022-10-09 13:03:18.299999,52,LAC,left,84.65,16.88,0.04,0.26,0.01,48.06,304.83,
265930,2022100902,57,41231,Khalil Mack,5,2022-10-09 13:03:18.400000,52,LAC,left,84.65,16.88,0.05,0.32,0.0,48.06,47.63,
