In [1]:
from matplotlib import pyplot as plt
from scipy.stats import linregress
import numpy as np
from sklearn import datasets
import pandas as pd

In [14]:
# Load the week 6 tracking CSV; the path is relative to the notebook's working directory.
week_df = pd.read_csv('../base_datasets/tracking_week_6.csv')
# Preview the first rows of the raw frame.
week_df.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
0,2022101300,54,42488.0,Bobby McCain,1,2022-10-13 20:16:18.799999,20.0,WAS,left,68.86,37.6,3.95,2.63,0.39,175.81,254.51,
1,2022101300,54,42488.0,Bobby McCain,2,2022-10-13 20:16:18.900000,20.0,WAS,left,68.48,37.47,4.07,2.75,0.4,180.36,249.5,
2,2022101300,54,42488.0,Bobby McCain,3,2022-10-13 20:16:19.000000,20.0,WAS,left,68.1,37.31,4.17,2.83,0.41,181.03,244.73,
3,2022101300,54,42488.0,Bobby McCain,4,2022-10-13 20:16:19.099999,20.0,WAS,left,67.73,37.11,4.27,2.98,0.42,182.99,239.75,
4,2022101300,54,42488.0,Bobby McCain,5,2022-10-13 20:16:19.200000,20.0,WAS,left,67.37,36.88,4.4,2.92,0.43,182.99,235.32,


In [15]:
"""
Cell generated by Data Wrangler.
"""
def clean_data(week_df):
    """Return a cleaned, typed version of a tracking DataFrame.

    The input frame is not modified: the first step (``fillna``) already
    produces a new frame, and every later step works on that result.

    Steps:
      * missing ``jerseyNumber`` / ``nflId`` values are replaced with 0;
      * ``nflId`` and ``jerseyNumber`` are cast to string and a trailing
        ``".0"`` (left over from the float representation) is removed;
      * ``club``, ``playDirection``, ``event`` and ``displayName`` are cast
        to the pandas ``string`` dtype;
      * ``x``, ``y``, ``s``, ``a`` and ``dis`` are rounded to two decimals;
      * ``time`` is cast to ``datetime64[ns]``.

    Parameters
    ----------
    week_df : pandas.DataFrame
        Frame containing at least the columns named above.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame.
    """
    # Replace missing values with 0 in columns: 'jerseyNumber', 'nflId'
    week_df = week_df.fillna({'jerseyNumber': 0, 'nflId': 0})

    # Cast the numeric id columns to string and drop the float suffix.
    # The pattern is anchored to the end of the value so that only a trailing
    # ".0" is removed; an unanchored literal replace would also eat a ".0"
    # in the middle of a value (e.g. "1.05" -> "15").
    for id_col in ('nflId', 'jerseyNumber'):
        week_df[id_col] = (
            week_df[id_col]
            .astype('string')
            .str.replace(r'\.0$', '', regex=True)
        )

    # Change column type to string for columns: 'club', 'playDirection', 'event', 'displayName'
    week_df = week_df.astype({
        'club': 'string',
        'playDirection': 'string',
        'event': 'string',
        'displayName': 'string',
    })

    # Reduce the floats to two decimal points
    float_cols = ['x', 'y', 's', 'a', 'dis']
    week_df[float_cols] = week_df[float_cols].round(2)

    # Change column type to datetime64[ns] for column: 'time'
    week_df = week_df.astype({'time': 'datetime64[ns]'})
    return week_df

# Run the cleaning step on a copy so the frame loaded from CSV is left as-is.
week_df_clean = week_df.copy().pipe(clean_data)
week_df_clean.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
0,2022101300,54,42488,Bobby McCain,1,2022-10-13 20:16:18.799999,20,WAS,left,68.86,37.6,3.95,2.63,0.39,175.81,254.51,
1,2022101300,54,42488,Bobby McCain,2,2022-10-13 20:16:18.900000,20,WAS,left,68.48,37.47,4.07,2.75,0.4,180.36,249.5,
2,2022101300,54,42488,Bobby McCain,3,2022-10-13 20:16:19.000000,20,WAS,left,68.1,37.31,4.17,2.83,0.41,181.03,244.73,
3,2022101300,54,42488,Bobby McCain,4,2022-10-13 20:16:19.099999,20,WAS,left,67.73,37.11,4.27,2.98,0.42,182.99,239.75,
4,2022101300,54,42488,Bobby McCain,5,2022-10-13 20:16:19.200000,20,WAS,left,67.37,36.88,4.4,2.92,0.43,182.99,235.32,


In [16]:
# Show dtypes and non-null counts of the cleaned frame to check the casts done in clean_data.
week_df_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1249365 entries, 0 to 1249364
Data columns (total 17 columns):
 #   Column         Non-Null Count    Dtype         
---  ------         --------------    -----         
 0   gameId         1249365 non-null  int64         
 1   playId         1249365 non-null  int64         
 2   nflId          1249365 non-null  string        
 3   displayName    1249365 non-null  string        
 4   frameId        1249365 non-null  int64         
 5   time           1249365 non-null  datetime64[ns]
 6   jerseyNumber   1249365 non-null  string        
 7   club           1249365 non-null  string        
 8   playDirection  1249365 non-null  string        
 9   x              1249365 non-null  float64       
 10  y              1249365 non-null  float64       
 11  s              1249365 non-null  float64       
 12  a              1249365 non-null  float64       
 13  dis            1249365 non-null  float64       
 14  o              1195080 non-null  f

In [17]:
# List the distinct game ids present in the cleaned frame.
unique_values = pd.unique(week_df_clean['gameId'])
print(unique_values)

[2022101300 2022101600 2022101601 2022101602 2022101603 2022101604
 2022101605 2022101606 2022101607 2022101608 2022101609 2022101610
 2022101611 2022101700]


In [13]:
# Split the cleaned frame into one DataFrame per game in a single pass.
# Grouping by the single column name yields scalar gameId keys; each group
# keeps the original row order and index, the same rows a boolean mask on
# `gameId` would select.
# The raw `week_df` is intentionally NOT reassigned here, so the load and
# clean cells stay re-runnable; any game is available as game_dfs[<gameId>].
game_dfs = {game_id: game_df for game_id, game_df in week_df_clean.groupby('gameId')}

# Named shortcuts for the games inspected below.
was_chi_df = game_dfs[2022101300]
atl_sf_df = game_dfs[2022101600]
cle_ne_df = game_dfs[2022101601]

# Check which clubs appear in the first game's frame.
unique_values = was_chi_df['club'].unique()
print(unique_values)

<StringArray>
['WAS', 'CHI', 'football']
Length: 3, dtype: string


In [24]:
# NOTE(review): `cle_lac_df` is not defined in any visible cell of this notebook, so this
# presumably fails with NameError on a fresh kernel — confirm where the frame should come from.
cle_lac_df.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
265926,2022100902,57,41231,Khalil Mack,1,2022-10-09 13:03:18.000000,52,LAC,left,84.67,16.86,0.15,0.16,0.01,48.78,331.66,
265927,2022100902,57,41231,Khalil Mack,2,2022-10-09 13:03:18.099999,52,LAC,left,84.66,16.87,0.13,0.11,0.01,46.37,327.64,
265928,2022100902,57,41231,Khalil Mack,3,2022-10-09 13:03:18.200000,52,LAC,left,84.66,16.88,0.08,0.15,0.01,44.74,332.76,
265929,2022100902,57,41231,Khalil Mack,4,2022-10-09 13:03:18.299999,52,LAC,left,84.65,16.88,0.04,0.26,0.01,48.06,304.83,
265930,2022100902,57,41231,Khalil Mack,5,2022-10-09 13:03:18.400000,52,LAC,left,84.65,16.88,0.05,0.32,0.0,48.06,47.63,
