In [1]:
from matplotlib import pyplot as plt
from scipy.stats import linregress
import numpy as np
from sklearn import datasets
import pandas as pd

In [2]:
# Load the week 9 tracking CSV (path is relative to the notebook's working
# directory) and preview the first rows.
week_df = pd.read_csv('../base_datasets/tracking_week_9.csv')
week_df.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
0,2022110300,55,38542.0,Fletcher Cox,1,2022-11-03 20:16:30.400000,91.0,PHI,right,35.31,21.25,0.25,0.21,0.01,275.05,263.18,
1,2022110300,55,38542.0,Fletcher Cox,2,2022-11-03 20:16:30.500000,91.0,PHI,right,35.3,21.25,0.21,0.2,0.02,270.08,264.09,
2,2022110300,55,38542.0,Fletcher Cox,3,2022-11-03 20:16:30.599999,91.0,PHI,right,35.29,21.25,0.17,0.18,0.01,267.61,264.78,
3,2022110300,55,38542.0,Fletcher Cox,4,2022-11-03 20:16:30.700000,91.0,PHI,right,35.31,21.24,0.1,0.15,0.02,263.43,250.8,
4,2022110300,55,38542.0,Fletcher Cox,5,2022-11-03 20:16:30.799999,91.0,PHI,right,35.31,21.25,0.07,0.12,0.01,262.28,258.57,


In [3]:
"""
Cell generated by Data Wrangler.
"""
def clean_data(week_df):
    """Return a copy of a tracking frame with normalised column dtypes.

    Steps applied, in order:
      * missing 'jerseyNumber' / 'nflId' values are filled with 0;
      * 'nflId' and 'jerseyNumber' are cast to the pandas ``string`` dtype and
        the trailing ".0" left over from the float representation is removed
        (38542.0 -> "38542");
      * 'club', 'playDirection', 'event' and 'displayName' are cast to ``string``;
      * 'x', 'y', 's', 'a' and 'dis' are rounded to two decimals;
      * 'time' is cast to ``datetime64[ns]``.

    Parameters
    ----------
    week_df : pandas.DataFrame
        Frame containing at least the columns named above.

    Returns
    -------
    pandas.DataFrame
        A new frame; the frame passed in is not modified because the first
        step (``fillna``) already returns a new object.
    """
    # Replace missing values with 0 in columns: 'jerseyNumber', 'nflId'
    week_df = week_df.fillna({'jerseyNumber': 0, 'nflId': 0})

    # Float-typed identifiers stringify as "<int>.0"; strip only that trailing
    # suffix. The pattern is anchored with `$` so a ".0" occurring elsewhere in
    # the text (e.g. "100.05") is left untouched.
    for id_col in ('nflId', 'jerseyNumber'):
        week_df = week_df.astype({id_col: 'string'})
        week_df[id_col] = week_df[id_col].str.replace(r"\.0$", "", regex=True)

    # Change column type to string for columns: 'club', 'playDirection', 'event', 'displayName'
    week_df = week_df.astype({
        'club': 'string',
        'playDirection': 'string',
        'event': 'string',
        'displayName': 'string',
    })

    # Reduce the floats to two decimal points
    float_cols = ['x', 'y', 's', 'a', 'dis']
    week_df[float_cols] = week_df[float_cols].round(2)

    # Change column type to datetime64[ns] for column: 'time'
    week_df = week_df.astype({'time': 'datetime64[ns]'})
    return week_df

# Clean a copy so the raw `week_df` stays available unchanged if this cell is
# re-run, then preview the cleaned result.
week_df_clean = clean_data(week_df.copy())
week_df_clean.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
0,2022110300,55,38542,Fletcher Cox,1,2022-11-03 20:16:30.400000,91,PHI,right,35.31,21.25,0.25,0.21,0.01,275.05,263.18,
1,2022110300,55,38542,Fletcher Cox,2,2022-11-03 20:16:30.500000,91,PHI,right,35.3,21.25,0.21,0.2,0.02,270.08,264.09,
2,2022110300,55,38542,Fletcher Cox,3,2022-11-03 20:16:30.599999,91,PHI,right,35.29,21.25,0.17,0.18,0.01,267.61,264.78,
3,2022110300,55,38542,Fletcher Cox,4,2022-11-03 20:16:30.700000,91,PHI,right,35.31,21.24,0.1,0.15,0.02,263.43,250.8,
4,2022110300,55,38542,Fletcher Cox,5,2022-11-03 20:16:30.799999,91,PHI,right,35.31,21.25,0.07,0.12,0.01,262.28,258.57,


In [4]:
# Summarise the column dtypes and non-null counts of the cleaned frame.
week_df_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1150023 entries, 0 to 1150022
Data columns (total 17 columns):
 #   Column         Non-Null Count    Dtype         
---  ------         --------------    -----         
 0   gameId         1150023 non-null  int64         
 1   playId         1150023 non-null  int64         
 2   nflId          1150023 non-null  string        
 3   displayName    1150023 non-null  string        
 4   frameId        1150023 non-null  int64         
 5   time           1150023 non-null  datetime64[ns]
 6   jerseyNumber   1150023 non-null  string        
 7   club           1150023 non-null  string        
 8   playDirection  1150023 non-null  string        
 9   x              1150023 non-null  float64       
 10  y              1150023 non-null  float64       
 11  s              1150023 non-null  float64       
 12  a              1150023 non-null  float64       
 13  dis            1150023 non-null  float64       
 14  o              1100062 non-null  f

In [5]:
# List the distinct gameId values present in the cleaned frame; these ids are
# what the per-game frames are filtered on.
unique_values=week_df_clean['gameId'].unique()
print(unique_values)

[2022110300 2022110600 2022110601 2022110602 2022110603 2022110604
 2022110605 2022110606 2022110607 2022110608 2022110609 2022110610
 2022110700]


In [20]:
def _game_rows(tracking_df, game_id):
    """Return the rows of `tracking_df` whose 'gameId' equals `game_id`."""
    return tracking_df[tracking_df['gameId'] == game_id]


# One frame per game, named after the two clubs involved. The ids are the
# distinct 'gameId' values found in `week_df_clean`.
phi_hou_df = _game_rows(week_df_clean, 2022110300)
atl_lac_df = _game_rows(week_df_clean, 2022110600)
chi_mia_df = _game_rows(week_df_clean, 2022110601)
cin_car_df = _game_rows(week_df_clean, 2022110602)
det_gb_df = _game_rows(week_df_clean, 2022110603)
lv_jax_df = _game_rows(week_df_clean, 2022110604)
ind_ne_df = _game_rows(week_df_clean, 2022110605)
buf_nyj_df = _game_rows(week_df_clean, 2022110606)
min_was_df = _game_rows(week_df_clean, 2022110607)
ari_sea_df = _game_rows(week_df_clean, 2022110608)
tb_la_df = _game_rows(week_df_clean, 2022110609)
ten_kc_df = _game_rows(week_df_clean, 2022110610)
no_bal_df = _game_rows(week_df_clean, 2022110700)

# Sanity check: the clubs present in one game's frame. The original cell read
# from an undefined name `_df`; the recorded output (BAL, NO, football) shows
# the NO/BAL frame was the one inspected.
unique_values = no_bal_df['club'].unique()
print(unique_values)

<StringArray>
['BAL', 'NO', 'football']
Length: 3, dtype: string


In [24]:
# NOTE(review): `cle_lac_df` is not defined in any cell visible here, and the
# recorded output shows gameId 2022100902, which is not among the gameIds
# printed for this week's data. This looks like leftover kernel state from
# another dataset; confirm where this frame should come from, otherwise this
# cell fails on Restart & Run All.
cle_lac_df.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
265926,2022100902,57,41231,Khalil Mack,1,2022-10-09 13:03:18.000000,52,LAC,left,84.67,16.86,0.15,0.16,0.01,48.78,331.66,
265927,2022100902,57,41231,Khalil Mack,2,2022-10-09 13:03:18.099999,52,LAC,left,84.66,16.87,0.13,0.11,0.01,46.37,327.64,
265928,2022100902,57,41231,Khalil Mack,3,2022-10-09 13:03:18.200000,52,LAC,left,84.66,16.88,0.08,0.15,0.01,44.74,332.76,
265929,2022100902,57,41231,Khalil Mack,4,2022-10-09 13:03:18.299999,52,LAC,left,84.65,16.88,0.04,0.26,0.01,48.06,304.83,
265930,2022100902,57,41231,Khalil Mack,5,2022-10-09 13:03:18.400000,52,LAC,left,84.65,16.88,0.05,0.32,0.0,48.06,47.63,
