In [1]:
from IPython.display import clear_output
from PIL import Image
import os
import json
import pandas as pd

def downcast(df, verbose=True):
    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        dtype_name = df[col].dtype.name
        if dtype_name == 'object':
            pass
        elif dtype_name == 'bool':
            df[col] = df[col].astype('int8')
        elif dtype_name.startswith('int') or (df[col].round() == df[col]).all():
            df[col] = pd.to_numeric(df[col], downcast='integer')
        else:
            df[col] = pd.to_numeric(df[col], downcast='float')
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        print('{:.1f}% Compressed'.format(100 * (start_mem - end_mem) / start_mem))
    
    return df

plays = pd.read_csv("nfl-big-data-bowl-2022/plays.csv")
plays = downcast(plays)
scouting = pd.read_csv("nfl-big-data-bowl-2022/PFFScoutingData.csv")
scouting = downcast(scouting)
tracking2018 = pd.read_csv("nfl-big-data-bowl-2022/tracking2018.csv")
tracking2018 = downcast(tracking2018)
tracking2019 = pd.read_csv("nfl-big-data-bowl-2022/tracking2019.csv")
tracking2019 = downcast(tracking2019)
tracking2020 = pd.read_csv("nfl-big-data-bowl-2022/tracking2020.csv")
tracking2020 = downcast(tracking2020)

43.0% Compressed
13.7% Compressed
36.1% Compressed
36.1% Compressed
36.1% Compressed


In [2]:
frames = [tracking2018,tracking2019,tracking2020]
tracking = pd.concat(frames)

In [54]:
pd.set_option('display.max_rows', 10)
plays.nunique()

gameId                      764
playId                     4435
playDescription           12355
quarter                       5
down                          5
                          ...  
passResult                    4
kickLength                   79
kickReturnYardage           106
playResult                  116
absoluteYardlineNumber       99
Length: 25, dtype: int64

In [55]:
scouting['snapDetail'].value_counts()

OK    5451
L      185
H      136
<       77
>       70
Name: snapDetail, dtype: int64

In [80]:
df = plays.loc[plays['specialTeamsPlayType']=='Extra Point']
x = df['specialTeamsResult'].value_counts()
x

Kick Attempt Good           3252
Kick Attempt No Good         199
Blocked Kick Attempt          24
Non-Special Teams Result      13
Name: specialTeamsResult, dtype: int64

In [57]:
import plotly.graph_objects as go
import plotly.express as px    

fig = px.bar(x,
             title='Extra point',
            labels={'x':'actual',
                   'y':'predicted'},
            width=600, 
            height=400
            )

fig.update_layout(showlegend=False)

fig.show()

In [83]:
pd.set_option('display.max_columns', None)
DF = df.merge(tracking,how='left',left_on=['gameId','playId'],right_on=['gameId','playId'])
DF2 = df.merge(scouting,how='left',left_on=['gameId','playId'],right_on=['gameId','playId'])

In [88]:
DF2

Unnamed: 0,gameId,playId,playDescription,quarter,down,yardsToGo,possessionTeam,specialTeamsPlayType,specialTeamsResult,kickerId,returnerId,kickBlockerId,yardlineSide,yardlineNumber,gameClock,penaltyCodes,penaltyJerseyNumbers,penaltyYards,preSnapHomeScore,preSnapVisitorScore,passResult,kickLength,kickReturnYardage,playResult,absoluteYardlineNumber,snapDetail,snapTime,operationTime,hangTime,kickType,kickDirectionIntended,kickDirectionActual,returnDirectionIntended,returnDirectionActual,missedTackler,assistTackler,tackler,kickoffReturnFormation,gunners,puntRushers,specialTeamsSafeties,vises,kickContactType
0,2018090600,2883,"J.Elliott extra point is GOOD, Center-R.Lovato...",3,0,0,PHI,Extra Point,Kick Attempt Good,44966.0,,,ATL,15,04:37:00,,,,9,6,,,,0,25,,,,,,,,,,,,,,,,ATL 42,,
1,2018090600,3553,"M.Bryant extra point is No Good, Hit Right Upr...",4,0,0,ATL,Extra Point,Kick Attempt No Good,27091.0,,,PHI,15,09:48:00,,,,10,12,,,,0,25,,,,,,,,,,,,,,,,PHI 55; PHI 58,,
2,2018090900,380,"J.Tucker extra point is GOOD, Center-M.Cox, Ho...",1,0,0,BAL,Extra Point,Kick Attempt Good,39470.0,,,BUF,15,08:42:00,,,,6,0,,,,0,95,,,,,,,,,,,,,,,,BUF 49,,
3,2018090900,972,"J.Tucker extra point is GOOD, Center-M.Cox, Ho...",1,0,0,BAL,Extra Point,Kick Attempt Good,39470.0,,,BUF,15,01:32:00,,,,13,0,,,,0,95,,,,,,,,,,,,,,,,BUF 27; BUF 49,,
4,2018090900,2757,"J.Tucker extra point is GOOD, Center-M.Cox, Ho...",3,0,0,BAL,Extra Point,Kick Attempt Good,39470.0,,,BUF,15,12:28:00,,,,32,0,,,,0,25,,,,,,,,,,,,,,,,BUF 49,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3483,2021010315,2813,"T.Vizcaino extra point is GOOD, Center-C.Holba...",4,0,0,SF,Extra Point,Kick Attempt Good,47590.0,,,SEA,15,14:22:00,,,,15,6,,,,0,95,,,,,,,,,,,,,,,,SEA 37; SEA 54,,
3484,2021010315,3074,"J.Myers extra point is No Good, Wide Left, Cen...",4,0,0,SEA,Extra Point,Kick Attempt No Good,41175.0,,,SF,15,10:54:00,,,,16,12,,,,0,25,,,,,,,,,,,,,,,,SF 20; SF 51,,
3485,2021010315,3667,"J.Myers extra point is GOOD, Center-T.Ott, Hol...",4,0,0,SEA,Extra Point,Kick Attempt Good,41175.0,,,SF,15,02:20:00,,,,16,18,,,,0,25,,,,,,,,,,,,,,,,SF 20; SF 51,,
3486,2021010315,3870,"J.Myers extra point is GOOD, Center-T.Ott, Hol...",4,0,0,SEA,Extra Point,Kick Attempt Good,41175.0,,,SF,15,01:49:00,,,,16,25,,,,0,25,,,,,,,,,,,,,,,,SF 20; SF 51,,


In [89]:
DF2['tackler'].value_counts()

DAL 98    1
Name: tackler, dtype: int64

In [59]:
ee = DF.groupby(['gameId','playId'])['event'].agg('unique').reset_index()

In [60]:
ee

Unnamed: 0,gameId,playId,event
0,2018090600,2883,"[None, ball_snap, extra_point_attempt, extra_p..."
1,2018090600,3553,"[None, ball_snap, extra_point_attempt, extra_p..."
2,2018090900,380,"[None, ball_snap, extra_point_attempt, extra_p..."
3,2018090900,972,"[None, ball_snap, extra_point_attempt, extra_p..."
4,2018090900,2757,"[None, ball_snap, extra_point_attempt, extra_p..."
...,...,...,...
3483,2021010315,2813,"[None, ball_snap, extra_point_attempt, extra_p..."
3484,2021010315,3074,"[None, ball_snap, extra_point_attempt, extra_p..."
3485,2021010315,3667,"[None, ball_snap, extra_point_attempt, extra_p..."
3486,2021010315,3870,"[None, ball_snap, extra_point_attempt, extra_p..."


In [61]:
ee['liststring'] = [','.join(map(str, l)) for l in ee['event']]
ee['liststring'] = ee['liststring'].str.replace('None,', '')
EE = ee.loc[(ee['liststring']!='ball_snap,extra_point_attempt,extra_point')&(ee['liststring']!='ball_snap,extra_point_attempt,extra_point_missed')
&(ee['liststring']!='line_set,ball_snap,extra_point_attempt,extra_point')&(ee['liststring']!='ball_snap,line_set,extra_point_attempt,extra_point')]
EE

Unnamed: 0,gameId,playId,event,liststring
41,2018090910,2976,"[None, ball_snap, fumble, tackle]","ball_snap,fumble,tackle"
118,2018091611,1690,"[None, ball_snap, extra_point_attempt, extra_p...","ball_snap,extra_point_attempt,extra_point_bloc..."
127,2018091612,3753,"[None, ball_snap, extra_point_attempt, extra_p...","ball_snap,extra_point_attempt,extra_point,pena..."
264,2018093009,4176,"[None, field_goal_play, ball_snap, field_goal_...","field_goal_play,ball_snap,field_goal_attempt,f..."
329,2018100710,314,"[None, ball_snap, fumble, fumble_defense_recov...","ball_snap,fumble,fumble_defense_recovered,tackle"
...,...,...,...,...
3304,2020122011,1885,"[None, ball_snap, extra_point_attempt]","ball_snap,extra_point_attempt"
3306,2020122011,3766,"[None, ball_snap, fumble, extra_point_attempt,...","ball_snap,fumble,extra_point_attempt,fumble_of..."
3307,2020122011,4357,"[None, ball_snap, extra_point_attempt]","ball_snap,extra_point_attempt"
3312,2020122012,4719,"[None, ball_snap, extra_point]","ball_snap,extra_point"


In [67]:
ee.loc[ee['liststring'].str.contains('two_point_conversion')==True]['liststring']

'ball_snap,extra_point_attempt,extra_point_blocked,fumble,fumble_defense_recovered,first_contact,two_point_conversion'

In [63]:
EE['liststring'][2800]

'ball_snap,field_goal_attempt,field_goal'

In [64]:
pd.set_option('display.max_rows', 10)
EE['liststring'].value_counts()

ball_snap,extra_point_attempt                                                                      33
ball_snap,extra_point_attempt,extra_point_blocked                                                  10
ball_snap,extra_point_attempt,extra_point_blocked,fumble_defense_recovered,first_contact,tackle     4
ball_snap,fumble,fumble_offense_recovered,pass_forward,pass_outcome_incomplete                      3
line_set,ball_snap,extra_point_attempt,extra_point_missed                                           3
                                                                                                   ..
ball_snap,extra_point_attempt,extra_point_blocked,fumble_offense_recovered,tackle                   1
field_goal_play,ball_snap,field_goal_attempt,field_goal                                             1
ball_snap,extra_point_attempt,extra_point_blocked,extra_point_missed                                1
line_set,ball_snap,fumble,fumble_offense_recovered                                

In [79]:
success = ee.loc[(ee['liststring'].str.contains('extra_point')==True)&(ee['liststring'].str.contains('extra_point_missed')==False)]
success.shape

(3277, 4)

In [71]:
plot_df = pd.DataFrame()
plot_df['Extra Point Type'] = ['1pt conversion','1pt conversion','2pt touchdown','2pt touchdown']
plot_df['Outcome'] = ['Success','Fail','Success','Fail']
plot_df['count'] = [1800,200,750,250]

fig = px.bar(plot_df,
             x = 'Extra Point Type',
             y = 'count',
             color = 'Outcome',
             title='Extra points',

            width=600, 
            height=400
            )

fig.update_layout(showlegend=True)

fig.show()