# Teams success on 4th down

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

## Bring in data with less features

In [2]:
df = pd.read_csv('../../datasets/less_feature_df.csv', index_col = [0])

In [3]:
df.shape

(449371, 99)

In [4]:
df.columns

Index(['play_id', 'game_id', 'home_team', 'away_team', 'posteam',
       'posteam_type', 'defteam', 'side_of_field', 'yardline_100', 'game_date',
       'quarter_seconds_remaining', 'half_seconds_remaining',
       'game_seconds_remaining', 'game_half', 'quarter_end', 'drive', 'sp',
       'qtr', 'down', 'goal_to_go', 'time', 'yrdln', 'ydstogo', 'ydsnet',
       'desc', 'play_type', 'yards_gained', 'shotgun', 'no_huddle',
       'qb_dropback', 'qb_scramble', 'home_timeouts_remaining',
       'away_timeouts_remaining', 'timeout', 'posteam_timeouts_remaining',
       'defteam_timeouts_remaining', 'total_home_score', 'total_away_score',
       'posteam_score', 'defteam_score', 'score_differential',
       'posteam_score_post', 'defteam_score_post', 'score_differential_post',
       'no_score_prob', 'opp_fg_prob', 'opp_safety_prob', 'opp_td_prob',
       'fg_prob', 'safety_prob', 'td_prob', 'ep', 'epa', 'total_home_epa',
       'total_away_epa', 'wp', 'def_wp', 'home_wp', 'away_wp', 'wpa',

In [5]:
#pd.options.display.max_columns = 255

In [6]:
df.head(10)

Unnamed: 0,play_id,game_id,home_team,away_team,posteam,posteam_type,defteam,side_of_field,yardline_100,game_date,...,pass_touchdown,rush_touchdown,return_touchdown,field_goal_attempt,punt_attempt,fumble,complete_pass,lateral_reception,lateral_rush,return_yards
0,46,2009091000,PIT,TEN,PIT,home,TEN,TEN,30.0,2009-09-10,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,39.0
1,68,2009091000,PIT,TEN,PIT,home,TEN,PIT,58.0,2009-09-10,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,92,2009091000,PIT,TEN,PIT,home,TEN,PIT,53.0,2009-09-10,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,113,2009091000,PIT,TEN,PIT,home,TEN,PIT,56.0,2009-09-10,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,139,2009091000,PIT,TEN,PIT,home,TEN,PIT,56.0,2009-09-10,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
5,162,2009091000,PIT,TEN,TEN,away,PIT,TEN,98.0,2009-09-10,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,183,2009091000,PIT,TEN,TEN,away,PIT,TEN,98.0,2009-09-10,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
7,207,2009091000,PIT,TEN,TEN,away,PIT,TEN,94.0,2009-09-10,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,228,2009091000,PIT,TEN,TEN,away,PIT,TEN,96.0,2009-09-10,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,11.0
9,253,2009091000,PIT,TEN,PIT,home,TEN,TEN,43.0,2009-09-10,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [7]:
# function to drop columns with a large number of nulls
to_drop = []
for i in df.columns:
    if df[i].isnull().sum() > 100000:
        to_drop.append(i)

In [8]:
df.drop(to_drop, axis = 1, inplace = True)

In [9]:
#df.isnull().sum()

##  Split play_type into dummys to get run/pass...

In [10]:
dummies = pd.get_dummies(df['play_type'], drop_first = False)

In [11]:
df = pd.concat((df, dummies), axis = 1)

In [12]:
df.head()

Unnamed: 0,play_id,game_id,home_team,away_team,posteam,posteam_type,defteam,side_of_field,yardline_100,game_date,...,return_yards,extra_point,field_goal,kickoff,no_play,pass,punt,qb_kneel,qb_spike,run
0,46,2009091000,PIT,TEN,PIT,home,TEN,TEN,30.0,2009-09-10,...,39.0,0,0,1,0,0,0,0,0,0
1,68,2009091000,PIT,TEN,PIT,home,TEN,PIT,58.0,2009-09-10,...,0.0,0,0,0,0,1,0,0,0,0
2,92,2009091000,PIT,TEN,PIT,home,TEN,PIT,53.0,2009-09-10,...,0.0,0,0,0,0,0,0,0,0,1
3,113,2009091000,PIT,TEN,PIT,home,TEN,PIT,56.0,2009-09-10,...,0.0,0,0,0,0,1,0,0,0,0
4,139,2009091000,PIT,TEN,PIT,home,TEN,PIT,56.0,2009-09-10,...,0.0,0,0,0,0,0,1,0,0,0


### Make fourth down conversion it's own column, with 1 being successful, 0 failed conversion and NAN for the rest

In [13]:
df['4th_down_conversion'] = np.nan

In [14]:
df.head()

Unnamed: 0,play_id,game_id,home_team,away_team,posteam,posteam_type,defteam,side_of_field,yardline_100,game_date,...,extra_point,field_goal,kickoff,no_play,pass,punt,qb_kneel,qb_spike,run,4th_down_conversion
0,46,2009091000,PIT,TEN,PIT,home,TEN,TEN,30.0,2009-09-10,...,0,0,1,0,0,0,0,0,0,
1,68,2009091000,PIT,TEN,PIT,home,TEN,PIT,58.0,2009-09-10,...,0,0,0,0,1,0,0,0,0,
2,92,2009091000,PIT,TEN,PIT,home,TEN,PIT,53.0,2009-09-10,...,0,0,0,0,0,0,0,0,1,
3,113,2009091000,PIT,TEN,PIT,home,TEN,PIT,56.0,2009-09-10,...,0,0,0,0,1,0,0,0,0,
4,139,2009091000,PIT,TEN,PIT,home,TEN,PIT,56.0,2009-09-10,...,0,0,0,0,0,1,0,0,0,


In [15]:
df.loc[df['fourth_down_failed'] == 1, '4th_down_conversion'] = 0

In [16]:
df.loc[df['fourth_down_converted'] == 1, '4th_down_conversion'] = 1

In [17]:
df['4th_down_conversion'].value_counts()

0.0    2438
1.0    2338
Name: 4th_down_conversion, dtype: int64

In [18]:
df['total_4th_attempts'] = np.nan

In [19]:
df.loc[df['fourth_down_failed'] == 1, 'total_4th_attempts'] = 1

In [20]:
df.loc[df['fourth_down_converted'] == 1, 'total_4th_attempts'] = 1

### Look at average yards to go for a successful attempt and a failed attempt and field position

# Look at most successful teams converting on 4th down

In [21]:
df.dropna(inplace = True)

In [22]:
team_success = df[['posteam', '4th_down_conversion', 'ydstogo', 'goal_to_go', 'run', 'pass', 'total_4th_attempts']]

In [23]:
team_success.head()

Unnamed: 0,posteam,4th_down_conversion,ydstogo,goal_to_go,run,pass,total_4th_attempts
329,CLE,0.0,10,0.0,0,1,1.0
395,DET,1.0,1,0.0,1,0,1.0
516,DET,1.0,1,0.0,0,1,1.0
676,TB,0.0,7,0.0,0,1,1.0
697,TB,1.0,2,1.0,0,1,1.0


In [24]:
df_2 = team_success.groupby('posteam').mean()

In [25]:
df_2 = df_2.sort_values(by = '4th_down_conversion', ascending = False)

In [26]:
df_2.head()

Unnamed: 0_level_0,4th_down_conversion,ydstogo,goal_to_go,run,pass,total_4th_attempts
posteam,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LAC,0.65,2.6,0.1,0.55,0.45,1.0
NO,0.597222,3.5625,0.118056,0.465278,0.506944,1.0
CIN,0.591549,4.253521,0.105634,0.485915,0.514085,1.0
DAL,0.570312,4.507812,0.109375,0.460938,0.523438,1.0
NE,0.565217,3.471014,0.101449,0.427536,0.565217,1.0


In [27]:
df_2.tail()

Unnamed: 0_level_0,4th_down_conversion,ydstogo,goal_to_go,run,pass,total_4th_attempts
posteam,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
OAK,0.429448,5.092025,0.116564,0.355828,0.644172,1.0
LA,0.425532,5.085106,0.085106,0.361702,0.638298,1.0
BUF,0.415584,5.746753,0.103896,0.305195,0.694805,1.0
STL,0.401575,6.228346,0.07874,0.251969,0.732283,1.0
MIA,0.397351,5.715232,0.092715,0.311258,0.668874,1.0


#### Average attempts appear to be from 4-6 yds (2 teams under 3 yds)

In [28]:
team_success_2 = df[['posteam', '4th_down_conversion', 'fourth_down_failed', 'total_4th_attempts', 'run', 'pass']]

In [29]:
df_3 = team_success_2.groupby('posteam').sum()

In [30]:
df_3.head()

Unnamed: 0_level_0,4th_down_conversion,fourth_down_failed,total_4th_attempts,run,pass
posteam,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ARI,58.0,68.0,126.0,37,86
ATL,81.0,75.0,156.0,44,108
BAL,73.0,77.0,150.0,66,84
BUF,64.0,90.0,154.0,47,107
CAR,74.0,60.0,134.0,56,77


# See success rate by distances and by team

## Under 4 yds

In [31]:
df_4 = df[df['ydstogo'] < 4]

df_4 = df_4[['posteam', '4th_down_conversion', 'ydstogo', 'goal_to_go', 'run', 'pass']]

df_4 = df_4.groupby('posteam').mean()

df_4 = df_4.sort_values(by = '4th_down_conversion', ascending = False)

df_4.head()

Unnamed: 0_level_0,4th_down_conversion,ydstogo,goal_to_go,run,pass
posteam,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CIN,0.698925,1.548387,0.129032,0.666667,0.333333
CAR,0.697368,1.315789,0.118421,0.618421,0.381579
LAC,0.6875,1.3125,0.125,0.6875,0.3125
NO,0.680412,1.340206,0.164948,0.628866,0.360825
PIT,0.671642,1.432836,0.149254,0.537313,0.462687


In [32]:
df_4.tail()

Unnamed: 0_level_0,4th_down_conversion,ydstogo,goal_to_go,run,pass
posteam,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
NYG,0.52381,1.678571,0.130952,0.380952,0.619048
IND,0.507692,1.430769,0.169231,0.430769,0.569231
GB,0.506173,1.580247,0.111111,0.580247,0.419753
BUF,0.5,1.581081,0.148649,0.5,0.5
OAK,0.483146,1.52809,0.157303,0.539326,0.460674


## 4 - 7 yds

In [33]:
df_5 = df[(df['ydstogo'] >= 4) & (df['ydstogo'] <= 7)]

df_5 = df_5[['posteam', '4th_down_conversion', 'ydstogo', 'goal_to_go', 'run', 'pass']]

df_5 = df_5.groupby('posteam').mean()

df_5 = df_5.sort_values(by = '4th_down_conversion', ascending = False)

df_5.head()

Unnamed: 0_level_0,4th_down_conversion,ydstogo,goal_to_go,run,pass
posteam,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
LAC,0.666667,6.0,0.0,0.0,1.0
SF,0.62069,5.137931,0.068966,0.068966,0.931034
GB,0.575,5.15,0.025,0.075,0.925
JAX,0.5625,5.0,0.125,0.1875,0.6875
ATL,0.548387,5.290323,0.16129,0.064516,0.870968


In [34]:
df_5.tail()

Unnamed: 0_level_0,4th_down_conversion,ydstogo,goal_to_go,run,pass
posteam,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
STL,0.324324,5.108108,0.081081,0.081081,0.891892
HOU,0.322581,5.290323,0.064516,0.096774,0.903226
PIT,0.294118,5.470588,0.058824,0.058824,0.882353
JAC,0.269231,5.192308,0.230769,0.076923,0.923077
LA,0.2,5.2,0.0,0.2,0.8


## 7 + yards

In [35]:
df_6 = df[(df['ydstogo'] > 7)]

df_6 = df_6[['posteam', '4th_down_conversion', 'ydstogo', 'goal_to_go', 'run', 'pass']]

df_6 = df_6.groupby('posteam').mean()

df_6 = df_6.sort_values(by = '4th_down_conversion', ascending = False)

df_6.head()

Unnamed: 0_level_0,4th_down_conversion,ydstogo,goal_to_go,run,pass
posteam,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
JAC,0.421053,11.526316,0.052632,0.078947,0.894737
NO,0.409091,11.681818,0.0,0.181818,0.818182
OAK,0.394737,13.315789,0.052632,0.105263,0.894737
TEN,0.380952,13.47619,0.047619,0.142857,0.857143
IND,0.37931,11.482759,0.034483,0.172414,0.793103


In [36]:
df_6.tail()

Unnamed: 0_level_0,4th_down_conversion,ydstogo,goal_to_go,run,pass
posteam,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
MIA,0.186047,13.488372,0.046512,0.023256,0.906977
STL,0.15,13.15,0.025,0.125,0.85
PIT,0.117647,11.647059,0.058824,0.058824,0.882353
CHI,0.103448,11.241379,0.034483,0.103448,0.896552
LAC,0.0,13.0,0.0,0.0,1.0
