# WPA and EPA

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

## Bring in data with less features

In [2]:
df = pd.read_csv('../../datasets/less_feature_df.csv', index_col = [0])

##  Split play_type into dummys to get run/pass...

In [3]:
dummies = pd.get_dummies(df['play_type'], drop_first = False)

In [4]:
df = pd.concat((df, dummies), axis = 1)

In [5]:
df.head()

Unnamed: 0,play_id,game_id,home_team,away_team,posteam,posteam_type,defteam,side_of_field,yardline_100,game_date,...,return_yards,extra_point,field_goal,kickoff,no_play,pass,punt,qb_kneel,qb_spike,run
0,46,2009091000,PIT,TEN,PIT,home,TEN,TEN,30.0,2009-09-10,...,39.0,0,0,1,0,0,0,0,0,0
1,68,2009091000,PIT,TEN,PIT,home,TEN,PIT,58.0,2009-09-10,...,0.0,0,0,0,0,1,0,0,0,0
2,92,2009091000,PIT,TEN,PIT,home,TEN,PIT,53.0,2009-09-10,...,0.0,0,0,0,0,0,0,0,0,1
3,113,2009091000,PIT,TEN,PIT,home,TEN,PIT,56.0,2009-09-10,...,0.0,0,0,0,0,1,0,0,0,0
4,139,2009091000,PIT,TEN,PIT,home,TEN,PIT,56.0,2009-09-10,...,0.0,0,0,0,0,0,1,0,0,0


### Make Fourth down attempt it's own column, with 1 being successful, 0 failed conversion and NAN for the rest

In [6]:
df['4th_down_conversion'] = np.nan

In [7]:
df.head()

Unnamed: 0,play_id,game_id,home_team,away_team,posteam,posteam_type,defteam,side_of_field,yardline_100,game_date,...,extra_point,field_goal,kickoff,no_play,pass,punt,qb_kneel,qb_spike,run,4th_down_conversion
0,46,2009091000,PIT,TEN,PIT,home,TEN,TEN,30.0,2009-09-10,...,0,0,1,0,0,0,0,0,0,
1,68,2009091000,PIT,TEN,PIT,home,TEN,PIT,58.0,2009-09-10,...,0,0,0,0,1,0,0,0,0,
2,92,2009091000,PIT,TEN,PIT,home,TEN,PIT,53.0,2009-09-10,...,0,0,0,0,0,0,0,0,1,
3,113,2009091000,PIT,TEN,PIT,home,TEN,PIT,56.0,2009-09-10,...,0,0,0,0,1,0,0,0,0,
4,139,2009091000,PIT,TEN,PIT,home,TEN,PIT,56.0,2009-09-10,...,0,0,0,0,0,1,0,0,0,


In [8]:
df.loc[df['fourth_down_failed'] == 1, '4th_down_conversion'] = 0

In [9]:
df.loc[df['fourth_down_converted'] == 1, '4th_down_conversion'] = 1

In [10]:
df['4th_down_conversion'].value_counts()

0.0    2438
1.0    2338
Name: 4th_down_conversion, dtype: int64

In [11]:
df['total_4th_attempts'] = np.nan

In [12]:
df.loc[df['fourth_down_failed'] == 1, 'total_4th_attempts'] = 1

In [13]:
df.loc[df['fourth_down_converted'] == 1, 'total_4th_attempts'] = 1

In [14]:
df.drop(columns = ['quarter_end', 'down', 'kickoff_out_of_bounds', 'extra_point', 'kickoff',
'third_down_failed', 'field_goal', 'own_kickoff_recovery_td',
'kickoff_inside_twenty',
'kickoff_fair_catch',
'field_goal_attempt',
'qb_spike'
], inplace = True)

In [15]:
df.dropna(inplace = True)

# Compare wp(a) and ep(a) if a team punts and if a team goes for it on fourth down

In [16]:
df_2 = df.groupby(df['4th_down_conversion']).mean()

In [17]:
#df_2.reset_index(inplace = True)

In [18]:
df_2 = df_2[['wp', 'wpa', 'ep', 'epa']]

In [19]:
df_2

Unnamed: 0_level_0,wp,wpa,ep,epa
4th_down_conversion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,0.2787,-0.040938,1.273433,-2.442409
1.0,0.332925,0.071184,1.386137,2.69707


Converting on fourth down is approximately a four point benefit when compared to not converting. The winning probability is approximately 11%.

## Make punt it's own column

In [20]:
df_3 = df.groupby(df['punt']).mean()

In [21]:
df_3 = df_3[['wp', 'wpa', 'ep', 'epa']]

In [22]:
df_3

Unnamed: 0_level_0,wp,wpa,ep,epa
punt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.305296,0.014159,1.329033,0.082972
1,0.49239,-0.081516,0.217581,-3.571696


Averging a failed conversion and successful conversion, winning probability and expected points will usually increase if your team attempts to go for it on fourth down. 