In [1]:
import pandas as pd
try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO
import numpy as np

In [2]:
#Import 2018 play by play data
mylist = []
for chunk in pd.read_csv(r'nflscrapR-data/play_by_play_data/regular_season/reg_pbp_2018.csv', sep=',', chunksize=20000,low_memory=False):
    mylist.append(chunk)
p18 = pd.concat(mylist, axis = 0)
del mylist

In [3]:
#Import Roster Data
rosters18 = pd.read_csv(r'rosters_2018.csv', sep = ',', low_memory = False)

## Tackle Assists

In [5]:
#Count tackle assists 1-4 
tackleassists18one = pd.DataFrame({'tackles' : p18.groupby( [ "assist_tackle_1_player_id"] ).size()}).reset_index()
tackleassists18two = pd.DataFrame({'tackles' : p18.groupby( [ "assist_tackle_2_player_id"] ).size()}).reset_index()

#Merge all together into one tackle assists dataframe
tackleassists18 = pd.merge(tackleassists18one, tackleassists18two, left_on = 'assist_tackle_1_player_id', right_on = 'assist_tackle_2_player_id', how='left')
tackleassists18.head()

Unnamed: 0,assist_tackle_1_player_id,tackles_x,assist_tackle_2_player_id,tackles_y
0,00-0021140,7,00-0021140,4.0
1,00-0022161,5,00-0022161,6.0
2,00-0022247,14,00-0022247,19.0
3,00-0023173,2,00-0023173,1.0
4,00-0023259,13,00-0023259,13.0


In [6]:
#Add the tackles together to get one consolidated tackle assist count for each player
tackleassists18['tackle_assists'] = (tackleassists18['tackles_x'] + tackleassists18['tackles_y'])
tackleassists18.drop(columns=['tackles_x', 'assist_tackle_2_player_id','tackles_y'],inplace=True)
tackleassists18.head()

Unnamed: 0,assist_tackle_1_player_id,tackle_assists
0,00-0021140,11.0
1,00-0022161,11.0
2,00-0022247,33.0
3,00-0023173,3.0
4,00-0023259,26.0


In [7]:
#Add the number of tackle assists to the rosters18 dataframe
rosters18 = pd.merge(rosters18, tackleassists18,left_on='GSIS_ID',right_on='assist_tackle_1_player_id', how='left')
#Remove the extra player_id column
rosters18.drop(columns=['assist_tackle_1_player_id'],inplace=True)
rosters18.head()

Unnamed: 0,Season,Player,Team,Pos,name,GSIS_ID,tackle_assists
0,2018,Antoine Bethea,ARI,FS,A.Bethea,00-0024421,27.0
1,2018,Benson Mayowa,ARI,DE,B.Mayowa,00-0030380,9.0
2,2018,Brandon Williams,ARI,CB,B.Williams,00-0032769,
3,2018,Budda Baker,ARI,SS,B.Baker,00-0033890,31.0
4,2018,Cameron Malveaux,ARI,DE,C.Malveaux,00-0033403,8.0


In [8]:
#Calculate the tackle assist points for all positions as 0.5 per tackle assist
rosters18['tackle_assist_pts'] = rosters18['tackle_assists']*0.5
rosters18.head()
#Overwrite the tackle assist points for the positions that are calculated differently: CB, DE, DT
#rosters18.where((rosters18['Pos']=='CB', rosters18['tackleassists'], rosters18['tackle_assist_pts'])

Unnamed: 0,Season,Player,Team,Pos,name,GSIS_ID,tackle_assists,tackle_assist_pts
0,2018,Antoine Bethea,ARI,FS,A.Bethea,00-0024421,27.0,13.5
1,2018,Benson Mayowa,ARI,DE,B.Mayowa,00-0030380,9.0,4.5
2,2018,Brandon Williams,ARI,CB,B.Williams,00-0032769,,
3,2018,Budda Baker,ARI,SS,B.Baker,00-0033890,31.0,15.5
4,2018,Cameron Malveaux,ARI,DE,C.Malveaux,00-0033403,8.0,4.0


## QB Hits

In [9]:
qbhit18one = pd.DataFrame({'qb_hit' : p18.groupby( [ "qb_hit_1_player_id"] ).size()}).reset_index()
qbhit18two = pd.DataFrame({'qb_hit' : p18.groupby( [ "qb_hit_2_player_id"] ).size()}).reset_index()

In [10]:
qbhitpts18 = pd.merge(qbhit18one, qbhit18two, left_on = 'qb_hit_1_player_id', right_on = 'qb_hit_2_player_id', how='left')

In [11]:
qbhitpts18['qb_hit_xy'] = qbhitpts18['qb_hit_x'] + qbhitpts18['qb_hit_y']
qbhitpts18.head()

Unnamed: 0,qb_hit_1_player_id,qb_hit_x,qb_hit_2_player_id,qb_hit_y,qb_hit_xy
0,00-0021140,11,,,
1,00-0022161,13,00-0022161,2.0,15.0
2,00-0022247,1,,,
3,00-0023259,10,,,
4,00-0023368,16,00-0023368,1.0,17.0


In [12]:
rosters18 = pd.merge(rosters18, qbhitpts18[['qb_hit_xy','qb_hit_1_player_id']],left_on='GSIS_ID',right_on='qb_hit_1_player_id', how='left')
rosters18.head()

Unnamed: 0,Season,Player,Team,Pos,name,GSIS_ID,tackles_xy,qb_hit_xy,qb_hit_1_player_id
0,2018,Antoine Bethea,ARI,FS,A.Bethea,00-0024421,13.5,,00-0024421
1,2018,Benson Mayowa,ARI,DE,B.Mayowa,00-0030380,4.5,,00-0030380
2,2018,Brandon Williams,ARI,CB,B.Williams,00-0032769,,,
3,2018,Budda Baker,ARI,SS,B.Baker,00-0033890,15.5,3.0,00-0033890
4,2018,Cameron Malveaux,ARI,DE,C.Malveaux,00-0033403,4.0,,00-0033403


In [13]:
rosters18.drop(['qb_hit_1_player_id'], axis=1,inplace=True)
rosters18.head()

Unnamed: 0,Season,Player,Team,Pos,name,GSIS_ID,tackles_xy,qb_hit_xy
0,2018,Antoine Bethea,ARI,FS,A.Bethea,00-0024421,13.5,
1,2018,Benson Mayowa,ARI,DE,B.Mayowa,00-0030380,4.5,
2,2018,Brandon Williams,ARI,CB,B.Williams,00-0032769,,
3,2018,Budda Baker,ARI,SS,B.Baker,00-0033890,15.5,3.0
4,2018,Cameron Malveaux,ARI,DE,C.Malveaux,00-0033403,4.0,
