# Play-by-Play (PBP) Cleaner for ISFL

In [48]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import re
from tqdm.notebook import tqdm
from urllib.request import urlopen
import matplotlib.pyplot as plt

# Notebook display settings: show every row and column and never truncate
# cell text, so complete play descriptions are visible when a frame is printed.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
# None is the supported "no limit" value; the old -1 sentinel was deprecated
# in pandas 1.0 and is rejected by current pandas releases.
pd.set_option('display.max_colwidth', None)

import warnings
warnings.filterwarnings('ignore')

In [49]:
# Load the play-by-play table from the CSV file in the working directory.
pbpDF = pd.read_csv('all_post21.csv')

In [50]:
# Preview the first 15 rows whose play description contains the text 'FUMBLE'.
pbpDF.loc[pbpDF['play'].str.contains('FUMBLE')].head(15)

Unnamed: 0.1,Unnamed: 0,teamID,Q,time,totTime,down,distance,side,yard,play,gameID,S,homeTeam,awayTeam,teamPoss,dist2goal,awayScore,homeScore
318,130,1,3,2:34,1054,3.0,2.0,COL,47.0,"Pass by Kyubee, S., complete to Lecavalier, R. for 11 yds. Tackle by Marmeladov, V.. FUMBLE by Lecavalier, R., recovered by Lecavalier, R..",18,1,COL,BAL,BAL,47.0,13,15
491,125,5,3,5:18,1218,1.0,10.0,OCO,33.0,"Boss, M. SACKED by Blaster Blade - DE for -11 yds. FUMBLE by Boss, M., recovered by Boss, M..",19,1,OCO,ARI,OCO,67.0,17,6
576,20,1,1,8:51,3231,3.0,12.0,BAL,37.0,"Rush by Robinson, M. for 6 yds. Tackle by Bavitz, I.. FUMBLE by Robinson, M., recovered by Novel, L..",20,1,BAL,OCO,BAL,63.0,0,7
741,3,2,1,14:30,3570,2.0,1.0,YKW,22.0,"Orosz, C. SACKED by Everest Teagarden - DE for -5 yds. FUMBLE by Orosz, C., recovered by Akselsen, M..",21,1,COL,YKW,YKW,78.0,0,0
768,30,3,1,5:44,3044,3.0,19.0,COL,22.0,"Rush by Tweed, B. for 8 yds. Tackle by Shelton, L.. FUMBLE recovered by Shelton, L. at the COL - 30 yard line and returned for 4 yards.",21,1,COL,YKW,COL,78.0,0,3
1170,6,4,1,13:36,3516,3.0,24.0,ARI,16.0,"Rush by Mackworthy, R. for 8 yds. Tackle by Showcase, S.. FUMBLE by Mackworthy, R., recovered by Mackworthy, R..",35,1,BAL,ARI,ARI,84.0,0,0
1397,24,2,1,6:23,3083,2.0,2.0,COL,25.0,"Pass by Orosz, C., complete to LeClair, A. for 22 yds. Tackle by Hayden, A.. FUMBLE by LeClair, A., recovered by LeClair, A..",36,1,YKW,COL,YKW,25.0,0,0
1478,105,2,3,14:35,1775,2.0,10.0,YKW,13.0,"Rush by Stats, J. for 0 yds. Tackle by Cox, V.. FUMBLE by Stats, J., recovered by Larsendorf, R..",36,1,YKW,COL,YKW,87.0,7,16
1573,4,6,1,13:54,3534,2.0,6.0,SJS,9.0,"Pass by Christ, J., complete to Bottles, B. for 11 yds. Tackle by Bavitz, I.. FUMBLE recovered by Bavitz, I. at the SJS - 20 yard line and returned for 2 yards.",37,1,OCO,SJS,SJS,91.0,0,0
2111,137,4,3,8:48,1428,1.0,10.0,ARI,28.0,"Pass by Draxel, V., complete to Jefferson, L. for 10 yds. Tackle by Shelton, L.. FUMBLE recovered by Sanchez, P. at the ARI - 38 yard line and returned for 1 yards.",42,1,ARI,YKW,ARI,72.0,13,7


In [59]:
# Work on a single game (gameID 5225) while the parsers are developed.
# The explicit copy makes testDF an independent frame, so the column
# assignments in later cells write to it directly rather than to a selection
# of pbpDF that pandas would flag as chained assignment.
testDF = pbpDF.loc[pbpDF['gameID'] == 5225].copy()

In [60]:
def playType(x):
    """Classify a play-by-play row from keywords in its description.

    Parameters
    ----------
    x : row-like object supporting ``x.loc['play']``; the value must be the
        play description string.

    Returns
    -------
    str
        The label of the first matching rule below, or ``'N/A'`` when no
        keyword is found.
    """
    description = x.loc['play']

    # Rules are evaluated top to bottom and the first hit wins, so the order
    # is significant (e.g. any text containing 'Pass' is labelled 'Pass'
    # before the 'Penalty' rule is considered).
    ordered_rules = (
        (('Rush',), 'Rush'),
        (('Pass', 'throw', 'SACKED'), 'Pass'),
        (('Penalty',), 'Penalty'),
        (('Punt',), 'Punt'),
        (('kicks off', 'Kickoff'), 'Kickoff'),
        (('FG',), 'Field Goal'),
        (('Timeout',), 'Timeout'),
        (('TOUCHDOWN',), 'PAT'),
        (('Returned by',), 'Punt'),
        (('takes it down', 'waits for blockers', 'endzone', 'return',
          'cuts across'), 'Kickoff'),
        (('Turnover',), 'Turnover on Downs'),
    )

    for keywords, label in ordered_rules:
        if any(keyword in description for keyword in keywords):
            return label
    return 'N/A'

In [61]:
# Add the playType column: classify each row of the test game, showing a
# tqdm progress bar while the rows are processed.
tqdm.pandas(desc="Play type")
testDF.loc[:, 'playType'] = testDF.progress_apply(playType, axis=1)

HBox(children=(FloatProgress(value=0.0, description='Play type', max=222.0, style=ProgressStyle(description_wi…




### Passing Columns

Offense:
`passResult`, `passer`, `receiver`, `incompleteType`

Defense:
`interceptor`, `passDefender`, `sacker`

In [62]:
def passResult(x):
    """Determine the outcome of a pass play from its description.

    Parameters
    ----------
    x : row-like object supporting ``x.loc[...]`` with a ``'play'``
        description string and a ``'playType'`` label.

    Returns
    -------
    str
        ``'N/A'`` when the row is not a ``'Pass'`` play; otherwise
        ``'Incomplete'``, ``'Complete'``, ``'Sack'`` or ``'Interception'``,
        or the string ``'None'`` when no known marker is present.
    """
    description = x.loc['play']
    if x.loc['playType'] != 'Pass':
        return 'N/A'

    # Incompletions are tested first because 'incomplete' also contains the
    # substring 'complete'.
    incomplete_markers = ('incomplete', 'Incomplete', 'throws the ball away')
    if any(marker in description for marker in incomplete_markers):
        return 'Incomplete'

    for marker, outcome in (
        ('complete', 'Complete'),
        ('SACKED', 'Sack'),
        ('INTERCEPT', 'Interception'),
    ):
        if marker in description:
            return outcome
    return 'None'
    
def incompleteType(x):
    """Label why a pass was incomplete, based on the play description.

    Only the description text is inspected; the row's play type is not
    consulted.

    Parameters
    ----------
    x : row-like object supporting ``x.loc['play']`` (description string).

    Returns
    -------
    str
        ``'Miss'``, ``'Broken up'``, ``'Dropped'`` or ``'Throw away'`` for
        the first phrase found (checked in that order), else ``'N/A'``.
    """
    description = x.loc['play']
    phrase_to_label = (
        ('falls incomplete', 'Miss'),
        ('Broken up', 'Broken up'),
        ('dropped', 'Dropped'),
        ('throws the ball away', 'Throw away'),
    )
    for phrase, label in phrase_to_label:
        if phrase in description:
            return label
    return 'N/A'

def passer(x):
    """Extract the passer's name from a pass play description.

    Parameters
    ----------
    x : row-like object supporting ``x.loc[...]`` with ``'play'``,
        ``'passResult'`` and ``'incompleteType'`` entries.

    Returns
    -------
    str
        The text identified as the passer, or ``'N/A'`` when ``passResult``
        is not one of ``'Sack'``, ``'Complete'``, ``'Interception'`` or
        ``'Incomplete'``.

    Raises
    ------
    IndexError
        If a description that should contain ``'Pass by '`` does not.
    """
    description = x.loc['play']
    outcome = x.loc['passResult']
    miss_kind = x.loc['incompleteType']

    # Sacks and throw-aways start with the passer's name.
    if outcome == 'Sack':
        return description.split(' SACKED')[0]
    if outcome == 'Incomplete' and miss_kind == 'Throw away':
        return description.split(' throws')[0]

    # Every other outcome reads "Pass by <name><terminator>..."; the
    # terminator that ends the name depends on the outcome.
    name_terminators = {
        'Complete': ', c',
        'Interception': ', t',
        'Incomplete': ' to',
    }
    if outcome not in name_terminators:
        return 'N/A'

    after_intro = description.split('Pass by ')[1]
    return after_intro.split(name_terminators[outcome])[0]

def receiver(x):
    """Extract the intended receiver's name from a pass play description.

    Parameters
    ----------
    x : row-like object supporting ``x.loc[...]`` with ``'play'``,
        ``'passResult'`` and ``'incompleteType'`` entries.

    Returns
    -------
    str
        The text identified as the receiver, or ``"N/A"`` when the play has
        no identifiable target (sacks, throw-aways, non-pass plays, or an
        incompletion of an unrecognised type).

    Raises
    ------
    IndexError
        If the description lacks the marker that should precede the name.
    """
    play = x.loc['play']
    result = x.loc['passResult']
    incomplete_kind = x.loc['incompleteType']

    if result == 'Complete':
        return play.split('complete to ')[1].split(' for')[0]

    # For incompletions and interceptions the receiver sits between the word
    # "to" and a phrase that depends on how the play ended.
    if result == 'Interception':
        ending = '. INTERCEPTION'
    elif result == 'Incomplete':
        ending = {
            'Miss': ' falls',
            'Broken up': ' is incomplete',
            'Dropped': ' was dropped',
        }.get(incomplete_kind)
    else:
        ending = None

    if ending is None:
        return "N/A"

    # Split on the standalone word "to" only. A plain 'to ' substring also
    # matches inside names such as "Kato (R), T." or "Otto (R), B." (the data
    # tags rookies with "(R)" after the surname), which used to return a
    # fragment of the name instead of the receiver.
    after_to = re.split(r'\bto ', play, maxsplit=1)[1]
    return after_to.split(ending)[0]

In [63]:
# Build the passing columns one at a time, each with its own progress bar.
# Order matters: passer and receiver read the passResult and incompleteType
# columns created by the first two steps.
tqdm.pandas(desc="Pass type")
testDF.loc[:, 'passResult'] = testDF.progress_apply(passResult, axis=1)

tqdm.pandas(desc="Incomplete type")
testDF.loc[:, 'incompleteType'] = testDF.progress_apply(incompleteType, axis=1)

tqdm.pandas(desc="Passer")
testDF.loc[:, 'passer'] = testDF.progress_apply(passer, axis=1)

tqdm.pandas(desc="Receiver")
testDF.loc[:, 'receiver'] = testDF.progress_apply(receiver, axis=1)

HBox(children=(FloatProgress(value=0.0, description='Pass type', max=222.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Incomplete type', max=222.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Passer', max=222.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='Receiver', max=222.0, style=ProgressStyle(description_wid…




### Rushing Columns

Offense:
`rusher`, `rushResult`

In [64]:
def rushResult(x):
    """Report whether a rushing play ended in a fumble.

    Parameters
    ----------
    x : row-like object supporting ``x.loc[...]`` with a ``'play'``
        description string and a ``'playType'`` label.

    Returns
    -------
    str
        ``'N/A'`` for non-rush rows; otherwise ``'Fumble'`` when the
        description contains ``'FUMBLE'``, else ``'Rush'``.
    """
    description = x.loc['play']
    if x.loc['playType'] != 'Rush':
        return 'N/A'
    return 'Fumble' if 'FUMBLE' in description else 'Rush'
    
def rusher(x):
    """Extract the ball carrier's name from a rushing play description.

    Parameters
    ----------
    x : row-like object supporting ``x.loc[...]`` with a ``'play'``
        description string and a ``'playType'`` label.

    Returns
    -------
    str
        The text between the first standalone word "by" and the following
        ``' for'``, or ``'N/A'`` for non-rush rows.

    Raises
    ------
    IndexError
        If a rush description contains no standalone "by ".
    """
    play = x.loc['play']
    if x.loc['playType'] != 'Rush':
        return 'N/A'

    # Split on the standalone word "by" only. A plain 'by ' substring also
    # matches inside a name such as "Bigsby (R), C." (the data tags rookies
    # with "(R)" after the surname), which used to return "Bigs".
    after_by = re.split(r'\bby ', play, maxsplit=1)[1]
    return after_by.split(' for')[0]

In [65]:
# Build the rushing columns, each with its own progress bar.
tqdm.pandas(desc="Rush type")
testDF.loc[:, 'rushResult'] = testDF.progress_apply(rushResult, axis=1)

tqdm.pandas(desc="rusher")
testDF.loc[:, 'rusher'] = testDF.progress_apply(rusher, axis=1)

HBox(children=(FloatProgress(value=0.0, description='Rush type', max=222.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='rusher', max=222.0, style=ProgressStyle(description_width…




In [67]:
# testDF.loc[testDF['playType'] == 'Rush']
testDF

Unnamed: 0.1,Unnamed: 0,teamID,Q,time,totTime,down,distance,side,yard,play,gameID,S,homeTeam,awayTeam,teamPoss,dist2goal,awayScore,homeScore,playType,passResult,incompleteType,passer,receiver,rushResult,rusher
537040,0,8,1,15:00,3600,,,NO,30.0,"Prohaska, H. kicks off.",5225,21,NO,YKW,NO,70.0,0,0,Kickoff,,,,,,
537041,1,2,1,15:00,3600,,,,,"Snuggles, J. takes it down the left side.",5225,21,NO,YKW,YKW,,0,0,Kickoff,,,,,,
537042,2,2,1,15:00,3600,,,,,"Snuggles, J. waits for blockers.",5225,21,NO,YKW,YKW,,0,0,Kickoff,,,,,,
537043,3,2,1,15:00,3600,,,,,A 40 yard return.,5225,21,NO,YKW,YKW,,0,0,Kickoff,,,,,,
537044,4,2,1,14:55,3595,1.0,10.0,YKW,43.0,"Rush by Drake, O. for 3 yds. Tackle by Bode, Q..",5225,21,NO,YKW,YKW,57.0,0,0,Rush,,,,,Rush,"Drake, O."
537045,5,2,1,14:33,3573,2.0,7.0,YKW,46.0,"Pass by Bigsby, C. to Swift, N. falls incomplete.",5225,21,NO,YKW,YKW,54.0,0,0,Pass,Incomplete,Miss,"Bigsby, C.","Swift, N.",,
537046,6,2,1,14:27,3567,3.0,7.0,YKW,46.0,"Pass by Bigsby, C., complete to Drake, O. for a short gain. Tackle by Bode, Q..",5225,21,NO,YKW,YKW,54.0,0,0,Pass,Complete,,"Bigsby, C.","Drake, O.",,
537047,7,8,1,13:56,3536,4.0,7.0,NO,46.0,"Punt by Smalls (R), D. of 54 yards. Touchback.",5225,21,NO,YKW,NO,54.0,0,0,Punt,,,,,,
537048,8,8,1,13:53,3533,1.0,10.0,NO,20.0,"Rush by Gump, F. for a short gain. Tackle by Maulolo, C..",5225,21,NO,YKW,NO,80.0,0,0,Rush,,,,,Rush,"Gump, F."
537049,9,8,1,13:38,3518,2.0,10.0,NO,20.0,"Rush by Toriki, M. for 3 yds. Tackle by Virtanen, M..",5225,21,NO,YKW,NO,80.0,0,0,Rush,,,,,Rush,"Toriki, M."


### To-Do:
* ~~Passing~~
* ~~Receiving~~
* ~~Rushing~~
* Defense (tackler, fumbler, fumble recoverer)
* Field Goals (kickdistance, result, blocker)
* Punts (kickdistance, result, yards returned, blocker)
* Extra Points (result, blocker)
* Play Result (TD/FD/ydGained)
* Kickoffs (kickdistance, result, yards returned)