In [1]:
#This notebook will take the cleaned nfl play by play data and add the next score function to each row, then out put the csv file for easy access by other notebooks for feature engineering and modeling

In [2]:
import pandas as pd

In [6]:
def make_scoring_plays_df(df):
    df_scoring_plays = df.loc[(df['sp']==1) & (df['play_type'] != 'No Play')] #pull out just the scoring plays from a dataframe
    return df_scoring_plays

#Given a specific play (row) of a dataframe of scoring plays this function finds the next play from that game in the dataframe
def find_next_score(play, scoring_plays):
    
    #Test if a score even happens in that half (game resets at halftime)
    try:
        next_score = scoring_plays.loc[(scoring_plays['play_id'] >= play['play_id']) & (scoring_plays['game_id'] == play['game_id']) & (scoring_plays['game_half'] == play['game_half'])].iloc[0]
        
    #If there isnt a next score
    except IndexError:
        score_type = 'No Score'
        score_drive = play['drive']
        return score_type, score_drive
    
    #Set the score drive for when the score will happen
    score_drive = next_score['drive']


    #Check to see what type of score occured, TD, Opp TD, FG, Opp FG etc. 
    
    #Touchdown
    
    if next_score['touchdown'] == 1:
        
        if next_score['td_team'] == play['posteam']:
            score_type = 'TD'
            return score_type, score_drive
        else:
            score_type = 'Opp_TD'
            return score_type, score_drive
    
    
    #Return Touchdown (where possesion team is not the ones that scored) so just reverse the TD and Opp_TD
    
    if next_score['return_touchdown'] == 1:
        
        if next_score['td_team'] == play['posteam']:
            score_type = 'Opp_TD'
            return score_type, score_drive
        
        else:
            score_type = 'TD'
            return score_type, score_drive
    
    #Field Goal
    
    elif next_score['field_goal_result'] == 'made':
        
        if next_score['posteam'] == play['posteam']:
            score_type = 'FG'
            return score_type, score_drive
        
        else:
            score_type = 'Opp_FG'
            return score_type, score_drive
            
    #Safety
    
    elif next_score['safety'] == 1:
        
        if next_score['posteam'] == play['posteam']:
            score_type = 'Opp_Safety'
            return score_type, score_drive
        else:
            score_type = 'Safety'
            return score_type, score_drive
            
    #Extra point
    
    elif next_score['extra_point_result'] == 'good':
        if next_score['posteam'] == play['posteam']:
            score_type = 'Extra_Point'
            return score_type, score_drive
        else:
            score_type = 'Opp_Extra_Point'
            return score_type, score_drive
            
    #two point conversion
    
    elif next_score['two_point_conv_result'] == 'success':
        if next_score['posteam'] == play['posteam']:
            score_type = 'Two_Point_Conv'
            return score_type, score_drive
        else:
            score_type = 'Opp_Two_Point_Conv'
            return score_type, score_drive
            
    #Defensive Two Point
    
    elif next_score['defensive_two_point_conv'] == 'success':
        if next_score['posteam'] != play['posteam']:
            score_type = 'Def_Two_Point_Conv'
            return score_type, score_drive

        else:
            score_type = 'Opp_Def_Two_Point_Conv'
            return score_type, score_drive
    
    else:
        score_type = 'NA'
        return score_type, score_drive

def make_next_score_df(df, df_scoring_plays):
    
    df['next_score_type'], df['next_score_drive'] = zip(*df.apply(lambda x: find_next_score(x, df_scoring_plays),axis=1)) #adds the next_score_type and drive to each row in the data frame
    
    return df

    
def add_next_score(file):
    
    df = pd.read_csv('../data/interim/' + file, low_memory=False) #read in the cleaned data
    
    df_scoring_plays = make_scoring_plays_df(df) #make dataframe of scoring plays
    
    
    df = make_next_score_df(df, df_scoring_plays)
    
    df.to_csv('../data/interim/nfl_pbp_next_scores.csv')
    
    return

In [7]:
file = 'nfl_pbp_clean.csv'
add_next_score(file)

In [9]:
df = pd.read_csv('../data/interim/nfl_pbp_next_scores.csv', low_memory=False) #read in the cleaned data


In [10]:
df[['desc','next_score_type'.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,play_id,game_id,home_team,away_team,posteam,posteam_type,defteam,side_of_field,...,penalty_type,defensive_two_point_attempt,defensive_two_point_conv,defensive_extra_point_attempt,defensive_extra_point_conv,week,season,game_type,next_score_type,next_score_drive
0,0,1,1,2009091000,PIT,TEN,PIT,home,TEN,PIT,...,,0.0,0.0,0.0,0.0,1,2009,reg,TD,11
1,1,2,2,2009091000,PIT,TEN,PIT,home,TEN,PIT,...,,0.0,0.0,0.0,0.0,1,2009,reg,TD,11
2,2,3,3,2009091000,PIT,TEN,PIT,home,TEN,PIT,...,,0.0,0.0,0.0,0.0,1,2009,reg,TD,11
3,3,4,4,2009091000,PIT,TEN,PIT,home,TEN,PIT,...,,0.0,0.0,0.0,0.0,1,2009,reg,TD,11
4,4,5,5,2009091000,PIT,TEN,TEN,away,PIT,TEN,...,,0.0,0.0,0.0,0.0,1,2009,reg,Opp_TD,11
