In [1]:
# Imports
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the play-level CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this exact file exists.
plays_df = pd.read_csv(
    '/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021/full_play_3.csv'
)

In [3]:
# Determine the down and yardage scenario for each play (yardage 0-4, 5-10, 11+).
# The label is "<down> <distance bucket>", e.g. "1 0-4" or "3 11+".
# Built with vectorized masks instead of a per-row .at loop; the whole column is
# assigned in one step, so it exists even for an empty frame and re-running the
# cell never leaves stale labels behind.
yards_to_go = plays_df['yardsToGo']
down_label = plays_df['down'].map(str)  # same text as str(value) applied per row

# Rows whose yardsToGo matches no bucket (e.g. missing values) stay NaN.
distance_label = pd.Series(np.nan, index=plays_df.index, dtype=object)
distance_label[yards_to_go < 5] = '0-4'
distance_label[yards_to_go.between(5, 10)] = '5-10'  # inclusive on both ends
distance_label[yards_to_go > 10] = '11+'

# Adding a string to NaN yields NaN, so unmatched rows remain unlabeled.
plays_df['scenario'] = down_label + ' ' + distance_label

In [4]:
# Calculate the stats for each scenario
def scenario_calc(s_df):
    """Summarise pass outcomes per scheme and rank the schemes.

    Parameters
    ----------
    s_df : pandas.DataFrame
        Plays to summarise. The columns ``scheme``, ``passResult`` and ``epa``
        are read. Rows whose ``scheme`` equals the string ``'None'`` are
        dropped before aggregating.

    Returns
    -------
    pandas.DataFrame
        One row per scheme, sorted by ``comboRank`` (smallest first), with
        the columns ``scheme``, ``completions``, ``incompletions``,
        ``interceptions``, ``epaMean``, ``totalPlays``, ``completionPct``,
        ``interceptionPct``, ``completionRank``, ``epaRank``, ``intRank`` and
        ``comboRank``. The index holds each scheme's position in the
        pre-sort (group-key) order.

    Notes
    -----
    ``totalPlays`` counts only rows with ``passResult`` in ``C``/``I``/``IN``,
    while ``epaMean`` averages ``epa`` over every row of the scheme.
    A scheme with ``totalPlays == 0`` gets NaN percentages and NaN ranks.
    """
    s_df = s_df[s_df['scheme'] != 'None']

    by_scheme = s_df.groupby('scheme')
    pass_results = by_scheme['passResult']

    # Per-scheme outcome counts plus the mean EPA, aligned on the scheme key.
    scheme_df = pd.concat(
        [
            pass_results.apply(lambda x: (x == 'C').sum()),
            pass_results.apply(lambda x: (x == 'I').sum()),
            pass_results.apply(lambda x: (x == 'IN').sum()),
            by_scheme['epa'].mean(),
        ],
        axis=1,
    )
    scheme_df.columns = ['completions', 'incompletions', 'interceptions', 'epaMean']

    # Move the scheme key from the index into a leading column.
    scheme_df['scheme'] = scheme_df.index
    scheme_df = scheme_df.reset_index(drop=True)
    scheme_df = scheme_df[['scheme', 'completions', 'incompletions', 'interceptions', 'epaMean']]

    scheme_df['totalPlays'] = (
        scheme_df['completions'] + scheme_df['incompletions'] + scheme_df['interceptions']
    )
    scheme_df['completionPct'] = scheme_df['completions'] / scheme_df['totalPlays']
    scheme_df['interceptionPct'] = scheme_df['interceptions'] / scheme_df['totalPlays']

    # Rank 1 goes to the lowest completion %, the lowest mean EPA and the
    # highest interception %; ties share the average rank (pandas default).
    scheme_df['completionRank'] = scheme_df['completionPct'].rank(ascending=True)
    scheme_df['epaRank'] = scheme_df['epaMean'].rank(ascending=True)
    scheme_df['intRank'] = scheme_df['interceptionPct'].rank(ascending=False)

    # Combined score is the plain sum of the three ranks.
    scheme_df['comboRank'] = (
        scheme_df['completionRank'] + scheme_df['epaRank'] + scheme_df['intRank']
    )
    scheme_df = scheme_df.sort_values(by=['comboRank'])

    return scheme_df

In [5]:
# 1st down, short yardage (0-4): per-scheme stats and ranks.
# NOTE(review): this select/compute/display pattern is repeated once per
# down-and-distance scenario; a loop over the scenario keys would remove the copies.
scenario_df = plays_df.loc[plays_df['scenario'].eq('1 0-4')]
final_df = scenario_calc(scenario_df)
final_df

Unnamed: 0,scheme,completions,incompletions,interceptions,epaMean,totalPlays,completionPct,interceptionPct,completionRank,epaRank,intRank,comboRank
0,cover-0,69,56,4,0.434224,129,0.534884,0.031008,1.0,1.0,1.0,3.0
1,cover-3,1,0,0,1.365215,1,1.0,0.0,2.0,2.0,2.0,6.0


In [6]:
# Save the current final_df to an Excel workbook, keeping the row index as a column.
final_df.to_excel(
    '/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021/final/play_rankings_1s.xlsx',
    index=True,
)

In [7]:
# 1st down, mid yardage (5-10): per-scheme stats and ranks.
scenario_df = plays_df.loc[plays_df['scenario'].eq('1 5-10')]
final_df = scenario_calc(scenario_df)
final_df

Unnamed: 0,scheme,completions,incompletions,interceptions,epaMean,totalPlays,completionPct,interceptionPct,completionRank,epaRank,intRank,comboRank
4,cover-4,1275,557,42,0.070925,1874,0.680363,0.022412,4.0,1.0,2.0,7.0
2,cover-2,219,90,11,0.110621,320,0.684375,0.034375,5.0,2.0,1.0,8.0
3,cover-3,2019,1089,71,0.111903,3179,0.635105,0.022334,3.0,3.0,3.0,9.0
0,cover-0,161,114,4,0.289891,279,0.577061,0.014337,1.0,5.0,4.0,10.0
1,cover-1,368,210,8,0.229544,586,0.627986,0.013652,2.0,4.0,5.0,11.0


In [8]:
# Save the current final_df to an Excel workbook, keeping the row index as a column.
final_df.to_excel(
    '/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021/final/play_rankings_1m.xlsx',
    index=True,
)

In [9]:
# 1st down, long yardage (11+): per-scheme stats and ranks.
scenario_df = plays_df.loc[plays_df['scenario'].eq('1 11+')]
final_df = scenario_calc(scenario_df)
final_df

Unnamed: 0,scheme,completions,incompletions,interceptions,epaMean,totalPlays,completionPct,interceptionPct,completionRank,epaRank,intRank,comboRank
3,cover-3,104,49,5,-0.085529,158,0.658228,0.031646,2.0,1.0,3.0,6.0
4,cover-4,102,37,5,-0.054909,144,0.708333,0.034722,3.0,2.0,2.0,7.0
1,cover-1,18,16,0,-0.027598,34,0.529412,0.0,1.0,4.0,4.5,9.5
2,cover-2,20,3,1,0.058498,24,0.833333,0.041667,5.0,5.0,1.0,11.0
0,cover-0,4,1,0,-0.043436,5,0.8,0.0,4.0,3.0,4.5,11.5


In [10]:
# Save the current final_df to an Excel workbook, keeping the row index as a column.
final_df.to_excel(
    '/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021/final/play_rankings_1l.xlsx',
    index=True,
)

In [11]:
# 2nd down, short yardage (0-4): per-scheme stats and ranks.
scenario_df = plays_df.loc[plays_df['scenario'].eq('2 0-4')]
final_df = scenario_calc(scenario_df)
final_df

Unnamed: 0,scheme,completions,incompletions,interceptions,epaMean,totalPlays,completionPct,interceptionPct,completionRank,epaRank,intRank,comboRank
0,cover-0,84,77,7,0.123417,168,0.5,0.041667,1.0,1.0,1.0,3.0
3,cover-3,227,126,6,0.286745,359,0.632312,0.016713,2.0,3.0,2.0,7.0
1,cover-1,67,31,1,0.286396,99,0.676768,0.010101,4.0,2.0,4.0,10.0
4,cover-4,177,63,4,0.287035,244,0.72541,0.016393,5.0,4.0,3.0,12.0
2,cover-2,26,14,0,0.289907,40,0.65,0.0,3.0,5.0,5.0,13.0


In [12]:
# Save the current final_df to an Excel workbook, keeping the row index as a column.
final_df.to_excel(
    '/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021/final/play_rankings_2s.xlsx',
    index=True,
)

In [13]:
# 2nd down, mid yardage (5-10): per-scheme stats and ranks.
scenario_df = plays_df.loc[plays_df['scenario'].eq('2 5-10')]
final_df = scenario_calc(scenario_df)
final_df

Unnamed: 0,scheme,completions,incompletions,interceptions,epaMean,totalPlays,completionPct,interceptionPct,completionRank,epaRank,intRank,comboRank
0,cover-0,112,87,6,0.020076,205,0.546341,0.029268,1.0,1.0,1.0,3.0
1,cover-1,227,129,7,0.03844,363,0.625344,0.019284,2.0,2.0,3.0,7.0
4,cover-4,776,345,26,0.045541,1147,0.676548,0.022668,4.0,3.0,2.0,9.0
3,cover-3,1177,582,31,0.099154,1790,0.657542,0.017318,3.0,5.0,4.0,12.0
2,cover-2,138,62,3,0.070729,203,0.679803,0.014778,5.0,4.0,5.0,14.0


In [14]:
# Save the current final_df to an Excel workbook, keeping the row index as a column.
final_df.to_excel(
    '/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021/final/play_rankings_2m.xlsx',
    index=True,
)

In [15]:
# 2nd down, long yardage (11+): per-scheme stats and ranks.
scenario_df = plays_df.loc[plays_df['scenario'].eq('2 11+')]
final_df = scenario_calc(scenario_df)
final_df

Unnamed: 0,scheme,completions,incompletions,interceptions,epaMean,totalPlays,completionPct,interceptionPct,completionRank,epaRank,intRank,comboRank
0,cover-0,18,12,2,-0.439789,32,0.5625,0.0625,2.0,1.0,1.0,4.0
1,cover-1,50,35,4,-0.171032,89,0.561798,0.044944,1.0,3.0,2.0,6.0
4,cover-4,311,91,9,-0.192313,411,0.756691,0.021898,4.0,2.0,3.0,9.0
3,cover-3,307,132,8,-0.052519,447,0.686801,0.017897,3.0,5.0,4.0,12.0
2,cover-2,68,14,1,-0.061942,83,0.819277,0.012048,5.0,4.0,5.0,14.0


In [16]:
# Save the current final_df to an Excel workbook, keeping the row index as a column.
final_df.to_excel(
    '/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021/final/play_rankings_2l.xlsx',
    index=True,
)

In [17]:
# 3rd down, short yardage (0-4): per-scheme stats and ranks.
scenario_df = plays_df.loc[plays_df['scenario'].eq('3 0-4')]
final_df = scenario_calc(scenario_df)
final_df

Unnamed: 0,scheme,completions,incompletions,interceptions,epaMean,totalPlays,completionPct,interceptionPct,completionRank,epaRank,intRank,comboRank
0,cover-0,127,97,6,0.14769,230,0.552174,0.026087,1.0,1.0,2.0,4.0
4,cover-4,117,76,7,0.226437,200,0.585,0.035,3.0,3.0,1.0,7.0
3,cover-3,396,222,14,0.189213,632,0.626582,0.022152,5.0,2.0,3.0,10.0
1,cover-1,107,70,4,0.227923,181,0.59116,0.022099,4.0,4.0,4.0,12.0
2,cover-2,27,19,1,0.252116,47,0.574468,0.021277,2.0,5.0,5.0,12.0


In [18]:
# Save the current final_df to an Excel workbook, keeping the row index as a column.
final_df.to_excel(
    '/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021/final/play_rankings_3s.xlsx',
    index=True,
)

In [19]:
# 3rd down, mid yardage (5-10): per-scheme stats and ranks.
scenario_df = plays_df.loc[plays_df['scenario'].eq('3 5-10')]
final_df = scenario_calc(scenario_df)
final_df

Unnamed: 0,scheme,completions,incompletions,interceptions,epaMean,totalPlays,completionPct,interceptionPct,completionRank,epaRank,intRank,comboRank
1,cover-1,96,99,5,-0.360918,200,0.48,0.025,2.0,1.0,3.0,6.0
4,cover-4,422,308,20,-0.133592,750,0.562667,0.026667,4.0,3.0,1.0,8.0
0,cover-0,92,110,2,-0.108994,204,0.45098,0.009804,1.0,4.0,5.0,10.0
3,cover-3,477,368,22,-0.017087,867,0.550173,0.025375,3.0,5.0,2.0,10.0
2,cover-2,124,85,5,-0.154688,214,0.579439,0.023364,5.0,2.0,4.0,11.0


In [20]:
# Save the current final_df to an Excel workbook, keeping the row index as a column.
final_df.to_excel(
    '/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021/final/play_rankings_3m.xlsx',
    index=True,
)

In [21]:
# 3rd down, long yardage (11+): per-scheme stats and ranks.
scenario_df = plays_df.loc[plays_df['scenario'].eq('3 11+')]
final_df = scenario_calc(scenario_df)
final_df

Unnamed: 0,scheme,completions,incompletions,interceptions,epaMean,totalPlays,completionPct,interceptionPct,completionRank,epaRank,intRank,comboRank
3,cover-3,84,63,5,-0.494672,152,0.552632,0.032895,1.0,4.0,1.0,6.0
2,cover-2,36,27,2,-0.510098,65,0.553846,0.030769,2.0,3.0,2.0,7.0
0,cover-0,8,6,0,-1.143039,14,0.571429,0.0,3.0,1.0,4.5,8.5
4,cover-4,226,121,8,-0.551295,355,0.63662,0.022535,4.0,2.0,3.0,9.0
1,cover-1,14,5,0,-0.328749,19,0.736842,0.0,5.0,5.0,4.5,14.5


In [22]:
# Save the current final_df to an Excel workbook, keeping the row index as a column.
final_df.to_excel(
    '/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021/final/play_rankings_3l.xlsx',
    index=True,
)

In [23]:
# 4th down, short yardage (0-4): per-scheme stats and ranks.
scenario_df = plays_df.loc[plays_df['scenario'].eq('4 0-4')]
final_df = scenario_calc(scenario_df)
final_df

Unnamed: 0,scheme,completions,incompletions,interceptions,epaMean,totalPlays,completionPct,interceptionPct,completionRank,epaRank,intRank,comboRank
4,cover-4,11,11,0,-0.389062,22,0.5,0.0,1.0,1.0,3.5,5.5
1,cover-1,13,10,0,0.065695,23,0.565217,0.0,2.0,2.0,3.5,7.5
3,cover-3,40,23,2,0.645467,65,0.615385,0.030769,3.0,5.0,1.0,9.0
0,cover-0,41,21,0,0.354587,62,0.66129,0.0,4.0,3.0,3.5,10.5
2,cover-2,6,1,0,0.491532,7,0.857143,0.0,5.0,4.0,3.5,12.5


In [24]:
# Save the current final_df to an Excel workbook, keeping the row index as a column.
final_df.to_excel(
    '/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021/final/play_rankings_4s.xlsx',
    index=True,
)

In [25]:
# 4th down, mid yardage (5-10): per-scheme stats and ranks.
scenario_df = plays_df.loc[plays_df['scenario'].eq('4 5-10')]
final_df = scenario_calc(scenario_df)
final_df

Unnamed: 0,scheme,completions,incompletions,interceptions,epaMean,totalPlays,completionPct,interceptionPct,completionRank,epaRank,intRank,comboRank
1,cover-1,4,6,0,-1.686761,10,0.4,0.0,1.0,1.0,4.5,6.5
2,cover-2,4,4,1,-0.116909,9,0.444444,0.111111,3.0,3.0,1.0,7.0
0,cover-0,8,10,1,0.120441,19,0.421053,0.052632,2.0,4.0,2.0,8.0
3,cover-3,12,14,0,-0.671547,26,0.461538,0.0,4.0,2.0,4.5,10.5
4,cover-4,18,8,1,0.158545,27,0.666667,0.037037,5.0,5.0,3.0,13.0


In [26]:
# Save the current final_df to an Excel workbook, keeping the row index as a column.
final_df.to_excel(
    '/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021/final/play_rankings_4m.xlsx',
    index=True,
)

In [27]:
# 4th down, long yardage (11+): per-scheme stats and ranks.
scenario_df = plays_df.loc[plays_df['scenario'].eq('4 11+')]
final_df = scenario_calc(scenario_df)
final_df

Unnamed: 0,scheme,completions,incompletions,interceptions,epaMean,totalPlays,completionPct,interceptionPct,completionRank,epaRank,intRank,comboRank
3,cover-4,1,2,3,-2.26956,6,0.166667,0.5,2.0,2.0,1.0,5.0
0,cover-0,0,1,0,-0.882783,1,0.0,0.0,1.0,3.0,3.0,7.0
2,cover-3,2,2,0,-2.690808,4,0.5,0.0,3.0,1.0,3.0,7.0
1,cover-2,3,0,0,2.705877,3,1.0,0.0,4.0,4.0,3.0,11.0


In [28]:
# Save the current final_df to an Excel workbook, keeping the row index as a column.
final_df.to_excel(
    '/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021/final/play_rankings_4l.xlsx',
    index=True,
)