In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
### IMPORTS AND GETTING DATA
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from __future__ import annotations

In [3]:


path = '/content/drive/My Drive/WM_Football_Independent_Study'

os.chdir(path)


from src.football_viz import FBField
from src.football_db import FootballDB
import src.dropdown_lists as dropdown



In [4]:
FDB = FootballDB()

Creating the database


In [94]:
### Functions

def get_defensive_stats(team, df, verbose = False):
  import numpy as np

  '''
  Inputs: DataFrame, Team, Verbose (optional)
    If verbose = True, prints number of plays to analyze
  Outputs: Dictionary of stats

  Takes a dataframe, subsets it, and devlops defensive stats for the specified team. Stats returned are:
  avg_yards_conceded: average yards conceded per play
  median_yards_conceded: median yards conceded per play
  first_down_efficency: 1 - (first downs allowed / number of plays)
  first_down_on_fourth_effic: 1 - (first downs allowed on fourth down / number of of fourth downs)
  '''


  if team not in df['defense'].unique():
      print('Input defense is not in our data, please check spelling at try again')
      return()

  team_df = df[df['defense'] == team]

  num_plays = len(team_df)
  if verbose == True:
    print('there are', num_plays, 'to analyze')

  avg_yards_conceded = np.mean(team_df['yards_gained'])
  median_yards_conceded = np.median(team_df['yards_gained'])
  try:
    first_down_efficency = team_df['first_down_conv'].value_counts()['Y'] / num_plays
  except KeyError:
    first_down_efficency = 0



  four_d = team_df[team_df['down'] == 4]
  num_four_d = len(four_d)
  try:
    first_down_on_fourth_effic = four_d['first_down_conv'].value_counts()['Y'] / num_four_d
  except KeyError:
    first_down_on_fourth_effic = 0

  stats_dict = {'avg_yards_conceded': round(avg_yards_conceded, 3),
                'median_yards_conceded': median_yards_conceded,
                'first_down_efficency': round(1 - first_down_efficency, 5),
                'first_down_on_fourth_effic': round(1 - first_down_on_fourth_effic, 5)
                }

  return(stats_dict)




  ######################################################################################


def get_offensive_stats(team, df, verbose = False):
  import numpy as np


  '''
  Inputs: DataFrame, Team, Verbose (optional)
    If verbose = True, prints number of plays to analyze
  Outputs: Dictionary of stats

  Takes a dataframe, subsets it, and devlops offensive stats for the specified team. Stats returned are:
  avg_yards: average yards gained per play
  first_down_efficency: first downs / number of plays
  first_down_on_fourth_effic: first downs allowed on fourth down / number of of fourth downs

  '''


  if team not in df['offense'].unique():
      print('Input offense is not in our data, please check spelling at try again')
      return()
  rush_team = df[df['offense'] == team]

  if 'yards_gained' not in rush_team.columns:
      print('No yards gained found, please ensure you are using the correct dataframe')
      return()

  num_plays = len(rush_team)
  if verbose == True:
    print('there are', num_plays, 'to analyze')

  avg_yard = np.mean(rush_team['yards_gained'])

  median_yard = np.median(rush_team['yards_gained'])
  try:
    fd_effic = rush_team['first_down_conv'].value_counts()['Y'] / num_plays
  except KeyError:
    fd_effic = 0
  avg_yard = np.mean(rush_team['yards_gained'])


  four_d = rush_team[rush_team['down'] == 4]
  num_four_d = len(four_d)
  try:
    first_down_on_fourth_effic = four_d['first_down_conv'].value_counts()['Y'] / num_four_d
  except KeyError:
    first_down_on_fourth_effic = 0


  stats_dict = {'avg_yard': np.round(avg_yard, 3),
                'median_yard': median_yard,
                'fd_effic': np.round(fd_effic, 5),
                'first_down_on_fourth_effic': np.round(first_down_on_fourth_effic, 5)
              }
  return(stats_dict)


##################################################

def get_offensive_dict(df):

  '''
  Inputs: a pandas dataframe
  Outputs: a dictionary of offensive stats for each team

  Generates a dictionary of offensive stats for each team in the dataset
  '''
  output = {}

  for team in df['offense'].unique():
    output[team] = get_offensive_stats(team, df)

  return(output)


####################################


def get_defensive_dict(df):
  output = {}

  for team in df['defense'].unique():
    output[team] = get_defensive_stats(team, df)

  return(output)

###################################


def get_off_stats_against(offense, defense, df, verbose = False):
  '''
  Inputs: offense, defense, dataframe
  Outputs: dictionary of offensive stats against that specfic team
  '''

  if offense not in df['offense'].unique():
      print('Input offense is not in our data, please check spelling at try again')
      return()
  if defense not in df['defense'].unique():
      print('Input defense is not in our data, please check spelling at try again')
      return()



  df_off = df[df['offense'] == offense]

  df_combo = df[(df['offense'] == offense) & (df['defense'] == defense)]

  if len(df_combo) == 0:
    if verbose:
      print('No data for', offense, 'against', defense)
    return()
  else:
    stats_dict = get_offensive_stats(offense, df_combo)

  return(stats_dict)


#########################

def get_def_stats_against(offense, defense, df, verbose = False):
  '''
  Inputs: offense, defense, dataframe
  Outputs: dictionary of defensive stats against that specfic team
  '''

  if offense not in df['offense'].unique():
      print('Input offense is not in our data, please check spelling at try again')
      return()
  if defense not in df['defense'].unique():
      print('Input defense is not in our data, please check spelling at try again')
      return()



  df_combo = df[(df['offense'] == offense) & (df['defense'] == defense)]

  if len(df_combo) == 0:
    if verbose:
      print('No data for', offense, 'against', defense)
    return()
  else:
    stats_dict = get_defensive_stats(defense, df_combo)

  return(stats_dict)

#########################################


def get_offensive_dict_against(offense, df):
  '''
  Inputs: offense, dataframe
  Outputs: dictionary of offensive stats against each team
  '''

  if offense not in df['offense'].unique():
      print('Input offense is not in our data, please check spelling at try again')
      return()


  output = {}

  for team in df['defense'].unique():
    output[team] = get_off_stats_against(offense, team, df)

  return(output)


####################################


def get_defensive_dict_against(defense, df):
  '''
  Inputs: defense, dataframe
  Outputs: dictionary of defensive stats against each team
  '''

  if defense not in df['defense'].unique():
      print('Input defense is not in our data, please check spelling at try again')
      return()

  output = {}

  for team in df['offense'].unique():
    output[team] = get_def_stats_against(team, defense, df)

  return(output)




Testing the functions.

Timing:
  - get_defensive_stats/get_offensive_stats: Trivially Fast
  - get_defensive_dict/get_offensive_dict: ~5.5 seconds
  - get_defensive_stats_against/get_offensve_stats_against: Trivially Fast
  - get_defensive_dict_against/get_offensive_dict_against: ~16s


In [6]:
rush = FDB.get_tRush()
throw = FDB.get_tPass()

In [7]:
rush_off = rush['offense'].unique()
rush_def = rush['defense'].unique()
throw_off = throw['offense'].unique()
throw_def = throw['defense'].unique()

In [9]:
#Checking every team is in every data frame

print('Checking rush offense')
for team in rush_off:
  if team not in throw_off:
    print(team, 'not in passing offense')
  if team not in throw_def:
    print(team, 'not in passing defense')
  if team not in rush_def:
    print(team, 'not in rushing defense')

print('Checking rush defense')
for team in rush_def:
  if team not in throw_off:
    print(team, 'not in passing offense')
  if team not in throw_def:
    print(team, 'not in passing defense')
  if team not in rush_off:
    print(team, 'not in rushing offense')

print('Checking passing offense')
for team in throw_off:
  if team not in rush_off:
    print(team, 'not in rushing offense')
  if team not in throw_def:
    print(team, 'not in passing defense')
  if team not in rush_def:
    print(team, 'not in rushing defense')

print('Checking passing offense')
for team in throw_def:
  if team not in rush_off:
    print(team, 'not in rushing offense')
  if team not in throw_off:
    print(team, 'not in passing offense')
  if team not in rush_def:
    print(team, 'not in rushing defense')



Checking rush offense
Checking rush defense
Checking passing offense
Checking passing offense


In [10]:
%%time
test = get_offensive_dict(rush)

CPU times: user 6.48 s, sys: 0 ns, total: 6.48 s
Wall time: 6.64 s


In [11]:
WM_pass_off = throw[throw['offense'] == 'VAWM']
WM_pass_off['yards_gained'].fillna(0, inplace = True)
WM_rush_off= rush[rush['offense'] == 'VAWM']
WM_rush_off['yards_gained'].fillna(0, inplace = True)


In [12]:
%%time
WM_Rushing_stats_overall = get_offensive_stats('VAWM', rush)

CPU times: user 39.8 ms, sys: 0 ns, total: 39.8 ms
Wall time: 40.5 ms


In [13]:
test['VAWM'] == WM_Rushing_stats_overall

True

In [14]:
WM_Rushing_stats_overall

{'avg_yard': 9.236,
 'median_yard': 6.0,
 'fd_effic': 301,
 'first_down_on_fourth_effic': 0.5}

In [15]:
test['VAWM']

{'avg_yard': 9.236,
 'median_yard': 6.0,
 'fd_effic': 301,
 'first_down_on_fourth_effic': 0.5}

In [16]:
%%time
test_2 = get_offensive_dict(throw)

CPU times: user 5.34 s, sys: 0 ns, total: 5.34 s
Wall time: 5.37 s


In [17]:
%%time
WM_passing_stats_overall = get_offensive_stats('VAWM', throw)

CPU times: user 38 ms, sys: 0 ns, total: 38 ms
Wall time: 38.5 ms


In [18]:
WM_passing_stats_overall

{'avg_yard': 19.464,
 'median_yard': nan,
 'fd_effic': 234,
 'first_down_on_fourth_effic': 0.57692}

In [19]:
test_2['VAWM']

{'avg_yard': 19.464,
 'median_yard': nan,
 'fd_effic': 234,
 'first_down_on_fourth_effic': 0.57692}

In [20]:
test_2['VAWM'] == WM_passing_stats_overall

False

In [27]:
WM_pass_off['defense'].unique()

array(['RIUN', 'VAJM', 'MEUN', 'PAVI', 'VARI', 'NCEL', 'PALF', 'NYCG',
       'NYAB', 'MDTO', 'DEUN', 'NCCM', 'NYST', 'VAHI', 'NCGW', 'MTST'],
      dtype=object)

In [99]:
%%time
get_off_stats_against(offense = 'VAWM', defense = 'MEUN', df = rush)

CPU times: user 121 ms, sys: 962 µs, total: 122 ms
Wall time: 128 ms


{'avg_yard': 9.75,
 'median_yard': 6.0,
 'fd_effic': 0.33333,
 'first_down_on_fourth_effic': 0.5}

In [91]:
get_def_stats_against(offense = 'VAWM', defense = 'MEUN', df = rush)

{'avg_yards_conceded': 9.75,
 'median_yards_conceded': 6.0,
 'first_down_efficency': 0.66667,
 'first_down_on_fourth_effic': 0.5}

In [73]:
rush['offense'].unique()

array(['MOSW', 'ILSO', 'ILWE', 'DEUN', 'RIUN', 'NHUN', 'NYST', 'NYAB',
       'NCEL', 'VAWM', 'MEUN', 'ILST', 'AZNO', 'VAJM', 'OHYO', 'TNMR',
       'PADU', 'OHDA', 'RIBR', 'MOSE', 'MAMK', 'PARM', 'INVA', 'SDST',
       'IADR', 'CADA', 'ORPS', 'NYMR', 'INBU', 'NDST', 'IANO', 'FLSS',
       'CASU', 'INST', 'IDST', 'WAEA', 'KYMO', 'ARCE', 'MTUN', 'CASA',
       'UTSO', 'IDUN', 'LASE', 'ILEA', 'CONO', 'TNCH', 'LANI', 'NCCE',
       'SCST', 'PAVI', 'SDVE', 'CTSH', 'PABU', 'DEST', 'MTST', 'MDMO',
       'NDUN', 'SCWO', 'UTWB', 'NCAT', 'ALNO', 'DCHO', 'NJMO', 'FLBC',
       'TNAI', 'FLAM', 'VANO', 'VARI', 'NJPR', 'PASF', 'MAHA', 'TNTC',
       'MDTO', 'MSJA', 'ARPB', 'PALF', 'CASL', 'KYEA', 'NYCW', 'MSVA',
       'TNAP', 'NYCG', 'NYCN', 'NHDA', 'NYCL', 'ALJA', 'CTYA', 'PAUN',
       'KYMU', 'MAHC', 'NYFO', 'PALE', 'NYWA', 'RIBT', 'DCGT', 'MSAL',
       'LAGR', 'LASO', 'TXAC', 'SCCH', 'TXPV', 'LAMC', 'ALST', 'ALAM',
       'TXLA', 'TXIW', 'TXSF', 'TXHT', 'SCCI', 'GAME', 'NCWE', 'SCFU',
      

In [88]:
WM_rush_off['defense'].unique()

array(['RIUN', 'VAJM', 'MEUN', 'PAVI', 'VARI', 'NCEL', 'PALF', 'NYCG',
       'NYAB', 'MDTO', 'DEUN', 'NCCM', 'NYST', 'VAHI', 'NCGW', 'MTST'],
      dtype=object)

In [97]:
%%time
foo = get_offensive_dict_against(offense = 'VAWM', df = rush)

CPU times: user 13.8 s, sys: 35.2 ms, total: 13.8 s
Wall time: 15.8 s


In [98]:
foo['MEUN']

{'avg_yard': 9.75,
 'median_yard': 6.0,
 'fd_effic': 0.33333,
 'first_down_on_fourth_effic': 0.5}