In [1]:
!pip install -U kaleido
import kaleido

Collecting kaleido
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl (79.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.9/79.9 MB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: kaleido
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
lida 0.0.10 requires fastapi, which is not installed.
lida 0.0.10 requires python-multipart, which is not installed.
lida 0.0.10 requires uvicorn, which is not installed.[0m[31m
[0mSuccessfully installed kaleido-0.2.1


In [14]:
# Mount Google Drive at /content/drive so the project folder used below is
# reachable. force_remount=True requests a fresh mount even when one is
# already present.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [3]:
# @title Imports

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from __future__ import annotations
import tkinter as tk
from tkinter import simpledialog
import plotly.graph_objects as go
from plotly.colors import n_colors
from plotly.subplots import make_subplots

In [5]:
# Project root on the mounted Drive (hard-coded for this Colab setup).
path = '/content/drive/MyDrive/Final_Project/Data_Vis_Final_Football'

# Make the project root the working directory: relative paths used later
# (e.g. the 'Ridgeline.png' export) resolve against it.
# NOTE(review): presumably this is also what makes the `src` package below
# importable - confirm.
os.chdir(path)

from src.football_db import FootballDB

In [6]:
# Single FootballDB handle; later cells call its get_t* methods to fetch tables.
FDB = FootballDB()

In [7]:
# @title Functions

def get_defensive_stats(team, df, verbose = False):
  '''
  Compute per-play defensive stats for one team.

  Inputs:
    team: value to look up in the 'defense' column of df
    df: DataFrame with 'defense', 'yards_gained', 'first_down_conv' and 'down' columns
    verbose (optional): if truthy, prints the number of plays to analyze
  Outputs: dictionary of stats, or an empty tuple (after printing a message)
    when team does not appear in df['defense']

  Takes a dataframe, subsets it, and develops defensive stats for the specified team. Stats returned are:
  avg_yards_conceded: average yards conceded per play, rounded to 3 places
  median_yards_conceded: median yards conceded per play
  first_down_efficency: 1 - (first downs allowed / number of plays)
  first_down_on_fourth_effic: 1 - (first downs allowed on fourth down / number of fourth downs)

  A play counts as a first down when its 'first_down_conv' value is 'Y'.
  When the team faced no fourth-down plays, first_down_on_fourth_effic is 1.
  '''

  if team not in df['defense'].unique():
    print('Input defense is not in our data, please check spelling at try again')
    return ()

  team_df = df[df['defense'] == team]

  num_plays = len(team_df)
  if verbose:
    print('there are', num_plays, 'to analyze')

  avg_yards_conceded = np.mean(team_df['yards_gained'])
  median_yards_conceded = np.median(team_df['yards_gained'])

  # value_counts() has no 'Y' entry when no play was converted, so the lookup
  # raises KeyError; that case means "zero first downs allowed".
  # NOTE: a missing 'first_down_conv' column raises KeyError too and is
  # therefore also reported as zero first downs.
  try:
    first_down_efficency = team_df['first_down_conv'].value_counts()['Y'] / num_plays
  except KeyError:
    first_down_efficency = 0

  # Same calculation restricted to fourth-down plays. With no fourth downs the
  # value_counts() result is empty, so the KeyError branch also covers the
  # would-be division by zero.
  four_d = team_df[team_df['down'] == 4]
  num_four_d = len(four_d)
  try:
    first_down_on_fourth_effic = four_d['first_down_conv'].value_counts()['Y'] / num_four_d
  except KeyError:
    first_down_on_fourth_effic = 0

  # Efficiency is reported from the defense's point of view: the share of
  # plays on which a first down was NOT conceded.
  stats_dict = {'avg_yards_conceded': round(avg_yards_conceded, 3),
                'median_yards_conceded': median_yards_conceded,
                'first_down_efficency': round(1 - first_down_efficency, 5),
                'first_down_on_fourth_effic': round(1 - first_down_on_fourth_effic, 5)
                }

  return stats_dict




  ######################################################################################


def get_offensive_stats(team, df, verbose = False):
  '''
  Compute per-play offensive stats for one team.

  Inputs:
    team: value to look up in the 'offense' column of df
    df: DataFrame with 'offense', 'yards_gained', 'first_down_conv' and 'down' columns
    verbose (optional): if truthy, prints the number of plays to analyze
  Outputs: dictionary of stats, or an empty tuple (after printing a message)
    when team does not appear in df['offense'] or df has no 'yards_gained' column

  Takes a dataframe, subsets it, and develops offensive stats for the specified team. Stats returned are:
  avg_yard: average yards gained per play, rounded to 3 places
  median_yard: median yards gained per play
  fd_effic: first downs / number of plays
  first_down_on_fourth_effic: first downs on fourth down / number of fourth downs

  A play counts as a first down when its 'first_down_conv' value is 'Y'.
  When the team ran no fourth-down plays, first_down_on_fourth_effic is 0.
  '''

  if team not in df['offense'].unique():
    print('Input offense is not in our data, please check spelling at try again')
    return ()
  rush_team = df[df['offense'] == team]

  if 'yards_gained' not in rush_team.columns:
    print('No yards gained found, please ensure you are using the correct dataframe')
    return ()

  num_plays = len(rush_team)
  if verbose:
    print('there are', num_plays, 'to analyze')

  avg_yard = np.mean(rush_team['yards_gained'])
  median_yard = np.median(rush_team['yards_gained'])

  # value_counts() has no 'Y' entry when no play was converted, so the lookup
  # raises KeyError; that case means "zero first downs".
  # NOTE: a missing 'first_down_conv' column raises KeyError too and is
  # therefore also reported as zero first downs.
  try:
    fd_effic = rush_team['first_down_conv'].value_counts()['Y'] / num_plays
  except KeyError:
    fd_effic = 0

  # Same calculation restricted to fourth-down plays. With no fourth downs the
  # value_counts() result is empty, so the KeyError branch also covers the
  # would-be division by zero.
  four_d = rush_team[rush_team['down'] == 4]
  num_four_d = len(four_d)
  try:
    first_down_on_fourth_effic = four_d['first_down_conv'].value_counts()['Y'] / num_four_d
  except KeyError:
    first_down_on_fourth_effic = 0

  stats_dict = {'avg_yard': np.round(avg_yard, 3),
                'median_yard': median_yard,
                'fd_effic': np.round(fd_effic, 5),
                'first_down_on_fourth_effic': np.round(first_down_on_fourth_effic, 5)
              }
  return stats_dict


##################################################

def get_offensive_dict(df):
  '''
  Build a per-team lookup of offensive stats.

  Inputs: a pandas dataframe with an 'offense' column
  Outputs: a dictionary keyed by each distinct value of df['offense'], whose
    values are what get_offensive_stats returns for that team over df
  '''
  return {
      team: get_offensive_stats(team, df)
      for team in df['offense'].unique()
  }


####################################


def get_defensive_dict(df):
  '''
  Build a per-team lookup of defensive stats.

  Inputs: a pandas dataframe with a 'defense' column
  Outputs: a dictionary keyed by each distinct value of df['defense'], whose
    values are what get_defensive_stats returns for that team over df
  '''
  return {
      team: get_defensive_stats(team, df)
      for team in df['defense'].unique()
  }

###################################


def get_off_stats_against(offense, defense, df, verbose = False):
  '''
  Offensive stats for one offense, restricted to plays against one defense.

  Inputs: offense, defense, dataframe, verbose (optional)
    If verbose is truthy, prints a message when the two teams share no plays
  Outputs: whatever get_offensive_stats returns for the plays where
    df['offense'] == offense and df['defense'] == defense, or an empty tuple
    when either team is missing from df or the pairing has no plays
  '''

  if offense not in df['offense'].unique():
    print('Input offense is not in our data, please check spelling at try again')
    return ()
  if defense not in df['defense'].unique():
    print('Input defense is not in our data, please check spelling at try again')
    return ()

  # Only the head-to-head plays matter here.
  df_combo = df[(df['offense'] == offense) & (df['defense'] == defense)]

  # Both teams exist in the data but never faced each other.
  if len(df_combo) == 0:
    if verbose:
      print('No data for', offense, 'against', defense)
    return ()

  return get_offensive_stats(offense, df_combo)


#########################

def get_def_stats_against(offense, defense, df, verbose = False):
  '''
  Defensive stats for one defense, restricted to plays against one offense.

  Inputs: offense, defense, dataframe, verbose (optional)
    If verbose is truthy, prints a message when the two teams share no plays
  Outputs: whatever get_defensive_stats returns for the plays where
    df['offense'] == offense and df['defense'] == defense, or an empty tuple
    when either team is missing from df or the pairing has no plays
  '''
  offense_known = offense in df['offense'].unique()
  if not offense_known:
    print('Input offense is not in our data, please check spelling at try again')
    return ()

  defense_known = defense in df['defense'].unique()
  if not defense_known:
    print('Input defense is not in our data, please check spelling at try again')
    return ()

  # Keep only the head-to-head plays between the two teams.
  head_to_head = (df['offense'] == offense) & (df['defense'] == defense)
  matchup_plays = df[head_to_head]

  if len(matchup_plays) != 0:
    return get_defensive_stats(defense, matchup_plays)

  # Both teams exist in the data but never faced each other.
  if verbose:
    print('No data for', offense, 'against', defense)
  return ()

#########################################


def get_offensive_dict_against(offense, df):
  '''
  Offensive stats for one offense, broken down by opposing defense.

  Inputs: offense, dataframe
  Outputs: a dictionary keyed by each distinct value of df['defense'], whose
    values are what get_off_stats_against returns for that pairing; an empty
    tuple (after printing a message) when offense is not in df['offense']
  '''
  if offense in df['offense'].unique():
    return {
        opponent: get_off_stats_against(offense, opponent, df)
        for opponent in df['defense'].unique()
    }

  print('Input offense is not in our data, please check spelling at try again')
  return ()


####################################


def get_defensive_dict_against(defense, df):
  '''
  Defensive stats for one defense, broken down by opposing offense.

  Inputs: defense, dataframe
  Outputs: a dictionary keyed by each distinct value of df['offense'], whose
    values are what get_def_stats_against returns for that pairing; an empty
    tuple (after printing a message) when defense is not in df['defense']
  '''
  if defense in df['defense'].unique():
    return {
        opponent: get_def_stats_against(opponent, defense, df)
        for opponent in df['offense'].unique()
    }

  print('Input defense is not in our data, please check spelling at try again')
  return ()




In [8]:
# Fetch the tables used by the rest of the notebook from the FootballDB handle.
# NOTE(review): judging by the getter names these are the rushing-play,
# passing-play and game tables - confirm against src/football_db.
rush = FDB.get_tRush()
throw = FDB.get_tPass()
game = FDB.get_tGame()

In [9]:
# @title Modern Teams

# Team abbreviations treated as the "modern" teams, eight per row.
selected = [
    'ARZ', 'ATL', 'BLT', 'BUF', 'CAR', 'CHI', 'CIN', 'CLV',
    'DAL', 'DEN', 'DET', 'GB', 'HST', 'IND', 'JAX', 'KC',
    'LV', 'LAC', 'LA', 'MIA', 'MIN', 'NE', 'NO', 'NYG',
    'NYJ', 'PHI', 'PIT', 'SF', 'SEA', 'TB', 'TEN', 'WAS',
]

In [10]:
# Attach the game-level columns to every pass play via the shared game_id key.
merged_throw = pd.merge(throw, game, on='game_id')

# Keep only plays from the 2022 season ...
is_2022 = merged_throw['season'] == 2022
throw_2022 = merged_throw.loc[is_2022]

# ... and of those, only the ones flagged as a touchdown.
is_touchdown = throw_2022['touchdown'] == 1.0
td_frame = throw_2022.loc[is_touchdown]
td_frame

Unnamed: 0,play_id,game_id,offense,defense,game_clock,clock_min,clock_sec,quarter,down,distance,...,play_action,screen,first_down_conv,stunt,yards_gained,play_type,season,week,home_team,away_team
111007,4490223,23098,BUF,LA,10:03,10,3,1,3,1,...,1,0,Y,,37,P,2022,1,LA,BUF
111029,4490356,23098,LA,BUF,03:04,3,4,2,2,4,...,0,0,Y,1.0,4,P,2022,1,LA,BUF
111036,4490450,23098,BUF,LA,08:02,8,2,3,1,7,...,0,0,Y,,8,P,2022,1,LA,BUF
111047,4490547,23098,BUF,LA,09:33,9,33,4,3,4,...,0,0,Y,,53,P,2022,1,LA,BUF
111083,4521354,23104,DET,PHI,04:14,4,14,3,3,4,...,0,0,Y,,6,P,2022,1,DET,PHI
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123520,4981485,23661,KC,CIN,04:21,4,21,3,3,10,...,0,0,Y,,19,P,2022,30,KC,CIN
123545,4990082,23662,KC,PHI,07:03,7,3,1,2,8,...,0,0,Y,1.0,19,P,2022,32,PHI,KC
123553,4990122,23662,PHI,KC,15:00,15,0,2,1,10,...,1,0,Y,,45,P,2022,32,PHI,KC
123579,4990300,23662,KC,PHI,12:08,12,8,4,3,3,...,1,0,Y,,10,P,2022,32,PHI,KC


In [13]:
# @title Ridgeline Plot

fig = go.Figure()

# Order the teams by mean touchdown-pass yardage so traces are added (and
# coloured along the gradient) from the lowest average to the highest.
avg_td_throw = td_frame.groupby('offense')['yards'].mean()
sorted_teams = avg_td_throw.sort_values().index.tolist()

# Build the colour gradient once rather than on every loop iteration. It keeps
# the original 48 steps (so colours are unchanged for up to 48 teams) but
# grows with the data, so colors[n] can never index past the end.
colors = n_colors('rgb(5, 200, 200)', 'rgb(200, 10, 10)', max(48, len(sorted_teams)), colortype='rgb')

# One violin per team, built from that team's touchdown-pass yardages.
for n, team in enumerate(sorted_teams):
  team_td_yards = td_frame.loc[td_frame['offense'] == team, 'yards']
  fig.add_trace(go.Violin(x=team_td_yards, line_color=colors[n], name=team))

# Horizontal, one-sided violins without individual points.
fig.update_traces(orientation='h', side='positive', width=3, points=False, showlegend = False)
fig.update_layout(xaxis_title = 'Yards', yaxis_title = 'Teams',title = {'text':'Distribution of TD Pass Yards For 2022 Season By Team', 'font':{'size':20},'x':0.5},xaxis_showgrid=False, xaxis_zeroline=False, height = 800, width = 600)
fig.update_xaxes(range=[-20, 110])

# Reference lines: dotted verticals at 0 and 10 yards, plus a solid baseline
# at y = -0.5.
fig.update_layout(shapes=[
    dict(type='line', x0=0, y0=-1, x1=0, y1=33, line=dict(color='grey', dash='dot')),
    dict(type='line', x0=10, y0=-1, x1=10, y1=33, line=dict(color='grey', dash='dot')),
    dict(type='line', x0=-15, y0=-0.5, x1=100, y1=-0.5, line=dict(color='grey'))])


fig.show()

# Static export (needs kaleido); the relative path resolves against the
# current working directory.
fig.write_image('Ridgeline.png')

# write_html() saves the interactive figure to disk. The previous to_html()
# call only returned the markup as a string - its first argument is a config
# dict, not a path - so no file was ever written.
fig.write_html("/content/drive/MyDrive/Final_Project/Ridgeline.html")


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-69875e8b3315>", line 35, in <cell line: 35>
    fig.write_image('Ridgeline.png')
  File "/usr/local/lib/python3.10/dist-packages/plotly/basedatatypes.py", line 3840, in write_image
    return pio.write_image(self, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/plotly/io/_kaleido.py", line 297, in write_image
    path.write_bytes(img_data)
  File "/usr/lib/python3.10/pathlib.py", line 1143, in write_bytes
    with self.open(mode='wb') as f:
  File "/usr/lib/python3.10/pathlib.py", line 1119, in open
    return self._accessor.open(self, mode, buffering, encoding, errors,
OSError: [Errno 107] Transport endpoint is not connected: 'Ridgeline.png'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File