# Success measures and combination per penalty kill

## Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Figure defaults for every plot in this notebook: 200 DPI and a white
# figure background.
plt.rcParams.update({
    'figure.dpi': 200,
    'figure.facecolor': 'white',
})

## Load data

In [3]:
# Main event table; the first CSV column is used as the row index.
df = pd.read_csv(
    'data/BDC_2024_Data_Cleaned.csv',
    index_col=0,
)

# Zone-entry table, loaded the same way.
entries = pd.read_csv(
    'data/BDC_2024_Zone_Entries.csv',
    index_col=0,
)

# Play-of-interest labels, read with the first CSV column as index and
# attached to the event table as a new column.
plays_of_interest = pd.read_csv(
    'data/BDC_2024_Plays_of_Interest.csv',
    index_col=0,
)
df['Plays of Interest'] = plays_of_interest

In [4]:
# Row masks for the two special-teams states recorded in 'Team Status'.
is_pk = df['Team Status'] == 'PK'
is_pp = df['Team Status'] == 'PP'

# Independent copies of the PK-only and PP-only rows.
pk = df.loc[is_pk].copy()
pp = df.loc[is_pp].copy()

# Every row that is either PK or PP (selected from df without an explicit copy).
p = df.loc[is_pk | is_pp]

In [5]:
# Number of distinct (non-missing) penalty IDs among the PK rows.
num_pens = len(pk['Penalty ID'].dropna().unique())

In [6]:
# Count each 'Plays of Interest' label per penalty for the PK rows, then
# pivot the labels into columns (one row per penalty, 0 where a label never
# occurs) and drop the leftover columns-axis name.
pk_grp = pk.groupby('Penalty ID')
pk_play_counts = pk_grp['Plays of Interest'].value_counts(sort=False)
pk_df = (
    pk_play_counts
    .unstack(fill_value=0)
    .reset_index()
    .rename_axis([None], axis=1)
)

In [7]:
# Same per-penalty label counts, this time for the PP rows.
pp_grp = pp.groupby('Penalty ID')
pp_play_counts = pp_grp['Plays of Interest'].value_counts(sort=False)
pp_df = (
    pp_play_counts
    .unstack(fill_value=0)
    .reset_index()
    .rename_axis([None], axis=1)
)

# Outer-join both tables on the penalty; label columns present on both sides
# get ' F' (from the PK table) and ' A' (from the PP table) appended.
pk_df = pk_df.merge(
    pp_df,
    how='outer',
    on='Penalty ID',
    suffixes=[' F', ' A'],
)

In [8]:
# Shot attempts per penalty: events labelled 'Shot' or 'Goal'.
# "F" counts come from the PK rows, "A" counts from the PP rows.
attempt_events = ['Shot', 'Goal']
pk_is_attempt = pk['Event'].isin(attempt_events)
pp_is_attempt = pp['Event'].isin(attempt_events)

shots_against = pp.loc[pp_is_attempt].groupby('Penalty ID').size().rename('Shots A')
shots_for = pk.loc[pk_is_attempt].groupby('Penalty ID').size().rename('Shots F')

# Left joins: penalties without any attempt end up with NaN in these columns.
pk_df = pk_df.merge(shots_for, how='left', on='Penalty ID')
pk_df = pk_df.merge(shots_against, how='left', on='Penalty ID')

# Unblocked attempts per penalty: goals, plus shots whose 'Detail 2' is not
# 'blocked'. Reindexing over 1..num_pens gives penalties without such an
# attempt an explicit 0.
penalty_ids = np.arange(1, num_pens + 1)

pk_is_unblocked = (pk['Event'] == 'Goal') | (
    (pk['Event'] == 'Shot') & (pk['Detail 2'] != 'blocked')
)
pp_is_unblocked = (pp['Event'] == 'Goal') | (
    (pp['Event'] == 'Shot') & (pp['Detail 2'] != 'blocked')
)

shots_for_nb = (
    pk.loc[pk_is_unblocked]
    .groupby('Penalty ID')
    .size()
    .reindex(penalty_ids, fill_value=0)
)
shots_against_nb = (
    pp.loc[pp_is_unblocked]
    .groupby('Penalty ID')
    .size()
    .reindex(penalty_ids, fill_value=0)
)

# Fenwick differential: unblocked attempts against minus unblocked attempts for.
fen = (shots_against_nb - shots_for_nb).rename('Fenwick A')
pk_df = pk_df.merge(fen, how='left', on='Penalty ID')

In [9]:
# possession
# Share of summed 'Pos Time' that belongs to the PK rows, per penalty.
#
# Both totals are aligned on the full 1..num_pens range with 0 for penalties
# that have no rows on one side. Without that alignment, a penalty present on
# only one side produces NaN in the division and is later reported as 0 %,
# even when the PK side held all of the recorded possession.
penalty_ids = pd.Index(np.arange(1, num_pens + 1), name='Penalty ID')

# Each distinct 'Pos Time' value is summed once per penalty.
# NOTE(review): presumably the same duration is repeated on every event of a
# possession; two different possessions of equal length would collapse into
# one here -- confirm against the data.
g1 = pk.groupby(['Penalty ID'])
posf = g1['Pos Time'].agg(lambda x: x.drop_duplicates(keep='first').sum())
# reindex returns a new Series, so the result must be assigned back.
posf = posf.reindex(penalty_ids, fill_value=0)

g2 = pp.groupby(['Penalty ID'])
posa = g2['Pos Time'].agg(lambda x: x.drop_duplicates(keep='first').sum())
posa = posa.reindex(penalty_ids, fill_value=0)

ppos = 100 * posf / (posf + posa)
ppos.name = '% Possession F'

# 0 / 0 (no possession time on either side) is NaN and becomes 0 below.
# fillna(0) is applied to the whole merged frame, so NaNs in any other
# pk_df column are zero-filled here as well.
pk_df = pd.merge(pk_df, ppos, how='left', on='Penalty ID').fillna(0)

In [10]:
# time in zone
# A row counts as "in the D zone" when it is a PK row with X < 75 or a PP row
# with X > 125.
# NOTE(review): presumably both conditions describe the penalty killers'
# defensive end in each team's own coordinate frame -- confirm.
in_d_zone = (
    ((df['Team Status'] == 'PK') & (df['X Coordinate'] < 75)) |
    ((df['Team Status'] == 'PP') & (df['X Coordinate'] > 125))
)

# A stretch starts on an in-zone row whose predecessor is out of zone and ends
# on an in-zone row whose successor is out of zone. The rows before the first
# and after the last row are both treated as out of zone, so every stretch has
# exactly one start and one end and the two selections below always have the
# same length (an unfilled shift at the top would drop the start of a stretch
# that begins on the very first row and break the subtraction).
prev_in_zone = in_d_zone.shift(1, fill_value=False)
next_in_zone = in_d_zone.shift(-1, fill_value=False)
d_zone_entr = in_d_zone & ~prev_in_zone
d_zone_ext = in_d_zone & ~next_in_zone

# Duration of each stretch: 'Time' on its last row minus 'Time' on its first.
# NOTE(review): the sign depends on the direction in which 'Time' runs, and
# stretches are detected on the whole frame, not per penalty -- confirm both
# are intended.
time_entr = df.loc[d_zone_entr, 'Time']
time_ext = df.loc[d_zone_ext, 'Time']
zone_ts = time_ext.values - time_entr.values

# Store each duration on the row that starts the stretch (0 elsewhere), then
# total it per penalty.
df['Time in D Zone'] = 0
df.loc[d_zone_entr, 'Time in D Zone'] = zone_ts
pk_zone_ts = df.groupby('Penalty ID')['Time in D Zone'].sum().rename('Time in D Zone')

# fillna(0) is applied to the whole merged frame, not only the new column.
pk_df = pd.merge(pk_df, pk_zone_ts, how='left', on='Penalty ID').fillna(0)

In [11]:
# cf = (
#     pk_df['Controlled entry F'] + 
#     pk_df['Failed controlled entry F'] +
#     pk_df['Controlled exit F'] +
#     # pk_df['Failed controlled exit F'] + 
#     pk_df['Neutral zone pass F'] +
#     # pk_df['Failed neutral zone pass F'] +
#     pk_df['Cycle back']
# )

# ncf = (
#     pk_df['Dump in F'] + 
#     # pk_df['Failed dump in F'] +
#     pk_df['Dump out F'] +
#     pk_df['Failed dump out']
# )

# pk_df['Control F'] = (cf - ncf) / (cf + ncf)

In [12]:
# ca = (
#     pk_df['Controlled entry A'] + 
#     # pk_df['Failed controlled entry A'] +
#     pk_df['Controlled exit A'] +
#     # pk_df['Failed controlled exit'] + 
#     # pk_df['Neutral zone pass A'] 
#     # pk_df['Failed neutral zone pass']
# )

# nca = (
#         pk_df['Dump in A'] +
#         pk_df['Dump out A']
# ) 

# pk_df['Control A'] = (ca - nca) / (ca + nca)

In [13]:
# pk_df['Control'].fillna(0, inplace=True)
# pk_df['Control'].replace([np.inf, -np.inf], 0, inplace=True)

In [14]:
# Write the per-penalty statistics table to disk; the DataFrame index is
# written as the first CSV column.
pk_df.to_csv(
    path_or_buf='data/BDC_2024_Penalty_Kill_Statistics.csv',
    index=True,
)