In [73]:
import pandas as pd
import sqlite3
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import re
import csv

import seaborn as sns
import matplotlib.pyplot as plt

# Import XGBoost classifier
from xgboost import XGBClassifier

# Import scikit-learn functions
from sklearn.metrics import average_precision_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import matthews_corrcoef

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

# Import scikit-plot functions
from scikitplot.metrics import plot_roc_curve
from scikitplot.metrics import plot_precision_recall_curve
from scikitplot.metrics import plot_calibration_curve

# Import SciPy function
from scipy.spatial import distance

pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)


In [74]:
# Data Directory
data_dir = "../../Data/Big-Data-Cup-2021"
bucketless_data_dir = '../../Data/bdc/data'
tracking_data_dir = "{}/TrackingData".format(data_dir)
db_path = '/Users/keltim01/Documents/databases/'

#connect to database 
conn = sqlite3.connect(db_path + 'bdc_2022.db')
cursor = conn.cursor()

In [75]:
# important numbers for the hockey rink 
ICE_LENGTH = 200
ICE_WIDTH = 85
GOAL_X = ICE_LENGTH - 10
GOAL_Y = ICE_WIDTH / 2
D_ZONE = 75
O_ZONE = ICE_LENGTH - 75

## Using tracking data to find a way to classify teams in creating "danger" on the powerplay by creating space. 

- with tracking data get totally away from shots as basis of structure but take locations of players as structure → also maybe get away from shots as basis at all
- xG everywhere not shots to be able to classify? 
- Combine tracking and event data to look at nearest defender for attackers on the powerplay
- factor in shot selection: xG shoot now, xG Pass etc.
- BART model?

Important questions: 
- Who is good in creating space? 
- Who creates more dangerous situations (more xG)? 
- Who creates danger and space in formation? 
- Who uses good chances (finishing) or selects good shots?  

In [76]:
# Merging Tracking and Play-by-Play data 
df_tracking = pd.read_sql('SELECT * FROM tracking', conn)

In [77]:
df_pbp_pp = pd.read_sql('SELECT * FROM play_by_play WHERE frame_id_1 IS NOT NULL AND strength_state > 0', conn) 

In [78]:
df_pbp_pp.head()

Unnamed: 0,level_0,index,game_date,season_year,team_name,opp_team_name,venue,period,clock_seconds,situation_type,goals_for,goals_against,player_name,event,event_successful,x_coord,y_coord,event_type,player_name_2,x_coord_2,y_coord_2,event_detail_1,event_detail_2,event_detail_3,frame_id_1,frame_id_2,home_team,away_team,game_id,is_shot,is_goal,event_id,team_id,player_id,detail_1_code,goal_diff,game_seconds_remaining,event_code,event_type_code,skaters_for,skaters_against,strength_state,distance_to_goal,angle_to_goal,prev_event,prev_event_code,prev_event_type,prev_event_type_code,prev_event_x_coord,prev_event_y_coord,prev_event_game_seconds_remaining,prev_event_distance_to_goal,prev_event_angle_to_goal,time_diff_last_event,angle_diff_last_event,distance_diff_last_event,second_prev_event,second_prev_event_code,second_prev_event_type,second_prev_event_type_code,second_prev_event_x_coord,second_prev_event_y_coord,second_prev_event_game_seconds_remaining,second_prev_event_distance_to_goal,second_prev_event_angle_to_goal,prev_time_diff_last_event,prev_angle_diff_last_event,prev_distance_diff_last_event,non_shot_expected_goals,expected_goals
0,441,441,8/2/2022,2021,Olympic (Women) - Canada,Olympic (Women) - United States,away,1,375,5 on 4,0,0,Brianne Jenner,Play,1,185,7,Direct,Marie-Philip Poulin,139.0,5.0,,0,0,344.0,370.0,Olympic (Women) - United States,Olympic (Women) - Canada,5,0,0,3,0,17,-1,0,2775.0,3,6,5,4,1,35.850384,0.140845,Play,3.0,Indirect,15.0,194.0,66.0,2778.0,23.837995,-0.170213,-3.0,0.311058,12.012389,Puck Recovery,4.0,,-1.0,165.0,82.0,2780.0,46.746658,0.632911,-2.0,-0.803124,-22.908663,0.189897,
1,442,442,8/2/2022,2021,Olympic (Women) - Canada,Olympic (Women) - United States,away,1,373,5 on 4,0,0,Marie-Philip Poulin,Play,1,161,3,Direct,Rebecca Johnston,143.0,19.0,,0,0,420.0,443.0,Olympic (Women) - United States,Olympic (Women) - Canada,5,0,0,3,0,60,-1,0,2773.0,3,6,5,4,1,49.002551,0.734177,Play,3.0,Direct,6.0,185.0,7.0,2775.0,35.850384,0.140845,-2.0,0.593332,13.152167,Play,3.0,Indirect,15.0,194.0,66.0,2778.0,23.837995,-0.170213,-3.0,0.311058,12.012389,0.189897,
2,443,443,8/2/2022,2021,Olympic (Women) - Canada,Olympic (Women) - United States,away,1,366,5 on 4,0,0,Rebecca Johnston,Play,1,176,3,Direct,Marie-Philip Poulin,130.0,17.0,,0,0,605.0,639.0,Olympic (Women) - United States,Olympic (Women) - Canada,5,0,0,3,0,81,-1,0,2766.0,3,6,5,4,1,41.907637,0.35443,Play,3.0,Direct,6.0,161.0,3.0,2773.0,49.002551,0.734177,-7.0,-0.379747,-7.094914,Play,3.0,Direct,6.0,185.0,7.0,2775.0,35.850384,0.140845,-2.0,0.593332,13.152167,0.189897,
3,444,444,8/2/2022,2021,Olympic (Women) - Canada,Olympic (Women) - United States,away,1,365,5 on 4,0,0,Marie-Philip Poulin,Play,1,130,17,Direct,Erin Ambrose,133.0,55.0,,0,0,650.0,670.0,Olympic (Women) - United States,Olympic (Women) - Canada,5,0,0,3,0,60,-1,0,2765.0,3,6,5,4,1,65.193941,2.352941,Play,3.0,Direct,6.0,176.0,3.0,2766.0,41.907637,0.35443,-1.0,1.998511,23.286305,Play,3.0,Direct,6.0,161.0,3.0,2773.0,49.002551,0.734177,-7.0,-0.379747,-7.094914,0.189897,
4,445,445,8/2/2022,2021,Olympic (Women) - Canada,Olympic (Women) - United States,away,1,363,5 on 4,0,0,Erin Ambrose,Play,1,139,55,Direct,Brianne Jenner,176.0,42.0,,0,0,701.0,718.0,Olympic (Women) - United States,Olympic (Women) - Canada,5,0,0,3,0,29,-1,0,2763.0,3,6,5,4,1,52.509523,4.08,Play,3.0,Direct,6.0,130.0,17.0,2765.0,65.193941,2.352941,-2.0,1.727059,-12.684418,Play,3.0,Direct,6.0,176.0,3.0,2766.0,41.907637,0.35443,-1.0,1.998511,23.286305,0.189897,


In [79]:
df_tracking.head()

Unnamed: 0,index,frame_id,period,track_id,team_id,team_name,jersey_number,x_coord,y_coord,video_shot,game_seconds,venue,player,position,game_id
0,0,96,1,2,Light,ROC,100,176.651307,52.308133,1,2590,away,,,4
1,1,96,1,3,Light,ROC,27,158.782222,41.325801,1,2590,away,Veronika Korzhakova,Center,4
2,2,96,1,4,Dark,Finland,10,151.138502,51.883526,1,2590,home,Elisa Holopainen,Left Wing,4
3,3,96,1,5,Light,ROC,100,163.814533,59.253376,1,2590,away,,,4
4,4,96,1,8,Dark,Finland,88,165.0,50.0,1,2590,home,Ronja Savolainen,Defense,4


In [80]:
test_pbp = df_pbp_pp.loc[df_pbp_pp['frame_id_1'] == 96]

In [81]:
test_tracking = df_tracking[(df_tracking['frame_id'] == 108) & (df_tracking['game_id'] == 5) & (df_tracking['period'] == 1)]

In [82]:
df_tracking.columns

Index(['index', 'frame_id', 'period', 'track_id', 'team_id', 'team_name',
       'jersey_number', 'x_coord', 'y_coord', 'video_shot', 'game_seconds',
       'venue', 'player', 'position', 'game_id'],
      dtype='object')

In [83]:
df_pbp_pp.columns

Index(['level_0', 'index', 'game_date', 'season_year', 'team_name',
       'opp_team_name', 'venue', 'period', 'clock_seconds', 'situation_type',
       'goals_for', 'goals_against', 'player_name', 'event',
       'event_successful', 'x_coord', 'y_coord', 'event_type', 'player_name_2',
       'x_coord_2', 'y_coord_2', 'event_detail_1', 'event_detail_2',
       'event_detail_3', 'frame_id_1', 'frame_id_2', 'home_team', 'away_team',
       'game_id', 'is_shot', 'is_goal', 'event_id', 'team_id', 'player_id',
       'detail_1_code', 'goal_diff', 'game_seconds_remaining', 'event_code',
       'event_type_code', 'skaters_for', 'skaters_against', 'strength_state',
       'distance_to_goal', 'angle_to_goal', 'prev_event', 'prev_event_code',
       'prev_event_type', 'prev_event_type_code', 'prev_event_x_coord',
       'prev_event_y_coord', 'prev_event_game_seconds_remaining',
       'prev_event_distance_to_goal', 'prev_event_angle_to_goal',
       'time_diff_last_event', 'angle_diff_last_event

In [84]:
df_pbp_pp[['frame_id_1','frame_id_2']].info()
    

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 861 entries, 0 to 860
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   frame_id_1  861 non-null    float64
 1   frame_id_2  440 non-null    float64
dtypes: float64(2)
memory usage: 13.6 KB


In [85]:
df_pbp_pp[df_pbp_pp['frame_id_1'] == 867]

Unnamed: 0,level_0,index,game_date,season_year,team_name,opp_team_name,venue,period,clock_seconds,situation_type,goals_for,goals_against,player_name,event,event_successful,x_coord,y_coord,event_type,player_name_2,x_coord_2,y_coord_2,event_detail_1,event_detail_2,event_detail_3,frame_id_1,frame_id_2,home_team,away_team,game_id,is_shot,is_goal,event_id,team_id,player_id,detail_1_code,goal_diff,game_seconds_remaining,event_code,event_type_code,skaters_for,skaters_against,strength_state,distance_to_goal,angle_to_goal,prev_event,prev_event_code,prev_event_type,prev_event_type_code,prev_event_x_coord,prev_event_y_coord,prev_event_game_seconds_remaining,prev_event_distance_to_goal,prev_event_angle_to_goal,time_diff_last_event,angle_diff_last_event,distance_diff_last_event,second_prev_event,second_prev_event_code,second_prev_event_type,second_prev_event_type_code,second_prev_event_x_coord,second_prev_event_y_coord,second_prev_event_game_seconds_remaining,second_prev_event_distance_to_goal,second_prev_event_angle_to_goal,prev_time_diff_last_event,prev_angle_diff_last_event,prev_distance_diff_last_event,non_shot_expected_goals,expected_goals
718,9787,9787,16/2/2022,2021,Olympic (Women) - Switzerland,Olympic (Women) - Finland,away,2,566,5 on 4,0,1,Andrea Brandli,Puck Recovery,1,22,56,,,,,,0,0,867.0,,Olympic (Women) - Finland,Olympic (Women) - Switzerland,3,0,0,4,3,8,-1,-1,1766.0,4,-1,5,4,1,168.541538,12.444444,Dump In/Out,0.0,,-1.0,147.0,40.0,1769.0,43.072613,17.2,-3.0,-4.755556,125.468925,Puck Recovery,4.0,,-1.0,146.0,44.0,1769.0,44.025561,29.333333,0.0,-12.133333,-0.952948,0.197509,
719,9788,9788,16/2/2022,2021,Olympic (Women) - Switzerland,Olympic (Women) - Finland,away,2,566,5 on 4,0,1,Andrea Brandli,Play,1,22,56,Direct,Lara Christen,41.0,59.0,,0,0,867.0,906.0,Olympic (Women) - Finland,Olympic (Women) - Switzerland,3,0,0,3,3,8,-1,-1,1766.0,3,6,5,4,1,168.541538,12.444444,Puck Recovery,4.0,,-1.0,22.0,56.0,1766.0,168.541538,12.444444,0.0,0.0,0.0,Dump In/Out,0.0,,-1.0,147.0,40.0,1769.0,43.072613,17.2,-3.0,-4.755556,125.468925,0.364651,
750,9949,9949,16/2/2022,2021,Olympic (Women) - Finland,Olympic (Women) - Switzerland,home,2,200,5 on 4,1,0,Elisa Holopainen,Play,0,34,20,Direct,Nelli Laitinen,36.0,65.0,,0,0,867.0,,Olympic (Women) - Finland,Olympic (Women) - Switzerland,3,0,0,3,1,23,-1,1,1400.0,3,6,5,4,1,157.614244,6.933333,Play,3.0,Direct,6.0,69.0,50.0,1401.0,121.232215,16.133333,-1.0,-9.2,36.382029,Play,3.0,Direct,6.0,56.0,79.0,1403.0,138.882144,3.671233,-2.0,12.4621,-17.649929,0.32833,


In [86]:
df_pbp_pp.loc[11,'frame_id_1']

1028.0

In [None]:
df_distance = pd.DataFrame(index=df_pbp_pp.index)
dfs_distance = []
for index in df_pbp_pp.index:
    test_pbp = df_pbp_pp.loc[index]
    test_tracking = df_tracking[(df_tracking['frame_id'] == test_pbp['frame_id_1']) & (df_tracking['game_id'] == test_pbp['game_id']) & (df_tracking['period'] == test_pbp['period'])]
    if test_pbp['player_name'] in test_tracking['player'].to_list():
        diff_x1 = test_tracking.loc[(test_tracking['player'] != test_pbp['player_name']) & (test_tracking['venue'] != test_pbp['venue']),'x_coord'] - test_tracking.loc[test_tracking['player'] == test_pbp['player_name'],'x_coord'].values[0]
        diff_x1 = diff_x1.astype(float)
        diff_y1 = abs(test_tracking.loc[(test_tracking['player'] != test_pbp['player_name']) & (test_tracking['venue'] != test_pbp['venue']),'y_coord'] - test_tracking.loc[test_tracking['player'] == test_pbp['player_name'],'y_coord'].values[0])
        pl_distance = np.sqrt(diff_x1**2 + diff_y1**2)
        dfs_distance.append(pl_distance.min())
    else:
        if test_tracking.size > 0: 
            diff_x1 = test_tracking.loc[(test_tracking['player'] != test_pbp['player_name']) & (test_tracking['venue'] != test_pbp['venue']),'x_coord'] - test_pbp['x_coord']
            diff_x1 = diff_x1.astype(float)
            diff_y1 = abs(test_tracking.loc[(test_tracking['player'] != test_pbp['player_name']) & (test_tracking['venue'] != test_pbp['venue']),'y_coord'] - test_pbp['y_coord'])
            pl_distance = np.sqrt(diff_x1**2 + diff_y1**2)
            dfs_distance.append(pl_distance.min())
        else: 
            dfs_distance.append(np.nan)
    


In [140]:
df_pbp_pp['nearest_defender'] = dfs_distance

In [141]:
df_pbp_pp['diff_nearest_defender'] = df_pbp_pp['nearest_defender'] - df_pbp_pp['nearest_defender'].shift(1)

In [146]:
df_pbp_pp.loc[(df_pbp_pp['x_coord'] >= O_ZONE) & (df_pbp_pp['event'] != "Puck Recovery"),'diff_nearest_defender'].describe()

count    242.000000
mean      -2.520629
std       13.875851
min      -79.202106
25%       -7.064740
50%       -0.738430
75%        4.441878
max       37.867777
Name: diff_nearest_defender, dtype: float64

In [149]:
df_pbp_pp.drop('distance', axis=1, inplace=True)

In [150]:
df_pbp_pp.loc[(df_pbp_pp['x_coord'] >= O_ZONE) & (df_pbp_pp['event'] != "Puck Recovery")].sort_values('diff_nearest_defender', ascending=False).head(10)

Unnamed: 0,level_0,index,game_date,season_year,team_name,opp_team_name,venue,period,clock_seconds,situation_type,goals_for,goals_against,player_name,event,event_successful,x_coord,y_coord,event_type,player_name_2,x_coord_2,y_coord_2,event_detail_1,event_detail_2,event_detail_3,frame_id_1,frame_id_2,home_team,away_team,game_id,is_shot,is_goal,event_id,team_id,player_id,detail_1_code,goal_diff,game_seconds_remaining,event_code,event_type_code,skaters_for,skaters_against,strength_state,distance_to_goal,angle_to_goal,prev_event,prev_event_code,prev_event_type,prev_event_type_code,prev_event_x_coord,prev_event_y_coord,prev_event_game_seconds_remaining,prev_event_distance_to_goal,prev_event_angle_to_goal,time_diff_last_event,angle_diff_last_event,distance_diff_last_event,second_prev_event,second_prev_event_code,second_prev_event_type,second_prev_event_type_code,second_prev_event_x_coord,second_prev_event_y_coord,second_prev_event_game_seconds_remaining,second_prev_event_distance_to_goal,second_prev_event_angle_to_goal,prev_time_diff_last_event,prev_angle_diff_last_event,prev_distance_diff_last_event,non_shot_expected_goals,expected_goals,nearest_defender,diff_nearest_defender
573,8340,8340,14/2/2022,2021,Olympic (Women) - United States,Olympic (Women) - Finland,away,3,1137,5 on 4,2,0,Alex Cavallini,Play,1,198,40,Direct,Megan Keller,195.0,39.0,,0,0,1037.0,1091.0,Olympic (Women) - Finland,Olympic (Women) - United States,2,0,0,3,4,3,-1,2,1137.0,3,6,5,4,1,8.381527,-3.2,Puck Recovery,4.0,,-1.0,198.0,40.0,1138.0,8.381527,-3.2,-1.0,0.0,0.0,Dump In/Out,0.0,,-1.0,65.0,58.0,1141.0,125.957334,8.064516,-3.0,-11.264516,-117.575807,0.263964,,45.523091,37.867777
384,5056,5056,12/2/2022,2021,Olympic (Women) - Switzerland,Olympic (Women) - Olympic Athletes from Russia,away,3,627,5 on 4,2,1,Alina Muller,Play,1,195,43,Indirect,Lara Christen,197.0,43.0,,0,0,5339.0,5380.0,Olympic (Women) - Olympic Athletes from Russia,Olympic (Women) - Switzerland,0,0,0,3,3,6,-1,1,627.0,3,15,5,4,1,5.024938,-10.0,Puck Recovery,4.0,,-1.0,193.0,11.0,629.0,31.642535,-0.095238,-2.0,-9.904762,-26.617597,Zone Entry,7.0,Carried,2.0,126.0,81.0,633.0,74.687683,1.662338,-4.0,-1.757576,-43.045148,0.249744,,33.703392,29.414377
295,3820,3820,12/2/2022,2021,Olympic (Women) - Switzerland,Olympic (Women) - Olympic Athletes from Russia,away,1,674,5 on 4,0,0,Andrea Brandli,Play,1,197,43,Indirect,Lara Christen,197.0,43.0,,0,0,2226.0,2266.0,Olympic (Women) - Olympic Athletes from Russia,Olympic (Women) - Switzerland,0,0,0,3,3,8,-1,0,3074.0,3,15,5,4,1,7.017834,-14.0,Puck Recovery,4.0,,-1.0,197.0,43.0,3075.0,7.017834,-14.0,-1.0,0.0,0.0,Dump In/Out,0.0,,-1.0,91.0,76.0,3078.0,104.514353,2.955224,-3.0,-16.955224,-97.496519,0.263964,,91.047926,28.480793
180,2359,2359,8/2/2022,2021,Olympic (Women) - Finland,Olympic (Women) - Olympic Athletes from Russia,home,1,138,5 on 4,1,0,Minnamari Tuominen,Play,1,130,7,Direct,Nelli Laitinen,144.0,62.0,,0,0,1625.0,1669.0,Olympic (Women) - Finland,Olympic (Women) - Olympic Athletes from Russia,4,0,0,3,1,66,-1,1,2538.0,3,6,5,4,1,69.715493,1.690141,Play,3.0,Direct,6.0,176.0,12.0,2539.0,33.559648,0.459016,-1.0,1.231124,36.155845,Puck Recovery,4.0,,-1.0,178.0,10.0,2540.0,34.644624,0.369231,-1.0,0.089786,-1.084976,0.189897,,33.777255,25.825534
580,8353,8353,14/2/2022,2021,Olympic (Women) - United States,Olympic (Women) - Finland,away,3,1107,5 on 4,2,0,Dani Cameranesi,Takeaway,1,159,22,,,,,,0,0,1930.0,,Olympic (Women) - Finland,Olympic (Women) - United States,2,0,0,6,4,21,-1,2,1107.0,6,-1,5,4,1,37.165172,1.512195,Play,3.0,Indirect,15.0,190.0,10.0,1110.0,32.5,0.0,-3.0,1.512195,4.665172,Puck Recovery,4.0,,-1.0,160.0,1.0,1112.0,51.20791,0.722892,-2.0,-0.722892,-18.70791,0.189897,,27.225962,21.300169
829,10436,10436,16/2/2022,2021,Olympic (Women) - Finland,Olympic (Women) - Switzerland,home,3,360,5 on 4,2,0,Elisa Holopainen,Play,1,164,4,Direct,Minnamari Tuominen,128.0,41.0,,0,0,538.0,568.0,Olympic (Women) - Finland,Olympic (Women) - Switzerland,3,0,0,3,1,23,-1,2,360.0,3,6,5,4,1,46.456969,0.675325,Play,3.0,Indirect,15.0,192.0,24.0,362.0,18.607794,-0.108108,-2.0,0.783433,27.849175,Puck Recovery,4.0,,-1.0,184.0,32.0,362.0,12.093387,0.571429,0.0,-0.679537,6.514407,0.189897,,24.414148,20.901299
392,5281,5281,12/2/2022,2021,Olympic (Women) - Olympic Athletes from Russia,Olympic (Women) - Switzerland,home,3,73,5 on 4,2,3,Anna Shibanova,Play,1,131,15,Direct,Olga Sosina,131.0,76.0,,0,0,1020.0,1049.0,Olympic (Women) - Olympic Athletes from Russia,Olympic (Women) - Switzerland,0,0,0,3,2,12,-1,-1,73.0,3,6,5,4,1,65.094163,2.145455,Play,3.0,Direct,6.0,155.0,4.0,74.0,52.031241,0.909091,-1.0,1.236364,13.062922,Puck Recovery,4.0,,-1.0,185.0,5.0,78.0,37.831865,0.133333,-4.0,0.775758,14.199376,0.189897,,23.559339,20.89073
290,3811,3811,12/2/2022,2021,Olympic (Women) - Switzerland,Olympic (Women) - Olympic Athletes from Russia,away,1,688,5 on 4,0,0,Lara Christen,Play,1,195,47,Direct,Nicole Vallario,188.0,22.0,,0,0,1832.0,1851.0,Olympic (Women) - Olympic Athletes from Russia,Olympic (Women) - Switzerland,0,0,0,3,3,49,-1,0,3088.0,3,6,5,4,1,6.726812,-1.111111,Puck Recovery,4.0,,-1.0,192.0,36.0,3091.0,6.800735,-0.307692,-3.0,-0.803419,-0.073923,Dump In/Out,0.0,,-1.0,105.0,82.0,3096.0,93.729664,2.151899,-5.0,-2.459591,-86.928929,0.263964,,29.911901,19.773303
173,2348,2348,8/2/2022,2021,Olympic (Women) - Finland,Olympic (Women) - Olympic Athletes from Russia,home,1,154,5 on 4,1,0,Minnamari Tuominen,Play,1,136,27,Direct,Nelli Laitinen,146.0,64.0,,0,0,1162.0,1188.0,Olympic (Women) - Finland,Olympic (Women) - Olympic Athletes from Russia,4,0,0,3,1,66,-1,1,2554.0,3,6,5,4,1,56.180513,3.483871,Play,3.0,Direct,6.0,148.0,5.0,2556.0,56.304973,1.12,-2.0,2.363871,-0.124461,Play,3.0,Indirect,15.0,158.0,36.0,2559.0,32.653484,4.923077,-3.0,-3.803077,23.651489,0.189897,,26.498454,18.861975
71,777,777,8/2/2022,2021,Olympic (Women) - United States,Olympic (Women) - Canada,home,2,937,5 on 4,0,1,Kelly Pannek,Play,1,130,28,Indirect,Kendall Coyne Schofield,197.0,42.0,,0,0,1632.0,1713.0,Olympic (Women) - United States,Olympic (Women) - Canada,5,0,0,3,4,46,-1,-1,2137.0,3,15,5,4,1,61.727223,4.137931,Play,3.0,Direct,6.0,157.0,7.0,2138.0,48.469062,0.929577,-1.0,3.208354,13.25816,Play,3.0,Direct,6.0,138.0,45.0,2141.0,52.060061,20.8,-3.0,-19.870423,-3.590999,0.189897,,17.94945,15.821098


In [153]:
df_pbp_pp.loc[(df_pbp_pp['x_coord'] >= O_ZONE) & (df_pbp_pp['event'] != "Puck Recovery"),['player_name','team_name', 'expected_goals','nearest_defender','diff_nearest_defender']].groupby('player_name').sum().sort_values('diff_nearest_defender', ascending=False).head(10)

Unnamed: 0_level_0,expected_goals,nearest_defender,diff_nearest_defender
player_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jenni Hiirikoski,0.070094,130.407287,38.807378
Alex Cavallini,0.0,117.272172,37.867777
Kelly Pannek,0.007217,45.767107,29.586193
Andrea Brandli,0.0,107.132817,28.480793
Alina Muller,0.0,68.692995,22.696592
Dani Cameranesi,0.200945,30.429829,21.300169
Olga Sosina,0.010898,202.813568,17.832818
Anna Savonina,0.0,60.010206,15.358531
Alex Carpenter,0.028285,63.121836,13.821582
Laura Stacey,0.0,21.107548,11.829935


In [155]:
df_pbp_pp.loc[(df_pbp_pp['x_coord'] >= O_ZONE) & (df_pbp_pp['event'] != "Puck Recovery"),['player_name','team_name', 'expected_goals','nearest_defender','diff_nearest_defender']].groupby('player_name').sum().sort_values('diff_nearest_defender', ascending=True).head(10)

Unnamed: 0_level_0,expected_goals,nearest_defender,diff_nearest_defender
player_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lara Christen,0.0,180.911263,-108.170776
Sinja Leemann,0.0,99.142933,-95.003581
Cayla Barnes,0.016857,84.44578,-68.678921
Anna Shibanova,0.0,147.859687,-59.700211
Noora Tulus,0.0,55.926424,-58.884373
Abby Roque,0.0,21.392594,-57.621528
Elisa Holopainen,0.14317,179.773978,-48.12369
Kendall Coyne Schofield,0.0,13.778295,-35.56502
Viivi Vainikka,0.422808,43.054672,-34.342182
Valeria Pavlova,0.0,17.137967,-27.821481


In [154]:
df_pbp_pp.loc[(df_pbp_pp['x_coord'] >= O_ZONE) & (df_pbp_pp['event'] != "Puck Recovery"),['player_name','team_name', 'expected_goals','nearest_defender','diff_nearest_defender']].groupby('player_name').mean().sort_values('diff_nearest_defender', ascending=False).head(10)

Unnamed: 0_level_0,expected_goals,nearest_defender,diff_nearest_defender
player_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alex Cavallini,,58.636086,18.933888
Andrea Brandli,,53.566408,14.240397
Laura Stacey,,21.107548,11.829935
Dani Cameranesi,0.200945,15.214914,10.650085
Kelly Pannek,0.007217,15.255702,9.862064
Jenni Hiirikoski,0.035047,21.734548,6.467896
Alina Muller,,17.173249,5.674148
Emily Clark,,16.706173,5.154447
Anna Savonina,,20.003402,5.11951
Jillian Saulnier,,11.305101,4.071548
