In [131]:
import pandas as pd
import sqlite3
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import re
import csv

# Show every column when a wide DataFrame is displayed (no column truncation).
pd.set_option("display.max_columns", None)

In [132]:
# Input locations (relative to this notebook) and the folder that holds the SQLite database.
data_dir = "../../Data/Big-Data-Cup-2021"
bucketless_data_dir = "../../Data/bdc/data"
tracking_data_dir = f"{data_dir}/TrackingData"
# NOTE(review): absolute, machine-specific path. The trailing slash is required
# because the database file name is appended to it directly.
db_path = "/Users/keltim01/Documents/databases/"


### Importing and cleaning the data

Thanks to [The Bucketless](https://twitter.com/the_bucketless) for the great [tutorial](https://github.com/the-bucketless/bdc/blob/main/bdc_merge_example.ipynb), the [cleaned data](https://github.com/the-bucketless/bdc/tree/main/data), and the [hockey_rink](https://github.com/the-bucketless/hockey_rink) package.

### Play by Play Data 

In [133]:
# Rink geometry constants (the rink is 200 ft long) used by the goal distance features.
ICE_LENGTH, ICE_WIDTH = 200, 85
# The goal sits 10 short of the far end in x and at mid-width in y.
GOAL_X = ICE_LENGTH - 10
GOAL_Y = ICE_WIDTH / 2
# Zone boundaries along x: 75 from either end (presumably the blue lines -- confirm).
D_ZONE = 75
O_ZONE = ICE_LENGTH - D_ZONE

In [134]:
# Open the SQLite database that receives every table written by this notebook
# (sqlite3 creates the file if it does not exist yet).
conn = sqlite3.connect(f"{db_path}bdc_2022.db")
cursor = conn.cursor()

In [135]:
# Power-play intervals (one row per penalty) from the cleaned data folder.
df_pp_info = pd.read_csv(f"{bucketless_data_dir}/pp_info.csv")


In [136]:
# 2022 women's Olympic play-by-play events from the cleaned data folder.
df_pbp = pd.read_csv(f"{bucketless_data_dir}/pxp_womens_oly_2022_v2.csv")


In [137]:
# Feature engineering for the 2022 Olympic play-by-play events (one row per event).
# NOTE: this cell overwrites columns of df_pbp in place (boolean flags, mirrored
# x coordinates), so it must be run exactly once on a freshly loaded frame.

# --- teams, ids and outcome flags ---------------------------------------------
# `venue` tells whether the event team is home or away; derive both team names from it.
df_pbp['home_team'] = df_pbp['team_name'].where(df_pbp['venue'] == 'home', df_pbp['opp_team_name'])
df_pbp['away_team'] = df_pbp['team_name'].where(df_pbp['venue'] == 'away', df_pbp['opp_team_name'])
# date + home + away are concatenated into one string per row and category-coded.
df_pbp['game_id'] = df_pbp.loc[:, ['game_date', 'home_team', 'away_team']].sum(axis=1).astype('category').cat.codes

shot_mask = df_pbp['event'] == 'Shot'
# The raw file stores booleans as the string 't'; anything else counts as False.
succeeded = df_pbp['event_successful'] == 't'
df_pbp['is_shot'] = shot_mask.astype('int64')
# A goal is a shot event that was successful.
df_pbp['is_goal'] = (shot_mask & succeeded).astype('int64')
df_pbp['event_id'] = df_pbp['event'].astype('category').cat.codes
df_pbp['team_id'] = df_pbp['team_name'].astype('category').cat.codes
df_pbp['player_id'] = df_pbp['player_name'].astype('category').cat.codes
df_pbp['event_successful'] = succeeded
df_pbp['event_detail_2'] = df_pbp['event_detail_2'] == 't'
df_pbp['event_detail_3'] = df_pbp['event_detail_3'] == 't'
df_pbp['detail_1_code'] = df_pbp['event_detail_1'].astype('category').cat.codes
# Score difference from the perspective of the team performing the event.
df_pbp['goal_diff'] = df_pbp['goals_for'].sub(df_pbp['goals_against'])

# --- game clock -----------------------------------------------------------------
# The period clock is offset by 2400 s in period 1 and 1200 s in period 2; period 3
# is used as is. Rows from any other period keep NaN.
for period, clock_offset in ((1, 2400), (2, 1200), (3, 0)):
    in_period = df_pbp['period'] == period
    df_pbp.loc[in_period, 'game_seconds_remaining'] = clock_offset + df_pbp.loc[in_period, 'clock_seconds']

df_pbp['event_code'] = df_pbp['event'].astype('category').cat.codes
df_pbp['event_type_code'] = df_pbp['event_type'].astype('category').cat.codes

# --- coordinate mirroring (thanks to The Bucketless for the logic) -------------
# Mirror the home team in even-numbered periods and the away team in odd-numbered
# periods; the Canada - USA game is flipped the other way round.
wrong_way_teams = ["Olympic (Women) - Canada", "Olympic (Women) - United States"]
# Explicit parity masks (instead of a bitwise `~` on the integer remainder).
even_period = (df_pbp.period % 2) == 0
odd_period = (df_pbp.period % 2) == 1
mirror_indices = (
    (
        # need to mirror the home team in even-numbered periods
        ((df_pbp.venue == "home") & even_period)
        # and the away team in odd-numbered
        | ((df_pbp.venue == "away") & odd_period)
    )
    # flipping things in the Canada-USA game
    ^ (df_pbp.team_name.isin(wrong_way_teams) & df_pbp.opp_team_name.isin(wrong_way_teams))
)

# Only the x coordinates are mirrored, along the rink length.
coord_columns = ["x_coord", "x_coord_2"]
df_pbp.loc[mirror_indices, coord_columns] = ICE_LENGTH - df_pbp.loc[mirror_indices, coord_columns]

# --- manpower -------------------------------------------------------------------
# `situation_type` looks like "5 on 4": skaters of the event team, then the opponent.
df_pbp[['skaters_for','skaters_against']] = df_pbp['situation_type'].str.extract(r'(\d) on (\d)')
df_pbp = df_pbp.astype({'skaters_for': 'int64', 'skaters_against': 'int64'})
df_pbp['strength_state'] = df_pbp['skaters_for'].sub(df_pbp['skaters_against'])

# --- distance / "angle" to the goal ---------------------------------------------
diff_x1 = GOAL_X - df_pbp['x_coord']
diff_x1 = diff_x1.astype(float)
diff_y1 = abs(GOAL_Y - df_pbp['y_coord'])
df_pbp['distance_to_goal'] = np.sqrt(diff_x1 ** 2 + diff_y1 ** 2)
# NOTE(review): despite its name this is the ratio diff_x / diff_y (set to 0 where
# diff_y is 0), not an angle. It is left unchanged because the 2021 training frame
# is built with the same definition and the two must stay comparable.
df_pbp['angle_to_goal'] = np.divide(diff_x1, diff_y1,out=np.zeros_like(diff_x1),where=(diff_y1 != 0))

# --- previous / second previous event -------------------------------------------
# The lags are taken per game so that the first events of a game do not inherit
# the last events of whichever game precedes it in the file.
lag_source_columns = ['event','event_code','event_type','event_type_code','x_coord', 'y_coord', 'game_seconds_remaining', 'distance_to_goal', 'angle_to_goal']

# getting the information about the previous event
df_pbp[['prev_event','prev_event_code','prev_event_type','prev_event_type_code','prev_event_x_coord','prev_event_y_coord','prev_event_game_seconds_remaining','prev_event_distance_to_goal','prev_event_angle_to_goal']] = df_pbp.groupby('game_id')[lag_source_columns].shift(1)
df_pbp['time_diff_last_event'] = df_pbp['game_seconds_remaining'] - df_pbp['prev_event_game_seconds_remaining']
df_pbp['angle_diff_last_event'] = df_pbp['angle_to_goal'] - df_pbp['prev_event_angle_to_goal']
df_pbp['distance_diff_last_event'] = df_pbp['distance_to_goal'] - df_pbp['prev_event_distance_to_goal']
# getting the information about the second previous event
df_pbp[['second_prev_event','second_prev_event_code','second_prev_event_type','second_prev_event_type_code','second_prev_event_x_coord','second_prev_event_y_coord','second_prev_event_game_seconds_remaining','second_prev_event_distance_to_goal','second_prev_event_angle_to_goal']] = df_pbp.groupby('game_id')[lag_source_columns].shift(2)
df_pbp['prev_time_diff_last_event'] = df_pbp['prev_event_game_seconds_remaining'] - df_pbp['second_prev_event_game_seconds_remaining']
df_pbp['prev_angle_diff_last_event'] = df_pbp['prev_event_angle_to_goal'] - df_pbp['second_prev_event_angle_to_goal']
df_pbp['prev_distance_diff_last_event'] = df_pbp['prev_event_distance_to_goal'] - df_pbp['second_prev_event_distance_to_goal']

In [138]:
# Column overview (dtypes and non-null counts) of the engineered play-by-play frame.
df_pbp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10609 entries, 0 to 10608
Data columns (total 66 columns):
 #   Column                                    Non-Null Count  Dtype  
---  ------                                    --------------  -----  
 0   game_date                                 10609 non-null  object 
 1   season_year                               10609 non-null  int64  
 2   team_name                                 10609 non-null  object 
 3   opp_team_name                             10609 non-null  object 
 4   venue                                     10609 non-null  object 
 5   period                                    10609 non-null  int64  
 6   clock_seconds                             10609 non-null  int64  
 7   situation_type                            10609 non-null  object 
 8   goals_for                                 10609 non-null  int64  
 9   goals_against                             10609 non-null  int64  
 10  player_name                       

In [139]:
# Persist the engineered events as table `play_by_play`, overwriting any previous version.
df_pbp.to_sql(name='play_by_play', con=conn, if_exists='replace')

10609

### Powerplay Info 

In [140]:
# Columns available in the power-play info file.
df_pp_info.columns

Index(['game_name', 'penalty_number', 'start_video_clock_seconds',
       'end_video_clock_seconds', 'start_period', 'end_period',
       'start_game_clock_seconds', 'end_game_clock_seconds'],
      dtype='object')

In [141]:
# Peek at the first power-play rows.
df_pp_info.head()

Unnamed: 0,game_name,penalty_number,start_video_clock_seconds,end_video_clock_seconds,start_period,end_period,start_game_clock_seconds,end_game_clock_seconds
0,2022-02-08 Canada at USA,1,1278,1314,1,1,386,350
1,2022-02-08 Canada at USA,2,1624,1744,1,1,216,96
2,2022-02-08 Canada at USA,3,409,562,2,2,991,871
3,2022-02-08 Canada at USA,4,898,898,2,2,613,613
4,2022-02-08 Canada at USA,5,1690,1927,2,2,272,152


In [142]:
# Convert the start and end of every power play to the same running clock used
# for the play-by-play events: the period clock is offset by 2400 s in period 1
# and by 1200 s in period 2, and used as is in period 3. Rows whose period is
# not 1-3 keep NaN.
# Each boundary reads its OWN clock column; previously the period-3 start time
# was filled from `end_game_clock_seconds` by mistake.
for boundary in ('start', 'end'):
    period_column = f'{boundary}_period'
    clock_column = f'{boundary}_game_clock_seconds'
    target_column = f'{boundary}_game_seconds_remaining'
    for period, clock_offset in ((1, 2400), (2, 1200), (3, 0)):
        in_period = df_pp_info[period_column] == period
        df_pp_info.loc[in_period, target_column] = clock_offset + df_pp_info.loc[in_period, clock_column]

In [143]:
# Persist the power-play intervals as table `powerplay_info`, overwriting any previous version.
df_pp_info.to_sql(name='powerplay_info', con=conn, if_exists='replace')

37

### Tracking Data

In [144]:
# Game folders in the cleaned data directory; their names start with the 2022 game date.
bucketless_games = [entry for entry in os.listdir(bucketless_data_dir) if entry.startswith('2022')]
bucketless_games

['2022-02-08 ROC at Finland',
 '2022-02-14 USA at Finland',
 '2022-02-14 Switzerland at Canada',
 '2022-02-08 Canada at USA',
 '2022-02-16 Switzerland at Finland',
 '2022-02-12 Switzerland at ROC']

In [145]:
# Entries of the original tracking directory whose name contains four consecutive
# digits (the year of the game date). The pattern is a raw string so that `\d` is
# not treated as an (invalid) string escape sequence.
tracking_games = [x for x in os.listdir(tracking_data_dir) if re.search(r'\d{4}', x)]
tracking_games

['2022-02-08 ROC at Finland',
 '2022-02-14 Switzerland at Canada',
 '2022-02-08 Canada at USA',
 '2022-02-14 Finland at USA',
 '2022-02-16 Switzerland at Finland',
 '2022-02-12 Switzerland at ROC']

In [146]:
# List every generated game_id with its teams and date (the count is the number of
# events); the folder-name -> game_id mapping defined next has to match these ids.
df_pbp[['game_id', 'home_team', 'away_team','game_date']].value_counts()

game_id  home_team                                       away_team                                       game_date
5        Olympic (Women) - United States                 Olympic (Women) - Canada                        8/2/2022     1826
2        Olympic (Women) - Finland                       Olympic (Women) - United States                 14/2/2022    1810
1        Olympic (Women) - Canada                        Olympic (Women) - Switzerland                   14/2/2022    1765
0        Olympic (Women) - Olympic Athletes from Russia  Olympic (Women) - Switzerland                   12/2/2022    1748
4        Olympic (Women) - Finland                       Olympic (Women) - Olympic Athletes from Russia  8/2/2022     1739
3        Olympic (Women) - Finland                       Olympic (Women) - Switzerland                   16/2/2022    1721
dtype: int64

In [147]:
# Hand-written mapping from game folder name to the `game_id` code of the
# play-by-play frame. The codes come from category-coding the game strings, so
# this mapping must be re-checked whenever the play-by-play input changes.
# The keys follow the folder names of the cleaned data ("USA at Finland").
game_ids = {
    "2022-02-08 ROC at Finland": 4,
    "2022-02-14 Switzerland at Canada": 1,
    "2022-02-08 Canada at USA": 5,
    "2022-02-16 Switzerland at Finland": 3,
    "2022-02-12 Switzerland at ROC": 0,
    "2022-02-14 USA at Finland": 2,
}

In [148]:
# Load every tracking file of every game, enrich it with roster information and
# the game id, and store it in the `tracking` table.
# The table is rebuilt on the first write and appended to afterwards, so that
# re-running this cell does not add a second copy of every row (the other tables
# written by this notebook are replaced on re-run as well).
tracking_table_started = False
for game in bucketless_games:
    game_dir = '{}/{}'.format(bucketless_data_dir, game)
    # every file in the game folder except the roster is read as a tracking file
    files = [x for x in os.listdir(game_dir) if not x.endswith('roster.csv')]
    # folder names look like "<date> <away> at <home>"
    game_date, away_team, _, home_team = game.split(' ')
    df_roster = pd.read_csv('{}/{} roster.csv'.format(game_dir, game))
    # the roster's `team` column is joined against home/away below
    # (presumably it holds 'home' / 'away' -- confirm against the roster files)
    df_roster = df_roster.rename(columns={'jn': 'jersey_number', 'team':'venue'})
    for file in files:
        df_tracking = pd.read_csv('{}/{}'.format(game_dir, file))
        # offset the clock by 2400 s in period 1 and 1200 s in period 2; other periods stay as is
        df_tracking.loc[df_tracking['period'] == 1,'game_seconds'] = 2400 + df_tracking.loc[df_tracking['period'] == 1,'game_seconds']
        df_tracking.loc[df_tracking['period'] == 2,'game_seconds'] = 1200 + df_tracking.loc[df_tracking['period'] == 2,'game_seconds']
        # team names that match neither folder-name team become NaN here
        df_tracking['venue'] = df_tracking['team_name'].map({home_team: 'home', away_team: 'away'})
        # left join: tracking rows without a roster match are kept
        df_tracking = df_tracking.merge(df_roster, on=['venue','jersey_number'],how="left")
        df_tracking = df_tracking.rename(columns={'x_ft': "x_coord", 'y_ft': "y_coord"})
        df_tracking['game_id'] = game_ids[game]
        df_tracking.to_sql('tracking', conn, if_exists='append' if tracking_table_started else 'replace')
        tracking_table_started = True


### 2021 Event Data (used as training data)

In [149]:
# Load both 2021 event files and stack them into one frame with a fresh index.
# pd.concat replaces DataFrame.append, which is deprecated and no longer exists
# in pandas 2.x.
df_pbp_2021_womens = pd.read_csv("{}/hackathon_womens.csv".format(data_dir))
df_pbp_2021_nwhl = pd.read_csv("{}/hackathon_nwhl.csv".format(data_dir))
df_pbp_2021_womens = pd.concat([df_pbp_2021_womens, df_pbp_2021_nwhl], ignore_index=True)


  df_pbp_2021_womens = df_pbp_2021_womens.append(df_pbp_2021_nwhl, ignore_index=True)


In [150]:
# Normalise the column names to snake_case and use the same coordinate column
# names as the 2022 play-by-play frame.
df_pbp_2021_womens = (
    df_pbp_2021_womens
    .rename(columns=lambda column_name: column_name.lower().replace(' ', '_'))
    .rename(columns={'x_coordinate': 'x_coord', 'y_coordinate': 'y_coord'})
)

In [151]:
# Derived columns for the 2021 training events, aligned with the definitions used
# for the 2022 play-by-play frame.

# date + home + away are concatenated into one string per row and category-coded.
df_pbp_2021_womens['game_id'] = df_pbp_2021_womens.loc[:, ['game_date', 'home_team', 'away_team']].sum(axis=1).astype('category').cat.codes
df_pbp_2021_womens['event_id'] = df_pbp_2021_womens['event'].astype('category').cat.codes
df_pbp_2021_womens['team_id'] = df_pbp_2021_womens['team'].astype('category').cat.codes
df_pbp_2021_womens['player_id'] = df_pbp_2021_womens['player'].astype('category').cat.codes

# In this data set a goal is its own 'Goal' event rather than a successful 'Shot'.
# To match the 2022 frame (where every goal is also a shot) a goal counts for both flags.
df_pbp_2021_womens['is_shot'] = df_pbp_2021_womens['event'].isin(['Shot', 'Goal']).astype('int64')
df_pbp_2021_womens['is_goal'] = (df_pbp_2021_womens['event'] == 'Goal').astype('int64')

# detail_1 .. detail_4 as integer category codes
for x in range(1,5):
    df_pbp_2021_womens[f'detail_{x}_code'] = df_pbp_2021_womens[f'detail_{x}'].astype('category').cat.codes

# 1 when the event team is the home team, 0 otherwise (previously 1 / NaN).
event_by_home_team = df_pbp_2021_womens['home_team'] == df_pbp_2021_womens['team']
df_pbp_2021_womens['is_home'] = event_by_home_team.astype('int64')

# Score difference from the perspective of the team performing the event, like
# goals_for - goals_against in the 2022 frame (previously always home - away).
# Assumes every event team is either the home or the away team of its game.
home_lead = df_pbp_2021_womens['home_team_goals'].sub(df_pbp_2021_womens['away_team_goals'])
df_pbp_2021_womens['goal_diff'] = home_lead.where(event_by_home_team, -home_lead)

# The clock is parsed as minutes:seconds and converted to seconds.
# NOTE(review): no period offset is added here, whereas the 2022 frame adds
# 2400 / 1200 s for periods 1 / 2 -- confirm whether this should be game time.
df_pbp_2021_womens['clock'] = pd.to_datetime(df_pbp_2021_womens['clock'], format='%M:%S')
df_pbp_2021_womens['game_seconds_remaining'] = df_pbp_2021_womens['clock'].dt.minute.mul(60).add(df_pbp_2021_womens['clock'].dt.second)

# Same codes as event_id, under the column name used by the lag features.
# NOTE(review): category codes are assigned per frame; this frame has event values
# the 2022 frame lacks ('Goal', 'Incomplete Play'), so codes are not comparable
# between the two tables.
df_pbp_2021_womens['event_code'] = df_pbp_2021_womens['event'].astype('category').cat.codes


In [152]:
# Skater advantage from the perspective of the team performing the event:
# own skaters minus opposing skaters. Rows whose team matches neither the home
# nor the away team are not assigned.
event_by_home = df_pbp_2021_womens['team'] == df_pbp_2021_womens['home_team']
event_by_away = df_pbp_2021_womens['team'] == df_pbp_2021_womens['away_team']
home_rows = df_pbp_2021_womens.loc[event_by_home]
away_rows = df_pbp_2021_womens.loc[event_by_away]
df_pbp_2021_womens.loc[event_by_home, 'strength_state'] = home_rows['home_team_skaters'].sub(home_rows['away_team_skaters'])
df_pbp_2021_womens.loc[event_by_away, 'strength_state'] = away_rows['away_team_skaters'].sub(away_rows['home_team_skaters'])

In [153]:
# calculating angles and distances to the goal
# calculating angles and distances to the goal
diff_2021_x1 = GOAL_X - df_pbp_2021_womens['x_coord']
diff_2021_x1 = diff_2021_x1.astype(float)
diff_2021_y1 = abs(GOAL_Y - df_pbp_2021_womens['y_coord'])

df_pbp_2021_womens['distance_to_goal'] = np.sqrt(diff_2021_x1 ** 2 + diff_2021_y1 ** 2)
# NOTE(review): despite its name this is the ratio diff_x / diff_y (set to 0 where
# diff_y is 0), not an angle. It is left unchanged because the 2022 play-by-play
# frame is built with the same definition and the two must stay comparable.
df_pbp_2021_womens['angle_to_goal'] = np.divide(diff_2021_x1, diff_2021_y1,out=np.zeros_like(diff_2021_x1),where=(diff_2021_y1 != 0))

# The lags are taken per game so that the first events of a game do not inherit
# the last events of whichever game precedes it in the frame.
# `detail_1` plays the role of the event type in this data set.
lag_source_columns_2021 = ['event','event_code','detail_1','detail_1_code','x_coord', 'y_coord', 'game_seconds_remaining', 'distance_to_goal', 'angle_to_goal']

# getting the information about the previous event
df_pbp_2021_womens[['prev_event','prev_event_code','prev_event_type','prev_event_type_code','prev_event_x_coord','prev_event_y_coord','prev_event_game_seconds_remaining','prev_event_distance_to_goal','prev_event_angle_to_goal']] = df_pbp_2021_womens.groupby('game_id')[lag_source_columns_2021].shift(1)
df_pbp_2021_womens['time_diff_last_event'] = df_pbp_2021_womens['game_seconds_remaining'] - df_pbp_2021_womens['prev_event_game_seconds_remaining']
df_pbp_2021_womens['angle_diff_last_event'] = df_pbp_2021_womens['angle_to_goal'] - df_pbp_2021_womens['prev_event_angle_to_goal']
df_pbp_2021_womens['distance_diff_last_event'] = df_pbp_2021_womens['distance_to_goal'] - df_pbp_2021_womens['prev_event_distance_to_goal']

# getting the information about the second previous event
df_pbp_2021_womens[['second_prev_event','second_prev_event_code','second_prev_event_type','second_prev_event_type_code','second_prev_event_x_coord','second_prev_event_y_coord','second_prev_event_game_seconds_remaining','second_prev_event_distance_to_goal','second_prev_event_angle_to_goal']] = df_pbp_2021_womens.groupby('game_id')[lag_source_columns_2021].shift(2)
df_pbp_2021_womens['prev_time_diff_last_event'] = df_pbp_2021_womens['prev_event_game_seconds_remaining'] - df_pbp_2021_womens['second_prev_event_game_seconds_remaining']
df_pbp_2021_womens['prev_angle_diff_last_event'] = df_pbp_2021_womens['prev_event_angle_to_goal'] - df_pbp_2021_womens['second_prev_event_angle_to_goal']
df_pbp_2021_womens['prev_distance_diff_last_event'] = df_pbp_2021_womens['prev_event_distance_to_goal'] - df_pbp_2021_womens['second_prev_event_distance_to_goal']

In [154]:
# Persist the 2021 events as table `pbp_training`, overwriting any previous version.
df_pbp_2021_womens.to_sql(name='pbp_training', con=conn, if_exists='replace')

50884

In [155]:
# Distinct values of the lagged event type in the training data (taken from detail_1).
df_pbp_2021_womens['prev_event_type'].unique()

array([nan, 'Backhand', 'Retained', 'Dumped', 'Indirect', 'Direct',
       'Carried', 'Lost', 'Played', 'Snapshot', 'Fan', 'Cross-checking',
       'Slapshot', 'Wristshot', 'Deflection', 'Forehand', 'Tripping',
       'Wrap Around', 'Roughing', 'Holding', 'Delay of game', 'Slashing',
       'Too many men on the ice', 'Interference', 'Closing hand on puck',
       'Hooking', 'Goalkeeper interference', 'Boarding', 'High-sticking',
       'Charging', 'Illegal Check to the Head', 'Game Misconduct', 'Feet',
       'Elbowing', 'Unsportsmanlike conduct', 'Face-off violation',
       'Holding the stick'], dtype=object)

In [156]:
# Distinct event types in the 2022 data and in their lagged column, for comparison
# with the training-data values.
df_pbp['event_type'].unique(), df_pbp['prev_event_type'].unique()

(array(['Backhand', nan, 'Dumped', 'Indirect', 'Direct', 'Slapshot',
        'Snapshot', 'Carried', 'Fan', 'Played', 'Wristshot', 'Deflection',
        'Forehand', 'Cross-checking', 'Hooking', 'Boarding',
        'Interference', 'Wrap Around', 'Unsportsmanlike conduct',
        'Tripping', 'Roughing', 'Holding', 'Slashing', 'High-sticking',
        'Feet', 'Checking from behind', 'Goalkeeper interference',
        'Too many men on the ice'], dtype=object),
 array([nan, 'Backhand', 'Dumped', 'Indirect', 'Direct', 'Slapshot',
        'Snapshot', 'Carried', 'Fan', 'Played', 'Wristshot', 'Deflection',
        'Forehand', 'Cross-checking', 'Hooking', 'Boarding',
        'Interference', 'Wrap Around', 'Unsportsmanlike conduct',
        'Tripping', 'Roughing', 'Holding', 'Slashing', 'High-sticking',
        'Feet', 'Checking from behind', 'Goalkeeper interference',
        'Too many men on the ice'], dtype=object))

In [157]:
# Distinct event names in the 2022 data and in their lagged column.
df_pbp['event'].unique(), df_pbp['prev_event'].unique()

(array(['Faceoff Win', 'Puck Recovery', 'Dump In/Out', 'Zone Entry',
        'Play', 'Shot', 'Takeaway', 'Penalty Taken'], dtype=object),
 array([nan, 'Faceoff Win', 'Puck Recovery', 'Dump In/Out', 'Zone Entry',
        'Play', 'Shot', 'Takeaway', 'Penalty Taken'], dtype=object))

In [158]:
# Distinct event names in the 2021 training data and in their lagged column, to
# compare against the 2022 event names.
df_pbp_2021_womens['event'].unique(), df_pbp_2021_womens['prev_event'].unique()

(array(['Faceoff Win', 'Puck Recovery', 'Dump In/Out', 'Zone Entry',
        'Play', 'Takeaway', 'Incomplete Play', 'Shot', 'Penalty Taken',
        'Goal'], dtype=object),
 array([nan, 'Faceoff Win', 'Puck Recovery', 'Dump In/Out', 'Zone Entry',
        'Play', 'Takeaway', 'Incomplete Play', 'Shot', 'Penalty Taken',
        'Goal'], dtype=object))

In [159]:
# Spot check: shots by one player taken while her team had more skaters on the
# ice than the opponent (strength_state > 0).
df_pbp[(df_pbp['is_shot']==1) & (df_pbp['player_name'] == 'Alina Muller') & (df_pbp['strength_state'] > 0)]

Unnamed: 0,game_date,season_year,team_name,opp_team_name,venue,period,clock_seconds,situation_type,goals_for,goals_against,player_name,event,event_successful,x_coord,y_coord,event_type,player_name_2,x_coord_2,y_coord_2,event_detail_1,event_detail_2,event_detail_3,frame_id_1,frame_id_2,home_team,away_team,game_id,is_shot,is_goal,event_id,team_id,player_id,detail_1_code,goal_diff,game_seconds_remaining,event_code,event_type_code,skaters_for,skaters_against,strength_state,distance_to_goal,angle_to_goal,prev_event,prev_event_code,prev_event_type,prev_event_type_code,prev_event_x_coord,prev_event_y_coord,prev_event_game_seconds_remaining,prev_event_distance_to_goal,prev_event_angle_to_goal,time_diff_last_event,angle_diff_last_event,distance_diff_last_event,second_prev_event,second_prev_event_code,second_prev_event_type,second_prev_event_type_code,second_prev_event_x_coord,second_prev_event_y_coord,second_prev_event_game_seconds_remaining,second_prev_event_distance_to_goal,second_prev_event_angle_to_goal,prev_time_diff_last_event,prev_angle_diff_last_event,prev_distance_diff_last_event
4996,12/2/2022,2021,Olympic (Women) - Switzerland,Olympic (Women) - Olympic Athletes from Russia,away,3,737,5 on 4,2,1,Alina Muller,Shot,False,59,26,Wristshot,,,,Blocked,True,False,219.0,,Olympic (Women) - Olympic Athletes from Russia,Olympic (Women) - Switzerland,0,1,0,5,3,6,0,1,737.0,5,26,5,4,1,132.035033,7.939394,Play,3.0,Direct,6.0,68.0,42.0,739.0,122.001025,244.0,-2.0,-236.060606,10.034009,Play,3.0,Direct,6.0,50.0,5.0,741.0,144.93533,3.733333,-2.0,240.266667,-22.934306
5049,12/2/2022,2021,Olympic (Women) - Switzerland,Olympic (Women) - Olympic Athletes from Russia,away,3,642,5 on 4,2,1,Alina Muller,Shot,False,40,27,Slapshot,,,,On Net,False,False,4887.0,,Olympic (Women) - Olympic Athletes from Russia,Olympic (Women) - Switzerland,0,1,0,5,3,6,2,1,642.0,5,19,5,4,1,150.798707,9.677419,Play,3.0,Direct,6.0,38.0,63.0,643.0,153.376172,7.414634,-1.0,2.262785,-2.577465,Play,3.0,Direct,6.0,66.0,38.0,645.0,124.081626,27.555556,-2.0,-20.140921,29.294545
10165,16/2/2022,2021,Olympic (Women) - Switzerland,Olympic (Women) - Finland,away,3,960,5 on 4,0,2,Alina Muller,Shot,False,51,20,Wristshot,,,,Blocked,True,False,3642.0,,Olympic (Women) - Finland,Olympic (Women) - Switzerland,3,1,0,5,3,6,0,-2,960.0,5,26,5,4,1,140.809268,6.177778,Zone Entry,7.0,Carried,2.0,74.0,16.0,961.0,118.988445,4.377358,-1.0,1.800419,21.820823,Play,3.0,Direct,6.0,168.0,81.0,967.0,44.342418,0.571429,-6.0,3.80593,74.646027


In [160]:
# (rows, columns) of all 2022 events flagged as shots.
df_pbp[(df_pbp['is_shot']==1)].shape

(709, 66)