In [1]:
import os
import sys
import pandas as pd
import numpy as np

sys.path.insert(0,'../src/data/')
from team_data import  get_all_teams 

In [2]:
game_id = 2016020001 #2017021065
file_path = os.path.join("../data/cleaned/", str(game_id) + '.csv')
events_df = pd.read_csv(file_path, index_col=0)
events_df.columns

Index(['id', 'event_index', 'game_id', 'home_team', 'away_team', 'type',
       'secondary_type', 'description', 'code', 'period', 'period_type',
       'time', 'time_remaining', 'date', 'goals_home', 'goals_away',
       'shooter_team_id', 'shooter_team_name', 'shooter_team_code',
       'shooter_name', 'shooter_id', 'goalie_name', 'goalie_id',
       'is_empty_net', 'is_winning_goal', 'strength_name', 'strength_code',
       'coordinate_x', 'coordinate_y', 'distance_from_net', 'angle', 'is_goal',
       'game_sec', 'prev_event_type', 'prev_event_time_diff',
       'angle_between_prev_event', 'distance_from_prev_event', 'rebound_angle',
       'is_rebound', 'speed'],
      dtype='object')

In [3]:
events_df.head()

Unnamed: 0,id,event_index,game_id,home_team,away_team,type,secondary_type,description,code,period,...,angle,is_goal,game_sec,prev_event_type,prev_event_time_diff,angle_between_prev_event,distance_from_prev_event,rebound_angle,is_rebound,speed
0,1,0,2016020001,Ottawa Senators,Toronto Maple Leafs,GAME_SCHEDULED,,Game Scheduled,OTT1,1,...,,False,0,,,,,0.0,False,0.0
1,2,1,2016020001,Ottawa Senators,Toronto Maple Leafs,PERIOD_READY,,Period Ready,OTT2,1,...,,False,0,GAME_SCHEDULED,0.0,,,0.0,False,0.0
2,5,2,2016020001,Ottawa Senators,Toronto Maple Leafs,PERIOD_START,,Period Start,OTT5,1,...,,False,0,PERIOD_READY,0.0,,,0.0,False,0.0
3,6,3,2016020001,Ottawa Senators,Toronto Maple Leafs,FACEOFF,,Nazem Kadri faceoff won against Derick Brassard,OTT6,1,...,-0.0,False,0,PERIOD_START,0.0,,,0.0,False,0.0
4,51,4,2016020001,Ottawa Senators,Toronto Maple Leafs,STOP,,Icing,OTT51,1,...,,False,5,FACEOFF,5.0,,,0.0,False,0.0


# the game seconds feature

In [None]:
# 1. elapsed time since the game started
tmp =  events_df['time'].str.split(":", expand=True).astype(int)
events_df['game_sec'] = (events_df['period']-1) * 20 * 60 + tmp[0]*60 + tmp[1]

# previous event info

## Get time diff and distance from prev event (vectorized version)

In [3]:
# using df.shift(periods=1)
prev_events_df = events_df.shift(periods=1)

In [5]:
# 2. previous event type
events_df['prev_event_type'] = prev_events_df['type']

# 3. time difference in seconds
events_df['prev_event_time_diff'] = events_df['game_sec'] - prev_events_df['game_sec']

# 4. distance between current and previous event 
# first we calcualte the angle between the current and previous events (in degrees)
events_df['angle_between_prev_event'] = (events_df['angle'] - prev_events_df['angle']).abs().astype(float).round(4)
a = events_df['distance_from_net']
b = prev_events_df['distance_from_net']
# then with the knowledge of the two sides of a triangle and its angle, we can get the third side length
events_df['distance_from_prev_event'] = np.sqrt(a**2 + b**2 - (2*a*b*np.cos(events_df['angle_between_prev_event'] * np.pi / 180.0)))
events_df['distance_from_prev_event'] = events_df['distance_from_prev_event'].astype(float).round(4)
# 5. rebound angle is the change in angle between current and previous shot events = [0,180]
rebound_angle_mask = (events_df['type'] == 'SHOT') & (events_df['prev_event_type'] == 'SHOT') \
                    &(events_df['shooter_team_name'] == prev_events_df['shooter_team_name']) \
                    &(events_df['period'] == prev_events_df['period'])
events_df['rebound_angle'] = events_df['angle_between_prev_event']
events_df.loc[~rebound_angle_mask, 'rebound_angle'] = 0.0

# 6. see if the current event is a rebound
events_df['is_rebound'] = False
events_df.loc[rebound_angle_mask, 'is_rebound'] = True

# 7. speed of the puck
speed_mask = events_df['prev_event_time_diff'] > 0
events_df['speed'] = events_df[speed_mask]['distance_from_prev_event'] / events_df[speed_mask]['prev_event_time_diff']
events_df['speed'] = events_df['speed'].astype(float).round(4)
events_df.loc[np.isnan(events_df['speed']), 'speed'] = 0.0
events_df.loc[np.isnan(events_df["speed"]) | (events_df["period"] != prev_events_df["period"]), "speed"] = 0.0


In [8]:
sum(events_df['is_rebound'] == events_df['is_rebound_2']) - len(events_df)
events_df[events_df['is_rebound'] != events_df['is_rebound_2']] \
        [['event_index', 'type', 'is_rebound', 'is_rebound_2']]

Unnamed: 0,event_index,type,is_rebound,is_rebound_2
60,True,True,True,False
138,True,True,True,False
174,True,True,True,False
175,True,True,True,False
197,True,True,True,False
217,True,True,True,False
344,True,True,True,False


In [3]:
events_df.iloc[59][['event_index', 'type', 'shooter_team_name']]#, 'is_rebound', 'is_rebound_2']]

event_index                       59
type                            SHOT
shooter_team_name    Ottawa Senators
Name: 59, dtype: object

In [4]:
events_df.iloc[60][['event_index', 'type', 'shooter_team_name']]#, 'is_rebound', 'is_rebound_2']]

event_index          True
type                 True
shooter_team_name    True
Name: 60, dtype: object

In [17]:
# events_df.loc[np.isnan(events_df['speed']), 'speed'] = 0.0
# sum(events_df['speed'].astype(float).round(4) == events_df['speed_2']) - len(events_df)

# events_df['speed'] = events_df['speed'].astype(float).round(4)

# events_df[events_df['speed'] != events_df['speed_2']] \
#         [['event_index', 'type', 'speed', 'speed_2']]

-9

In [None]:
# type(events_df.at[0, 'speed']), type(events_df.at[0, 'speed_2'])

(numpy.float64, numpy.float64)

In [23]:
# events_df.iloc[0][['event_index', 'type', 'speed', 'speed_2']]

event_index                 0
type           GAME_SCHEDULED
speed                     NaN
speed_2                   NaN
Name: 0, dtype: object

## for loop version

In [17]:
import math
# sign = functools.partial(math.copysign, 1)   # or:
sign = lambda x: math.copysign(1, x)

events_df['prev_event_type'] = None
events_df['prev_event_time_diff'] = 0  # or -1
events_df['distance_from_prev_event'] = np.nan
events_df['is_rebound'] = False
events_df['rebound_angle'] = 0
events_df['speed'] = 0

for event_idx, row in events_df.iterrows():  # 371 rows (event_idx = [0,370]
    
    if event_idx != 0:
        prev_event = events_df.iloc[event_idx-1] 
        prev_event_type = prev_event['type']
        
        # 2. previous event type
        events_df.at[event_idx, 'prev_event_type'] = prev_event_type
        
        # 3. time difference in seconds
        events_df.at[event_idx, 'prev_event_time_diff'] = row['game_sec'] - prev_event['game_sec']
        
        
        # make sure both events are in the same period, otherwise it doesn't make sense
        # to get the distance and angle between consecutive events
        if prev_event['period'] == row['period']:
            
            # 4. distance between current and previous event
            angle_between = abs(row['angle'] - prev_event['angle'])
            a = row['distance_from_net']
            b = prev_event['distance_from_net']
            dist_prev_event = np.sqrt(a**2 + b**2 - 2*a*b*np.cos(angle_between*np.pi / 180))
            events_df.at[event_idx, 'distance_from_prev_event'] = dist_prev_event
            
            # 5. see if the current event is a rebound
            if prev_event['type'] == 'SHOT' and row['type'] == 'SHOT' \
            and prev_event['shooter_team_name'] == row['shooter_team_name']:
                events_df.at[event_idx, 'is_rebound'] = True
                
                # 6. rebound angle is the change in angle between current and previous shot events = [0,180]
                events_df.at[event_idx, 'rebound_angle'] = angle_between
        
        else:
            events_df.at[event_idx, 'distance_from_prev_event'] = np.NaN

# 7. speed of the puck
mask = events_df['prev_event_time_diff'] > 0
events_df['speed'] = events_df[mask]['distance_from_prev_event'] /events_df[mask]['prev_event_time_diff']
       
        
# maximum distance between events shouldn't exceed 219 (the rink diagonal length)
events_df['distance_from_prev_event'].min(), events_df['distance_from_prev_event'].max()

(2.2360679774981627, 187.25650856512306)

In [5]:
games_df[['event_index', 'type', 'distance_from_prev_event', 'prev_event_time_diff', 'speed', 'is_rebound', 'rebound_angle', 'angle', 'coordinate_x', 'coordinate_y', 'distance_from_net', 'distance_from_prev_event']]
# games_df[['event_index', 'type', 'is_rebound', 'period', 'angle', 'coordinate_x', 'coordinate_y', 'distance_from_net', 'distance_from_prev_event']]

Unnamed: 0,event_index,type,distance_from_prev_event,prev_event_time_diff,speed,is_rebound,rebound_angle,angle,coordinate_x,coordinate_y,distance_from_net,distance_from_prev_event.1
0,0,GAME_SCHEDULED,,0,,False,0,,,,,
1,1,PERIOD_READY,,0,,False,0,,,,,
2,2,PERIOD_START,,0,,False,0,,,,,
3,3,FACEOFF,,0,,False,0,-0.000000,-0.0,-0.0,89.000000,
4,4,STOP,,5,,False,0,,,,,
5,5,FACEOFF,,0,,False,0,-7.926927,-69.0,-22.0,159.524293,
6,6,TAKEAWAY,150.850920,38,3.969761,False,0,-78.111342,81.0,-38.0,38.832976,150.850920
7,7,BLOCKED_SHOT,138.191896,20,6.909595,False,0,9.884124,-43.0,23.0,133.988806,138.191896
8,8,BLOCKED_SHOT,21.633308,7,3.090473,False,0,4.194183,-61.0,11.0,150.402793,21.633308
9,9,SHOT,138.924440,1,138.924440,False,0,-22.619865,77.0,-5.0,13.000000,138.924440


## wrong trial

In [4]:
# games_df.loc[games_df['angle']<0, 'angle'] = (games_df.loc[games_df['angle']<0, 'angle'] * -1) + 90
# games_df['angle'].min(), games_df['angle'].max()

(0.0, 180.0)

## References

https://stackoverflow.com/questions/50308629/python-pandas-column-convert-minutes-to-second