In [1]:
import os
import sys
import pandas as pd
sys.path.insert(0,'../src/data/')
from team_data import  get_all_teams 

In [2]:
game_id = 2016020001 #2017021065
file_path = os.path.join("../data/cleaned/", str(game_id) + '.csv')
games_df = pd.read_csv(file_path, index_col=0)
games_df.columns

Index(['id', 'event_index', 'game_id', 'home_team', 'away_team', 'type',
       'secondary_type', 'description', 'code', 'period', 'period_type',
       'time', 'time_remaining', 'date', 'goals_home', 'goals_away',
       'shooter_team_id', 'shooter_team_name', 'shooter_team_code',
       'shooter_name', 'shooter_id', 'goalie_name', 'goalie_id',
       'is_empty_net', 'is_winning_goal', 'strength_name', 'strength_code',
       'coordinate_x', 'coordinate_y', 'distance_from_net', 'angle', 'is_goal',
       'game_sec', 'prev_event_type', 'prev_event_time_diff',
       'distance_from_prev_event', 'is_rebound', 'rebound_angle', 'speed'],
      dtype='object')

# the game seconds feature

In [3]:
import numpy as np
from datetime import timedelta

def seconder(x):
    mins, secs = map(float, x.split(':'))
    td = timedelta(minutes=mins, seconds=secs)
    return td.total_seconds()

# elapsed time since the game started
games_df['game_sec'] = games_df['time'].apply(seconder).astype(np.int64) + (games_df['period']-1) * 20 * 60

In [5]:
# games_df[['event_index', 'period', 'time', 'type']] 

# games_df[games_df['period']==1][['event_index', 'period', 'time', 'type']] 

# previous event info

## Get time diff and distance from prev event

In [4]:
import math
# sign = functools.partial(math.copysign, 1)   # or:
sign = lambda x: math.copysign(1, x)

games_df['prev_event_type'] = None
games_df['prev_event_time_diff'] = 0  # or -1
games_df['distance_from_prev_event'] = np.nan
games_df['is_rebound'] = False
games_df['rebound_angle'] = 0
games_df['speed'] = 0

for event_idx, row in games_df.iterrows():  # 371 rows (event_idx = [0,370]
    
    if event_idx != 0:
        prev_event = games_df.iloc[event_idx-1] 
        prev_event_type = prev_event['type']
        
        # 1. previous event type
        games_df.at[event_idx, 'prev_event_type'] = prev_event_type
        
        # 2. time difference in seconds
        games_df.at[event_idx, 'prev_event_time_diff'] = row['game_sec'] - prev_event['game_sec']
        
        # 3. angle between current and previous events
        # make sure both events are in the same period, otherwise it doesn't make sense
        if prev_event['period'] == row['period']:
            
            # if both events fall in a single quarter
            if sign(row['angle']) == sign(prev_event['angle']):
                rad_angle_between = abs(row['angle'] - prev_event['angle']) * np.pi / 180
            else: # both events in different quarters
                rad_angle_between = (abs(row['angle']) + abs(prev_event['angle'])) * np.pi / 180
            a = row['distance_from_net']
            b = prev_event['distance_from_net']
            dist_prev_event = np.sqrt(a**2 + b**2 - 2*a*b*np.cos(rad_angle_between))
            games_df.at[event_idx, 'distance_from_prev_event'] = dist_prev_event
            
            # 4. see if the current event is a rebound
            if prev_event['type'] == 'SHOT' and row['type'] == 'SHOT' \
            and prev_event['shooter_team_name'] == row['shooter_team_name']:
                games_df.at[event_idx, 'is_rebound'] = True
                
                # rebound angle is the change in angle between current and previous shot events = [0,180]
                print(type(rad_angle_between))
                games_df.at[event_idx, 'rebound_angle'] = rad_angle_between * 180 / np.pi
        
        else:
            games_df.at[event_idx, 'distance_from_prev_event'] = np.NaN

# 5. speed of the puck
mask = games_df['prev_event_time_diff'] > 0
games_df['speed'] = games_df[mask]['distance_from_prev_event'] /games_df[mask]['prev_event_time_diff']
       
        
# maximum distance between events shouldn't exceed 219 (the rink diagonal length)
games_df['distance_from_prev_event'].min(), games_df['distance_from_prev_event'].max()

<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>


(2.2360679774981627, 187.25650856512306)

In [5]:
games_df[['event_index', 'type', 'distance_from_prev_event', 'prev_event_time_diff', 'speed', 'is_rebound', 'rebound_angle', 'angle', 'coordinate_x', 'coordinate_y', 'distance_from_net', 'distance_from_prev_event']]
# games_df[['event_index', 'type', 'is_rebound', 'period', 'angle', 'coordinate_x', 'coordinate_y', 'distance_from_net', 'distance_from_prev_event']]

Unnamed: 0,event_index,type,distance_from_prev_event,prev_event_time_diff,speed,is_rebound,rebound_angle,angle,coordinate_x,coordinate_y,distance_from_net,distance_from_prev_event.1
0,0,GAME_SCHEDULED,,0,,False,0,,,,,
1,1,PERIOD_READY,,0,,False,0,,,,,
2,2,PERIOD_START,,0,,False,0,,,,,
3,3,FACEOFF,,0,,False,0,-0.000000,-0.0,-0.0,89.000000,
4,4,STOP,,5,,False,0,,,,,
5,5,FACEOFF,,0,,False,0,-7.926927,-69.0,-22.0,159.524293,
6,6,TAKEAWAY,150.850920,38,3.969761,False,0,-78.111342,81.0,-38.0,38.832976,150.850920
7,7,BLOCKED_SHOT,138.191896,20,6.909595,False,0,9.884124,-43.0,23.0,133.988806,138.191896
8,8,BLOCKED_SHOT,21.633308,7,3.090473,False,0,4.194183,-61.0,11.0,150.402793,21.633308
9,9,SHOT,138.924440,1,138.924440,False,0,-22.619865,77.0,-5.0,13.000000,138.924440


## wrong trial

In [4]:
# games_df.loc[games_df['angle']<0, 'angle'] = (games_df.loc[games_df['angle']<0, 'angle'] * -1) + 90
# games_df['angle'].min(), games_df['angle'].max()

(0.0, 180.0)

## References

https://stackoverflow.com/questions/50308629/python-pandas-column-convert-minutes-to-second