In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the processed, normalized season-plays CSV. `index_col=False` tells pandas
# not to use the first column of the file as the index.
# NOTE(review): the preview below shows an "Unnamed: 0" column -- presumably the
# index that was written out together with the CSV; confirm whether it should be
# dropped (or used as the index) before feature engineering.
season_plays_df = pd.read_csv("./data/processed/normalized_season_plays_2016-2021.csv", index_col=False)
# Show the first rows as the cell output.
season_plays_df.head()

Unnamed: 0,event_idx,event_type_id,period_idx,period_type,game_time,shot_type,team_initiative_id,team_initiative_name,x_coord,y_coord,...,goalie_id,goalie_name,strength,empty_net_bool,gamePk,game_season,game_type,game_start_time,x_coord_norm,y_coord_norm
0,271,SHOT,2,REGULAR,2016-09-26T00:01:58Z,Wrist Shot,STL,St. Louis Blues,-77.0,-11.0,...,8476341.0,Anton Forsberg,,,2016010001,20162017,PR,2016-09-25T23:00:00Z,77.0,11.0
1,273,SHOT,2,REGULAR,2016-09-26T00:02:25Z,Wrist Shot,STL,St. Louis Blues,-76.0,-10.0,...,8476341.0,Anton Forsberg,,,2016010001,20162017,PR,2016-09-25T23:00:00Z,76.0,10.0
2,269,SHOT,2,REGULAR,2016-09-25T23:58:58Z,Wrist Shot,STL,St. Louis Blues,-57.0,-26.0,...,8476341.0,Anton Forsberg,,,2016010001,20162017,PR,2016-09-25T23:00:00Z,57.0,26.0
3,267,SHOT,2,REGULAR,2016-09-25T23:58:14Z,Wrist Shot,STL,St. Louis Blues,-64.0,-4.0,...,8476341.0,Anton Forsberg,,,2016010001,20162017,PR,2016-09-25T23:00:00Z,64.0,4.0
4,264,SHOT,1,REGULAR,2016-09-25T23:39:18Z,Wrist Shot,STL,St. Louis Blues,43.0,-24.0,...,8476341.0,Anton Forsberg,,,2016010001,20162017,PR,2016-09-25T23:00:00Z,43.0,-24.0


In [10]:
def basic_features(plays_df):
    """Assemble the basic per-play feature table.

    Each helper below receives ``plays_df`` and returns one named Series; the
    Series are placed side by side, in this order, as the columns
    ``dist_from_net``, ``angle_from_net``, ``is_goal`` and ``empty_net``.

    Parameters
    ----------
    plays_df : pandas.DataFrame
        Plays table providing the columns the helpers read
        (``x_coord_norm``, ``y_coord_norm``, ``event_type_id``,
        ``empty_net_bool``).

    Returns
    -------
    pandas.DataFrame
        One column per feature. ``pd.concat(axis=1)`` aligns the pieces on
        their index labels.
    """
    feature_builders = (_dist_from_net, _angle_from_net, _is_goal, _empty_net)
    feature_columns = [build(plays_df) for build in feature_builders]
    return pd.concat(feature_columns, axis=1)

def _dist_from_net(plays_df):
    net_pos = np.array([100-11, 0])
    shot_vector = net_pos - plays_df[["x_coord_norm", "y_coord_norm"]]
    dist_from_net = np.linalg.norm(shot_vector, ord=2, axis=1)
    return pd.Series(dist_from_net, name="dist_from_net")

def _angle_from_net(plays_df):
    net_pos = np.array([100-11, 0])
    shot_vector = net_pos - plays_df[["x_coord_norm", "y_coord_norm"]]
    cos_angle = shot_vector @ net_pos / (np.linalg.norm(net_pos, ord=2) * np.linalg.norm(shot_vector, ord=2, axis=1))
    angle = np.arccos(cos_angle)
    return np.degrees(angle).rename("angle_from_net")
    
def _is_goal(plays_df):
    str_to_int = {"SHOT": 0, "GOAL": 1}
    is_goal = plays_df.event_type_id.replace(str_to_int)
    return is_goal.rename("is_goal")
    
def _empty_net(plays_df):
    return plays_df.empty_net_bool.copy().rename("empty_net")

In [11]:
# Build the basic feature table (distance, angle, goal label, empty-net flag)
# from the plays loaded above.
features = basic_features(season_plays_df)