In [13]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import math
import json

# 0. Read dataset

In this section, I read the tidied shot data from a `csv` file (`Dataset/tidyData_original.csv`) into a pandas DataFrame.

In [29]:
# Relative path of the CSV file that is read into `df`.
PATH_TIDY_DATA_CSV = r"Dataset/tidyData_original.csv"

In [30]:
# Load the CSV into a DataFrame, report its shape and preview the first ten rows.
df = pd.read_csv(PATH_TIDY_DATA_CSV)
# reset_index() without drop=True keeps the previous index as an extra "index" column.
df = df.reset_index()
print(f"Shape of df: {df.shape}")
df.head(10)

Shape of df: (385076, 20)


Unnamed: 0,index,gamePk,homeTeam,period,periodType,periodTime,periodTimeRemaining,dateTime,teamId,teamName,attackingSide,teamTriCode,eventType,x-coordinate,y-coordinate,goalieName,shooterName,shotType,isEmptyNet,strength
0,0,2018021200,Colorado Avalanche,1,REGULAR,00:35,19:25,2019-03-30T01:09:28Z,21,Colorado Avalanche,left,COL,Shot,-75.0,-5.0,Darcy Kuemper,Gabriel Landeskog,Backhand,,False
1,1,2018021200,Colorado Avalanche,1,REGULAR,01:44,18:16,2019-03-30T01:11:28Z,21,Colorado Avalanche,left,COL,Shot,-37.0,33.0,Darcy Kuemper,Tyson Barrie,Wrist Shot,,False
2,2,2018021200,Colorado Avalanche,1,REGULAR,02:42,17:18,2019-03-30T01:13:23Z,21,Colorado Avalanche,left,COL,Shot,-80.0,-6.0,Darcy Kuemper,Colin Wilson,Wrist Shot,,False
3,3,2018021200,Colorado Avalanche,1,REGULAR,05:19,14:41,2019-03-30T01:16:58Z,53,Arizona Coyotes,right,ARI,Shot,62.0,-23.0,Philipp Grubauer,Richard Panik,Wrist Shot,,False
4,4,2018021200,Colorado Avalanche,1,REGULAR,05:53,14:07,2019-03-30T01:19:11Z,53,Arizona Coyotes,right,ARI,Shot,-7.0,36.0,Philipp Grubauer,Lawson Crouse,Wrist Shot,,False
5,5,2018021200,Colorado Avalanche,1,REGULAR,07:02,12:58,2019-03-30T01:20:48Z,21,Colorado Avalanche,left,COL,Shot,-70.0,-21.0,Darcy Kuemper,Sven Andrighetto,Wrist Shot,,False
6,6,2018021200,Colorado Avalanche,1,REGULAR,07:19,12:41,2019-03-30T01:21:05Z,21,Colorado Avalanche,left,COL,Shot,-67.0,-38.0,Darcy Kuemper,Derick Brassard,Wrist Shot,,False
7,7,2018021200,Colorado Avalanche,1,REGULAR,07:48,12:12,2019-03-30T01:21:34Z,21,Colorado Avalanche,left,COL,Shot,-79.0,-20.0,Darcy Kuemper,Tyson Jost,Wrist Shot,,False
8,8,2018021200,Colorado Avalanche,1,REGULAR,08:11,11:49,2019-03-30T01:21:56Z,53,Arizona Coyotes,right,ARI,Shot,51.0,23.0,Philipp Grubauer,Jordan Oesterle,Wrist Shot,,False
9,9,2018021200,Colorado Avalanche,1,REGULAR,08:23,11:37,2019-03-30T01:24:22Z,53,Arizona Coyotes,right,ARI,Shot,37.0,1.0,Philipp Grubauer,Nick Cousins,Wrist Shot,,False


# 1. Include simple features

In [4]:
def time_string_to_seconds(time_str):
    """
    Convert a "mm:ss" time string to the total number of seconds. For example: "03:45" -> 225

    Parameters:
        time_str: string made of two integer fields separated by a single ':'.

    Returns:
        int: minutes * 60 + seconds.

    Raises:
        ValueError: if time_str is not a string of the form "mm:ss"
            (for example None, NaN, "abc" or "1:2:3").
    """
    try:
        minutes, seconds = map(int, time_str.split(':'))
    except (AttributeError, TypeError, ValueError) as exc:
        # Fail here with the offending value instead of printing and then
        # crashing on unbound `minutes` / `seconds` a line later.
        raise ValueError(f"Error in time string. Current time string: {time_str}") from exc
    return minutes * 60 + seconds

In [None]:
# def Determine_Shooting_Net(home_team, shoter_tem, period):
    
#     is_home_shot = True 
#     if home_team.lower() != shoter_tem.lower():
#         is_home_shot = False    # Away shot

#     net_coordinates = None
#     if is_home_shot:
#         if period % 2 == 1:
#             net_coordinates = (89, 0)
#         else:
#             net_coordinates = (-89, 0)
#     else:    # away
#         if period % 2 == 1:
#             net_coordinates = (-89, 0)
#         else:
#             net_coordinates = (89, 0)
    
#     return net_coordinates

In [7]:
def Determine_Shooting_Net(shot_coor_x, shot_coor_y):
    """
    Pick the net a shot is attributed to from the sign of its x coordinate.

    Returns (89, 0) when shot_coor_x is strictly positive and (-89, 0) otherwise
    (zero and any value that does not compare greater than 0 fall in the second case).
    shot_coor_y is accepted but not used.
    """
    return (89, 0) if shot_coor_x > 0 else (-89, 0)

In [9]:
def Calculate_Distance(point1, point2):
    """
    Return the Euclidean distance between two 2-D points.

    Each point is an (x, y) pair; the result is the square root of the summed
    squared coordinate differences.
    """
    (x1, y1), (x2, y2) = point1, point2
    delta_x = x2 - x1
    delta_y = y2 - y1
    return (delta_x ** 2 + delta_y ** 2) ** 0.5

In [10]:
def Calculate_Shot_Angle(shot_location, net_location):
    """
    Return, in degrees, the direction of the vector going from the shot to the net.

    Both arguments are (x, y) pairs. The angle is measured from the positive
    x-axis with math.atan2, so it lies in [-180, 180]. Note that a net located
    at a smaller x than the shot therefore yields values near +/-180 rather
    than near 0.
    """
    sx, sy = shot_location
    nx, ny = net_location
    return math.degrees(math.atan2(ny - sy, nx - sx))

In [12]:
# Per-shot feature columns; entry i of every list belongs to the i-th row visited in `df`.
list_game_seconds, list_game_period = [], []
list_coordinates_x, list_coordinates_y = [], []
list_shot_distance, list_shot_angle = [], []
list_shot_type = []

for idx_event, shot_event in df.iterrows():
    # Seconds elapsed inside the current period plus 20 * 60 seconds for every earlier period.
    seconds_in_period = time_string_to_seconds(shot_event['periodTime'])
    game_seconds = seconds_in_period + (int(shot_event['period']) - 1) * 20 * 60
    game_period = shot_event['period']

    coor_x = shot_event['x-coordinate']
    coor_y = shot_event['y-coordinate']
    shot_location = (coor_x, coor_y)

    # The net is chosen from the shot coordinates alone; distance and angle are taken relative to it.
    net_coordinates = Determine_Shooting_Net(*shot_location)
    shot_distance = Calculate_Distance(shot_location, net_coordinates)
    shot_angle = Calculate_Shot_Angle(shot_location, net_coordinates)
    shot_type = shot_event['shotType']

    list_game_seconds.append(game_seconds)
    list_game_period.append(game_period)
    list_coordinates_x.append(coor_x)
    list_coordinates_y.append(coor_y)
    list_shot_distance.append(shot_distance)
    list_shot_angle.append(shot_angle)
    list_shot_type.append(shot_type)

# 2. Add feature of previous event

- We locate the event that precedes each shot by matching on its `dateTime`, for example "2019-03-30T01:09:28Z".

In [45]:
# JSON file of a single game; its file name matches the gamePk 2018021200 seen in the preview of `df`.
path_json_file = r"Dataset/2018/regular_season/2018021200.json"

In [46]:
# Parse the whole JSON file into `data`; the file is closed when the `with` block exits.
with open(path_json_file, 'r') as json_file:
    data = json.load(json_file)

In [47]:
# The sequence stored under liveData -> plays -> allPlays; each entry is handled as one event below.
list_event = data['liveData']['plays']['allPlays']
print(f"Number of event: {len(list_event)}")

Number of event: 365


In [48]:
# Show the first five rows of `df` for reference.
df.head()

Unnamed: 0,index,gamePk,homeTeam,period,periodType,periodTime,periodTimeRemaining,dateTime,teamId,teamName,attackingSide,teamTriCode,eventType,x-coordinate,y-coordinate,goalieName,shooterName,shotType,isEmptyNet,strength
0,0,2018021200,Colorado Avalanche,1,REGULAR,00:35,19:25,2019-03-30T01:09:28Z,21,Colorado Avalanche,left,COL,Shot,-75.0,-5.0,Darcy Kuemper,Gabriel Landeskog,Backhand,,False
1,1,2018021200,Colorado Avalanche,1,REGULAR,01:44,18:16,2019-03-30T01:11:28Z,21,Colorado Avalanche,left,COL,Shot,-37.0,33.0,Darcy Kuemper,Tyson Barrie,Wrist Shot,,False
2,2,2018021200,Colorado Avalanche,1,REGULAR,02:42,17:18,2019-03-30T01:13:23Z,21,Colorado Avalanche,left,COL,Shot,-80.0,-6.0,Darcy Kuemper,Colin Wilson,Wrist Shot,,False
3,3,2018021200,Colorado Avalanche,1,REGULAR,05:19,14:41,2019-03-30T01:16:58Z,53,Arizona Coyotes,right,ARI,Shot,62.0,-23.0,Philipp Grubauer,Richard Panik,Wrist Shot,,False
4,4,2018021200,Colorado Avalanche,1,REGULAR,05:53,14:07,2019-03-30T01:19:11Z,53,Arizona Coyotes,right,ARI,Shot,-7.0,36.0,Philipp Grubauer,Lawson Crouse,Wrist Shot,,False


In [64]:
def Extract_Previous_Event(list_event, current_sample_dateTime):
    """
    Return the event that sits directly before the event whose dateTime matches.

    Parameters:
        list_event: sequence of event dictionaries, each exposing
            event['about']['dateTime'].
        current_sample_dateTime: dateTime to look for, e.g. "2019-03-30T01:09:28Z".
            The comparison is case-insensitive on both sides.

    Returns:
        The element of list_event located just before the matching event, or
        None when nothing matches or the only match is the very first event.
        If several events share the dateTime, the predecessor of the last
        matching one is returned.
    """
    # Normalise once so callers need not lower-case the value themselves.
    target_dateTime = str(current_sample_dateTime).lower()

    previous_event = None
    for idx_event, event in enumerate(list_event):
        event_dateTime = str(event['about']['dateTime']).lower()
        # idx_event == 0 has no predecessor; indexing with -1 would wrap
        # around to the last event of the list.
        if event_dateTime == target_dateTime and idx_event > 0:
            previous_event = list_event[idx_event - 1]
    return previous_event

In [67]:
def Get_Info_Previous_Event(previous_event):
    """
    Summarise an event dictionary as a flat tuple.

    Parameters:
        previous_event: event dictionary providing 'result' -> 'event',
            'about' -> 'period' / 'periodTime' and, when available,
            'coordinates' -> 'x' / 'y'. None is accepted.

    Returns:
        tuple: (event_type, coor_x, coor_y, event_period, event_periodTime).
        coor_x / coor_y are None when the event carries no such coordinate;
        every field is None when previous_event itself is None.
    """
    if previous_event is None:
        # No previous event was found; keep the tuple shape so callers can still unpack it.
        return (None, None, None, None, None)

    event_type = previous_event['result']['event']
    # An absent or empty 'coordinates' entry yields None instead of a KeyError.
    coordinates = previous_event.get('coordinates') or {}
    coor_x = coordinates.get('x')
    coor_y = coordinates.get('y')
    event_period = previous_event['about']['period']
    event_periodTime = previous_event['about']['periodTime']

    return (event_type, coor_x, coor_y, event_period, event_periodTime)

In [68]:
# Pick one row of `df` by position to try the previous-event lookup on.
idx_shot = 0
current_sample = df.iloc[idx_shot, :]
# Lower-cased because Extract_Previous_Event compares against the lower-cased event dateTime.
current_sample_dateTime = str(current_sample['dateTime']).lower()
print(f"Current sample dateTime: {current_sample_dateTime}")

Current sample dateTime: 2019-03-30t01:09:28z


In [69]:
# Look up the entry of `list_event` that sits just before the one matching the sample's dateTime.
previous_event = Extract_Previous_Event(list_event, current_sample_dateTime)
print(previous_event)

{'players': [{'player': {'id': 8474613, 'fullName': 'Derek Stepan', 'link': '/api/v1/people/8474613'}, 'playerType': 'Hitter'}, {'player': {'id': 8476455, 'fullName': 'Gabriel Landeskog', 'link': '/api/v1/people/8476455'}, 'playerType': 'Hittee'}], 'result': {'event': 'Hit', 'eventCode': 'COL101', 'eventTypeId': 'HIT', 'description': 'Derek Stepan hit Gabriel Landeskog'}, 'about': {'eventIdx': 4, 'eventId': 101, 'period': 1, 'periodType': 'REGULAR', 'ordinalNum': '1st', 'periodTime': '00:20', 'periodTimeRemaining': '19:40', 'dateTime': '2019-03-30T01:09:13Z', 'goals': {'away': 0, 'home': 0}}, 'coordinates': {'x': 62.0, 'y': -39.0}, 'team': {'id': 53, 'name': 'Arizona Coyotes', 'link': '/api/v1/teams/53', 'triCode': 'ARI'}}


In [70]:
# Unpack the five-field summary of the previous event and print each field.
(event_type, coor_x, coor_y, event_period, event_periodTime) = Get_Info_Previous_Event(previous_event)
print(f"Event type: {event_type}")
print(f"Event coor_x: {coor_x}")
print(f"Event coor_y: {coor_y}")
print(f"Event period: {event_period}")
print(f"Event period time: {event_periodTime}")

Event type: Hit
Event coor_x: 62.0
Event coor_y: -39.0
Event period: 1
Event period time: 00:20
