In [3]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys
import time
import datetime as dt
import warnings
import requests
import pickle
import json
from hockeydata import get_game_shifts, get_season_play_by_play, get_play_by_plays, list_games
from nhlscrapi.games.game import Game, GameKey, GameType
from nhlscrapi.games.cumstats import Score, ShotCt, Corsi, Fenwick
from hockeydata.output import csv

In [75]:
#set the end date to today
end_date = dt.datetime.today().strftime('%Y-%m-%d')
#set the start date to October 1st, 2022
start_date = '2023-04-17'
#list the games between the start and end date
games = list_games(start_date, end_date)
games = pd.DataFrame(games)
games.head()

Unnamed: 0,GAME_ID
0,2022030131
1,2022030111
2,2022030161
3,2022030181
4,2022030141


In [77]:
url = 'https://statsapi.web.nhl.com/api/v1/game/'
game_id = '2022030131'
full_url = url + game_id + '/feed/live'
game_data = requests.get(full_url).json()
game_data.keys()


dict_keys(['copyright', 'gamePk', 'link', 'metaData', 'gameData', 'liveData'])

In [78]:
df_1 = pd.json_normalize(game_data['liveData']['plays'])
df_2 = pd.json_normalize(game_data['liveData']['plays']['allPlays'])
# df_3 = pd.json_normalize(game_data['liveData']['plays']['allPlays']['players'])
df_2.head()

Unnamed: 0,result.event,result.eventCode,result.eventTypeId,result.description,about.eventIdx,about.eventId,about.period,about.periodType,about.ordinalNum,about.periodTime,about.periodTimeRemaining,about.dateTime,about.goals.away,about.goals.home,players,coordinates.x,coordinates.y,team.id,team.name,team.link,team.triCode,result.secondaryType,result.penaltySeverity,result.penaltyMinutes,result.strength.code,result.strength.name,result.gameWinningGoal,result.emptyNet
0,Game Scheduled,CAR1,GAME_SCHEDULED,Game Scheduled,0,1,1,REGULAR,1st,00:00,20:00,2023-04-17T22:15:54Z,0,0,,,,,,,,,,,,,,
1,Period Ready,CAR5,PERIOD_READY,Period Ready,1,5,1,REGULAR,1st,00:00,20:00,2023-04-17T23:10:32Z,0,0,,,,,,,,,,,,,,
2,Period Start,CAR51,PERIOD_START,Period Start,2,51,1,REGULAR,1st,00:00,20:00,2023-04-17T23:10:57Z,0,0,,,,,,,,,,,,,,
3,Faceoff,CAR52,FACEOFF,Casey Cizikas faceoff won against Derek Stepan,3,52,1,REGULAR,1st,00:00,20:00,2023-04-17T23:10:57Z,0,0,"[{'player': {'id': 8475231, 'fullName': 'Casey...",0.0,0.0,2.0,New York Islanders,/api/v1/teams/2,NYI,,,,,,,
4,Hit,CAR151,HIT,Matt Martin hit Jalen Chatfield,4,151,1,REGULAR,1st,00:14,19:46,2023-04-17T23:11:11Z,0,0,"[{'player': {'id': 8474709, 'fullName': 'Matt ...",93.0,29.0,2.0,New York Islanders,/api/v1/teams/2,NYI,,,,,,,


In [79]:
pbp = pd.DataFrame()
pbp = pd.json_normalize(game_data['liveData']['plays']['allPlays'])
#get the players on the ice for each event
#show all columns
pd.set_option('display.max_columns', None)
#players on the ice for each event
pbp.head()

Unnamed: 0,result.event,result.eventCode,result.eventTypeId,result.description,about.eventIdx,about.eventId,about.period,about.periodType,about.ordinalNum,about.periodTime,about.periodTimeRemaining,about.dateTime,about.goals.away,about.goals.home,players,coordinates.x,coordinates.y,team.id,team.name,team.link,team.triCode,result.secondaryType,result.penaltySeverity,result.penaltyMinutes,result.strength.code,result.strength.name,result.gameWinningGoal,result.emptyNet
0,Game Scheduled,CAR1,GAME_SCHEDULED,Game Scheduled,0,1,1,REGULAR,1st,00:00,20:00,2023-04-17T22:15:54Z,0,0,,,,,,,,,,,,,,
1,Period Ready,CAR5,PERIOD_READY,Period Ready,1,5,1,REGULAR,1st,00:00,20:00,2023-04-17T23:10:32Z,0,0,,,,,,,,,,,,,,
2,Period Start,CAR51,PERIOD_START,Period Start,2,51,1,REGULAR,1st,00:00,20:00,2023-04-17T23:10:57Z,0,0,,,,,,,,,,,,,,
3,Faceoff,CAR52,FACEOFF,Casey Cizikas faceoff won against Derek Stepan,3,52,1,REGULAR,1st,00:00,20:00,2023-04-17T23:10:57Z,0,0,"[{'player': {'id': 8475231, 'fullName': 'Casey...",0.0,0.0,2.0,New York Islanders,/api/v1/teams/2,NYI,,,,,,,
4,Hit,CAR151,HIT,Matt Martin hit Jalen Chatfield,4,151,1,REGULAR,1st,00:14,19:46,2023-04-17T23:11:11Z,0,0,"[{'player': {'id': 8474709, 'fullName': 'Matt ...",93.0,29.0,2.0,New York Islanders,/api/v1/teams/2,NYI,,,,,,,


In [80]:
"""
Functions for parsing the JSON play by play data
"""

from json import loads
import pandas as pd
from pandas import DataFrame
from operator import itemgetter

from hockeydata.constants import NHLAPI
from hockeydata.scrape.common import fix_team, get_page, to_seconds, fix_name, safeget

#these transformations are applied to make sure the naming is consistent between this source and the HTML source
event_types = {
    'PERIOD START': 'PSTR',
    'FACEOFF': 'FAC',
    'BLOCKED SHOT': 'BLOCK',
    'BLOCKED_SHOT': 'BLOCK',
    'GAME END': 'GEND',
    'GIVEAWAY': 'GIVE',
    'GOAL': 'GOAL',
    'HIT': 'HIT',
    'MISSED SHOT': 'MISS',
    'MISSED_SHOT': 'MISS',
    'PERIOD END': 'PEND',
    'SHOT': 'SHOT',
    'STOPPAGE': 'STOP',
    'TAKEAWAY': 'TAKE',
    'PENALTY': 'PENL',
    'EARLY INT START': 'EISTR',
    'EARLY INT END': 'EIEND',
    'SHOOTOUT COMPLETE': 'SOC',
    'CHALLENGE': 'CHL',
    'EMERGENCY GOALTENDER': 'EGPID'
}

def change_event_name(event):
    return event_types.get(event.upper(), event)

def scrape_game(game_id: str) -> DataFrame:
    raw_json = get_raw_json(game_id)
    df = parse_json(raw_json)
    return df

def get_raw_json(game_id: str) -> dict:
    url = NHLAPI + 'game/{}/feed/live'.format(game_id)
    res = get_page(url)

    try:
        json_res = loads(res)
        return json_res
    except Exception:
        return None

def parse_event(event) -> dict:
    play = dict()

    play['EVENT_INDEX'] = event['about']['eventIdx']
    play['PERIOD'] = event['about']['period']
    play['EVENT_TYPE'] = str(change_event_name(event['result']['event']))
    play['GAME_SECONDS'] = to_seconds(event['about']['periodTime'])

    # If there's a players key that means an event occurred on the play.
    if 'players' in event.keys():
        play['EVENT_PLAYER_1_NAME'] = fix_name(event['players'][0]['player']['fullName'])
        play['EVENT_PLAYER_1_ID'] = event['players'][0]['player']['id']

        for i in range(len(event['players'])):
            if event['players'][i]['playerType'] != 'Goalie':
                play['EVENT_PLAYER_{}_NAME'.format(i + 1)] = fix_name(event['players'][i]['player']['fullName'].upper())
                play['EVENT_PLAYER_{}_ID'.format(i + 1)] = event['players'][i]['player']['id']

        play['X_CORD'] = event['coordinates'].get('x')
        play['Y_CORD'] = event['coordinates'].get('y')

    return play

def parse_json(game_json: dict) -> DataFrame:
    columns = ['PERIOD', 'EVENT_INDEX', 'EVENT_TYPE', 'GAME_SECONDS', 'EVENT_PLAYER_1_NAME', 'EVENT_PLAYER_1_ID', 'EVENT_PLAYER_2_NAME',
               'EVENT_PLAYER_2_ID', 'EVENT_PLAYER_3_NAME', 'EVENT_PLAYER_3_ID', 'X_CORD', 'Y_CORD']

    events_to_ignore = ['PERIOD READY', 'PERIOD OFFICIAL', 'GAME READY', 'GAME OFFICIAL', 'GAME SCHEDULED']

    try:
        plays = game_json['liveData']['plays']['allPlays']
        events = [parse_event(play) for play in plays if play['result']['event'].upper() not in events_to_ignore]
        sorted_events = sorted(events, key=itemgetter('EVENT_INDEX'))
        return pd.DataFrame(sorted_events, columns=columns)
    except Exception as e:
        return None

def get_teams(game_id: str) -> dict:
    """
    Returns the teams involved in 'game_id'
    :param game_id:
    :return: dict of teams associated with this game_id : {'Home': 'ABC', 'Away': 'XYZ'}
    """
    try:
        raw_json = get_raw_json(game_id)
        return {
            'HOME': fix_team(safeget(raw_json, 'gameData', 'teams', 'home', 'name').upper()),
            'AWAY': fix_team(safeget(raw_json, 'gameData', 'teams', 'away', 'name').upper())
        }

    except Exception as e:
        return None

In [81]:
#let's define a function to get the play by play data for a given game
def custom_pbp(game_id):
    #get the play by play data
    url = 'https://statsapi.web.nhl.com/api/v1/game/'
    full_url = url + game_id + '/feed/live'
    game_data = requests.get(full_url).json()
    #parse the json data
    df = parse_json(game_data)
    #add the game_id to the dataframe
    df['GAME_ID'] = game_id
    #get the teams involved in the game
    teams = get_teams(game_id)
    #add the teams to the dataframe
    df['HOME_TEAM'] = teams['HOME']
    df['AWAY_TEAM'] = teams['AWAY']
    return df

In [82]:
#show all columns
pd.set_option('display.max_columns', None)

custom_pbp('2022030111')

Unnamed: 0,PERIOD,EVENT_INDEX,EVENT_TYPE,GAME_SECONDS,EVENT_PLAYER_1_NAME,EVENT_PLAYER_1_ID,EVENT_PLAYER_2_NAME,EVENT_PLAYER_2_ID,EVENT_PLAYER_3_NAME,EVENT_PLAYER_3_ID,X_CORD,Y_CORD,GAME_ID,HOME_TEAM,AWAY_TEAM
0,1,2,PSTR,0.0,,,,,,,,,2022030111,BOS,FLA
1,1,3,FAC,0.0,TOMAS NOSEK,8477931.0,ALEKSANDER BARKOV,8477493.0,,,0.0,0.0,2022030111,BOS,FLA
2,1,4,GIVE,11.0,GUSTAV FORSLING,8478055.0,,,,,54.0,-33.0,2022030111,BOS,FLA
3,1,5,STOP,21.0,,,,,,,,,2022030111,BOS,FLA
4,1,6,FAC,21.0,PAVEL ZACHA,8478401.0,ALEKSANDER BARKOV,8477493.0,,,69.0,22.0,2022030111,BOS,FLA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,3,400,FAC,1179.0,CHARLIE COYLE,8475745.0,ALEKSANDER BARKOV,8477493.0,,,-69.0,-22.0,2022030111,BOS,FLA
395,3,401,GIVE,1183.0,DEREK FORBORT,8475762.0,,,,,-38.0,39.0,2022030111,BOS,FLA
396,3,402,HIT,1197.0,BRANDON CARLO,8478443.0,BRANDON MONTOUR,8477986.0,,,-73.0,-32.0,2022030111,BOS,FLA
397,3,403,PEND,1200.0,,,,,,,,,2022030111,BOS,FLA


In [83]:
pbp = pd.DataFrame()
game_id = games['GAME_ID'].unique()

for game in game_id:
    print(game)
    df = custom_pbp(game)
    df['GAME_ID'] = game
    pbp = pd.concat([pbp, df], axis=0)

pbp.head()

2022030131
2022030111
2022030161
2022030181
2022030141
2022030121
2022030171
2022030151
2022030132
2022030112
2022030162
2022030182
2022030122
2022030142
2022030152
2022030172
2022030133
2022030113
2022030163
2022030183
2022030173
2022030123
2022030143
2022030153
2022030134
2022030114
2022030164
2022030184
2022030144
2022030124
2022030174
2022030154
2022030135
2022030165
2022030185
2022030115
2022030155
2022030125
2022030145
2022030175
2022030136
2022030116
2022030166
2022030156
2022030126
2022030146
2022030186
2022030117
2022030157
2022030147
2022030211
2022030231
2022030221
2022030241
2022030212
2022030232
2022030222
2022030242
2022030223
2022030213
2022030233
2022030243
2022030224
2022030234
2022030214
2022030244
2022030225
2022030235
2022030215
2022030245
2022030236
2022030246
2022030237


Unnamed: 0,PERIOD,EVENT_INDEX,EVENT_TYPE,GAME_SECONDS,EVENT_PLAYER_1_NAME,EVENT_PLAYER_1_ID,EVENT_PLAYER_2_NAME,EVENT_PLAYER_2_ID,EVENT_PLAYER_3_NAME,EVENT_PLAYER_3_ID,X_CORD,Y_CORD,GAME_ID,HOME_TEAM,AWAY_TEAM
0,1,2,PSTR,0.0,,,,,,,,,2022030131,CAR,NYI
1,1,3,FAC,0.0,CASEY CIZIKAS,8475231.0,DEREK STEPAN,8474613.0,,,0.0,0.0,2022030131,CAR,NYI
2,1,4,HIT,14.0,MATT MARTIN,8474709.0,JALEN CHATFIELD,8478970.0,,,93.0,29.0,2022030131,CAR,NYI
3,1,5,HIT,20.0,RYAN PULOCK,8477506.0,JACK DRURY,8480835.0,,,58.0,-39.0,2022030131,CAR,NYI
4,1,6,SHOT,55.0,BRADY SKJEI,8476869.0,,,,,-36.0,-15.0,2022030131,CAR,NYI


In [84]:
len(pbp)

26082

In [85]:
#get the shifts
shifts = pd.DataFrame()
for game in game_id:
    print(game)
    df = get_game_shifts(game)
    df['GAME_ID'] = game
    shifts = pd.concat([shifts, df], axis=0)

2022030131
2022030111
2022030161
2022030181
2022030141
2022030121
2022030171
2022030151
2022030132
2022030112
2022030162
2022030182
2022030122
2022030142
2022030152
2022030172
2022030133
2022030113
2022030163
2022030183
2022030173
2022030123
2022030143
2022030153
2022030134
2022030114
2022030164
2022030184
2022030144
2022030124
2022030174
2022030154
2022030135
2022030165
2022030185
2022030115
2022030155
2022030125
2022030145
2022030175
2022030136
2022030116
2022030166
2022030156
2022030126
2022030146
2022030186
2022030117
2022030157
2022030147
2022030211
2022030231
2022030221
2022030241
2022030212
2022030232
2022030222
2022030242
2022030223
2022030213
2022030233
2022030243
2022030224
2022030234
2022030214
2022030244
2022030225
2022030235
2022030215
2022030245
2022030236
2022030246
2022030237


In [86]:
shifts.head()

Unnamed: 0,GAME_ID,PERIOD,TEAM,PLAYER,PLAYER_ID,START,END,DURATION
0,2022030131,1,CAR,PAUL STASTNY,8471669,0.0,26.0,26.0
1,2022030131,1,NYI,CAL CLUTTERBUCK,8473504,0.0,25.0,25.0
2,2022030131,1,CAR,DEREK STEPAN,8474613,0.0,34.0,34.0
3,2022030131,1,NYI,MATT MARTIN,8474709,0.0,31.0,31.0
4,2022030131,1,NYI,CASEY CIZIKAS,8475231,0.0,24.0,24.0


In [87]:
#we now want to merge the shifts and the play by play data
#we will do this by merging on the game_id, and building a time window around the shift start time

#create columns in the pbp dataframe for the home and away team players. HOME_PLAYER_1, HOME_PLAYER_2, ETC.
pbp['HOME_PLAYER_1'] = ""
pbp['HOME_PLAYER_2'] = ""
pbp['HOME_PLAYER_3'] = ""
pbp['HOME_PLAYER_4'] = ""
pbp['HOME_PLAYER_5'] = ""
pbp['HOME_PLAYER_6'] = ""
pbp['AWAY_PLAYER_1'] = ""
pbp['AWAY_PLAYER_2'] = ""
pbp['AWAY_PLAYER_3'] = ""
pbp['AWAY_PLAYER_4'] = ""
pbp['AWAY_PLAYER_5'] = ""
pbp['AWAY_PLAYER_6'] = ""

pbp.head()

Unnamed: 0,PERIOD,EVENT_INDEX,EVENT_TYPE,GAME_SECONDS,EVENT_PLAYER_1_NAME,EVENT_PLAYER_1_ID,EVENT_PLAYER_2_NAME,EVENT_PLAYER_2_ID,EVENT_PLAYER_3_NAME,EVENT_PLAYER_3_ID,X_CORD,Y_CORD,GAME_ID,HOME_TEAM,AWAY_TEAM,HOME_PLAYER_1,HOME_PLAYER_2,HOME_PLAYER_3,HOME_PLAYER_4,HOME_PLAYER_5,HOME_PLAYER_6,AWAY_PLAYER_1,AWAY_PLAYER_2,AWAY_PLAYER_3,AWAY_PLAYER_4,AWAY_PLAYER_5,AWAY_PLAYER_6
0,1,2,PSTR,0.0,,,,,,,,,2022030131,CAR,NYI,,,,,,,,,,,,
1,1,3,FAC,0.0,CASEY CIZIKAS,8475231.0,DEREK STEPAN,8474613.0,,,0.0,0.0,2022030131,CAR,NYI,,,,,,,,,,,,
2,1,4,HIT,14.0,MATT MARTIN,8474709.0,JALEN CHATFIELD,8478970.0,,,93.0,29.0,2022030131,CAR,NYI,,,,,,,,,,,,
3,1,5,HIT,20.0,RYAN PULOCK,8477506.0,JACK DRURY,8480835.0,,,58.0,-39.0,2022030131,CAR,NYI,,,,,,,,,,,,
4,1,6,SHOT,55.0,BRADY SKJEI,8476869.0,,,,,-36.0,-15.0,2022030131,CAR,NYI,,,,,,,,,,,,


In [88]:
for i in range(0, len(pbp)):
    #get the game_id
    game_id = pbp.iloc[i]['GAME_ID']
    #get the period
    period = pbp.iloc[i]['PERIOD']
    #get the game_seconds
    game_seconds = pbp.iloc[i]['GAME_SECONDS']
    #get the home team
    home_team = pbp.iloc[i]['HOME_TEAM']
    #get the away team
    away_team = pbp.iloc[i]['AWAY_TEAM']
    #get the home players
    home_players = shifts[(shifts['GAME_ID'] == game_id) & (shifts['PERIOD'] == period) & (shifts['TEAM'] == home_team) & (shifts['START'] <= game_seconds) & (shifts['END'] > game_seconds)]['PLAYER_ID'].values
    #get the away players
    away_players = shifts[(shifts['GAME_ID'] == game_id) & (shifts['PERIOD'] == period) & (shifts['TEAM'] == away_team) & (shifts['START'] <= game_seconds) & (shifts['END'] > game_seconds)]['PLAYER_ID'].values
    #add the home players to the pbp dataframe
    try:
        pbp.at[i, 'HOME_PLAYER_1'] = home_players[0]
        pbp.at[i, 'HOME_PLAYER_2'] = home_players[1]
        pbp.at[i, 'HOME_PLAYER_3'] = home_players[2]
        pbp.at[i, 'HOME_PLAYER_4'] = home_players[3]
        pbp.at[i, 'HOME_PLAYER_5'] = home_players[4]
        pbp.at[i, 'HOME_PLAYER_6'] = home_players[5]
        #add the away players to the pbp dataframe
        pbp.at[i, 'AWAY_PLAYER_1'] = away_players[0]
        pbp.at[i, 'AWAY_PLAYER_2'] = away_players[1]
        pbp.at[i, 'AWAY_PLAYER_3'] = away_players[2]
        pbp.at[i, 'AWAY_PLAYER_4'] = away_players[3]
        pbp.at[i, 'AWAY_PLAYER_5'] = away_players[4]
        pbp.at[i, 'AWAY_PLAYER_6'] = away_players[5]
    except:
        pass

pbp.head()

Unnamed: 0,PERIOD,EVENT_INDEX,EVENT_TYPE,GAME_SECONDS,EVENT_PLAYER_1_NAME,EVENT_PLAYER_1_ID,EVENT_PLAYER_2_NAME,EVENT_PLAYER_2_ID,EVENT_PLAYER_3_NAME,EVENT_PLAYER_3_ID,X_CORD,Y_CORD,GAME_ID,HOME_TEAM,AWAY_TEAM,HOME_PLAYER_1,HOME_PLAYER_2,HOME_PLAYER_3,HOME_PLAYER_4,HOME_PLAYER_5,HOME_PLAYER_6,AWAY_PLAYER_1,AWAY_PLAYER_2,AWAY_PLAYER_3,AWAY_PLAYER_4,AWAY_PLAYER_5,AWAY_PLAYER_6
0,1,2,PSTR,0.0,,,,,,,,,2022030131,CAR,NYI,8471669,8474613,8476906,8477293,8478970,8480835,8473504,8474709,8475231,8476917,8477506,8478009
1,1,3,FAC,0.0,CASEY CIZIKAS,8475231.0,DEREK STEPAN,8474613.0,,,0.0,0.0,2022030131,CAR,NYI,8471669,8474613,8476906,8477293,8478970,8480835,8473504,8474709,8475231,8476917,8477506,8478009
2,1,4,HIT,14.0,MATT MARTIN,8474709.0,JALEN CHATFIELD,8478970.0,,,93.0,29.0,2022030131,CAR,NYI,8471669,8474613,8476906,8477293,8478970,8480835,8473504,8474709,8475231,8476917,8477506,8478009
3,1,5,HIT,20.0,RYAN PULOCK,8477506.0,JACK DRURY,8480835.0,,,58.0,-39.0,2022030131,CAR,NYI,8471669,8474613,8476906,8477293,8478970,8480835,8473504,8474709,8475231,8476917,8477506,8478009
4,1,6,SHOT,55.0,BRADY SKJEI,8476869.0,,,,,-36.0,-15.0,2022030131,CAR,NYI,8477293,8473533,8476921,8475855,8476869,8477488,8478009,8476429,8480222,8477500,8478445,8475314


In [89]:
#fill nan values with 0 for X_CORD and Y_CORD
pbp['X_CORD'] = pbp['X_CORD'].fillna(0)
pbp['Y_CORD'] = pbp['Y_CORD'].fillna(0)


In [90]:
print(min(pbp['X_CORD']), max(pbp['X_CORD']), min(pbp['Y_CORD']), max(pbp['Y_CORD']))

-99.0 99.0 -42.0 42.0


In [91]:
#plot a heatmap of the SHOT event_type
#we want to plot the shots on this image of the rink: https://www.conceptdraw.com/How-To-Guide/picture/Sport-Hockey-Simple-hockey-field-Template.png
# load the image and plot it

#use plotly.graph_objects to plot the rink
fig = go.Figure()

#add the rink image to the background of the plot
fig.add_layout_image(
    dict(
    # local path to a downloaded image file: 'Sport-Hockey-Simple-hockey-field-Template.png'
    source="https://www.conceptdraw.com/How-To-Guide/picture/Sport-Hockey-Simple-hockey-field-Template.png",
    xref="x",
    yref="y",
    x=0,
    y=0,
    sizex=100,
    sizey=100,
    sizing="stretch",
    opacity=0.5,
    layer="below")
)

#add the shots to the plot
fig.add_trace(go.Scatter(
    x=pbp[pbp['EVENT_TYPE'] == 'SHOT']['X_CORD'],
    y=pbp[pbp['EVENT_TYPE'] == 'SHOT']['Y_CORD'],
    mode='markers',
    marker=dict(
        size=5,
        color='red',
        opacity=0.5
    ),
    name='Shots'
))

#add the goals to the plot
fig.add_trace(go.Scatter(
    x=pbp[pbp['EVENT_TYPE'] == 'GOAL']['X_CORD'],
    y=pbp[pbp['EVENT_TYPE'] == 'GOAL']['Y_CORD'],
    mode='markers',
    marker=dict(
        size=5,
        color='green',
        opacity=0.5
    ),
    name='Goals'
))

#add the misses to the plot
fig.add_trace(go.Scatter(
    x=pbp[pbp['EVENT_TYPE'] == 'MISS']['X_CORD'],
    y=pbp[pbp['EVENT_TYPE'] == 'MISS']['Y_CORD'],
    mode='markers',
    marker=dict(
        size=5,
        color='blue',
        opacity=0.5
    ),
    name='Misses'
))

#plot the figure
fig.show()

In [95]:
#plot a density plot of the SHOT event_type
#we want to plot the shots on this image of the rink: https://www.conceptdraw.com/How-To-Guide/picture/Sport-Hockey-Simple-hockey-field-Template.png

#use plotly.graph_objects to plot the rink

fig = go.Figure()

#add the rink image to the background of the plot
fig.update_layout(
    images=[go.layout.Image(
        source="Sport-Hockey-Simple-hockey-field-Template.png",  # Provide the path to your hockey rink image
        xref="x",
        yref="y",
        x=0,
        y=0,
        sizex=200,  # Adjust the size according to your image dimensions
        sizey=85,  # Adjust the size according to your image dimensions
        sizing="stretch",
        opacity=0
    )]
)


#add the shots to the plot
fig.add_trace(go.Histogram2dContour(
    x=pbp[pbp['EVENT_TYPE'] == 'SHOT']['X_CORD'],
    y=pbp[pbp['EVENT_TYPE'] == 'SHOT']['Y_CORD'],
    reversescale=False,
    xaxis='x',
    yaxis='y',
    name='Shots',
    nbinsx=50,
    nbinsy=50,
    zauto=False
))

fig.update_layout(
    title="Hockey Rink Events",
    xaxis=dict(
        range=[-100, 100],  # Adjust the range according to your rink dimensions
        showgrid=False,
        zeroline=False
    ),
    yaxis=dict(
        range=[-42.5, 42.5],  # Adjust the range according to your rink dimensions
        showgrid=False,
        zeroline=False
    )
)

fig.show()

In [97]:
#plot a density_heatmap of the SHOT event_type

#use plotly.graph_objects to plot the rink
fig = go.Figure()

#add the rink image to the background of the plot
fig.add_layout_image(
    dict(
    # local path to a downloaded image file: 'Sport-Hockey-Simple-hockey-field-Template.png'
    source = 'Sport-Hockey-Simple-hockey-field-Template.png',
    xref="x",
    yref="y",
    x=0,
    y=0,
    sizex=100,
    sizey=100,
    sizing="stretch",    
    layer="below")

)

#add the shots to the plot
fig.add_trace(go.Histogram2dContour(
    x=pbp[pbp['EVENT_TYPE'] == 'SHOT']['X_CORD'],
    y=pbp[pbp['EVENT_TYPE'] == 'SHOT']['Y_CORD'],
    colorscale=px.colors.sequential.Electric,
    reversescale=False,
    xaxis='x',
    yaxis='y',
    name='Shots',
    nbinsx=100,
    nbinsy=42,
    zauto=False
))

fig.update_layout(
    title="Hockey Rink Events",
    xaxis=dict(
        range=[-100, 100],  # Adjust the range according to your rink dimensions
        showgrid=False,
        zeroline=False
    ),
    yaxis=dict(
        range=[-42.5, 42.5],  # Adjust the range according to your rink dimensions
        showgrid=False,
        zeroline=False
    )
)

fig.show()

In [98]:
#save shifts to csv
shifts.to_csv('data\\shifts.csv', index=False)
#save pbp to csv
pbp.to_csv('data\\pbp.csv', index=False)

In [101]:
shifts.head()

Unnamed: 0,GAME_ID,PERIOD,TEAM,PLAYER,PLAYER_ID,START,END,DURATION
0,2022030131,1,CAR,PAUL STASTNY,8471669,0.0,26.0,26.0
1,2022030131,1,NYI,CAL CLUTTERBUCK,8473504,0.0,25.0,25.0
2,2022030131,1,CAR,DEREK STEPAN,8474613,0.0,34.0,34.0
3,2022030131,1,NYI,MATT MARTIN,8474709,0.0,31.0,31.0
4,2022030131,1,NYI,CASEY CIZIKAS,8475231,0.0,24.0,24.0


In [102]:
#create a dataframe of the players
players = pd.DataFrame()
players['PLAYER_ID'] = shifts['PLAYER_ID'].unique()
#map the player names to the dataframe
players['PLAYER_NAME'] = players['PLAYER_ID'].map(shifts.set_index('PLAYER_ID')['PLAYER'])
#map the player teams to the dataframe
players['TEAM'] = players['PLAYER_ID'].map(shifts.set_index('PLAYER_ID')['TEAM'])
players.head()

InvalidIndexError: Reindexing only valid with uniquely valued Index objects