# NHL Shot and Goal Distribution of Players b/w 2019-2020

https://towardsdatascience.com/nhl-analytics-with-python-6390c5d3206d

In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
import requests
import pickle

In [5]:
game_data = []
season_type = '02'
year = '2019'          # taking limited data of 2019-2020 
max_game_ID = 1290     # maximum number of game events in an year

In [6]:
# pulling the data from API
for i in range(0, max_game_ID):
    r = requests.get(url='http://statsapi.web.nhl.com/api/v1/game/'
                     + year + season_type + str(i).zfill(4) + '/feed/live')
    data = r.json()
    game_data.append(data)

Run the following code to get a segment of game data

In [None]:
#import json

# Pretty print json
#for i in range(0,2):
#    print (json.dumps(game_data[i], indent=1))

In [None]:
# for dumping the dataset
with open('./'+ year + 'FullDatasetNHL.pkl', 'wb') as f:
    pickle.dump(game_data, f, pickle.HIGHEST_PROTOCOL)


Dumping the dataset on Kaggle as 2019FullDatasetNHL.pkl

In [7]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
color_map = plt.cm.winter
# https://matplotlib.org/stable/tutorials/colors/colormaps.html

from matplotlib.patches import RegularPolygon
# for shape -- https://matplotlib.org/stable/api/patches_api.html

import math
from PIL import Image    # pillow to import image data

# Customer color mapping
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import matplotlib.colors as mcolors

In [8]:
# Making 2 color maps for positive and negative shots
c = mcolors.ColorConverter().to_rgb
positive_cm = ListedColormap([c('#e1e5e5'), c('#d63b36')]) # Red - more attack
negative_cm = ListedColormap([c('#e1e5e5'), c('#28aee4')]) # Blue

## Calculating the Average Shooting Percentage at each point on the Ice

Making Dictionary for holding cordinates for Shots and Goals

In [9]:
league_data = {}

league_data['Shot'] = {}
league_data['Shot']['x'] = []
league_data['Shot']['y'] = []

league_data['Goal'] = {}
league_data['Goal']['x'] = []
league_data['Goal']['y'] = []

In [10]:
league_data

{'Shot': {'x': [], 'y': []}, 'Goal': {'x': [], 'y': []}}

In [11]:
# Currently interested in only these 2 types of events - Shots and Goals to determine the success rate
event_types = ['Shot', 'Goal']

In [12]:
for i in game_data:
    # if the liveData key is not present in the ith element of game data then skip it to prevent any errors
    if 'liveData' not in i:
        continue
    
    plays = i['liveData']['plays']['allPlays']
    
    # for each play
    for play in plays:
        for event in event_types:
            if play['result']['event'] in event:
                # save the x and y coordinates of goal/shot
                if 'x' in play['coordinates']:
                    league_data[event]['x'].append(play['coordinates']['x'])
                    league_data[event]['y'].append(play['coordinates']['y'])        

In [13]:
print("Some of the Shots are:")
for i in range(0,5):
    print(league_data['Shot']['x'][i],", ", league_data['Shot']['y'][i])
    
print("\nSome of the Goal points are:")
for i in range(0,5):
    print(league_data['Goal']['x'][i],", ",league_data['Goal']['y'][i])

Some of the Shots are:
-32.0 ,  -2.0
63.0 ,  -6.0
-59.0 ,  -20.0
-42.0 ,  -29.0
-52.0 ,  -7.0

Some of the Goal points are:
85.0 ,  -1.0
86.0 ,  0.0
84.0 ,  2.0
-83.0 ,  -6.0
83.0 ,  5.0


## Extracting Player Data

In [14]:
import json

player_names = [] # getting all player names who are shooters or scorers
for i in game_data:
    if 'liveData' not in i:
        continue
    
    plays = i["liveData"]['plays']['allPlays']
    players_name = []
    for play in plays:
        if 'players' in play:
            for player in play['players']:
                if player['playerType'] in ['Shooter', 'Scorer']:
                    player_names.append(player['player']['fullName'])

In [15]:
# Since the player names have some repetitions, we get a clean non-duplicate list of player names
player_names_unique = []
[player_names_unique.append(x) for x in player_names if x not in player_names_unique]

print(len(player_names))
print(len(player_names_unique))

123378
886


So we have 886 unique players playing in the NHL from 2019 to 2020 who are either Shooters/Scorers

In [16]:
print("Some of the NHL players of 2019-2020 are: \n")
for i in range(0,10):
    print(player_names_unique[i])

Some of the NHL players of 2019-2020 are: 

Brady Tkachuk
Nikita Zaitsev
Morgan Rielly
Andreas Johnsson
Mitchell Marner
Dmytro Timashov
Dylan DeMelo
Trevor Moore
Tyson Barrie
Cody Ceci


In [17]:
player_data = {}
for player in player_names_unique:
    player_data[player] = {}
    
    player_data[player]['Shot'] = {}
    player_data[player]['Shot']['x'] = []
    player_data[player]['Shot']['y'] = []

    player_data[player]['Goal'] = {}
    player_data[player]['Goal']['x'] = []
    player_data[player]['Goal']['y'] = []

#player_data

Player data empty template looks something like: 'John Tavares': {'Shot': {'x': [], 'y': []}, 'Goal': {'x': [], 'y': []}}

### Adding Goals and Shots for every player

In [83]:
event_types

['Shot', 'Goal']

In [18]:
for i in game_data:
    if 'liveData' not in i:
        continue
    
    plays = i['liveData']['plays']['allPlays']
    
    for play in plays:
        if 'players' in play:
            for player in play['players']:
                if player['player']['fullName'] in player_names_unique:
                    for event in event_types:
                        if play['result']['event'] in event:
                            # save the x and y coordinates of goal/shot
                            if 'x' in play['coordinates']:
                                player_data[player['player']['fullName']][event]['x'].append(play['coordinates']['x'])
                                player_data[player['player']['fullName']][event]['y'].append(play['coordinates']['y']) 


In [95]:
#player_data

### Analyzing the shots and goals of Specific player

In [19]:
player = 'Alex Ovechkin'

print("Shots of {}:".format(player))
for i in range(0,10):
    print(player_data[player]['Shot']['x'][i], ', ', player_data[player]['Shot']['y'][i])

print("\nGoals of {}:".format(player))    
for i in range(0,10):
    print(player_data[player]['Goal']['x'][i], ', ', player_data[player]['Goal']['y'][i])

Shots of Alex Ovechkin:
91.0 ,  -27.0
-78.0 ,  3.0
-52.0 ,  25.0
-68.0 ,  34.0
-57.0 ,  -11.0
-56.0 ,  -14.0
-50.0 ,  -20.0
58.0 ,  16.0
48.0 ,  21.0
58.0 ,  20.0

Goals of Alex Ovechkin:
-57.0 ,  18.0
54.0 ,  -40.0
53.0 ,  22.0
60.0 ,  20.0
-4.0 ,  34.0
82.0 ,  4.0
-34.0 ,  7.0
54.0 ,  21.0
-82.0 ,  -8.0
61.0 ,  10.0


## Calculating the Player Stats

In [29]:
def calculate_stats(player):
    player_total_shots = len(player_data[player]['Shot']['x']) + len(player_data[player]['Goal']['x']) 
    player_total_goals = len(player_data[player]['Goal']['x'])
    success_percent = player_total_goals/player_total_shots*100
    
    print("Player {} 2019-2020 summary: ".format(player))
    print("Number of Shots taken are: ", player_total_shots)
    print("Number of Goals scored are: ", player_total_goals)
    print("Success rate: {:.2f} %".format(success_percent))
    
    return success_percent

league_total_shots = len(league_data['Shot']['x']) + len(league_data['Goal']['x'])
league_total_goals = len(league_data['Goal']['x'])
league_success_avg_percent = league_total_goals/league_total_shots*100
print("NHL League 2019-2020 average: {:.2f} % \n".format(league_success_avg_percent))

# calculate spread of shots on goals % of player
player = 'Alex Ovechkin'
player_SOG_spread = calculate_stats(player) - league_success_avg_percent
print("\nPlayer {} Shots on Goals spread for the league: {:.2f} %".format(player, player_SOG_spread))

NHL League 2019-2020 average: 9.71 % 

Player Alex Ovechkin 2019-2020 summary: 
Number of Shots taken are:  334
Number of Goals scored are:  68
Success rate: 20.36 %

Player Alex Ovechkin Shots on Goals spread for the league: 10.65 %


#### Positive spread means having success rate greater than league average -> sign of a good player

## Shot Location Analysis and Plotting