In [2]:
import pandas as pd
import numpy as np 
import os
import matplotlib.pyplot as plt
from PIL import Image
import seaborn as sns
from clean import clean_json
import advanced_visualization as av

In [3]:
import matplotlib.image as image
file = './img/attack_zone.png'
logo = image.imread(file)

In [4]:
input_file = os.path.join('./datasets/csv_files','2017.csv')
df = pd.read_csv(input_file)

In [5]:
df.head()

Unnamed: 0,period,period_type,period_time,gameID,attacking_team_id,attacking_team_name,play_type,shooter,goalie,shot_type,x_coordinate,y_coordinate,empty_net,strength,home_team
0,1,REGULAR,00:38,2017020001,52,Winnipeg Jets,Shot,Josh Morrissey,Frederik Andersen,Wrist Shot,-36.0,-28.0,,,Winnipeg Jets
1,1,REGULAR,00:49,2017020001,52,Winnipeg Jets,Shot,Shawn Matthias,Frederik Andersen,Wrist Shot,-75.0,1.0,,,Winnipeg Jets
2,1,REGULAR,01:03,2017020001,52,Winnipeg Jets,Shot,Bryan Little,Frederik Andersen,Backhand,-73.0,10.0,,,Winnipeg Jets
3,1,REGULAR,01:46,2017020001,10,Toronto Maple Leafs,Shot,Eric Fehr,Steve Mason,Wrist Shot,80.0,-3.0,,,Winnipeg Jets
4,1,REGULAR,03:42,2017020001,52,Winnipeg Jets,Shot,Patrik Laine,Frederik Andersen,Snap Shot,-44.0,-21.0,,,Winnipeg Jets


## Preparing Data

In [6]:
attack_df = df.copy()
# Coordinate adjustments:
attack_df.loc[df['x_coordinate'] < 0, 'x_coordinate'] = -attack_df['x_coordinate']
attack_df.loc[df['x_coordinate'] < 0, 'y_coordinate'] = -attack_df['y_coordinate']
attack_df.head()

Unnamed: 0,period,period_type,period_time,gameID,attacking_team_id,attacking_team_name,play_type,shooter,goalie,shot_type,x_coordinate,y_coordinate,empty_net,strength,home_team
0,1,REGULAR,00:38,2017020001,52,Winnipeg Jets,Shot,Josh Morrissey,Frederik Andersen,Wrist Shot,36.0,28.0,,,Winnipeg Jets
1,1,REGULAR,00:49,2017020001,52,Winnipeg Jets,Shot,Shawn Matthias,Frederik Andersen,Wrist Shot,75.0,-1.0,,,Winnipeg Jets
2,1,REGULAR,01:03,2017020001,52,Winnipeg Jets,Shot,Bryan Little,Frederik Andersen,Backhand,73.0,-10.0,,,Winnipeg Jets
3,1,REGULAR,01:46,2017020001,10,Toronto Maple Leafs,Shot,Eric Fehr,Steve Mason,Wrist Shot,80.0,-3.0,,,Winnipeg Jets
4,1,REGULAR,03:42,2017020001,52,Winnipeg Jets,Shot,Patrik Laine,Frederik Andersen,Snap Shot,44.0,21.0,,,Winnipeg Jets


In [7]:
# Binning
bin_size = 5

In [8]:
attack_df['x_bin'] = pd.cut(attack_df['x_coordinate'], bins=range(0, 100+bin_size, bin_size))
attack_df['y_bin'] = pd.cut(attack_df['y_coordinate'], bins=range(-45, 45+bin_size, bin_size))

In [9]:
attack_df.head()

Unnamed: 0,period,period_type,period_time,gameID,attacking_team_id,attacking_team_name,play_type,shooter,goalie,shot_type,x_coordinate,y_coordinate,empty_net,strength,home_team,x_bin,y_bin
0,1,REGULAR,00:38,2017020001,52,Winnipeg Jets,Shot,Josh Morrissey,Frederik Andersen,Wrist Shot,36.0,28.0,,,Winnipeg Jets,"(35, 40]","(25, 30]"
1,1,REGULAR,00:49,2017020001,52,Winnipeg Jets,Shot,Shawn Matthias,Frederik Andersen,Wrist Shot,75.0,-1.0,,,Winnipeg Jets,"(70, 75]","(-5, 0]"
2,1,REGULAR,01:03,2017020001,52,Winnipeg Jets,Shot,Bryan Little,Frederik Andersen,Backhand,73.0,-10.0,,,Winnipeg Jets,"(70, 75]","(-15, -10]"
3,1,REGULAR,01:46,2017020001,10,Toronto Maple Leafs,Shot,Eric Fehr,Steve Mason,Wrist Shot,80.0,-3.0,,,Winnipeg Jets,"(75, 80]","(-5, 0]"
4,1,REGULAR,03:42,2017020001,52,Winnipeg Jets,Shot,Patrik Laine,Frederik Andersen,Snap Shot,44.0,21.0,,,Winnipeg Jets,"(40, 45]","(20, 25]"


In [10]:
games_per_team = df.groupby('attacking_team_name')['gameID'].nunique()
games_per_team.head()

attacking_team_name
Anaheim Ducks      86
Arizona Coyotes    82
Boston Bruins      94
Buffalo Sabres     82
Calgary Flames     82
Name: gameID, dtype: int64

In [11]:
shot_ratios = attack_df.groupby(['attacking_team_name', 'x_bin', 'y_bin']).size().reset_index(name='shots')
shot_ratios

  shot_ratios = attack_df.groupby(['attacking_team_name', 'x_bin', 'y_bin']).size().reset_index(name='shots')


Unnamed: 0,attacking_team_name,x_bin,y_bin,shots
0,Anaheim Ducks,"(0, 5]","(-45, -40]",0
1,Anaheim Ducks,"(0, 5]","(-40, -35]",5
2,Anaheim Ducks,"(0, 5]","(-35, -30]",5
3,Anaheim Ducks,"(0, 5]","(-30, -25]",5
4,Anaheim Ducks,"(0, 5]","(-25, -20]",1
...,...,...,...,...
11155,Winnipeg Jets,"(95, 100]","(20, 25]",0
11156,Winnipeg Jets,"(95, 100]","(25, 30]",0
11157,Winnipeg Jets,"(95, 100]","(30, 35]",0
11158,Winnipeg Jets,"(95, 100]","(35, 40]",0


In [12]:
shot_ratios['shots_per_hour'] = shot_ratios['shots'] / shot_ratios['attacking_team_name'].map(games_per_team)

In [13]:
shot_ratios.head()

Unnamed: 0,attacking_team_name,x_bin,y_bin,shots,shots_per_hour
0,Anaheim Ducks,"(0, 5]","(-45, -40]",0,0.0
1,Anaheim Ducks,"(0, 5]","(-40, -35]",5,0.05814
2,Anaheim Ducks,"(0, 5]","(-35, -30]",5,0.05814
3,Anaheim Ducks,"(0, 5]","(-30, -25]",5,0.05814
4,Anaheim Ducks,"(0, 5]","(-25, -20]",1,0.011628


In [14]:
league_avgs = shot_ratios.groupby(['x_bin', 'y_bin'])['shots_per_hour'].mean().reset_index()
league_avgs.head()

  league_avgs = shot_ratios.groupby(['x_bin', 'y_bin'])['shots_per_hour'].mean().reset_index()


Unnamed: 0,x_bin,y_bin,shots_per_hour
0,"(0, 5]","(-45, -40]",0.004427
1,"(0, 5]","(-40, -35]",0.026579
2,"(0, 5]","(-35, -30]",0.02306
3,"(0, 5]","(-30, -25]",0.017097
4,"(0, 5]","(-25, -20]",0.009691


In [15]:
### Check if shot_per_hour sums up to 1

## Visualization

In [16]:
shot_ratios['attacking_team_name'].unique()

array(['Anaheim Ducks', 'Arizona Coyotes', 'Boston Bruins',
       'Buffalo Sabres', 'Calgary Flames', 'Carolina Hurricanes',
       'Chicago Blackhawks', 'Colorado Avalanche',
       'Columbus Blue Jackets', 'Dallas Stars', 'Detroit Red Wings',
       'Edmonton Oilers', 'Florida Panthers', 'Los Angeles Kings',
       'Minnesota Wild', 'Montréal Canadiens', 'Nashville Predators',
       'New Jersey Devils', 'New York Islanders', 'New York Rangers',
       'Ottawa Senators', 'Philadelphia Flyers', 'Pittsburgh Penguins',
       'San Jose Sharks', 'St. Louis Blues', 'Tampa Bay Lightning',
       'Toronto Maple Leafs', 'Vancouver Canucks', 'Vegas Golden Knights',
       'Washington Capitals', 'Winnipeg Jets'], dtype=object)

In [17]:
team = 'San Jose Sharks'

In [18]:
df_vis = shot_ratios[shot_ratios['attacking_team_name']==team]
df_vis.head()

Unnamed: 0,attacking_team_name,x_bin,y_bin,shots,shots_per_hour
8280,San Jose Sharks,"(0, 5]","(-45, -40]",0,0.0
8281,San Jose Sharks,"(0, 5]","(-40, -35]",0,0.0
8282,San Jose Sharks,"(0, 5]","(-35, -30]",2,0.021739
8283,San Jose Sharks,"(0, 5]","(-30, -25]",0,0.0
8284,San Jose Sharks,"(0, 5]","(-25, -20]",0,0.0


In [19]:
df_vis['x_center'] = df_vis['x_bin'].apply(lambda x: (x.left + x.right) / 2)
df_vis['y_center'] = df_vis['y_bin'].apply(lambda x: (x.left + x.right) / 2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_vis['x_center'] = df_vis['x_bin'].apply(lambda x: (x.left + x.right) / 2)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_vis['y_center'] = df_vis['y_bin'].apply(lambda x: (x.left + x.right) / 2)


In [20]:
df_vis.head()

Unnamed: 0,attacking_team_name,x_bin,y_bin,shots,shots_per_hour,x_center,y_center
8280,San Jose Sharks,"(0, 5]","(-45, -40]",0,0.0,2.5,-42.5
8281,San Jose Sharks,"(0, 5]","(-40, -35]",0,0.0,2.5,-37.5
8282,San Jose Sharks,"(0, 5]","(-35, -30]",2,0.021739,2.5,-32.5
8283,San Jose Sharks,"(0, 5]","(-30, -25]",0,0.0,2.5,-27.5
8284,San Jose Sharks,"(0, 5]","(-25, -20]",0,0.0,2.5,-22.5


In [21]:
df_vis = df_vis.merge(league_avgs, on=['x_bin', 'y_bin'], how='left')
# Create a new column for the difference between the team's shots_per_hour and the league average
df_vis['shot_diff'] = df_vis['shots_per_hour_x'] - df_vis['shots_per_hour_y']
df_vis.head()

Unnamed: 0,attacking_team_name,x_bin,y_bin,shots,shots_per_hour_x,x_center,y_center,shots_per_hour_y,shot_diff
0,San Jose Sharks,"(0, 5]","(-45, -40]",0,0.0,2.5,-42.5,0.004427,-0.004427
1,San Jose Sharks,"(0, 5]","(-40, -35]",0,0.0,2.5,-37.5,0.026579,-0.026579
2,San Jose Sharks,"(0, 5]","(-35, -30]",2,0.021739,2.5,-32.5,0.02306,-0.001321
3,San Jose Sharks,"(0, 5]","(-30, -25]",0,0.0,2.5,-27.5,0.017097,-0.017097
4,San Jose Sharks,"(0, 5]","(-25, -20]",0,0.0,2.5,-22.5,0.009691,-0.009691


In [33]:
from scipy.ndimage import gaussian_filter
import numpy as np
from scipy.interpolate import griddata
import matplotlib.pyplot as plt
# Ref: https://thecommutesports.com/2022/08/06/creating-nhl-shot-maps-with-python/
[x,y] = np.round(np.meshgrid(np.linspace(-45,45,90), np.linspace(0,100,100)))
grid = griddata((df_vis['y_center'][::-1],df_vis['x_center']),df_vis['shot_diff'],(x,y),method='linear',fill_value=0)
smooothed_data = gaussian_filter(grid,sigma = 3)
data_min= smooothed_data.min()
data_max= smooothed_data.max()

if abs(data_min) > data_max:
    data_max = data_min * -1
elif data_max > abs(data_min):
    data_min = data_max * -1

fig, ax = plt.subplots(1,1, figsize=(10,12), facecolor='w', edgecolor='k')
ax.imshow(logo, extent=[x.min(), x.max(), y.min(), y.max()])
ax = ax.contourf(x,y,smooothed_data,alpha = 0.8, cmap='bwr', 
        levels = np.linspace(data_min,data_max,15),
        vmin=data_min,
        vmax=data_max,
            )
plt.axis('off')
plt.title('Shot Map',fontdict={'fontsize': 15})
fig.colorbar(ax, orientation="horizontal",pad = 0.05)
plt.show()

ModuleNotFoundError: No module named 'scipy'

$ \text{Préparation des données : } $ 

In [23]:
input_path_2016 = os.path.join('./datasets/csv_files/','2016.csv')
input_path_2017 = os.path.join('./datasets/csv_files/','2017.csv')
input_path_2018 = os.path.join('./datasets/csv_files/','2018.csv')
input_path_2019 = os.path.join('./datasets/csv_files/','2019.csv')
input_path_2020 = os.path.join('./datasets/csv_files/','2020.csv')

In [24]:
# Récupération des données de 2016/2017 à 2020/2021
df_16 = pd.read_csv(input_path_2016)
df_17 = pd.read_csv(input_path_2017)
df_18 = pd.read_csv(input_path_2018)
df_19 = pd.read_csv(input_path_2019)
df_20 = pd.read_csv(input_path_2020)

In [25]:
# On rassemble toutes les données dans un dataframe
df = pd.concat([df_16,df_17,df_18,df_19,df_20], ignore_index = True)

In [26]:
# Ajustement des coordonnées
attack_df = av.adjust_coordinates(df)

In [27]:
# Ajout des bins pour 'x_coordinate' et 'y_coordinate'
attack_df = av.add_bins(attack_df, bin_size = 5)

In [28]:
# Ajout d'une colonne avec l'année
attack_df = av.add_year_column(attack_df)

In [29]:
team_list = attack_df['attacking_team_name'].unique()
year_list = attack_df['year'].unique()

In [30]:
av.get_visualization_df(attack_df, team_list[0], year_list[0])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_vis['x_center'] = df_vis['x_bin'].apply(lambda x: (x.left + x.right) / 2)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_vis['y_center'] = df_vis['y_bin'].apply(lambda x: (x.left + x.right) / 2)


Unnamed: 0,attacking_team_name,x_bin,y_bin,shots,shots_per_hour_x,x_center,y_center,shots_per_hour_y,shot_diff
0,Toronto Maple Leafs,"(0, 5]","(-45, -40]",0,0.000000,2.5,-42.5,0.000379,-0.000379
1,Toronto Maple Leafs,"(0, 5]","(-40, -35]",0,0.000000,2.5,-37.5,0.013058,-0.013058
2,Toronto Maple Leafs,"(0, 5]","(-35, -30]",0,0.000000,2.5,-32.5,0.011930,-0.011930
3,Toronto Maple Leafs,"(0, 5]","(-30, -25]",0,0.000000,2.5,-27.5,0.016366,-0.016366
4,Toronto Maple Leafs,"(0, 5]","(-25, -20]",1,0.011364,2.5,-22.5,0.006836,0.004528
...,...,...,...,...,...,...,...,...,...
355,Toronto Maple Leafs,"(95, 100]","(20, 25]",0,0.000000,97.5,22.5,0.000000,0.000000
356,Toronto Maple Leafs,"(95, 100]","(25, 30]",0,0.000000,97.5,27.5,0.000379,-0.000379
357,Toronto Maple Leafs,"(95, 100]","(30, 35]",0,0.000000,97.5,32.5,0.000330,-0.000330
358,Toronto Maple Leafs,"(95, 100]","(35, 40]",0,0.000000,97.5,37.5,0.000000,0.000000
