[PlayTypes](https://statsapi.web.nhl.com/api/v1/playTypes)
[Example game](https://statsapi.web.nhl.com/api/v1/game/2017020001/feed/live)
[Example team](https://statsapi.web.nhl.com/api/v1/teams/10)

# Imports

In [12]:
# Enable IPython's autoreload extension in mode 2, so imported modules
# (e.g. the project's `src` package) are reloaded before each cell executes.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [13]:
import pathlib
# If the kernel was started from inside a folder named "notebooks", move one
# level up — presumably so the relative paths used below ("figures/...") and
# the `src` package resolve from the project root; confirm against the repo layout.
if pathlib.Path().resolve().name == 'notebooks':
    %cd ..
# Show the working directory the rest of the notebook runs from.
%pwd

'/home/shabgard/ift6758-venv/project/Milestone2/NHL/NHL'

In [27]:
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import numpy as np
from src.data import NHLDataDownloader
from scipy import stats
from src.utils import normalize
import ipywidgets as widgets
from IPython.display import display
import pprint
from PIL import Image

# Raw data


In [28]:
# Using the NHL data downloader

# Step 1: Specify the season
season_year = 2018
nhl_downloader = NHLDataDownloader(season_year)

# Step 2: Download (or load) the data
season_data = nhl_downloader.load_data()


Season 2018 successfully loaded from file


In [29]:
# Scan every play of the first regular-season game and remember one 'Shot'
# and one 'Goal' event. There is no early exit, so the last match of each
# kind is the one that is kept.
for play in season_data['regulars'][0]['liveData']['plays']['allPlays']:
    if play['result']['event'] == 'Shot':
        shot_exemple = play
    elif play['result']['event'] == 'Goal':
        goal_exemple = play

# Show the raw structure of a shot event.
shot_exemple

{'players': [{'player': {'id': 8477953,
    'fullName': 'Kasperi Kapanen',
    'link': '/api/v1/people/8477953'},
   'playerType': 'Shooter'},
  {'player': {'id': 8471679,
    'fullName': 'Carey Price',
    'link': '/api/v1/people/8471679'},
   'playerType': 'Goalie'}],
 'result': {'event': 'Shot',
  'eventCode': 'TOR833',
  'eventTypeId': 'SHOT',
  'description': 'Kasperi Kapanen Backhand saved by Carey Price',
  'secondaryType': 'Backhand'},
 'about': {'eventIdx': 355,
  'eventId': 833,
  'period': 3,
  'periodType': 'REGULAR',
  'ordinalNum': '3rd',
  'periodTime': '20:00',
  'periodTimeRemaining': '00:00',
  'dateTime': '2018-10-04T01:49:23Z',
  'goals': {'away': 2, 'home': 2}},
 'coordinates': {'x': -81.0, 'y': -9.0},
 'team': {'id': 10,
  'name': 'Toronto Maple Leafs',
  'link': '/api/v1/teams/10',
  'triCode': 'TOR'}}

In [30]:
# Show the raw structure of the 'Goal' event kept by the loop in the previous cell.
goal_exemple

{'players': [{'player': {'id': 8479318,
    'fullName': 'Auston Matthews',
    'link': '/api/v1/people/8479318'},
   'playerType': 'Scorer',
   'seasonTotal': 2},
  {'player': {'id': 8466139,
    'fullName': 'Patrick Marleau',
    'link': '/api/v1/people/8466139'},
   'playerType': 'Assist',
   'seasonTotal': 1},
  {'player': {'id': 8474581,
    'fullName': 'Jake Gardiner',
    'link': '/api/v1/people/8474581'},
   'playerType': 'Assist',
   'seasonTotal': 1},
  {'player': {'id': 8471679,
    'fullName': 'Carey Price',
    'link': '/api/v1/people/8471679'},
   'playerType': 'Goalie'}],
 'result': {'event': 'Goal',
  'eventCode': 'TOR843',
  'eventTypeId': 'GOAL',
  'description': 'Auston Matthews (2) Snap Shot, assists: Patrick Marleau (1), Jake Gardiner (1)',
  'secondaryType': 'Snap Shot',
  'strength': {'code': 'EVEN', 'name': 'Even'},
  'gameWinningGoal': True,
  'emptyNet': False},
 'about': {'eventIdx': 362,
  'eventId': 843,
  'period': 4,
  'periodType': 'OVERTIME',
  'ordinalN

## Outil de débogage interactif

In [31]:
# Shared state used by the callbacks of the interactive play browser.
data = season_data
pp = pprint.PrettyPrinter(indent=4)
rink_image = Image.open("figures/nhl_rink.png")  # picture drawn underneath each play

# Selector for the kind of games to browse ('regulars' or 'playoffs').
type_dropdown = widgets.Dropdown(
    description='Type:',
    options=['regulars', 'playoffs'],
)

# Index of the game to inspect; initially sized for the regular season.
game_slider = widgets.IntSlider(
    description='Game Index:',
    value=0,
    min=0,
    max=len(data['regulars']) - 1,
)

# Index of the play inside the selected game; initially sized for the
# first regular-season game.
play_slider = widgets.IntSlider(
    description='Play Index:',
    value=0,
    min=0,
    max=len(data['regulars'][0]['liveData']['plays']['allPlays']) - 1,
)

# Area in which the printed play details and the figure are rendered.
plot_output = widgets.Output()

def update_game_slider_range(*args):
    """Resize the game slider to the number of games of the selected type.

    Registered as an observer of ``type_dropdown``. The positional arguments
    carry the change notification and are ignored.
    """
    # The dropdown only offers 'regulars' and 'playoffs', which are exactly
    # the two keys of `data` used throughout this cell.
    game_count = len(data[type_dropdown.value])

    # Keep max >= min (0) even if the selected collection is empty.
    game_slider.max = max(game_count - 1, 0)

    # The current game index now designates a different game. If the index
    # itself did not change, the game slider emits no event, so refresh the
    # play slider range explicitly to avoid a stale, out-of-range play index.
    if game_count:
        update_play_slider_range()
type_dropdown.observe(update_game_slider_range, 'value')

def update_play_slider_range(*args):
    """Resize the play slider to the number of plays of the selected game.

    Registered as an observer of ``game_slider``. The positional arguments
    carry the change notification and are ignored.
    """
    if type_dropdown.value == 'regulars':
        # Regular-season games are stored in a sequence indexed by position.
        game = data['regulars'][game_slider.value]
    else:
        # Playoff games are stored in a mapping; pick the key at that position.
        key = list(data['playoffs'].keys())[game_slider.value]
        game = data['playoffs'][key]

    play_count = len(game['liveData']['plays']['allPlays'])

    # The slider holds an index, so its maximum is play_count - 1. Subtract 1
    # exactly once, otherwise the last play of the game cannot be selected.
    # Clamp to 0 so max never falls below min for a game without plays.
    play_slider.max = max(play_count - 1, 0)
game_slider.observe(update_play_slider_range, 'value')

def plot_coordinates(change):
    """Print the selected play and draw its location on the rink.

    The game type, game index and play index are read from ``type_dropdown``,
    ``game_slider`` and ``play_slider``; all output goes to ``plot_output``.

    Args:
        change: Notification passed by the widget observers (``None`` for
            the initial call). It is not inspected.
    """
    with plot_output:
        plot_output.clear_output(wait=True)

        # Regular-season games are looked up by position, playoff games by key.
        if type_dropdown.value != 'regulars':
            playoff_keys = list(data['playoffs'].keys())
            game = data['playoffs'][playoff_keys[game_slider.value]]
        else:
            game = data['regulars'][game_slider.value]

        play = game['liveData']['plays']['allPlays'][play_slider.value]
        coords = play['coordinates']
        teams = game['liveData']['linescore']['teams']

        # (tricode, goals) for each side, taken from the game's linescore.
        (home, home_score), (away, away_score) = [
            (teams[side]["team"]["triCode"], teams[side]["goals"])
            for side in ("home", "away")
        ]

        print(f'{home} {home_score} goals vs {away} {away_score} goals')
        pp.pprint(play)

        # Plays without a location carry an empty mapping: plot nothing.
        if not coords:
            coords = {'x': None, 'y': None}

        fig = go.Figure()

        # Single marker at the play's position.
        marker = go.Scatter(
            x=[coords['x']],
            y=[coords['y']],
            mode='markers',
            marker_size=15,
            marker_color="#111111",
        )
        fig.add_trace(marker)

        # Rink picture stretched over the plotted coordinate range.
        fig.add_layout_image({
            "source": rink_image,
            "xref": "x",
            "yref": "y",
            "x": -100,
            "y": 42.5,
            "sizex": 200,
            "sizey": 85,
            "sizing": "stretch",
            "opacity": 0.9,
            "layer": "below",
        })

        # Both axes hide their lines and grid; only the range differs.
        bare_axis = dict(showline=False, zeroline=False, showgrid=False)
        fig.update_xaxes(range=[-100, 100], **bare_axis)
        fig.update_yaxes(
            range=[-42.5, 42.5],
            scaleanchor="x",  # lock the y scale to x so the rink keeps its proportions
            scaleratio=1,
            **bare_axis,
        )

        fig.update_layout(autosize=False, template="plotly_white")

        fig.show()

# Redraw whenever the value of any of the three controls changes.
type_dropdown.observe(plot_coordinates, names='value')
game_slider.observe(plot_coordinates, names='value')
play_slider.observe(plot_coordinates, names='value')

# Draw once up front so the output area is populated before any interaction.
plot_coordinates(None)

# Show the controls, followed by the output area.
display(type_dropdown, game_slider, play_slider, plot_output)

Dropdown(description='Type:', options=('regulars', 'playoffs'), value='regulars')

IntSlider(value=0, description='Game Index:', max=1270)

IntSlider(value=0, description='Play Index:', max=365)

Output()

# To DataFrames

In [38]:
# Select the season on the downloader and build the shots table; the cells
# below all read from `df`.
# NOTE(review): the recorded output of this cell is a ConnectionError whose URL
# requests season=20202021 although season_year is 2018 — confirm that
# load_df_shots honours set_season and whether it needs network access.
season_year = 2018
nhl_downloader.set_season(season_year)
df = nhl_downloader.load_df_shots()
df

ConnectionError: HTTPSConnectionPool(host='statsapi.web.nhl.com', port=443): Max retries exceeded with url: /api/v1/teams?season=20202021 (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7f77bbec1c90>: Failed to resolve 'statsapi.web.nhl.com' ([Errno -2] Name or service not known)"))

# Visualisations simples

## Number of shots, by type

In [None]:
# Drop shots whose type is an empty string.
ddf = df.loc[df['Type'] != '']

# Count shots per (type, outcome) and express each count as a percentage of
# all shots of that type.
df_g = (
    ddf.groupby(['Type', 'Goal'])
    .size()
    .to_frame('Counts')
    .reset_index()
    .assign(
        Percentage=lambda counts: counts['Counts']
        / counts.groupby('Type')['Counts'].transform('sum')
        * 100
    )
)

# Stacked bars on a log scale, labelled with the percentages and ordered by
# total number of shots.
px.bar(
    df_g,
    x='Type',
    y='Counts',
    color='Goal',
    title=f'Number of shots, by type, Saison {season_year}',
    log_y=True,
    text=df_g['Percentage'].map(lambda pct: f'{pct:1.2f}%'),
    height=550,
).update_xaxes(categoryorder='total descending')

## Distance to net

In [None]:
# Right-closed distance bins: (0, 5], (5, 10], ..., (150, 190].
bins = pd.IntervalIndex.from_breaks([0, 5, 10, 20, 30, 40, 50, 60, 75, 90, 120, 150, 190])

# Count shots per (outcome, exact distance), bucket the distances, then sum
# the counts per (outcome, bucket).
df_g = (
    df.groupby(['Goal', 'Net_distance'])
    .size()
    .to_frame('Counts')
    .reset_index()
    .assign(Bins=lambda counts: pd.cut(counts['Net_distance'], bins, precision=0))
    .groupby(['Goal', 'Bins'], observed=True)['Counts']
    .sum()
    .to_frame('Counts')
    .reset_index()
)

# Share of each outcome within its distance bucket, in percent.
df_g['Percentage'] = df_g['Counts'] / df_g.groupby('Bins', observed=True)['Counts'].transform('sum') * 100
# Intervals are turned into plain strings so they can serve as axis categories.
df_g['Bins'] = df_g['Bins'].astype('str')

fig = px.bar(
    df_g,
    x='Bins',
    y='Counts',
    color='Goal',
    title=f'Number of shots, by distance, Saison {season_year}',
    log_y=True,
    text=df_g['Percentage'].map(lambda pct: f'{pct:1.2f}%'),
    height=550,
    labels={'Bins': 'Distance (ft)', 'Counts': "Counts"},
)

fig.show()
fig.write_html(f'figures/shot-distance-{season_year}.html')

## Percentage of Goals by type and distance

In [None]:
# Keep shots that have both a type and a distance. Copy AFTER filtering so the
# new column below is added to an independent frame (no SettingWithCopyWarning).
df_td = df[(df['Type'] != '') & (~df['Net_distance'].isna())].copy()
# Twelve equal-population distance buckets.
df_td['Distance'] = pd.qcut(df_td['Net_distance'], 12, precision=0)

# Shots and goals per (type, distance bucket). observed=True restricts the
# result to combinations that actually occur, like the other groupbys in this
# notebook; empty combinations would only produce NaN rates that the filter
# below discards anyway.
grouped = (
    df_td.groupby(['Type', 'Distance'], observed=True)
    .agg(Total_Shots=('Goal', 'size'), Goals=('Goal', 'sum'))
    .reset_index()
)
grouped['Percentage'] = grouped['Goals'] / grouped['Total_Shots'] * 100

# Empirical cut-off: success rates at or above this value are treated as
# outliers and dropped so they do not dominate the colour scale.
upper_fence = 33
grouped = grouped[grouped['Percentage'] < upper_fence]

# Shot types as rows, distance buckets as columns; cells without a retained
# value are shown as 0.
grouped = grouped.pivot(index='Type', columns='Distance')['Percentage']
grouped.columns = grouped.columns.astype('str')
grouped = grouped.fillna(0)

fig = px.imshow(grouped, width=800, height=650, labels=dict(y='Shot Type', x="Distance (ft)", color="Goal Percentage"),
                title=f'Goals success rate by type and distance, Saison {season_year}')
fig.update_xaxes(side="top")
fig.update_layout(title_font_size=25)
fig.show()
fig.write_html(f'figures/type-shot-distance-{season_year}.html')

In [None]:
# Number of shots per (X_dist, Y) location, restricted to X_dist < 90 and laid
# out as a grid with X_dist as rows and Y as columns; locations without any
# shot count as 0.
df_sl = (
    df.loc[df.X_dist < 90]
    .groupby(['X_dist', 'Y'])
    .size()
    .to_frame('Counts')
    .reset_index()
    .pivot(index='X_dist', columns='Y')['Counts']
    .fillna(0)
)

# Counts are shown as log(count + 1) so busy locations do not wash out the rest.
fig = px.imshow(
    np.log(df_sl + 1),
    width=600,
    height=600,
    labels={
        'x': 'Distance from center of rink (ft)',
        'y': "Distance from goal line (ft)",
        'color': "Number of shots (log)",
    },
    title='Shots location',
)
fig.update_layout(title_font_size=30)

# Visualisations avancées

In [None]:
def get_shots_location(df: pd.DataFrame, team: str) -> pd.DataFrame:
    """
    Compare a team's average shots per game at each location with the
    average over all teams.

    Only shots with ``X_dist < 90`` are taken into account.

    Args:
        df: DataFrame returned by the function load_df_shots.
        team: Tricode of the team. e.g. 'MTL'

    Returns:
        One row per (X_dist, Y) location with the columns ``value_x``
        (all-teams average), ``value_y`` (team average), ``dif``
        (team minus all teams), ``ybin``/``xbin`` (15 equal-width bins
        along each axis) and ``dif_bin`` (sum of ``dif`` over the row's bin).
    """
    def average_per_location(shots: pd.DataFrame, divisor: int) -> pd.DataFrame:
        # Count shots per location, spread them on a full X_dist x Y grid
        # (0 where nothing was shot), scale, and return the grid in long form.
        counts = shots.groupby(['X_dist', 'Y']).size().to_frame('Counts').reset_index()
        grid = counts.pivot(index='X_dist', columns='Y')['Counts'].fillna(0) / divisor
        return pd.melt(grid.reset_index(), id_vars='X_dist', value_vars=grid.columns)

    kept_shots = df[df.X_dist < 90]
    team_shots = kept_shots[kept_shots.Team == team]
    team_games = team_shots.Game_id.nunique()

    # All teams: every game in `df` counts twice, once for each team playing it.
    overall = average_per_location(kept_shots, df.Game_id.nunique() * 2)
    # Selected team: divide by the number of games in which it has a kept shot.
    selected = average_per_location(team_shots, team_games)

    # value_x comes from the all-teams frame, value_y from the team frame.
    comparison = pd.merge(overall, selected, on=["X_dist", "Y"], how="outer").fillna(0)
    comparison["dif"] = comparison.value_y - comparison.value_x

    # Aggregate the difference over a 15 x 15 grid of equal-width bins.
    comparison['ybin'] = pd.cut(comparison.Y, 15)
    comparison['xbin'] = pd.cut(comparison.X_dist, 15)
    comparison['dif_bin'] = (
        comparison.groupby(['xbin', 'ybin'], observed=True).dif.transform('sum')
    )

    return comparison

In [None]:
def get_dens(team, df, bw_method=0.3):
    """
    Compute the difference between the team's and the all-teams shot maps
    after smoothing both with a weighted Gaussian KDE.

    Args:
        team: Tricode of the team. e.g. 'MTL'
        df: DataFrame of shots, forwarded to get_shots_location.
        bw_method: Bandwidth passed to scipy.stats.gaussian_kde for both
            estimates. Defaults to 0.3.

    Returns:
        The result of ``normalize`` applied to the flipped difference grid,
        with bounds derived from the largest binned difference. The grid has
        one row per y value (85) and one column per x value (90).
    """
    dfs = get_shots_location(df, team)
    # Largest binned difference; used below as the upper bound for normalize.
    max_binned_diff = dfs.dif_bin.max()

    # Both estimates share the same sample locations and differ only in the
    # weights: all-teams average (value_x) versus team average (value_y).
    locations = [dfs.X_dist.values, dfs.Y.values]
    gauss_tot = stats.gaussian_kde(locations, bw_method=bw_method, weights=dfs.value_x.values)
    gauss_team = stats.gaussian_kde(locations, bw_method=bw_method, weights=dfs.value_y.values)

    # Evaluation grid: x = 0..89 and y = -42..42 in unit steps.
    x = np.linspace(0, 89, 90)
    y = np.linspace(-42, 42, 85)
    X, Y = np.meshgrid(x, y)
    xy = np.vstack([X.ravel(), Y.ravel()])

    # NOTE(review): the exponential of each estimated density is taken before
    # subtracting — kept as in the original; confirm this is intended.
    dens_tot = np.exp(gauss_tot(xy)).reshape(X.shape)
    dens_team = np.exp(gauss_team(xy)).reshape(X.shape)

    dens = dens_team - dens_tot

    # The lower bound keeps the min/max proportion of the raw difference.
    ratio = dens.min() / dens.max()
    return normalize(np.flip(dens), max_binned_diff * ratio, max_binned_diff)

In [None]:
rink_image = Image.open("figures/half_nhl_rink.png")

fig = go.Figure()

# Axis values for the contour traces; same grid as the one get_dens evaluates on.
x = np.linspace(0, 89, 90)
y = np.linspace(-42, 42, 85)

teams = df.Team.unique()
visibility = [False] * len(teams)
dropdown_list = []

# One contour trace per team. Only the first team's trace is visible at
# start; each dropdown entry shows exactly one trace and hides the others.
for idx, team in enumerate(teams):
    fig.add_trace(
        go.Contour(
            z=get_dens(team, df),
            x=x,
            y=y,
            opacity=0.6,
            colorscale='RdBu_r',
            contours_coloring='fill',  # fill the area between contour lines
            contours=dict(start=-0.5, end=0.5, size=0.05),  # contour levels; adjust as needed
            visible=(idx == 0),
        )
    )

    vis = visibility.copy()
    vis[idx] = True

    dropdown_list.append(dict(
        args=[{"visible": vis}],
        label=team,
        method="restyle",
    ))


fig.update_layout(
    height=600,
    width=680,
    title = f'Season {season_year}',
    title_font_size=30,
    updatemenus=[
        dict(
            active=0,
            buttons=dropdown_list,
            showactive=True,
            x=0.35,
            xanchor="left",
            y=1.18,
            yanchor="top"
        ),
    ]
)
# Half-rink picture drawn underneath the contours.
fig.add_layout_image(
        dict(
            source=rink_image,
            xref="x",
            yref="y",
            x=0,
            y=42.5,
            sizex=100,
            sizey=85,
            opacity=1,
            layer="below")
)

fig.update_xaxes(showline=False, zeroline=False, showgrid=False, range=[0, 100])
fig.update_yaxes(
    showline=False,
    zeroline=False,
    showgrid=False,
    range=[-42.5, 42.5],
    scaleanchor = "x",  # lock the y scale to x so the rink keeps its proportions
    scaleratio = 1,
  )

fig.show()