# || Visualisations avancées

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter
import matplotlib.pyplot as plt
import seaborn as sns
from ipywidgets import interact, Dropdown
from IPython.display import display
# Location of the play-by-play CSV; './' makes the path relative to the
# directory the kernel is running in.
base_path = './'
csv_file = os.path.join(base_path, 'combined_play_by_play_cleaned.csv')

def load_play_by_play_data(csv_file):
    """
    Read a play-by-play CSV into a DataFrame.

    Parameters
    ----------
    csv_file : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame or None
        The parsed file, or None (after printing a message) when the path
        does not exist.
    """
    # Guard clause: report the missing file and bail out early.
    if not os.path.exists(csv_file):
        print(f"File {csv_file} not found.")
        return None

    return pd.read_csv(csv_file)

# Load the dataset and report whether it worked (the loader returns None
# when the file is missing).
data = load_play_by_play_data(csv_file)
print(
    "Données chargées avec succès."
    if data is not None
    else "Erreur lors du chargement des données."
)
def adjust_coordinates(df):
    """
    Add an ``xCoord_adj`` column holding the mirrored x coordinate.

    Only the x coordinate is adjusted; y is left untouched. The sign of
    ``xCoord`` is flipped once for rows where ``Periode == 2`` and once more
    for rows whose ``zoneCode`` is ``'D'``. A row matching both conditions is
    therefore flipped twice and keeps its original value. Rows in any other
    zone (e.g. ``'O'`` or ``'N'``) are only affected by the period rule.

    The computation is vectorised, so it also works on an empty frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``xCoord``, ``Periode`` and ``zoneCode``.
        The frame is modified in place (the new column is added to it).

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the ``xCoord_adj`` column added.
    """
    x = df['xCoord']

    # Two flips cancel each other, so the net flip is the XOR of the
    # "second period" and "defensive zone" conditions.
    flip = (df['Periode'] == 2) ^ (df['zoneCode'] == 'D')

    # Keep x where no net flip applies, negate it elsewhere.
    df['xCoord_adj'] = x.where(~flip, -x)
    return df

# Adjust the coordinates in the DataFrame. Skipped when loading failed and
# `data` is None, since the call would otherwise fail right after the error
# message has already been printed.
if data is not None:
    data = adjust_coordinates(data)


Données chargées avec succès.


In [2]:
""" 
On charge les données de tirs au but et de buts, on ramène toutes les coordonnées du même côté de la patinoire (symétrie des points dont x est négatif), on filtre les données pertinentes, on calcule la différence de taux de tir entre l'équipe et la moyenne de la ligue, puis on crée une visualisation interactive montrant cette différence pour une équipe et une saison sélectionnées.
"""

def Correct_Side_Rink_Coordinate(list_x_coor, list_y_coor):
    """
    Bring every point onto the non-negative-x side of the rink.

    Points whose x is negative are reflected through the origin (both x and
    y change sign); all other points are returned unchanged. The inputs are
    copied into new numpy arrays, so the caller's data is not modified.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x, y)`` after the reflection.
    """
    xs = np.array(list_x_coor)
    ys = np.array(list_y_coor)

    # Select the points lying on the negative-x side before touching xs.
    on_negative_side = xs < 0

    ys[on_negative_side] *= -1
    xs[on_negative_side] *= -1
    return xs, ys

# Input locations. The defaults are the original absolute paths; set the
# PBP_CSV_PATH / RINK_IMAGE_PATH environment variables to run the notebook
# on another machine without editing this cell.
path_tidy_data_csv=os.environ.get(
    "PBP_CSV_PATH",
    "/home/mohamed/project-template/ift6758/visualizations/combined_play_by_play_cleaned.csv",
)
ice_rink_image_path=os.environ.get(
    "RINK_IMAGE_PATH",
    "/home/mohamed/project-template/figures/nhl_rink.png",
)

# Keep only events that have both coordinates, force numeric coordinates and
# a string game id (the season is later matched as a prefix of idGame).
df=pd.read_csv(path_tidy_data_csv).dropna(subset=['xCoord','yCoord'])
df['xCoord']=pd.to_numeric(df['xCoord'])
df['yCoord']=pd.to_numeric(df['yCoord'])
df['idGame']=df['idGame'].astype(str)

print("Valeurs uniques dans la colonne 'Type' :")
print(df['Type'].unique())

# Restrict the frame to shots on goal and goals.
event_column='Type'
df=df[df[event_column].isin(['Tir au but','But'])]

# Histogram bin size and plotted extent, in the same units as xCoord/yCoord.
bin_width=2
rink_width=100
rink_height=85

# Choices offered by the dropdown widgets.
list_season=['2016','2017','2018','2019','2020']
list_team_names=df['Equipe'].unique()

# Background picture drawn under the contour plot.
rink_image=plt.imread(ice_rink_image_path)

def plot_team_season(team_name, season):
    """
    Plot how one team's shot rate differs from the league average.

    For the given season, a 2-D histogram of shot locations is built for the
    whole league and for the selected team. Each is normalised by its number
    of games (the league one additionally by 2, since every game involves two
    teams). The team-minus-league difference is smoothed with a Gaussian
    filter and drawn as filled contours over the rink image.

    Reads the module-level ``df``, ``rink_image``, ``rink_width``,
    ``rink_height`` and ``bin_width``.

    Parameters
    ----------
    team_name : str
        Value matched against the ``Equipe`` column.
    season : str
        Prefix matched against the ``idGame`` column.

    Returns
    -------
    None
        The figure is shown as a side effect. When the team has no rows for
        that season a message is printed and nothing is drawn.
    """
    season_df = df[df['idGame'].str.startswith(season)]
    team_df = season_df[season_df['Equipe'] == team_name]

    # Bail out before any normalisation: an empty season would otherwise
    # divide the league histogram by zero games.
    if team_df.empty:
        print(f"Aucune donnée pour l'équipe {team_name} en saison {season}")
        return

    # NOTE(review): Correct_Side_Rink_Coordinate returns x >= 0, while the
    # x bins span [-rink_width/2, rink_width/2]; samples with x beyond
    # rink_width/2 fall outside the histogram and are not counted. Confirm
    # this is the intended coordinate convention.
    bins = [
        np.arange(-rink_width/2, rink_width/2 + bin_width, bin_width),
        np.arange(-rink_height/2, rink_height/2 + bin_width, bin_width),
    ]

    # League histogram: average per team per game.
    x_season, y_season = Correct_Side_Rink_Coordinate(
        season_df['xCoord'].values, season_df['yCoord'].values
    )
    hist_season, x_edges, y_edges = np.histogram2d(x_season, y_season, bins=bins)
    total_num_game_season = len(season_df['idGame'].unique())
    hist_season = hist_season / (total_num_game_season * 2)

    # Team histogram on the same bin edges, average per game.
    x_team, y_team = Correct_Side_Rink_Coordinate(
        team_df['xCoord'].values, team_df['yCoord'].values
    )
    hist_team, _, _ = np.histogram2d(x_team, y_team, bins=[x_edges, y_edges])
    total_num_game = len(team_df['idGame'].unique())
    hist_team = hist_team / total_num_game

    hist_diff = gaussian_filter(hist_team - hist_season, sigma=1.5)

    # Symmetric colour scale around zero; the tiny fallback keeps the
    # contour levels strictly increasing when the difference is all zero.
    max_abs_diff = np.max(np.abs(hist_diff))
    if max_abs_diff == 0:
        max_abs_diff = 1e-6
    levels = np.linspace(-max_abs_diff, max_abs_diff, 21)

    # Each histogram value belongs to the middle of its bin, so the contour
    # grid uses bin centres rather than the left edges.
    x_centers = (x_edges[:-1] + x_edges[1:]) / 2
    y_centers = (y_edges[:-1] + y_edges[1:]) / 2
    X, Y = np.meshgrid(x_centers, y_centers)

    plt.figure(figsize=(10, 6))
    plt.imshow(
        rink_image,
        extent=[-rink_width/2, rink_width/2, -rink_height/2, rink_height/2],
        aspect='auto',
        alpha=0.6,
    )
    # histogram2d indexes its result as [x, y]; contourf expects [y, x].
    contour = plt.contourf(X, Y, hist_diff.T, levels=levels, cmap='RdBu_r', alpha=0.7)
    cbar = plt.colorbar(contour)
    cbar.set_label('Différence du taux de tir')
    plt.title(f"Différence du taux de tir par heure - Saison {season} - {team_name}")
    plt.xlabel('Distance latérale (pieds)')
    plt.ylabel('Distance longitudinale (pieds)')
    plt.xlim([-rink_width/2, rink_width/2])
    plt.ylim([-rink_height/2, rink_height/2])
    plt.gca().set_aspect('equal', adjustable='box')
    plt.tight_layout()
    plt.show()

# Dropdown widgets feeding the two arguments of plot_team_season.
team_dropdown=Dropdown(options=list_team_names,description='Équipe:')
season_dropdown=Dropdown(options=list_season,description='Saison:')

# Trailing semicolon: interact() returns the wrapped function, and as the last
# expression of the cell its repr would otherwise be echoed under the widget.
interact(plot_team_season,team_name=team_dropdown,season=season_dropdown);


Valeurs uniques dans la colonne 'Type' :
['Tir au but' 'But']


interactive(children=(Dropdown(description='Équipe:', options=('Canadiens', 'Maple Leafs', 'Capitals', 'Bruins…

<function __main__.plot_team_season(team_name, season)>

In [3]:
import plotly.graph_objs as go
import base64
from scipy.ndimage import gaussian_filter

""" 
On analyse les données de tirs au but et de buts en filtrant les événements pertinents, en ramenant les coordonnées des tirs du même côté de la patinoire, puis en calculant les 
histogrammes de tirs pour la ligue et chaque équipe. Ensuite, on applique un filtre gaussien pour lisser les différences de taux de tir et on génère des visualisations interactives 
en HTML montrant les excès de taux de tir par heure pour chaque équipe et saison.
"""

# Input locations. The defaults are the original absolute paths; set the
# PBP_CSV_PATH / RINK_IMAGE_PATH environment variables to run the notebook
# on another machine without editing this cell.
data_path = os.environ.get(
    "PBP_CSV_PATH",
    "/home/mohamed/project-template/ift6758/visualizations/combined_play_by_play_cleaned.csv",
)
rink_image_path = os.environ.get(
    "RINK_IMAGE_PATH",
    "/home/mohamed/project-template/figures/nhl_rink.png",
)

# Keep only events that have both coordinates, force numeric coordinates and
# a string game id (the season is matched below as a prefix of idGame).
df = pd.read_csv(data_path).dropna(subset=['xCoord', 'yCoord'])
df['xCoord'] = pd.to_numeric(df['xCoord'])
df['yCoord'] = pd.to_numeric(df['yCoord'])
df['idGame'] = df['idGame'].astype(str)


# Restrict the frame to shots on goal and goals.
df = df[df['Type'].isin(['Tir au but', 'But'])]

# Seasons to export and every team name present in the filtered data.
seasons = ['2016', '2017', '2018', '2019']
teams = df['Equipe'].unique()

def encode_image(image_path):
    """
    Return the content of a file as base64 text.

    Parameters
    ----------
    image_path : str
        Path of the file to read (opened in binary mode).

    Returns
    -------
    str
        The base64 encoding of the file's bytes, decoded to a ``str``.
    """
    with open(image_path, 'rb') as image_file:
        return base64.b64encode(image_file.read()).decode()
# Base64 text of the rink picture; it is embedded below in a data: URI used
# as the background image of each figure.
rink_image_base64 = encode_image(rink_image_path)
def correct_coordinates(x, y):
    """
    Bring every point onto the non-negative-x side of the rink.

    Points whose x is negative are reflected through the origin (both x and
    y change sign); all other points are returned unchanged.

    The inputs are copied first. The caller's arrays are therefore never
    modified, and read-only arrays or plain sequences are accepted as well.

    Parameters
    ----------
    x, y : array-like
        Coordinates of the points, of equal length.

    Returns
    -------
    tuple of numpy.ndarray
        New ``(x, y)`` arrays after the reflection.
    """
    # np.array() copies, so the writes below never touch the caller's data.
    x = np.array(x)
    y = np.array(y)

    mask = x < 0
    x[mask] = -x[mask]
    y[mask] = -y[mask]
    return x, y
# The plotted extent and the bin size are the same for every season, so the
# bin edges are built once instead of on every iteration.
rink_width_ft, rink_height_ft = 100, 85
bin_size = 2
bins = [
    np.arange(-rink_width_ft / 2, rink_width_ft / 2 + bin_size, bin_size),
    np.arange(-rink_height_ft / 2, rink_height_ft / 2 + bin_size, bin_size)
]
# Horizontal offset applied to both the background image and the x-axis range.
# NOTE(review): presumably tuned by eye to line the picture up with the
# data - confirm.
shift_right = 10

# One HTML file per season: a heatmap trace per team, with a dropdown menu
# that toggles which team's trace is visible.
for season in seasons:
    season_df = df[df['idGame'].str.startswith(season)]
    if season_df.empty:
        # No games at all: the league normalisation below would divide by zero.
        print(f"Aucune donnée pour la saison {season}")
        continue

    season_folder = f"./season_{season}"
    os.makedirs(season_folder, exist_ok=True)

    # League histogram, averaged per team per game (two teams per game).
    # NOTE(review): correct_coordinates returns x >= 0, while the x bins span
    # [-rink_width_ft/2, rink_width_ft/2]; samples with x beyond
    # rink_width_ft/2 are not counted. Confirm this is intended.
    x_season, y_season = correct_coordinates(season_df['xCoord'].values, season_df['yCoord'].values)
    hist_league, x_edges, y_edges = np.histogram2d(x_season, y_season, bins=bins)
    num_games_season = len(season_df['idGame'].unique())
    hist_league /= (num_games_season * 2)

    layout = go.Layout(
        images=[dict(
            source=f"data:image/png;base64,{rink_image_base64}",
            x=-rink_width_ft / 2 + shift_right,
            y=rink_height_ft / 2,
            sizex=rink_width_ft, sizey=-rink_height_ft,
            xref="x", yref="y",
            opacity=0.5,
            layer="below"
        )],
        title=f"Excess shot rate per hour - Season {season}",
        xaxis=dict(
            range=[-rink_width_ft / 2 + shift_right, rink_width_ft / 2 + shift_right],
            scaleanchor="y",
            showgrid=False, zeroline=False, showticklabels=False
        ),
        yaxis=dict(
            range=[-rink_height_ft / 2, rink_height_ft / 2],
            showgrid=False, zeroline=False, showticklabels=False
        ),
        plot_bgcolor='rgba(0,0,0,0)',
        height=850, width=950
    )

    traces = []
    # Teams that actually produced a trace, in trace order. The dropdown
    # visibility masks are built from this list so that mask index i always
    # refers to traces[i], even when some teams have no data this season.
    plotted_teams = []
    for team in teams:
        team_df = season_df[season_df['Equipe'] == team]
        if team_df.empty:
            continue

        # Team histogram on the league's bin edges
        x_team, y_team = correct_coordinates(team_df['xCoord'].values, team_df['yCoord'].values)
        hist_team, _, _ = np.histogram2d(x_team, y_team, bins=[x_edges, y_edges])

        # Normalise by the number of games played by the team
        num_games_team = len(team_df['idGame'].unique())
        hist_team /= num_games_team

        # Difference to the league rate, smoothed with a Gaussian filter
        hist_diff = hist_team - hist_league
        hist_diff = gaussian_filter(hist_diff, sigma=1.5)

        # Symmetric colour range around zero; the tiny fallback avoids a
        # degenerate zmin == zmax when the difference is all zero.
        max_abs_diff = np.max(np.abs(hist_diff))
        if max_abs_diff == 0:
            max_abs_diff = 1e-6
        zmin, zmax = -max_abs_diff, max_abs_diff
        heatmap = go.Heatmap(
            # histogram2d indexes its result as [x, y]; Heatmap expects z[y][x]
            z=np.round(hist_diff.T, 5),
            x=x_edges, y=y_edges,
            colorscale='RdBu_r',
            zmin=zmin, zmax=zmax,
            # Only the first trace added is shown initially; it matches the
            # first dropdown button.
            visible=not traces,
            opacity=0.6,
            colorbar=dict(title='Excess shot rate per hour')
        )
        traces.append(heatmap)
        plotted_teams.append(team)

    fig = go.Figure(data=traces, layout=layout)
    fig.update_layout(
        updatemenus=[dict(
            buttons=[
                dict(
                    label=team,
                    method="update",
                    args=[{"visible": [shown == team for shown in plotted_teams]}]
                )
                for team in plotted_teams
            ],
            direction="down", showactive=True
        )]
    )
    output_path = os.path.join(season_folder, f"{season}_shot_analysis.html")
    fig.write_html(output_path)
    print(f"Fichier généré: {output_path}")


Fichier généré: ./season_2016/2016_shot_analysis.html
Fichier généré: ./season_2017/2017_shot_analysis.html
Fichier généré: ./season_2018/2018_shot_analysis.html
Fichier généré: ./season_2019/2019_shot_analysis.html
