In [None]:
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from scipy.stats import gaussian_kde
import plotly.express as px

sys.path.append('../..')
from ift6758.controller.nhl_data_downloader import get_dataframe_from_concatenated_csv_files

# Build the working frame in one chain: load with the project helper
# (argument 2019), drop rows missing either shot coordinate, then renumber
# the index so positional and label-based access agree.
df = (
    get_dataframe_from_concatenated_csv_files(2019)
    .dropna(subset=["x_coord", "y_coord"])
    .reset_index(drop=True)
)
df

In [None]:
# Infer, for every game, which side the home team defends in odd and in even
# periods, then label every row with the side that applies to its period.
#
# Rule (per game): take the period of the game's first row as the reference
# period. If the home team has strictly more events with x_coord > 0 than with
# x_coord <= 0 in that period, the home team is labelled as defending "left"
# during periods of that parity, otherwise "right". Periods of the other
# parity get the opposite label.
#
# The lookups are kept as two module-level dicts keyed by game_id:
#   home_team_defending_side_first_period  -> side used for odd periods
#   home_team_defending_side_second_period -> side used for even periods
home_team_defending_side_first_period = {}
home_team_defending_side_second_period = {}

# One pass over the games instead of one pass over every row: sort=False keeps
# games in order of first appearance, and rows inside a group keep their
# original order, so iloc[0] is the first row of the game in the frame.
for game_id, game_events in df.groupby("game_id", sort=False):
    reference_period = game_events["period"].iloc[0]
    home_x = game_events.loc[
        (game_events["team_type"] == "home")
        & (game_events["period"] == reference_period),
        "x_coord",
    ]
    number_of_positif = int((home_x > 0).sum())
    number_of_negatif = int((home_x <= 0).sum())

    reference_side = "left" if number_of_positif > number_of_negatif else "right"
    opposite_side = "right" if reference_side == "left" else "left"

    if reference_period % 2 == 1:
        home_team_defending_side_first_period[game_id] = reference_side
        home_team_defending_side_second_period[game_id] = opposite_side
    else:
        home_team_defending_side_first_period[game_id] = opposite_side
        home_team_defending_side_second_period[game_id] = reference_side

# Vectorised labelling: look both candidate sides up per row, then pick the
# one matching the parity of the row's period.
_side_in_odd_periods = df["game_id"].map(home_team_defending_side_first_period)
_side_in_even_periods = df["game_id"].map(home_team_defending_side_second_period)
df["home_team_defending_side"] = _side_in_odd_periods.where(
    df["period"] % 2 == 1, _side_in_even_periods
)

In [None]:
def adjust_coordinates_v2(df):
    """Add an ``adjusted_x_coord`` column with x mirrored for selected rows.

    The x coordinate is negated when either
      * ``team_type == 'away'`` and ``home_team_defending_side == 'left'``, or
      * ``team_type == 'home'`` and ``home_team_defending_side == 'right'``;
    every other row keeps its original ``x_coord``.

    Parameters:
    - df: DataFrame with ``team_type``, ``home_team_defending_side`` and
      ``x_coord`` columns.

    Returns:
    - The same DataFrame object that was passed in, modified in place with the
      new ``adjusted_x_coord`` column (callers rely on receiving ``df`` back).
    """
    away_with_home_on_left = (df['team_type'] == 'away') & (df['home_team_defending_side'] == 'left')
    home_on_right = (df['team_type'] == 'home') & (df['home_team_defending_side'] == 'right')
    must_flip = away_with_home_on_left | home_on_right

    # Column-wise mask instead of a per-row apply: same values, far fewer
    # Python-level calls, and it also works on an empty frame.
    df['adjusted_x_coord'] = df['x_coord'].mask(must_flip, -df['x_coord'])

    return df


# Add the adjusted x coordinate. The function returns the frame it was given,
# so adjusted_df refers to the same object as df (df gains the column too).
adjusted_df = adjust_coordinates_v2(df)
adjusted_df.head(90)

In [None]:
def calculate_shot_rate_per_zone(df, grid_size=10, match_duration_minutes=60, team_col='team_name'):
    """
    Compute the league-average shot rate per hour for each rink zone.

    The rate is normalised per *team*: the shot count of a zone is divided by
    the total number of team-games (each game counts once for every team that
    appears in it). This puts the league figure on the same scale as a
    per-team rate computed from that team's own games; dividing by the number
    of games alone would make the league value roughly twice a typical team
    value, because every game contains two teams.

    Side effects (kept on purpose, other cells read these columns from the
    same frame): ``adjusted_x_coord`` and ``y_coord`` are cleaned in place
    (+/-inf and NaN become 0) and ``x_zone`` / ``y_zone`` columns are added.

    Parameters:
    - df: DataFrame of shots with ``adjusted_x_coord``, ``y_coord`` and
      ``game_id`` columns (and ``team_col`` for per-team normalisation).
    - grid_size: Size of a grid cell, in coordinate units (default 10).
    - match_duration_minutes: Duration of one game in minutes (default 60).
    - team_col: Column identifying the shooting team. If it is ``None`` or
      absent from ``df``, the rate is normalised by the number of distinct
      games instead (the previous behaviour).

    Returns:
    - shots_per_zone: DataFrame with columns ``x_zone``, ``y_zone``,
      ``shot_count`` and ``shot_rate_per_hour``.
    """
    # Replace non-finite / missing coordinates with 0 so the integer cast below cannot fail.
    for coord_col in ('adjusted_x_coord', 'y_coord'):
        df[coord_col] = df[coord_col].replace([np.inf, -np.inf], np.nan).fillna(0)

    # Floor-divide to get the index of the grid cell each shot falls in.
    df['x_zone'] = (df['adjusted_x_coord'] // grid_size).astype(int)
    df['y_zone'] = (df['y_coord'] // grid_size).astype(int)

    shots_per_zone = df.groupby(['x_zone', 'y_zone']).size().reset_index(name='shot_count')

    if team_col is not None and team_col in df.columns:
        # Team-games: distinct games per team, summed over all teams.
        games_played = df.groupby(team_col)['game_id'].nunique().sum()
    else:
        games_played = len(df['game_id'].unique())

    total_minutes = games_played * match_duration_minutes  # total minutes played
    shots_per_zone['shot_rate_per_hour'] = (shots_per_zone['shot_count'] / total_minutes) * 60

    return shots_per_zone

# Per-zone shot counts and hourly rates computed over the whole frame.
league_shot_rate_df = calculate_shot_rate_per_zone(adjusted_df)

# Display the result
league_shot_rate_df

In [None]:
def calculate_team_shot_rate(df, grid_size=10, match_duration_minutes=60):
    """
    Compute the average shot rate per hour for each team and each rink zone.

    Side effect: ``x_zone`` and ``y_zone`` columns are (re)written on ``df``.

    Parameters:
    - df: DataFrame of shots with ``team_name``, ``game_id``,
      ``adjusted_x_coord`` and ``y_coord`` columns. Coordinates must be finite
      (the integer cast raises on NaN/inf).
    - grid_size: Size of a grid cell, in coordinate units (default 10).
    - match_duration_minutes: Duration of one game in minutes (default 60).

    Returns:
    - DataFrame with columns ``team_name``, ``x_zone``, ``y_zone``,
      ``shot_count`` and ``shot_rate_per_hour``.
    """
    # Floor-divide to get the index of the grid cell each shot falls in.
    df['x_zone'] = (df['adjusted_x_coord'] // grid_size).astype(int)
    df['y_zone'] = (df['y_coord'] // grid_size).astype(int)

    team_shots_per_zone = (
        df.groupby(['team_name', 'x_zone', 'y_zone']).size().reset_index(name='shot_count')
    )

    # Minutes played by each team = distinct games it appears in * game length.
    total_minutes_per_team = df.groupby('team_name')['game_id'].nunique() * match_duration_minutes

    # Align each (team, zone) row with its team's minutes via a vectorised
    # lookup; unlike a row-wise apply this also works when the frame is empty.
    minutes_for_row = team_shots_per_zone['team_name'].map(total_minutes_per_team)
    team_shots_per_zone['shot_rate_per_hour'] = (
        team_shots_per_zone['shot_count'] / minutes_for_row
    ) * 60

    return team_shots_per_zone

# Compute the average shot rate per hour for each team
team_shot_rate_df = calculate_team_shot_rate(adjusted_df)

# Preview the first 20 rows.
team_shot_rate_df.iloc[0:20]

In [None]:
def calculate_difference_from_league(team_shot_rate_df, league_shot_rate_df):
    """
    Compare each team's hourly shot rate with the league rate, zone by zone.

    Parameters:
    - team_shot_rate_df: DataFrame with the shot rate per team and zone.
    - league_shot_rate_df: DataFrame with the league shot rate per zone.

    Returns:
    - diff_df: the two inputs joined on (x_zone, y_zone), with overlapping
      columns suffixed ``_team`` / ``_league``, plus a ``difference`` column
      (team rate minus league rate) and a ``percentage_difference`` column
      (difference relative to the league rate, in percent).
    """
    # Default (inner) join: only zones present in both inputs are kept.
    diff_df = team_shot_rate_df.merge(
        league_shot_rate_df,
        on=['x_zone', 'y_zone'],
        suffixes=('_team', '_league'),
    )

    team_rate = diff_df['shot_rate_per_hour_team']
    league_rate = diff_df['shot_rate_per_hour_league']

    # Raw gap first, then the same gap expressed relative to the league rate.
    diff_df['difference'] = team_rate - league_rate
    diff_df['percentage_difference'] = (diff_df['difference'] / league_rate) * 100

    return diff_df

# Join the per-team rates with the league per-zone rates, then preview the
# first 20 rows of the comparison.
difference_df = calculate_difference_from_league(team_shot_rate_df, league_shot_rate_df)
difference_df.iloc[0:20]

In [None]:
# Keep only the shots taken in the offensive zone (adjusted x > 0).
offensive_shots = adjusted_df[adjusted_df['adjusted_x_coord'] > 0]

# Fit a kernel density estimate (KDE) on the shot coordinates themselves.
# The density is evaluated below on a grid expressed in rink coordinates, so
# the KDE must be fitted in the same units; fitting it on the x_zone / y_zone
# grid-cell indices would not match that grid.
kde = gaussian_kde(
    [offensive_shots['adjusted_x_coord'], offensive_shots['y_coord']],
    bw_method=0.2,
)

# Evaluation grid covering the offensive half: x in [0, 100], y in [-42.5, 42.5].
# x_grid, y_grid, x_mesh and y_mesh are reused by the per-team plot.
x_grid = np.linspace(0, 100, 100)
y_grid = np.linspace(-42.5, 42.5, 85)
x_mesh, y_mesh = np.meshgrid(x_grid, y_grid)

# Evaluate the density at every grid point and fold it back to the grid shape.
z = kde(np.vstack([x_mesh.ravel(), y_mesh.ravel()])).reshape(x_mesh.shape)


In [None]:
import plotly.graph_objects as go
from PIL import Image
import numpy as np
from scipy.stats import gaussian_kde
import os

# Load the rink picture used as plot background. The path is resolved from
# the current working directory: <cwd>/../data/Images/nhl_rink.png.
current_dir = os.getcwd()
rink_img_path = os.path.normpath(
    os.path.join(current_dir, '..', 'data', 'Images', 'nhl_rink.png')
)
rink_img = Image.open(rink_img_path)


def plot_all_teams_density(df, season_label="2020-21"):
    """Show an interactive per-team shot-density contour plot over the rink.

    One KDE is fitted per team on (``adjusted_x_coord``, ``y_coord``) and
    drawn as a contour trace; a dropdown switches which team is visible.

    Relies on module-level names defined in other cells: ``x_grid``,
    ``y_grid``, ``x_mesh``, ``y_mesh`` (evaluation grid) and ``rink_img``
    (background image).

    Parameters:
    - df: DataFrame of shots with ``team_name``, ``adjusted_x_coord`` and
      ``y_coord`` columns. Must contain at least one team.
    - season_label: Season text inserted in the figure titles
      (default "2020-21").

    Returns:
    - None. The figure is displayed with ``fig.show()``.
    """
    def title_for(team_name):
        # Single source for the title so the initial title and the dropdown
        # titles cannot drift apart.
        return f"Shot location on the rink for {team_name} for season {season_label}"

    # The evaluation points are the same for every team: build them once.
    grid_points = np.vstack([x_mesh.ravel(), y_mesh.ravel()])

    # Density surface of every team on the shared grid.
    density_data = []
    for team_name in df['team_name'].unique():
        team_data = df[df['team_name'] == team_name]
        kde = gaussian_kde([team_data['adjusted_x_coord'], team_data['y_coord']], bw_method=0.2)
        density_data.append({
            'team_name': team_name,
            'z': kde(grid_points).reshape(x_mesh.shape),
        })

    fig = go.Figure()

    for team in density_data:
        # Contour levels are scaled to this team's own maximum density.
        z_max = team['z'].max()
        fig.add_trace(go.Contour(
            z=team['z'],
            x=x_grid,
            y=y_grid,
            visible=False,
            name=team['team_name'],
            colorscale=[[0, 'blue'],[0.3, 'white'],[0.6,'lightcoral'],[0.8, 'red'],[1, 'darkred']],
            contours=dict(start=0, end=z_max, size=z_max / 10),
            opacity=0.7,
            showscale=False
        ))

    # Only the first team is shown initially; the dropdown toggles the rest.
    fig.data[0].visible = True
    buttons = [
        dict(label=team['team_name'],
             method='update',
             args=[{'visible': [team['team_name'] == trace.name for trace in fig.data]},
                   {'title': title_for(team['team_name'])}])
        for team in density_data
    ]

    fig.update_layout(
        updatemenus=[dict(active=0, buttons=buttons)],
        title=title_for(density_data[0]['team_name']),
        xaxis_title="length of the skating rink(ft)",
        yaxis_title="width of the skating rink(ft)",
        height=600,
        width=800
    )

    # Rink picture as background: anchored at (-100, 42.5) and stretched over
    # 200 x 85 axis units, drawn below the contour traces.
    fig.add_layout_image(
        dict(
            source=rink_img,
            xref="x",
            yref="y",
            x=-100,
            y=42.5,
            sizex=200,
            sizey=85,
            sizing="stretch",
            opacity=1.0,
            layer="below"
        )
    )

    # Optional export, disabled: fig.write_html(path_html)
    fig.show()

# Disabled: output path for an optional HTML export of the figure.
#current_dir = os.getcwd()
#path_html = os.path.join(current_dir, "shot_s_2020_2021.html")
# Draw the per-team density figure for the offensive-zone shots.
plot_all_teams_density(offensive_shots)


In [None]:
# Show the current working directory (the rink image path is built from it).
os.getcwd()