In [59]:
import json
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from ipywidgets import interact, IntSlider, VBox
from IPython.display import display


def load_data(year_list):
    """
    Loads NHL game data for the given years in chronological order.
    
    Args:
        year_list (list): A list containing the years to load data from, in chronological order.
        
    Returns:
        list: A list of game data loaded from the JSON files, ordered from oldest to most recent.
    """
    all_data={}
    for year in year_list:
        print(f"Loading data for season {year}")
        
        # Get all files for the season year that match the format nhl_season_year_*.json
        files = [f for f in os.listdir("Data") if f.startswith(f"nhl_season_{year}_") and f.endswith(".json")]
        
        files.sort()
        temp =[]
        # Load data from each file and append to all_data
        for file_name in files:
            file_path = os.path.join("Data", file_name)
            
            with open(file_path, 'r') as f:
                game_data = json.load(f)
                temp.append(game_data)
                
        all_data[year]=temp
        
        print(f"Data for season {year} loaded successfully.")
    
    return all_data


year_list = [2016, 2017, 2018]  
all_data = load_data(year_list)

rink_img_path = 'Data/rink.png'
rink_img = mpimg.imread(rink_img_path)



Loading data for season 2016
Data for season 2016 loaded successfully.
Loading data for season 2017
Data for season 2017 loaded successfully.
Loading data for season 2018
Data for season 2018 loaded successfully.


In [19]:
def plot_rink(game, event):
    """
    Plots the hockey rink and displays the location of a specific event on the rink.

    Args:
        game (dict): A dictionary containing game information, including game ID and season.
        event (dict): A dictionary containing event details, such as event coordinates 
                      ('xCoord', 'yCoord') and event description ('typeDescKey').
    
    The function uses the provided event data to plot the location of the event on the rink. 
    If the event has coordinates, it marks the position and labels it with a description.
    """
    fig, ax = plt.subplots(figsize=(8, 6))

    # Display the hockey rink image within the provided coordinates
    ax.imshow(rink_img, extent=[-100, 100, -42.5, 42.5])
    
    # Check if event contains x and y coordinates to plot
    if 'details' in event:
        if 'xCoord' in event['details'] and 'yCoord' in event['details']:
            x, y = event['details']['xCoord'], event['details']['yCoord']
            # Plot event position on the rink
            plt.scatter(x, y, color='blue', s=100)
            # Add event description text at the plotted position
            plt.text(x + 5, y, event['typeDescKey'], fontsize=12, color='white', 
                     bbox=dict(facecolor='black', alpha=0.5))
    
    # Set rink boundaries and aspect ratio
    ax.set_xlim(-100, 100)
    ax.set_ylim(-42.5, 42.5)
    ax.set_title(f"Game {game['id']} - {game['season']}")
    plt.gca().set_aspect('equal', adjustable='box')
    plt.show()

def browse_events(data,game_idx, event_idx):
    """
    Displays information about a specific game and event, and visualizes the event on the rink.

    Args:
        game_idx (int): Index of the game in the dataset.
        event_idx (int): Index of the event within the game's play-by-play data.
    
    The function prints detailed information about the game (teams, score, shots on goal, overtime, 
    and shootout details) and then calls `plot_rink` to visualize the event on a rink plot.
    """
    # Fetch the specific game and event from the data
    game = data[game_idx]
    event = game['plays'][event_idx]
    
    # Print basic game information
    print("Game ID:", game['id'])
    print("Season:", game['season'])
    print(f"Date and Time: {game['gameDate']} at {game['startTimeUTC']}")

    home_team = game['homeTeam']
    away_team = game['awayTeam']
    
    # Print teams, score, and shots on goal (SoG) statistics
    print(f"Teams: {home_team['abbrev']} (Home) vs {away_team['abbrev']} (Away)")
    print(f"Goals: {home_team['score']} (Home) - {away_team['score']} (Away)")
    print(f"SoG: {home_team['sog']} (Home) - {away_team['sog']} (Away)")
    
    # Check if the game went to overtime or shootout
    if game['otInUse']:
        print("Overtime: Yes")
    else:
        print("Overtime: No")
        
    if game['shootoutInUse']:
        print("Shootout: Yes")
        print(f"SO Goals: {home_team.get('shootoutGoals', 'None')} (Home) - {away_team.get('shootoutGoals', 'None')} (Away)")
        print(f"SO Attempts: {home_team.get('shootoutAttempts', 'None')} (Home) - {away_team.get('shootoutAttempts', 'None')} (Away)")
    else:
        print("Shootout: No")

    # Plot the event on the rink and print the event details
    plot_rink(game, event)
    print(event)


In [20]:
from ipywidgets import IntSlider, Dropdown, interact

def etape_2(year_list):
    """
    Function to load game data for the given years, set up a dropdown to select the year,
    and interactive sliders to browse games and events within the selected year.

    Args:
        year_list (list): A list of years from which the NHL data will be loaded.
        
    Returns:
        dict: A dictionary where each year maps to its loaded game data.
    """
    # Load the data using the load_data function
    all_data = load_data(year_list)  # Load each year and store in dict

    # Dropdown for selecting year
    year_dropdown = Dropdown(options=year_list, description='Select Year')

    # Set up initial sliders for game and event selection (we will update them dynamically)
    game_slider = IntSlider(min=0, max=0, step=1, description='Game ID')
    event_slider = IntSlider(min=0, max=0, step=1, description='Event')

    def update_year(selected_year):
        """
        Updates the game and event sliders based on the selected year.
        """
        games = all_data[selected_year]  # Get the data for the selected year

        # Update the game slider based on the number of games in the selected year
        game_slider.max = len(games) - 1
        game_slider.value = 0  

        # Update the event slider for the first game initially
        event_slider.max = len(games[0]['plays']) - 1
        event_slider.value = 0  

        def update_plot(game_idx, event_idx):
            """
            Updates the event slider based on the selected game and displays the event.
            """
            event_slider.max = len(games[game_idx]['plays']) - 1  
            event_slider.value = 0
            browse_events(games,game_idx, event_idx) 

        # Use the updated sliders interactively
        interact(update_plot, game_idx=game_slider, event_idx=event_slider)

    # Interactive dropdown for year selection
    interact(update_year, selected_year=year_dropdown)




In [21]:
etape_2([2016,2017])

Loading data for season 2016
Data for season 2016 loaded successfully.
Loading data for season 2017
Data for season 2017 loaded successfully.


interactive(children=(Dropdown(description='Select Year', options=(2016, 2017), value=2016), Output()), _dom_c…

Etape 3:

In [32]:
import requests
import pandas as pd

players= {}
def request(url, id):
    """
    Sends a GET request to the specified URL and retrieves the response data.

    Args:
        url (str): The URL to request data from.
        id (str): The ID associated with the request (used for error reporting).
    
    Returns:
        dict or None: the function returns the JSON response as a dictionary. Otherwise, it returns None.
    """
    response = requests.get(url)

    if response.status_code == 200:
        return response.json() 
    else:
        print(f"Failed to fetch data for {id}. Status code: {response.status_code}")
        return None
    
def get_player_name(id):
    """
    Retrieves the first name of the player associated with the given player ID.

    Args:
        id (str): The unique player ID used to fetch player data from the NHL API.
    
    Returns:
        str or None: The first name of the player if found, otherwise None.
    
    The function checks if the player data is already cached in the `players` dictionary. 
    If not, it fetches the player data from the NHL API and stores it in the cache.
    """
    url = f"https://api-web.nhle.com/v1/player/{id}/landing"
    response = None
    if id not in players:
        response = request(url, id)
        if response is not None: 
            players[id] = response
            response = response['firstName']['default']
    else:
        response = players[id]['firstName']['default']
 
    return response

def get_force(situation_code, event_owner_team_id, home_team_id, away_team_id):
    """
    Determines the game situation (e.g., Power Play, Shorthanded, Even Strength) 
    based on the number of skaters and goalies for both teams.

    Args:
        situation_code (str): A 4-digit string representing the game situation:
                              - 1st digit: number of away goalies.
                              - 2nd digit: number of away skaters.
                              - 3rd digit: number of home skaters.
                              - 4th digit: number of home goalies.
        event_owner_team_id (str): The team ID associated with the event being analyzed.
        home_team_id (str): The team ID for the home team.
        away_team_id (str): The team ID for the away team.
    
    Returns:
        str: A string describing the game situation (e.g., "Power Play", "Shorthanded", 
             "Even Strength", or "Empty Net Power Play"). Returns "Unknown" if the situation
             cannot be determined.
    """
    away_goalie = int(situation_code[0])
    away_skaters = int(situation_code[1])
    home_skaters = int(situation_code[2])
    home_goalie = int(situation_code[3])

    # Check if both teams have an equal number of skaters
    if away_skaters == home_skaters:
        return "Even Strength"
    elif away_skaters > home_skaters:
        if event_owner_team_id == away_team_id:
            return "Power Play" if away_goalie == 1 else "Empty Net Power Play"
        else:
            return "Shorthanded"
    elif home_skaters > away_skaters:
        if event_owner_team_id == home_team_id:
            return "Power Play" if home_goalie == 1 else "Empty Net Power Play"
        else:
            return "Shorthanded"
    else:
        return "Unknown"


In [56]:
import pandas as pd
import os

def create_dataframes(year_list, data):
    """
    Crée des DataFrames pour les jeux et les sauvegarde en fichiers CSV.

    :param year_list: Un dictionnaire où la clé est l'année et la valeur est une liste d'ID de jeux.
    :param data: Une liste contenant les données de chaque jeu.
    """
    # Créer un dossier pour sauvegarder les DataFrames s'il n'existe pas
    output_dir = 'dataframe'
    os.makedirs(output_dir, exist_ok=True)

    for year in year_list:
        for game in data[year]:
            
            game_id = game['id']
            print(game_id)
            home_team = game['homeTeam']['name']['default']
            away_team = game['awayTeam']['name']['default']

            dfPlays = pd.DataFrame.from_records(game['plays'])
            df_filtered = dfPlays[dfPlays['typeDescKey'].isin(['goal', 'shot-on-goal'])]

            plays_data = []
            for idx, row in df_filtered.iterrows():
                event_id = row['eventId']
                sort_order = row['sortOrder']
                time_in_period = row['timeInPeriod']
                period_number = row['periodDescriptor']['number']
                situation_code = row['situationCode']
                x_coord = row['details'].get('xCoord', None)
                y_coord = row['details'].get('yCoord', None)
                shot_type = row['details'].get('shotType', None)
                shooter_id = get_player_name(row['details'].get('shootingPlayerId', row['details'].get('scoringPlayerId', None)))
                goalie_id = get_player_name(row['details'].get('goalieInNetId', None))
                event_owner_team_id = row['details']['eventOwnerTeamId']
                event_type = row['typeDescKey']

                force_type = get_force(situation_code, event_owner_team_id, game['homeTeam']['id'], game['awayTeam']['id'])

                empty_net = goalie_id is None or (event_owner_team_id == game['homeTeam']['id'] and int(situation_code[0]) == 0) or (event_owner_team_id != game['homeTeam']['id'] and int(situation_code[3]) == 0)

                team = home_team if event_owner_team_id == game['homeTeam']['id'] else away_team

                plays_data.append({
                    'ID':game_id,
                    'Sort Order':sort_order,
                    'Time': time_in_period,
                    'Period': period_number,
                    'Event ID': event_id,
                    'Team': team,
                    'Type': event_type,
                    'Coordinates': (x_coord, y_coord),
                    'Shooter ID': shooter_id,
                    'Goalie ID': goalie_id,
                    'Shot Type': shot_type,
                    'Empty Net': empty_net,
                    'Force Type': force_type
                })

            # Créer la DataFrame finale
            df_final = pd.DataFrame(plays_data)

            # Créer le nom du fichier en utilisant le format spécifié
            filename = f'season_{year}_{game_id}.csv'
            file_path = os.path.join(output_dir, filename)

            # Sauvegarder la DataFrame dans un fichier CSV
            df_final.to_csv(file_path, index=False)
            print(f'Sauvegardé: {file_path}')


In [60]:
create_dataframes(year_list, all_data)

2016020001
Sauvegardé: dataframe\season_2016_2016020001.csv
2016020002
Failed to fetch data for None. Status code: 404
Failed to fetch data for None. Status code: 404
Sauvegardé: dataframe\season_2016_2016020002.csv
2016020003
Failed to fetch data for None. Status code: 404
Sauvegardé: dataframe\season_2016_2016020003.csv
2016020004
Sauvegardé: dataframe\season_2016_2016020004.csv
2016020005
Sauvegardé: dataframe\season_2016_2016020005.csv
2016020006
Failed to fetch data for None. Status code: 404
Sauvegardé: dataframe\season_2016_2016020006.csv
2016020007
Sauvegardé: dataframe\season_2016_2016020007.csv
2016020008
Failed to fetch data for None. Status code: 404
Sauvegardé: dataframe\season_2016_2016020008.csv
2016020009
Failed to fetch data for None. Status code: 404
Sauvegardé: dataframe\season_2016_2016020009.csv
2016020010
Sauvegardé: dataframe\season_2016_2016020010.csv
2016020011
Sauvegardé: dataframe\season_2016_2016020011.csv
2016020012
Sauvegardé: dataframe\season_2016_2016020

KeyboardInterrupt: 

In [67]:
import pandas as pd
import os

def load_dataframes(year_list):
    """
    Charge les DataFrames pour les années spécifiées et les concatène.

    :param year_list: Une liste d'années à charger.
    :return: Un dictionnaire où la clé est l'année et la valeur est la DataFrame concaténée pour cette année.
    """
    df_dict = {}

    for year in year_list:
        # Initialiser une liste pour stocker les DataFrames de l'année
        dfs = []

        files = [f for f in os.listdir("dataframe") if f.startswith(f'season_{year}_') and f.endswith(".csv")]

        files.sort()
        for filename in files:
            file_path = os.path.join('dataframe', filename)
            df = pd.read_csv(file_path)
            dfs.append(df)

        # Concatenation des DataFrames pour l'année
        if dfs:
            df_year = pd.concat(dfs, ignore_index=True)
            df_dict[year] = df_year
        else:
            print(f"Aucun DataFrame trouvé pour l'année {year}.")

    return df_dict


years_to_load = [2016, 2017]  # Liste d'années à charger
dataframes = load_dataframes(years_to_load)



In [68]:

for year, df in dataframes.items():
    print(f'DataFrame pour l\'année {year}:')
    print(df.tail())

DataFrame pour l'année 2016:
               ID  Sort Order   Time  Period  Event ID     Team          Type  \
74958  2016021230         608  15:53       3       670  Canucks  shot-on-goal   
74959  2016021230         611  15:59       3       672  Canucks  shot-on-goal   
74960  2016021230         631  18:07       3       676  Canucks  shot-on-goal   
74961  2016021230         643  19:21       3       679   Oilers  shot-on-goal   
74962  2016021230         649  19:51       3       680   Oilers  shot-on-goal   

      Coordinates Shooter ID Goalie ID Shot Type  Empty Net     Force Type  
74958  (-57, -21)       Reid   Laurent      snap      False  Even Strength  
74959  (-68, -32)      Brock   Laurent      snap      False  Even Strength  
74960  (-76, -20)     Philip   Laurent     wrist      False  Even Strength  
74961   (69, -27)       Matt   Richard      snap      False  Even Strength  
74962    (68, 19)       Zack   Richard      slap      False  Even Strength  
DataFrame pour l'année