In [1]:
import os
import requests
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
from os.path import exists

# Disable pandas' chained-assignment (SettingWithCopy) warning for the whole session.
# NOTE(review): this is a global setting; it also hides the warning for any other code run in this kernel.
pd.options.mode.chained_assignment = None
        
def get_season_ids(season: str, path: str='data', force_search=False):
    '''
    Return the ids of all games (regular season and playoffs) of a season, caching them in a json file.

    The ids are stored in "<path>/<season>/identifiers.json". When that file already exists and
    force_search is False, the ids are read from it and no request is made. Otherwise the season
    schedule is requested from the NHL stats API and the file is (re)written.
    Arguments:
        season (str): Season for which the ids are retrieved (for example: "20182019").
        path (str): Folder where the data is contained.
        force_search (bool): Fetch the data with a GET request even if the identifiers file already
            exists (can be used for the current season).
    Returns:
        ids (list of ints): identifiers of each regular and playoff game of the specified season.
    Raises:
        requests.HTTPError: if the schedule request returns an HTTP error status.
    '''

    season_dir = os.path.join(path, season)
    identifiers_path = os.path.join(season_dir, "identifiers.json")

    # Use the cached identifiers unless a refresh is explicitly requested.
    if os.path.exists(identifiers_path) and not force_search:
        with open(identifiers_path) as identifiers_file:
            return json.load(identifiers_file)

    # Get the schedule for the season. The existing cache is deliberately left untouched
    # until the download has succeeded, so a failed refresh never destroys a valid file.
    url = f"https://statsapi.web.nhl.com/api/v1/schedule?season={season}&gameType=R&gameType=P"
    r_seasons = requests.get(url, timeout=30)
    r_seasons.raise_for_status()
    schedule = r_seasons.json()

    # Get all the game ids from the schedule (one entry per date, several games per date).
    ids = [game['gamePk'] for date in schedule['dates'] for game in date['games']]

    # Save the list of ids into a json file (opening with 'w' replaces any previous content).
    os.makedirs(season_dir, exist_ok=True)
    with open(identifiers_path, 'w') as identifiers_file:
        json.dump(ids, identifiers_file)

    return ids


def get_game_data(season: str, path: str = './data'):
    '''
    Download the play-by-play data of every game of a season and store it on disk.

    For each game id returned by get_season_ids, the live feed of the game is requested, cleaned
    with clean_data and saved as a pickled DataFrame.
    Example: the data of the 2016/2017 season is saved in './data/20162017/' + {game_id} + '.pkl'.
    If the file of a game already exists, the game is not downloaded again.
    An error on one game is printed and does not stop the download of the other games.
    Arguments:
        season (str): Season whose games are downloaded (for example: "20162017").
        path (str): Folder where the data of each season is stored ('./data' by default).
    '''

    season_dir = os.path.join(path, season)
    os.makedirs(season_dir, exist_ok=True)  # also creates `path` itself when needed
    ids = get_season_ids(season, path)
    for i in tqdm(ids):
        game_file = os.path.join(season_dir, f"{i}.pkl")
        if exists(game_file):
            # The data of this game has already been saved.
            continue
        try:
            r = requests.get(f"https://statsapi.web.nhl.com/api/v1/game/{i}/feed/live/", timeout=30)
            # Fail on HTTP errors instead of trying to parse (and cache) an error payload.
            r.raise_for_status()
            df = pd.DataFrame.from_records(r.json()["liveData"]["plays"]["allPlays"])
            df = clean_data(df)
            df.to_pickle(game_file)
        except Exception as e:
            # Best effort: report the failing game and keep going with the next one.
            print(f"Erreur pour la partie ID = {i} \n {e}")

def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    '''
    Function that cleans the play-by-play data from the API.
    It keeps only the "Goal" and "Shot" events, flattens the nested fields of interest into
    plain columns and removes the nested columns.
    Arguments:
        df (pd.DataFrame): DataFrame containing the data from the API, with the nested columns
            "result", "about", "team", "players" and "coordinates". It is not modified.
    Returns:
        df (pd.DataFrame): New DataFrame containing the cleaned data. Rows with at least one
            missing value (for example no shot type or no coordinates) are dropped.
    '''

    events_of_interest = ("Goal", "Shot")

    # Keep only the rows matching the events of interest; rows whose "result" is not a dict
    # (missing value) are ignored instead of raising.
    is_of_interest = df["result"].apply(
        lambda r: isinstance(r, dict) and r.get("event") in events_of_interest
    ).astype(bool)
    # Work on an explicit copy so the column assignments below never write into a view of
    # the caller's DataFrame (and do not depend on the SettingWithCopy warning being silenced).
    df = df[is_of_interest].copy()

    df["eventId"] = df["about"].apply(lambda a: a["eventId"])
    df["event"] = df["result"].apply(lambda r: r["event"])
    df["eventType"] = df["result"].apply(lambda r: r.get("secondaryType"))
    # Coordinates may be missing or an empty dict; such rows get None and are dropped below.
    df["eventCoordinates"] = df["coordinates"].apply(
        lambda c: (c["x"], c["y"]) if isinstance(c, dict) and "x" in c and "y" in c else None
    )
    df["isGoal"] = df["event"] == "Goal"
    df["description"] = df["result"].apply(lambda r: r["description"])
    df["dateTime"] = df["about"].apply(lambda a: a["dateTime"])
    df["period"] = df["about"].apply(lambda a: a["period"])
    df["periodTime"] = df["about"].apply(lambda a: a["periodTime"])
    df["periodTimeRemaining"] = df["about"].apply(lambda a: a["periodTimeRemaining"])
    df["teamId"] = df["team"].apply(lambda t: t["id"])
    df["teamName"] = df["team"].apply(lambda t: t["name"])
    # The first listed player is taken as the shooter and the last one as the goalie.
    # NOTE(review): assumes the last player is always the goalie - confirm for plays
    # without a goalie (e.g. empty-net goals).
    df["shooterName"] = df["players"].apply(lambda p: p[0]["player"]["fullName"] if len(p) > 0 else None)
    df["goalieName"] = df["players"].apply(lambda p: p[-1]["player"]["fullName"] if len(p) > 0 else None)

    # Drop the nested columns that are not useful for the analysis anymore.
    df = df.drop(columns=["result", "about", "team", "players", "coordinates"])
    # Remove the rows that contain at least one missing value.
    return df.dropna()

In [2]:
# Download the play-by-play data of the 2016-2017 season (games already saved on disk are skipped).
get_game_data("20162017")

100%|██████████| 1317/1317 [00:00<00:00, 63238.68it/s]


In [3]:
import numpy as np
from matplotlib import pyplot as plt
from ipywidgets import *
from data_aquisition import *

In [4]:
def split_indentifiers(season, path = './data'):
    '''
    Split the game ids of a season into regular-season ids and playoff ids.

    The ids are read from "<path>/<season>/identifiers.json". An id belongs to the regular season
    when it starts with the first year of the season followed by "02", and to the playoffs when
    it starts with that year followed by "03". Any other id is reported with a printed message
    and left out of both lists.
    Arguments:
        season (str): Season of the ids (for example: "20162017").
        path (str): Folder where the season data is stored.
    Returns:
        (list, list): the sorted regular-season ids and the sorted playoff ids.
    '''
    identifiers_file = path + '/' + season + '/' + 'identifiers.json'
    with open(identifiers_file) as handle:
        game_ids = json.load(handle)

    # Prefix of an id (everything except its last four characters) for each type of game.
    regular_prefix = season[:-4] + '02'
    playoff_prefix = season[:-4] + '03'
    by_prefix = {regular_prefix: [], playoff_prefix: []}

    for game_id in game_ids:
        bucket = by_prefix.get(str(game_id)[:-4])
        if bucket is None:
            print(f"something unexpected happened at id = {str(game_id)}")
            continue
        bucket.append(game_id)

    return sorted(by_prefix[regular_prefix]), sorted(by_prefix[playoff_prefix])

In [87]:
def interactif_widget(season : str, path = './data'):
    '''
    Display an interactive explorer of the shots and goals of every game of a season.

    The data of the season is downloaded first if needed (get_game_data). The function then shows:
      - a dropdown to choose between regular-season and playoff games,
      - a slider to choose the game id,
      - a slider to choose one play of the selected game, with the play drawn on the rink image,
      - a text summary of the selected play.
    Arguments:
        season (str): Season to explore (for example: "20162017").
        path (str): Folder where the season data is stored. It is used both to download the data
            and to read the saved games.
    '''
    get_game_data(season, path)
    id_regular_season, id_playoffs = split_indentifiers(season, path)

    # Values of the game-type dropdown.
    REGULAR_SEASON, PLAYOFFS = 1, 2

    def load_game_events(game_id):
        """Load the cleaned plays of one game, indexed by event id."""
        # NOTE: unpickling can run arbitrary code; only files written by get_game_data are expected here.
        return pd.read_pickle(f"{path}/{season}/{game_id}.pkl").set_index('eventId')

    def describe_play(event_id):
        """Build the HTML summary of one play of the currently loaded game."""
        play = df.loc[event_id]
        return (
            f"<br>Date Time : \t{play['dateTime']}"
            f"<br>Period : P{play['period']} - {play['periodTime']}"
            f"<br>Team : {play['teamName']}"
            f"<br>Description : {play['description']}"
            f"<br>Coordinates : {play['eventCoordinates']}<br>"
        )

    def draw_play(x, y, title):
        """Draw the rink on a new figure and mark the play located at (x, y)."""
        plt.rcParams["figure.figsize"] = [12, 6]
        plt.rcParams["figure.autolayout"] = True
        rink = plt.imread("./figures/nhl_rink.png")
        fig, ax = plt.subplots()
        ax.set_xlabel('feet')
        ax.set_ylabel('feet')
        # Stretch the image so that the axes are expressed in feet, centred on the middle of the rink.
        ax.imshow(rink, extent=[-200/2., 200/2., -85/2., 85/2.])
        ax.set_yticks(np.arange(-42.5,43.5,21.25))
        ax.set_xticks(np.arange(-100,101,25))
        ax.set_title(title)
        ax.plot(x, y, marker="o", markersize=20, markeredgecolor="red", markerfacecolor="blue")
        plt.draw()

    def handle_event(w):
        """Redraw the rink for the play selected on the play slider.

           The argument name must match the key used in the "interactive" call (below).
        """
        play = df.loc[w]
        x, y = play['eventCoordinates']
        draw_play(x, y, play['description'])

    def on_change_type_game(change):
        # Read the selected type from the event instead of toggling a flag, so the list of
        # games can never get out of sync with the dropdown.
        id_slider.options = id_playoffs if change['new'] == PLAYOFFS else id_regular_season

    def on_change_game_id(change):
        nonlocal df
        previous_play = play_slider.value
        # Load the new game before updating the play slider: changing its options triggers
        # handle_event / on_change_pbp_id, which read `df`.
        df = load_game_events(change['new'])
        play_slider.options = df.index.values.tolist()
        if play_slider.value == previous_play:
            # Same first event id as in the previous game: no value-change event was fired,
            # so refresh the plot and the summary explicitly.
            id_play.update()
            infos_pbp.value = describe_play(play_slider.value)

    def on_change_pbp_id(change):
        infos_pbp.value = describe_play(change['new'])

    type_choice = Dropdown(options=[('Regular season', REGULAR_SEASON), ('Playoffs', PLAYOFFS)], description='Type of the game:', style= {'description_width': 'initial'})
    # names='value' restricts the callbacks to changes of the selected value.
    type_choice.observe(on_change_type_game, names='value')

    id_slider = SelectionSlider(description='Game ID:',   options=id_regular_season)
    ui = HBox([type_choice,id_slider])
    display(ui)

    # TODO: placeholder text, not yet filled with the information of the selected game.
    infos_game = widgets.HTML(
    value="<br>data et heure<br>team 1 (home) vs team 2 (away)<br>goals - goals <br>attempts vs attempts<br>",
    description='Informations about the game :',
    style= {'description_width': 'initial'})
    display(infos_game)

    # Plays of the game currently selected; replaced by on_change_game_id.
    df = load_game_events(id_slider.value)
    event_id_list = df.index.values.tolist()

    play_slider = SelectionSlider(description='Play by play ID:', options=event_id_list, style= {'description_width': 'initial'})
    id_play = interactive(handle_event, w = play_slider)
    id_slider.observe(on_change_game_id, names='value')
    display(id_play)

    infos_pbp = widgets.HTML(value=describe_play(event_id_list[0]),description='Informations about the play by play :',style= {'description_width': 'initial'})
    play_slider.observe(on_change_pbp_id, names='value')
    display(infos_pbp)

In [89]:
# Show the interactive shot/goal explorer for the 2016-2017 season.
interactif_widget('20162017')

100%|██████████| 1317/1317 [00:00<00:00, 101493.74it/s]


HBox(children=(Dropdown(description='Type of the game:', options=(('Regular season', 1), ('Playoffs', 2)), sty…

HTML(value='<br>data et heure<br>team 1 (home) vs team 2 (away)<br>goals - goals <br>attempts vs attempts<br>'…

interactive(children=(SelectionSlider(description='Play by play ID:', options=(8, 11, 15, 16, 24, 25, 27, 29, …

HTML(value='<br>Date Time : \t2016-10-12T23:19:59Z<br>Period : P1 - 01:11<br>Team : Toronto Maple Leafs<br>Des…