In [1]:
import json
import time
import pandas as pd
import pickle

In [2]:
# Load each pre-computed lookup object from its pickle file in the working directory.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only run this cell against files that were produced locally and are trusted.

def _load_pickle(path):
    """Open the pickle file at ``path`` in binary mode and return the stored object."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


games_trimmed = _load_pickle('games_trimmed.pkl')
game_shifts = _load_pickle('game_shifts.pkl')
coordinate_shots = _load_pickle('coordinate_shots.pkl')
coordinate_shifts = _load_pickle('coordinate_shifts.pkl')
player_id_games = _load_pickle('player_id_games.pkl')
team_games = _load_pickle('team_games.pkl')
# All six objects are now in memory and can be used by the cells below.

In [21]:
def get_game_year(gameID):
    """Return ``gameID`` with its last six digits dropped.

    The result is the floor quotient of ``gameID`` by 1,000,000, which this
    notebook treats as the year in which the game was played.
    """
    # divmod yields (quotient, remainder); only the quotient is wanted.
    year_part, _ = divmod(gameID, 1000000)
    return year_part

In [8]:
def get_players_on_ice(gameID, period, periodTime):
    """Return the ids of players who have a shift covering ``periodTime``.

    Parameters
    ----------
    gameID : key into the module-level ``game_shifts`` mapping.
    period : period number; only 1, 2 and 3 are supported.
    periodTime : time within the period, compared directly against the
        bounds stored in each shift tuple (so it must use the same type/units
        as those bounds).

    Returns
    -------
    list or None
        ``None`` when ``period`` is outside 1-3. Otherwise a list of
        ``player_id`` values, in table order, each listed at most once.

    Raises
    ------
    KeyError
        If ``gameID`` is not present in ``game_shifts``.

    Notes
    -----
    A shift ``(start, end)`` covers ``periodTime`` when
    ``start < periodTime <= end`` (open at the start, closed at the end).
    Assumes each ``game_shifts`` value is a pandas DataFrame with a
    ``player_id`` column and ``shifts_1`` .. ``shifts_3`` columns holding
    iterables of shift tuples -- TODO confirm against the code that builds it.
    """
    # Look the game up before validating the period so that an unknown
    # gameID keeps raising KeyError regardless of the period passed in.
    game_dataframe = game_shifts[gameID]

    if period not in range(1, 4):
        return None

    shift_column = "shifts_" + str(period)

    player_ids = []
    # Walk the two needed columns directly instead of materialising a Series
    # per row with iterrows().
    for current_player, shift_tuples in zip(game_dataframe['player_id'],
                                            game_dataframe[shift_column]):
        # any() stops at the first covering shift, so a player whose shift
        # records overlap is reported once rather than once per matching tuple.
        if any(shift[0] < periodTime <= shift[1] for shift in shift_tuples):
            player_ids.append(current_player)

    return player_ids

In [12]:
def get_player_shots(player_id, player_id_games):
    """Collect every shot event that happened while ``player_id`` was on the ice.

    ``player_id_games[player_id]`` is read as a mapping whose values are
    collections of game ids. Each game id is looked up in the module-level
    ``games_trimmed`` mapping, and every event in that game is kept when
    ``get_players_on_ice`` reports the player on the ice at the event's
    period and period time.

    Returns the matching shot dictionaries in iteration order.
    """
    on_ice_events = []

    for season_game_ids in player_id_games[player_id].values():
        for game_id in season_game_ids:
            for event in games_trimmed[game_id].values():
                clock = event['about']['periodTime']
                period_number = event['about']['period']

                skaters = get_players_on_ice(game_id, period_number, clock)
                # None means the period is not supported; treat it as "not on ice".
                if skaters is None or player_id not in skaters:
                    continue
                on_ice_events.append(event)

    return on_ice_events

In [13]:
def get_shots_by_player(player_id, player_id_games):
    """Collect the shot events taken by ``player_id`` in periods 1-3.

    Parameters
    ----------
    player_id : id compared against the first listed player of each shot.
    player_id_games : mapping whose ``player_id`` entry is itself a mapping
        with collections of game ids as values.

    Returns
    -------
    list
        The shot dictionaries from the module-level ``games_trimmed`` mapping
        whose ``['players'][0]['player']['id']`` equals ``player_id`` and whose
        period is 1, 2 or 3.

    Raises
    ------
    KeyError
        If ``player_id`` is missing from ``player_id_games`` or a game id is
        missing from ``games_trimmed``.
    """
    player_shots = []

    # Only the game id collections are needed; the keys of the inner mapping are unused.
    for gameIDs in player_id_games[player_id].values():
        for gameID in gameIDs:
            for shot in games_trimmed[gameID].values():
                # The period time is irrelevant here, so it is not read at
                # all; a shot lacking that key is still evaluated.
                if shot['about']['period'] not in range(1, 4):
                    continue

                # The first entry of 'players' is the one compared to player_id.
                if shot['players'][0]['player']['id'] == player_id:
                    player_shots.append(shot)

    return player_shots

In [14]:
def get_player_shots_against(player_id, player_id_games, player_teams):
    """Collect opposing-team shot events taken while ``player_id`` was on the ice.

    ``player_id_games[player_id]`` maps a year key to a collection of game
    ids, and ``player_teams[player_id]`` maps the same year key to the team
    codes the player belonged to that year. A shot from the module-level
    ``games_trimmed`` mapping is kept when its ``['team']['triCode']`` is not
    one of those codes and ``get_players_on_ice`` reports the player on the
    ice at the shot's period and period time.

    Returns the matching shot dictionaries in iteration order.
    """
    games_by_year = player_id_games[player_id]
    teams_by_year = player_teams[player_id]

    shots_against = []

    for season, season_game_ids in games_by_year.items():
        own_codes = teams_by_year[season]

        for game_id in season_game_ids:
            for event in games_trimmed[game_id].values():
                # Skip shots taken by one of the player's own teams.
                if event['team']['triCode'] in own_codes:
                    continue

                clock = event['about']['periodTime']
                period_number = event['about']['period']

                skaters = get_players_on_ice(game_id, period_number, clock)
                if skaters is None or player_id not in skaters:
                    continue
                shots_against.append(event)

    return shots_against

In [15]:
def get_goal_shot_ratio(player_id, player_id_games, player_shots=None):
    """Return the share of counted shots that were goals.

    Parameters
    ----------
    player_id, player_id_games : only used to fetch the shots with
        ``get_player_shots`` when ``player_shots`` is not supplied.
    player_shots : optional iterable of shot dictionaries; each must provide
        ``['result']['event']``.

    Returns
    -------
    int or float
        ``goals / shots`` where events of type 'Goal' and 'Shot' both count as
        shots. Returns ``0`` when there are no such events, whether the input
        is empty or contains only other event types.
    """
    if player_shots is None:
        player_shots = get_player_shots(player_id, player_id_games)

    goal_count = 0
    shot_count = 0

    for shot in player_shots:
        event_type = shot['result']['event']

        # A goal is also a shot, so it increments both counters.
        if event_type == 'Goal':
            goal_count += 1
            shot_count += 1
        elif event_type == 'Shot':
            shot_count += 1

    # Guard the division: a non-empty list can still contain no 'Goal' or
    # 'Shot' events, which would otherwise raise ZeroDivisionError.
    if shot_count == 0:
        return 0

    return goal_count / shot_count

In [16]:
def get_goal_shot_ratio_against(player_id, player_id_games, player_shots=None, player_teams=None):
    """Return the share of counted opposing shots that were goals.

    Parameters
    ----------
    player_id, player_id_games : only used to fetch the shots with
        ``get_player_shots_against`` when ``player_shots`` is not supplied.
    player_shots : optional iterable of shot dictionaries; each must provide
        ``['result']['event']``.
    player_teams : mapping forwarded to ``get_player_shots_against``; required
        only when ``player_shots`` is not supplied.

    Returns
    -------
    int or float
        ``goals / shots`` where events of type 'Goal' and 'Shot' both count as
        shots. Returns ``0`` when there are no such events.

    Raises
    ------
    TypeError
        If neither ``player_shots`` nor ``player_teams`` is given, because
        ``get_player_shots_against`` cannot be called without the teams.
    """
    if player_shots is None:
        # get_player_shots_against needs the player's teams as a third
        # argument; fail with a clear message when it is not available.
        if player_teams is None:
            raise TypeError(
                "player_teams is required when player_shots is not provided"
            )
        player_shots = get_player_shots_against(player_id, player_id_games, player_teams)

    goal_count = 0
    shot_count = 0

    for shot in player_shots:
        event_type = shot['result']['event']

        # A goal is also a shot, so it increments both counters.
        if event_type == 'Goal':
            goal_count += 1
            shot_count += 1
        elif event_type == 'Shot':
            shot_count += 1

    # Guard the division: a non-empty list can still contain no 'Goal' or
    # 'Shot' events, which would otherwise raise ZeroDivisionError.
    if shot_count == 0:
        return 0

    return goal_count / shot_count

In [30]:
def get_coordinate_goal_shot_ratio(x, y, coordinate_shots=coordinate_shots, player_id_games=player_id_games):
    """Return the share of counted shots from coordinate ``(x, y)`` that were goals.

    Parameters
    ----------
    x, y : combined into the tuple key ``(x, y)``.
    coordinate_shots : mapping from ``(x, y)`` tuples to iterables of shot
        dictionaries; defaults to the module-level object of the same name as
        it was bound when this function was defined.
    player_id_games : accepted for interface compatibility; not used here.

    Returns
    -------
    int or float
        ``goals / shots`` where events of type 'Goal' and 'Shot' both count as
        shots. Returns ``0`` when the coordinate is absent or has no such
        events.
    """
    goal_count = 0
    shot_count = 0

    # A membership test (rather than indexing) leaves the mapping untouched
    # when the coordinate has never been recorded.
    if (x, y) in coordinate_shots:
        for shot in coordinate_shots[(x, y)]:
            event_type = shot['result']['event']

            # A goal is also a shot, so it increments both counters.
            if event_type == 'Goal':
                goal_count += 1
                shot_count += 1
            elif event_type == 'Shot':
                shot_count += 1

    # Guard the division: an unknown coordinate, or one with no 'Goal'/'Shot'
    # events, would otherwise raise ZeroDivisionError.
    if shot_count == 0:
        return 0

    return goal_count / shot_count

In [39]:
def get_coordinate_goal_shot_ratio_against(player_id, coordinate, player_id_games=player_id_games, coordinate_shots=coordinate_shots, player_teams=None):
    """Return the goal share of opposing shots from ``coordinate`` with the player on the ice.

    Parameters
    ----------
    player_id : player whose on-ice opposing shots are examined.
    coordinate : value compared for equality with each shot's
        ``['coordinates']`` entry.
    player_id_games : forwarded to ``get_player_shots_against`` and
        ``get_goal_shot_ratio_against``.
    coordinate_shots : accepted for interface compatibility; not used here.
    player_teams : mapping forwarded to ``get_player_shots_against``, which
        requires it.

    Returns
    -------
    int or float
        Whatever ``get_goal_shot_ratio_against`` returns for the shots taken
        from ``coordinate``.

    Raises
    ------
    TypeError
        If ``player_teams`` is not given, because
        ``get_player_shots_against`` cannot be called without it.
    """
    # get_player_shots_against takes the player's teams as a required third
    # argument; fail with a clear message when it is not available.
    if player_teams is None:
        raise TypeError(
            "player_teams is required to look up the shots against the player"
        )

    shots_against = get_player_shots_against(player_id, player_id_games, player_teams)

    # Use a separate local name so the coordinate_shots parameter is not shadowed.
    shots_at_coordinate = [
        shot for shot in shots_against if shot['coordinates'] == coordinate
    ]

    # The shots are passed explicitly, so the ratio helper does not fetch them again.
    return get_goal_shot_ratio_against(player_id, player_id_games, shots_at_coordinate)