In [25]:
import SeasonData as sdata
import pandas as pd
import ipywidgets

In [26]:
# Generating play-by-play data for year 2022
data = sdata.NHLData()  # Object creation
data.fetch_regular_season(year=2022)  # Fetching data
data_rs = data.regular_season  # Get data in variable

Data imported: 0


In [27]:
def minutes_to_seconds(df: pd.DataFrame, column: str) -> pd.DataFrame:
    """
    Processing data and transformation to dataframe
    :param df: 
    :param column: 
    :return: 
    """
    # Split columns into 'minutes' and 'seconds' as integer
    df['minutes'] = df[column].str.split(':').str[0].astype(int)
    df['seconds'] = df[column].str.split(':').str[1].astype(int)

    # Total in seconds
    df[column] = df['minutes'] * 60 + df['seconds']

    #Drop columns
    df.drop(['minutes', 'seconds'], axis=1, inplace=True)

    return df

In [28]:
def convert_event_to_dataframe(game_nhl: dict) -> pd.DataFrame:
    """
    Convert NHL game event data into a clean dataframe  
    :param game_nhl: Dictionary containing the data of the NHL game
    :return: A Pandas DataFrame containing filtered data
    """

    # Extract the play-by-play data from the game dictionary
    df_pbp = pd.DataFrame(game_nhl['plays'])

    # Extract player data
    df_players = pd.DataFrame(game_nhl['rosterSpots'])[['playerId', 'firstName', 'lastName']]

    # Keep the default name for each player (first and last name)
    df_players['firstName'] = df_players['firstName'].apply(lambda x: x['default'])
    df_players['lastName'] = df_players['lastName'].apply(lambda x: x['default'])

    # Extract the 'home' and 'away' teams
    home_team = {'teamId': game_nhl['homeTeam']['id'], 'teamName': game_nhl['homeTeam']['name']['default'],
                 'teamSide': 'home'}
    away_team = {'teamId': game_nhl['awayTeam']['id'], 'teamName': game_nhl['awayTeam']['name']['default'],
                 'teamSide': 'away'}
    df_team = pd.DataFrame([home_team, away_team])

    # Parse the game's start time to UTC
    start_time = pd.to_datetime(game_nhl['startTimeUTC'])

    # Create a new dataframe for the event data
    new_df = pd.DataFrame(df_pbp[['periodDescriptor', 'timeInPeriod', 'situationCode', 'typeDescKey', 'details']])

    # Insert the game ID as the first column
    new_df.insert(0, 'idGame', game_nhl['id'])

    # Filter to keep only events of type 'shot-on-goal' or 'goal'
    new_df = new_df[(new_df['typeDescKey'] == 'shot-on-goal') | (new_df['typeDescKey'] == 'goal')].reset_index(
        drop=True)

    # Convert time in the period to seconds and add it to the game start time
    new_df = minutes_to_seconds(new_df, 'timeInPeriod')
    new_df['timeInPeriod'] = start_time + pd.to_timedelta(new_df['timeInPeriod'], unit='s')

    # Decompose the period descriptor fields
    df_period = pd.DataFrame(new_df['periodDescriptor'].tolist())
    new_df.drop('periodDescriptor', axis=1, inplace=True)

    # Convert 'number' and 'maxRegulationPeriods' columns as strings
    df_period[['number', 'maxRegulationPeriods']] = df_period[['number', 'maxRegulationPeriods']].astype(str)

    # Add 'periodType' and 'currentPeriod' columns to the new dataframe
    new_df.insert(1, 'periodType', df_period['periodType'])
    new_df.insert(2, 'currentPeriod', df_period['number'] + '/' + df_period['maxRegulationPeriods'])

    # Decompose the details fields
    df_details = pd.DataFrame(new_df['details'].tolist())
    new_df.drop('details', axis=1, inplace=True)

    # Combine the x and y coordinates into a tuple
    df_details['iceCoord'] = df_details[['xCoord', 'yCoord']].apply(tuple, axis=1)

    # Merge 'shooting' and 'scoring' player, to keep only one column
    df_details['shootingPlayerId'] = df_details['shootingPlayerId'].fillna(0) + df_details['scoringPlayerId'].fillna(0)

    # Convert 'shootingPlayerId' and 'goalieInNetId' as integer
    df_details['shootingPlayerId'] = df_details['shootingPlayerId'].astype(int)
    df_details['goalieInNetId'] = df_details['goalieInNetId'].astype('Int64')  # Int64: handling NaN values

    # Add the shooter names by merging IDs
    df_details = pd.merge(df_players, df_details, left_on='playerId', right_on='shootingPlayerId', how='inner').drop(
        columns=['playerId'])

    # Keep only full name
    df_details['shootingPlayer'] = df_details['firstName'] + ' ' + df_details['lastName']
    df_details.drop(['firstName', 'lastName'], axis=1, inplace=True)

    # Add the goalies names by merging IDs
    df_details = pd.merge(df_players, df_details, left_on='playerId', right_on='goalieInNetId', how='inner').drop(
        columns=['playerId'])

    # Keep only full name
    df_details['goaliePlayer'] = df_details['firstName'] + ' ' + df_details['lastName']
    df_details.drop(['firstName', 'lastName'], axis=1, inplace=True)

    # Add team data by merging IDs
    df_details = pd.merge(df_team, df_details, left_on='teamId', right_on='eventOwnerTeamId', how='inner')

    # Add the extracted data to the new dataframe
    new_df['iceCoord'] = df_details['iceCoord']
    new_df['shootingPlayer'] = df_details['shootingPlayer']
    new_df['goaliePlayer'] = df_details['goaliePlayer']
    new_df['shotType'] = df_details['shotType']
    new_df.insert(5, 'eventOwnerTeam', df_details['teamName'])
    new_df['teamSide'] = df_details['teamSide']

    # TODO: emptyGoalNet
    #df_details['teamSide'] = df_details['teamSide'].apply(lambda event: 0 if event == 'home' else 3)
    #new_df['emptyGoalNet'] = new_df['situationCode'].str[0].astype(bool).apply(lambda x: not x)

    # TODO: advantage / disadvantage
    # Get split information 0 | 65 | 1

    return new_df

In [29]:
# Test on one game (avoid to test on all data)

new_data = sdata.NHLData()
new_data.fetch_regular_season(year=2022)
data_2022 = data.regular_season['2022']
game_one_2022 = data_2022[0]
df_game_one_2022 = convert_event_to_dataframe(game_one_2022)
df_game_one_2022.head()


Data imported: 0


Unnamed: 0,idGame,periodType,currentPeriod,timeInPeriod,situationCode,eventOwnerTeam,typeDescKey,iceCoord,shootingPlayer,goaliePlayer,shotType,teamSide
0,2022020001,REG,1/3,2022-10-07 18:00:23+00:00,1551,Predators,shot-on-goal,"(-75, 32)",Ryan McDonagh,James Reimer,wrist,home
1,2022020001,REG,1/3,2022-10-07 18:00:59+00:00,1551,Predators,shot-on-goal,"(-52, -14)",Roman Josi,James Reimer,wrist,home
2,2022020001,REG,1/3,2022-10-07 18:01:01+00:00,1551,Predators,goal,"(45, 10)",Roman Josi,James Reimer,wrist,home
3,2022020001,REG,1/3,2022-10-07 18:01:12+00:00,1551,Predators,shot-on-goal,"(80, 0)",Roman Josi,James Reimer,tip-in,home
4,2022020001,REG,1/3,2022-10-07 18:02:42+00:00,1551,Predators,shot-on-goal,"(28, -11)",Mark Borowiecki,James Reimer,wrist,home


In [30]:
# Test on all regular season
"""
#Loop on all years in regular season
for year in data_rs:
    current_year = data_rs[year]  #Get year
    sdata_year = len(current_year)  #Get the number of games in the year

    #Loop on all game in the year
    for game in range(sdata_year):
        current_year[game] = convert_event_to_dataframe(game_nhl=current_year[game])  #Processing game data

data_rs['2022'][0].head()
"""

"\n#Loop on all years in regular season\nfor year in data_rs:\n    current_year = data_rs[year]  #Get year\n    sdata_year = len(current_year)  #Get the number of games in the year\n\n    #Loop on all game in the year\n    for game in range(sdata_year):\n        current_year[game] = convert_event_to_dataframe(game_nhl=current_year[game])  #Processing game data\n\ndata_rs['2022'][0].head()\n"

In [31]:
# Fetching teams abbreviation
print('game id')
print(data.regular_season['2022'][0]['id'])
print('away team abbrev')
print(data.regular_season['2022'][0]['awayTeam']['abbrev'])
print('home team abbrev')
print(data.regular_season['2022'][0]['homeTeam']['abbrev'])
print('away team score')
print(data.regular_season['2022'][0]['awayTeam']['score'])
print('home team score')
print(data.regular_season['2022'][0]['homeTeam']['score'])
print('away team shoot on goal')
print(data.regular_season['2022'][0]['awayTeam']['sog'])
print('home team shoot on goal')
print(data.regular_season['2022'][0]['homeTeam']['sog'])


game id
2022020001
away team abbrev
SJS
home team abbrev
NSH
away team score
1
home team score
4
away team shoot on goal
31
home team shoot on goal
32


In [32]:
# we assume it is a regular season

season = '2022'
rs_data = sdata.NHLData()
rs_data.fetch_regular_season(year=season)


def plot_game(game_number):
    game_id = int(f"{season}{sdata.GameType.REGULAR_SEASON.value}{game_number:04d}")

    # to improve
    game_data = next((item for item in rs_data.regular_season[season] if item["id"] == game_id), None)

    print(game_data['startTimeUTC'])
    print(
        f"Game ID: {game_number}; {game_data['homeTeam']['abbrev']} (home) vs {game_data['awayTeam']['abbrev']} (away)")

    col1 = ['', 'Teams', 'Goals', 'SoG']
    col2 = ["Home", f"{game_data['homeTeam']['abbrev']}", f"{game_data['homeTeam']['score']}",
            f"{game_data['homeTeam']['sog']}"]
    col3 = ["Away", f"{game_data['awayTeam']['abbrev']}", f"{game_data['awayTeam']['score']}",
            f"{game_data['awayTeam']['sog']}"]
    print('')
    for c1, c2, c3 in zip(col1, col2, col3):
        print(f'{c1:<18} {c2:<18} {c3:<18}')


ipywidgets.interact(plot_game, game_number=(1, len(rs_data.regular_season[season]), 1))

Data imported: 0


interactive(children=(IntSlider(value=656, description='game_number', max=1312, min=1), Output()), _dom_classe…

<function __main__.plot_game(game_number)>