Solutions: IFT6758 - Project milestone 1

1. Download the play-by-play data for the 2016-2017 season.

In [25]:
import sys
from multiprocessing.util import debug

sys.path.append('..')
import controller.nhl_data_downloader
import importlib
importlib.reload(controller.nhl_data_downloader)
from controller.nhl_data_downloader import NHLDataDownloader




In [26]:
downloader = NHLDataDownloader()
print(downloader.data_dir)

c:\Users\poki1\Documents\UdeM\IFT 6758 Science des Donees\Projet\ift6758\notebooks\..\controller\../data/play_by_play


In [None]:
#downloader.download_all_seasons_play_by_play(2016,2023)
downloader.download_regular_season(2016, 5)

2. Interactive debug ipywidget to explore the data.

In [28]:
import ipywidgets as widgets
from IPython.display import display, Markdown
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import json
import os

In [29]:

downloader = NHLDataDownloader()
# Global data storage
all_games = {}
filtered_events = []  # Store filtered events globally

# Widgets to select season, season type, game ID, and event ID
season = widgets.Dropdown(options=[str(year) for year in range(2016, 2024)], description='Season')
season_type = widgets.Dropdown(options=['Regular Season', 'Playoffs'], description='Season Type')
game_slider = widgets.IntSlider(min=0, description='Game ID')
event_slider = widgets.IntSlider(min=0, description='Event ID')

# Widget to display the rink image and event info
rink_image_output = widgets.Output()
event_info_output = widgets.Output()
game_info_output = widgets.Output()

# Function to plot event coordinates on the rink image
def plot_event_on_rink(x, y, event, home_abbrev, away_abbrev):
    rink_img_path = os.path.join('..','figures', 'nhl_rink.png')  # Update this to the correct path for the rink image
    rink_img = mpimg.imread(rink_img_path)
    
    # Get event details for the title
    event_desc = event.get("typeDescKey", "Unknown Event")
    event_time = event.get("timeInPeriod", "Unknown Time")
    period_number = event.get("periodDescriptor", {}).get("number", "Unknown Period")

    # Construct the title string (similar to the example picture)
    title = f"{event_desc}\n{event_time} P-{period_number}"

    with rink_image_output:
        rink_image_output.clear_output()  # Clear the previous plot
        fig, ax = plt.subplots(figsize=(6, 6))
        ax.imshow(rink_img, extent=[-100, 100, -42.5, 42.5])
        if x is not None and y is not None:
            ax.scatter(x, y, color='blue', label="Event location")
            ax.legend()
        ax.set_title(title)
        ax.text(-90, 45, away_abbrev, fontsize=12, ha='center', va='center')  # Away team on the left
        ax.text(90, 45, home_abbrev, fontsize=12, ha='center', va='center')  # Home team on the right
        plt.show()

# Function to format and return game information
def get_game_info(game_data):
    
    game_start_time = game_data.get("startTimeUTC", "Unknown Date")
    home_abbrev = game_data.get("homeTeam", {}).get("abbrev", "Unknown Home Abbrev")
    away_abbrev = game_data.get("awayTeam", {}).get("abbrev", "Unknown Away Abbrev")
    home_name = game_data.get("homeTeam", {}).get("name").get("default", "Unknown Home Name")
    away_name = game_data.get("awayTeam", {}).get("name").get("default", "Unknown Home Name")
    home_id = game_data.get("homeTeam", {}).get("id", 0)
    away_id = game_data.get("awayTeam", {}).get("id", 0)
    home_goals = game_data.get("homeTeam", {}).get("score", 0)
    away_goals = game_data.get("awayTeam", {}).get("score", 0)
    home_sog = game_data.get("homeTeam", {}).get("sog", 0)
    away_sog = game_data.get("awayTeam", {}).get("sog", 0)

    # Format the game state (period, overtime, etc.)
    game_period = game_data.get("periodDescriptor", {}).get("periodType", "Unknown")
    
    # Pretty display of game info
    game_info = f"""
    {game_start_time}
    Game ID: {game_data.get('id', 'Unknown')}; {home_abbrev} (home) vs {away_abbrev} (away)

    {game_period}

    {'':<15}{'Home'.ljust(20)}{'Away'.ljust(20)}
    {'Teams:'.ljust(15)}{home_name.ljust(20)}{away_name.ljust(20)}
    {'Team ID:'.ljust(15)}{str(home_id).ljust(20)}{str(away_id).ljust(20)}
    {'Goals:'.ljust(15)}{str(home_goals).ljust(20)}{str(away_goals).ljust(20)}
    {'SoG:'.ljust(15)}{str(home_sog).ljust(20)}{str(away_sog).ljust(20)}
    """
    # Output game info in Markdown for better formatting
    with game_info_output:
        game_info_output.clear_output(wait=True)
        display(Markdown(f"{game_info}"))
    return home_abbrev, away_abbrev

# Function to display event details in JSON format
def get_event_info(event):
    event_info_json = json.dumps(event, indent=4)
    with event_info_output:
        event_info_output.clear_output(wait=True)
        display(Markdown(f"```json\n{event_info_json}\n```"))

# Download data based on season and season type
def on_season_or_type_change(change):
    global all_games
    selected_season = int(season.value)  # Get the selected season
    season_type_value = season_type.value

    # Download data based on season and type
    if season_type_value == 'Regular Season':
        all_games = downloader.download_regular_season(selected_season)
    else:
        all_games = downloader.download_playoffs(selected_season)

    # Ensure that all_games is not empty before proceeding
    if len(all_games) > 0:
        # Convert keys to a list and update the slider max value
        game_slider.max = len(all_games) - 1
        update_game_plot(0)
        update_event_plot(0, 0)

# Observe changes in season and season type dropdown
season.observe(on_season_or_type_change, names='value')
season_type.observe(on_season_or_type_change, names='value')

# Update plot based on game ID and event ID
def update_event_plot(game_index, event_index, home_abbrev=None, away_abbrev=None):
    global filtered_events

    if len(filtered_events) > 0 and event_index < len(filtered_events):
        event = filtered_events[event_index]
        x = event.get("details", {}).get("xCoord", None)
        y = event.get("details", {}).get("yCoord", None)
        if x is not None and y is not None:
            plot_event_on_rink(x, y, event, home_abbrev, away_abbrev)
        get_event_info(event)  # Display event info after the image

# Update plot based on game ID and show game information
def update_game_plot(game_index):
    global all_games, filtered_events
    game_ids = list(all_games.keys())
    if game_index < len(game_ids):
        game_id = game_ids[game_index]
        game_data = all_games.get(game_id, {})
        home_abbrev, away_abbrev = get_game_info(game_data)  # Update game info text
        
        # Filter events with typeDescKey "shot-on-goal" or "goal"
        filtered_events = [e for e in game_data.get("plays", []) if e.get("typeDescKey") in ["shot-on-goal", "goal"]]
        
        # Update the event slider max value to match the number of filtered events
        event_slider.max = len(filtered_events) - 1  # Update event slider based on filtered events
        # Reset the event_slider to 0
        event_slider.value = 0
        update_event_plot(game_index, 0, home_abbrev, away_abbrev)  # Update the plot based on the first event

def on_game_change(change):
    game_index = change['new']
    update_game_plot(game_index)
    update_event_plot(0, 0)

game_slider.observe(on_game_change, names='value')

# Update plot based on event ID
def on_event_change(change):
    game_index = game_slider.value
    event_index = change['new']
    # Get the current game data
    game_ids = list(all_games.keys())
    if game_index < len(game_ids):
        game_id = game_ids[game_index]
        game_data = all_games.get(game_id, {})
        home_abbrev = game_data.get("homeTeam", {}).get("abbrev", "Unknown Home Abbrev")
        away_abbrev = game_data.get("awayTeam", {}).get("abbrev", "Unknown Away Abbrev")
        update_event_plot(game_index, event_index, home_abbrev, away_abbrev)

event_slider.observe(on_event_change, names='value')

# Layout organization
layout = widgets.VBox([
    season, 
    season_type,
    game_slider,
    game_info_output,  # Insert game info between the game and event sliders
    event_slider,
    rink_image_output,  # Rink image after the event slider
    event_info_output  # Event info after the rink image
])

# Display the organized layout
display(layout)

# Initialize plot
update_event_plot(0, 0)

VBox(children=(Dropdown(description='Season', options=('2016', '2017', '2018', '2019', '2020', '2021', '2022',…

Failed to download data for game 2016030117.
Failed to download data for game 2016030127.
Failed to download data for game 2016030137.
Failed to download data for game 2016030146.
Failed to download data for game 2016030147.
Failed to download data for game 2016030155.
Failed to download data for game 2016030156.
Failed to download data for game 2016030157.
Failed to download data for game 2016030166.
Failed to download data for game 2016030167.
Failed to download data for game 2016030175.
Failed to download data for game 2016030176.
Failed to download data for game 2016030177.
Failed to download data for game 2016030187.
Failed to download data for game 2016030217.
Failed to download data for game 2016030237.
Failed to download data for game 2016030327.
Failed to download data for game 2016030417.


FileNotFoundError: [Errno 2] No such file or directory: '..\\figures\\nhl_rink.png'

FileNotFoundError: [Errno 2] No such file or directory: '..\\figures\\nhl_rink.png'

FileNotFoundError: [Errno 2] No such file or directory: '..\\figures\\nhl_rink.png'

FileNotFoundError: [Errno 2] No such file or directory: '..\\figures\\nhl_rink.png'

FileNotFoundError: [Errno 2] No such file or directory: '..\\figures\\nhl_rink.png'

3. Data cleaning

In [30]:
game_id = "2016020002"
downloader = NHLDataDownloader()
shots_and_goals_df = downloader.extract_shots_and_goals_for_game(game_id)
print(shots_and_goals_df.head())

Game data for 2016020002 already exists in local cache.
      game_id   game_date  period time_in_period  event_id    event_type  \
0  2016020002  2016-10-12       1          00:12         7  shot-on-goal   
1  2016020002  2016-10-12       1          01:10         9  shot-on-goal   
2  2016020002  2016-10-12       1          03:28        15  shot-on-goal   
3  2016020002  2016-10-12       1          03:45        18  shot-on-goal   
4  2016020002  2016-10-12       1          04:37        20  shot-on-goal   

   is_goal shot_type  x_coord  y_coord  event_owner_team_id   team_name  \
0    False      snap       64      -16                   19       Blues   
1    False     wrist       52       -8                   19       Blues   
2    False  backhand      -82       -8                   16  Blackhawks   
3    False     wrist      -76       29                   16  Blackhawks   
4    False     wrist      -76        2                   16  Blackhawks   

  team_type  empty_net strength_stat