Solutions: IFT6758 - Project milestone 1

1. Download the play-by-play data for the 2016-2017 season.

In [None]:
import ift6758
import sys
import importlib
from multiprocessing.util import debug

sys.path.append('../..')
from ift6758.controller.nhl_data_downloader import NHLDataDownloader
#import controller.nhl_data_downloader

importlib.reload(ift6758.controller.nhl_data_downloader)
#from controller.nhl_data_downloader import NHLDataDownloader




In [None]:
# Create the project's downloader with its default settings and print the
# directory it exposes as `data_dir`.
downloader = NHLDataDownloader()
print(downloader.data_dir)

In [None]:
# Alternative kept for reference: fetch every season in one go.
#downloader.download_all_seasons_play_by_play(2016,2023)
# NOTE(review): the meaning of the second argument (5) is not visible in this
# notebook -- presumably a cap on the number of games; confirm against
# NHLDataDownloader.download_regular_season.
downloader.download_regular_season(2016, 5)

2. Interactive debug ipywidget to explore the data.

In [None]:
import ipywidgets as widgets
from IPython.display import display, Markdown
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import json
import os

In [None]:

# Shared downloader used by the season/type callback below.
downloader = NHLDataDownloader()
# Global data storage
# `all_games` is replaced by the downloader's return value when a season is
# selected; the callbacks below read it as a mapping keyed by game id.
all_games = {}
filtered_events = []  # Store filtered events globally

# Widgets to select season, season type, game ID, and event ID
# Season options are the strings "2016" .. "2023"; the callback converts the
# selected value back to int.
season = widgets.Dropdown(options=[str(year) for year in range(2016, 2024)], description='Season')
season_type = widgets.Dropdown(options=['Regular Season', 'Playoffs'], description='Season Type')
# Both sliders hold positional indexes (into the game list / filtered event
# list), not real NHL ids; their `max` is set once data is loaded.
game_slider = widgets.IntSlider(min=0, description='Game ID')
event_slider = widgets.IntSlider(min=0, description='Event ID')

# Widget to display the rink image and event info
# Each Output is written to by exactly one helper: plot_event_on_rink,
# get_event_info and get_game_info respectively.
rink_image_output = widgets.Output()
event_info_output = widgets.Output()
game_info_output = widgets.Output()

# Function to plot event coordinates on the rink image
def plot_event_on_rink(x, y, event, home_abbrev, away_abbrev):
    """Draw the rink picture for one event inside ``rink_image_output``.

    Args:
        x: Horizontal event coordinate, or None when unknown.
        y: Vertical event coordinate, or None when unknown.
        event: Play dictionary; its "typeDescKey", "timeInPeriod" and
            "periodDescriptor" -> "number" entries make up the title.
        home_abbrev: Label written above the right-hand side of the rink.
        away_abbrev: Label written above the left-hand side of the rink.

    The marker (and its legend) is only drawn when both coordinates are
    present; the title and team labels are always drawn.
    """
    # The image is looked up relative to the current working directory;
    # normpath collapses the '..' component into a clean path.
    image_path = os.path.normpath(
        os.path.join(os.getcwd(), '..', 'data', 'Images', 'nhl_rink.png')
    )
    rink_picture = mpimg.imread(image_path)

    # Two-line title: event type, then clock time and period number.
    kind = event.get("typeDescKey", "Unknown Event")
    clock = event.get("timeInPeriod", "Unknown Time")
    period = event.get("periodDescriptor", {}).get("number", "Unknown Period")
    heading = f"{kind}\n{clock} P-{period}"

    # (x position, label) pairs: away team on the left, home team on the right.
    team_labels = ((-90, away_abbrev), (90, home_abbrev))

    with rink_image_output:
        rink_image_output.clear_output()  # Drop the previous figure
        _, axes = plt.subplots(figsize=(6, 6))
        # Stretch the image over the coordinate range used by the event data.
        axes.imshow(rink_picture, extent=[-100, 100, -42.5, 42.5])
        if not (x is None or y is None):
            axes.scatter(x, y, color='blue', label="Event location")
            axes.legend()
        axes.set_title(heading)
        for x_pos, label in team_labels:
            axes.text(x_pos, 45, label, fontsize=12, ha='center', va='center')
        plt.show()

# Function to format and return game information
def get_game_info(game_data):
    """Show a summary of one game in ``game_info_output``.

    Args:
        game_data: Game dictionary. The "homeTeam"/"awayTeam" sub-dictionaries
            ("abbrev", "name" -> "default", "id", "score", "sog"),
            "startTimeUTC", "id" and "periodDescriptor" -> "periodType" are
            read; every missing (or null) entry falls back to a placeholder.

    Returns:
        Tuple ``(home_abbrev, away_abbrev)``.
    """
    # `or {}` also covers keys that are present but null, so the chained
    # lookups below can never hit `None.get(...)`.
    home_team = game_data.get("homeTeam") or {}
    away_team = game_data.get("awayTeam") or {}

    game_start_time = game_data.get("startTimeUTC", "Unknown Date")
    home_abbrev = home_team.get("abbrev", "Unknown Home Abbrev")
    away_abbrev = away_team.get("abbrev", "Unknown Away Abbrev")
    home_name = (home_team.get("name") or {}).get("default", "Unknown Home Name")
    away_name = (away_team.get("name") or {}).get("default", "Unknown Away Name")
    home_id = home_team.get("id", 0)
    away_id = away_team.get("id", 0)
    home_goals = home_team.get("score", 0)
    away_goals = away_team.get("score", 0)
    home_sog = home_team.get("sog", 0)
    away_sog = away_team.get("sog", 0)

    # Format the game state (period, overtime, etc.)
    game_period = (game_data.get("periodDescriptor") or {}).get("periodType", "Unknown")

    # Pretty display of game info. The four-space indent inside the template
    # is part of the emitted text and keeps the columns aligned.
    game_info = f"""
    {game_start_time}
    Game ID: {game_data.get('id', 'Unknown')}; {home_abbrev} (home) vs {away_abbrev} (away)

    {game_period}

    {'':<15}{'Home'.ljust(20)}{'Away'.ljust(20)}
    {'Teams:'.ljust(15)}{str(home_name).ljust(20)}{str(away_name).ljust(20)}
    {'Team ID:'.ljust(15)}{str(home_id).ljust(20)}{str(away_id).ljust(20)}
    {'Goals:'.ljust(15)}{str(home_goals).ljust(20)}{str(away_goals).ljust(20)}
    {'SoG:'.ljust(15)}{str(home_sog).ljust(20)}{str(away_sog).ljust(20)}
    """
    # Output game info in Markdown for better formatting
    with game_info_output:
        game_info_output.clear_output(wait=True)
        display(Markdown(f"{game_info}"))
    return home_abbrev, away_abbrev

# Function to display event details in JSON format
def get_event_info(event):
    """Show *event* as pretty-printed JSON inside ``event_info_output``.

    Args:
        event: JSON-serialisable play dictionary.
    """
    pretty = json.dumps(event, indent=4)
    fenced = f"```json\n{pretty}\n```"  # fenced block -> rendered as JSON code
    with event_info_output:
        # wait=True delays the clear until the replacement content arrives.
        event_info_output.clear_output(wait=True)
        display(Markdown(fenced))

# Download data based on season and season type
def on_season_or_type_change(change):
    """Reload the games for the selected season/type and show the first game.

    Registered as an observer on both the ``season`` and ``season_type``
    dropdowns. The current widget values are read directly, so *change* itself
    is not inspected.

    Args:
        change: Change notification passed by the widget (unused).
    """
    global all_games
    selected_season = int(season.value)  # Dropdown options are strings

    # Download data based on season and type
    if season_type.value == 'Regular Season':
        games = downloader.download_regular_season(selected_season)
    else:
        games = downloader.download_playoffs(selected_season)

    # Normalise a None/empty result so the other callbacks can keep calling
    # `all_games.keys()` safely.
    all_games = games or {}
    if not all_games:
        return

    game_slider.max = len(all_games) - 1
    # Keep the slider in sync with the game that is about to be displayed.
    game_slider.value = 0
    # update_game_plot renders the game info and the first event with the
    # team labels. No second update_event_plot call is made: without the
    # abbreviations it would redraw the rink and erase those labels.
    update_game_plot(0)

# Observe changes in season and season type dropdown.
# Both dropdowns share one callback; names='value' limits notifications to
# changes of the selected value.
season.observe(on_season_or_type_change, names='value')
season_type.observe(on_season_or_type_change, names='value')

# Update plot based on game ID and event ID
def update_event_plot(game_index, event_index, home_abbrev=None, away_abbrev=None):
    """Render one of the currently filtered events (rink plot + JSON dump).

    Args:
        game_index: Index of the selected game. Not used here; kept so that
            existing callers keep working.
        event_index: Position of the event inside ``filtered_events``.
        home_abbrev: Home-team label forwarded to the rink plot.
        away_abbrev: Away-team label forwarded to the rink plot.

    Does nothing when ``filtered_events`` is empty or *event_index* is past
    its end.
    """
    global filtered_events

    if not filtered_events or event_index >= len(filtered_events):
        return

    event = filtered_events[event_index]
    details = event.get("details") or {}
    x = details.get("xCoord")
    y = details.get("yCoord")
    # Always redraw: plot_event_on_rink skips the marker itself when a
    # coordinate is missing. Skipping the redraw would leave the previous
    # event's picture next to this event's JSON.
    plot_event_on_rink(x, y, event, home_abbrev, away_abbrev)
    get_event_info(event)  # Display event info after the image

# Update plot based on game ID and show game information
def update_game_plot(game_index):
    """Show the game at *game_index* and reset the event view to its first shot.

    Updates the game summary, rebuilds ``filtered_events`` with the game's
    "shot-on-goal" and "goal" plays, resizes and resets the event slider, and
    renders the first filtered event.

    Args:
        game_index: Position of the game in the key order of ``all_games``.
            Indexes past the end are ignored.
    """
    global filtered_events

    game_ids = list(all_games.keys())
    if game_index >= len(game_ids):
        return

    game_data = all_games.get(game_ids[game_index], {})
    home_abbrev, away_abbrev = get_game_info(game_data)  # Update game info text

    # Keep only shots on goal and goals; `or []` tolerates "plays": null.
    wanted_types = ("shot-on-goal", "goal")
    filtered_events = [
        play for play in (game_data.get("plays") or [])
        if play.get("typeDescKey") in wanted_types
    ]

    # A game without any shot/goal would otherwise give max == -1, which is
    # below the slider's min of 0 and is rejected by the widget.
    event_slider.max = max(len(filtered_events) - 1, 0)
    # Reset the event_slider to 0
    event_slider.value = 0
    # Explicit render: assigning 0 to a slider that is already at 0 fires no
    # change notification.
    update_event_plot(game_index, 0, home_abbrev, away_abbrev)

def on_game_change(change):
    """React to the game slider moving by displaying the newly selected game.

    Args:
        change: Change notification; ``change['new']`` is the new game index.
    """
    # update_game_plot already renders the first event together with the team
    # labels. Calling update_event_plot(0, 0) afterwards would redraw the rink
    # without abbreviations and erase those labels, so it is not called.
    update_game_plot(change['new'])

# Redraw the game view whenever the game slider's value changes.
game_slider.observe(on_game_change, names='value')

# Update plot based on event ID
def on_event_change(change):
    """React to the event slider moving by rendering the newly selected event.

    Args:
        change: Change notification; ``change['new']`` is the new event index.

    The team abbreviations are looked up again from the currently selected
    game so the rink plot keeps its labels. Nothing happens when the game
    slider points past the loaded games.
    """
    selected_game = game_slider.value
    selected_event = change['new']

    known_ids = list(all_games.keys())
    if selected_game >= len(known_ids):
        return

    game = all_games.get(known_ids[selected_game], {})
    home = game.get("homeTeam", {}).get("abbrev", "Unknown Home Abbrev")
    away = game.get("awayTeam", {}).get("abbrev", "Unknown Away Abbrev")
    update_event_plot(selected_game, selected_event, home, away)

# Redraw the event view whenever the event slider's value changes.
event_slider.observe(on_event_change, names='value')

# Layout organization
# Stack all controls and outputs vertically, in reading order.
layout = widgets.VBox([
    season, 
    season_type,
    game_slider,
    game_info_output,  # Insert game info between the game and event sliders
    event_slider,
    rink_image_output,  # Rink image after the event slider
    event_info_output  # Event info after the rink image
])

# Display the organized layout
display(layout)

# Initialize plot
# NOTE(review): `filtered_events` is reset to [] earlier in this cell and no
# game has been loaded yet, so this call appears to do nothing until a season
# is selected -- confirm whether an initial download was intended.
update_event_plot(0, 0)

3. Data cleaning

In [None]:
# Extract the shots and goals of a single game and preview the first rows.
# NOTE(review): the id format (season + game type + game number) is assumed
# from the value itself; confirm against the NHL API conventions used by
# NHLDataDownloader.
game_id = "2016020002"
downloader = NHLDataDownloader()
# Presumably returns a pandas DataFrame (the result exposes .head()); verify
# against NHLDataDownloader.extract_shots_and_goals_for_game.
shots_and_goals_df = downloader.extract_shots_and_goals_for_game(game_id)
print(shots_and_goals_df.head())