Solutions: IFT6758 - Project milestone 1

1. Download the play-by-play data for the 2016-2017 season.

In [1]:
import sys
from multiprocessing.util import debug

sys.path.append('..')
from ift6758.data import NHLDataDownloader

In [3]:
downloader = NHLDataDownloader()

In [3]:
downloader.download_all_seasons_play_by_play(2016,2023)

Game data for 2016020001 already exists in local cache.
Game data for 2016020002 already exists in local cache.
Game data for 2016020003 already exists in local cache.
Game data for 2016020004 already exists in local cache.
Game data for 2016020005 already exists in local cache.
Game data for 2016020006 already exists in local cache.
Game data for 2016020007 already exists in local cache.
Game data for 2016020008 already exists in local cache.
Game data for 2016020009 already exists in local cache.
Game data for 2016020010 already exists in local cache.
Game data for 2016020011 already exists in local cache.
Game data for 2016020012 already exists in local cache.
Game data for 2016020013 already exists in local cache.
Game data for 2016020014 already exists in local cache.
Game data for 2016020015 already exists in local cache.
Game data for 2016020016 already exists in local cache.
Game data for 2016020017 already exists in local cache.
Game data for 2016020018 already exists in local

2. Interactive debug ipywidget to explore the data.

In [64]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
import json

In [67]:
downloader = NHLDataDownloader()
# Global data storage
all_games = {}
game_id = ""
event_id = ""

# Widgets to select season, season type, game ID, and event ID
season = widgets.Dropdown(options=[str(year) for year in range(2016, 2024)], description='Season')
season_type = widgets.Dropdown(options=['Regular Season', 'Playoffs'], description='Season Type')
game_slider = widgets.IntSlider(min=0, description='Game ID')
event_slider = widgets.IntSlider(min=0, description='Event ID')

# Widgets to display game info and event info
game_info_text = widgets.Text(value="", description="Game Info", disabled=True, layout=widgets.Layout(width="90%"))
event_info_text = widgets.Text(value="", description="Event Info", disabled=True, layout=widgets.Layout(width="90%"))

# Widget to display the rink image
rink_image_output = widgets.Output()

# Function to plot event coordinates on the rink image
def plot_event_on_rink(x, y):
    rink_img_path = os.path.join('..','figures', 'nhl_rink.png')  # Update this to the correct path for the rink image
    rink_img = mpimg.imread(rink_img_path)

    with rink_image_output:
        rink_image_output.clear_output()  # Clear the previous plot
        fig, ax = plt.subplots(figsize=(6, 6))
        ax.imshow(rink_img, extent=[-100, 100, -42.5, 42.5])
        if x is not None and y is not None:
            ax.scatter(x, y, color='blue', label="Event location")
            ax.legend()
        plt.show()


# Function to format and return game information
def get_game_info(game_data):
    game_start_time = game_data.get("startTimeUTC", "Unknown Date")
    home_abbrev = game_data.get("homeTeam", {}).get("abbrev", "Unknown Home Abbrev")
    away_abbrev = game_data.get("awayTeam", {}).get("abbrev", "Unknown Away Abbrev")
    home_goals = game_data.get("homeTeam", {}).get("score", 0)
    away_goals = game_data.get("awayTeam", {}).get("score", 0)
    home_sog = game_data.get("homeTeam", {}).get("sog", 0)
    away_sog = game_data.get("awayTeam", {}).get("sog", 0)

    # Format the game state (period, overtime, etc.)
    game_period = game_data.get("periodDescriptor", {}).get("periodType", "Unknown")

    # Return formatted game info similar to the image
    return (f"{game_start_time}\n"
            f"Game ID: {game_data.get('id', 'Unknown')}; {home_abbrev} (home) vs {away_abbrev} (away)\n\n"
            f"{game_period}\n"
            f"          Home       Away\n"
            f"Teams:    {home_abbrev}         {away_abbrev}\n"
            f"Goals:    {home_goals}           {away_goals}\n"
            f"SoG:      {home_sog}           {away_sog}\n")


# Function to format and return event details
def get_event_info(event):
    # Use json.dumps to pretty-print the event data
    event_info_json = json.dumps(event, indent=4)

    # Return the formatted JSON string
    return event_info_json

# Download data based on season and season type
def on_season_or_type_change(change):
    global all_games
    selected_season = int(season.value)  # Get the selected season
    season_type_value = season_type.value

    # Download data based on season and type
    if season_type_value == 'Regular Season':
        all_games = downloader.download_regular_season(selected_season)
    else:
        all_games = downloader.download_playoffs(selected_season)

    # Ensure that all_games is not empty before proceeding
    if len(all_games) > 0:
        # Convert keys to a list and update the slider max value
        game_slider.max = len(all_games) - 1
        update_game_plot(0)
        update_event_plot(0, 0)

# Observe changes in season and season type dropdown
season.observe(on_season_or_type_change, names='value')
season_type.observe(on_season_or_type_change, names='value')

# Update plot based on game ID and event ID
def update_event_plot(game_index, event_index):
    global all_games
    global event_info_text

    # Get list of game IDs and access the selected game data
    game_ids = list(all_games.keys())
    if game_index < len(game_ids):
        game_id = game_ids[game_index]
        game_data = all_games.get(game_id, {})

        # Get the events for the selected game
        events = game_data.get("plays", [])
        if len(events) > 0 and event_index < len(events):
            event = events[event_index]
            x = event.get("details", {}).get("xCoord", None)
            y = event.get("details", {}).get("yCoord", None)
            if x is not None and y is not None:
                plot_event_on_rink(x, y)
            event_info_text.value = get_event_info(event)
        else:
            event_info_text.value = "No event data available."


# Update plot based on game ID and show game information
def update_game_plot(game_index):
    global all_games
    game_ids = list(all_games.keys())
    if game_index < len(game_ids):
        game_id = game_ids[game_index]
        game_data = all_games.get(game_id, {})
        clear_output()  # Clear the previous output to update new game info
        game_info_text.value = get_game_info(game_data)  # Update game info text
        event_slider.max = len(game_data.get("plays", [])) - 1  # Update event slider based on the number of events

def on_game_change(change):
    game_index = change['new']
    update_game_plot(game_index)
    update_event_plot(game_index, 0)

game_slider.observe(on_game_change, names='value')


# Update plot based on event ID
def on_event_change(change):
    game_index = game_slider.value
    event_index = change['new']
    update_event_plot(game_index, event_index)


event_slider.observe(on_event_change, names='value')

# Display widgets and text areas
display(season,season_type, game_slider, game_info_text, event_slider,rink_image_output, event_info_text)

# Initialize plot
update_event_plot(0, 0)

Dropdown(description='Season', options=('2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023'), value…

Dropdown(description='Season Type', options=('Regular Season', 'Playoffs'), value='Regular Season')

IntSlider(value=0, description='Game ID')

Text(value='', description='Game Info', disabled=True, layout=Layout(width='90%'))

IntSlider(value=0, description='Event ID')

Output()

Text(value='', description='Event Info', disabled=True, layout=Layout(width='90%'))

3. Data cleaning

In [2]:
game_id = "2016020002"
downloader = NHLDataDownloader()
shots_and_goals_df = downloader.extract_shots_and_goals_for_game(game_id)
print(shots_and_goals_df.head())

Game data for 2016020002 already exists in local cache.
      game_id   game_date  period time_in_period    event_type  shot-on-goal  \
0  2016020002  2016-10-12       1          00:12  shot-on-goal          True   
1  2016020002  2016-10-12       1          00:11           hit         False   
2  2016020002  2016-10-12       1          01:10  shot-on-goal          True   
3  2016020002  2016-10-12       1          01:24           hit         False   
4  2016020002  2016-10-12       1          02:38           hit         False   

  shot_type  x_coord  y_coord  team_id   team_name team_type  empty_net  \
0      snap       64      -16       19       Blues      away      False   
1   Unknown       88      -37       19       Blues      away      False   
2     wrist       52       -8       19       Blues      away      False   
3   Unknown      -95       21       16  Blackhawks      home      False   
4   Unknown      -33      -37       16  Blackhawks      home      False   

  strength_s