In [4]:
"""
Interactive NHL Data Explorer with ipywidgets
Must be run in a Jupyter notebook
"""

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import ipywidgets as widgets
from IPython.display import display, clear_output
from pathlib import Path

class InteractiveNHLExplorer:
    """Interactive explorer for NHL data using ipywidgets"""
    
    def __init__(self):
        # Load from tidy data which has REAL player names
        tidy_data_path = Path(r"C:\Users\AgeTeQ\Desktop\data\classes\DS\tp1\project-template\data\tidy")
        self.data = self._load_tidy_data(tidy_data_path)
        self.current_game_data = None
        
    def _load_tidy_data(self, tidy_dir):
        """Load data from tidy folder which has real player names"""
        if not tidy_dir.exists():
            print(f"Tidy data directory not found: {tidy_dir}")
            return pd.DataFrame()
        
        try:
            # Find all CSV files in tidy directory
            csv_files = list(tidy_dir.rglob("*.csv"))
            print(f"Found {len(csv_files)} CSV files in tidy directory")
            
            all_data = []
            for csv_file in csv_files:
                try:
                    df = pd.read_csv(csv_file)
                    print(f"Loaded {len(df)} rows from {csv_file.name}")
                    print(f"Columns: {df.columns.tolist()}")
                    
                    # Check if we have real player names
                    if 'player_name' in df.columns:
                        sample_names = df['player_name'].head(3).tolist()
                        print(f"Sample player names: {sample_names}")
                    
                    # Add calculated distance if not present
                    if 'distance_from_net' not in df.columns:
                        df = self._calculate_distance_for_data(df)
                    
                    all_data.append(df)
                except Exception as e:
                    print(f"Error loading {csv_file}: {e}")
                    continue
            
            if all_data:
                combined_df = pd.concat(all_data, ignore_index=True)
                print(f"Successfully loaded {len(combined_df)} total rows")
                
                # Verify we have real names
                if 'player_name' in combined_df.columns:
                    print(f"Real player names sample: {combined_df['player_name'].head(5).tolist()}")
                if 'goalie_name' in combined_df.columns:
                    print(f"Real goalie names sample: {combined_df['goalie_name'].head(5).tolist()}")
                    
                return combined_df
            else:
                print("No data loaded from tidy directory")
                return pd.DataFrame()
                
        except Exception as e:
            print(f"Error loading tidy data: {e}")
            return pd.DataFrame()
    
    def _calculate_distance_for_data(self, df):
        """Calculate distance for data that doesn't have it"""
        df['x_coord'] = pd.to_numeric(df['x_coord'], errors='coerce')
        df['y_coord'] = pd.to_numeric(df['y_coord'], errors='coerce')
        
        def calculate_distance(row):
            if pd.isna(row['x_coord']) or pd.isna(row['y_coord']):
                return None
            if row['x_coord'] < 0:
                net_x = -89
            else:
                net_x = 89
            distance = ((row['x_coord'] - net_x) ** 2 + row['y_coord'] ** 2) ** 0.5
            return distance
        
        valid_coords = df['x_coord'].notna() & df['y_coord'].notna()
        df.loc[valid_coords, 'distance_from_net'] = df[valid_coords].apply(calculate_distance, axis=1)
        df['is_goal'] = (df['event_type'] == 'GOAL').astype(int)
        
        return df
    
    def create_interactive_widget(self):
        """Create and display the interactive widgets"""
        if self.data.empty:
            print("No data available.")
            return
        
        # Dropdown for season
        seasons = sorted(self.data['season'].unique()) if 'season' in self.data.columns else ['All Seasons']
        # Dropdown for game type
        game_types = sorted(self.data['game_type'].unique()) if 'game_type' in self.data.columns else ['General']
        
        self.season_dropdown = widgets.Dropdown(
            options=seasons,
            value=seasons[0],
            description='Season:',
            style={'description_width': 'initial'}
        )
        
        self.game_type_dropdown = widgets.Dropdown(
            options=game_types,
            value=game_types[0],
            description='Game Type:',
            style={'description_width': 'initial'}
        )
        
        self.game_dropdown = widgets.Dropdown(
            description='Game ID:',
            style={'description_width': 'initial'}
        )
        
        self.event_slider = widgets.IntSlider(
            value=0,
            min=0,
            max=100,
            step=1,
            description='Event Index:',
            continuous_update=False,
            style={'description_width': 'initial'}
        )
        
        self.output = widgets.Output()
        
        # Handlers
        def on_season_change(change):
            self._update_game_list()
            
        def on_game_type_change(change):
            self._update_game_list()
            
        def on_game_change(change):
            self._update_event_slider()
            self._plot_current_event()
            
        def on_event_change(change):
            self._plot_current_event()
        
        # Connect handlers
        self.season_dropdown.observe(on_season_change, names='value')
        self.game_type_dropdown.observe(on_game_type_change, names='value')
        self.game_dropdown.observe(on_game_change, names='value')
        self.event_slider.observe(on_event_change, names='value')
        
        # Layout
        display(widgets.VBox([
            widgets.HBox([self.season_dropdown, self.game_type_dropdown]),
            self.game_dropdown,
            self.event_slider,
            self.output
        ]))
        
        # Initialize
        self._update_game_list()
    
    def _update_game_list(self):
        """Update the game dropdown based on season and game type"""
        season = self.season_dropdown.value
        game_type = self.game_type_dropdown.value
        
        games = self.data[
            (self.data['season'] == season) & 
            (self.data['game_type'] == game_type)
        ]['game_id'].unique()
        
        self.game_dropdown.options = sorted(games)
        if len(games) > 0:
            self.game_dropdown.value = games[0]
    
    def _update_event_slider(self):
        """Update the event slider to match the number of events in a game"""
        game_id = self.game_dropdown.value
        if game_id:
            game_data = self.data[self.data['game_id'] == game_id]
            self.current_game_data = game_data.reset_index(drop=True)
            max_events = len(game_data) - 1 if len(game_data) > 0 else 0
            self.event_slider.max = max_events
            self.event_slider.value = 0
    
    def _plot_current_event(self):
        """Redraw the three-panel figure for the event chosen on the slider.

        The panels show the selected event on the rink, every event of the
        game on the rink, and a text summary of the selected event. Everything
        is rendered into ``self.output``, replacing what was shown before.
        """
        with self.output:
            clear_output(wait=True)

            game = self.current_game_data
            nothing_to_show = (
                game is None
                or len(game) == 0
                or self.game_dropdown.value is None
            )
            if nothing_to_show:
                print("No game data available. Please select a game.")
                return

            # Keep the slider position inside this game's range of events
            position = self.event_slider.value
            last_position = len(game) - 1
            if position > last_position:
                position = last_position
                self.event_slider.value = position

            event = game.iloc[position]

            # One row of three panels
            fig, axes = plt.subplots(1, 3, figsize=(20, 6))
            location_ax, overview_ax, details_ax = axes

            # Panel 1: where the selected event happened
            self._draw_ice_rink(location_ax)
            self._plot_single_event(location_ax, event, position)
            location_ax.set_title('Current Event Location', fontsize=12)

            # Panel 2: every event of the game, with totals in the title
            self._draw_ice_rink(overview_ax)
            self._plot_all_events(overview_ax)
            goal_flags = game['is_goal']
            shots_count = int((goal_flags == 0).sum())
            goals_count = int((goal_flags == 1).sum())
            overview_ax.set_title(f'All Events in Game\nShots: {shots_count}, Goals: {goals_count}')

            # Panel 3: textual details of the selected event
            self._display_event_info(details_ax, event, position)
            details_ax.set_title('Event Information', fontsize=12, pad=20)

            plt.tight_layout()
            plt.show()
    
    def _draw_ice_rink(self, ax, rink_path=None):
        """Draw the NHL ice rink on ``ax``.

        The rink image is stretched over x in [-100, 100] and y in
        [-42.5, 42.5]. If the image cannot be read, a simple schematic is
        drawn instead: a shaded rectangle with blue lines at x = -25 and 25,
        goal lines at x = -89 and 89, and a centre line at x = 0.

        Args:
            ax: Matplotlib axes to draw on.
            rink_path: Path of the rink image. Defaults to the original
                project location when omitted.
        """
        if rink_path is None:
            rink_path = r"C:\Users\AgeTeQ\Desktop\data\classes\DS\tp1\project-template\figures\nhl_rink.png"

        x_min, x_max = -100, 100
        y_min, y_max = -42.5, 42.5

        try:
            rink_img = plt.imread(rink_path)
        except Exception:
            # Missing or unreadable image: use the schematic below. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate instead of being swallowed here.
            rink_img = None

        if rink_img is not None:
            ax.imshow(rink_img, extent=[x_min, x_max, y_min, y_max])
            ax.set_xlim(x_min, x_max)
            ax.set_ylim(y_min, y_max)
            ax.grid(False)
        else:
            # Fallback: draw simple rink representation
            ax.set_xlim(x_min, x_max)
            ax.set_ylim(y_min, y_max)

            # Main rink surface
            rink = patches.Rectangle((x_min, y_min), x_max - x_min, y_max - y_min,
                                     linewidth=2, edgecolor='black',
                                     facecolor='lightblue', alpha=0.3)
            ax.add_patch(rink)

            # Blue lines
            ax.axvline(x=-25, color='blue', linestyle='-', alpha=0.5, linewidth=2)
            ax.axvline(x=25, color='blue', linestyle='-', alpha=0.5, linewidth=2)

            # Goal lines
            ax.axvline(x=-89, color='red', linestyle='-', alpha=0.7, linewidth=2)
            ax.axvline(x=89, color='red', linestyle='-', alpha=0.7, linewidth=2)

            # Center line
            ax.axvline(x=0, color='red', linestyle='-', alpha=0.5, linewidth=1)

            ax.grid(True, alpha=0.3)

        ax.set_aspect('equal')
    
    def _plot_single_event(self, ax, event, event_index):
        """Plot a single event on the rink"""
        if pd.notna(event.get('x_coord')) and pd.notna(event.get('y_coord')):
            x, y = event['x_coord'], event['y_coord']
            
            # Determine marker style based on event type
            if event.get('is_goal') == 1:
                color = 'red'
                marker = '*'
                size = 200
                label = 'Goal'
            else:
                color = 'blue'
                marker = 'o'
                size = 120
                label = 'Shot'
            
            # Plot the event
            scatter = ax.scatter(x, y, c=color, marker=marker, s=size, alpha=0.9, 
                               edgecolors='white', linewidth=2, zorder=5, label=label)
            
            # Add event number annotation
            ax.annotate(f"Event {event_index + 1}", 
                       xy=(x, y), xytext=(x + 8, y + 8),
                       fontsize=9, fontweight='bold',
                       bbox=dict(boxstyle="round,pad=0.3", facecolor="yellow", alpha=0.9))
            
            # Add legend for event types
            ax.legend(loc='upper right', fontsize=8)
        else:
            # No coordinates available
            ax.text(0, 0, "No coordinates available", ha='center', va='center',
                   fontsize=12, bbox=dict(boxstyle="round,pad=0.5", facecolor="yellow", alpha=0.8))
    
    def _plot_all_events(self, ax):
        """Plot all shots and goals for the selected game"""
        if self.current_game_data is None:
            return
            
        # Separate shots and goals
        shots = self.current_game_data[self.current_game_data['is_goal'] == 0]
        goals = self.current_game_data[self.current_game_data['is_goal'] == 1]
        
        # Plot shots
        if not shots.empty:
            ax.scatter(shots['x_coord'], shots['y_coord'], 
                      c='blue', alpha=0.6, s=50, label=f'Shots ({len(shots)})')
        
        # Plot goals
        if not goals.empty:
            ax.scatter(goals['x_coord'], goals['y_coord'], 
                      c='red', alpha=1.0, s=100, marker='*', label=f'Goals ({len(goals)})')
        
        # Add legend
        ax.legend(loc='upper right')
    
    def _display_event_info(self, ax, event, event_index):
        """Display detailed information about the selected event"""
        ax.axis('off')
        
        if self.current_game_data is not None:
            # Get player names directly from the columns
            player_name = event.get('player_name', 'N/A')
            goalie_name = event.get('goalie_name', 'N/A')
            
            # Format the information text
            info_text = f"""
EVENT {event_index + 1} of {len(self.current_game_data)}

GAME INFORMATION:
Game ID: {event.get('game_id', 'N/A')}
Season: {event.get('season', 'N/A')}
Game Type: {event.get('game_type', 'N/A')}

EVENT DETAILS:
Event Type: {event.get('event_type', 'N/A')}
Period: {event.get('period', 'N/A')}
Time: {event.get('period_time', 'N/A')}
Team: {event.get('team_name', 'N/A')}
Player: {player_name}
Goalie: {goalie_name}
Shot Type: {event.get('shot_type', 'N/A')}

LOCATION DATA:
X Coordinate: {event.get('x_coord', 'N/A')}
Y Coordinate: {event.get('y_coord', 'N/A')}
Distance from Net: {event.get('distance_from_net', 'N/A')} ft
Goal: {'Yes' if event.get('is_goal') == 1 else 'No'}
"""
            
            # Display the information
            ax.text(0.05, 0.95, info_text, transform=ax.transAxes, fontsize=9, 
                   verticalalignment='top', fontfamily='monospace', linespacing=1.5,
                   bbox=dict(boxstyle="round,pad=1", facecolor="lightgray", alpha=0.8))

def launch_interactive_tool():
    """Create the explorer, show its widgets and hand the instance back.

    Returns:
        The ``InteractiveNHLExplorer`` whose widgets were displayed.
    """
    print("Starting NHL Data Explorer - Loading from Tidy Data")
    nhl_explorer = InteractiveNHLExplorer()
    nhl_explorer.create_interactive_widget()
    return nhl_explorer

# Setup for Jupyter notebook
if __name__ == "__main__":
    # Plotting defaults: wide figures and 10pt text
    plt.rcParams.update({
        'figure.figsize': [20, 6],
        'font.size': 10,
    })

    # Start the tool and keep a handle on the explorer
    explorer = launch_interactive_tool()

Starting NHL Data Explorer - Loading from Tidy Data
Found 18 CSV files in tidy directory
Loaded 636877 rows from all_seasons_combined.csv
Columns: ['game_id', 'season', 'game_type', 'event_type', 'period', 'period_time', 'x_coord', 'y_coord', 'shot_type', 'team_id', 'team_name', 'player_name', 'goalie_name', 'is_goal', 'empty_net', 'strength', 'game_winning_goal', 'attacking_net', 'team_type', 'distance_from_net', 'shot_angle', 'period_time_seconds']
Sample player names: ['Mitch Marner', 'Chris Kelly', 'Cody Ceci']
Loaded 100 rows from tidy_data_sample.csv
Columns: ['game_id', 'season', 'game_type', 'event_type', 'period', 'period_time', 'x_coord', 'y_coord', 'shot_type', 'team_id', 'team_name', 'player_name', 'goalie_name', 'is_goal', 'empty_net', 'strength', 'game_winning_goal', 'attacking_net', 'team_type', 'distance_from_net', 'shot_angle', 'period_time_seconds']
Sample player names: ['Mitch Marner', 'Chris Kelly', 'Cody Ceci']
Loaded 74963 rows from all_general_games_tidy.csv
Colu

VBox(children=(HBox(children=(Dropdown(description='Season:', options=(np.int64(2016), np.int64(2017), np.int6…