In [64]:
#hide
import pandas as pd
from bokeh.plotting import figure, show, output_file
from bokeh.transform import jitter
from bokeh.models import ColumnDataSource, Select, CustomJS, RadioButtonGroup
from bokeh.palettes import Colorblind
from bokeh.layouts import layout, column, row
from IPython.core.display import display, HTML

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
from sklearn.cluster import KMeans
from sklearn import metrics
from sklearn.datasets import make_blobs
from datetime import datetime
# Configure seaborn in a single call. `sb.set` is an alias of `sb.set_theme`,
# so a follow-up `sb.set(rc=...)` that omits `style` falls back to the default
# "darkgrid" style and silently discards a previously selected "whitegrid".
sb.set_theme(style="whitegrid", rc={'figure.figsize': (14, 8.27)})

In [65]:
#hide

# Player table: add a readable `position` label derived from the one-letter
# position code. Codes that are not in the mapping are left untouched by `replace`.
position_labels = {'D': 'Defense', 'L': 'Left Wing', 'C': "Center", 'R': 'Right Wing'}
players_df = pd.read_csv(
    r"C:\Users\jacks\Development\Python\jcrs-blog\_notebooks\person.csv"
).assign(position=lambda frame: frame['primary_position_code'].replace(position_labels))

# Season stats: keep rows for league_id 133 with more than 9 games, collapse to
# one row per (player, year) using the column-wise maximum, then order each
# player's seasons chronologically.
nhl_stats_df = (
    pd.read_csv(r"C:\Users\jacks\Development\Python\jcrs-blog\_notebooks\playerStats.csv")
    .loc[lambda frame: (frame['league_id'] == 133) & (frame['games'] > 9)]
    .groupby(['person_id', 'year'])
    .max()
    .reset_index()
    .sort_values(by=['person_id', 'year'])
)
# 1-based counter of a player's qualifying seasons, in chronological order.
nhl_stats_df = nhl_stats_df.assign(
    years_in_nhl=nhl_stats_df.groupby('person_id').cumcount() + 1
)

# Inner join: only players that have at least one qualifying season survive.
stats = players_df.merge(nhl_stats_df, on='person_id')
years = sorted(stats['years_in_nhl'].unique())
# NOTE(review): taken from the full player table, so this may include positions
# that have no rows in `stats` — confirm that is intended.
positions = players_df['position'].unique()


In [66]:
#hide

# Columns whose underscores are swapped for spaces so they read as labels.
rename_map = {
    column: column.replace('_', ' ')
    for column in (
        'power_play_goals',
        'power_play_assists',
        'game_winning_goals',
        'short_handed_goals',
        'short_handed_assists',
        'blocked_shots',
        'plus_minus',
    )
}

# Total points first, then apply the human-readable column names.
stats = stats.assign(points=stats['goals'] + stats['assists']).rename(columns=rename_map)

# Special-teams point totals, built from the renamed goal/assist columns.
stats = stats.assign(**{
    'power play points': stats['power play goals'] + stats['power play assists'],
    'short handed points': stats['short handed goals'] + stats['short handed assists'],
})

# Statistics offered on the x axis, in the order they are listed to the user.
x_axis_values = [
    'goals', 'power play goals', 'short handed goals', 'game winning goals',
    'assists', 'power play assists', 'short handed assists',
    'points', 'power play points', 'short handed points',
    'shots', 'hits', 'pim', 'blocked shots', 'plus minus',
    'shifts', 'games', 'weight',
]

In [75]:
#hide
# Box-plot column name -> quantile level computed per `years_in_nhl` group.
quantile_levels = {'floor': 0, 'q1': 0.25, 'q2': 0.5, 'q3': 0.75, 'roof': 1}

# position_sources[position_index][stat_name] -> {'box': ..., 'scatter': ...}
# One pre-built pair of data sources per (position, statistic) combination.
position_sources = {}

for index, position in enumerate(positions):
    position_sources[index] = {}
    position_df = stats[stats['position'] == position]
    for x_axis_value in x_axis_values:
        # Expose the statistic under the generic name 'field' so every source
        # has the same column layout regardless of which statistic it holds.
        value_df = position_df[[x_axis_value, 'years_in_nhl']].rename(columns={x_axis_value: 'field'})
        scatter_source = ColumnDataSource(value_df)

        # Build the quantile table in one go from Series (one per level). The
        # index keeps the name 'years_in_nhl', which ColumnDataSource turns
        # into a column of that name.
        grouped_field = value_df.groupby('years_in_nhl')['field']
        quantiles_df = pd.DataFrame({
            column: grouped_field.quantile(level)
            for column, level in quantile_levels.items()
        }).rename_axis('years_in_nhl')
        box_source = ColumnDataSource(quantiles_df)

        position_sources[index][x_axis_value] = {
            'box': box_source,
            'scatter': scatter_source
        }

# Initial state: fresh sources seeded with the data of the initial
# (position, statistic) pair, plus a one-row source holding the current choice.
initial_selection = 'goals'
initial_position = 0
selections_source = ColumnDataSource({'selection':[initial_selection], 'position':[initial_position]})
scatter_source = ColumnDataSource(position_sources[initial_position][initial_selection]['scatter'].data)
box_source = ColumnDataSource(position_sources[initial_position][initial_selection]['box'].data)


In [76]:
#hide_input

total_width = 360

# Arguments handed to both widget callbacks (copied per callback below).
callback_args = dict(
    scatter_source=scatter_source,
    box_source=box_source,
    selections_source=selections_source,
    sources=position_sources,
)

# JavaScript shared by both callbacks. It expects the scalars `position` and
# `selection` to be defined by the widget-specific prelude that precedes it.
swap_sources_js = """
    // Store the current choice as one-element columns holding plain scalars
    // (re-wrapping the existing arrays would nest them deeper on every event).
    selections_source.data = {'selection': [selection], 'position': [position]};
    selections_source.change.emit();

    // NOTE(review): depending on the Bokeh version, a dict with non-string
    // keys may arrive as a Map instead of a plain object - handle both.
    const lookup = (container, key) =>
        container instanceof Map ? container.get(key) : container[key];
    const chosen = lookup(lookup(sources, position), selection);

    scatter_source.data = chosen['scatter'].data;
    scatter_source.change.emit();

    box_source.data = chosen['box'].data;
    box_source.change.emit();
"""

# Statistic picker: new statistic from the widget, position from the stored state.
x_select = Select(value=initial_selection, options=x_axis_values, width=total_width, height=30)
x_select.js_on_change("value", CustomJS(args=dict(callback_args), code="""
    const selection = this.value;
    const position = selections_source.data['position'][0];
""" + swap_sources_js))

# Position picker: new position from the widget, statistic from the stored state.
position_radio = RadioButtonGroup(labels=positions.tolist(), active=initial_position, width=total_width)
position_radio.js_on_click(CustomJS(args=dict(callback_args), code="""
    const position = this.active;
    const selection = selections_source.data['selection'][0];
""" + swap_sources_js))

p = figure(height=600, width=total_width)
p.yaxis.ticker = years
p.yaxis.axis_label = "Years in the NHL"
p.y_range.flipped = True  # first NHL year at the top

# Raw observations, jittered vertically within each year band.
p.scatter(y=jitter('years_in_nhl', 0.55), x='field', size=5, alpha=0.05, source=scatter_source, color=Colorblind[8][0])

# Size of the quartile tick marks, in data units.
line_width = 0.05
line_height = 0.8

boxplot_color = Colorblind[8][7]
# Tick marks at the first quartile, the median and the third quartile.
for quartile in ('q1', 'q2', 'q3'):
    p.rect(quartile, 'years_in_nhl', source=box_source, color=boxplot_color,
           width=line_width, height=line_height)
# Interquartile box: a horizontal bar's thickness is `height`, not `width`.
p.hbar(source=box_source, y='years_in_nhl', left='q1', right='q3', line_alpha=0.5,
       fill_alpha=0, height=0.75, line_color=boxplot_color)
# Whiskers out to the max / min: a segment's stroke is set via `line_width`.
p.segment(y0='years_in_nhl', x0='q3', y1='years_in_nhl', x1='roof',
          source=box_source, color=boxplot_color, line_width=0.75)
p.segment(y0='years_in_nhl', x0='floor', y1='years_in_nhl', x1='q1',
          source=box_source, color=boxplot_color, line_width=0.75)

output_file('nhlbymonth_boxplot.html')

show(layout(
    column(
        x_select,
        position_radio,
        p
    )))
# Load the saved document; a bare positional string would be rendered as
# literal HTML markup (i.e. just the file name), not as the file's contents.
HTML(filename='nhlbymonth_boxplot.html')