In [1]:
#collapse
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import numpy as np
from datetime import datetime
from bokeh.plotting import figure, show, output_file
from bokeh.io import output_notebook

# Raw CSV exports: people, rosters, teams and per-player stat lines.
players_df = pd.read_csv(r"C:\Users\jacks\Development\Python\jcrs-blog\_notebooks\person.csv")
roster_df = pd.read_csv(r"C:\Users\jacks\Development\Python\jcrs-blog\_notebooks\roster.csv")
teams_df = pd.read_csv(r"C:\Users\jacks\Development\Python\jcrs-blog\_notebooks\teams.csv")
playerStats_df = pd.read_csv(r"C:\Users\jacks\Development\Python\jcrs-blog\_notebooks\playerStats.csv")

# Keep only stat lines from league 133 (presumably the NHL's id - confirm
# against the data) where the player appeared in more than nine games.
in_league = playerStats_df['league_id'].eq(133)
enough_games = playerStats_df['games'].gt(9)
nhl_stats_df = playerStats_df.loc[in_league & enough_games]

# Attach the person record, then the team record, to every stat line.
nhl_player_stats_df = nhl_stats_df.merge(players_df, on='person_id')
nhl_player_stats_team_df = nhl_player_stats_df.merge(teams_df, on='team_id')

# NOTE(review): the trailing "-%f" makes whatever follows the seconds parse as a
# fractional-seconds field; confirm that matches the raw birth_date strings.
parsed_birth_dates = pd.to_datetime(
    nhl_player_stats_team_df['birth_date'],
    format="%Y-%m-%d %H:%M:%S-%f",
)
nhl_player_stats_team_df['birth_date'] = parsed_birth_dates
nhl_player_stats_team_df['birth_month_number'] = parsed_birth_dates.dt.month
nhl_player_stats_team_df['birth_month'] = parsed_birth_dates.dt.month_name()

# Order rows by calendar month so month names first appear January -> December.
nhl_player_stats_team_df = nhl_player_stats_team_df.sort_values(by='birth_month_number')

In [51]:
#collapse
# Columns taken from the merged player/team frame for the per-month comparison.
selected_columns = [
    'birth_month',
    'assists',
    'goals',
    'pim',
    'shots',
    'games',
    'hits',
    'power_play_goals',
    'power_play_assists',
    'blocked_shots',
    'shifts',
]

# Multi-word column labels use spaces instead of underscores.
rename_map = dict([
    ('power_play_goals', 'power play goals'),
    ('power_play_assists', 'power play assists'),
    ('blocked_shots', 'blocked shots'),
])


def _with_derived_stats(stats):
    """Add the derived scoring columns to ``stats`` and return the same frame.

    Columns are appended in a fixed order: points, the three per-game rates,
    power play points, and shooting percentage (goals per shot, times 100).
    """
    goals = stats['goals']
    assists = stats['assists']
    games = stats['games']
    points = goals + assists

    derived = [
        ('points', points),
        ('goals per game', goals / games),
        ('assists per game', assists / games),
        ('points per game', points / games),
        ('power play points', stats['power play goals'] + stats['power play assists']),
        ('shooting percentage', goals / stats['shots'] * 100),
    ]
    for label, values in derived:
        stats[label] = values
    return stats


nhl_month_stats_df = _with_derived_stats(
    nhl_player_stats_team_df[selected_columns].rename(columns=rename_map)
)

In [103]:
#collapse
from numpy import number
from bokeh.models import ColumnDataSource, HBar, Segment, Rect, Select, CustomJS
from bokeh.layouts import row

output_notebook()
x_value = 'assists'      # stat whose data is plotted when the figure first loads
y_value = 'birth_month'  # categorical column the boxes are grouped by

# Distinct group labels in first-appearance order. Missing labels are dropped:
# groupby() excludes them by default, and a NaN cannot serve as a label on the
# categorical y axis built from this list.
categories = nhl_month_stats_df[y_value].dropna().unique().tolist()

def greaterThanZero(x):
    """Return ``x`` if it is strictly positive, otherwise ``0``.

    Applied to the computed lower whisker values so they never drop below zero.
    Anything that does not compare greater than zero (including NaN) maps to 0.
    """
    if x > 0:
        return x
    return 0

def _boxplot_source(stat_name):
    """Build the ColumnDataSource holding box-plot statistics for one stat.

    Groups ``nhl_month_stats_df`` by ``y_value`` and, for every entry of
    ``categories``, stores the three quartiles plus whisker ends placed
    1.5 * IQR beyond the box. The lower whisker is floored at zero.
    """
    grouped = nhl_month_stats_df[[y_value, stat_name]].groupby(y_value)

    def quantile_by_category(q):
        # One value per category, aligned to the order used on the y axis.
        return grouped.quantile(q=q).reindex(categories)[stat_name]

    first_quartile = quantile_by_category(0.25)
    second_quartile = quantile_by_category(0.5)
    third_quartile = quantile_by_category(0.75)

    box_width = third_quartile - first_quartile
    upper_whisker = third_quartile + 1.5 * box_width
    lower_whisker = (first_quartile - 1.5 * box_width).apply(greaterThanZero)

    return ColumnDataSource(data={
        'y': categories,
        'lower': lower_whisker,
        'q1': first_quartile,
        'q2': second_quartile,
        'q3': third_quartile,
        'upper': upper_whisker,
    })


# Every numeric column is a selectable stat; each gets its own data source,
# wrapped in a one-key dict under 'source'.
x_axis_values = nhl_month_stats_df.select_dtypes([number]).columns
sources = {
    stat_name: {'source': _boxplot_source(stat_name)}
    for stat_name in x_axis_values
}
    
# Hover template for the box plot. The @fields are columns of the plotted data
# source; q2 is the 0.5 quantile, so it is labelled as the median (not the mean).
tooltips = """
<div>
    <span style="font-size: 12px; font-weight: bold;">@y</span>&nbsp;
</div>
<div>
    <span style="font-size: 10px;">Lower bounds: @lower</span><br>
    <span style="font-size: 10px;">First quartile: @q1</span><br>
    <span style="font-size: 10px;">Median: @q2</span><br>
    <span style="font-size: 10px;">Third quartile: @q3</span><br>
    <span style="font-size: 10px;">Upper bounds: @upper</span>
</div>
"""
p = figure(width=800, height=400, y_range=categories, tooltips=tooltips)

# The plotted source is its own object, seeded with a copy of the default
# stat's columns. Plotting sources[x_value]['source'] directly would let the
# dropdown callback overwrite that stat's data, so picking it again later would
# show whichever stat had been selected last.
source = ColumnDataSource(data=dict(sources[x_value]['source'].data))

# Box spanning the first to third quartile.
hbar = HBar(
        y='y',
        height=0.6,
        left='q1',
        right='q3',
        fill_color="#E08E79"
)
# Whiskers from each end of the box out to the lower/upper bounds.
lower_segment = Segment(x0='lower', y0='y', x1='q1' ,y1='y')
upper_segment = Segment(x0='q3', y0='y', x1='upper' ,y1='y')
# Very thin rectangles act as vertical tick marks at the whisker ends and at q2.
lower_rect = Rect(x='lower', y='y', width=0.001, height=0.8)
middle_rect = Rect(x='q2', y='y', width=0.001, height=0.8)
upper_rect = Rect(x='upper', y='y', width=0.001, height=0.8)

# Every glyph reads from the same source, so swapping its data redraws them all.
for glyph in (hbar, lower_segment, upper_segment, lower_rect, middle_rect, upper_rect):
    p.add_glyph(source, glyph)

# Flat stat-name -> ColumnDataSource lookup handed to the browser callback.
stat_sources = {name: entry['source'] for name, entry in sources.items()}

x_select = Select(title="Select Stat", options=x_axis_values.tolist(), value=x_value)
x_select.js_on_change("value", CustomJS(args=dict(source=source, sources=stat_sources),
code="""
    // NOTE(review): a Python dict may reach the browser as a plain object or
    // as a Map depending on the Bokeh release; both are handled here.
    const selected = (sources instanceof Map)
        ? sources.get(cb_obj.value)
        : sources[cb_obj.value];
    if (selected !== undefined) {
        source.data = selected.data;
        source.change.emit();
    }
"""))
layout = row(x_select, p)

output_file('boxplot.html')
# show the results
show(layout)

In [104]:
#hide_input
# Import from the public IPython.display module; importing `display` from
# IPython.core.display is a deprecated path.
from IPython.display import display, HTML

# Pass the path as `filename` so the saved plot is read from disk explicitly;
# a missing file then raises instead of rendering the path text as HTML.
display(HTML(filename='./boxplot.html'))