In [1]:
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
import dash_bootstrap_components as dbc

from dash.dependencies import Input, Output
import dash_table

from sqlalchemy import create_engine
import pandas as pd
import numpy as np
import time
import json
import os

from data_processing import *

import generate_player_seasons as g
from datetime import datetime

from networkx.drawing.nx_agraph import graphviz_layout
from networkx.readwrite import json_graph

import networkx as nx

import plotly.graph_objects as go

from networkx.readwrite import json_graph

def load_db_credentials():
    '''Load database credentials from bash_profile'''

    user = os.environ['db_user']
    password = os.environ['db_pass']
    server = os.environ['db_host']
    database = os.environ['db_name']

    return user, password, server, database

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from scipy.optimize import curve_fit
import pickle

def get_scoring_labels(graph, position, node):
    
    G = json_graph.node_link_graph(json.loads(graph))
    
    floor_rate = str(get_ppg_range(G, node))
    ceiling_rate = str(get_ppg_range(G, node, False))
    floor_role = get_role(float(floor_rate), position)
    ceiling_role = get_role(float(ceiling_rate), position)
    
    rate_label = np.unique([floor_rate, ceiling_rate])
    role_label = np.unique([floor_role, ceiling_role])

    if len(rate_label) > 1:
    
        rate_text = '-'.join([floor_rate, ceiling_rate])
            
    else:

        rate_text = rate_label[0]
            
    if len(role_label) > 1:
        
        role_text = '-'.join([floor_role, ceiling_role])
            
    else:
        
        role_text = role_label[0]
        
    return rate_text, role_text

def get_scoring_rates(graph, position, node):
    
    G = json_graph.node_link_graph(json.loads(graph))
    
    floor_rate = get_ppg_range(G, node)
    ceiling_rate = get_ppg_range(G, node, False)
        
    return floor_rate, ceiling_rate
    
def check_late_birthday(date_of_birth):

    try:
        dt = datetime.strptime(date_of_birth, '%Y-%m-%d')
        draft = date(dt.year + 18, 9, 15)
        if relativedelta(draft, dt).years < 18 :
            return True
        else:
            return False

    except:
        return None
    
def get_value_rank(data):
            
    df = draft_plus5[(draft_plus5.primary_position == data.primary_position)]
    
    return stats.percentileofscore(df.draft_plus_5_points, data.nhl_expected_value)

def calc_draft_value(data):
    
    if data.late_birthday.sum() > 0:
        
        return pd.Series({
            'draft_plus_5_points' : (data[(data.season_age >= 20) 
                                         & (data.season_age <= 24)
                                         & (data.league == 'NHL')].tp).sum(),
            'draft_plus_5_gp' : data[(data.season_age >= 20) 
                                   & (data.season_age <= 24)
                                   & (data.league == 'NHL')].gp.sum(),
            'draft_plus_5_g' : data[(data.season_age >= 20) 
                                   & (data.season_age <= 24)
                                   & (data.league == 'NHL')].g.sum(),
            'draft_plus_5_a' : data[(data.season_age >= 20) 
                                       & (data.season_age <= 24)
                                       & (data.league == 'NHL')].a.sum()
                         })
    
    else:
        return pd.Series({
            'draft_plus_5_points' : (data[(data.season_age >= 19) 
                                         & (data.season_age <= 23)
                                         & (data.league == 'NHL')].tp).sum(),
            'draft_plus_5_gp' : data[(data.season_age >= 19) 
                                   & (data.season_age <= 23)
                                   & (data.league == 'NHL')].gp.sum(),
            'draft_plus_5_g' : data[(data.season_age >= 19) 
                                       & (data.season_age <= 23)
                                       & (data.league == 'NHL')].g.sum(),
            'draft_plus_5_a' : data[(data.season_age >= 19) 
                                       & (data.season_age <= 23)
                                       & (data.league == 'NHL')].a.sum()})
    
def func(x, a, b, c):
    return a * np.exp(-b * x) + c

def inverse_exp(x, a, b, c):

    return np.log((x - c) / a) / -b

def fit_draft_curve(data):
    
    y = data.nhl_expected_value
    x = data.draft_pick
    
    popt, pcov = curve_fit(func,
                       x,
                       y)
    
    return popt

def get_draft_value(data, clusters):
    
    X = data[['nhl_expected_value', 'xpick']]
    
    kmeans = KMeans(n_clusters=clusters).fit(X)
    
    return kmeans

    
def get_draft_upside(data, clusters):
    
    X = data[['ceiling_percentile', 'ceiling_percentile']]
    
    kmeans = KMeans(n_clusters=clusters).fit(X)
    
    return kmeans

def get_cluster_labels(data, cluster, labels, c_type = 'value'):
    
    df = data.groupby([cluster])['nhl_expected_value' if c_type == 'value' \
                                 else 'nhl_ceiling'].mean().sort_values(ascending=False)
        
    return {k:v for k,v in zip(df.index, labels[c_type]['labels'])}

def get_ceiling_rank(data):

    df = agg[(agg.primary_position == data.primary_position.item())]
    
    return stats.percentileofscore(df.draft_plus_5_points, data.nhl_ceiling.item())
 
def get_role(rate, position):
    
    if position == 'D':
        return np.where(rate > np.percentile(d_subset.ppg, 25),
                        np.where(rate > np.percentile(d_subset.ppg, 50),
                                 np.where(rate > np.percentile(d_subset.ppg, 75),
                                          np.where(rate > np.percentile(d_subset.ppg, 90),
                                                   'Elite',
                                                   'Top Pair'
                                                  ),
                                          'Top 4'
                                         ),
                                 'Top 6'
                                ),
                        'Depth'
                       ).item()
    
    else:
        return np.where(rate > np.percentile(f_subset.ppg, 25),
                        np.where(rate > np.percentile(f_subset.ppg, 50),
                                 np.where(rate > np.percentile(f_subset.ppg, 75),
                                          np.where(rate > np.percentile(f_subset.ppg, 90),
                                                   'Elite',
                                                   'Top Line'
                                                  ),
                                          'Top 6'
                                         ),
                                 'Top 9'
                                ),
                        'Depth'
                       ).item()
    
def get_path_ppg(G, node):
    
    points = []

    # points.append(G.nodes[node]['tp'])
    if G.nodes[node]['league'] == 'NHL':
        points.append(G.nodes[node]['ppg'])

    for n in sorted(nx.ancestors(G, node))[::-1]:
        if (n != 1) & (G.nodes[n]['league'] == 'NHL'):
            # points.append(G.nodes[n]['tp'])
            points.append(G.nodes[n]['ppg'])

        node=n
    # return round(np.sum(points) / (64 * len(points)),2)
    return round(np.mean(points),2)

def get_ppg_range(G, node, floor=True):
    
    points = []

    # points.append(G.nodes[node]['tp'])
    if G.nodes[node]['league'] == 'NHL':
        points.append(G.nodes[node]['ppg'])

    for n in sorted(nx.ancestors(G, node))[::-1]:
        if (n != 1) & (G.nodes[n]['league'] == 'NHL'):
            # points.append(G.nodes[n]['tp'])
            points.append(G.nodes[n]['ppg'])

        node=n
    
    if points:
        if floor:
            return round(np.min(points), 2)

        else:
            return round(np.max(points), 2)
    else:
        return 0

   
cluster_labels = {
    'F' : {
        'value' : {
            'num_cluster' : 7,
            'labels' : ['Franchise', 'Top 5', 'Top 15', '1st Round', '2nd-3rd Round', 'Mid Round', 'Late Round']
        },
        'upside' : {
            'num_cluster' : 6,
            'labels' : ['Top End', 'High End', 'Mid End', 'Safe', 'Project', 'Unknown']
        }
    },
    'D' : {
        'value' : {
            'num_cluster' : 7,
            'labels' : ['Top 5', 'Top 15', '1st Round', '2nd round', '3rd Round', 'Mid Round', 'Late Round']
        },
        'upside' : {
            'num_cluster' : 6,
            'labels' : ['Top End', 'High End', 'Mid End', 'Safe', 'Project', 'Unknown']
        }
    }
}

f_params = (224.75603480181096, 0.06324523838602378, 2.542136232484409) # 224.75603480181096, 0.06324523838602378, 2.542136232484409
d_params = (101.8852594934708, 0.0542482743796363, 1.5254264511302613) # 101.8852594934708, 0.0542482743796363, 1.5254264511302613


season_order = {
    'DY' : 0,
    'D + 1' : 1,
    'D + 2' : 2,
    'D + 3' : 3,
    'D + 4' : 4,
    'D + 5' : 5,
}


In [14]:
df = pd.read_csv('../data/draft-rankings-v2.csv')

In [72]:
user, password, server, database = load_db_credentials()
engine = create_engine(f'postgresql://{user}:{password}@{server}:5432/{database}')

player_value = pd.read_sql(f'''select * from summary_projections ''', con=engine)

In [73]:
info = pd.read_sql(f''' select * from player_info where catches is NULL ''', engine)

In [20]:
# dataset =  pd.read_sql(f'''select playerid, player, gp, g, a, tp, ppg, position, year, team, league  from skater_stats order by gp desc ''', con=engine)
dataset = pd.read_sql( open('player_stats.sql', 'r').read(), engine)
dataset['primary_position'] = dataset.position.apply(get_primary_position)

In [23]:
# merge player information with player seasons
dataset = dataset.merge(
    info[info.date_of_birth.notnull()].drop('position', axis=1), 
    on = ['playerid'],
)

dataset['late_birthday'] = dataset.date_of_birth.apply(check_late_birthday)

# get season age per player season
dataset['start_year'], dataset['end_year']  = zip(*dataset['year'].apply(lambda x : x.split('-')))
dataset = get_season_age(dataset)



In [229]:
df = pd.read_sql('''select 
	league,
	year,
	sum(g) as g,
	max(gp) as gp,
	count(distinct teamid) teams,
	sum(g) / (max(gp) * count(distinct teamid)) as goals_per_game
from skater_stats ss 
where season_stage not like '%ET%'
and season_stage not like '%Cup%'
and season_stage not like '%JCWC%'
and season_stage not like '%PJHL%'
and season_stage not like '%Czech U16 2%' 
and season_stage not like '%Showcase%'
and season_stage not like '%ECC%'
and season_stage not like '% Q'
and season_stage not like 'MHL Supercup'
group by 
1,
2
having (max(gp) * count(distinct teamid)) > 0;

''', engine)


scoring = df.merge(df[df['year'] == '2019-2020'][['league', 'goals_per_game']],
              on='league',
              suffixes=('', '_base'),
              how='left')

scoring['goals_per_game'] /= scoring['goals_per_game_base']

# deflator

In [230]:
dataset = dataset.merge(scoring[['league', 'year', 'goals_per_game']], on = ['league', 'year'])

dataset['tp'] /= dataset.goals_per_game
dataset['g'] /= dataset.goals_per_game
dataset['a'] /= dataset.goals_per_game
dataset['gpg'] /= dataset.goals_per_game
dataset['apg'] /= dataset.goals_per_game
dataset['ppg'] /= dataset.goals_per_game

In [26]:
dataset = pd.read_csv('../../apple-model-app/data/player_season_stats_v3.csv')
dataset['primary_position'] = dataset.position.apply(get_primary_position)


Columns (35) have mixed types.Specify dtype option on import or set low_memory=False.



In [79]:
f_subset = dataset.loc[(dataset.gp >= 20)
                            & (dataset.league == 'NHL')
                            & (dataset.primary_position == 'F')]

d_subset = dataset.loc[(dataset.gp >= 20)
                            & (dataset.league == 'NHL')
                            & (dataset.primary_position == 'D')]

## get scoring rate and role per season 

In [88]:
dataset = pd.read_csv('../../apple-model-app/data/player_season_stats_v3.csv')
dataset['primary_position'] = dataset.position.apply(get_primary_position)
dataset['positions'] = dataset['position'].apply(lambda x : x.replace("{", "['").replace("}", "']").replace(",", "','"))
dataset['positions'] = dataset['positions'].apply(lambda x : ast.literal_eval(x))
dataset['late_birthday'] = dataset.date_of_birth.apply(check_late_birthday)


In [75]:
player_value.draft_year = player_value.draft_year.astype(int)

In [76]:
v = np.vectorize(get_scoring_labels)

In [80]:
player_value['Projected Rate'], player_value['Projected Role'] = v(player_value.graph, 
                                                                   player_value.position, 
                                                                   player_value.nhl_maximizing_node)

In [81]:
v = np.vectorize(get_scoring_rates)

In [82]:
player_value['Floor Rate'], player_value['Ceiling Rate'] = v(player_value.graph, 
                                                            player_value.position, 
                                                            player_value.nhl_maximizing_node)

In [83]:
player_value['season_order'] = player_value.base_year.map(season_order)
# player_value['playerid'] = player_value.playerid.astype(str)

In [84]:
player_value = player_value.merge(info[['playerid', 'height', 'weight', 'rights', 'nation', 'shoots',
                                        'draft_round', 'draft_pick', 'draft_team', 'nhl_draft']],
                                 on = 'playerid')

## get draft value

In [89]:
dataset['years_post_draft'] = (dataset.end_year.astype(int) - dataset.draft_year_eligible.astype(float))

dataset.dropna(subset=['years_post_draft'], inplace=True)

dataset['base_year'] = np.where(dataset.end_year.astype(int) == dataset.draft_year_eligible.astype(int),
                                       'DY',
                                       'D + '+ dataset.years_post_draft.astype(int).astype(str)
                                      )

curr_scoring = dataset.groupby(['playerid', 'base_year', 'end_year', 'years_post_draft']).apply(calc_draft_value)\
                            .sort_index(level='years_post_draft')\
                            .groupby(level=0).cumsum().reset_index()

In [90]:
curr_scoring.rename(columns={'draft_plus_5_points' : 'current_nhl_pts',
                             'draft_plus_5_gp' : 'current_nhl_gp',
                             'draft_plus_5_g' : 'current_nhl_g',
                             'draft_plus_5_a' : 'current_nhl_a'
                            }, inplace=True)

In [92]:
player_value['playerid'] = player_value.playerid.astype(int)

In [93]:
player_value = player_value.merge(curr_scoring,
                   on = ['playerid', 'base_year']
                  )

In [94]:
agg = dataset.groupby(['playerid', 'primary_position']).apply(calc_draft_value).reset_index()

In [95]:
player_value = player_value.merge(agg.drop('primary_position', axis=1),
                   on = ['playerid']
                  )

In [96]:
player_value['primary_position'] = player_value.position

In [97]:
player_value['nhl_ceiling'] += player_value['current_nhl_pts']
player_value['nhl_expected_value'] += player_value['current_nhl_pts']

In [98]:
player_value['ceiling_percentile'] = player_value.groupby(['playerid', 'base_year']).apply(get_ceiling_rank).values

In [99]:
player_value['xpick'] = np.nan_to_num(np.where(player_value.position == 'F',
                                inverse_exp(player_value.nhl_expected_value, *f_params),
                                inverse_exp(player_value.nhl_expected_value, *d_params)
                                          ),
                                  nan=150)

player_value['xpick_value'] = np.where(player_value.position == 'F',
                                       func(player_value.draft_pick.astype(float), *f_params),
                                       func(player_value.draft_pick.astype(float), *d_params)
                                          )

In [100]:
f_draft_value = pickle.load(open("../models/forward_value_kmeans.pkl", "rb"))
f_draft_upside = pickle.load(open("../models/forward_upside_kmeans.pkl", "rb"))
d_draft_value = pickle.load(open("../models/defence_value_kmeans.pkl", "rb"))
d_draft_upside = pickle.load(open("../models/defence_upside_kmeans.pkl", "rb"))

In [101]:
player_value['value_cluster'] = np.where(
    player_value.position == 'F',
    f_draft_value.predict(player_value[['nhl_expected_value', 'xpick']]),
    d_draft_value.predict(player_value[['nhl_expected_value', 'xpick']])
                                        )
player_value['upside_cluster'] = np.where(
    player_value.position == 'F',
    f_draft_upside.predict(player_value[['nhl_ceiling', 'ceiling_percentile']]),
    d_draft_upside.predict(player_value[['nhl_ceiling', 'ceiling_percentile']])
                                        )
    
player_value['draft_value_range'] = np.where(
    player_value.position == 'F',
    player_value.value_cluster.map(get_cluster_labels(player_value[player_value.position == 'F'],
                                                      'value_cluster',
                                                      cluster_labels['F'])),
    player_value.value_cluster.map(get_cluster_labels(player_value[player_value.position == 'D'],
                                                      'value_cluster',
                                                      cluster_labels['D']))
                                            )

player_value['draft_upside'] = np.where(
    player_value.position == 'F',
    player_value.upside_cluster.map(get_cluster_labels(player_value[player_value.position == 'F'],
                                                       'upside_cluster',
                                                       cluster_labels['F'], 'upside')),
    player_value.upside_cluster.map(get_cluster_labels(player_value[player_value.position == 'D'],
                                                       'upside_cluster',
                                                       cluster_labels['D'], 'upside'))
                                        )



In [102]:
get_cluster_labels(player_value[player_value.position == 'D'],
                                                       'upside_cluster',
                                                       cluster_labels['D'], 'upside')

{3: 'Top End',
 2: 'High End',
 4: 'Mid End',
 0: 'Safe',
 1: 'Project',
 5: 'Unknown'}

In [103]:
player_value['xpick_value'] = player_value['xpick_value'].astype(int)

player_value['surplus'] = (player_value['nhl_expected_value'] - player_value['xpick_value'])

x = np.arange(dataset.draft_pick.astype(float).min(), dataset.draft_pick.astype(float).max())
y_f = func(x, *f_params)
y_d = func(x, *d_params)

player_value['surplus_relative'] = np.where(player_value.position == 'F',
                                player_value.surplus / y_f.max(),
                                player_value.surplus / y_d.max()
                                          )

In [104]:
player_value = player_value.merge(dataset[['playerid', 'year','league', 'positions',
                                            'gp', 'g', 'a', 'tp', 'ppg', 'real_season_age', 'base_year']
                                         ].sort_values('gp', ascending=False)\
                                .drop_duplicates(['playerid', 'year']),
                   on=['playerid', 'base_year']
                  )

In [105]:
player_value['draft_plus_5_ppg'] = player_value['draft_plus_5_points'] / player_value['draft_plus_5_gp'] 

In [106]:
player_value.rename(columns={'player_name' : 'player'}, inplace=True)

In [248]:
player_value[player_value.player == 'Alexis Lafrenière']

Unnamed: 0,playerid,player,position,nhl_likelihood,most_likely_nhl_node,most_likely_nhl_prob,nhl_floor,nhl_expected_value,nhl_ceiling,nhl_maximizing_node,nhl_ceiling_prob,graph,date_of_birth,base_year,draft_year,Projected Rate,Projected Role,Floor Rate,Ceiling Rate,season_order,height,weight,rights,nation,shoots,draft_round,draft_pick,draft_team,nhl_draft,end_year,years_post_draft,current_nhl_pts,current_nhl_gp,current_nhl_g,current_nhl_a,draft_plus_5_points,draft_plus_5_gp,draft_plus_5_g,draft_plus_5_a,primary_position,ceiling_percentile,xpick,xpick_value,value_cluster,upside_cluster,draft_value_range,draft_upside,surplus,surplus_relative,year,league,gp,g,a,tp,ppg,real_season_age,draft_plus_5_ppg
251,296697,Alexis Lafrenière,F,1.0,16,0.7,284,247.0,284.0,16,0.7,"{""directed"": true, ""multigraph"": false, ""graph...",2001-10-11,DY,2020,0.74-0.99,Top Line-Elite,0.74,0.99,0,186.0,87.0,New York Rangers,Canada,L,1,1,New York Rangers,,2020,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,F,99.10146,-1.328597,213,5,2,Franchise,Top End,34.0,0.159233,2019-2020,QMJHL,52.0,35.0,77.0,112.0,2.153846,19.22,


In [107]:
player_value[player_value.player == 'Alexis Lafrenière']

Unnamed: 0,playerid,player,position,nhl_likelihood,most_likely_nhl_node,most_likely_nhl_prob,nhl_floor,nhl_expected_value,nhl_ceiling,nhl_maximizing_node,nhl_ceiling_prob,graph,date_of_birth,base_year,draft_year,Projected Rate,Projected Role,Floor Rate,Ceiling Rate,season_order,height,weight,rights,nation,shoots,draft_round,draft_pick,draft_team,nhl_draft,end_year,years_post_draft,current_nhl_pts,current_nhl_gp,current_nhl_g,current_nhl_a,draft_plus_5_points,draft_plus_5_gp,draft_plus_5_g,draft_plus_5_a,primary_position,ceiling_percentile,xpick,xpick_value,value_cluster,upside_cluster,draft_value_range,draft_upside,surplus,surplus_relative,year,league,positions,gp,g,a,tp,ppg,real_season_age,draft_plus_5_ppg
243,296697,Alexis Lafrenière,F,1.0,16,0.7,284,247.0,284.0,16,0.7,"{""directed"": true, ""multigraph"": false, ""graph...",2001-10-11,DY,2020,0.74-0.99,Top Line-Elite,0.74,0.99,0,186.0,87.0,New York Rangers,Canada,L,1,1,New York Rangers,,2020,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,F,98.709905,-1.328597,213,5,2,Franchise,Top End,34.0,0.159233,2019-2020,QMJHL,[LW],52.0,35.0,77.0,112.0,2.153846,19.22,


### Write Data to CSV

In [46]:
player_value.to_csv('../data/draft-value.csv', index=False)
player_value.to_csv('../../apple-model-app/data/draft-value.csv', index=False)

### Load data

In [34]:
player_value = pd.read_csv('../data/draft-value.csv')


In [35]:
player_value.head()

Unnamed: 0,playerid,player,position,nhl_likelihood,most_likely_nhl_node,most_likely_nhl_prob,nhl_floor,nhl_expected_value,nhl_ceiling,nhl_maximizing_node,nhl_ceiling_prob,graph,date_of_birth,base_year,draft_year,Projected Rate,Projected Role,Floor Rate,Ceiling Rate,season_order,height,weight,rights,nation,draft_round,draft_pick,draft_team,nhl_draft,end_year,years_post_draft,current_nhl_pts,current_nhl_gp,current_nhl_g,current_nhl_a,draft_plus_5_points,draft_plus_5_gp,draft_plus_5_g,draft_plus_5_a,primary_position,ceiling_percentile,xpick,xpick_value,value_cluster,upside_cluster,draft_value_range,draft_upside,surplus,surplus_relative,year,league,gp,g,a,tp,ppg,real_season_age,draft_plus_5_ppg
0,201868,Jamie Drysdale,D,0.98,15,0.7,126,107.0,126.0,15,0.7,"{""directed"": true, ""multigraph"": false, ""graph...",2002-04-08,DY,2020,0.4-0.58,Top 4-Top Pair,0.4,0.58,0,180.0,77.0,Anaheim Ducks,Canada,1,6,Anaheim Ducks,,2020,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,D,49.088089,-0.638226,75,5,2,Top 5,High End,32.0,0.326428,2019-2020,OHL,49.0,9.0,38.0,47.0,0.959184,18.73,
1,397010,Kasper Simontaival,F,0.02,15,0.02,22,0.0,23.0,15,0.02,"{""directed"": true, ""multigraph"": false, ""graph...",2002-01-11,DY,2020,0.36,Top 9,0.36,0.36,0,175.0,78.0,Los Angeles Kings,Finland,3,66,Los Angeles Kings,,2020,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,F,49.088089,150.0,6,0,3,Late Round,Project,-6.0,-0.0281,2019-2020,Jr. A SM-liiga,48.0,25.0,32.0,57.0,1.1875,18.97,
2,337733,Lukas Svejkovsky,F,0.29,14,0.12,67,11.0,68.0,14,0.12,"{""directed"": true, ""multigraph"": false, ""graph...",2001-11-23,DY,2020,0.27-0.4,Top 9,0.27,0.4,0,178.0,75.0,Pittsburgh Penguins,USA ...,4,108,Pittsburgh Penguins,,2020,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,F,49.088089,51.860329,2,2,1,2nd-3rd Round,Safe,9.0,0.04215,2019-2020,WHL,52.0,18.0,20.0,38.0,0.730769,19.11,
3,499424,Topi Niemelä,D,0.02,10,0.02,15,0.0,16.0,10,0.02,"{""directed"": true, ""multigraph"": false, ""graph...",2002-03-25,DY,2020,0.24,Top 6,0.24,0.24,0,182.0,74.0,Toronto Maple Leafs,Finland,3,64,Toronto Maple Leafs,,2020,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,D,48.892918,150.0,4,0,1,Late Round,Project,-4.0,-0.040803,2019-2020,Liiga,47.0,1.0,8.0,9.0,0.191489,18.77,
4,448947,Vasili Ponomaryov,F,0.3,12,0.13,55,12.0,55.0,12,0.13,"{""directed"": true, ""multigraph"": false, ""graph...",2002-03-13,DY,2020,0.39-0.47,Top 9-Top 6,0.39,0.47,0,180.0,80.0,Carolina Hurricanes,Russia,2,53,Carolina Hurricanes,,2020,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,F,48.892918,50.093399,10,2,1,2nd-3rd Round,Safe,2.0,0.009367,2019-2020,QMJHL,57.0,18.0,31.0,49.0,0.859649,18.8,



## Subset prospects (>=2015 and < 100 GP)

In [None]:
!pip install unidecode

In [47]:
import datetime
now = datetime.datetime.now()

def get_current_year(date):
    '''Return hockey season label based on what the current date is'''
    if date.month >= 1 and date.month <= 9:
        return date.year

    else:
        return date.year + 1

In [30]:
import pandas as pd
import json

In [31]:
player_value = pd.read_csv('../data/draft-value.csv')
teams = json.load(open('../../apple-model-app/assets/team-styles.json'))

### Prepare Data for dashboard

In [13]:
from unidecode import unidecode
import plotly.graph_objects as go
from plotly.subplots import make_subplots 
import numpy as np

df = pd.read_csv('../../apple-model-app/data/draft-value.csv')
team = 'Toronto Maple Leafs'
teams = json.load(open('../../apple-model-app/assets/team-styles.json'))
primary_colour = teams['nhl'][team]['color']

prospects = df.sort_values(by=['player', 'season_order'],
                                 ascending=[True, False])\
        .drop_duplicates(subset=['playerid'])\
        .loc[(
                (df.draft_year >= 2016)
            )
             &
            (
                (df.end_year == 2020)
             )
             &
                (df.rights.notnull())
             &
                (df.draft_plus_5_gp < 50)
             ]

prospects['rights'] = prospects.rights.apply(unidecode)
prospects['position_count'] = prospects.positions.apply(lambda x : len(x))

d_prospects = prospects[prospects.position == 'D']
f_prospects = prospects[prospects.position == 'F']

defence = df[df.position == 'D']
forwards = df[df.position == 'F']

pipeline = prospects[prospects.rights == team].sort_values('Projected Rate', ascending=True)

team_forwards = pipeline[pipeline.position == 'F']
team_defence = pipeline[pipeline.position == 'D']

AttributeError: 'DataFrame' object has no attribute 'positions'

In [14]:
prospects.head()

Unnamed: 0,playerid,player,position,nhl_likelihood,most_likely_nhl_node,most_likely_nhl_prob,nhl_floor,nhl_expected_value,nhl_ceiling,nhl_maximizing_node,nhl_ceiling_prob,graph,date_of_birth,base_year,draft_year,Projected Rate,Projected Role,Floor Rate,Ceiling Rate,season_order,height,weight,rights,nation,draft_round,draft_pick,draft_team,nhl_draft,end_year,years_post_draft,current_nhl_pts,current_nhl_gp,current_nhl_g,current_nhl_a,draft_plus_5_points,draft_plus_5_gp,draft_plus_5_g,draft_plus_5_a,primary_position,ceiling_percentile,xpick,xpick_value,value_cluster,upside_cluster,draft_value_range,draft_upside,surplus,surplus_relative,year,league,gp,g,a,tp,ppg,real_season_age,draft_plus_5_ppg
2400,118081,Aapeli Räsänen,F,0.0,1,1.0,0,0.0,0.0,1,1.0,"{""directed"": true, ""multigraph"": false, ""graph...",1998-06-01,D + 4,2016,0,Depth,0.0,0.0,4,183.0,94.0,Edmonton Oilers,Finland,6,153,Edmonton Oilers,,2020,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,F,99.321894,150.0,2,0,5,Late Round,Unknown,-2.0,-0.009367,2019-2020,NCAA,34.0,11.0,13.0,24.0,0.705882,22.58,
1610,299098,Aarne Talvitie,F,0.02,7,0.02,25,0.0,25.0,7,0.02,"{""directed"": true, ""multigraph"": false, ""graph...",1999-02-11,D + 3,2017,0.39,Top 9,0.39,0.39,3,178.0,90.0,New Jersey Devils,Finland,6,160,New Jersey Devils,,2020,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,F,48.892918,150.0,2,0,3,Late Round,Project,-2.0,-0.009367,2019-2020,NCAA,30.0,6.0,13.0,19.0,0.633333,21.89,
331,291514,Adam Beckman,F,0.4,18,0.14,97,24.0,98.0,18,0.14,"{""directed"": true, ""multigraph"": false, ""graph...",2001-05-10,D + 1,2019,0.39-0.61,Top 9-Top 6,0.39,0.61,1,185.0,81.0,Minnesota Wild,Canada,3,75,Minnesota Wild,,2020,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,F,99.015272,37.13994,4,2,4,2nd-3rd Round,Mid End,20.0,0.093666,2019-2020,WHL,63.0,48.0,59.0,107.0,1.698413,19.64,
1410,265684,Adam Boqvist,D,0.96,7,0.81,112,101.0,126.0,7,0.8,"{""directed"": true, ""multigraph"": false, ""graph...",2000-08-15,D + 2,2018,0.44-0.59,Top Pair-Elite,0.44,0.59,2,182.0,82.0,Chicago Blackhawks,Sweden,1,8,Chicago Blackhawks,,2020,2.0,13.0,41.0,4.0,9.0,13.0,41.0,4.0,9.0,D,99.321894,0.4414,67,5,2,Top 5,High End,34.0,0.34683,2019-2020,NHL,41.0,4.0,9.0,13.0,0.317073,20.38,0.317073
592,349951,Adam Edström,F,0.12,20,0.03,17,3.0,69.0,24,0.02,"{""directed"": true, ""multigraph"": false, ""graph...",2000-10-12,D + 1,2019,0.3-0.41,Top 9,0.3,0.41,1,199.0,96.0,New York Rangers,Sweden,6,161,New York Rangers,,2020,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,F,99.050652,97.970998,2,3,1,Mid Round,Safe,1.0,0.004683,2019-2020,SHL,46.0,4.0,5.0,9.0,0.195652,20.22,


In [191]:
f_role_labels = [[25, 'Top 9'], [50, 'Top 6'], [75, 'Top Line'], [90, 'Elite']]
d_role_labels = [[25, 'Top 6'], [50, 'Top 4'], [75, 'Top Pair'], [90, 'Elite']]

layout = go.Layout(
    plot_bgcolor='rgb(255,255,255)',
    paper_bgcolor='rgba(0,0,0,0)',
    hovermode='closest',
    margin=dict(b=10,l=40,r=10,t=10),
    legend=dict(font=dict(size=10), yanchor="bottom", xanchor="right", y=0.9, x=0.99)
)

fig = make_subplots(rows=1, cols=2, horizontal_spacing = 0.2)
fig.update_layout(layout)
fig['layout']['xaxis1'].update(range=[0, f_subset['ppg'].max()], title='Projected Rate', ticks='outside', showline=True,  linewidth=2,  linecolor='black', tickprefix="<b>",ticksuffix ="</b><br>")
fig['layout']['xaxis2'].update(range=[0, d_subset['ppg'].max()], title='Projected Rate', ticks='outside', showline=True,  linewidth=2, linecolor='black', tickprefix="<b>",ticksuffix ="</b><br>")
fig.update_yaxes(ticks='outside',tickmode='linear', tickprefix="<b>",ticksuffix ="</b>")

d1 = go.Scatter(
            x=team_forwards['Floor Rate'],
            y=team_forwards['player'],
            mode='markers',
            showlegend=False,
            name='NHL Expectation',
            marker=dict(
                color='rgba(212, 212, 212, 1)', 
                size=12
            )
        )

d2 = go.Scatter(
            x=team_defence['Floor Rate'],
            y=team_defence['player'],
            mode='markers',
            showlegend=True,
            name='NHL Expectation',
            marker=dict(
                color='rgba(212, 212, 212, 1)', 
                size=12
            )
        )

d1_ = go.Scatter(
            x=team_forwards['Ceiling Rate'],
            y=team_forwards['player'],
            mode='markers',
            name='NHL Ceiling',
            showlegend=False,
            marker=dict(
                line=dict(color=primary_colour, width=4),
                color='rgba(255,255,255,0)', 
                size=16
            )
        )

d2_ = go.Scatter(
            x=team_defence['Ceiling Rate'],
            y=team_defence['player'],
            mode='markers',
            name='NHL Ceiling',
            marker=dict(
                line=dict(color=primary_colour, width=4),
                color='rgba(255,255,255,0)',
                size=16
            ),
            showlegend=True
        )


fig.add_trace(d1, row=1, col=1)
fig.add_trace(d2, row=1, col=2)
fig.add_trace(d1_, row=1, col=1)
fig.add_trace(d2_, row=1, col=2)


fig.add_trace(d1, row=1, col=1)
fig.add_trace(d2, row=1, col=2)

for value, label in f_role_labels:
    fig.add_shape(
            # Line Horizontal
                type="line",
                y0=-1,
                x1=np.percentile(f_subset.ppg, value),
                x0=np.percentile(f_subset.ppg, value),
                y1=len(team_forwards)+1,
                line=dict(
                    color="rgba(0, 0, 0, 0.6)",
                    width=2,
                    dash="dashdot",
                ),
                row=1, 
                col=1
    )
    fig.add_annotation(
        x=np.percentile(f_subset.ppg, value),
        y= len(team_forwards) +1,
        xref="x",
        yref="y",
        text=label,
        showarrow=False,
        font=dict(
            color="white"
            ),
        align="center",
        arrowhead=2,
        arrowsize=1,
        arrowwidth=2,
        arrowcolor="rgba(0, 0, 0, 1)",
        ax=-30,
        ay=-30,
        bordercolor="rgba(0, 0, 0, 1)",
        borderwidth=2,
        borderpad=2,
        bgcolor="rgba(0, 0, 0, 1)",
        opacity=0.8,
        textangle=-90,
        row=1, 
        col=1
    )

for value, label in d_role_labels:

    if label != 'Depth':
        fig.add_shape(
                # Line Horizontal
                    type="line",
                    y0=-1,
                    x1=np.percentile(d_subset.ppg, value),
                    x0=np.percentile(d_subset.ppg, value),
                    y1=len(team_defence) + 1,
                    line=dict(
                        color="rgba(0, 0, 0, 0.6)",
                        width=2,
                        dash="dashdot",
                    ),
                    row=1, 
                    col=2
        )

        fig.add_annotation(
            x=np.percentile(d_subset.ppg, value),
            y=len(team_defence) + 1,
            xref="x",
            yref="y",
            text=label,
            showarrow=False,
            font=dict(
                color="white"
                ),
            align="center",
            arrowhead=2,
            arrowsize=1,
            arrowwidth=2,
            arrowcolor="rgba(0,0, 0, 1)",
            ax=-30,
            ay=-30,
            bordercolor="rgba(0, 0, 0, 1)",
            borderwidth=2,
            borderpad=2,
            bgcolor="rgba(0, 0, 0, 1)",
            opacity=0.8,
            textangle=-90,
            row=1, 
            col=2
        )

fig.show()


In [76]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots 
import numpy as np

def get_team_prospect_graph(team):
    
    teams = json.load(open('../../apple-model-app/assets/team-styles.json'))
    primary_colour = teams[team]['color']

    prospects = df.sort_values(by=['player', 'season_order'],
                                     ascending=[True, False])\
            .drop_duplicates(subset=['playerid'])\
            .loc[(
                    (df.draft_year >= 2016)
                )
                 &
                (
                    (df.end_year == 2020)
                 )
                 &
                    (df.rights.notnull())
                 &
                    (df.draft_plus_5_gp < 50)
                 ]

    prospects['rights'] = prospects.rights.apply(unidecode)

    d_prospects = prospects[prospects.position == 'D']
    f_prospects = prospects[prospects.position == 'F']

    defence = df[df.position == 'D']
    forwards = df[df.position == 'F']

    pipeline = prospects[prospects.rights == team].sort_values('Ceiling Rate', ascending=True)

    team_forwards = pipeline[pipeline.position == 'F']
    team_defence = pipeline[pipeline.position == 'D']

    f_role_labels = [[30, 'Top 9'], [50, 'Top 6'], [75, 'Top Line'], [95, 'Elite']]
    d_role_labels = [[20, 'Top 6'], [50, 'Top 4'], [70, 'Top Pair'], [90, 'Elite']]

    layout = go.Layout(
    plot_bgcolor='rgb(255,255,255)',
    paper_bgcolor='rgba(0,0,0,0)',
    hovermode='closest',
    margin=dict(b=10,l=40,r=10,t=10),
    legend=dict(font=dict(size=10), yanchor="bottom", xanchor="right", y=0.9, x=0.99)
)

    fig = make_subplots(rows=1, cols=2, horizontal_spacing = 0.2)
    fig.update_layout(layout)
    fig['layout']['xaxis1'].update(range=[0, f_subset['ppg'].max()], title='Projected Rate', ticks='outside', showline=True,  linewidth=2,  linecolor='black', tickprefix="<b>",ticksuffix ="</b><br>")
    fig['layout']['xaxis2'].update(range=[0, d_subset['ppg'].max()], title='Projected Rate', ticks='outside', showline=True,  linewidth=2, linecolor='black', tickprefix="<b>",ticksuffix ="</b><br>")
    fig.update_yaxes(ticks='outside',tickmode='linear', tickprefix="<b>",ticksuffix ="</b>")

    d1 = go.Scatter(
                x=team_forwards['Floor Rate'],
                y=team_forwards['player'],
                mode='markers',
                showlegend=False,
                name='NHL Floor',
                marker=dict(
                    color='rgba(212, 212, 212, 1)', 
                    size=12
                )
            )

    d2 = go.Scatter(
                x=team_defence['Floor Rate'],
                y=team_defence['player'],
                mode='markers',
                showlegend=True,
                name='NHL Floor',
                marker=dict(
                    color='rgba(212, 212, 212, 1)', 
                    size=12
                )
            )

    d1_ = go.Scatter(
                x=team_forwards['Ceiling Rate'],
                y=team_forwards['player'],
                mode='markers',
                name='NHL Ceiling',
                showlegend=False,
                marker=dict(
                    line=dict(color=primary_colour, width=4),
                    color='rgba(255,255,255,0)', 
                    size=16
                )
            )

    d2_ = go.Scatter(
                x=team_defence['Ceiling Rate'],
                y=team_defence['player'],
                mode='markers',
                name='NHL Ceiling',
                marker=dict(
                    line=dict(color=primary_colour, width=4),
                    color='rgba(255,255,255,0)',
                    size=16
                ),
                showlegend=True
            )


    fig.add_trace(d1, row=1, col=1)
    fig.add_trace(d2, row=1, col=2)
    fig.add_trace(d1_, row=1, col=1)
    fig.add_trace(d2_, row=1, col=2)

    for value, label in f_role_labels:
        fig.add_shape(
                # Line Horizontal
                    type="line",
                    y0=-1,
                    x1=np.percentile(f_subset.ppg, value),
                    x0=np.percentile(f_subset.ppg, value),
                    y1=len(team_forwards)+1,
                    line=dict(
                        color="rgba(0, 0, 0, 0.6)",
                        width=2,
                        dash="dashdot",
                    ),
                    row=1, 
                    col=1
        )
        fig.add_annotation(
            x=np.percentile(f_subset.ppg, value),
            y= len(team_forwards) +1,
            xref="x",
            yref="y",
            text=label,
            showarrow=False,
            font=dict(
                color="white"
                ),
            align="center",
            arrowhead=2,
            arrowsize=1,
            arrowwidth=2,
            arrowcolor="rgba(0, 0, 0, 1)",
            ax=-30,
            ay=-30,
            bordercolor="rgba(0, 0, 0, 1)",
            borderwidth=2,
            borderpad=2,
            bgcolor="rgba(0, 0, 0, 1)",
            opacity=0.8,
            textangle=-90,
            row=1, 
            col=1
        )

    for value, label in d_role_labels:

        if label != 'Depth':
            fig.add_shape(
                    # Line Horizontal
                        type="line",
                        y0=-1,
                        x1=np.percentile(d_subset.ppg, value),
                        x0=np.percentile(d_subset.ppg, value),
                        y1=len(team_defence) + 1,
                        line=dict(
                            color="rgba(0, 0, 0, 0.6)",
                            width=2,
                            dash="dashdot",
                        ),
                        row=1, 
                        col=2
            )

            fig.add_annotation(
                x=np.percentile(d_subset.ppg, value),
                y=len(team_defence) + 1,
                xref="x",
                yref="y",
                text=label,
                showarrow=False,
                font=dict(
                    color="white"
                    ),
                align="center",
                arrowhead=2,
                arrowsize=1,
                arrowwidth=2,
                arrowcolor="rgba(0,0, 0, 1)",
                ax=-30,
                ay=-30,
                bordercolor="rgba(0, 0, 0, 1)",
                borderwidth=2,
                borderpad=2,
                bgcolor="rgba(0, 0, 0, 1)",
                opacity=0.8,
                textangle=-90,
                row=1, 
                col=2
            )
        
    return fig

In [77]:
get_team_prospect_graph(team)

### Surplus

In [323]:
def get_team_surplus_graph(team):
    
    teams = json.load(open('../../apple-model-app/assets/team-styles.json'))
    primary_colour = teams[team]['color']
    
    prospects = df.sort_values(by=['player', 'season_order'],
                                     ascending=[True, False])\
            .drop_duplicates(subset=['playerid'])\
            .loc[(
                    (df.draft_year >= 2016)
                )
                 &
                (
                    (df.end_year == 2020)
                 )]

    layout = go.Layout(
        plot_bgcolor='rgb(255,255,255)',
        paper_bgcolor='rgba(0,0,0,0)',
        hovermode='closest',
    )

    order = prospects.groupby(['rights']).surplus.sum().sort_values(ascending=False).index
    color=np.array(['rgb(212,212,212)']*order.shape[0])
    color[order == team] = primary_colour

    f = go.Figure(
        go.Bar(x=prospects.groupby(['rights']).surplus.sum().sort_values(ascending=False).index,
               y=prospects.groupby(['rights']).surplus.sum().sort_values(ascending=False), 
               marker=dict(color=color.tolist()) 
        ),
        layout=layout,
    )

    # Change the bar mode
    f.update_layout(barmode='stack',xaxis={'categoryorder':'array', 'categoryarray':order},yaxis={'title' : 'draft value added'})
    
    return f

### Quantity vs. Quality

In [366]:
def get_team_composition_card(team):

    # Prospect composition
    f_col_order = ['Depth', 'Top 9', 'Top 6', 'Top Line', 'Elite']
    d_col_order = ['Depth', 'Top 6', 'Top 4', 'Top Pair', 'Elite']
    
    teams = json.load(open('../../apple-model-app/assets/team-styles.json'))
    primary_colour = teams[team]['color']
    
    prospects = df.sort_values(by=['player', 'season_order'],
                                     ascending=[True, False])\
            .drop_duplicates(subset=['playerid'])\
            .loc[(
                    (df.draft_year >= 2016)
                )
                 &
                (
                    (df.end_year == 2020)
                 )]
    
    d_prospects = prospects[prospects.position == 'D']
    f_prospects = prospects[prospects.position == 'F']

    team_forward_roles = pd.pivot_table(f_prospects,
                   values=['playerid'], 
                   columns=['Projected Role'],
                   index=['rights'],
                   aggfunc='count').fillna(0).astype(int).droplevel(0, axis=1)[f_col_order]

    team_defence_roles = pd.pivot_table(d_prospects,
                   values=['playerid'], 
                   columns=['Projected Role'],
                   index=['rights'],
                   aggfunc='count').fillna(0).astype(int).droplevel(0, axis=1)[d_col_order]

    team_forward_roles['Prospects Total'] = team_forward_roles.sum(axis=1)
    team_defence_roles['Prospects Total'] = team_defence_roles.sum(axis=1)

    composition = [
        dbc.Col([
            dbc.Row(
                dbc.Card(
                    dbc.CardBody(
                        [
                            html.H5(index, className="card-title"),
                            html.P(val, className="card-text")
                        ]
                    )
                )) for index, val in roles.iteritems()
        ]) for roles in [team_defence_roles.loc[team], 
                        team_forward_roles.loc[team]]
    ]
    
    return composition

def get_team_summary_card(team):
    
    prospects = df.sort_values(by=['player', 'season_order'],
                                     ascending=[True, False])\
            .drop_duplicates(subset=['playerid'])\
            .loc[(
                    (df.draft_year >= 2016)
                )
                 &
                (
                    (df.end_year == 2020)
                 )]
    
    d_prospects = prospects[prospects.position == 'D']
    f_prospects = prospects[prospects.position == 'F']
    
    team_forward_total = f_prospects.groupby(['rights']).agg({
        'nhl_likelihood' : 'sum',
        'Projected Rate' : 'mean',
    }).sort_values('Projected Rate', ascending=False).round(2)

    team_defence_total = d_prospects.groupby(['rights']).agg({
        'nhl_likelihood' : 'sum',
        'Projected Rate' : 'mean',
    }).sort_values('Projected Rate', ascending=False).round(2)
    
    team_forward_total['prospect_quantity_rank'] = team_forward_total['nhl_likelihood'].rank(method='min', ascending=False).astype(int)
    team_forward_total['prospect_quality_rank'] = team_forward_total['Projected Rate'].rank(method='min', ascending=False).astype(int)
    team_defence_total['prospect_quantity_rank'] = team_defence_total['nhl_likelihood'].rank(method='min', ascending=False).astype(int)
    team_defence_total['prospect_quality_rank'] = team_defence_total['Projected Rate'].rank(method='min', ascending=False).astype(int)
    
    card_content = [
                dbc.Row([
                    dbc.Col(
                        dbc.Card(
                            dbc.CardBody(
                                [
                                    html.H5(index, className="card-title"),
                                    html.P(val, className="card-text")
                                ]
                            )
                        )
                    ) for index, val in players.iteritems()
                ]) for players in [team_defence_total.loc[team], 
                                    team_forward_total.loc[team]]
            ]
    
    return card_content

In [200]:
team = 'Calgary Flames'

In [201]:

prospects = df.sort_values(by=['player', 'season_order'],
                                 ascending=[True, False])\
        .drop_duplicates(subset=['playerid'])\
        .loc[(
                (df.draft_year >= 2016)
            )
             &
            (
                (df.end_year == 2020)
             )
             &
                (df.rights.notnull())
             &
                (df.draft_plus_5_gp < 50)
             ]

prospects['rights'] = prospects.rights.apply(unidecode)

d_prospects = prospects[prospects.position == 'D']
f_prospects = prospects[prospects.position == 'F']

defence = df[df.position == 'D']
forwards = df[df.position == 'F']

pipeline = prospects[prospects.rights == team].sort_values('nhl_expected_value', ascending=True)

team_forwards = pipeline[pipeline.position == 'F']
team_defence = pipeline[pipeline.position == 'D']

team_forward_total = f_prospects.groupby(['rights']).agg({
    'nhl_likelihood' : 'sum',
    'Ceiling Rate' : 'mean',
    'surplus' : 'sum',
}).sort_values('Ceiling Rate', ascending=False).round(2)

team_defence_total = d_prospects.groupby(['rights']).agg({
    'nhl_likelihood' : 'sum',
    'Ceiling Rate' : 'mean',
    'surplus' : 'sum',
}).sort_values('Ceiling Rate', ascending=False).round(2)


In [193]:

team_forward_total['prospect_quantity_rank'] = team_forward_total['nhl_likelihood'].rank(method='min', ascending=False)
team_forward_total['prospect_quality_rank'] = team_forward_total['Ceiling Rate'].rank(method='min', ascending=False)
team_defence_total['prospect_quantity_rank'] = team_defence_total['nhl_likelihood'].rank(method='min', ascending=False)
team_defence_total['prospect_quality_rank'] = team_defence_total['Ceiling Rate'].rank(method='min', ascending=False)
team_forward_total['prospect_surplus_rank'] = team_forward_total['surplus'].rank(method='min', ascending=False)
team_defence_total['prospect_surplus_rank'] = team_defence_total['surplus'].rank(method='min', ascending=False)

cols = [
    {'id' : 'nhl_likelihood' , 'name' : ["xPlayers"]},
    {'id' : 'prospect_quantity_rank' , 'name' : ["xPlayers Rank"]},
    {'id' : 'Ceiling Rate' , 'name' : ["Scoring Rate"]},
    {'id' : 'prospect_quality_rank' , 'name' : ["Scoring Rank"]},
    {'id' : 'surplus' , 'name' : ["Draft Surplus"]},
    {'id' : 'prospect_quality_rank' , 'name' : ["Surplus Rank"]},
    ]

team_summary = pd.concat([team_defence_total.loc[team].rename('D'),
          team_forward_total.loc[team].rename('F')], axis=1)

In [211]:
df[df.player.str.contains('Juuso Välimäki')]

Unnamed: 0,playerid,player,position,nhl_likelihood,most_likely_nhl_node,most_likely_nhl_prob,nhl_floor,nhl_expected_value,nhl_ceiling,nhl_maximizing_node,nhl_ceiling_prob,graph,date_of_birth,base_year,draft_year,Projected Rate,Projected Role,Floor Rate,Ceiling Rate,season_order,height,weight,rights,nation,draft_round,draft_pick,draft_team,nhl_draft,end_year,years_post_draft,current_nhl_pts,current_nhl_gp,current_nhl_g,current_nhl_a,draft_plus_5_points,draft_plus_5_gp,draft_plus_5_g,draft_plus_5_a,primary_position,ceiling_percentile,xpick,xpick_value,value_cluster,upside_cluster,draft_value_range,draft_upside,surplus,surplus_relative,year,league,gp,g,a,tp,ppg,real_season_age,draft_plus_5_ppg
1727,221525,Juuso Välimäki,D,0.93,22,0.27,107,64.0,107.0,22,0.27,"{""directed"": true, ""multigraph"": false, ""graph...",1998-10-06,DY,2017,0.32-0.49,Top 4-Top Pair,0.32,0.49,0,187.0,93.0,Calgary Flames,Finland,1,16,Calgary Flames,,2017,0.0,0.0,0.0,0.0,0.0,2.948066,24.0,0.982689,1.965377,D,49.088089,9.015727,44,2,2,Top 15,High End,20.0,0.204017,2016-2017,WHL,60.0,17.960604,39.702388,57.662992,0.96105,19.24,0.122836
1728,221525,Juuso Välimäki,D,0.93,21,0.29,108,66.0,109.0,21,0.29,"{""directed"": true, ""multigraph"": false, ""graph...",1998-10-06,D + 1,2017,0.32-0.49,Top 4-Top Pair,0.32,0.49,1,187.0,93.0,Calgary Flames,Finland,1,16,Calgary Flames,,2018,1.0,0.0,0.0,0.0,0.0,2.948066,24.0,0.982689,1.965377,D,49.088089,8.434855,44,2,2,Top 15,High End,22.0,0.224419,2017-2018,WHL,43.0,12.96464,28.707416,41.672056,0.969118,20.24,0.122836
1729,221525,Juuso Välimäki,D,0.92,7,0.6,75,56.948066,77.948066,7,0.6,"{""directed"": true, ""multigraph"": false, ""graph...",1998-10-06,D + 2,2017,0.31-0.42,Top 4,0.31,0.42,2,187.0,93.0,Calgary Flames,Finland,1,16,Calgary Flames,,2019,2.0,2.948066,24.0,0.982689,1.965377,2.948066,24.0,0.982689,1.965377,D,99.327791,11.223566,44,2,4,Top 15,Mid End,12.948066,0.132082,2018-2019,NHL,24.0,0.982689,1.965377,2.948066,0.122836,21.24,0.122836


In [159]:
team_summary

Unnamed: 0,D,F
nhl_likelihood,2.87,4.05
Ceiling Rate,0.31,0.39
surplus,44.77,-98.64
prospect_quantity_rank,4.0,13.0
prospect_quality_rank,7.0,16.0
prospect_surplus_rank,13.0,26.0


### Team Avg Draft Position Matrix 

In [73]:
df[df.draft_year >= 2010].groupby(['draft_team', 'draft_round'])['draft_pick'].mean().unstack(1).fillna(0).round(0).astype(int)

draft_round,1,2,3,4,5,6,7
draft_team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Anaheim Ducks,21,48,82,107,134,172,200
Arizona Coyotes,14,42,74,106,133,168,196
Atlanta Thrashers,8,0,87,101,150,161,199
Boston Bruins,16,47,85,112,142,173,203
Buffalo Sabres,11,41,76,100,132,159,193
Calgary Flames,15,52,69,106,133,166,192
Carolina Hurricanes,11,43,72,104,133,166,193
Chicago Blackhawks,21,47,79,113,143,173,204
Colorado Avalanche,10,38,74,99,134,161,191
Columbus Blue Jackets,14,44,74,103,134,163,195


### Dash Page

### creating div

In [1]:
import sys
import pandas as pd

sys.path.append('../../apple-model-app/')

from helpers import *

In [251]:
def get_logo(team, size=60):

    from unidecode import unidecode
    import json

    # load team logos
    teams = json.load(open('../../apple-model-app/assets/team-styles.json'))

    try:
        logo = teams[unidecode(team)]['logo']

    except:
        logo = None 

    return logo

def get_logo_dims(team, size=60):

    from unidecode import unidecode
    import json

    # load team logos
    teams = json.load(open('../../apple-model-app/assets/team-styles.json'))

    try:
        logo = teams[unidecode(team)]['logo']
        _, (width, height) = getsizes(logo)
        
        if  height >= width:
            width = (width / height) * size
            height = (height / height) * size
        else:
            height = (height / width) * size
            width = (width / width) * size    
    except:
        logo = None 
        width = size
        height = size

    return {'height' : f"{height}px", 'width' : f"{width}px"}


In [187]:
table = dash_table.DataTable(
            id='team-summary-table',
            columns=[{'id' : c , 'name' : c} for c in team_summary.columns],
            data=team_summary.to_dict("records"),
            style_table={
                'width': '98%', # both are needed to make virtualization table fill screen
                'maxWidth' : '100%',
                'padding-left' : '2%',
                'height': '98%', # both are needed to make virtualization table fill screen
                'maxHeight' : '100%',
            },
            style_cell={
                "textAlign": "center",
                "border": "4px solid white",
                "color"  : 'black',
                "minWidth": "40%",
                "minHeight": "200px",
                "maxHeight": "200px",
                "maxWidth": "40%",
            },
        )


In [188]:
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
import dash_bootstrap_components as dbc

app = JupyterDash(__name__)
app.layout = html.Div(
    children=[
        table,
    ],
    style={'height': '60vh', 'width' : '33vw'}
)

if __name__ == '__main__':
    app.run_server( port='7777')

Dash app running on http://127.0.0.1:7777/


### Extras

In [None]:
### gets team primary colours and logos
import requests
from bs4 import BeautifulSoup
import webbrowser
from unidecode import unidecode

team_colors = {}
team_logos = {}
    
for team in player_value.rights.dropna().unique():
    
    team = unidecode(team)
    
    team_url = team.lower().replace(' ', '-')

    url = f'https://teamcolorcodes.com/{team_url}-color-codes/'
    
    r = requests.get(url)
    if r.status_code == 404:
        url = url.strip('-codes/') + 's'
        r = requests.get(url)
    
    soup = BeautifulSoup(r.text, 'html.parser')
    
    for i, div in enumerate(soup.find_all('div', class_='colorblock')):
        if i == 0:
            color = div['style'].split('background-color: ', maxsplit=1)[1].split(';', maxsplit=1)[0]
            team_colors[team] = color
            break
            
    soup = BeautifulSoup(r.text, 'html.parser')
    for img in soup.find_all('img', {'class':['aligncenter', 'size-full', 'alignnone', 'size-medium']}):
        team_logos[team] = img['src']
        webbrowser.open(img['src'])
        break
teams = {unidecode(team) : {'color' : team_colors[ unidecode(team)],
                    'logo' : team_logos[ unidecode(team)]} for team in player_value.rights.dropna().unique()} 

teams['Seattle Kraken'] = {
    'logo': 'https://upload.wikimedia.org/wikipedia/en/thumb/4/48/Seattle_Kraken_official_logo.svg/1200px-Seattle_Kraken_official_logo.svg.png',
    'color' : '#001628'
}

import json

with open('../../apple-model-app/assets/team-styles.json', 'w') as f:
    json.dump(teams, f )

In [5]:
import time

start = time.time()
d = get_s3_data('data/projections.csv')
print(time.time() - start)

102.0851240158081


In [6]:
db_user, db_pass, db_host, db_name = os.environ['db_user'], os.environ['db_pass'], os.environ['db_host'], os.environ['db_name']
engine = create_engine(f'postgresql://{db_user}:{db_pass}@{db_host}:5432/{db_name}')

In [8]:
start = time.time()
projections = pd.read_sql(f'''select * from projections''', con=engine)
print(time.time() - start)

70.26026892662048


In [10]:
dataset = pd.read_csv('../data/player_season_stats_v2.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [11]:
dataset.shape

(170337, 43)


### Adding WJC

In [293]:
teams = json.load(open('../../apple-model-app/assets/team-styles.json'))

In [254]:
rosters = pd.read_csv('../../apple-model-app/data/wjc-rosters.csv')

In [251]:
teams['AUS'] = {'logo' : 'https://cdn.countryflags.com/thumbs/austria/flag-800.png'}
teams['CAN'] = {'logo' : 'https://cdn.countryflags.com/thumbs/canada/flag-800.png'}
teams['CZE'] = {'logo' : 'https://cdn.countryflags.com/thumbs/czech-republic/flag-800.png'}
teams['FIN'] = {'logo' : 'https://cdn.countryflags.com/thumbs/finland/flag-800.png'}
teams['GER'] = {'logo' : 'https://cdn.countryflags.com/thumbs/germany/flag-800.png'}
teams['RUS'] = {'logo' : 'https://cdn.countryflags.com/thumbs/russia/flag-800.png'}
teams['SLO'] = {'logo' : 'https://cdn.countryflags.com/thumbs/slovakia/flag-800.png'}
teams['SWE'] = {'logo' : 'https://cdn.countryflags.com/thumbs/sweden/flag-800.png'}
teams['SWI'] = {'logo' : 'https://cdn.countryflags.com/thumbs/switzerland/flag-800.png'}
teams['USA'] = {'logo' : 'https://cdn.countryflags.com/thumbs/united-states-of-america/flag-800.png'}

In [260]:
styles = dict(
    wjc= {k : v for k, v in teams.items() if k in rosters.team.unique()} ,
    nhl= {k : v for k, v in teams.items() if k not in rosters.team.unique()}
)

In [263]:
with open('../../apple-model-app/assets/team-styles.json', 'w') as f:
    json.dump(styles, f )

In [280]:
url = 'https://cdn.countryflags.com/thumbs/canada/flag-800.png'

In [296]:
url = teams['nhl']['Vancouver Canucks']['logo']

In [310]:
def getsizes(uri):
    from io import BytesIO
    from PIL import Image

    image_raw = requests.get(uri)
    image = Image.open(BytesIO(image_raw.content))
    width, height = image.size

    return width, height

In [306]:
def get_logo_dims(team, league='nhl', size=60):

    from unidecode import unidecode
    import json

    # load team logos
    teams = json.load(open('../../apple-model-app/assets/team-styles.json'))

#     try:
    logo = teams[league][unidecode(team)]['logo']
    width, height = getsizes(logo)

    if  height >= width:
        width = (width / height) * size
        height = (height / height) * size
    else:
        height = (height / width) * size
        width = (width / width) * size    

    return {'height' : f"{height}px", 'width' : f"{width}px"}

In [315]:
url = 'https://upload.wikimedia.org/wikipedia/en/thumb/7/72/Anaheim_Ducks.svg/1200px-Anaheim_Ducks.svg.png'
getsizes(url)

(1200, 676)

In [316]:
get_logo_dims('Anaheim Ducks')

{'height': '33.800000000000004px', 'width': '60.0px'}

### Team Pipeline roster

In [9]:
import sys

sys.path.append('../../apple-model-app/')
from helpers import *


In [108]:
get_forward_ids(pipeline)

KeyError: 'position_count'