In [52]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score, silhouette_score
from sklearn.preprocessing import StandardScaler, PowerTransformer
from sklearn.decomposition import PCA
from sympy import Point
import os, shutil
from scipy.stats import percentileofscore
import math

np.warnings = warnings
import import_ipynb
%store -r transformed_defence_data
%store -r evenStrengthD_names
%store -r transformed_ppD_data
%store -r powerPlayD_names
%store -r transformed_pkD_data
%store -r penaltyKillD_names
%store -r transformed_forward_data
%store -r evenStrengthF_names
%store -r transformed_ppF_data
%store -r powerPlayF_names
%store -r transformed_pkF_data
%store -r penaltyKillF_names

# Column groups used by the ranking functions below. All of them are derived
# from the defence table's column names and are reused for forwards as well.

# Offensive columns: every column whose name matches "I_F" or "OnIce_F"
# (DataFrame.filter(regex=...) does a case-sensitive search on the labels).
offensive_stats = transformed_defence_data.filter(regex="I_F").columns
OnIce = transformed_defence_data.filter(regex="OnIce_F").columns
offensive_stats = offensive_stats.append(OnIce)
# Take out the "I_F" columns that are handled by other ranks (takeaways, hits,
# d-zone giveaways, penalty minutes) and the shift start/end counters.
offensive_stats = offensive_stats.drop(["I_F_takeaways", "I_F_hits", "I_F_dZoneGiveaways", 'I_F_oZoneShiftStarts', 'I_F_dZoneShiftStarts',
       'I_F_neutralZoneShiftStarts', 'I_F_flyShiftStarts',
       'I_F_oZoneShiftEnds', 'I_F_dZoneShiftEnds', 'I_F_neutralZoneShiftEnds',
       'I_F_flyShiftEnds', 'I_F_shifts', 'I_F_penalityMinutes'])

# Defensive columns: every "OnIce_A" column plus three individual stats and
# team_rank. filter(items=...) keeps only the labels that actually exist.
defensive_other = transformed_defence_data.filter(items=["I_F_takeaways", "I_F_dZoneGiveaways", "shotsBlockedByPlayer"]).columns
defensive_stats = transformed_defence_data.filter(regex="OnIce_A").columns
defensive_stats = defensive_stats.append(defensive_other)
defensive_stats = defensive_stats.append(transformed_defence_data.filter(items=["team_rank"]).columns)

# Stats that involve both offensive and defensive play.
general_stats = transformed_defence_data.filter(items=["onIce_xGoalsPercentage", "onIce_corsiPercentage", 
                                                       "onIce_fenwickPercentage", "gameScore"]).columns
#both_stats = ["onIce_xGoalsPercentage", "onIce_corsiPercentage", "onIce_fenwickPercentage"]

# Penalty columns. Only `penalty_stats` is read by the visible code
# (penalties_rank); the negative/positive split is not referenced in this view.
penalty_stats_negative = ["penalties", "I_F_penalityMinutes"]
penalty_stats_positive =  ["penalityMinutesDrawn", "penaltiesDrawn"]
penalty_stats = ["penalties", "I_F_penalityMinutes", "penalityMinutesDrawn", "penaltiesDrawn"]

# NOTE(review): `hits` is not referenced in the visible cells; physicality_rank
# hard-codes "I_F_hits" instead.
hits = ["I_F_hits"]

# Keep the un-ranked team_rank columns so calculate_total_rank can report the
# original value rather than its percentile.
original_forward_team_rank = transformed_forward_data["team_rank"]
original_defense_team_rank = transformed_defence_data["team_rank"]


In [3]:
def get_percentile_rank(df):
    """Return the percentile rank (0-100] of every value.

    Accepts a Series or a DataFrame; for a DataFrame each column is ranked
    on its own. Uses pandas' default ranking, so tied values share the
    average of their ranks.
    """
    fractional_rank = df.rank(pct=True)
    return fractional_rank * 100
    

In [4]:
# df needs to be a data table not a ranking table
def penalties_rank(df):
    """Score players on penalty differential; a higher result is better.

    ``df`` must contain raw stat values (not percentile ranks) for the four
    columns in the notebook-level ``penalty_stats`` list.

    Two differentials are built as "taken minus drawn" (minutes and penalty
    count), each is percentile-ranked, the count differential is weighted
    double, and the combined score is ranked again. The final percentile is
    flipped (100 - rank) so that a smaller differential yields a higher score.
    """
    raw = df[penalty_stats]
    differentials = raw.assign(
        minutes_diff=raw["I_F_penalityMinutes"] - raw["penalityMinutesDrawn"],
        pen_diff=raw["penalties"] - raw["penaltiesDrawn"],
    )[["minutes_diff", "pen_diff"]]

    ranked = get_percentile_rank(differentials)
    ranked["pen_diff"] *= 2  # penalty count matters twice as much as minutes
    combined = ranked.sum(axis=1)

    return 100 - get_percentile_rank(combined)

In [5]:
def physicality_rank(df):
    """Return the percentile rank of each player's ``I_F_hits`` value."""
    hit_values = df["I_F_hits"]
    return get_percentile_rank(hit_values)

In [47]:
def defensive_rank(df):
    """Collapse percentile-ranked defensive stats into one defensive percentile.

    Parameters
    ----------
    df : pandas.DataFrame
        Per-player percentile ranks. Must contain every column in the
        notebook-level ``defensive_stats`` index plus ``icetime`` and
        ``team_rank``.

    Returns
    -------
    pandas.Series
        Percentile rank (0-100) of each player's weighted row sum.
    """
    more_is_good = ["I_F_takeaways", "shotsBlockedByPlayer"]
    more_is_good_stats = df.filter(items=more_is_good)

    # The best defenders have the lowest "against" numbers (xGoals against,
    # etc.), so flip those percentiles; takeaways and blocks stay as they are.
    defence_df = 100 - df[defensive_stats].drop(columns=more_is_good)
    defence_df = pd.concat([defence_df, more_is_good_stats], axis=1)

    most_juice = ["OnIce_A_xGoals", "OnIce_A_highDangerxGoals"]

    extra_juice = ["OnIce_A_goals", "OnIce_A_flurryScoreVenueAdjustedxGoals",
                   "OnIce_A_scoreVenueAdjustedxGoals"]

    # Every entry needs its own comma: adjacent string literals are silently
    # concatenated by Python, which previously produced two non-existent
    # column names and left four stats without their x2 weight.
    # NOTE(review): "OnIce_A_highDangerxGoals" is also in most_juice, so it
    # ends up weighted 2 * 6 = 12 -- confirm that stacking is intended.
    to_juice = [
        "OnIce_A_xOnGoal", "OnIce_A_reboundGoals", "OnIce_A_highDangerGoals",
        "OnIce_A_highDangerxGoals", "OnIce_A_mediumDangerxGoals",
        "OnIce_A_mediumDangerGoals", "OnIce_A_unblockedShotAttempts",
        "OnIce_A_scoreAdjustedUnblockedShotAttempts",
        "OnIce_A_xGoals_with_earned_rebounds", "shotsBlockedByPlayer", "I_F_takeaways",
    ]

    to_hinder = ["OnIce_A_xGoalsFromxReboundsOfShots", "OnIce_A_scoreAdjustedShotsAttempts"]

    # filter(items=...) ignores labels that are absent, so each weight only
    # touches the columns that exist. Weights multiply when a column is listed
    # in more than one group.
    for columns, weight in ((to_juice, 2), (extra_juice, 3), (most_juice, 6), (to_hinder, 0.25)):
        present = defence_df.filter(items=columns).columns
        defence_df[present] *= weight

    defence_df["icetime"] = df["icetime"] * 5
    # Overwrites the flipped team_rank that came in through defensive_stats
    # with the un-flipped percentile.
    defence_df["team_rank"] = df["team_rank"] * 2

    summed_rankings = defence_df.sum(axis=1)
    return get_percentile_rank(summed_rankings)


In [7]:
# Sanity check: list the columns that defensive_rank will select via defensive_stats.
print(defensive_stats)

Index(['OnIce_A_xOnGoal', 'OnIce_A_xGoals', 'OnIce_A_flurryAdjustedxGoals',
       'OnIce_A_scoreVenueAdjustedxGoals',
       'OnIce_A_flurryScoreVenueAdjustedxGoals', 'OnIce_A_shotsOnGoal',
       'OnIce_A_missedShots', 'OnIce_A_blockedShotAttempts',
       'OnIce_A_shotAttempts', 'OnIce_A_goals', 'OnIce_A_rebounds',
       'OnIce_A_reboundGoals', 'OnIce_A_lowDangerShots',
       'OnIce_A_mediumDangerShots', 'OnIce_A_highDangerShots',
       'OnIce_A_lowDangerxGoals', 'OnIce_A_mediumDangerxGoals',
       'OnIce_A_highDangerxGoals', 'OnIce_A_lowDangerGoals',
       'OnIce_A_mediumDangerGoals', 'OnIce_A_highDangerGoals',
       'OnIce_A_scoreAdjustedShotsAttempts', 'OnIce_A_unblockedShotAttempts',
       'OnIce_A_scoreAdjustedUnblockedShotAttempts',
       'OnIce_A_xGoalsFromxReboundsOfShots',
       'OnIce_A_xGoalsFromActualReboundsOfShots', 'OnIce_A_reboundxGoals',
       'OnIce_A_xGoals_with_earned_rebounds',
       'OnIce_A_xGoals_with_earned_rebounds_scoreAdjusted',
       'OnIce_A

In [9]:
def offensive_rank(df):
    """Collapse percentile-ranked offensive stats into one offensive percentile.

    Parameters
    ----------
    df : pandas.DataFrame
        Per-player percentile ranks. Must contain every column in the
        notebook-level ``offensive_stats`` index.

    Returns
    -------
    pandas.Series
        Percentile rank (0-100) of each player's weighted row sum.
    """
    less_is_good = ["I_F_giveaways", "I_F_savedShotsOnGoal", "I_F_missedShots"]

    # Flip the percentiles of the stats where a smaller value is better.
    offensive_df = df[offensive_stats].drop(columns=less_is_good)
    flipped = 100 - df.filter(items=less_is_good)
    offensive_df = pd.concat([offensive_df, flipped], axis=1)

    # "OnIce_F_goals" is spelled with a lower-case "g" to match the casing of
    # the other on-ice goal columns (e.g. "OnIce_A_goals"). The previous
    # "OnIce_F_Goals" matched nothing, and filter(items=...) drops unknown
    # labels without complaint, so the x8 weight was never applied.
    # NOTE(review): confirm the exact label against the source data.
    goals = ["I_F_goals", "I_F_points", "OnIce_F_goals"]

    extra_juice = ["I_F_primaryAssists", "OnIce_F_xGoals"]

    # "onIce_xGoalsPercentage" is not part of offensive_stats, so it is a
    # no-op here; it is kept so the list mirrors the intent of the weighting.
    to_juice = [
        "I_F_highDangerxGoals", "I_F_highDangerGoals", "OnIce_F_xGoals", "I_F_points",
        "onIce_xGoalsPercentage", "I_F_highDangerShots",
        "OnIce_F_highDangerxGoals", "I_F_xGoals", "I_F_mediumDangerxGoals",
    ]

    to_slightly_hinder = [
        "OnIce_F_xGoals_with_earned_rebounds",
        "OnIce_F_xGoals_with_earned_rebounds_scoreAdjusted",
        "OnIce_F_xGoals_with_earned_rebounds_scoreFlurryAdjusted",
    ]

    to_hinder = [
        "I_F_playStopped", "I_F_playContinuedInZone", "I_F_savedShotsOnGoal",
        "I_F_savedUnblockedShotAttempts", "I_F_playContinuedOutsideZone",
        "I_F_missedShots", "OnIce_F_missedShots", "I_F_freeze", "I_F_blockedShotAttempts",
        "OnIce_F_lowDangerShots", "OnIce_F_lowDangerxGoals", "OnIce_F_lowDangerGoals",
        "I_F_xFreeze", "OnIce_F_missed_shots",
    ]

    # Rebound-related columns are damped first; the later group weights
    # multiply on top of whatever has already been applied to a column.
    offensive_df[offensive_df.filter(regex='ebound').columns] *= 0.3
    for columns, weight in ((goals, 8), (to_juice, 1.75), (extra_juice, 4),
                            (to_hinder, 0.2), (to_slightly_hinder, 0.5)):
        present = offensive_df.filter(items=columns).columns
        offensive_df[present] *= weight

    summed_rankings = offensive_df.sum(axis=1)
    return get_percentile_rank(summed_rankings)


In [22]:
# Scratch check of how many stat columns fall outside the weight groups.
# The lists below are notebook-level copies of the ones inside offensive_rank
# (to_juice here lists 'OnIce_F_xGoals' once; the function lists it twice).
stats = offensive_stats.append(general_stats)
#print(offensive_stats)

goals = ['I_F_goals', "I_F_points", "OnIce_F_Goals"]

extra_juice = ['I_F_primaryAssists', "OnIce_F_xGoals"]

to_juice = ['I_F_highDangerxGoals', 'I_F_highDangerGoals', 'I_F_points',
                'onIce_xGoalsPercentage', 'I_F_highDangerShots', 'OnIce_F_xGoals',
                'OnIce_F_highDangerxGoals', "I_F_xGoals", 'I_F_mediumDangerxGoals']
    
to_slightly_hinder = ['OnIce_F_xGoals_with_earned_rebounds',
       'OnIce_F_xGoals_with_earned_rebounds_scoreAdjusted',
       'OnIce_F_xGoals_with_earned_rebounds_scoreFlurryAdjusted']
    
to_hinder = ['I_F_playStopped', 'I_F_playContinuedInZone', 'I_F_savedShotsOnGoal', 'I_F_savedUnblockedShotAttempts',
       'I_F_playContinuedOutsideZone', 'I_F_missedShots', 'OnIce_F_missedShots', 'I_F_playContinuedOutsideZone',
       'I_F_freeze', 'I_F_blockedShotAttempts',
       'OnIce_F_lowDangerShots', 'OnIce_F_lowDangerxGoals', 'OnIce_F_lowDangerGoals', 'I_F_xFreeze', 'OnIce_F_missed_shots']
# len(stats) minus (number of 'ebound' columns in the forward table + the list
# lengths). The lists overlap and contain duplicates, and to_slightly_hinder
# is not subtracted, so this is only a rough count.
print((len(stats) - (len(transformed_forward_data.filter(regex='ebound').columns) + len(goals) + len(to_juice) + len(extra_juice)+ len(to_hinder) ) ))

# Total number of offensive + general columns, then the 'ebound' column count.
print(len(stats))
print(len(transformed_forward_data.filter(regex='ebound').columns))

25
79
25


In [20]:
def general_play_rank(df):
    """Percentile rank of a weighted blend of the two-way ("general") stats.

    ``df`` holds percentile ranks and must contain the columns in the
    notebook-level ``general_stats`` index. Expected-goals share counts most,
    then game score, with Corsi and Fenwick share damped.
    """
    column_weights = (
        ("onIce_xGoalsPercentage", 3),
        ("gameScore", 2),
        ("onIce_corsiPercentage", 0.75),
        ("onIce_fenwickPercentage", 0.5),
    )

    weighted = df[general_stats].copy()
    for column, weight in column_weights:
        weighted[column] *= weight

    return get_percentile_rank(weighted.sum(axis=1))

In [11]:
# Percentile-rank every even-strength defence stat; `ranking` is reused by the
# per-category cells further down. The joined table is printed and exported.
ranking = get_percentile_rank(transformed_defence_data)
full_df = pd.concat(objs=[evenStrengthD_names, ranking], axis="columns")
print(full_df)
full_df.to_csv('percentile_rank.csv', index=False)

      playerId  season              name team position    icetime   gameScore  \
2      8480950    2023   Ilya Lyubushkin  TOR        D  51.282051   10.622711   
17     8480860    2023        Kevin Bahl  NJD        D  69.963370   18.315018   
37     8476473    2023     Connor Murphy  CHI        D  35.897436   10.256410   
47     8481743    2023    Marc Del Gaizo  NSH        D   2.197802   51.282051   
107    8480803    2023     Evan Bouchard  EDM        D  92.673993  100.000000   
...        ...     ...               ...  ...      ...        ...         ...   
4557   8480871    2023      Adam Boqvist  CBJ        D  22.710623   52.380952   
4562   8474166    2023     Alec Martinez  VGK        D  43.956044   28.571429   
4592   8477346    2023  MacKenzie Weegar  CGY        D  91.941392   93.406593   
4597   8476854    2023   Hampus Lindholm  BOS        D  78.754579   58.608059   
4617   8477488    2023       Brett Pesce  CAR        D  65.934066   82.051282   

      onIce_xGoalsPercentag

In [12]:
def get_defense_overall_rating(row):
    """Weighted average of a defenceman's category ranks.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series or dict)
        Must provide ``offensive_rank``, ``defensive_rank``,
        ``physicality_rank``, ``general_play_rank``, ``penalties_rank``,
        ``Finishing``, ``ev_icetime``, ``power_play_rank`` and
        ``penalty_kill_rank``.

    Returns
    -------
    float
        Sum of rank * weight divided by the sum of the weights used.
        Special-teams ranks equal to 0 (the fill value for players with no
        power-play / penalty-kill row) are left out of both sums.
    """
    even_strength_weights = {
        "offensive_rank": 5,
        "defensive_rank": 6,
        "physicality_rank": 1.5,
        "general_play_rank": 8,
        "penalties_rank": 1.25,
        # Read the Finishing column itself; this term previously re-used
        # ev_icetime, which counted ice time twice and ignored finishing.
        "Finishing": 1,
        "ev_icetime": 1,
    }
    special_teams_weights = {"power_play_rank": 0.5, "penalty_kill_rank": 0.75}

    total = sum(row[column] * weight for column, weight in even_strength_weights.items())
    total_weight = sum(even_strength_weights.values())

    for column, weight in special_teams_weights.items():
        if row[column]:
            total += row[column] * weight
            total_weight += weight

    return total / total_weight

In [13]:
def get_forward_overall_rating(row):
    """Weighted average of a forward's category ranks.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series or dict)
        Must provide ``offensive_rank``, ``defensive_rank``,
        ``physicality_rank``, ``general_play_rank``, ``penalties_rank``,
        ``Finishing``, ``ev_icetime``, ``power_play_rank`` and
        ``penalty_kill_rank``.

    Returns
    -------
    float
        Sum of rank * weight divided by the sum of the weights used.
        Special-teams ranks equal to 0 (the fill value for players with no
        power-play / penalty-kill row) are left out of both sums.
    """
    even_strength_weights = {
        "offensive_rank": 5,
        "defensive_rank": 2.5,
        "physicality_rank": 0.5,
        "general_play_rank": 3.75,
        "penalties_rank": 1.25,
        # Read the Finishing column itself; this term previously re-used
        # ev_icetime, giving ice time a combined weight of 8 and finishing 0.
        "Finishing": 7,
        "ev_icetime": 1,
    }
    special_teams_weights = {"power_play_rank": 0.75, "penalty_kill_rank": 0.5}

    total = sum(row[column] * weight for column, weight in even_strength_weights.items())
    total_weight = sum(even_strength_weights.values())

    for column, weight in special_teams_weights.items():
        if row[column]:
            total += row[column] * weight
            total_weight += weight

    return total / total_weight

In [44]:
def calculate_total_rank(ranking, func, evenStrength_names, evenStrength_data,
                          penaltyKill_names, penaltyKill_data, powerPlay_names, powerPlay_data, team_rank):
    """Build the per-player ratings table and its overall percentile.

    Parameters
    ----------
    ranking : pandas.DataFrame
        Percentile ranks of the even-strength stats; must include the
        ``icetime`` and ``finishing`` columns.
    func : callable
        Row-wise rating function (e.g. ``get_defense_overall_rating``) that
        receives each row of the numeric part of the table.
    evenStrength_names, penaltyKill_names, powerPlay_names : pandas.DataFrame
        Five identity columns (playerId, season, name, team, position) for
        each situation, index-aligned with the matching data frame.
    evenStrength_data : pandas.DataFrame
        Raw even-strength stats (penalties_rank needs raw values, not ranks).
    penaltyKill_data, powerPlay_data : pandas.DataFrame
        Raw special-teams stats; they are percentile-ranked here.
    team_rank : pandas.Series
        Original team rank per player, index-aligned with ``ranking``.

    Returns
    -------
    pandas.DataFrame
        Identity columns, team_rank, the category ranks, special-teams ranks
        (0 where the player has no special-teams row) and ``overall``.
    """
    id_columns = ["playerId", "season", "name", "team", "position"]

    pRank = penalties_rank(evenStrength_data)
    dRank = defensive_rank(ranking)
    hRank = physicality_rank(ranking)
    oRank = offensive_rank(ranking)
    generalPlayRank = general_play_rank(ranking)
    ppRank = offensive_rank(get_percentile_rank(powerPlay_data))
    pkRank = defensive_rank(get_percentile_rank(penaltyKill_data))

    ev_df = pd.concat([evenStrength_names, team_rank, ranking["icetime"], oRank, dRank,
                       generalPlayRank, hRank, pRank, ranking["finishing"]], axis=1)
    ev_df.columns = id_columns + ["team_rank", "ev_icetime", "offensive_rank", "defensive_rank",
                                  "general_play_rank", "physicality_rank", "penalties_rank", "Finishing"]

    pk_df = pd.concat([penaltyKill_names, pkRank], axis=1)
    pk_df.columns = id_columns + ["penalty_kill_rank"]
    pp_df = pd.concat([powerPlay_names, ppRank], axis=1)
    pp_df.columns = id_columns + ["power_play_rank"]

    # Left joins keep every even-strength player; those without special-teams
    # rows get NaN, which is turned into 0 so the rating functions skip them.
    full_df = pd.merge(ev_df, pk_df, how='left', on=id_columns)
    full_df = pd.merge(full_df, pp_df, how='left', on=id_columns)
    full_df = full_df.fillna(0)

    # Rate each player from the numeric columns only (everything after the
    # identity columns), without writing into a slice of full_df.
    total = full_df.iloc[:, len(id_columns):].apply(func, axis=1)
    full_df["overall"] = get_percentile_rank(total)

    return full_df
    
    


In [14]:
# Penalty-differential percentile for even-strength defencemen, exported with
# the player identity columns.
pRank = penalties_rank(transformed_defence_data)

full_df = pd.concat([evenStrengthD_names, pRank], axis=1)
full_df.columns = ["playerId", "season", "name", "team", "position", "penalties_rank"]
# This is the first export into this folder; create it so the notebook can be
# run from a clean checkout (to_csv does not create missing directories).
os.makedirs("evenStengthD_rankings", exist_ok=True)
full_df.to_csv("evenStengthD_rankings/penalty_rank.csv", index=False)

In [15]:
# Defensive percentile for even-strength defencemen, exported with identity columns.
dRank = defensive_rank(ranking)

full_df = pd.concat([evenStrengthD_names, dRank], axis=1).set_axis(
    ["playerId", "season", "name", "team", "position", "defence_rank"], axis=1
)
full_df.to_csv("evenStengthD_rankings/defence_rank.csv", index=False)

In [16]:
# Hits percentile for even-strength defencemen, exported with identity columns.
hRank = physicality_rank(ranking)
full_df = pd.concat([evenStrengthD_names, hRank], axis=1).set_axis(
    ["playerId", "season", "name", "team", "position", "physicality_rank"], axis=1
)
full_df.to_csv("evenStengthD_rankings/physicality_rank.csv", index=False)

In [17]:
# Offensive percentile for even-strength defencemen, exported with identity columns.
oRank = offensive_rank(ranking)

full_df = pd.concat([evenStrengthD_names, oRank], axis=1).set_axis(
    ["playerId", "season", "name", "team", "position", "offense_rank"], axis=1
)
full_df.to_csv("evenStengthD_rankings/offense_rank.csv", index=False)

In [37]:
# Two-way ("general play") percentile for even-strength defencemen, exported
# with identity columns.
generalPlayRank = general_play_rank(ranking)

full_df = pd.concat([evenStrengthD_names, generalPlayRank], axis=1).set_axis(
    ["playerId", "season", "name", "team", "position", "general_play_rank"], axis=1
)
full_df.to_csv("evenStengthD_rankings/general_play_rank.csv", index=False)

In [19]:
# Power play (defencemen): export the per-stat percentiles, then score the
# unit with the offensive weighting and export that as the power-play rank.
pp_total_ranking = get_percentile_rank(transformed_ppD_data)

pp_ranking_df = pd.concat([powerPlayD_names, pp_total_ranking], axis=1)
pp_ranking_df.to_csv("./pp_total_ranking.csv", index=False)

ppRank = offensive_rank(pp_total_ranking)
full_df = pd.concat([powerPlayD_names, ppRank], axis=1).set_axis(
    ["playerId", "season", "name", "team", "position", "power_play_rank"], axis=1
)
full_df.to_csv("evenStengthD_rankings/powerplay_rank_rank.csv", index=False)

In [165]:
# Penalty kill (defencemen): export the per-stat percentiles, then score the
# unit with the defensive weighting and export that as the penalty-kill rank.
pk_total_ranking = get_percentile_rank(transformed_pkD_data)

# NOTE(review): this re-binds `pp_ranking_df` (the power-play name) to the
# penalty-kill table; the name is kept because it is notebook-level state.
pp_ranking_df = pd.concat([penaltyKillD_names, pk_total_ranking], axis=1)
pp_ranking_df.to_csv("./pk_total_ranking.csv", index=False)

pkRank = defensive_rank(pk_total_ranking)
full_df = pd.concat([penaltyKillD_names, pkRank], axis=1).set_axis(
    ["playerId", "season", "name", "team", "position", "penalty_kill_rank"], axis=1
)
full_df.to_csv("evenStengthD_rankings/penalty_kill_rank.csv", index=False)

In [53]:
# Defencemen: percentile-rank the even-strength stats, build the full ratings
# table with the defence weighting, and export it rounded to whole numbers.
defense_ranking = get_percentile_rank(transformed_defence_data)
full_df = calculate_total_rank(defense_ranking, get_defense_overall_rating, evenStrengthD_names, transformed_defence_data,
                                penaltyKillD_names, transformed_pkD_data, powerPlayD_names, transformed_ppD_data, original_defense_team_rank)
# Make sure the output folder exists, as the forward export does, so this cell
# does not depend on an earlier cell or a manual mkdir.
os.makedirs("evenStengthD_rankings", exist_ok=True)
full_df.round().to_csv("evenStengthD_rankings/total_rankings.csv", index=False)

In [48]:
# Forwards: percentile-rank the even-strength stats, build the full ratings
# table with the forward weighting, and export it rounded to whole numbers.
forward_ranking = get_percentile_rank(transformed_forward_data)
full_df = calculate_total_rank(forward_ranking, get_forward_overall_rating, evenStrengthF_names, transformed_forward_data,
                                penaltyKillF_names, transformed_pkF_data, powerPlayF_names, transformed_ppF_data, original_forward_team_rank)
# exist_ok=True replaces the separate exists()/mkdir() pair: one call, no gap
# between the check and the creation.
os.makedirs("./forward_ranking", exist_ok=True)
full_df.round().to_csv("forward_ranking/total_rankings.csv", index=False)