In [3]:
import pandas as pd
import numpy as np
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from scipy import spatial
from scipy import stats
from scipy.stats import linregress

# Notes
- Evaluate Caruso's defensive impact with DAVIS metric from Dribble Analytics
- Metrics include:
    - STL x
    - BLK x
    - STL% x
    - BLK% x
    - DFGA
    - DWS x
    - DBPM x
    - DRTG
- Compare against all other guards


## Load dataframe

In [25]:
df = pd.read_csv('BRef_AdvStats.csv')
df.head()
# Filter for Games over 20
df = df[df.G >= 20]
df.head()
# df[df.Player == "Alex Caruso" | df.Player == "Jaren Jackson Jr." | df.Player == "Nic Claxton"]

Unnamed: 0,Player,Pos,Age,Tm,G,STL%,BLK%,DWS,DBPM,VORP,STL,BLK
0,A.J. Green,SG,23,MIL,24,1.1,0.0,0.2,-0.6,0.1,5,0
4,Aaron Gordon,PF,27,DEN,46,1.2,2.3,1.4,-0.5,1.5,35,35
5,Aaron Holiday,PG,26,ATL,47,1.8,1.2,0.6,0.9,0.0,27,10
6,Aaron Nesmith,SF,23,IND,46,1.6,1.6,0.9,-0.2,-0.2,37,20
7,Aaron Wiggins,SG,24,OKC,40,1.2,0.7,0.7,-0.6,0.0,19,6


## Create DAVIS Calculator

In [10]:
pos_features = ["STL", "BLK", "STL%", "BLK%", "DWS", "DBPM"]
neg_features = ["VORP"]

In [11]:
# Calculate distance
def calc_dist(pos, neg, df):
    features = pos + neg

    df_norm = df.copy()
    df_norm[features] = MinMaxScaler().fit_transform(df_norm[features])

    max_good = list(df_norm[pos].max().values)
    min_bad = list(df_norm[neg].min().values)
    best_values = max_good + min_bad

    euclid = []
    manhat = []
    wasser = []

    for index, row in df_norm.iterrows():
        euclid.append(spatial.distance.euclidean(row[features], best_values))
        manhat.append(spatial.distance.cityblock(row[features], best_values))
        wasser.append(stats.wasserstein_distance(row[features], best_values))
    
    dist_df = pd.DataFrame(zip(df['Player'], euclid, manhat, wasser), columns=['Player', 'euclid', 'manhat', 'wasser'])
    dist_df[['euclid', 'manhat', 'wasser']] = MinMaxScaler().fit_transform(dist_df[['euclid', 'manhat', 'wasser']])

    dist_df['davis'] = (dist_df['euclid'] + dist_df['manhat']+dist_df['wasser'])
    dist_df['davis'] *= -1
    dist_df['davis'] += 1
    dist_df['pos'] = df['Pos']

    dist_df = dist_df.sort_values(by='davis', ascending=False).reset_index(drop=True)
    return(dist_df)

In [26]:
davis = calc_dist(pos_features, neg_features, df)
davis.head()
davis_guards = davis[davis.pos == "C"]
davis_guards

Unnamed: 0,Player,euclid,manhat,wasser,davis,pos
2,Alex Caruso,0.139764,0.145826,0.166855,0.547554,C
16,Anthony Davis,0.244910,0.362267,0.333402,0.059421,C
17,Bam Adebayo,0.310425,0.346541,0.296462,0.046573,C
49,Jordan Goodwin,0.403213,0.450981,0.454830,-0.309024,C
58,Gary Trent Jr.,0.469182,0.478927,0.405288,-0.353397,C
...,...,...,...,...,...,...
367,Bojan Bogdanović,0.868284,0.923167,0.810599,-1.602050,C
374,Anthony Gill,0.866110,0.882181,0.878088,-1.626379,C
382,Duncan Robinson,0.905518,0.906350,0.913914,-1.725781,C
394,Matt Ryan,0.948655,0.958737,0.945828,-1.853220,C
