In [2]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MinMaxScaler

In [3]:
# import custom stats preparing tools
import sys

sys.path.insert(1,'../utils')

from stats_preparing import *

## choosing a metric

Here we study how the nearest-neighbour distances vary under different distance metrics. We use the metric that
produces the greatest variation (the highest standard deviation of distances), in the hope that it better separates the players.

In [15]:
# Both seasons go through exactly the same cleaning pipeline, so the CSVs are
# processed in a loop instead of two copy-pasted sections.
season_csv_paths = ('../data/players/95-96_merge_stats.csv',
                    '../data/players/19-20_merge_stats.csv')

# Cleaning helpers from stats_preparing, listed in the order they are applied.
cleaning_pipeline = (remove_slash_name, remove_rk,
                     remove_team, remove_age,
                     remove_min, remove_game,
                     remove_nan)

per_season_features = []
for csv_path in season_csv_paths:
    df = pd.read_csv(csv_path)
    for cleaning_step in cleaning_pipeline:
        df = cleaning_step(df)
    # only the first value returned by extract_name_position is used here
    season_features, _ = extract_name_position(df)
    per_season_features.append(season_features)

df_old, df_current = per_season_features

# A single scaler is fitted on the union of both seasons so that old and
# current players are mapped onto the same [0, 1] range per column.
scaler = MinMaxScaler().fit(np.concatenate((df_current.values, df_old.values)))
curr_values, old_values = (scaler.transform(frame.values)
                           for frame in (df_current, df_old))

# NOTE(review): some of these metric names (e.g. 'kulsinski') may not be
# available in recent SciPy / scikit-learn releases — TODO confirm.
candidate_metrics = ('braycurtis', 'canberra', 'chebyshev', 'correlation',
                     'hamming', 'kulsinski', 'minkowski', 'rogerstanimoto',
                     'russellrao', 'sokalmichener', 'sokalsneath',
                     'sqeuclidean')

# For every candidate metric: distance from each old player to his single
# nearest current player, summarised by the standard deviation.
# NOTE(review): the spread is compared across metrics without normalising for
# each metric's own scale — confirm this is the intended criterion.
performance_metric = {}
for metric in candidate_metrics:
    neigh = NearestNeighbors(n_neighbors=1, metric=metric).fit(curr_values)
    dst, ids = neigh.kneighbors(old_values)
    performance_metric[metric] = dst.std()

# name of the metric with the largest spread; used by the functions below
max_idx = max(performance_metric.items(), key=lambda item: item[1])[0]
max_idx

'canberra'

In [10]:
def get_alike_player(name, old='95-96', new='19-20', _type='merge_stats', n_neighbors=5):
    """Return the `new`-season players whose stats are closest to `name`'s `old`-season stats.

    Both seasons are read from '../data/players/{season}_{_type}.csv', cleaned
    with the stats_preparing helpers, restricted to the five standard
    positions, min-max scaled together and compared with a nearest-neighbour
    search. The distance metric is the module-level `max_idx`, so the
    metric-selection cell must have been run before calling this function.

    Parameters
    ----------
    name : str
        Value to match exactly against the 'Player' column of the old season.
    old, new : str
        Season labels used to build the CSV file names (defaults reproduce
        the original hard-coded 95-96 vs 19-20 comparison).
    _type : str
        Suffix of the CSV file name.
    n_neighbors : int
        Number of similar current players to return.

    Returns
    -------
    pandas.DataFrame
        Rows of the names frame produced by `extract_name_position` for the
        `n_neighbors` closest current players, in the order returned by
        `kneighbors`.

    Raises
    ------
    ValueError
        If no row of the cleaned old-season data matches `name`.
    """
    pos_used = ['PG', 'SG', 'SF', 'PF', 'C']

    def _load_season(season):
        # Shared load + clean pipeline, identical for both seasons.
        df = pd.read_csv('../data/players/{}_{}.csv'.format(season, _type))
        df = remove_rk(remove_slash_name(df))
        df = remove_age(remove_team(df))
        df = remove_game(remove_min(df))
        df = df.loc[df['Pos'].isin(pos_used)]
        return remove_nan(df)

    df_old = _load_season(old)
    df_target = df_old.loc[df_old['Player'] == name]
    if df_target.empty:
        # Fail early with a readable message instead of letting an empty
        # array reach scikit-learn.
        raise ValueError(
            "No player named {!r} in the {} data after position filtering "
            "and NaN removal".format(name, old))
    df_jordan, _ = extract_name_position(df_target)

    df_current_player, df_current_names = extract_name_position(_load_season(new))

    # Fit the scaler on current players plus the target so both are scaled
    # with the same per-column min/max.
    scaler = MinMaxScaler().fit(
        np.concatenate((df_current_player.values, df_jordan.values)))

    neigh = NearestNeighbors(n_neighbors=n_neighbors, metric=max_idx)
    neigh.fit(scaler.transform(df_current_player.values))

    _, ids = neigh.kneighbors(scaler.transform(df_jordan.values))
    # NOTE(review): if `name` matches several rows, only the first row's
    # neighbours are returned (same as before) — confirm this is intended.
    return df_current_names.iloc[ids[0]]

In [11]:
# 95-96 players for whom we look up the most similar current players
player_names = [
    'Michael Jordan',
    'Charles Barkley',
    'Shawn Bradley',
    'Muggsy Bogues',
    'Patrick Ewing',
    'Larry Johnson',
]

for p in player_names:
    print('Equivalent current player to {}'.format(p))
    # result frame followed by one blank line separating the players
    print(get_alike_player(p), end='\n\n')

Equivalent current player to Michael Jordan
                Player Pos
373      Kawhi Leonard  SF
623  Russell Westbrook  PG
162      Anthony Davis  PF
40        Bradley Beal  SG
307     Brandon Ingram  PF

Equivalent current player to Charles Barkley
                    Player Pos
191            Joel Embiid   C
162          Anthony Davis  PF
334           Nikola Jokić   C
507          Julius Randle  PF
12   Giannis Antetokounmpo  PF

Equivalent current player to Shawn Bradley
               Player Pos
626  Hassan Whiteside   C
1         Bam Adebayo  PF
6       Jarrett Allen   C
548       Ben Simmons  PG
0        Steven Adams   C

Equivalent current player to Muggsy Bogues
            Player Pos
288   Jaylen Hoard  SF
586    Evan Turner  PG
508    Josh Reaves  SG
417  Chimezie Metu  PF
367   Jalen Lecque  PG

Equivalent current player to Patrick Ewing
             Player Pos
183  Andre Drummond   C
22    Deandre Ayton   C
169   DeMar DeRozan  SF
162   Anthony Davis  PF
191     Joel Emb

In [67]:
def get_alike_team(team, old='95-96', new='19-20', _type='merge_stats', n_neighbors=1):
    """Match every player of an `old`-season team to similar `new`-season players.

    Both seasons are read from '../data/players/{season}_{_type}.csv', cleaned
    with the stats_preparing helpers, restricted to the five standard
    positions, min-max scaled together and compared with a nearest-neighbour
    search. The distance metric is the module-level `max_idx`, so the
    metric-selection cell must have been run before calling this function.

    Parameters
    ----------
    team : str
        Value to match exactly against the 'Tm' column of the old season.
    old, new : str
        Season labels used to build the CSV file names and the result
        column names.
    _type : str
        Suffix of the CSV file name.
    n_neighbors : int
        Number of similar current players listed for each old player.

    Returns
    -------
    pandas.DataFrame
        One row per old-team player with two columns: '<old> players' (the
        old player's name) and '<new> players' (comma-separated names of the
        `n_neighbors` closest current players).

    Raises
    ------
    ValueError
        If no row of the cleaned old-season data belongs to `team`.
    """
    pos_used = ['PG', 'SG', 'SF', 'PF', 'C']

    # --- old season, restricted to the requested team ---
    df = pd.read_csv('../data/players/{}_{}.csv'.format(old, _type))
    df = remove_rk(remove_slash_name(df))
    # The team filter reads the 'Tm' column, so it runs before remove_team
    # (presumably that helper drops the column — verify in stats_preparing).
    df = df.loc[df['Tm'] == team]
    df = remove_age(remove_team(df))
    df = remove_game(remove_min(df))
    df = df.loc[df['Pos'].isin(pos_used)]
    df = remove_nan(df)
    if df.empty:
        # Fail early with a readable message instead of letting an empty
        # array reach scikit-learn.
        raise ValueError(
            "No players found for team {!r} in the {} data after position "
            "filtering and NaN removal".format(team, old))
    df_old_team, df_old_names = extract_name_position(df)

    # --- new season, all teams ---
    df = pd.read_csv('../data/players/{}_{}.csv'.format(new, _type))
    df = remove_rk(remove_slash_name(df))
    df = remove_age(remove_team(df))
    df = remove_game(remove_min(df))
    df = df.loc[df['Pos'].isin(pos_used)]
    df = remove_nan(df)
    df_current_player, df_current_names = extract_name_position(df)

    # Fit the scaler on both groups so they share the same per-column min/max.
    scaler = MinMaxScaler().fit(
        np.concatenate((df_current_player.values, df_old_team.values)))

    neigh = NearestNeighbors(n_neighbors=n_neighbors, metric=max_idx)
    neigh.fit(scaler.transform(df_current_player.values))
    _, ids = neigh.kneighbors(scaler.transform(df_old_team.values))

    # First column of each names frame holds the value shown in the result;
    # look names up positionally, one row of neighbour indices per old player.
    old_names = df_old_names.iloc[:, 0].values
    current_names = df_current_names.iloc[:, 0].values
    result_df = pd.DataFrame({
        '{} players'.format(old): old_names,
        '{} players'.format(new): [
            ', '.join(str(player) for player in current_names[neighbour_ids])
            for neighbour_ids in ids
        ],
    })

    return result_df

In [75]:
# Compare the 00-01 Philadelphia roster with the 19-20 player pool,
# listing the three closest current players for each old player.
old = '00-01'
new = '19-20'
result_df = get_alike_team('PHI', old=old, new=new,
                           _type='merge_stats', n_neighbors=3)
print('old player and current equivalent :')
result_df

old player and current equivalent :


Unnamed: 0,00-01 players,19-20 players
0,Raja Bell,"Jordan Bone, Derrick Walton, Gabe Vincent"
1,Rodney Buford,"PJ Dozier, Lonnie Walker, DaQuan Jeffries"
2,Matt Geiger,"Anžejs Pasečņiks, Harry Giles, Hamidou Diallo"
3,Tyrone Hill,"Richaun Holmes, Ivica Zubac, Steven Adams"
4,Allen Iverson,"Kawhi Leonard, Bradley Beal, Devin Booker"
5,Jumaine Jones,"James Ennis, Jalen McDaniels, Javonte Green"
6,Toni Kukoč,"Delon Wright, Troy Brown Jr., Jalen Brunson"
7,George Lynch,"Cody Zeller, Dwight Powell, Daniel Theis"
8,Todd MacCulloch,"Ante Žižić, Daniel Gafford, Robert Williams"
9,Vernon Maxwell,"Jerome Robinson, Tyler Johnson, Edmond Sumner"
