In [1]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MinMaxScaler

In [2]:
def remove_slash_name(_df):
    df = _df.copy()
    df['Player'] = df['Player'].apply(lambda row: row.split('\\')[0])
    df['Player'] = df['Player'].apply(lambda row: row.split('*')[0])
    
    return df

def remove_rk(_df):
    df = _df.copy()
    return df.drop(['Rk'], axis=1)

def remove_team(_df):
    df = _df.copy()
    return df.drop(['Tm'], axis=1)

def remove_age(_df):
    df = _df.copy()
    return df.drop(['Age'], axis=1)

def remove_game(_df):
    df = _df.copy()
    return df.drop(['G'], axis=1)   

def remove_game_started(_df):
    df = _df.copy()
    return df.drop(['GS'], axis=1)   

def remove_min(_df):
    df = _df.copy()
    return df.drop(['MP'], axis=1)    

def extract_name_position(_df):
    df = _df.copy()
    return _df.drop(['Pos','Player'], axis=1), _df[['Player', 'Pos']]
    

def remove_nan(_df):
    df = _df.copy()
    return df.dropna(axis=1, how='all').fillna(0)

In [3]:
# we use pandas to load data directly from csv
df = pd.read_csv('../data/adv_stats_95-96.csv')

# apply a couple of preprocessing function
df = remove_rk(remove_slash_name(df))
df = remove_age(remove_team(df))
df = remove_game(remove_min(df))

pos_used = ['PG','SG','SF','PF','C']
df = df.loc[df['Pos'].isin(pos_used)]
df = remove_nan(df)

df_old, _ = extract_name_position(df)

df = pd.read_csv('../data/adv_stats_18-19.csv')

df = remove_rk(remove_slash_name(df))
df = remove_age(remove_team(df))
df = remove_game(remove_min(df))

pos_used = ['PG','SG','SF','PF','C']
df = df.loc[df['Pos'].isin(pos_used)]
df = remove_nan(df)

df_current, _ = extract_name_position(df)

scaler = MinMaxScaler()

curr_values = scaler.fit_transform(df_current.values)
old_values = scaler.transform(df_old.values)

performance_metric = {}
for metric in ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'hamming', 'kulsinski', 'minkowski', 'rogerstanimoto', 'russellrao', 'sokalmichener', 'sokalsneath',
               'sqeuclidean']:
    neigh = NearestNeighbors(1, metric=metric)
    neigh.fit(curr_values)

    dst, ids = neigh.kneighbors(old_values)
    # dst = scaler.fit_transform(dst)
    performance_metric[metric] = np.mean(dst)

    
min_idx = min(performance_metric, key=performance_metric.get)
min_idx

'rogerstanimoto'

In [4]:
def get_alike_player(name):
    # we use pandas to load data directly from csv
    df = pd.read_csv('../data/adv_stats_95-96.csv')

    # apply a couple of preprocessing function
    df = remove_rk(remove_slash_name(df))
    df = remove_age(remove_team(df))
    df = remove_game(remove_min(df))

    pos_used = ['PG','SG','SF','PF','C']
    df = df.loc[df['Pos'].isin(pos_used)]
    df = remove_nan(df)

    df_jordan, _ = extract_name_position(df.loc[df['Player'] == name])
    df_jordan

    df = pd.read_csv('../data/adv_stats_18-19.csv')

    df = remove_rk(remove_slash_name(df))
    df = remove_age(remove_team(df))
    df = remove_game(remove_min(df))

    pos_used = ['PG','SG','SF','PF','C']
    df = df.loc[df['Pos'].isin(pos_used)]
    df = remove_nan(df)

    df_current_player, df_current_names = extract_name_position(df)
    neigh = NearestNeighbors(5, metric=min_idx)
    neigh.fit(df_current_player.values)

    dst, ids = neigh.kneighbors(df_jordan.values)
    return df_current_names.iloc[ids[0]]

In [5]:
player_names = ['Michael Jordan', 'Charles Barkley', 'Shawn Bradley',
                'Muggsy Bogues', 'Patrick Ewing', 'Larry Johnson']
for p in player_names:
    print('Equivalent current player to {}'.format(p))
    print(get_alike_player(p))
    print()

Equivalent current player to Michael Jordan
                    Player Pos
31               Ron Baker  SG
0             Alex Abrines  SG
495  Sviatoslav Mykhailiuk  SF
470        Naz Mitrou-Long  SG
494  Sviatoslav Mykhailiuk  SF

Equivalent current player to Charles Barkley
                    Player Pos
31               Ron Baker  SG
0             Alex Abrines  SG
495  Sviatoslav Mykhailiuk  SF
470        Naz Mitrou-Long  SG
494  Sviatoslav Mykhailiuk  SF

Equivalent current player to Shawn Bradley
                    Player Pos
31               Ron Baker  SG
0             Alex Abrines  SG
495  Sviatoslav Mykhailiuk  SF
470        Naz Mitrou-Long  SG
494  Sviatoslav Mykhailiuk  SF

Equivalent current player to Muggsy Bogues
                Player Pos
32           Ron Baker  SG
245    Brandon Goodwin  PG
291  Haywood Highsmith  SF
423        Daryl Macon  SG
603    Anfernee Simons  SG

Equivalent current player to Patrick Ewing
                    Player Pos
31               Ron Baker 

In [6]:
def get_alike_team(team, old='95-96', new='18-19'):
    # we use pandas to load data directly from csv
    df = pd.read_csv('../data/adv_stats_{}.csv'.format(old))

    # apply a couple of preprocessing function
    df = remove_rk(remove_slash_name(df))
    df = df.loc[df['Tm'] == team]
    df = remove_age(remove_team(df))
    df = remove_game(remove_min(df))

    pos_used = ['PG','SG','SF','PF','C']
    df = df.loc[df['Pos'].isin(pos_used)]
    df = remove_nan(df)

    df_old_team, df_old_names = extract_name_position(df)

    df = pd.read_csv('../data/adv_stats_{}.csv'.format(new))

    df = remove_rk(remove_slash_name(df))
    df = remove_age(remove_team(df))
    df = remove_game(remove_min(df))

    pos_used = ['PG','SG','SF','PF','C']
    df = df.loc[df['Pos'].isin(pos_used)]
    df = remove_nan(df)

    df_current_player, df_current_names = extract_name_position(df)
    neigh = NearestNeighbors(1, metric=min_idx)
    neigh.fit(df_current_player.values)

    dst, ids = neigh.kneighbors(df_old_team.values)
    result_df = pd.DataFrame({'{} players'.format(old) : (df_old_names.values)[:,0],
    '{} players'.format(new) : df_current_names.iloc[np.reshape(ids,-1)].values[:,0]})
    
    #return (df_old_names.values)[:,0], df_current_names.iloc[np.reshape(ids,-1)].values[:,0]
    return result_df

In [7]:
new = '18-19'
old = '95-96'
result_df = get_alike_team('CHI', new=new, old=old)
print('old player and current equivalent :')
result_df

old player and current equivalent :


Unnamed: 0,95-96 players,18-19 players
0,Randy Brown,Isaiah Canaan
1,Jud Buechler,Naz Mitrou-Long
2,Jason Caffey,Naz Mitrou-Long
3,James Edwards,Damian Jones
4,Jack Haley,Brandon Goodwin
5,Ron Harper,Naz Mitrou-Long
6,Michael Jordan,Naz Mitrou-Long
7,Steve Kerr,Naz Mitrou-Long
8,Toni Kukoc,Naz Mitrou-Long
9,Luc Longley,Damian Jones
