In [119]:
import pandas as pd
import numpy as np
# Display options: show every column and every row when a DataFrame is rendered.
# (A single max_rows setting is kept; an earlier value of 1000 was immediately
# overridden by None and therefore had no effect.)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# This data was acquired from www.basketball-reference.com
# It contains the top 500 players based on PER rating

df = pd.read_csv('nba_advanced_data.csv')
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,MP,PER ?,TS%,3PAr,FTr,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,Unnamed: 19,OWS,DWS,WS,WS/48,Unnamed: 24,OBPM,DBPM,BPM,VORP
0,1,Naz Mitrou-Long,SG,24,UTA,1,1,133.8,1.5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,45.0,,0.1,0.0,0.1,2.712,,68.6,-14.2,54.4,0.0
1,2,Anthony Brown,SF,25,MIN,1,4,41.9,1.5,1.0,0.0,0.0,0.0,0.0,41.7,0.0,0.0,0.0,11.1,,0.0,0.0,0.0,0.571,,17.1,-15.0,2.1,0.0
2,3,Edmond Sumner,PG,22,IND,1,2,39.8,1.0,0.0,0.0,0.0,56.1,28.2,0.0,0.0,0.0,0.0,22.3,,0.0,0.0,0.0,0.425,,4.6,-9.9,-5.3,0.0
3,4,Vince Hunter,PF,23,MEM,4,7,35.8,0.6,0.0,0.0,32.3,16.9,24.8,0.0,0.0,13.4,16.7,38.5,,0.0,0.0,0.0,0.097,,-3.1,-5.9,-9.0,0.0
4,5,David Stockton,PG,26,UTA,3,9,33.9,1.05,1.0,1.333,0.0,0.0,0.0,0.0,0.0,0.0,17.4,28.8,,0.1,0.0,0.1,0.459,,14.8,-8.1,6.7,0.0


The data includes advanced statistics for 500 players from the 2017-18 NBA season, sorted by PER rating.

In [120]:
# List the column labels of df to see which statistics are available.
df.columns

Index(['Rk', 'Player', 'Pos', 'Age', 'Tm', 'G', 'MP', 'PER ?', 'TS%', '3PAr',
       'FTr', 'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'TOV%', 'USG%',
       'Unnamed: 19', 'OWS', 'DWS', 'WS', 'WS/48', 'Unnamed: 24', 'OBPM',
       'DBPM', 'BPM', 'VORP'],
      dtype='object')

In [121]:
# Build the analysis frame in one chain (no in-place mutation):
#   1. only keep players who played more than 50 games
#   2. copy the 'PER ?' column to a cleanly named 'PER' column (appended last)
#   3. drop unnecessary columns
#   4. renumber the rows from 0 without keeping the old index as a column
df = (
    df.loc[df['G'] > 50]
    .assign(PER=lambda frame: frame['PER ?'])
    .drop(columns=['Rk', 'PER ?', 'Pos', 'Age', 'Tm', 'Unnamed: 19', 'Unnamed: 24'])
    .reset_index(drop=True)
)

In [122]:
# Dimensions of df as a (rows, columns) tuple.
df.shape

(309, 23)

309 players played more than 50 games and will be included in this analysis.

In [123]:
# Preview the first rows of df.
df.head()

Unnamed: 0,Player,G,MP,TS%,3PAr,FTr,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP,PER
0,James Harden,72,2551,0.619,0.498,0.502,1.8,15.2,8.6,45.1,2.4,1.7,15.1,36.1,11.6,3.8,15.4,0.289,9.6,1.3,10.9,8.3,29.8
1,Anthony Davis,75,2727,0.612,0.111,0.409,7.7,24.8,16.5,10.8,2.0,5.6,8.6,30.0,8.8,4.9,13.7,0.241,2.8,2.3,5.2,4.9,28.9
2,LeBron James,82,3026,0.621,0.257,0.336,3.7,22.3,13.1,44.4,1.9,2.0,16.1,31.6,11.0,3.0,14.0,0.221,7.6,2.0,9.6,8.9,28.6
3,Stephen Curry,51,1631,0.675,0.58,0.35,2.7,14.4,9.0,30.3,2.4,0.4,13.3,31.0,7.2,1.8,9.1,0.267,9.9,-1.3,8.6,4.4,28.2
4,Giannis Antetokounmpo,75,2756,0.598,0.1,0.457,6.7,25.3,16.0,23.7,2.0,3.3,11.7,31.2,8.3,3.6,11.9,0.207,3.9,1.9,5.8,5.4,27.3


These names should look familiar, although it'd be hard to argue that any of them are underrated.

In [124]:
# Add one '<metric> Rank' column per headline metric.
# ascending=False means the largest value of a metric receives rank 1.
for metric in ('PER', 'WS', 'VORP', 'TS%'):
    df[metric + ' Rank'] = df[metric].rank(ascending=False)

I've created four new columns, each ranking the players on one of the four statistics I believe are most indicative of value to a team:

<b>PER - Player efficiency rating</b> - an attempt to boil down a player's overall impact on a game to one number.  <br>
<b>WS - Win shares</b> - attempts to divvy up credit for team success among the individuals on a team.<br>
<b>VORP - Value over replacement player</b> - "an estimate of each player's overall contribution to the team, measured vs. what a theoretical "replacement player" would provide, where the "replacement player" is defined as a player on minimum salary or not a normal member of a team's rotation." - basketball reference<br>
<b>TS% - True shooting percentage</b> - "True shooting percentage is a measure of shooting efficiency that takes into account field goals, 3-point field goals, and free throws." - basketball reference


In [125]:
# Load the salary table. header=1 uses row 1 (zero-based) of the CSV as the
# column names, with the data starting on the row after it.
salary = pd.read_csv('nba_salary.csv', header = 1)
# Preview the first rows of the salary table.
salary.head()

Unnamed: 0,Rk,Player,Tm,2017-18,2018-19,2019-20,2020-21,2021-22,2022-23,Signed Using,Guaranteed
0,1,Stephen Curry,GSW,"$34,682,550","$37,457,154","$40,231,758","$43,006,362","$45,780,966",,Bird Rights,"$201,158,790"
1,2,LeBron James,CLE,"$33,285,709",,,,,,Early Bird,"$33,285,709"
2,3,Paul Millsap,DEN,"$31,269,231","$29,730,769","$30,500,000",,,,Cap space,"$61,000,000"
3,4,Gordon Hayward,BOS,"$29,727,900","$31,214,295","$32,700,690","$34,187,085",,,Cap space,"$93,642,885"
4,5,Blake Griffin,DET,"$29,512,900","$31,873,932","$34,234,964","$36,595,996","$38,957,028",,Bird Rights,"$132,217,792"


I've chosen to use the 2017-18 salary data as a measurement of value, although I acknowledge it is an imperfect measurement due to the draft process.  In a true open market, value can be measured in the form of dollars, but the NBA draft system imposes maximums on the amount owners can spend on players depending on their years of service.  As a consequence, young superstars are often paid far below their value.

On the other hand, pundits and fans are able to assign value in the form of praise for young players such as Karl-Anthony Towns, who plays like a superstar but doesn't get paid like one.  So, while we wouldn't call KAT "underrated" given the amount of praise he receives, his salary is far below the amount he deserves.

In [126]:
# Merge statistics and salary data on the shared 'Player' column.
# Neither frame is re-indexed by player beforehand: an index level named
# 'Player' alongside a column named 'Player' makes on='Player' ambiguous, and
# current pandas versions raise a ValueError for it.
# how='left' keeps every player from df, even when no salary row matches.
df_new = df.merge(salary, on='Player', how='left')

# Reformat salary data as numbers.
# regex=False strips the literal '$' and ',' characters, so '$' can never be
# interpreted as a regular-expression end-of-string anchor.
salary_text = (
    df_new['2017-18']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
)
# pd.to_numeric yields integers when every player has a salary, and leaves NaN
# for players without a salary match instead of raising like astype(int).
df_new['2017-18'] = pd.to_numeric(salary_text)

# Rank salaries; ascending=False gives rank 1 to the highest salary.
df_new['2017-18_salary_rank'] = df_new['2017-18'].rank(ascending=False)

# Drop unnecessary columns (later seasons and contract details).
df_new = df_new.drop(
    columns=['2018-19', '2019-20', '2020-21', '2021-22', '2022-23',
             'Signed Using', 'Guaranteed']
)


In [127]:
# Keep only players ranked inside the top TOP_N on every one of the four metrics.
# The comparison is inclusive (<=) so that rank 50 itself counts as "top 50";
# a strict "< 50" would only admit ranks 1-49.
# Players with a missing rank compare as False and are excluded.
TOP_N = 50
rank_columns = ['PER Rank', 'WS Rank', 'VORP Rank', 'TS% Rank']
best = df_new.loc[(df_new[rank_columns] <= TOP_N).all(axis=1)]

The cell above filters my dataframe so that it only keeps players who rank in the top 50 for each of the four key metrics; next, I'll sort the result by salary from lowest to highest.

In [128]:
# Salary rank 1 is the highest salary (the rank was computed with ascending=False),
# so sorting the rank in descending order lists the lowest-paid players first.
best.sort_values('2017-18_salary_rank', ascending = False)

Unnamed: 0,Player,G,MP,TS%,3PAr,FTr,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP,PER,PER Rank,WS Rank,VORP Rank,TS% Rank,Rk,Tm,2017-18,2017-18_salary_rank
12,Clint Capela,74,2034,0.65,0.001,0.38,13.5,30.8,22.2,5.7,1.4,5.7,11.5,19.4,6.2,4.1,10.2,0.242,0.1,2.9,3.0,2.6,24.5,13.0,12.0,31.5,7.0,290,HOU,2334520,221.0
9,Karl-Anthony Towns,82,2918,0.646,0.243,0.343,9.3,30.9,20.0,10.8,1.1,3.5,10.5,22.9,10.6,3.4,14.0,0.23,4.4,1.1,5.5,5.5,24.9,10.0,2.5,6.0,10.0,172,MIN,6216840,133.0
47,Darren Collison,69,2018,0.61,0.321,0.226,2.1,7.8,5.0,26.4,2.3,0.7,10.9,17.4,5.8,1.8,7.6,0.181,3.1,-1.0,2.2,2.1,18.8,48.0,30.5,46.5,42.0,124,IND,10000000,97.5
8,Kyrie Irving,60,1931,0.61,0.374,0.24,1.9,10.8,6.4,30.7,1.7,0.7,10.4,31.0,6.3,2.7,8.9,0.222,6.9,-0.7,6.2,4.0,25.0,8.5,21.0,16.0,42.0,48,BOS,18868625,41.0
16,Enes Kanter,71,1830,0.63,0.003,0.258,16.6,30.6,23.6,9.4,1.0,1.7,13.3,21.8,5.5,2.1,7.6,0.198,2.4,0.1,2.5,2.1,24.0,17.0,30.5,46.5,19.5,38,NYK,20566802,33.0
29,Rudy Gobert,56,1816,0.657,0.0,0.673,10.4,27.0,18.8,6.9,1.2,6.0,15.7,16.9,4.2,3.9,8.1,0.214,-0.3,4.6,4.3,2.9,20.7,29.0,25.5,28.5,4.0,35,UTA,21974719,30.0
30,Steven Adams,76,2487,0.63,0.003,0.403,16.6,13.9,15.3,5.5,1.8,2.8,13.2,16.7,6.7,3.0,9.7,0.187,2.2,1.1,3.3,3.3,20.6,30.5,15.0,22.5,19.5,32,OKC,22471910,27.5
33,DeAndre Jordan,77,2424,0.648,0.0,0.54,15.1,37.6,26.5,7.1,0.8,2.4,16.1,15.2,6.0,3.4,9.4,0.186,0.5,1.6,2.1,2.5,20.2,34.0,17.0,33.5,8.0,31,LAC,22642350,24.5
1,Anthony Davis,75,2727,0.612,0.111,0.409,7.7,24.8,16.5,10.8,2.0,5.6,8.6,30.0,8.8,4.9,13.7,0.241,2.8,2.3,5.2,4.9,28.9,2.0,4.0,10.0,38.5,21,NOP,23775506,17.5
14,Chris Paul,58,1847,0.604,0.475,0.277,2.3,16.7,9.5,40.9,2.6,0.6,12.5,24.5,7.5,2.7,10.2,0.265,6.2,0.9,7.1,4.3,24.4,14.5,12.0,15.0,49.0,18,HOU,24599495,14.0


Wow!  We can see that <b>Clint Capela</b> tops the list as the most underrated player as determined by salary. He has the 221st-highest salary of the 309 players included.  His salary for the 2017-18 season was only $2,334,520, yet he ranked in the top 15 for PER, WS, and TS%, and in the top 40 for VORP.  In addition, the Houston Rockets had the best record of any team over the course of the season, and it's clear that this was due in part to his contributions.

I would argue that he is also underrated in terms of fan praise and support.  He's most definitely not a household name, although he did start to earn some of the recognition he deserves for his performance in the 2018 playoffs. Regardless, you really only need to look at the other names on this list to conclude that he's doing some special work.  Pay the man!