In [1]:
import pandas as pd
import matplotlib as mpl

In [2]:
# import csv file into a dataframe:
contracts = pd.read_csv("NBA_salaries.csv", header = [1], index_col = 0)

In [3]:
# Display columns
contracts.columns

Index(['Player', 'Tm', '2021-22', '2022-23', '2023-24', '2024-25', '2025-26',
       '2026-27', 'Signed Using', 'Guaranteed'],
      dtype='object')

In [4]:
# Display all players
# pd.set_option('display.max_rows', 550)

In [5]:
# Rename 'tm' to 'team column'
contracts = contracts.rename(columns = {'Tm': 'Team'})

In [6]:
contracts

Unnamed: 0_level_0,Player,Team,2021-22,2022-23,2023-24,2024-25,2025-26,2026-27,Signed Using,Guaranteed
Rk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,Stephen Curry,GSW,"$45,780,966","$48,070,014","$51,915,615","$55,761,216","$59,606,817",,Bird Rights,"$261,134,628"
2,John Wall,HOU,"$44,310,840","$47,366,760",,,,,Bird Rights,"$44,310,840"
3,Russell Westbrook,LAL,"$44,211,146","$47,063,478",,,,,Bird Rights,"$44,211,146"
4,James Harden,PHI,"$43,848,000","$46,872,000",,,,,Bird Rights,"$43,848,000"
5,LeBron James,LAL,"$41,180,544","$44,474,988",,,,,Bird,"$85,655,532"
...,...,...,...,...,...,...,...,...,...,...
495,Isaiah Thomas,CHO,"$276,039",,,,,,Minimum Salary,"$552,077"
496,Trendon Watford,POR,"$260,561","$1,563,518","$1,836,096","$1,988,598",,,MLE,"$5,648,773"
497,Ish Wainright,TOR,"$8,558",,,,,,,"$375,000"
498,Malik Fitts,BOS,"$231,062","$1,752,638",,,,,Minimum Salary,"$402,218"


In [7]:
# Check df data types
contracts.dtypes

Player          object
Team            object
2021-22         object
2022-23         object
2023-24         object
2024-25         object
2025-26         object
2026-27         object
Signed Using    object
Guaranteed      object
dtype: object

In [8]:
# Dropping unnecessary columns
new_contracts = contracts.drop(['2022-23', '2023-24', '2024-25', '2025-26',
       '2026-27', 'Signed Using', 'Guaranteed'], axis=1)

In [9]:
new_contracts

Unnamed: 0_level_0,Player,Team,2021-22
Rk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Stephen Curry,GSW,"$45,780,966"
2,John Wall,HOU,"$44,310,840"
3,Russell Westbrook,LAL,"$44,211,146"
4,James Harden,PHI,"$43,848,000"
5,LeBron James,LAL,"$41,180,544"
...,...,...,...
495,Isaiah Thomas,CHO,"$276,039"
496,Trendon Watford,POR,"$260,561"
497,Ish Wainright,TOR,"$8,558"
498,Malik Fitts,BOS,"$231,062"


In [10]:
# Import player advanced stats(only 500 players. All players that did not play due to injury/situation are not included)
nba_adv = pd.read_csv("NBA_Advanced_Stats.csv", index_col=0)

In [11]:
# Display columns
nba_adv.columns

Index(['Player', 'Pos', 'Age', 'Tm', 'G▼', 'MP', 'PER', 'TS%', '3PAr', 'FTr',
       'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'TOV%', 'USG%',
       'Unnamed: 19', 'OWS', 'DWS', 'WS', 'WS/48', 'Unnamed: 24', 'OBPM',
       'DBPM', 'BPM', 'VORP'],
      dtype='object')

<strong>Advanced Stats Dictionary</strong><br>

<strong>PER:</strong> A measure of per-minute production standardized such that the league average is 15<br>
<strong>TS%:</strong> A measure of shooting efficiency that takes into account 2-point field goalds, 3-point field goals and free throws<br>
<strong>TOV%:</strong> An estimate of turnover percentage per 100 plays<br>
<strong>USG%:</strong> An estimate of team plays use by a player while they were on the floor<br>
<strong>WS:</strong> An Estimate of number of wins contributed by a player<br>
<strong>WS/48:</strong> An Estimate of number of wins contributed by a player per 48 minutes (league average is approximately .100)<br>
<strong>BPM:</strong> A box score estimate of the points per 100 possessions a player co
tributed above a league-average player, translated to an average team<br>
<strong>VORP:</strong> A box score estimate of the points per 100 TEAM posessions that a player contributed above a replacement-level (-2.0), player translated to an average team and prorated to an 82-game season.<br>

In [12]:
# Original NBA Player Advanced Stats dataset
nba_adv

Unnamed: 0_level_0,Player,Pos,Age,Tm,G▼,MP,PER,TS%,3PAr,FTr,...,Unnamed: 19,OWS,DWS,WS,WS/48,Unnamed: 24,OBPM,DBPM,BPM,VORP
Rk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,Deni Avdija,SF,21,WAS,82,1984,11.6,0.536,0.447,0.235,...,,0.6,1.8,2.4,0.058,,-2.1,0.4,-1.7,0.1
2,Saddiq Bey,SF,22,DET,82,2704,14.0,0.529,0.537,0.224,...,,2.4,1.6,4.0,0.071,,0.9,-1.2,-0.3,1.2
3,Mikal Bridges,SF,25,PHO,82,2854,14.4,0.627,0.364,0.183,...,,5.2,3.7,8.9,0.150,,0.5,1.1,1.6,2.6
4,Kevon Looney,C,25,GSW,82,1732,15.7,0.586,0.003,0.343,...,,3.5,3.2,6.8,0.187,,-0.9,1.7,0.9,1.3
5,Dwight Powell,C,30,DAL,82,1798,18.1,0.725,0.092,0.529,...,,6.0,2.2,8.2,0.219,,0.4,0.5,0.9,1.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
496,Louis King,SF,22,SAC,10,104,8.1,0.438,0.574,0.213,...,,0.0,0.0,0.0,0.000,,-3.0,-1.6,-4.6,-0.1
497,Xavier Moon,SG,27,LAC,10,137,16.2,0.545,0.275,0.098,...,,0.2,0.2,0.3,0.121,,-1.1,1.0,-0.1,0.1
498,Reggie Perry,PF,21,TOT,10,187,15.6,0.537,0.203,0.190,...,,0.1,0.2,0.2,0.058,,-3.1,-1.0,-4.1,-0.1
499,Trevelin Queen,SG,24,HOU,10,74,18.2,0.619,0.727,0.121,...,,0.1,0.1,0.1,0.080,,2.7,-0.8,1.9,0.1


In [13]:
# Dropping all unnecessary columns
nba_adv_stats = nba_adv.drop(['Tm', 'MP', '3PAr', 'FTr', 'ORB%','DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%',
       'Unnamed: 19', 'OWS', 'DWS','Unnamed: 24','OBPM','DBPM'], axis=1)

In [14]:
# NBA advanced stats cleaned: after dropped columns
nba_adv_stats

Unnamed: 0_level_0,Player,Pos,Age,G▼,PER,TS%,TOV%,USG%,WS,WS/48,BPM,VORP
Rk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Deni Avdija,SF,21,82,11.6,0.536,12.0,16.3,2.4,0.058,-1.7,0.1
2,Saddiq Bey,SF,22,82,14.0,0.529,7.1,21.3,4.0,0.071,-0.3,1.2
3,Mikal Bridges,SF,25,82,14.4,0.627,6.8,15.0,8.9,0.150,1.6,2.6
4,Kevon Looney,C,25,82,15.7,0.586,13.8,12.2,6.8,0.187,0.9,1.3
5,Dwight Powell,C,30,82,18.1,0.725,11.5,14.0,8.2,0.219,0.9,1.3
...,...,...,...,...,...,...,...,...,...,...,...,...
496,Louis King,SF,22,10,8.1,0.438,0.0,21.2,0.0,0.000,-4.6,-0.1
497,Xavier Moon,SG,27,10,16.2,0.545,10.1,19.0,0.3,0.121,-0.1,0.1
498,Reggie Perry,PF,21,10,15.6,0.537,11.4,22.4,0.2,0.058,-4.1,-0.1
499,Trevelin Queen,SG,24,10,18.2,0.619,16.8,23.9,0.1,0.080,1.9,0.1


In [15]:
# Merge NBA salary dataframe with advanced stats dataframe
full_data = pd.merge(new_contracts, nba_adv_stats, on='Player', how='inner')

In [16]:
# rename 'G' columns to 'Games'
full_data = full_data.rename(columns = {'G▼' : 'Games'})

In [17]:
full_data

Unnamed: 0,Player,Team,2021-22,Pos,Age,Games,PER,TS%,TOV%,USG%,WS,WS/48,BPM,VORP
0,Stephen Curry,GSW,"$45,780,966",PG,33,64,21.4,0.601,13.2,30.8,8.0,0.173,5.8,4.4
1,Russell Westbrook,LAL,"$44,211,146",PG,33,78,15.0,0.512,17.3,27.3,1.7,0.030,-1.6,0.2
2,James Harden,PHI,"$43,848,000",PG-SG,32,65,20.9,0.583,18.8,27.2,7.6,0.152,4.1,3.7
3,LeBron James,LAL,"$41,180,544",SF,37,56,26.2,0.619,12.5,32.3,7.5,0.172,7.7,5.1
4,Kevin Durant,BRK,"$40,918,900",PF,33,55,25.6,0.634,12.9,31.2,8.4,0.198,7.2,4.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
455,Isaiah Thomas,CHO,"$276,039",PG,32,22,12.0,0.516,7.8,24.8,0.2,0.026,-4.5,-0.2
456,Trendon Watford,POR,"$260,561",SF,21,48,15.8,0.590,12.5,17.6,1.9,0.104,-1.7,0.1
457,Ish Wainright,TOR,"$8,558",PF,27,45,9.4,0.494,10.6,14.7,0.6,0.079,-2.0,0.0
458,Malik Fitts,BOS,"$231,062",PF,24,15,10.7,0.553,9.5,14.6,0.1,0.097,-1.1,0.0


In [18]:
# Check for NANs in df
full_data.isnull().sum().sum()

1

In [19]:
# Drop the player that has an NAN for salary. He will not be analyzed (Admiral Schofield)
full_data = full_data.drop([3,452])

In [20]:
full_data

Unnamed: 0,Player,Team,2021-22,Pos,Age,Games,PER,TS%,TOV%,USG%,WS,WS/48,BPM,VORP
0,Stephen Curry,GSW,"$45,780,966",PG,33,64,21.4,0.601,13.2,30.8,8.0,0.173,5.8,4.4
1,Russell Westbrook,LAL,"$44,211,146",PG,33,78,15.0,0.512,17.3,27.3,1.7,0.030,-1.6,0.2
2,James Harden,PHI,"$43,848,000",PG-SG,32,65,20.9,0.583,18.8,27.2,7.6,0.152,4.1,3.7
4,Kevin Durant,BRK,"$40,918,900",PF,33,55,25.6,0.634,12.9,31.2,8.4,0.198,7.2,4.8
5,Giannis Antetokounmpo,MIL,"$39,344,970",PF,27,67,32.1,0.633,12.2,34.9,12.9,0.281,11.2,7.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
455,Isaiah Thomas,CHO,"$276,039",PG,32,22,12.0,0.516,7.8,24.8,0.2,0.026,-4.5,-0.2
456,Trendon Watford,POR,"$260,561",SF,21,48,15.8,0.590,12.5,17.6,1.9,0.104,-1.7,0.1
457,Ish Wainright,TOR,"$8,558",PF,27,45,9.4,0.494,10.6,14.7,0.6,0.079,-2.0,0.0
458,Malik Fitts,BOS,"$231,062",PF,24,15,10.7,0.553,9.5,14.6,0.1,0.097,-1.1,0.0


In [21]:
# Check Datatypes
full_data.dtypes

Player      object
Team        object
2021-22     object
Pos         object
Age          int64
Games        int64
PER        float64
TS%        float64
TOV%       float64
USG%       float64
WS         float64
WS/48      float64
BPM        float64
VORP       float64
dtype: object

In [22]:
#Remove '$' from salaries. Need to convert to number
full_data['2021-22'] = full_data['2021-22'].str[1:]

In [23]:
full_data = full_data.replace(',','', regex=True)

In [24]:
# Rename '2021-22' to '2021-22 Salaries ($)'
full_data = full_data.rename(columns = {'2021-22' : '2021-22 Salaries ($)'})

In [25]:
# Convert Salary dtype to float from Object (need to calculate column)
full_data['2021-22 Salaries ($)']= full_data['2021-22 Salaries ($)'].astype(float, errors='raise')

In [26]:
full_data

Unnamed: 0,Player,Team,2021-22 Salaries ($),Pos,Age,Games,PER,TS%,TOV%,USG%,WS,WS/48,BPM,VORP
0,Stephen Curry,GSW,45780966.0,PG,33,64,21.4,0.601,13.2,30.8,8.0,0.173,5.8,4.4
1,Russell Westbrook,LAL,44211146.0,PG,33,78,15.0,0.512,17.3,27.3,1.7,0.030,-1.6,0.2
2,James Harden,PHI,43848000.0,PG-SG,32,65,20.9,0.583,18.8,27.2,7.6,0.152,4.1,3.7
4,Kevin Durant,BRK,40918900.0,PF,33,55,25.6,0.634,12.9,31.2,8.4,0.198,7.2,4.8
5,Giannis Antetokounmpo,MIL,39344970.0,PF,27,67,32.1,0.633,12.2,34.9,12.9,0.281,11.2,7.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
455,Isaiah Thomas,CHO,276039.0,PG,32,22,12.0,0.516,7.8,24.8,0.2,0.026,-4.5,-0.2
456,Trendon Watford,POR,260561.0,SF,21,48,15.8,0.590,12.5,17.6,1.9,0.104,-1.7,0.1
457,Ish Wainright,TOR,8558.0,PF,27,45,9.4,0.494,10.6,14.7,0.6,0.079,-2.0,0.0
458,Malik Fitts,BOS,231062.0,PF,24,15,10.7,0.553,9.5,14.6,0.1,0.097,-1.1,0.0


In [27]:
full_data.describe()

Unnamed: 0,2021-22 Salaries ($),Age,Games,PER,TS%,TOV%,USG%,WS,WS/48,BPM,VORP
count,458.0,458.0,458.0,458.0,458.0,458.0,458.0,458.0,458.0,458.0,458.0
mean,8449268.0,26.29476,54.753275,14.034934,0.558205,12.075328,18.682751,2.750873,0.093245,-0.749782,0.675764
std,9532754.0,4.476344,18.006267,4.505766,0.065175,3.978347,5.370075,2.510401,0.059362,2.866155,1.235968
min,8558.0,19.0,10.0,0.4,0.2,0.0,6.0,-1.6,-0.147,-10.4,-1.2
25%,2015070.0,23.0,43.25,10.9,0.52425,9.5,14.725,0.8,0.056,-2.5,-0.1
50%,4244258.0,26.0,58.0,13.5,0.56,11.65,17.8,2.1,0.093,-0.9,0.25
75%,11082000.0,29.0,69.0,16.675,0.595,14.4,21.675,4.0,0.126,0.8,1.1
max,45780970.0,41.0,82.0,32.8,0.745,31.9,37.4,15.2,0.296,13.7,9.8


In [28]:
full_data_salaries = full_data.sort_values(by=['2021-22 Salaries ($)'], ascending=False)

In [29]:
full_data_salaries

Unnamed: 0,Player,Team,2021-22 Salaries ($),Pos,Age,Games,PER,TS%,TOV%,USG%,WS,WS/48,BPM,VORP
0,Stephen Curry,GSW,45780966.0,PG,33,64,21.4,0.601,13.2,30.8,8.0,0.173,5.8,4.4
1,Russell Westbrook,LAL,44211146.0,PG,33,78,15.0,0.512,17.3,27.3,1.7,0.030,-1.6,0.2
2,James Harden,PHI,43848000.0,PG-SG,32,65,20.9,0.583,18.8,27.2,7.6,0.152,4.1,3.7
4,Kevin Durant,BRK,40918900.0,PF,33,55,25.6,0.634,12.9,31.2,8.4,0.198,7.2,4.8
5,Giannis Antetokounmpo,MIL,39344970.0,PF,27,67,32.1,0.633,12.2,34.9,12.9,0.281,11.2,7.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
455,Isaiah Thomas,CHO,276039.0,PG,32,22,12.0,0.516,7.8,24.8,0.2,0.026,-4.5,-0.2
456,Trendon Watford,POR,260561.0,SF,21,48,15.8,0.590,12.5,17.6,1.9,0.104,-1.7,0.1
458,Malik Fitts,BOS,231062.0,PF,24,15,10.7,0.553,9.5,14.6,0.1,0.097,-1.1,0.0
459,Joe Wieskamp,SAS,202068.0,SG,22,29,7.1,0.494,3.1,13.1,0.1,0.035,-3.9,-0.1
