In [1]:
import pandas as pd
import matplotlib as mpl

In [2]:
# Load the NBA salary table into a DataFrame. Column names are taken from
# header row index 1, and the first column of the file becomes the index.
# NOTE(review): presumably the row above the header is a banner/grouping row
# that is intentionally skipped — confirm against the raw CSV.
contracts = pd.read_csv(
    "NBA_salaries.csv",
    header=[1],
    index_col=0,
)

In [3]:
# List the column labels of the salary table to see what was loaded
contracts.columns

Index(['Player', 'Tm', '2021-22', '2022-23', '2023-24', '2024-25', '2025-26',
       '2026-27', 'Signed Using', 'Guaranteed'],
      dtype='object')

In [4]:
# Raise the display limit so frames of up to 550 rows are shown without truncation
pd.options.display.max_rows = 550

In [5]:
# Rename the 'Tm' column to 'Team'
contracts = contracts.rename(
    columns={'Tm': 'Team'},
)

In [6]:
# Show the salary table to confirm 'Tm' now appears as 'Team'
contracts

Unnamed: 0_level_0,Player,Team,2021-22,2022-23,2023-24,2024-25,2025-26,2026-27,Signed Using,Guaranteed
Rk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,Stephen Curry,GSW,"$45,780,966","$48,070,014","$51,915,615","$55,761,216","$59,606,817",,Bird Rights,"$261,134,628"
2,John Wall,HOU,"$44,310,840","$47,366,760",,,,,Bird Rights,"$44,310,840"
3,Russell Westbrook,LAL,"$44,211,146","$47,063,478",,,,,Bird Rights,"$44,211,146"
4,James Harden,PHI,"$43,848,000","$46,872,000",,,,,Bird Rights,"$43,848,000"
5,LeBron James,LAL,"$41,180,544","$44,474,988",,,,,Bird,"$85,655,532"
6,Kevin Durant,BRK,"$40,918,900","$42,969,845","$42,969,845","$49,856,021","$53,282,609",,Sign and Trade,"$229,997,220"
7,Giannis Antetokounmpo,MIL,"$39,344,970","$42,492,568","$45,640,165","$48,787,763","$51,935,360",,Bird,"$176,265,466"
8,Paul George,LAC,"$39,344,970","$42,492,568","$45,640,165","$48,787,763",,,Bird,"$127,477,703"
9,Damian Lillard,POR,"$39,344,900","$42,492,492","$45,640,084","$48,787,676",,,1st Round Pick,"$176,265,152"
10,Kawhi Leonard,LAC,"$39,344,900","$42,492,492","$45,640,084","$48,787,676",,,Early Bird,"$127,477,476"


In [7]:
# Check the dtype of every column in the salary table
# (the salary columns are read as strings such as "$45,780,966", hence `object`)
contracts.dtypes

Player          object
Team            object
2021-22         object
2022-23         object
2023-24         object
2024-25         object
2025-26         object
2026-27         object
Signed Using    object
Guaranteed      object
dtype: object

In [8]:
# Keep only Player, Team and the 2021-22 salary by dropping the later
# seasons plus the 'Signed Using' and 'Guaranteed' columns
new_contracts = contracts.drop(
    columns=[
        '2022-23', '2023-24', '2024-25', '2025-26', '2026-27',
        'Signed Using', 'Guaranteed',
    ],
)

In [9]:
# Show the trimmed salary table (Player, Team, 2021-22)
new_contracts

Unnamed: 0_level_0,Player,Team,2021-22
Rk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Stephen Curry,GSW,"$45,780,966"
2,John Wall,HOU,"$44,310,840"
3,Russell Westbrook,LAL,"$44,211,146"
4,James Harden,PHI,"$43,848,000"
5,LeBron James,LAL,"$41,180,544"
6,Kevin Durant,BRK,"$40,918,900"
7,Giannis Antetokounmpo,MIL,"$39,344,970"
8,Paul George,LAC,"$39,344,970"
9,Damian Lillard,POR,"$39,344,900"
10,Kawhi Leonard,LAC,"$39,344,900"


In [10]:
# Load the per-player advanced stats, using the first column of the file as
# the index. Per the original author's note, the file covers only 500 players
# and leaves out anyone who did not play (injury or other circumstances).
nba_adv = pd.read_csv(
    "NBA_Advanced_Stats.csv",
    index_col=0,
)

In [11]:
# List the column labels of the advanced stats table
nba_adv.columns

Index(['Player', 'Pos', 'Age', 'Tm', 'G▼', 'MP', 'PER', 'TS%', '3PAr', 'FTr',
       'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'TOV%', 'USG%',
       'Unnamed: 19', 'OWS', 'DWS', 'WS', 'WS/48', 'Unnamed: 24', 'OBPM',
       'DBPM', 'BPM', 'VORP'],
      dtype='object')

<strong>Advanced Stats Dictionary</strong><br>

<strong>PER:</strong> A measure of per-minute production standardized such that the league average is 15<br>
<strong>TS%:</strong> A measure of shooting efficiency that takes into account 2-point field goals, 3-point field goals and free throws<br>
<strong>TOV%:</strong> An estimate of turnovers committed per 100 plays<br>
<strong>USG%:</strong> An estimate of the percentage of team plays used by a player while they were on the floor<br>
<strong>WS:</strong> An estimate of the number of wins contributed by a player<br>
<strong>WS/48:</strong> An estimate of the number of wins contributed by a player per 48 minutes (league average is approximately .100)<br>
<strong>BPM:</strong> A box score estimate of the points per 100 possessions a player contributed above a league-average player, translated to an average team<br>
<strong>VORP:</strong> A box score estimate of the points per 100 TEAM possessions that a player contributed above a replacement-level (-2.0) player, translated to an average team and prorated to an 82-game season.<br>

In [12]:
# Show the advanced stats table as loaded, before any columns are dropped
nba_adv

Unnamed: 0_level_0,Player,Pos,Age,Tm,G▼,MP,PER,TS%,3PAr,FTr,...,Unnamed: 19,OWS,DWS,WS,WS/48,Unnamed: 24,OBPM,DBPM,BPM,VORP
Rk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,Deni Avdija,SF,21,WAS,82,1984,11.6,0.536,0.447,0.235,...,,0.6,1.8,2.4,0.058,,-2.1,0.4,-1.7,0.1
2,Saddiq Bey,SF,22,DET,82,2704,14.0,0.529,0.537,0.224,...,,2.4,1.6,4.0,0.071,,0.9,-1.2,-0.3,1.2
3,Mikal Bridges,SF,25,PHO,82,2854,14.4,0.627,0.364,0.183,...,,5.2,3.7,8.9,0.15,,0.5,1.1,1.6,2.6
4,Kevon Looney,C,25,GSW,82,1732,15.7,0.586,0.003,0.343,...,,3.5,3.2,6.8,0.187,,-0.9,1.7,0.9,1.3
5,Dwight Powell,C,30,DAL,82,1798,18.1,0.725,0.092,0.529,...,,6.0,2.2,8.2,0.219,,0.4,0.5,0.9,1.3
6,Alec Burks,SG,30,NYK,81,2318,13.7,0.559,0.534,0.363,...,,3.2,2.9,6.1,0.126,,-0.4,1.2,0.8,1.7
7,Buddy Hield,SG,29,TOT,81,2499,12.9,0.548,0.652,0.116,...,,0.8,1.1,1.9,0.036,,0.5,-1.7,-1.2,0.5
8,Terance Mann,SF,25,LAC,81,2317,13.4,0.572,0.301,0.238,...,,3.1,2.4,5.5,0.113,,-0.8,0.2,-0.6,0.8
9,Patty Mills,PG,33,BRK,81,2346,10.4,0.563,0.715,0.074,...,,1.9,0.9,2.8,0.058,,-0.3,-1.4,-1.7,0.2
10,Chris Boucher,PF,29,TOR,80,1690,17.9,0.57,0.397,0.334,...,,3.5,2.4,6.0,0.169,,0.5,-0.4,0.0,0.9


In [14]:
# Drop the columns that are not needed, leaving Player, Pos, Age, G▼ and the
# metrics PER, TS%, TOV%, USG%, WS, WS/48, BPM and VORP.
# 'Unnamed: 19' and 'Unnamed: 24' are header-less columns from the CSV.
nba_adv_stats = nba_adv.drop(
    columns=[
        'Tm', 'MP', '3PAr', 'FTr',
        'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%',
        'Unnamed: 19', 'OWS', 'DWS',
        'Unnamed: 24', 'OBPM', 'DBPM',
    ],
)

In [15]:
# Show the advanced stats table after the unneeded columns were dropped
nba_adv_stats

Unnamed: 0_level_0,Player,Pos,Age,G▼,PER,TS%,TOV%,USG%,WS,WS/48,BPM,VORP
Rk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Deni Avdija,SF,21,82,11.6,0.536,12.0,16.3,2.4,0.058,-1.7,0.1
2,Saddiq Bey,SF,22,82,14.0,0.529,7.1,21.3,4.0,0.071,-0.3,1.2
3,Mikal Bridges,SF,25,82,14.4,0.627,6.8,15.0,8.9,0.15,1.6,2.6
4,Kevon Looney,C,25,82,15.7,0.586,13.8,12.2,6.8,0.187,0.9,1.3
5,Dwight Powell,C,30,82,18.1,0.725,11.5,14.0,8.2,0.219,0.9,1.3
6,Alec Burks,SG,30,81,13.7,0.559,9.7,17.7,6.1,0.126,0.8,1.7
7,Buddy Hield,SG,29,81,12.9,0.548,11.7,22.4,1.9,0.036,-1.2,0.5
8,Terance Mann,SF,25,81,13.4,0.572,9.9,16.0,5.5,0.113,-0.6,0.8
9,Patty Mills,PG,33,81,10.4,0.563,8.2,16.4,2.8,0.058,-1.7,0.2
10,Chris Boucher,PF,29,80,17.9,0.57,6.0,17.8,6.0,0.169,0.0,0.9


In [16]:
# Join the salary table to the advanced stats table on the player's name.
# The inner join keeps only players that appear in both tables.
# NOTE(review): matching is by exact name string, and the stats file contains
# a combined 'TOT' team code (e.g. Buddy Hield). If traded players also have
# per-team rows there, their salary row would be repeated in the result —
# confirm, and check for names that fail to match.
full_data = new_contracts.merge(
    nba_adv_stats,
    on='Player',
    how='inner',
)

In [17]:
# Show the merged salary + advanced stats table
full_data

Unnamed: 0,Player,Team,2021-22,Pos,Age,G▼,PER,TS%,TOV%,USG%,WS,WS/48,BPM,VORP
0,Stephen Curry,GSW,"$45,780,966",PG,33,64,21.4,0.601,13.2,30.8,8.0,0.173,5.8,4.4
1,Russell Westbrook,LAL,"$44,211,146",PG,33,78,15.0,0.512,17.3,27.3,1.7,0.03,-1.6,0.2
2,James Harden,PHI,"$43,848,000",PG-SG,32,65,20.9,0.583,18.8,27.2,7.6,0.152,4.1,3.7
3,LeBron James,LAL,"$41,180,544",SF,37,56,26.2,0.619,12.5,32.3,7.5,0.172,7.7,5.1
4,Kevin Durant,BRK,"$40,918,900",PF,33,55,25.6,0.634,12.9,31.2,8.4,0.198,7.2,4.8
5,Giannis Antetokounmpo,MIL,"$39,344,970",PF,27,67,32.1,0.633,12.2,34.9,12.9,0.281,11.2,7.4
6,Paul George,LAC,"$39,344,970",SF,31,31,18.6,0.538,15.2,33.7,1.3,0.057,3.6,1.5
7,Damian Lillard,POR,"$39,344,900",PG,31,29,18.5,0.55,11.6,29.3,1.7,0.079,1.3,0.9
8,Klay Thompson,GSW,"$37,980,720",SG,31,32,16.7,0.547,6.7,29.6,1.8,0.091,1.1,0.7
9,Jimmy Butler,MIA,"$36,016,200",SF,32,57,23.6,0.592,10.5,26.5,9.2,0.228,6.3,4.0


In [23]:
# Rename the 'G▼' column to 'Games'
full_data = full_data.rename(
    columns={'G▼': 'Games'},
)

In [24]:
# Show the merged table to confirm 'G▼' now appears as 'Games'
full_data

Unnamed: 0,Player,Team,2021-22,Pos,Age,Games,PER,TS%,TOV%,USG%,WS,WS/48,BPM,VORP
0,Stephen Curry,GSW,"$45,780,966",PG,33,64,21.4,0.601,13.2,30.8,8.0,0.173,5.8,4.4
1,Russell Westbrook,LAL,"$44,211,146",PG,33,78,15.0,0.512,17.3,27.3,1.7,0.03,-1.6,0.2
2,James Harden,PHI,"$43,848,000",PG-SG,32,65,20.9,0.583,18.8,27.2,7.6,0.152,4.1,3.7
3,LeBron James,LAL,"$41,180,544",SF,37,56,26.2,0.619,12.5,32.3,7.5,0.172,7.7,5.1
4,Kevin Durant,BRK,"$40,918,900",PF,33,55,25.6,0.634,12.9,31.2,8.4,0.198,7.2,4.8
5,Giannis Antetokounmpo,MIL,"$39,344,970",PF,27,67,32.1,0.633,12.2,34.9,12.9,0.281,11.2,7.4
6,Paul George,LAC,"$39,344,970",SF,31,31,18.6,0.538,15.2,33.7,1.3,0.057,3.6,1.5
7,Damian Lillard,POR,"$39,344,900",PG,31,29,18.5,0.55,11.6,29.3,1.7,0.079,1.3,0.9
8,Klay Thompson,GSW,"$37,980,720",SG,31,32,16.7,0.547,6.7,29.6,1.8,0.091,1.1,0.7
9,Jimmy Butler,MIA,"$36,016,200",SF,32,57,23.6,0.592,10.5,26.5,9.2,0.228,6.3,4.0
