In [1]:
import pandas as pd
import matplotlib as mpl

In [2]:
# Load the salary table into a DataFrame: row 1 (0-based) of the file supplies
# the column names and the first column is used as the index.
contracts = pd.read_csv(
    "NBA_salaries.csv",
    header=[1],
    index_col=0,
)

In [3]:
# List the column labels of the salary table to see what was loaded
contracts.columns

Index(['Player', 'Tm', '2021-22', '2022-23', '2023-24', '2024-25', '2025-26',
       '2026-27', 'Signed Using', 'Guaranteed'],
      dtype='object')

In [4]:
# Rename the 'Tm' column to 'Team'; every other column label is left as-is.
contracts = contracts.rename({'Tm': 'Team'}, axis="columns")

In [5]:
# Preview the salary table after the 'Tm' -> 'Team' rename
contracts

Unnamed: 0_level_0,Player,Team,2021-22,2022-23,2023-24,2024-25,2025-26,2026-27,Signed Using,Guaranteed
Rk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,Stephen Curry,GSW,"$45,780,966","$48,070,014","$51,915,615","$55,761,216","$59,606,817",,Bird Rights,"$261,134,628"
2,John Wall,HOU,"$44,310,840","$47,366,760",,,,,Bird Rights,"$44,310,840"
3,Russell Westbrook,LAL,"$44,211,146","$47,063,478",,,,,Bird Rights,"$44,211,146"
4,James Harden,PHI,"$43,848,000","$46,872,000",,,,,Bird Rights,"$43,848,000"
5,LeBron James,LAL,"$41,180,544","$44,474,988",,,,,Bird,"$85,655,532"
...,...,...,...,...,...,...,...,...,...,...
495,Isaiah Thomas,CHO,"$276,039",,,,,,Minimum Salary,"$552,077"
496,Trendon Watford,POR,"$260,561","$1,563,518","$1,836,096","$1,988,598",,,MLE,"$5,648,773"
497,Ish Wainright,TOR,"$8,558",,,,,,,"$375,000"
498,Malik Fitts,BOS,"$231,062","$1,752,638",,,,,Minimum Salary,"$402,218"


In [6]:
# Check the dtype of every column. The salary columns hold formatted strings
# such as "$45,780,966" (dtype object), so the '$' and ',' have to be stripped
# and the values converted before any numeric work is done on them.
contracts.dtypes

Player          object
Team            object
2021-22         object
2022-23         object
2023-24         object
2024-25         object
2025-26         object
2026-27         object
Signed Using    object
Guaranteed      object
dtype: object

In [7]:
# Discard the future-season salaries and the contract metadata; what remains is
# every other column of `contracts` (Player, Team and the 2021-22 salary).
new_contracts = contracts.drop(
    columns=[
        '2022-23', '2023-24', '2024-25', '2025-26', '2026-27',
        'Signed Using', 'Guaranteed',
    ],
)

In [8]:
# Preview the trimmed salary table (player, team and 2021-22 salary)
new_contracts

Unnamed: 0_level_0,Player,Team,2021-22
Rk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Stephen Curry,GSW,"$45,780,966"
2,John Wall,HOU,"$44,310,840"
3,Russell Westbrook,LAL,"$44,211,146"
4,James Harden,PHI,"$43,848,000"
5,LeBron James,LAL,"$41,180,544"
...,...,...,...
495,Isaiah Thomas,CHO,"$276,039"
496,Trendon Watford,POR,"$260,561"
497,Ish Wainright,TOR,"$8,558"
498,Malik Fitts,BOS,"$231,062"


In [9]:
# Load the per-player advanced stats; the first column is used as the index.
nba_adv = pd.read_csv(
    "NBA_Advanced_Stats.csv",
    index_col=0,
)

In [10]:
# List the column labels of the advanced-stats table to see what was loaded
nba_adv.columns

Index(['Player', 'Pos', 'Age', 'Tm', 'G', 'MP', 'PERâ–¼', 'TS%', '3PAr', 'FTr',
       'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'TOV%', 'USG%',
       'Unnamed: 19', 'OWS', 'DWS', 'WS', 'WS/48', 'Unnamed: 24', 'OBPM',
       'DBPM', 'BPM', 'VORP'],
      dtype='object')

<strong>Advanced Stats Dictionary</strong><br>

<strong>PER:</strong> A measure of per-minute production standardized such that the league average is 15<br>
<strong>TS%:</strong> A measure of shooting efficiency that takes into account 2-point field goals, 3-point field goals and free throws<br>
<strong>TOV%:</strong> An estimate of turnovers committed per 100 plays<br>
<strong>USG%:</strong> An estimate of the percentage of team plays used by a player while they were on the floor<br>
<strong>WS:</strong> An estimate of the number of wins contributed by a player<br>
<strong>WS/48:</strong> An estimate of the number of wins contributed by a player per 48 minutes (league average is approximately .100)<br>
<strong>BPM:</strong> A box score estimate of the points per 100 possessions a player contributed above a league-average player, translated to an average team<br>
<strong>VORP:</strong> A box score estimate of the points per 100 TEAM possessions that a player contributed above a replacement-level (-2.0) player, translated to an average team and prorated to an 82-game season.<br>

In [11]:
# Original NBA Player Advanced Stats dataset, exactly as loaded
# (shown before any columns are dropped or renamed)
nba_adv

Unnamed: 0_level_0,Player,Pos,Age,Tm,G,MP,PERâ–¼,TS%,3PAr,FTr,...,Unnamed: 19,OWS,DWS,WS,WS/48,Unnamed: 24,OBPM,DBPM,BPM,VORP
Rk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,Nikola JokiÄ‡,C,26,DEN,74,2476,32.8,0.661,0.220,0.357,...,,10.8,4.5,15.2,0.296,,9.2,4.5,13.7,9.8
2,Giannis Antetokounmpo,PF,27,MIL,67,2204,32.1,0.633,0.194,0.615,...,,9.2,3.7,12.9,0.281,,7.6,3.5,11.2,7.4
3,Joel Embiid,C,27,PHI,68,2297,31.2,0.616,0.188,0.602,...,,7.9,4.1,12.0,0.252,,7.2,2.0,9.2,6.5
4,LeBron James,SF,37,LAL,56,2084,26.2,0.619,0.367,0.275,...,,5.2,2.3,7.5,0.172,,6.9,0.8,7.7,5.1
5,Kevin Durant,PF,33,BRK,55,2047,25.6,0.634,0.269,0.367,...,,6.4,2.0,8.4,0.198,,6.4,0.7,7.2,4.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,Jaden McDaniels,PF,21,MIN,70,1803,10.3,0.553,0.455,0.128,...,,0.7,1.7,2.4,0.065,,-2.9,-0.1,-2.9,-0.4
174,Royce O'Neale,SF,28,UTA,77,2406,10.3,0.608,0.689,0.126,...,,2.7,2.8,5.5,0.109,,-0.8,1.7,0.8,1.7
175,Justin Holiday,SF-SG,32,TOT,74,2057,9.5,0.543,0.728,0.094,...,,0.8,0.6,1.4,0.033,,-1.4,-0.8,-2.2,-0.1
176,Killian Hayes,PG,20,DET,66,1647,9.3,0.461,0.364,0.161,...,,-1.0,1.5,0.5,0.014,,-4.3,0.8,-3.5,-0.6


In [12]:
# Discard the stat columns that are not used further, along with the two
# 'Unnamed' placeholder columns (they show no values in the preview of
# `nba_adv`). Every column not listed here is kept.
nba_adv_stats = nba_adv.drop(
    columns=[
        'Tm', 'MP', '3PAr', 'FTr',
        'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%',
        'Unnamed: 19', 'OWS', 'DWS', 'Unnamed: 24', 'OBPM', 'DBPM',
    ],
)

In [13]:
# Preview the trimmed advanced-stats table
nba_adv_stats

Unnamed: 0_level_0,Player,Pos,Age,G,PERâ–¼,TS%,TOV%,USG%,WS,WS/48,BPM,VORP
Rk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Nikola JokiÄ‡,C,26,74,32.8,0.661,15.6,31.9,15.2,0.296,13.7,9.8
2,Giannis Antetokounmpo,PF,27,67,32.1,0.633,12.2,34.9,12.9,0.281,11.2,7.4
3,Joel Embiid,C,27,68,31.2,0.616,11.3,37.2,12.0,0.252,9.2,6.5
4,LeBron James,SF,37,56,26.2,0.619,12.5,32.3,7.5,0.172,7.7,5.1
5,Kevin Durant,PF,33,55,25.6,0.634,12.9,31.2,8.4,0.198,7.2,4.8
...,...,...,...,...,...,...,...,...,...,...,...,...
173,Jaden McDaniels,PF,21,70,10.3,0.553,11.4,15.3,2.4,0.065,-2.9,-0.4
174,Royce O'Neale,SF,28,77,10.3,0.608,14.4,9.9,5.5,0.109,0.8,1.7
175,Justin Holiday,SF-SG,32,74,9.5,0.543,9.2,15.8,1.4,0.033,-2.2,-0.1
176,Killian Hayes,PG,20,66,9.3,0.461,18.4,15.7,0.5,0.014,-3.5,-0.6


In [14]:
# Normalise the PER column label to plain 'PER'.
# The loaded header has extra characters trailing "PER" (see the column listing
# above), and what those characters look like depends on how the CSV text was
# decoded. DataFrame.rename silently ignores a mapping key that is not present,
# so matching the exact decorated label would leave the column un-renamed,
# without any error, if the file were decoded differently. Matching on the
# 'PER' prefix avoids that and is a no-op when the label is already 'PER'.
# No other column in this table starts with 'PER'.
nba_adv_stats = nba_adv_stats.rename(
    columns=lambda label: 'PER' if str(label).startswith('PER') else label
)

In [15]:
# Combine salaries and advanced stats on the player's name. The outer join keeps
# players that appear in only one of the two tables; their columns from the
# other table are filled with NaN.
# NOTE(review): matching is by exact 'Player' string, so any spelling or
# character-encoding difference between the two files yields two separate,
# half-empty rows instead of one match - confirm the unmatched rows are expected.
full_data = new_contracts.merge(nba_adv_stats, how='outer', on='Player')

In [16]:
# Preview the merged table; a row found in only one input shows NaN in the
# columns that come from the other input
full_data

Unnamed: 0,Player,Team,2021-22,Pos,Age,G,PER,TS%,TOV%,USG%,WS,WS/48,BPM,VORP
0,Stephen Curry,GSW,"$45,780,966",PG,33.0,64.0,21.4,0.601,13.2,30.8,8.0,0.173,5.8,4.4
1,John Wall,HOU,"$44,310,840",,,,,,,,,,,
2,Russell Westbrook,LAL,"$44,211,146",PG,33.0,78.0,15.0,0.512,17.3,27.3,1.7,0.030,-1.6,0.2
3,James Harden,PHI,"$43,848,000",PG-SG,32.0,65.0,20.9,0.583,18.8,27.2,7.6,0.152,4.1,3.7
4,LeBron James,LAL,"$41,180,544",SF,37.0,56.0,26.2,0.619,12.5,32.3,7.5,0.172,7.7,5.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
503,Nikola VuÄeviÄ‡,,,C,31.0,73.0,18.2,0.533,10.1,24.5,4.5,0.090,0.9,1.8
504,Bojan BogdanoviÄ‡,,,PF,32.0,69.0,15.7,0.599,10.2,23.7,5.2,0.118,0.0,1.1
505,Bogdan BogdanoviÄ‡,,,SG,29.0,63.0,15.4,0.562,7.7,21.6,3.7,0.095,1.3,1.5
506,Amir Coffey,,,SG,24.0,69.0,13.0,0.598,8.0,15.8,3.8,0.116,-0.3,0.7
