In [1]:
import pandas as pd
import matplotlib as mpl

In [2]:
# import csv file into a dataframe:
contracts = pd.read_csv("NBA_salaries.csv", header = [1], index_col = 0)

In [3]:
# Display columns
contracts.columns

Index(['Player', 'Tm', '2021-22', '2022-23', '2023-24', '2024-25', '2025-26',
       '2026-27', 'Signed Using', 'Guaranteed'],
      dtype='object')

In [4]:
pd.set_option('display.max_rows', 550)

In [5]:
# Rename 'tm' to 'team column'
contracts = contracts.rename(columns = {'Tm': 'Team'})

In [6]:
contracts

Unnamed: 0_level_0,Player,Team,2021-22,2022-23,2023-24,2024-25,2025-26,2026-27,Signed Using,Guaranteed
Rk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,Stephen Curry,GSW,"$45,780,966","$48,070,014","$51,915,615","$55,761,216","$59,606,817",,Bird Rights,"$261,134,628"
2,John Wall,HOU,"$44,310,840","$47,366,760",,,,,Bird Rights,"$44,310,840"
3,Russell Westbrook,LAL,"$44,211,146","$47,063,478",,,,,Bird Rights,"$44,211,146"
4,James Harden,PHI,"$43,848,000","$46,872,000",,,,,Bird Rights,"$43,848,000"
5,LeBron James,LAL,"$41,180,544","$44,474,988",,,,,Bird,"$85,655,532"
6,Kevin Durant,BRK,"$40,918,900","$42,969,845","$42,969,845","$49,856,021","$53,282,609",,Sign and Trade,"$229,997,220"
7,Giannis Antetokounmpo,MIL,"$39,344,970","$42,492,568","$45,640,165","$48,787,763","$51,935,360",,Bird,"$176,265,466"
8,Paul George,LAC,"$39,344,970","$42,492,568","$45,640,165","$48,787,763",,,Bird,"$127,477,703"
9,Damian Lillard,POR,"$39,344,900","$42,492,492","$45,640,084","$48,787,676",,,1st Round Pick,"$176,265,152"
10,Kawhi Leonard,LAC,"$39,344,900","$42,492,492","$45,640,084","$48,787,676",,,Early Bird,"$127,477,476"


In [7]:
# Check df data types
contracts.dtypes

Player          object
Team            object
2021-22         object
2022-23         object
2023-24         object
2024-25         object
2025-26         object
2026-27         object
Signed Using    object
Guaranteed      object
dtype: object

In [8]:
# Dropping unnecessary columns
new_contracts = contracts.drop(['2022-23', '2023-24', '2024-25', '2025-26',
       '2026-27', 'Signed Using', 'Guaranteed'], axis=1)

In [9]:
new_contracts

Unnamed: 0_level_0,Player,Team,2021-22
Rk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Stephen Curry,GSW,"$45,780,966"
2,John Wall,HOU,"$44,310,840"
3,Russell Westbrook,LAL,"$44,211,146"
4,James Harden,PHI,"$43,848,000"
5,LeBron James,LAL,"$41,180,544"
6,Kevin Durant,BRK,"$40,918,900"
7,Giannis Antetokounmpo,MIL,"$39,344,970"
8,Paul George,LAC,"$39,344,970"
9,Damian Lillard,POR,"$39,344,900"
10,Kawhi Leonard,LAC,"$39,344,900"


In [10]:
# Import player advanced stats
nba_adv = pd.read_csv("NBA_Advanced_Stats.csv", index_col=0)

In [11]:
# Display columns
nba_adv.columns

Index(['Player', 'Pos', 'Age', 'Tm', 'G', 'MP', 'PERâ–¼', 'TS%', '3PAr', 'FTr',
       'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'TOV%', 'USG%',
       'Unnamed: 19', 'OWS', 'DWS', 'WS', 'WS/48', 'Unnamed: 24', 'OBPM',
       'DBPM', 'BPM', 'VORP'],
      dtype='object')

<strong>Advanced Stats Dictionary</strong><br>

<strong>PER:</strong> A measure of per-minute production standardized such that the league average is 15<br>
<strong>TS%:</strong> A measure of shooting efficiency that takes into account 2-point field goalds, 3-point field goals and free throws<br>
<strong>TOV%:</strong> An estimate of turnover percentage per 100 plays<br>
<strong>USG%:</strong> An estimate of team plays use by a player while they were on the floor<br>
<strong>WS:</strong> An Estimate of number of wins contributed by a player<br>
<strong>WS/48:</strong> An Estimate of number of wins contributed by a player per 48 minutes (league average is approximately .100)<br>
<strong>BPM:</strong> A box score estimate of the points per 100 possessions a player co
tributed above a league-average player, translated to an average team<br>
<strong>VORP:</strong> A box score estimate of the points per 100 TEAM posessions that a player contributed above a replacement-level (-2.0), player translated to an average team and prorated to an 82-game season.<br>

In [12]:
# Original NBA Player Advanced Stats dataset
nba_adv

Unnamed: 0_level_0,Player,Pos,Age,Tm,G,MP,PERâ–¼,TS%,3PAr,FTr,...,Unnamed: 19,OWS,DWS,WS,WS/48,Unnamed: 24,OBPM,DBPM,BPM,VORP
Rk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,Nikola JokiÄ‡,C,26,DEN,74,2476,32.8,0.661,0.22,0.357,...,,10.8,4.5,15.2,0.296,,9.2,4.5,13.7,9.8
2,Giannis Antetokounmpo,PF,27,MIL,67,2204,32.1,0.633,0.194,0.615,...,,9.2,3.7,12.9,0.281,,7.6,3.5,11.2,7.4
3,Joel Embiid,C,27,PHI,68,2297,31.2,0.616,0.188,0.602,...,,7.9,4.1,12.0,0.252,,7.2,2.0,9.2,6.5
4,LeBron James,SF,37,LAL,56,2084,26.2,0.619,0.367,0.275,...,,5.2,2.3,7.5,0.172,,6.9,0.8,7.7,5.1
5,Kevin Durant,PF,33,BRK,55,2047,25.6,0.634,0.269,0.367,...,,6.4,2.0,8.4,0.198,,6.4,0.7,7.2,4.8
6,Trae Young,PG,23,ATL,76,2652,25.4,0.603,0.395,0.358,...,,9.0,1.0,10.0,0.181,,7.1,-2.0,5.2,4.8
7,Luka DonÄiÄ‡,PG,22,DAL,65,2301,25.1,0.571,0.406,0.349,...,,3.8,3.8,7.6,0.159,,6.4,1.8,8.2,5.9
8,Rudy Gobert,C,29,UTA,66,2120,24.7,0.732,0.008,0.864,...,,7.3,4.3,11.7,0.264,,2.9,1.7,4.6,3.6
9,Ja Morant,PG,22,MEM,57,1889,24.4,0.575,0.218,0.353,...,,4.6,2.1,6.7,0.171,,6.2,-0.1,6.1,3.9
10,Karl-Anthony Towns,C,26,MIN,74,2476,24.1,0.64,0.301,0.385,...,,7.1,3.2,10.3,0.199,,4.5,0.5,5.0,4.4


In [13]:
nba_adv = nba_adv.rename(index = {'Nikola JokiÄ‡':'Nikola Jokić, "Luka DonÄ iÄ‡":'Jonas ValanÄ iÅ«nas':})

In [14]:
# Dropping all unnecessary columns
nba_adv_stats = nba_adv.drop(['Tm', 'MP', '3PAr', 'FTr', 'ORB%','DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%',
       'Unnamed: 19', 'OWS', 'DWS','Unnamed: 24','OBPM','DBPM'], axis=1)

In [15]:
nba_adv_stats

Unnamed: 0_level_0,Player,Pos,Age,G,PERâ–¼,TS%,TOV%,USG%,WS,WS/48,BPM,VORP
Rk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Nikola JokiÄ‡,C,26,74,32.8,0.661,15.6,31.9,15.2,0.296,13.7,9.8
2,Giannis Antetokounmpo,PF,27,67,32.1,0.633,12.2,34.9,12.9,0.281,11.2,7.4
3,Joel Embiid,C,27,68,31.2,0.616,11.3,37.2,12.0,0.252,9.2,6.5
4,LeBron James,SF,37,56,26.2,0.619,12.5,32.3,7.5,0.172,7.7,5.1
5,Kevin Durant,PF,33,55,25.6,0.634,12.9,31.2,8.4,0.198,7.2,4.8
6,Trae Young,PG,23,76,25.4,0.603,14.5,34.4,10.0,0.181,5.2,4.8
7,Luka DonÄiÄ‡,PG,22,65,25.1,0.571,15.3,37.4,7.6,0.159,8.2,5.9
8,Rudy Gobert,C,29,66,24.7,0.732,14.5,16.9,11.7,0.264,4.6,3.6
9,Ja Morant,PG,22,57,24.4,0.575,12.6,33.7,6.7,0.171,6.1,3.9
10,Karl-Anthony Towns,C,26,74,24.1,0.64,13.7,27.8,10.3,0.199,5.0,4.4


In [16]:
# Rename 'PERâ–¼' to 'PER'
nba_adv_stats = nba_adv_stats.rename(columns = {'PERâ–¼': 'PER'})

In [17]:
# Merge NBA salary dataframe with advanced stats dataframe
full_data = pd.merge(new_contracts, nba_adv_stats, on='Player', how='outer')

In [18]:
# Display merged dataframe
full_data

Unnamed: 0,Player,Team,2021-22,Pos,Age,G,PER,TS%,TOV%,USG%,WS,WS/48,BPM,VORP
0,Stephen Curry,GSW,"$45,780,966",PG,33.0,64.0,21.4,0.601,13.2,30.8,8.0,0.173,5.8,4.4
1,John Wall,HOU,"$44,310,840",,,,,,,,,,,
2,Russell Westbrook,LAL,"$44,211,146",PG,33.0,78.0,15.0,0.512,17.3,27.3,1.7,0.03,-1.6,0.2
3,James Harden,PHI,"$43,848,000",PG-SG,32.0,65.0,20.9,0.583,18.8,27.2,7.6,0.152,4.1,3.7
4,LeBron James,LAL,"$41,180,544",SF,37.0,56.0,26.2,0.619,12.5,32.3,7.5,0.172,7.7,5.1
5,Kevin Durant,BRK,"$40,918,900",PF,33.0,55.0,25.6,0.634,12.9,31.2,8.4,0.198,7.2,4.8
6,Giannis Antetokounmpo,MIL,"$39,344,970",PF,27.0,67.0,32.1,0.633,12.2,34.9,12.9,0.281,11.2,7.4
7,Paul George,LAC,"$39,344,970",,,,,,,,,,,
8,Damian Lillard,POR,"$39,344,900",,,,,,,,,,,
9,Kawhi Leonard,LAC,"$39,344,900",,,,,,,,,,,


In [19]:
# rename 'G' columns to 'Games'
full_data = full_data.rename(columns = {'G' : 'Games'})

In [20]:
# Due to contextual resons, I will remove all players who are injured/did not play a game this season.
full_nba_data = full_data[full_data['Games'].notna()]

In [21]:
full_nba_data

Unnamed: 0,Player,Team,2021-22,Pos,Age,Games,PER,TS%,TOV%,USG%,WS,WS/48,BPM,VORP
0,Stephen Curry,GSW,"$45,780,966",PG,33.0,64.0,21.4,0.601,13.2,30.8,8.0,0.173,5.8,4.4
2,Russell Westbrook,LAL,"$44,211,146",PG,33.0,78.0,15.0,0.512,17.3,27.3,1.7,0.03,-1.6,0.2
3,James Harden,PHI,"$43,848,000",PG-SG,32.0,65.0,20.9,0.583,18.8,27.2,7.6,0.152,4.1,3.7
4,LeBron James,LAL,"$41,180,544",SF,37.0,56.0,26.2,0.619,12.5,32.3,7.5,0.172,7.7,5.1
5,Kevin Durant,BRK,"$40,918,900",PF,33.0,55.0,25.6,0.634,12.9,31.2,8.4,0.198,7.2,4.8
6,Giannis Antetokounmpo,MIL,"$39,344,970",PF,27.0,67.0,32.1,0.633,12.2,34.9,12.9,0.281,11.2,7.4
11,Jimmy Butler,MIA,"$36,016,200",SF,32.0,57.0,23.6,0.592,10.5,26.5,9.2,0.228,6.3,4.0
12,Tobias Harris,PHI,"$36,000,000",PF,29.0,73.0,15.9,0.566,9.5,21.7,5.6,0.105,0.0,1.3
13,Khris Middleton,MIL,"$35,500,000",SF,30.0,66.0,18.1,0.577,14.2,26.7,5.3,0.119,1.6,1.9
15,Rudy Gobert,UTA,"$35,344,828",C,29.0,66.0,24.7,0.732,14.5,16.9,11.7,0.264,4.6,3.6
