In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
pd.set_option('display.max_columns', None)

data = pd.read_excel('nba_player_data.xlsx')

In [2]:
data.sample(10)

Unnamed: 0,Years,Season_type,PLAYER_ID,RANK,PLAYER,TEAM_ID,TEAM,GP,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,EFF,AST_TOV,STL_TOV
3603,2017-18,Regular%20Season,1626168,150,Trey Lyles,1610612743,DEN,73,1391,270,550,0.491,83,218,0.381,101,143,0.706,55,292,347,91,30,34,61,111,724,843,1.49,0.49
1978,2014-15,Playoffs,201578,124,Marreese Speights,1610612744,GSW,10,67,14,42,0.333,0,0,0.0,9,15,0.6,8,13,21,4,4,3,4,14,37,31,1.0,1.0
1309,2013-14,Playoffs,201965,150,Jeff Ayres,1610612759,SAS,17,65,6,13,0.462,0,0,0.0,5,8,0.625,6,13,19,5,0,0,1,6,17,30,5.0,0.0
3330,2016-17,Playoffs,101135,91,David Lee,1610612759,SAS,15,245,25,48,0.521,0,1,0.0,11,17,0.647,20,37,57,11,4,5,6,20,61,103,1.83,0.67
4905,2018-19,Playoffs,1627824,171,Guerschon Yabusele,1610612738,BOS,4,14,2,4,0.5,0,1,0.0,4,7,0.571,1,1,2,1,0,1,1,3,8,6,1.0,0.0
4789,2018-19,Playoffs,1628378,55,Donovan Mitchell,1610612762,UTA,5,193,36,112,0.321,11,43,0.256,24,33,0.727,7,18,25,16,8,1,21,15,107,51,0.76,0.38
816,2013-14,Regular%20Season,200782,139,P.J. Tucker,1610612756,PHX,81,2490,265,615,0.431,74,191,0.387,156,201,0.776,161,368,529,141,110,23,102,204,760,1066,1.38,1.08
5710,2020-21,Regular%20Season,201566,19,Russell Westbrook,1610612764,WAS,65,2369,544,1238,0.439,86,273,0.315,271,413,0.656,109,641,750,763,89,23,312,190,1445,1922,2.45,0.28
4160,2017-18,Playoffs,203485,167,Lorenzo Brown,1610612761,TOR,4,29,3,10,0.3,2,5,0.4,1,2,0.5,1,5,6,2,0,1,1,2,9,9,2.0,0.0
2592,2015-16,Playoffs,101106,55,Andrew Bogut,1610612744,GSW,22,365,48,77,0.623,0,2,0.0,5,14,0.357,47,78,125,30,14,35,19,72,101,248,1.58,0.74


In [3]:
data.shape

(8475, 30)

**Data cleaning & Analysis preparation**

In [4]:
data.drop(columns=['RANK', 'EFF'], inplace=True)

In [5]:
data['season_start_year'] = data['Years'].str[:4].astype(int)

In [6]:
data['TEAM'].replace(to_replace=['NOP', 'NOH'], value='NO', inplace=True)

In [7]:
data['Season_type'].replace('Regular%20Season', 'RS', inplace=True)

In [8]:
rs_df = data[data['Season_type']=='RS']
po_df = data[data['Season_type']=='Playoffs']

In [9]:
data.columns


Index(['Years', 'Season_type', 'PLAYER_ID', 'PLAYER', 'TEAM_ID', 'TEAM', 'GP',
       'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
       'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF',
       'PTS', 'AST_TOV', 'STL_TOV', 'season_start_year'],
      dtype='object')

In [10]:
total_cols = ['MIN', 'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA',
     'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS']

**Which Player stats are correlated with each other?**

In [11]:
data_per_min = data.groupby(['PLAYER', 'PLAYER_ID', 'Years'])[total_cols].sum().reset_index()
for col in data_per_min.columns[4:]:
    data_per_min[col] = data_per_min[col]/data_per_min['MIN']

data_per_min['FG%'] = data_per_min['FGM'] / data_per_min['FGA']
data_per_min['3PT%'] = data_per_min['FG3M'] / data_per_min['FG3A']
data_per_min['FT%'] = data_per_min['FTM'] / data_per_min['FTA']
data_per_min['FG3A%'] = data_per_min['FG3A'] / data_per_min['FGA']
data_per_min['PTS/FGA'] = data_per_min['PTS'] / data_per_min['FGA']
data_per_min['FG3M/FGM'] = data_per_min['FG3M'] / data_per_min['FGM']
data_per_min['FTA/FGA'] = data_per_min['FTA'] / data_per_min['FGA']
data_per_min['TRU%'] = 0.5*data_per_min['PTS']/(data_per_min['FGA']+0.475*data_per_min['FTA'])
data_per_min['AST_TOV'] = data_per_min['AST']/data_per_min['TOV']

data_per_min

Unnamed: 0,PLAYER,PLAYER_ID,Years,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,FG%,3PT%,FT%,FG3A%,PTS/FGA,FG3M/FGM,FTA/FGA,TRU%,AST_TOV
0,A.J. Lawson,1630639,2022-23,108,0.203704,0.407407,0.092593,0.231481,0.018519,0.074074,0.055556,0.138889,0.194444,0.018519,0.018519,0.000000,0.027778,0.101852,0.518519,0.500000,0.400000,0.250000,0.568182,1.272727,0.454545,0.181818,0.585774,0.666667
1,AJ Green,1631260,2022-23,345,0.153623,0.362319,0.127536,0.304348,0.011594,0.011594,0.017391,0.113043,0.130435,0.063768,0.017391,0.000000,0.026087,0.089855,0.446377,0.424000,0.419048,1.000000,0.840000,1.232000,0.830189,0.032000,0.606777,2.444444
2,AJ Green,1631260,2023-24,26,0.115385,0.269231,0.076923,0.192308,0.038462,0.038462,0.000000,0.076923,0.076923,0.115385,0.000000,0.000000,0.000000,0.153846,0.346154,0.428571,0.400000,1.000000,0.714286,1.285714,0.666667,0.142857,0.602007,inf
3,AJ Griffin,1631100,2022-23,1401,0.177016,0.380443,0.072091,0.184868,0.029979,0.033547,0.026410,0.082798,0.109208,0.052106,0.029979,0.008565,0.029979,0.062099,0.456103,0.465291,0.389961,0.893617,0.485929,1.198874,0.407258,0.088180,0.575339,1.738095
4,AJ Griffin,1631100,2023-24,45,0.133333,0.266667,0.111111,0.222222,0.000000,0.000000,0.022222,0.088889,0.111111,0.044444,0.022222,0.000000,0.044444,0.022222,0.377778,0.500000,0.500000,,0.833333,1.416667,0.833333,0.000000,0.708333,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6117,Zion Williamson,1629627,2022-23,956,0.298117,0.490586,0.007322,0.019874,0.185146,0.259414,0.060669,0.150628,0.211297,0.139121,0.033473,0.016736,0.103556,0.067992,0.788703,0.607676,0.368421,0.713710,0.040512,1.607676,0.024561,0.528785,0.642468,1.343434
6118,Zion Williamson,1629627,2023-24,123,0.284553,0.560976,0.000000,0.008130,0.130081,0.219512,0.121951,0.097561,0.219512,0.130081,0.024390,0.000000,0.097561,0.081301,0.699187,0.507246,0.000000,0.592593,0.014493,1.246377,0.000000,0.391304,0.525512,1.333333
6119,Zoran Dragic,204054,2014-15,75,0.146667,0.400000,0.040000,0.186667,0.040000,0.066667,0.066667,0.040000,0.106667,0.066667,0.026667,0.000000,0.066667,0.080000,0.373333,0.366667,0.214286,0.600000,0.466667,0.933333,0.272727,0.166667,0.432432,1.000000
6120,Zylan Cheatham,1629597,2019-20,51,0.117647,0.176471,0.000000,0.019608,0.000000,0.000000,0.058824,0.117647,0.176471,0.058824,0.019608,0.019608,0.078431,0.196078,0.235294,0.666667,0.000000,,0.111111,1.333333,0.000000,0.000000,0.666667,0.750000


In [12]:

total_cols = ['MIN', 'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA',
     'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'FG%', '3PT%', 'FT%', 'FG3A%', 'PTS/FGA', 'FG3M/FGM'
     , 'FTA/FGA', 'TRU%', 'AST_TOV']

data_per_min = data_per_min[(data_per_min['MIN']>=50)]
df_corr = data_per_min[total_cols]
df_corr.corr()

fig = px.imshow(df_corr.corr())
fig.show()
