## NBA Player Stats Analysis


In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# never truncate the column list when a frame is rendered
pd.options.display.max_columns = None

# load the raw player stats from the CSV file
data = pd.read_csv(filepath_or_buffer='nba_player_data.csv')

In [2]:
# (rows, columns) of the raw frame as loaded from the CSV
data.shape

(7209, 30)

In [3]:
# preview the first five rows of the raw frame
data.head()

Unnamed: 0,Year,Season_type,PLAYER_ID,RANK,PLAYER,TEAM_ID,TEAM,GP,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,EFF,AST_TOV,STL_TOV
0,2014-15,Regular%20Season,201935,1,James Harden,1610612745,HOU,81,2981,647,1470,0.44,208,555,0.375,715,824,0.868,75,384,459,565,154,60,321,208,2217,2202,1.76,0.48
1,2014-15,Regular%20Season,201939,2,Stephen Curry,1610612744,GSW,80,2613,653,1341,0.487,286,646,0.443,308,337,0.914,56,285,341,619,163,16,249,158,1900,2073,2.49,0.66
2,2014-15,Regular%20Season,201566,3,Russell Westbrook,1610612760,OKC,67,2302,627,1471,0.426,86,288,0.299,546,654,0.835,124,364,488,574,140,14,293,184,1886,1857,1.96,0.48
3,2014-15,Regular%20Season,2544,4,LeBron James,1610612739,CLE,69,2493,624,1279,0.488,120,339,0.354,375,528,0.71,51,365,416,511,109,49,272,135,1743,1748,1.88,0.4
4,2014-15,Regular%20Season,203081,5,Damian Lillard,1610612757,POR,82,2925,590,1360,0.434,196,572,0.343,344,398,0.864,49,329,378,507,97,21,222,164,1720,1677,2.28,0.44


### Data cleaning & preparation

In [4]:
# Drop columns that are not needed.
# Reassign instead of mutating in place, and ignore already-missing columns,
# so that re-running this cell does not raise a KeyError.
data = data.drop(columns=['RANK', 'EFF'], errors='ignore')

In [5]:
# Derive the numeric starting year of each season from the first four
# characters of 'Year' (e.g. '2014-15' -> 2014)
season_label = data['Year']
data['season_start_year'] = season_label.str.slice(0, 4).astype(int)

In [6]:
# Clean Season type data: decode the URL-encoded label.
# Assign the result back to the column; calling replace(..., inplace=True) on
# data['Season_type'] mutates a temporary Series, which warns on recent pandas
# and has no effect on `data` when Copy-on-Write is enabled.
data['Season_type'] = data['Season_type'].replace('Regular%20Season', 'Regular Season')

In [7]:
# Split the rows by season type into a regular-season and a playoffs frame
season_type = data['Season_type']
rs_df = data.loc[season_type.eq('Regular Season')]
playoffs_df = data.loc[season_type.eq('Playoffs')]


In [8]:
# columns of the original data frame after cleaning
data.columns

Index(['Year', 'Season_type', 'PLAYER_ID', 'PLAYER', 'TEAM_ID', 'TEAM', 'GP',
       'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
       'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF',
       'PTS', 'AST_TOV', 'STL_TOV', 'season_start_year'],
      dtype='object')

In [9]:
# Counting stats for which summing across rows is meaningful
# (percentage and ratio columns are deliberately left out)
total_cols = [
    'GP', 'MIN',
    'FGM', 'FGA',
    'FG3M', 'FG3A',
    'FTM', 'FTA',
    'OREB', 'DREB', 'REB',
    'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
]

### Data Analysis: Which player stats are correlated with each other?

In [10]:
# One row per player and season: sum the counting stats over all of that
# player's rows for the season
player_season_keys = ['PLAYER', 'PLAYER_ID', 'Year']
data_per_min = (
    data
    .groupby(player_season_keys)[total_cols]
    .sum()
    .reset_index()
)

data_per_min.sample(5)


Unnamed: 0,PLAYER,PLAYER_ID,Year,GP,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
4096,Paul George,202331,2015-16,88,3094,661,1572,228,609,515,592,88,528,616,359,166,34,288,254,2065
96,Alec Peters,1628409,2017-18,20,225,28,74,18,58,8,10,8,29,37,12,2,2,4,13,82
748,Chris Chiozza,1629185,2021-22,34,372,24,81,17,53,2,3,9,27,36,65,13,0,32,32,67
351,Bam Adebayo,1628389,2023-24,27,928,217,423,1,7,161,206,60,220,280,107,30,26,68,66,596
1960,Ish Smith,202397,2018-19,60,1332,215,527,45,142,50,66,24,132,156,217,31,12,65,117,525


In [11]:
# Transition to per minute stats.
# Apply the minutes filter first and take an explicit copy: rows with very few
# (or zero) minutes would otherwise go through the division, and mutating a
# filtered slice triggers SettingWithCopyWarning.
min_minutes = 50
data_per_min = data_per_min.loc[data_per_min['MIN'] > min_minutes].copy()

# Divide every counting stat except GP and MIN by minutes played. Columns are
# selected by name rather than by position so a reordered frame cannot
# silently change which columns get scaled.
rate_cols = [col for col in total_cols if col not in ('GP', 'MIN')]
data_per_min[rate_cols] = data_per_min[rate_cols].div(data_per_min['MIN'], axis=0)

# Add new indicators (ratios of two per-minute rates, so the minutes cancel out)
data_per_min['FG%'] = data_per_min['FGM']/data_per_min['FGA']
data_per_min['3PT%'] = data_per_min['FG3M']/data_per_min['FG3A']
data_per_min['FT%'] = data_per_min['FTM']/data_per_min['FTA']
data_per_min['FG3A%'] = data_per_min['FG3A']/data_per_min['FGA']
data_per_min['PTS/FGA'] = data_per_min['PTS']/data_per_min['FGA']
data_per_min['FG3M/FGM'] = data_per_min['FG3M']/data_per_min['FGM']
data_per_min['FTA/FGA'] = data_per_min['FTA']/data_per_min['FGA']
data_per_min['TRU%'] = 0.5*data_per_min['PTS']/(data_per_min['FGA']+0.475*data_per_min['FTA'])
data_per_min['AST_TOV'] = data_per_min['AST']/data_per_min['TOV']

# for correlation analysis: drop GP (doesn't make sense), drop any categorical columns (player, player_id, year)
data_per_min = data_per_min.drop(columns=['GP', 'PLAYER', 'PLAYER_ID', 'Year'])

data_per_min.sample(5)

Unnamed: 0,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,FG%,3PT%,FT%,FG3A%,PTS/FGA,FG3M/FGM,FTA/FGA,TRU%,AST_TOV
9,1108,0.169675,0.423285,0.059567,0.166968,0.044224,0.057762,0.018953,0.072202,0.091155,0.162455,0.027076,0.009025,0.074007,0.119134,0.443141,0.400853,0.356757,0.765625,0.394456,1.046908,0.351064,0.136461,0.49159,2.195122
1006,664,0.152108,0.362952,0.027108,0.093373,0.10241,0.129518,0.024096,0.078313,0.10241,0.165663,0.021084,0.00753,0.078313,0.103916,0.433735,0.419087,0.290323,0.790698,0.257261,1.195021,0.178218,0.356846,0.51091,2.115385
398,123,0.138211,0.276423,0.0,0.04878,0.056911,0.121951,0.02439,0.073171,0.097561,0.186992,0.04065,0.00813,0.04065,0.146341,0.333333,0.5,0.0,0.466667,0.176471,1.205882,0.0,0.441176,0.49848,4.6
3494,2149,0.164728,0.342485,0.036296,0.101908,0.053048,0.064216,0.041415,0.169846,0.211261,0.070731,0.02792,0.020009,0.061889,0.110749,0.418799,0.480978,0.356164,0.826087,0.297554,1.222826,0.220339,0.1875,0.561412,1.142857
2249,147,0.136054,0.442177,0.102041,0.319728,0.054422,0.054422,0.013605,0.142857,0.156463,0.034014,0.034014,0.006803,0.006803,0.034014,0.428571,0.307692,0.319149,1.0,0.723077,0.969231,0.75,0.123077,0.457849,5.0


In [12]:
# pairwise correlations between minutes, the per-minute stats and the derived ratios
data_per_min.corr()

Unnamed: 0,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,FG%,3PT%,FT%,FG3A%,PTS/FGA,FG3M/FGM,FTA/FGA,TRU%,AST_TOV
MIN,1.0,0.407087,0.34363,0.178127,0.103257,0.322161,0.26134,-0.119414,0.05345,-0.01535,0.218918,0.032569,-0.055551,0.150451,-0.357046,0.431262,0.174159,0.169111,0.230071,-0.033162,0.232615,-0.007372,0.029527,0.28506,0.020729
FGM,0.407087,1.0,0.870539,0.19176,0.134698,0.631948,0.596654,0.080472,0.236479,0.191296,0.249638,-0.030116,0.080294,0.434114,-0.181884,0.958685,0.404405,0.12487,0.197091,-0.263421,0.362172,-0.252086,0.084657,0.41519,-0.112628
FGA,0.34363,0.870539,1.0,0.425708,0.446272,0.571423,0.492278,-0.218719,-0.008945,-0.099064,0.349353,0.015506,-0.162664,0.448649,-0.316682,0.897297,-0.073205,0.167745,0.299447,0.022769,-0.024864,0.013706,-0.08992,0.023437,-0.005786
FG3M,0.178127,0.19176,0.425708,1.0,0.953799,-0.013062,-0.149965,-0.611848,-0.394101,-0.522208,0.139186,-0.029798,-0.430168,-0.069116,-0.402573,0.354258,-0.377341,0.575656,0.399155,0.823509,-0.092068,0.845536,-0.39392,0.110331,0.201612
FG3A,0.103257,0.134698,0.446272,0.953799,1.0,-0.029426,-0.164909,-0.647021,-0.429012,-0.560366,0.155212,-0.00807,-0.457898,-0.048384,-0.397351,0.297682,-0.520249,0.441773,0.38046,0.86837,-0.250147,0.831423,-0.422801,-0.07858,0.214002
FTM,0.322161,0.631948,0.571423,-0.013062,-0.029426,1.0,0.955754,0.094089,0.213189,0.181628,0.267302,0.023139,0.083714,0.469862,-0.07724,0.753976,0.206827,0.010468,0.271189,-0.303953,0.485675,-0.2974,0.609791,0.279679,-0.109309
FTA,0.26134,0.596654,0.492278,-0.149965,-0.164909,0.955754,1.0,0.235288,0.310255,0.306202,0.19997,0.023219,0.189075,0.473771,0.020817,0.685278,0.284775,-0.080297,0.029786,-0.421937,0.50721,-0.410105,0.715877,0.250066,-0.176569
OREB,-0.119414,0.080472,-0.218719,-0.611848,-0.647021,0.094089,0.235288,1.0,0.668057,0.869391,-0.354227,-0.103263,0.603079,0.006359,0.486929,-0.046888,0.571435,-0.397724,-0.369942,-0.646748,0.363898,-0.61456,0.389839,0.238987,-0.380512
DREB,0.05345,0.236479,-0.008945,-0.394101,-0.429012,0.213189,0.310255,0.668057,1.0,0.948486,-0.203434,-0.122616,0.538503,0.148571,0.326665,0.149697,0.481548,-0.229456,-0.2381,-0.483392,0.356177,-0.461108,0.326398,0.264691,-0.334624
REB,-0.01535,0.191296,-0.099064,-0.522208,-0.560366,0.181628,0.306202,0.869391,0.948486,1.0,-0.285909,-0.125389,0.614364,0.101366,0.42424,0.079443,0.56307,-0.320159,-0.315465,-0.596361,0.391455,-0.567859,0.382725,0.277521,-0.38375


In [13]:
# Heat map of the correlation matrix
corr_matrix = data_per_min.corr()
fig = px.imshow(corr_matrix)

# Final figure is a square: a 500px base side enlarged by a factor of 1.5
base_side = 500
side = 1.5 * base_side
small_ticks = dict(size=8)
fig.update_layout(
    autosize=False,
    width=side,
    height=side,
    xaxis=dict(tickangle=90, tickfont=small_ticks),
    yaxis=dict(tickfont=small_ticks),
)
fig.show()

### Data Analysis: How are minutes distributed?

In [14]:
# Distribution of the regular-season MIN column, as a percentage of rows
minutes_played = rs_df['MIN']
fig = px.histogram(x=minutes_played, histnorm='percent')
fig.show()

In [15]:

# function for applying MIN and GP filters and returning MIN/GP data
def hist_data(df=None, min_MIN=0, min_GP=0):
    """Return minutes per game for the rows that pass both thresholds.

    Parameters
    ----------
    df : DataFrame with 'MIN' and 'GP' columns. When omitted, the notebook's
        current ``rs_df`` is used; it is looked up at call time so a rebuilt
        ``rs_df`` is picked up instead of the frame that existed when this
        function was defined.
    min_MIN : keep rows whose 'MIN' is at least this value.
    min_GP : keep rows whose 'GP' is at least this value.

    Returns
    -------
    Series of MIN / GP for the kept rows, indexed like the input rows.
    """
    if df is None:
        df = rs_df
    # build the row filter once and reuse it for numerator and denominator
    keep = (df['MIN'] >= min_MIN) & (df['GP'] >= min_GP)
    eligible = df.loc[keep]
    return eligible['MIN'] / eligible['GP']

In [16]:
# Overlaid histograms of minutes per game (percent of rows per 1-minute bin),
# regular season vs playoffs, each with its own MIN / GP thresholds
minute_bins = {'start':0,'end':46,'size':1}
mpg_series = [
    ('Regular Season', hist_data(rs_df,50,5)),
    ('Playoffs', hist_data(playoffs_df,5,1)),
]

fig = go.Figure()
for trace_name, minutes_per_game in mpg_series:
    fig.add_trace(go.Histogram(x=minutes_per_game, histnorm='percent',
                               name=trace_name, xbins=minute_bins))

fig.update_layout(barmode='overlay', title='Minutes per Game Distribution', xaxis_title='Minutes per Game', yaxis_title='Percent of Players')
fig.update_traces(opacity=0.75)
fig.show()

In [17]:
# share of qualifying regular-season entries averaging at least 12 minutes per game
hist_data(rs_df,50,5).ge(12).mean()


0.8171681043551441

In [18]:
# share of qualifying playoff entries averaging at least 12 minutes per game
hist_data(playoffs_df,5,1).ge(12).mean()

0.6802575107296137

### Data Analysis: How has the game changed over the past 10 years?

In [19]:
# League-wide totals per season; `data` is used unfiltered here, so every
# Season_type present in it contributes to the sums
season_totals = data.groupby('season_start_year')[total_cols].sum()
change_df = season_totals.reset_index()

# estimated possessions, placed right after season_start_year, GP and MIN
possessions = change_df['FGA'] - change_df['OREB'] + change_df['TOV'] + 0.44*change_df['FTA']
change_df.insert(3, 'POSS_est', possessions)
change_df

Unnamed: 0,season_start_year,GP,MIN,POSS_est,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,2014,27634,634546,253004.12,98251,219265,20724,59276,45098,60248,28566,85231,113797,57727,20261,12665,35796,53272,262324
1,2015,27978,636391,258064.8,100351,222344,22524,63673,46516,61520,27426,87611,115037,58251,20562,13046,36078,53478,269742
2,2016,27876,632482,258443.8,102147,223333,25408,71018,46806,60620,26470,87173,113643,59162,20143,12409,34908,52232,276508
3,2017,27836,633425,260904.52,103729,225523,27530,76245,43721,57008,25397,88678,114075,60739,20181,12636,35695,52238,278709
4,2018,27862,634231,268739.84,107374,233717,29817,84143,46671,60811,27128,91360,118488,64257,19940,12984,35394,55063,291236
5,2019,24087,552262,234384.64,92997,202223,28032,78279,40949,52906,22802,79318,102120,55445,17368,11085,31685,47615,254975
6,2020,24918,562518,235759.48,95849,205754,29549,80653,39624,50917,22918,80151,103069,57311,17491,11272,30520,45152,260871
7,2021,27931,635572,264004.96,106569,231293,32733,92552,44740,57709,27052,89602,116654,64618,20006,12387,34372,52038,290611
8,2022,27623,635386,266600.04,110010,231870,32382,89926,48136,61516,27403,86695,114098,66265,19078,12250,35066,52438,300538
9,2023,12105,270896,114269.12,47512,100247,14431,39420,20313,25848,12112,36863,48975,29707,8359,5785,14761,22154,129768


In [20]:
# Append the efficiency / shot-mix ratios, all derived from the season totals
change_df = change_df.assign(**{
    'FG%': change_df['FGM']/change_df['FGA'],
    '3PT%': change_df['FG3M']/change_df['FG3A'],
    'FT%': change_df['FTM']/change_df['FTA'],
    'AST%': change_df['AST']/change_df['FGM'],
    'FG3A%': change_df['FG3A']/change_df['FGA'],
    'PTS/FGA': change_df['PTS']/change_df['FGA'],
    'FG3M/FGM': change_df['FG3M']/change_df['FGM'],
    'FTA/FGA': change_df['FTA']/change_df['FGA'],
    'TRU%': 0.5*change_df['PTS']/(change_df['FGA']+0.475*change_df['FTA']),
    'AST_TOV': change_df['AST']/change_df['TOV'],
})

change_df

Unnamed: 0,season_start_year,GP,MIN,POSS_est,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,FG%,3PT%,FT%,AST%,FG3A%,PTS/FGA,FG3M/FGM,FTA/FGA,TRU%,AST_TOV
0,2014,27634,634546,253004.12,98251,219265,20724,59276,45098,60248,28566,85231,113797,57727,20261,12665,35796,53272,262324,0.448092,0.349619,0.748539,0.587546,0.27034,1.196379,0.210929,0.274773,0.529129,1.612666
1,2015,27978,636391,258064.8,100351,222344,22524,63673,46516,61520,27426,87611,115037,58251,20562,13046,36078,53478,269742,0.451332,0.353745,0.756112,0.580473,0.286372,1.213174,0.224452,0.276688,0.536126,1.614585
2,2016,27876,632482,258443.8,102147,223333,25408,71018,46806,60620,26470,87173,113643,59162,20143,12409,34908,52232,276508,0.457375,0.357768,0.772121,0.579185,0.317992,1.238097,0.24874,0.271433,0.54835,1.694798
3,2017,27836,633425,260904.52,103729,225523,27530,76245,43721,57008,25397,88678,114075,60739,20181,12636,35695,52238,278709,0.459949,0.361073,0.766927,0.585555,0.338081,1.235834,0.265403,0.252781,0.551677,1.701611
4,2018,27862,634231,268739.84,107374,233717,29817,84143,46671,60811,27128,91360,118488,64257,19940,12984,35394,55063,291236,0.459419,0.354361,0.767476,0.598441,0.360021,1.246105,0.277693,0.260191,0.554519,1.815477
5,2019,24087,552262,234384.64,92997,202223,28032,78279,40949,52906,22802,79318,102120,55445,17368,11085,31685,47615,254975,0.459874,0.358104,0.773995,0.596202,0.387092,1.260861,0.301429,0.261622,0.560746,1.749882
6,2020,24918,562518,235759.48,95849,205754,29549,80653,39624,50917,22918,80151,103069,57311,17491,11272,30520,45152,260871,0.465843,0.366372,0.778208,0.59793,0.391988,1.267878,0.308287,0.247465,0.56726,1.877818
7,2021,27931,635572,264004.96,106569,231293,32733,92552,44740,57709,27052,89602,116654,64618,20006,12387,34372,52038,290611,0.460753,0.353671,0.775269,0.606349,0.40015,1.256463,0.307153,0.249506,0.561665,1.87996
8,2022,27623,635386,266600.04,110010,231870,32382,89926,48136,61516,27403,86695,114098,66265,19078,12250,35066,52438,300538,0.474447,0.360096,0.782496,0.602354,0.387829,1.296149,0.294355,0.265304,0.575545,1.889722
9,2023,12105,270896,114269.12,47512,100247,14431,39420,20313,25848,12112,36863,48975,29707,8359,5785,14761,22154,129768,0.473949,0.366083,0.785864,0.625253,0.393229,1.294483,0.303734,0.257843,0.57662,2.012533


In [21]:
# work on a copy so the per-48 scaling below leaves change_df untouched
change_per48_df = change_df.copy()

In [22]:
# columns to standardize to per 48 minutes; 48 minutes is used because we analyse on a team basis
change_per48_df.columns[3:19]

Index(['POSS_est', 'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA', 'OREB', 'DREB',
       'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS'],
      dtype='object')

In [23]:
# average stats per 48 minutes for a team in a given season
# important: these are per 48 minutes, not yet per possession
per48_cols = change_per48_df.columns[3:19]
change_per48_df[per48_cols] = change_per48_df[per48_cols].div(change_per48_df['MIN'], axis=0)*48*5

# MIN was only needed as the denominator
change_per48_df = change_per48_df.drop(columns=['MIN'])

change_per48_df

Unnamed: 0,season_start_year,GP,POSS_est,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,FG%,3PT%,FT%,AST%,FG3A%,PTS/FGA,FG3M/FGM,FTA/FGA,TRU%,AST_TOV
0,2014,27634,95.69202,37.160805,82.931103,7.838297,22.419557,17.057109,22.78719,10.804323,32.236339,43.040662,21.833689,7.66318,4.790196,13.538877,20.148705,99.217015,0.448092,0.349619,0.748539,0.587546,0.27034,1.196379,0.210929,0.274773,0.529129,1.612666
1,2015,27978,97.323111,37.845036,83.851846,8.4944,24.012785,17.542423,23.200831,10.343075,33.040442,43.383517,21.968004,7.754478,4.919994,13.605975,20.167978,101.726894,0.451332,0.353745,0.756112,0.580473,0.286372,1.213174,0.224452,0.276688,0.536126,1.614585
2,2016,27876,98.068423,38.760439,84.745368,9.641255,26.948308,17.760885,23.00271,10.044238,33.078443,43.122682,22.449461,7.643411,4.708687,13.2461,19.819821,104.923018,0.457375,0.357768,0.772121,0.579185,0.317992,1.238097,0.24874,0.271433,0.54835,1.694798
3,2017,27836,98.854773,39.302143,85.44898,10.430911,28.888661,16.56556,21.599905,9.622734,33.599432,43.222165,23.013553,7.64643,4.787686,13.524569,19.792588,105.600758,0.459949,0.361073,0.766927,0.585555,0.338081,1.235834,0.265403,0.252781,0.551677,1.701611
4,2018,27862,101.694117,40.631505,88.441088,11.283081,31.840639,17.660821,23.011553,10.265534,34.571631,44.837165,24.315557,7.545516,4.913289,13.39348,20.836446,110.206912,0.459419,0.354361,0.767476,0.598441,0.360021,1.246105,0.277693,0.260191,0.554519,1.815477
5,2019,24087,101.85802,40.414296,87.881332,12.182044,34.018202,17.795467,22.991696,9.90921,34.469726,44.378936,24.095085,7.547722,4.817279,13.769551,20.692353,110.806103,0.459874,0.358104,0.773995,0.596202,0.387092,1.260861,0.301429,0.261622,0.560746,1.749882
6,2020,24918,100.587493,40.894265,87.785564,12.60717,34.410846,16.905699,21.723892,9.778034,34.196666,43.974699,24.451911,7.462588,4.809233,13.02145,19.264237,111.301398,0.465843,0.366372,0.778208,0.59793,0.391988,1.267878,0.308287,0.247465,0.56726,1.877818
7,2021,27931,99.691601,40.241798,87.339153,12.360393,34.948802,16.894388,21.791646,10.215176,33.834845,44.050021,24.400571,7.554518,4.677487,12.979301,19.650205,109.738377,0.460753,0.353671,0.775269,0.606349,0.40015,1.256463,0.307153,0.249506,0.561665,1.87996
8,2022,27623,100.701006,41.553323,87.582666,12.231431,33.967132,18.182081,23.236017,10.350747,32.746708,43.097456,25.029824,7.206202,4.627109,13.24524,19.807046,113.520159,0.474447,0.360096,0.782496,0.602354,0.387829,1.296149,0.294355,0.265304,0.575545,1.889722
9,2023,12105,101.236596,42.093202,88.813714,12.785128,34.924104,17.996279,22.900006,10.730612,32.658733,43.389345,26.318883,7.405646,5.125214,13.077491,19.627311,114.967811,0.473949,0.366083,0.785864,0.625253,0.393229,1.294483,0.303734,0.257843,0.57662,2.012533


In [24]:
# one line per stat (every column after season_start_year and GP),
# showing how the per-48-minute team figures move across seasons
seasons = change_per48_df['season_start_year']
per48_traces = [
    go.Scatter(x=seasons, y=change_per48_df[stat], name=stat)
    for stat in change_per48_df.columns[2:]
]
fig = go.Figure(data=per48_traces)
fig.update_layout(title='Change in stats over time per 48min per team', xaxis_title='Season Start Year', yaxis_title='Value')
fig.show()

In [25]:
# Per-100-possession view of the season totals (columns 4..18: FGM through PTS)
change_per100_df = change_df.copy()
per100_cols = change_per100_df.columns[4:19]
change_per100_df[per100_cols] = change_per100_df[per100_cols].div(change_per100_df['POSS_est'], axis=0)*100

# drop the helper columns that are no longer meaningful after scaling
change_per100_df = change_per100_df.drop(columns=['MIN', 'POSS_est', 'GP'])

change_per100_df

Unnamed: 0,season_start_year,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,FG%,3PT%,FT%,AST%,FG3A%,PTS/FGA,FG3M/FGM,FTA/FGA,TRU%,AST_TOV
0,2014,38.833755,86.664597,8.191171,23.428867,17.825006,23.813051,11.290725,33.687594,44.978319,22.816624,8.00817,5.005847,14.148386,21.055784,103.683687,0.448092,0.349619,0.748539,0.587546,0.27034,1.196379,0.210929,0.274773,0.529129,1.612666
1,2015,38.88597,86.158205,8.72804,24.67326,18.02493,23.838974,10.627563,33.949225,44.576788,22.572238,7.967766,5.055319,13.98021,20.722702,104.52491,0.451332,0.353745,0.756112,0.580473,0.286372,1.213174,0.224452,0.276688,0.536126,1.614585
2,2016,39.523873,86.414532,9.831151,27.479088,18.110707,23.455776,10.242072,33.729964,43.972036,22.891631,7.793958,4.801431,13.506998,20.210197,106.989605,0.457375,0.357768,0.772121,0.579185,0.317992,1.238097,0.24874,0.271433,0.54835,1.694798
3,2017,39.757456,86.438901,10.551753,29.223334,16.757471,21.850139,9.734212,33.988679,43.722891,23.280164,7.735014,4.843151,13.68125,20.021884,106.824136,0.459949,0.361073,0.766927,0.585555,0.338081,1.235834,0.265403,0.252781,0.551677,1.701611
4,2018,39.954627,86.967753,11.095117,31.310207,17.36661,22.628204,10.094521,33.995704,44.090225,23.910485,7.419815,4.831438,13.170358,20.489333,108.370981,0.459419,0.354361,0.767476,0.598441,0.360021,1.246105,0.277693,0.260191,0.554519,1.815477
5,2019,39.677088,86.278265,11.959828,33.397666,17.470855,22.572298,9.728453,33.840955,43.569408,23.65556,7.410042,4.729405,13.518377,20.314898,108.784859,0.459874,0.358104,0.773995,0.596202,0.387092,1.260861,0.301429,0.261622,0.560746,1.749882
6,2020,40.655417,87.272843,12.533536,34.209865,16.806959,21.597011,9.720924,33.996936,43.71786,24.309097,7.419002,4.781144,12.945397,19.151722,110.65133,0.465843,0.366372,0.778208,0.59793,0.391988,1.267878,0.308287,0.247465,0.56726,1.877818
7,2021,40.366287,87.609339,12.398631,35.056917,16.946651,21.859059,10.246777,33.939514,44.186291,24.476055,7.577888,4.691957,13.019452,19.710993,110.077856,0.460753,0.353671,0.775269,0.606349,0.40015,1.256463,0.307153,0.249506,0.561665,1.87996
8,2022,41.26406,86.97298,12.146285,33.730678,18.055511,23.074265,10.278693,32.51875,42.797443,24.855585,7.156038,4.594898,13.153036,19.669164,112.729916,0.474447,0.360096,0.782496,0.602354,0.387829,1.296149,0.294355,0.265304,0.575545,1.889722
9,2023,41.579037,87.728863,12.628959,34.497509,17.776456,22.620284,10.599539,32.259809,42.859348,25.9974,7.315187,5.06261,12.917751,19.387565,113.563489,0.473949,0.366083,0.785864,0.625253,0.393229,1.294483,0.303734,0.257843,0.57662,2.012533


In [26]:
# one line per stat (every column after season_start_year),
# showing how the per-100-possession figures move across seasons
seasons = change_per100_df['season_start_year']
per100_traces = [
    go.Scatter(x=seasons, y=change_per100_df[stat], name=stat)
    for stat in change_per100_df.columns[1:]
]
fig = go.Figure(data=per100_traces)
fig.update_layout(title='Change in stats over time per 100 possessions', xaxis_title='Season Start Year', yaxis_title='Value')
fig.show()

### Data Analysis: Regular Season vs Playoffs


In [27]:
def _per_100_possessions(season_totals):
    """Turn per-season totals into per-100-possession rates plus ratio indicators.

    Works on a copy, so the frame passed in is never mutated (the original loop
    assigned columns onto a filtered slice, which raises SettingWithCopyWarning).

    Parameters
    ----------
    season_totals : DataFrame with 'season_start_year' and the `total_cols` sums.

    Returns
    -------
    DataFrame with the counting stats scaled to 100 estimated possessions,
    'POSS_per_48' and the ratio indicators; 'GP', 'MIN' and 'POSS_est' are removed.
    """
    out = season_totals.copy()

    out['POSS_est'] = out['FGA'] - out['OREB'] + out['TOV'] + 0.44*out['FTA']
    # must be computed while MIN still holds raw minutes
    out['POSS_per_48'] = out['POSS_est']/out['MIN']*48*5

    out['FG%'] = out['FGM']/out['FGA']
    out['3PT%'] = out['FG3M']/out['FG3A']
    out['FT%'] = out['FTM']/out['FTA']
    out['AST%'] = out['AST']/out['FGM']
    out['FG3A%'] = out['FG3A']/out['FGA']
    out['PTS/FGA'] = out['PTS']/out['FGA']
    out['FG3M/FGM'] = out['FG3M']/out['FGM']
    out['FTA/FGA'] = out['FTA']/out['FGA']
    # NOTE(review): FTA weight is 0.475 here but 0.44 in POSS_est above - confirm intended
    out['TRU%'] = 0.5*out['PTS']/(out['FGA']+0.475*out['FTA'])
    out['AST_TOV'] = out['AST']/out['TOV']

    # GP and MIN are dropped below, so only the remaining counting stats are scaled
    stat_cols = [col for col in total_cols if col not in ('GP', 'MIN')]
    out[stat_cols] = out[stat_cols].div(out['POSS_est'], axis=0)*100

    return out.drop(columns=['MIN', 'POSS_est', 'GP'])


rs_totals = rs_df.groupby('season_start_year')[total_cols].sum().reset_index()
# the 2023 season is excluded from the regular-season side of the comparison
rs_totals = rs_totals[rs_totals['season_start_year'] != 2023]

playoffs_totals = playoffs_df.groupby('season_start_year')[total_cols].sum().reset_index()

rs_change_df = _per_100_possessions(rs_totals)
playoffs_change_df = _per_100_possessions(playoffs_totals)

rs_change_df

Unnamed: 0,season_start_year,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,POSS_per_48,FG%,3PT%,FT%,AST%,FG3A%,PTS/FGA,FG3M/FGM,FTA/FGA,TRU%,AST_TOV
0,2014,38.900086,86.65024,8.135183,23.240912,17.771371,23.688136,11.288515,33.604208,44.892723,22.846798,8.021797,4.972578,14.215495,20.960953,103.706727,95.659492,0.448932,0.350037,0.750222,0.58732,0.268215,1.196843,0.20913,0.273376,0.529645,1.607176
1,2015,38.926558,86.09612,8.6709,24.515476,17.996886,23.782176,10.603882,33.942436,44.546317,22.690502,7.988086,5.045782,14.043605,20.63089,104.520902,97.493488,0.452129,0.353691,0.756738,0.582905,0.284745,1.214002,0.22275,0.276228,0.536595,1.615718
2,2016,39.516445,86.434227,9.769173,27.32349,18.052073,23.388341,10.257879,33.777035,44.034914,22.896756,7.795428,4.800256,13.532781,20.136476,106.854135,98.151085,0.457185,0.357538,0.771841,0.579423,0.316119,1.236248,0.247218,0.270591,0.547724,1.691948
3,2017,39.782703,86.439951,10.536996,29.127708,16.700692,21.772593,9.754285,33.953813,43.708098,23.339648,7.750757,4.836313,13.734393,19.940143,106.803093,98.979263,0.460235,0.361752,0.767051,0.586678,0.33697,1.235576,0.264864,0.251881,0.551772,1.699358
4,2018,40.060963,86.993122,11.081358,31.213318,17.241016,22.498864,10.089962,33.952838,44.0428,23.975453,7.443993,4.830132,13.197341,20.384863,108.4443,101.847754,0.460507,0.35502,0.766306,0.598474,0.358802,1.246585,0.276612,0.258628,0.555099,1.816688
5,2019,39.741774,86.37855,11.875237,33.17646,17.368831,22.473502,9.798838,33.803237,43.602075,23.716035,7.438668,4.765339,13.531948,20.205627,108.727615,102.07125,0.460088,0.357942,0.772858,0.596753,0.384082,1.258734,0.29881,0.260175,0.560143,1.752596
6,2020,40.637565,87.1836,12.520406,34.156188,16.730698,21.517093,9.692393,33.988197,43.680589,24.457861,7.466502,4.804198,13.041271,19.021868,110.526234,100.81075,0.466115,0.366563,0.777554,0.601854,0.391773,1.267741,0.308099,0.246802,0.567359,1.87542
7,2021,40.444286,87.713064,12.383811,35.022979,16.859678,21.766578,10.288949,33.966643,44.255592,24.540976,7.59752,4.692395,12.99859,19.550702,110.132061,99.87125,0.461098,0.353591,0.774567,0.606785,0.39929,1.255595,0.306194,0.248157,0.561599,1.887972
8,2022,41.294479,86.867572,12.140801,33.65824,18.095028,23.147063,10.265233,32.468114,42.733347,24.905858,7.171547,4.578144,13.212954,19.655068,112.824787,100.886815,0.475373,0.360708,0.781742,0.603128,0.387466,1.298814,0.294005,0.266464,0.576446,1.884958


In [28]:
# playoff counterpart of the per-100-possession table built in the previous cell
playoffs_change_df

Unnamed: 0,season_start_year,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,POSS_per_48,FG%,3PT%,FT%,AST%,FG3A%,PTS/FGA,FG3M/FGM,FTA/FGA,TRU%,AST_TOV
0,2014,37.835437,86.88067,9.033813,26.257692,18.63224,25.693079,11.323987,34.942587,46.266574,22.362494,7.803083,5.506566,13.138362,22.48303,103.336928,96.184277,0.435487,0.344044,0.725185,0.591046,0.302227,1.189412,0.238766,0.295728,0.521457,1.702076
1,2015,38.288552,87.07204,9.569092,26.995683,18.43771,24.67498,10.976133,34.049157,45.02529,20.831506,7.668674,5.195694,13.047101,22.074087,104.583906,94.882501,0.439734,0.354468,0.747223,0.544066,0.310038,1.201119,0.24992,0.283386,0.52931,1.596639
2,2016,39.641494,86.10268,10.812501,29.942811,19.03912,24.523533,9.991793,32.984641,42.976434,22.810469,7.770671,4.82003,13.098759,21.377487,109.134609,96.777874,0.460398,0.361105,0.776361,0.575419,0.347757,1.267494,0.272757,0.284817,0.558226,1.741422
3,2017,39.37067,86.422811,10.77783,30.688355,17.627351,23.03816,9.426692,34.522835,43.949527,22.368846,7.493814,4.947919,12.867091,21.274173,107.146521,96.985966,0.455559,0.351203,0.765137,0.56816,0.355096,1.239794,0.273753,0.266575,0.550226,1.738454
4,2018,38.325808,86.579167,11.305871,32.794311,19.290414,24.609395,10.164354,34.652311,44.816665,22.915336,7.049471,4.851445,12.757054,22.089558,107.247901,99.397395,0.442668,0.344751,0.783864,0.597909,0.378778,1.238726,0.294994,0.284242,0.545687,1.796287
5,2019,38.828648,84.962888,13.06936,36.299094,18.809038,23.868145,8.805256,34.335679,43.140935,22.862347,7.034568,4.258082,13.340384,21.748138,109.535694,99.141484,0.457007,0.360046,0.788039,0.588801,0.427235,1.289218,0.336591,0.280924,0.56872,1.71377
6,2020,40.889571,88.443382,12.705765,34.91391,17.807232,22.645242,10.095156,34.111566,44.206722,22.357836,6.795968,4.478752,11.687867,20.854939,112.292139,97.748115,0.462325,0.363917,0.786356,0.546786,0.39476,1.26965,0.310734,0.256042,0.565989,1.91291
7,2021,39.227573,86.09504,12.614982,35.552389,18.216389,23.209204,9.631111,33.543445,43.174556,23.528272,7.291283,4.685565,13.324021,22.051108,109.286518,97.140594,0.455631,0.354828,0.784878,0.599789,0.412944,1.269371,0.321585,0.269577,0.56264,1.765854
8,2022,40.804236,88.566351,12.229181,34.82566,17.458168,21.973837,10.482155,33.284167,43.766322,24.095657,6.921608,4.848148,12.247316,19.882242,111.295822,97.973398,0.460719,0.351154,0.794498,0.590519,0.393215,1.256638,0.299704,0.248106,0.562078,1.967423


In [29]:
# Percentage difference of playoffs vs regular season, per stat and season.
# Align the two tables on season_start_year instead of on their positional row
# index, and keep only seasons present in both, so a missing or reordered
# season cannot pair up the wrong rows. The season labels then come from the
# data rather than from a hard-coded year range.
rs_by_season = rs_change_df.set_index('season_start_year')
playoffs_by_season = playoffs_change_df.set_index('season_start_year')
shared_seasons = rs_by_season.index.intersection(playoffs_by_season.index)
rs_by_season = rs_by_season.loc[shared_seasons]
playoffs_by_season = playoffs_by_season.loc[shared_seasons]

comp_change_df = (
    (100*(playoffs_by_season - rs_by_season) / rs_by_season)
    .round(2)
    .rename_axis('season_start_year')
    .reset_index()
)
comp_change_df

Unnamed: 0,season_start_year,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,POSS_per_48,FG%,3PT%,FT%,AST%,FG3A%,PTS/FGA,FG3M/FGM,FTA/FGA,TRU%,AST_TOV
0,2014,-2.74,0.27,11.05,12.98,4.84,8.46,0.31,3.98,3.06,-2.12,-2.73,10.74,-7.58,7.26,-0.36,0.55,-2.99,-1.71,-3.34,0.63,12.68,-0.62,14.17,8.18,-1.55,5.9
1,2015,-1.64,1.13,10.36,10.12,2.45,3.75,3.51,0.31,1.08,-8.19,-4.0,2.97,-7.1,7.0,0.06,-2.68,-2.74,0.22,-1.26,-6.66,8.88,-1.06,12.2,2.59,-1.36,-1.18
2,2016,0.32,-0.38,10.68,9.59,5.47,4.85,-2.59,-2.35,-2.4,-0.38,-0.32,0.41,-3.21,6.16,2.13,-1.4,0.7,1.0,0.59,-0.69,10.01,2.53,10.33,5.26,1.92,2.92
3,2017,-1.04,-0.02,2.29,5.36,5.55,5.81,-3.36,1.68,0.55,-4.16,-3.32,2.31,-6.31,6.69,0.32,-2.01,-1.02,-2.92,-0.25,-3.16,5.38,0.34,3.36,5.83,-0.28,2.3
4,2018,-4.33,-0.48,2.03,5.07,11.89,9.38,0.74,2.06,1.76,-4.42,-5.3,0.44,-3.34,8.36,-1.1,-2.41,-3.87,-2.89,2.29,-0.09,5.57,-0.63,6.65,9.9,-1.7,-1.12
5,2019,-2.3,-1.64,10.06,9.41,8.29,6.21,-10.14,1.58,-1.06,-3.6,-5.43,-10.64,-1.42,7.63,0.74,-2.87,-0.67,0.59,1.96,-1.33,11.24,2.42,12.64,7.98,1.53,-2.22
6,2020,0.62,1.44,1.48,2.22,6.43,5.24,4.16,0.36,1.2,-8.59,-8.98,-6.77,-10.38,9.64,1.6,-3.04,-0.81,-0.72,1.13,-9.15,0.76,0.15,0.86,3.74,-0.24,2.0
7,2021,-3.01,-1.84,1.87,1.51,8.05,6.63,-6.39,-1.25,-2.44,-4.13,-4.03,-0.15,2.5,12.79,-0.77,-2.73,-1.19,0.35,1.33,-1.15,3.42,1.1,5.03,8.63,0.19,-6.47
8,2022,-1.19,1.96,0.73,3.47,-3.52,-5.07,2.11,2.51,2.42,-3.25,-3.49,5.9,-7.31,1.16,-1.36,-2.89,-3.08,-2.65,1.63,-2.09,1.48,-3.25,1.94,-6.89,-2.49,4.37


In [30]:
# one line per stat: percentage difference of playoffs vs regular season by season
seasons = comp_change_df['season_start_year']
comparison_traces = [
    go.Scatter(x=seasons, y=comp_change_df[stat], name=stat)
    for stat in comp_change_df.columns[1:]
]
fig = go.Figure(data=comparison_traces)
fig.update_layout(title='Change in stats over time: regular season vs playoffs', xaxis_title='Season Start Year', yaxis_title='Value')
fig.show()

In [31]:
# Re-index the comparison table by season so rows map to the y axis
comp_change_heatmap_df = comp_change_df.set_index('season_start_year')

# Heat map: seasons down the side, stats along the bottom
axis_labels = dict(x="Stat Types", y="Season Start Year", color="Change")
fig = px.imshow(
    comp_change_heatmap_df.values,
    x=comp_change_heatmap_df.columns,
    y=comp_change_heatmap_df.index,
    labels=axis_labels,
    title='Change in stats over time: regular season vs playoffs',
)

# Fixed canvas size with small, rotated tick labels
small_ticks = dict(size=8)
fig.update_layout(
    xaxis=dict(tickangle=90, tickfont=small_ticks),
    yaxis=dict(tickfont=small_ticks),
    width=1200,
    height=500,
    autosize=False,
)

fig.show()

In [32]:
# Heatmap using plotly's graph objects, with the value printed in every cell

# Re-index the comparison table by season. This cell now reads only from its
# own frame; previously it built this frame but then used the one created in
# the preceding cell, so it could not run on its own.
comp_change_heatmapv2_df = comp_change_df.set_index('season_start_year')

# Create the heat map
fig = go.Figure(data=go.Heatmap(
    z=comp_change_heatmapv2_df.values,
    x=comp_change_heatmapv2_df.columns,
    y=comp_change_heatmapv2_df.index,
    colorscale='Viridis'))

# One text annotation per cell, built as a list and attached in a single
# layout update instead of one add_annotation call per cell
cell_annotations = [
    dict(x=stat, y=season, text=str(value), showarrow=False, font=dict(size=8))
    for season, row in zip(comp_change_heatmapv2_df.index, comp_change_heatmapv2_df.values)
    for stat, value in zip(comp_change_heatmapv2_df.columns, row)
]

# Format the heat map
fig.update_layout(
    annotations=cell_annotations,
    autosize=False,
    width=1200,
    height=500,
    xaxis=dict(tickangle=90, tickfont=dict(size=8)),
    yaxis=dict(tickfont=dict(size=8)),
    title='Change in stats over time: regular season vs playoffs')

fig.show()