In [1]:
import pandas as pd 
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
pd.set_option('display.max_columns', None) 

In [2]:
df = pd.read_csv('NBA_Players_Stats.csv')

In [3]:
df.shape

(8158, 31)

In [4]:
df.head(10)

Unnamed: 0.1,Unnamed: 0,Year,Season_type,PLAYER_ID,RANK,PLAYER,TEAM_ID,TEAM,GP,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,EFF,AST_TOV,STL_TOV
0,0,2013-14,Regular%20Season,201142,1,Kevin Durant,1610612760,OKC,81,3122,849,1688,0.503,192,491,0.391,703,805,0.873,58,540,598,445,103,59,285,174,2593,2572,1.56,0.36
1,1,2013-14,Regular%20Season,2546,2,Carmelo Anthony,1610612752,NYK,77,2982,743,1643,0.452,167,415,0.402,459,541,0.848,145,477,622,242,95,51,198,224,2112,1942,1.22,0.48
2,2,2013-14,Regular%20Season,2544,3,LeBron James,1610612748,MIA,77,2902,767,1353,0.567,116,306,0.379,439,585,0.75,81,452,533,488,121,26,270,126,2089,2255,1.81,0.45
3,3,2013-14,Regular%20Season,201567,4,Kevin Love,1610612750,MIN,77,2797,650,1421,0.457,190,505,0.376,520,633,0.821,224,739,963,341,59,35,196,136,2010,2328,1.74,0.3
4,4,2013-14,Regular%20Season,201933,5,Blake Griffin,1610612746,LAC,80,2863,718,1359,0.528,12,44,0.273,482,674,0.715,192,565,757,309,92,51,224,265,1930,2082,1.38,0.41
5,5,2013-14,Regular%20Season,201939,6,Stephen Curry,1610612744,GSW,78,2846,652,1383,0.471,261,615,0.424,308,348,0.885,46,288,334,666,128,14,294,194,1873,1950,2.27,0.43
6,6,2013-14,Regular%20Season,201935,7,James Harden,1610612745,HOU,73,2777,549,1205,0.456,177,483,0.366,576,665,0.866,61,283,344,446,115,29,265,177,1851,1775,1.68,0.43
7,7,2013-14,Regular%20Season,201942,8,DeMar DeRozan,1610612761,TOR,79,3017,604,1407,0.429,64,210,0.305,519,630,0.824,51,292,343,313,86,28,176,197,1791,1471,1.78,0.49
8,8,2013-14,Regular%20Season,202331,9,Paul George,1610612754,IND,80,2898,577,1362,0.424,182,500,0.364,401,464,0.864,64,478,542,283,151,22,224,198,1737,1663,1.26,0.67
9,9,2013-14,Regular%20Season,1717,10,Dirk Nowitzki,1610612742,DAL,80,2628,633,1273,0.497,131,329,0.398,338,376,0.899,40,458,498,216,73,45,117,165,1735,1772,1.85,0.62


## Data Cleaning

In [5]:
# Checking Null Values
df.isna().sum()

Unnamed: 0     0
Year           0
Season_type    0
PLAYER_ID      0
RANK           0
PLAYER         0
TEAM_ID        0
TEAM           0
GP             0
MIN            0
FGM            0
FGA            0
FG_PCT         0
FG3M           0
FG3A           0
FG3_PCT        0
FTM            0
FTA            0
FT_PCT         0
OREB           0
DREB           0
REB            0
AST            0
STL            0
BLK            0
TOV            0
PF             0
PTS            0
EFF            0
AST_TOV        0
STL_TOV        0
dtype: int64

In [6]:
df.columns

Index(['Unnamed: 0', 'Year', 'Season_type', 'PLAYER_ID', 'RANK', 'PLAYER',
       'TEAM_ID', 'TEAM', 'GP', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A',
       'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL',
       'BLK', 'TOV', 'PF', 'PTS', 'EFF', 'AST_TOV', 'STL_TOV'],
      dtype='object')

In [7]:
df.duplicated().sum()

0

In [8]:
# droping columns we don't Need
df.drop(columns=['RANK','EFF','Unnamed: 0'], inplace=True)

In [9]:
df.tail(10)

Unnamed: 0,Year,Season_type,PLAYER_ID,PLAYER,TEAM_ID,TEAM,GP,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,AST_TOV,STL_TOV
8148,2023-24,Playoffs,1641722,Jordan Hawkins,1610612740,NOP,3,12,0,4,0.0,0,3,0.0,0,0,0.0,0,2,2,0,0,0,0,1,0,0.0,0.0
8149,2023-24,Playoffs,1641738,Kobe Brown,1610612746,LAC,3,10,0,0,0.0,0,0,0.0,0,0,0.0,1,1,2,0,0,0,0,1,0,0.0,0.0
8150,2023-24,Playoffs,1631159,Leonard Miller,1610612750,MIN,3,7,0,1,0.0,0,1,0.0,0,0,0.0,1,3,4,0,0,0,0,0,0,0.0,0.0
8151,2023-24,Playoffs,1630322,Lindy Waters III,1610612760,OKC,3,9,0,1,0.0,0,1,0.0,0,0,0.0,0,1,1,0,0,0,0,0,0,0.0,0.0
8152,2023-24,Playoffs,1630346,Matt Ryan,1610612740,NOP,1,3,0,1,0.0,0,1,0.0,0,0,0.0,0,1,1,0,1,0,1,0,0,0.0,1.0
8153,2023-24,Playoffs,1641765,Olivier-Maxence Prosper,1610612742,DAL,3,9,0,2,0.0,0,0,0.0,0,0,0.0,0,3,3,1,0,0,0,0,0,0.0,0.0
8154,2023-24,Playoffs,1631115,Orlando Robinson,1610612748,MIA,1,2,0,1,0.0,0,1,0.0,0,0,0.0,0,1,1,1,0,0,0,0,0,0.0,0.0
8155,2023-24,Playoffs,203933,T.J. Warren,1610612750,MIN,3,11,0,2,0.0,0,1,0.0,0,0,0.0,2,1,3,1,0,0,0,0,0,0.0,0.0
8156,2023-24,Playoffs,201152,Thaddeus Young,1610612756,PHX,1,4,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0,0,0,0,0,0,0,0.0,0.0
8157,2023-24,Playoffs,203648,Thanasis Antetokounmpo,1610612749,MIL,2,5,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0,0,1,1,0,1,0,0.0,0.0


In [10]:
# making the Year easier to read
df['Season_start'] = df['Year'].str[:4].astype(int)

In [11]:
df['production'] = df['PTS']+df['REB']+df['AST']+df['STL']+df['BLK'] 

In [12]:
df.head(20)

Unnamed: 0,Year,Season_type,PLAYER_ID,PLAYER,TEAM_ID,TEAM,GP,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,AST_TOV,STL_TOV,Season_start,production
0,2013-14,Regular%20Season,201142,Kevin Durant,1610612760,OKC,81,3122,849,1688,0.503,192,491,0.391,703,805,0.873,58,540,598,445,103,59,285,174,2593,1.56,0.36,2013,3798
1,2013-14,Regular%20Season,2546,Carmelo Anthony,1610612752,NYK,77,2982,743,1643,0.452,167,415,0.402,459,541,0.848,145,477,622,242,95,51,198,224,2112,1.22,0.48,2013,3122
2,2013-14,Regular%20Season,2544,LeBron James,1610612748,MIA,77,2902,767,1353,0.567,116,306,0.379,439,585,0.75,81,452,533,488,121,26,270,126,2089,1.81,0.45,2013,3257
3,2013-14,Regular%20Season,201567,Kevin Love,1610612750,MIN,77,2797,650,1421,0.457,190,505,0.376,520,633,0.821,224,739,963,341,59,35,196,136,2010,1.74,0.3,2013,3408
4,2013-14,Regular%20Season,201933,Blake Griffin,1610612746,LAC,80,2863,718,1359,0.528,12,44,0.273,482,674,0.715,192,565,757,309,92,51,224,265,1930,1.38,0.41,2013,3139
5,2013-14,Regular%20Season,201939,Stephen Curry,1610612744,GSW,78,2846,652,1383,0.471,261,615,0.424,308,348,0.885,46,288,334,666,128,14,294,194,1873,2.27,0.43,2013,3015
6,2013-14,Regular%20Season,201935,James Harden,1610612745,HOU,73,2777,549,1205,0.456,177,483,0.366,576,665,0.866,61,283,344,446,115,29,265,177,1851,1.68,0.43,2013,2785
7,2013-14,Regular%20Season,201942,DeMar DeRozan,1610612761,TOR,79,3017,604,1407,0.429,64,210,0.305,519,630,0.824,51,292,343,313,86,28,176,197,1791,1.78,0.49,2013,2561
8,2013-14,Regular%20Season,202331,Paul George,1610612754,IND,80,2898,577,1362,0.424,182,500,0.364,401,464,0.864,64,478,542,283,151,22,224,198,1737,1.26,0.67,2013,2735
9,2013-14,Regular%20Season,1717,Dirk Nowitzki,1610612742,DAL,80,2628,633,1273,0.497,131,329,0.398,338,376,0.899,40,458,498,216,73,45,117,165,1735,1.85,0.62,2013,2567


In [13]:
# Checking if all the Teams are present
df.TEAM.nunique()

30

In [14]:
df.TEAM.unique()

array(['OKC', 'NYK', 'MIA', 'MIN', 'LAC', 'GSW', 'HOU', 'TOR', 'IND',
       'DAL', 'POR', 'SAC', 'CHA', 'WAS', 'PHX', 'CLE', 'PHI', 'NOP',
       'BOS', 'MEM', 'ORL', 'ATL', 'MIL', 'DET', 'UTA', 'BKN', 'LAL',
       'SAS', 'DEN', 'CHI'], dtype=object)

In [15]:
# Cleaning season type column
df['Season_type'].replace('Regular%20Season' , 'Regular Season', inplace=True)
df['Season_type'].replace('Playoffs' , 'Post Season', inplace=True)
df

Unnamed: 0,Year,Season_type,PLAYER_ID,PLAYER,TEAM_ID,TEAM,GP,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,AST_TOV,STL_TOV,Season_start,production
0,2013-14,Regular Season,201142,Kevin Durant,1610612760,OKC,81,3122,849,1688,0.503,192,491,0.391,703,805,0.873,58,540,598,445,103,59,285,174,2593,1.56,0.36,2013,3798
1,2013-14,Regular Season,2546,Carmelo Anthony,1610612752,NYK,77,2982,743,1643,0.452,167,415,0.402,459,541,0.848,145,477,622,242,95,51,198,224,2112,1.22,0.48,2013,3122
2,2013-14,Regular Season,2544,LeBron James,1610612748,MIA,77,2902,767,1353,0.567,116,306,0.379,439,585,0.750,81,452,533,488,121,26,270,126,2089,1.81,0.45,2013,3257
3,2013-14,Regular Season,201567,Kevin Love,1610612750,MIN,77,2797,650,1421,0.457,190,505,0.376,520,633,0.821,224,739,963,341,59,35,196,136,2010,1.74,0.30,2013,3408
4,2013-14,Regular Season,201933,Blake Griffin,1610612746,LAC,80,2863,718,1359,0.528,12,44,0.273,482,674,0.715,192,565,757,309,92,51,224,265,1930,1.38,0.41,2013,3139
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8153,2023-24,Post Season,1641765,Olivier-Maxence Prosper,1610612742,DAL,3,9,0,2,0.000,0,0,0.000,0,0,0.000,0,3,3,1,0,0,0,0,0,0.00,0.00,2023,4
8154,2023-24,Post Season,1631115,Orlando Robinson,1610612748,MIA,1,2,0,1,0.000,0,1,0.000,0,0,0.000,0,1,1,1,0,0,0,0,0,0.00,0.00,2023,2
8155,2023-24,Post Season,203933,T.J. Warren,1610612750,MIN,3,11,0,2,0.000,0,1,0.000,0,0,0.000,2,1,3,1,0,0,0,0,0,0.00,0.00,2023,4
8156,2023-24,Post Season,201152,Thaddeus Young,1610612756,PHX,1,4,0,0,0.000,0,0,0.000,0,0,0.000,0,0,0,0,0,0,0,0,0,0.00,0.00,2023,0


In [16]:
# filtiring Data set based on season type

df_rs = df[df['Season_type']=='Regular Season']
df_ps = df[df['Season_type']=='Post Season']

In [17]:
df_rs.head()

Unnamed: 0,Year,Season_type,PLAYER_ID,PLAYER,TEAM_ID,TEAM,GP,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,AST_TOV,STL_TOV,Season_start,production
0,2013-14,Regular Season,201142,Kevin Durant,1610612760,OKC,81,3122,849,1688,0.503,192,491,0.391,703,805,0.873,58,540,598,445,103,59,285,174,2593,1.56,0.36,2013,3798
1,2013-14,Regular Season,2546,Carmelo Anthony,1610612752,NYK,77,2982,743,1643,0.452,167,415,0.402,459,541,0.848,145,477,622,242,95,51,198,224,2112,1.22,0.48,2013,3122
2,2013-14,Regular Season,2544,LeBron James,1610612748,MIA,77,2902,767,1353,0.567,116,306,0.379,439,585,0.75,81,452,533,488,121,26,270,126,2089,1.81,0.45,2013,3257
3,2013-14,Regular Season,201567,Kevin Love,1610612750,MIN,77,2797,650,1421,0.457,190,505,0.376,520,633,0.821,224,739,963,341,59,35,196,136,2010,1.74,0.3,2013,3408
4,2013-14,Regular Season,201933,Blake Griffin,1610612746,LAC,80,2863,718,1359,0.528,12,44,0.273,482,674,0.715,192,565,757,309,92,51,224,265,1930,1.38,0.41,2013,3139


In [18]:
df_ps.head()

Unnamed: 0,Year,Season_type,PLAYER_ID,PLAYER,TEAM_ID,TEAM,GP,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,AST_TOV,STL_TOV,Season_start,production
482,2013-14,Post Season,201142,Kevin Durant,1610612760,OKC,19,815,194,422,0.46,43,125,0.344,132,163,0.81,25,145,170,75,19,25,72,41,563,1.04,0.26,2013,852
483,2013-14,Post Season,2544,LeBron James,1610612748,MIA,20,763,192,340,0.565,35,86,0.407,129,160,0.806,14,127,141,95,36,11,61,46,548,1.56,0.59,2013,831
484,2013-14,Post Season,201566,Russell Westbrook,1610612760,OKC,19,735,167,398,0.42,28,100,0.28,145,164,0.884,45,93,138,153,41,6,83,62,507,1.84,0.49,2013,845
485,2013-14,Post Season,202331,Paul George,1610612754,IND,19,781,138,315,0.438,52,129,0.403,101,128,0.789,13,132,145,73,41,7,49,56,429,1.49,0.84,2013,695
486,2013-14,Post Season,2225,Tony Parker,1610612759,SAS,23,719,168,346,0.486,13,35,0.371,51,70,0.729,5,40,45,111,15,2,62,37,400,1.79,0.24,2013,573


In [19]:
df_ps.shape

(2367, 30)

In [20]:
df_rs.shape

(5791, 30)

In [21]:
# filtiring columns that have totals or a ratio
df.columns

Index(['Year', 'Season_type', 'PLAYER_ID', 'PLAYER', 'TEAM_ID', 'TEAM', 'GP',
       'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
       'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF',
       'PTS', 'AST_TOV', 'STL_TOV', 'Season_start', 'production'],
      dtype='object')

In [22]:
total_columns = ['MIN', 'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM','FTA', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF','PTS','production']

In [23]:
df_corr = df[total_columns]

In [24]:
df_corr.corr()

Unnamed: 0,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,production
MIN,1.0,0.933442,0.939302,0.754262,0.774092,0.797879,0.809452,0.652193,0.864215,0.832975,0.785739,0.893029,0.616976,0.884592,0.919127,0.927843,0.949703
FGM,0.933442,1.0,0.989139,0.739375,0.756042,0.896845,0.89993,0.624852,0.851807,0.815975,0.801536,0.829353,0.594544,0.919363,0.830825,0.994232,0.984972
FGA,0.939302,0.989139,1.0,0.794102,0.815787,0.888635,0.883281,0.555285,0.814633,0.768508,0.819695,0.84289,0.535566,0.923677,0.821256,0.991156,0.972902
FG3M,0.754262,0.739375,0.794102,1.0,0.9923,0.609118,0.567028,0.166711,0.507848,0.428783,0.65559,0.675846,0.228509,0.666468,0.596949,0.777987,0.716889
FG3A,0.774092,0.756042,0.815787,0.9923,1.0,0.633472,0.593926,0.182759,0.52793,0.448388,0.679653,0.699786,0.242994,0.695276,0.619503,0.794057,0.736747
FTM,0.797879,0.896845,0.888635,0.609118,0.633472,1.0,0.988789,0.534596,0.752509,0.715972,0.763457,0.731263,0.51782,0.879676,0.701059,0.924108,0.905241
FTA,0.809452,0.89993,0.883281,0.567028,0.593926,0.988789,1.0,0.605801,0.795387,0.768191,0.750756,0.740565,0.573995,0.887397,0.732463,0.919157,0.915305
OREB,0.652193,0.624852,0.555285,0.166711,0.182759,0.534596,0.605801,1.0,0.842755,0.914338,0.352079,0.533719,0.791077,0.565449,0.746199,0.581223,0.681518
DREB,0.864215,0.851807,0.814633,0.507848,0.52793,0.752509,0.795387,0.842755,1.0,0.988547,0.632378,0.743408,0.772275,0.805887,0.863531,0.830209,0.903314
REB,0.832975,0.815975,0.768508,0.428783,0.448388,0.715972,0.768191,0.914338,0.988547,1.0,0.574432,0.70888,0.802746,0.764778,0.858815,0.787497,0.87061


In [25]:
data_per_min = df.groupby(['PLAYER','PLAYER_ID','Year'])[total_columns].sum().reset_index()

data_per_min 

Unnamed: 0,PLAYER,PLAYER_ID,Year,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,production
0,AJ Green,1631260,2022-23,345,53,125,44,105,4,4,6,39,45,22,6,0,9,31,154,227
1,AJ Green,1631260,2023-24,681,89,212,71,180,20,22,9,64,73,32,9,4,13,58,269,387
2,AJ Griffin,1631100,2022-23,1401,248,533,101,259,42,47,37,116,153,73,42,12,42,87,639,919
3,AJ Griffin,1631100,2023-24,171,18,62,10,39,2,2,2,16,18,5,1,2,8,6,48,74
4,AJ Hammons,1627773,2016-17,163,17,42,5,10,9,20,8,28,36,4,1,13,10,21,48,102
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5793,Ömer Aşik,201600,2013-14,1131,117,223,0,0,81,129,122,305,427,29,17,41,70,121,315,829
5794,Ömer Aşik,201600,2014-15,2061,215,422,0,0,135,232,256,521,777,78,34,54,100,154,565,1508
5795,Ömer Aşik,201600,2015-16,1178,104,195,0,0,61,112,120,294,414,26,21,23,60,124,269,753
5796,Ömer Aşik,201600,2016-17,482,31,65,0,0,23,39,48,115,163,15,5,10,14,51,85,278


In [26]:
for col in data_per_min.columns[4:]:
    data_per_min[col] = data_per_min[col]/data_per_min['MIN']
data_per_min

Unnamed: 0,PLAYER,PLAYER_ID,Year,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,production
0,AJ Green,1631260,2022-23,345,0.153623,0.362319,0.127536,0.304348,0.011594,0.011594,0.017391,0.113043,0.130435,0.063768,0.017391,0.000000,0.026087,0.089855,0.446377,0.657971
1,AJ Green,1631260,2023-24,681,0.130690,0.311307,0.104258,0.264317,0.029369,0.032305,0.013216,0.093979,0.107195,0.046990,0.013216,0.005874,0.019090,0.085169,0.395007,0.568282
2,AJ Griffin,1631100,2022-23,1401,0.177016,0.380443,0.072091,0.184868,0.029979,0.033547,0.026410,0.082798,0.109208,0.052106,0.029979,0.008565,0.029979,0.062099,0.456103,0.655960
3,AJ Griffin,1631100,2023-24,171,0.105263,0.362573,0.058480,0.228070,0.011696,0.011696,0.011696,0.093567,0.105263,0.029240,0.005848,0.011696,0.046784,0.035088,0.280702,0.432749
4,AJ Hammons,1627773,2016-17,163,0.104294,0.257669,0.030675,0.061350,0.055215,0.122699,0.049080,0.171779,0.220859,0.024540,0.006135,0.079755,0.061350,0.128834,0.294479,0.625767
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5793,Ömer Aşik,201600,2013-14,1131,0.103448,0.197171,0.000000,0.000000,0.071618,0.114058,0.107869,0.269673,0.377542,0.025641,0.015031,0.036251,0.061892,0.106985,0.278515,0.732980
5794,Ömer Aşik,201600,2014-15,2061,0.104318,0.204755,0.000000,0.000000,0.065502,0.112567,0.124212,0.252790,0.377001,0.037846,0.016497,0.026201,0.048520,0.074721,0.274139,0.731684
5795,Ömer Aşik,201600,2015-16,1178,0.088285,0.165535,0.000000,0.000000,0.051783,0.095076,0.101868,0.249576,0.351443,0.022071,0.017827,0.019525,0.050934,0.105263,0.228353,0.639219
5796,Ömer Aşik,201600,2016-17,482,0.064315,0.134855,0.000000,0.000000,0.047718,0.080913,0.099585,0.238589,0.338174,0.031120,0.010373,0.020747,0.029046,0.105809,0.176349,0.576763


In [27]:
data_per_min['FG%'] = data_per_min['FGM']/data_per_min['FGA']
data_per_min['3P%'] = data_per_min['FG3M']/data_per_min['FG3A']
data_per_min['FT%'] = data_per_min['FTM']/data_per_min['FTA']
data_per_min['FG3A%'] = data_per_min['FG3A']/data_per_min['FGA']
data_per_min['PTS/FGA'] = data_per_min['PTS']/data_per_min['FGA']
data_per_min['FTA/FGA'] = data_per_min['FTA']/data_per_min['FTM']
data_per_min['TRU%'] = 0.5*data_per_min['PTS']/(data_per_min['FGA']+0.475*data_per_min['FTA']) # True Shooting percentage formula is explained in NBA.COM
data_per_min['AST_TOV'] = data_per_min['AST']/data_per_min['TOV']


In [28]:
data_per_min

Unnamed: 0,PLAYER,PLAYER_ID,Year,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,production,FG%,3P%,FT%,FG3A%,PTS/FGA,FTA/FGA,TRU%,AST_TOV
0,AJ Green,1631260,2022-23,345,0.153623,0.362319,0.127536,0.304348,0.011594,0.011594,0.017391,0.113043,0.130435,0.063768,0.017391,0.000000,0.026087,0.089855,0.446377,0.657971,0.424000,0.419048,1.000000,0.840000,1.232000,1.000000,0.606777,2.444444
1,AJ Green,1631260,2023-24,681,0.130690,0.311307,0.104258,0.264317,0.029369,0.032305,0.013216,0.093979,0.107195,0.046990,0.013216,0.005874,0.019090,0.085169,0.395007,0.568282,0.419811,0.394444,0.909091,0.849057,1.268868,1.100000,0.604630,2.461538
2,AJ Griffin,1631100,2022-23,1401,0.177016,0.380443,0.072091,0.184868,0.029979,0.033547,0.026410,0.082798,0.109208,0.052106,0.029979,0.008565,0.029979,0.062099,0.456103,0.655960,0.465291,0.389961,0.893617,0.485929,1.198874,1.119048,0.575339,1.738095
3,AJ Griffin,1631100,2023-24,171,0.105263,0.362573,0.058480,0.228070,0.011696,0.011696,0.011696,0.093567,0.105263,0.029240,0.005848,0.011696,0.046784,0.035088,0.280702,0.432749,0.290323,0.256410,1.000000,0.629032,0.774194,1.000000,0.381255,0.625000
4,AJ Hammons,1627773,2016-17,163,0.104294,0.257669,0.030675,0.061350,0.055215,0.122699,0.049080,0.171779,0.220859,0.024540,0.006135,0.079755,0.061350,0.128834,0.294479,0.625767,0.404762,0.500000,0.450000,0.238095,1.142857,2.222222,0.466019,0.400000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5793,Ömer Aşik,201600,2013-14,1131,0.103448,0.197171,0.000000,0.000000,0.071618,0.114058,0.107869,0.269673,0.377542,0.025641,0.015031,0.036251,0.061892,0.106985,0.278515,0.732980,0.524664,,0.627907,0.000000,1.412556,1.592593,0.554041,0.414286
5794,Ömer Aşik,201600,2014-15,2061,0.104318,0.204755,0.000000,0.000000,0.065502,0.112567,0.124212,0.252790,0.377001,0.037846,0.016497,0.026201,0.048520,0.074721,0.274139,0.731684,0.509479,,0.581897,0.000000,1.338863,1.718519,0.530815,0.780000
5795,Ömer Aşik,201600,2015-16,1178,0.088285,0.165535,0.000000,0.000000,0.051783,0.095076,0.101868,0.249576,0.351443,0.022071,0.017827,0.019525,0.050934,0.105263,0.228353,0.639219,0.533333,,0.544643,0.000000,1.379487,1.836066,0.541902,0.433333
5796,Ömer Aşik,201600,2016-17,482,0.064315,0.134855,0.000000,0.000000,0.047718,0.080913,0.099585,0.238589,0.338174,0.031120,0.010373,0.020747,0.029046,0.105809,0.176349,0.576763,0.476923,,0.589744,0.000000,1.307692,1.695652,0.508830,1.071429


In [29]:
(data_per_min['MIN']>=100).mean() # 87% of players played more then 100 minutes

0.869782683684029

In [30]:
data_per_min_100 = data_per_min[data_per_min['MIN']>=100] 

In [31]:
data_per_min_100.head(10)

Unnamed: 0,PLAYER,PLAYER_ID,Year,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,production,FG%,3P%,FT%,FG3A%,PTS/FGA,FTA/FGA,TRU%,AST_TOV
0,AJ Green,1631260,2022-23,345,0.153623,0.362319,0.127536,0.304348,0.011594,0.011594,0.017391,0.113043,0.130435,0.063768,0.017391,0.0,0.026087,0.089855,0.446377,0.657971,0.424,0.419048,1.0,0.84,1.232,1.0,0.606777,2.444444
1,AJ Green,1631260,2023-24,681,0.13069,0.311307,0.104258,0.264317,0.029369,0.032305,0.013216,0.093979,0.107195,0.04699,0.013216,0.005874,0.01909,0.085169,0.395007,0.568282,0.419811,0.394444,0.909091,0.849057,1.268868,1.1,0.60463,2.461538
2,AJ Griffin,1631100,2022-23,1401,0.177016,0.380443,0.072091,0.184868,0.029979,0.033547,0.02641,0.082798,0.109208,0.052106,0.029979,0.008565,0.029979,0.062099,0.456103,0.65596,0.465291,0.389961,0.893617,0.485929,1.198874,1.119048,0.575339,1.738095
3,AJ Griffin,1631100,2023-24,171,0.105263,0.362573,0.05848,0.22807,0.011696,0.011696,0.011696,0.093567,0.105263,0.02924,0.005848,0.011696,0.046784,0.035088,0.280702,0.432749,0.290323,0.25641,1.0,0.629032,0.774194,1.0,0.381255,0.625
4,AJ Hammons,1627773,2016-17,163,0.104294,0.257669,0.030675,0.06135,0.055215,0.122699,0.04908,0.171779,0.220859,0.02454,0.006135,0.079755,0.06135,0.128834,0.294479,0.625767,0.404762,0.5,0.45,0.238095,1.142857,2.222222,0.466019,0.4
5,AJ Lawson,1630639,2022-23,108,0.203704,0.407407,0.092593,0.231481,0.018519,0.074074,0.055556,0.138889,0.194444,0.018519,0.018519,0.0,0.027778,0.101852,0.518519,0.75,0.5,0.4,0.25,0.568182,1.272727,4.0,0.585774,0.666667
6,AJ Lawson,1630639,2023-24,341,0.170088,0.381232,0.043988,0.164223,0.046921,0.073314,0.043988,0.111437,0.155425,0.058651,0.029326,0.01173,0.041056,0.067449,0.431085,0.686217,0.446154,0.267857,0.64,0.430769,1.130769,1.5625,0.518062,1.428571
8,AJ Price,201985,2014-15,324,0.157407,0.42284,0.046296,0.175926,0.049383,0.074074,0.018519,0.080247,0.098765,0.141975,0.021605,0.0,0.04321,0.046296,0.410494,0.67284,0.372263,0.263158,0.666667,0.416058,0.970803,1.5,0.448113,3.285714
9,Aaron Brooks,201166,2013-14,1557,0.149647,0.373154,0.061657,0.159281,0.053308,0.061015,0.027617,0.062299,0.089917,0.149647,0.033398,0.008349,0.075145,0.09377,0.414258,0.695568,0.401033,0.387097,0.873684,0.42685,1.110155,1.144578,0.515073,1.991453
10,Aaron Brooks,201166,2014-15,2017,0.180962,0.4353,0.063956,0.168071,0.073872,0.089737,0.017353,0.073872,0.091225,0.134854,0.02826,0.007933,0.0823,0.101636,0.499752,0.762023,0.415718,0.380531,0.823204,0.386105,1.148064,1.214765,0.522835,1.638554


In [32]:
data_per_min_100_corr = data_per_min_100.drop(columns=['Year', 'PLAYER_ID', 'PLAYER']) # correlation function doesn't work with strings so We have to drop them

In [33]:
data_per_min_100_corr.corr()

Unnamed: 0,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,production,FG%,3P%,FT%,FG3A%,PTS/FGA,FTA/FGA,TRU%,AST_TOV
MIN,1.0,0.431418,0.369965,0.192958,0.121609,0.362122,0.301972,-0.125348,0.044041,-0.024109,0.245904,0.042886,-0.067518,0.169095,-0.388075,0.462321,0.400932,0.173097,0.18049,0.246184,-0.023904,0.279512,-0.242461,0.312536,0.065336
FGM,0.431418,1.0,0.882369,0.173868,0.128836,0.663233,0.633112,0.081669,0.241406,0.19361,0.250128,-0.039784,0.07139,0.455204,-0.192356,0.960197,0.877146,0.373866,0.112387,0.198816,-0.262609,0.376067,-0.182385,0.393861,-0.106672
FGA,0.369965,0.882369,1.0,0.418614,0.432933,0.606132,0.528568,-0.22286,-0.004766,-0.097577,0.353087,0.008797,-0.175434,0.476462,-0.327022,0.911408,0.716832,-0.084005,0.189673,0.317326,0.020388,-0.001135,-0.279686,0.029706,0.002437
FG3M,0.192958,0.173868,0.418614,1.0,0.966157,-0.013346,-0.151398,-0.634962,-0.410568,-0.539489,0.152937,-0.021045,-0.4503,-0.06716,-0.419512,0.341459,0.001635,-0.42888,0.58589,0.43521,0.839368,-0.115634,-0.359849,0.095331,0.221088
FG3A,0.121609,0.128836,0.432933,0.966157,1.0,-0.020786,-0.157036,-0.662694,-0.437032,-0.568671,0.169157,-0.002922,-0.471539,-0.046472,-0.410619,0.299097,-0.038858,-0.547545,0.491431,0.416461,0.874186,-0.249047,-0.344015,-0.063534,0.232876
FTM,0.362122,0.663233,0.606132,-0.013346,-0.020786,1.0,0.959255,0.09013,0.210956,0.17714,0.265866,0.022871,0.074051,0.505551,-0.101616,0.772792,0.741331,0.205878,-0.00312,0.263369,-0.300911,0.482851,-0.250647,0.271768,-0.124898
FTA,0.301972,0.633112,0.528568,-0.151398,-0.157036,0.959255,1.0,0.236286,0.314264,0.307117,0.198156,0.022195,0.183286,0.509999,0.002242,0.708722,0.744416,0.298551,-0.099463,0.030733,-0.42116,0.515163,-0.039145,0.252536,-0.19744
OREB,-0.125348,0.081669,-0.22286,-0.634962,-0.662694,0.09013,0.236286,1.0,0.692588,0.879928,-0.370596,-0.118086,0.626852,-0.00427,0.511179,-0.053095,0.308351,0.614636,-0.447515,-0.411235,-0.661203,0.398995,0.357614,0.25539,-0.407361
DREB,0.044041,0.241406,-0.004766,-0.410568,-0.437032,0.210956,0.314264,0.692588,1.0,0.952138,-0.218305,-0.130656,0.561434,0.139396,0.336584,0.147669,0.528814,0.507001,-0.280401,-0.268279,-0.489843,0.376114,0.226049,0.271008,-0.349462
REB,-0.024109,0.19361,-0.097577,-0.539489,-0.568671,0.17714,0.307117,0.879928,0.952138,1.0,-0.300828,-0.136097,0.635421,0.090004,0.438306,0.074763,0.478969,0.594391,-0.373948,-0.350968,-0.602823,0.416804,0.300425,0.286722,-0.402789


In [34]:
fig = px.imshow(data_per_min_100_corr.corr(), height=700, width=700)
fig.update_layout(title='per 100 minutes Stats Correlations')
fig.show()

## Minutes Distribution

In [35]:
fig = px.histogram(x=df_rs['MIN'], histnorm='percent')
fig.show()

In [36]:
fig = go.Figure()
fig.add_trace(go.Histogram(x=df_rs['MIN']/df_rs['GP'], histnorm='percent', name='Regular Season', xbins={'start':0,'end':45,'size':1}))
fig.add_trace(go.Histogram(x=df_ps['MIN']/df_ps['GP'], histnorm='percent', name='Playoffs',xbins={'start':0,'end':45,'size':1}))

fig.update_layout(barmode='overlay', height=600, width=1200)
fig.update_layout(title='Minutes Distribution comparison')
fig.update_traces(opacity=0.5)
fig.show()

### as expected if you are familiar with basketball, in the playoffs the rotation gets tighter, more guys play fewer minutes and main players play more minutes.


In [37]:
# a function to filter minutes and game played 
# min = minimum

def hist_data(df=df_rs, min_MIN=0, min_GP=0):
    return df.loc[(df['MIN']>=min_MIN) & (df['GP']>=min_GP), 'MIN']/\
    df.loc[(df['MIN']>=min_MIN) & (df['GP']>=min_GP), 'GP']

In [38]:
fig = go.Figure()
fig.add_trace(go.Histogram(x=hist_data(df_rs, 50, 3), histnorm='percent', name='Regular Season', xbins={'start':0,'end':45,'size':1}))
fig.add_trace(go.Histogram(x=hist_data(df_ps, 50, 3), histnorm='percent', name='Playoffs',xbins={'start':0,'end':45,'size':1}))

fig.update_layout(barmode='overlay',  height=600, width=1200)
fig.update_layout(title='Minutes Distribution comparison (players woth more than 50 mins played and 3 Games)')
fig.update_traces(opacity=0.5)
fig.show()

In [39]:
# percent of players averaging 12-34 Points in the regular season compared to the post season

((hist_data(df_rs, 100, 10)>=12)&(hist_data(df_rs, 100, 10)<=34)).mean() 

0.7735085945399394

In [40]:
((hist_data(df_ps, 20, 2)>=12)&(hist_data(df_ps, 20, 2)<=34)).mean()

0.565843621399177

## 77% of players average between 12 and 34 points in the regular season
## 56% of players average between 12 and 34 points in the post season

In [41]:
# a function to filter Minutes and game played and show PTS
# min = minimum

def hist_data_PTS(df=df_rs, min_MIN=0, min_GP=0):
    return df.loc[(df['MIN']>=min_MIN) & (df['GP']>=min_GP), 'PTS']/\
    df.loc[(df['MIN']>=min_MIN) & (df['GP']>=min_GP), 'GP']

In [42]:
fig = go.Figure()
fig.add_trace(go.Histogram(x=hist_data_PTS(df_rs, 100, 5), histnorm='percent', name='RS', xbins={'start':0,'end':45,'size':1}))
fig.add_trace(go.Histogram(x=hist_data_PTS(df_ps, 100, 5), histnorm='percent', name='Playoffs',xbins={'start':0,'end':45,'size':1}))

fig.update_layout(barmode='overlay',  height=600, width=1200)
fig.update_layout(title='points Distribution comparison (players woth more than 100 mins played and 5 Games)')
fig.update_traces(opacity=0.65)
fig.show()

## Exploring trends in the past 10 years

In [43]:
trends_df = df.groupby('Season_start')[total_columns].sum().reset_index()
trends_df

Unnamed: 0,Season_start,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,production
0,2013,638373,99251,218411,20480,56952,47219,62420,28669,83812,112481,57657,20156,12369,36826,54839,266201,468864
1,2014,634546,98251,219265,20724,59276,45098,60248,28566,85231,113797,57727,20261,12665,35796,53272,262324,466774
2,2015,636391,100351,222344,22524,63673,46516,61520,27426,87611,115037,58251,20562,13046,36078,53478,269742,476638
3,2016,632482,102147,223333,25408,71018,46806,60620,26470,87173,113643,59162,20143,12409,34908,52232,276508,481865
4,2017,633425,103729,225523,27530,76245,43721,57008,25397,88678,114075,60739,20181,12636,35695,52238,278709,486340
5,2018,634231,107374,233717,29817,84143,46671,60811,27128,91360,118488,64257,19940,12984,35394,55063,291236,506905
6,2019,552262,92997,202223,28032,78279,40949,52906,22802,79318,102120,55445,17368,11085,31685,47615,254975,440993
7,2020,562518,95849,205754,29549,80653,39624,50917,22918,80151,103069,57311,17491,11272,30520,45152,260871,450014
8,2021,635572,106569,231293,32733,92552,44740,57709,27052,89602,116654,64618,20006,12387,34372,52038,290611,504276
9,2022,635386,110010,231870,32382,89926,48136,61516,27403,86695,114098,66265,19078,12250,35066,52438,300538,512229


In [44]:
# now we will create a new metric, The formula FGA + 0.475 x FTA - ORB + TO to calculate the possession
trends_df['per_poss'] = trends_df['FGA'] + 0.475*trends_df['FTA'] - trends_df['OREB'] + trends_df['TOV']
trends_df

Unnamed: 0,Season_start,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,production,per_poss
0,2013,638373,99251,218411,20480,56952,47219,62420,28669,83812,112481,57657,20156,12369,36826,54839,266201,468864,256217.5
1,2014,634546,98251,219265,20724,59276,45098,60248,28566,85231,113797,57727,20261,12665,35796,53272,262324,466774,255112.8
2,2015,636391,100351,222344,22524,63673,46516,61520,27426,87611,115037,58251,20562,13046,36078,53478,269742,476638,260218.0
3,2016,632482,102147,223333,25408,71018,46806,60620,26470,87173,113643,59162,20143,12409,34908,52232,276508,481865,260565.5
4,2017,633425,103729,225523,27530,76245,43721,57008,25397,88678,114075,60739,20181,12636,35695,52238,278709,486340,262899.8
5,2018,634231,107374,233717,29817,84143,46671,60811,27128,91360,118488,64257,19940,12984,35394,55063,291236,506905,270868.225
6,2019,552262,92997,202223,28032,78279,40949,52906,22802,79318,102120,55445,17368,11085,31685,47615,254975,440993,236236.35
7,2020,562518,95849,205754,29549,80653,39624,50917,22918,80151,103069,57311,17491,11272,30520,45152,260871,450014,237541.575
8,2021,635572,106569,231293,32733,92552,44740,57709,27052,89602,116654,64618,20006,12387,34372,52038,290611,504276,266024.775
9,2022,635386,110010,231870,32382,89926,48136,61516,27403,86695,114098,66265,19078,12250,35066,52438,300538,512229,268753.1


In [45]:
# let's move the column to the left so it's easier to use in for loops
trends_df[list(trends_df.columns[0:2])+['per_poss']+list(trends_df.columns[2:-1])]
trends_df


Unnamed: 0,Season_start,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,production,per_poss
0,2013,638373,99251,218411,20480,56952,47219,62420,28669,83812,112481,57657,20156,12369,36826,54839,266201,468864,256217.5
1,2014,634546,98251,219265,20724,59276,45098,60248,28566,85231,113797,57727,20261,12665,35796,53272,262324,466774,255112.8
2,2015,636391,100351,222344,22524,63673,46516,61520,27426,87611,115037,58251,20562,13046,36078,53478,269742,476638,260218.0
3,2016,632482,102147,223333,25408,71018,46806,60620,26470,87173,113643,59162,20143,12409,34908,52232,276508,481865,260565.5
4,2017,633425,103729,225523,27530,76245,43721,57008,25397,88678,114075,60739,20181,12636,35695,52238,278709,486340,262899.8
5,2018,634231,107374,233717,29817,84143,46671,60811,27128,91360,118488,64257,19940,12984,35394,55063,291236,506905,270868.225
6,2019,552262,92997,202223,28032,78279,40949,52906,22802,79318,102120,55445,17368,11085,31685,47615,254975,440993,236236.35
7,2020,562518,95849,205754,29549,80653,39624,50917,22918,80151,103069,57311,17491,11272,30520,45152,260871,450014,237541.575
8,2021,635572,106569,231293,32733,92552,44740,57709,27052,89602,116654,64618,20006,12387,34372,52038,290611,504276,266024.775
9,2022,635386,110010,231870,32382,89926,48136,61516,27403,86695,114098,66265,19078,12250,35066,52438,300538,512229,268753.1


In [46]:
# addind important ratios columns
trends_df['FG%'] = trends_df['FGM']/trends_df['FGA']
trends_df['3P%'] = trends_df['FG3M']/trends_df['FG3A']
trends_df['FT%'] = trends_df['FTM']/trends_df['FTA']
trends_df['FG3A%'] = trends_df['FG3A']/trends_df['FGA']
trends_df['PTS/FGA'] = trends_df['PTS']/trends_df['FGA']
trends_df['FTA/FGA'] = trends_df['FTA']/trends_df['FTM']
trends_df['TRU%'] = 0.5*trends_df['PTS']/(trends_df['FGA']+0.475*trends_df['FTA']) # True Shooting percentage formula is explained in NBA.COM
trends_df['AST_TOV'] = trends_df['AST']/trends_df['TOV']

In [47]:
trends_df

Unnamed: 0,Season_start,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,production,per_poss,FG%,3P%,FT%,FG3A%,PTS/FGA,FTA/FGA,TRU%,AST_TOV
0,2013,638373,99251,218411,20480,56952,47219,62420,28669,83812,112481,57657,20156,12369,36826,54839,266201,468864,256217.5,0.454423,0.359601,0.756472,0.260756,1.218808,1.321925,0.536565,1.56566
1,2014,634546,98251,219265,20724,59276,45098,60248,28566,85231,113797,57727,20261,12665,35796,53272,262324,466774,255112.8,0.448092,0.349619,0.748539,0.27034,1.196379,1.335935,0.529129,1.612666
2,2015,636391,100351,222344,22524,63673,46516,61520,27426,87611,115037,58251,20562,13046,36078,53478,269742,476638,260218.0,0.451332,0.353745,0.756112,0.286372,1.213174,1.322556,0.536126,1.614585
3,2016,632482,102147,223333,25408,71018,46806,60620,26470,87173,113643,59162,20143,12409,34908,52232,276508,481865,260565.5,0.457375,0.357768,0.772121,0.317992,1.238097,1.295133,0.54835,1.694798
4,2017,633425,103729,225523,27530,76245,43721,57008,25397,88678,114075,60739,20181,12636,35695,52238,278709,486340,262899.8,0.459949,0.361073,0.766927,0.338081,1.235834,1.303904,0.551677,1.701611
5,2018,634231,107374,233717,29817,84143,46671,60811,27128,91360,118488,64257,19940,12984,35394,55063,291236,506905,270868.225,0.459419,0.354361,0.767476,0.360021,1.246105,1.302972,0.554519,1.815477
6,2019,552262,92997,202223,28032,78279,40949,52906,22802,79318,102120,55445,17368,11085,31685,47615,254975,440993,236236.35,0.459874,0.358104,0.773995,0.387092,1.260861,1.291997,0.560746,1.749882
7,2020,562518,95849,205754,29549,80653,39624,50917,22918,80151,103069,57311,17491,11272,30520,45152,260871,450014,237541.575,0.465843,0.366372,0.778208,0.391988,1.267878,1.285004,0.56726,1.877818
8,2021,635572,106569,231293,32733,92552,44740,57709,27052,89602,116654,64618,20006,12387,34372,52038,290611,504276,266024.775,0.460753,0.353671,0.775269,0.40015,1.256463,1.289875,0.561665,1.87996
9,2022,635386,110010,231870,32382,89926,48136,61516,27403,86695,114098,66265,19078,12250,35066,52438,300538,512229,268753.1,0.474447,0.360096,0.782496,0.387829,1.296149,1.277962,0.575545,1.889722


In [48]:
# let's save this and copy it as a checkpoint so we can do other analysis.
trends_per48_df = trends_df.copy()

In [49]:
# choosimg needed columns for our next analysis
# will calculate per 48 mins per team (5 players in a team)

for col in trends_per48_df.columns[2:18]:
    trends_per48_df[col] = (trends_per48_df[col]/trends_per48_df['MIN'])*48*5

trends_per48_df


Unnamed: 0,Season_start,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,production,per_poss,FG%,3P%,FT%,FG3A%,PTS/FGA,FTA/FGA,TRU%,AST_TOV
0,2013,638373,37.313984,82.112871,7.699574,21.411432,17.752255,23.467158,10.778275,31.509603,42.287879,21.676481,7.577764,4.650197,13.844946,20.617037,100.079797,176.272117,256217.5,0.454423,0.359601,0.756472,0.260756,1.218808,1.321925,0.536565,1.56566
1,2014,634546,37.160805,82.931103,7.838297,22.419557,17.057109,22.78719,10.804323,32.236339,43.040662,21.833689,7.66318,4.790196,13.538877,20.148705,99.217015,176.544742,255112.8,0.448092,0.349619,0.748539,0.27034,1.196379,1.335935,0.529129,1.612666
2,2015,636391,37.845036,83.851846,8.4944,24.012785,17.542423,23.200831,10.343075,33.040442,43.383517,21.968004,7.754478,4.919994,13.605975,20.167978,101.726894,179.752888,260218.0,0.451332,0.353745,0.756112,0.286372,1.213174,1.322556,0.536126,1.614585
3,2016,632482,38.760439,84.745368,9.641255,26.948308,17.760885,23.00271,10.044238,33.078443,43.122682,22.449461,7.643411,4.708687,13.2461,19.819821,104.923018,182.847259,260565.5,0.457375,0.357768,0.772121,0.317992,1.238097,1.295133,0.54835,1.694798
4,2017,633425,39.302143,85.44898,10.430911,28.888661,16.56556,21.599905,9.622734,33.599432,43.222165,23.013553,7.64643,4.787686,13.524569,19.792588,105.600758,184.270592,262899.8,0.459949,0.361073,0.766927,0.338081,1.235834,1.303904,0.551677,1.701611
5,2018,634231,40.631505,88.441088,11.283081,31.840639,17.660821,23.011553,10.265534,34.571631,44.837165,24.315557,7.545516,4.913289,13.39348,20.836446,110.206912,191.818438,270868.225,0.459419,0.354361,0.767476,0.360021,1.246105,1.302972,0.554519,1.815477
6,2019,552262,40.414296,87.881332,12.182044,34.018202,17.795467,22.991696,9.90921,34.469726,44.378936,24.095085,7.547722,4.817279,13.769551,20.692353,110.806103,191.645125,236236.35,0.459874,0.358104,0.773995,0.387092,1.260861,1.291997,0.560746,1.749882
7,2020,562518,40.894265,87.785564,12.60717,34.410846,16.905699,21.723892,9.778034,34.196666,43.974699,24.451911,7.462588,4.809233,13.02145,19.264237,111.301398,191.999829,237541.575,0.465843,0.366372,0.778208,0.391988,1.267878,1.285004,0.56726,1.877818
8,2021,635572,40.241798,87.339153,12.360393,34.948802,16.894388,21.791646,10.215176,33.834845,44.050021,24.400571,7.554518,4.677487,12.979301,19.650205,109.738377,190.420975,266024.775,0.460753,0.353671,0.775269,0.40015,1.256463,1.289875,0.561665,1.87996
9,2022,635386,41.553323,87.582666,12.231431,33.967132,18.182081,23.236017,10.350747,32.746708,43.097456,25.029824,7.206202,4.627109,13.24524,19.807046,113.520159,193.48075,268753.1,0.474447,0.360096,0.782496,0.387829,1.296149,1.277962,0.575545,1.889722


In [50]:
trends_per48_df.columns

Index(['Season_start', 'MIN', 'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA',
       'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
       'production', 'per_poss', 'FG%', '3P%', 'FT%', 'FG3A%', 'PTS/FGA',
       'FTA/FGA', 'TRU%', 'AST_TOV'],
      dtype='object')

In [51]:
trends_per48_df.drop(columns='MIN', inplace=True)

In [52]:
# let's visualiuse it

for col in trends_per48_df.columns[1:]:
    fig.add_trace(go.Scatter(x=trends_per48_df['Season_start'],
                             y=trends_per48_df[col], name=col))

fig.show()

There is a tremendous amount of information we can extract from the Data above so I will try to summarize it:

1. the amount of 3 pointers attempted has obviously gone up from 21 in 2013 to 34 in 2023, what is interesting is that the FG% and TRU% didn't fall, it has actually gone up even if players are taking more difficult shots, or maybe paint defense isn't as good, so many factors.
2. the amount of 3 pointers attempted compared to 2 pointers was 26% and now it's 39%.
3. personal fouls have gone down from 20 to 18 in 10 years
4. Free throws attempted are relatively the same which is interesting, they float between 23-21
5. Assist to turnover ratio is interesting because it's gotten better, even if players are taking more shots.

again, there are so many insights to extract.

## compairng the Regular season to the Playoffs

In [53]:
trends_df_ps = df_ps.groupby('Season_start')[total_columns].sum().reset_index()
trends_df_ps

Unnamed: 0,Season_start,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,production
0,2013,43171,6472,14239,1426,3978,3349,4391,1823,5497,7320,3526,1261,775,2260,3916,17719,30601
1,2014,39332,5964,13695,1424,4139,2937,4050,1785,5508,7293,3525,1230,868,2071,3544,16289,29205
2,2015,41527,6286,14295,1571,4432,3027,4051,1802,5590,7392,3420,1259,853,2142,3624,17170,30094
3,2016,38073,6086,13219,1660,4597,2923,3765,1534,5064,6598,3502,1193,740,2011,3282,16755,28788
4,2017,39560,6294,13816,1723,4906,2818,3683,1507,5519,7026,3576,1198,791,2057,3401,17129,29720
5,2018,39766,6312,14259,1862,5401,3177,4053,1674,5707,7381,3774,1161,799,2101,3638,17663,30778
6,2019,40194,6447,14107,2170,6027,3123,3963,1462,5701,7163,3796,1168,707,2215,3611,18187,31021
7,2020,41006,6829,14771,2122,5831,2974,3782,1686,5697,7383,3734,1135,748,1952,3483,18754,31754
8,2021,41814,6639,14571,2135,6017,3083,3928,1630,5677,7307,3982,1234,793,2255,3732,18496,31812
9,2022,40523,6750,14651,2023,5761,2888,3635,1734,5506,7240,3986,1145,802,2026,3289,18411,31584


In [54]:
trends_df_rs = df_rs.groupby('Season_start')[total_columns].sum().reset_index()
trends_df_rs

Unnamed: 0,Season_start,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,production
0,2013,595202,92779,204172,19054,52974,43870,58029,26846,78315,105161,54131,18895,11594,34566,50923,248482,438263
1,2014,595214,92287,205570,19300,55137,42161,56198,26781,79723,106504,54202,19031,11797,33725,49728,246035,437569
2,2015,594864,94065,208049,20953,59241,43489,57469,25624,82021,107645,54831,19303,12193,33936,49854,252572,446544
3,2016,594409,96061,210114,23748,66421,43883,56855,24936,82109,107045,55660,18950,11669,32897,48950,259753,453077
4,2017,593865,97435,211707,25807,71339,40903,53325,23890,83159,107049,57163,18983,11845,33638,48837,261580,456620
5,2018,594465,101062,219458,27955,78742,43494,56758,25454,85653,111107,60483,18779,12185,33293,51425,273573,476127
6,2019,512068,86550,188116,25862,72252,37826,48943,21340,73617,94957,51649,16200,10378,29470,44004,236788,409972
7,2020,521512,89020,190983,27427,74822,36650,47135,21232,74454,95686,53577,16356,10524,28568,41669,242117,418260
8,2021,593758,99930,216722,30598,86535,41657,53781,25422,83925,109347,60636,18772,11594,32117,48306,272115,472464
9,2022,594863,103260,217219,30359,84165,45248,57881,25669,81189,106858,62279,17933,11448,33040,49149,282127,480645


In [55]:
for i in [trends_df_ps,trends_df_rs]:
    i['poss_est'] = i['FGA']-i['OREB']+i['TOV']+0.475*i['FTA']
    i['poss_per_48'] = (i['poss_est']/i['MIN'])*48*5
    i['FG%'] = i['FGM']/i['FGA']
    i['3P%'] = i['FG3M']/i['FG3A']
    i['FT%'] = i['FTM']/i['FTA']
    i['FG3A%'] = i['FG3A']/i['FGA']
    i['PTS/FGA'] = i['PTS']/i['FGA']
    i['FTA/FGA'] = i['FTA']/i['FTM']
    i['TRU%'] = 0.5*i['PTS']/(i['FGA']+0.475*i['FTA'])
    i['AST_TOV'] = i['AST']/i['TOV']
    for col in total_columns:
        i[col] = 100*i[col]/i['poss_est']
    i.drop(columns=['MIN','poss_est'], inplace=True)


In [56]:
trends_df_ps

Unnamed: 0,Season_start,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,production,poss_per_48,FG%,3P%,FT%,FG3A%,PTS/FGA,FTA/FGA,TRU%,AST_TOV
0,2013,38.611778,84.949491,8.507478,23.732641,19.980044,26.196588,10.875969,32.794954,43.670923,21.036021,7.523092,4.623629,13.483099,23.362751,105.711077,182.564742,93.183248,0.454526,0.358472,0.762696,0.279374,1.244399,1.311138,0.542704,1.560177
1,2014,37.498232,86.106352,8.9533,26.023672,18.466181,25.464091,11.223062,34.631164,45.854226,22.16319,7.733539,5.457489,13.021267,22.282651,102.415945,183.624389,97.049222,0.435487,0.344044,0.725185,0.302227,1.189412,1.378958,0.521457,1.702076
2,2015,37.960714,86.326504,9.487159,26.764538,18.279841,24.463705,10.882152,33.757618,44.63977,20.65314,7.603013,5.151207,12.935388,21.885082,103.688427,181.735558,95.701929,0.439734,0.354468,0.747223,0.310038,1.201119,1.338289,0.52931,1.596639
3,2016,39.304137,85.369929,10.720484,29.687992,18.877094,24.314834,9.906761,32.703935,42.610696,22.616347,7.704541,4.779011,12.987286,21.19556,108.205853,185.916448,97.608541,0.460398,0.361105,0.776361,0.347757,1.267494,1.28806,0.558226,1.741422
4,2017,39.055749,85.731527,10.69162,30.442883,17.486352,22.853881,9.351289,34.246692,43.597981,22.189921,7.433872,4.908341,12.764168,21.104004,106.289471,184.419586,97.767998,0.455559,0.351203,0.765137,0.355096,1.239794,1.306955,0.550226,1.738454
5,2018,37.998516,85.839804,11.209321,32.514256,19.125679,24.399237,10.077553,34.35639,44.433943,22.719645,6.989271,4.810015,12.648112,21.900919,106.332033,185.284906,100.253533,0.442668,0.344751,0.783864,0.378778,1.238726,1.275732,0.545687,1.796287
6,2019,38.506967,84.259001,12.961085,35.998369,18.653212,23.670406,8.732307,34.05122,42.783527,22.67294,6.976289,4.222805,13.229864,21.567963,108.62823,185.283793,99.969697,0.457007,0.360046,0.788039,0.427235,1.289218,1.268972,0.56872,1.71377
7,2020,40.568036,87.747907,12.605853,34.639364,17.667204,22.467171,10.015772,33.84333,43.859102,22.182024,6.742528,4.443534,11.595959,20.690946,111.409129,188.636316,98.52285,0.462325,0.363917,0.786356,0.39476,1.26965,1.271688,0.565989,1.91291
8,2021,38.911486,85.401306,12.513334,35.265916,18.069606,23.02219,9.553505,33.27316,42.826665,23.338686,7.232531,4.64781,13.216659,21.873425,108.405913,186.451605,97.929689,0.455631,0.354828,0.784878,0.412944,1.269371,1.274084,0.56264,1.765854
9,2022,40.492813,87.890399,12.135846,34.559866,17.324925,21.806129,10.402154,33.030137,43.432291,23.911756,6.868781,4.811146,12.153843,19.730498,110.446396,189.470369,98.726896,0.460719,0.351154,0.794498,0.393215,1.256638,1.258657,0.562078,1.967423


In [57]:
trends_df_rs

Unnamed: 0,Season_start,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,production,poss_per_48,FG%,3P%,FT%,FG3A%,PTS/FGA,FTA/FGA,TRU%,AST_TOV
0,2013,38.745777,85.265014,7.95721,22.122665,18.320711,24.233702,11.211256,32.705413,43.916669,22.605844,7.89081,4.841813,14.435233,21.26614,103.769475,183.024611,96.554424,0.454416,0.359686,0.756001,0.259458,1.217023,1.322749,0.536132,1.566019
1,2014,38.580223,85.937743,8.06829,23.04981,17.625243,23.493357,11.195693,33.327892,44.523585,22.658936,7.955836,4.93169,14.098606,20.788598,102.85398,182.924028,96.45259,0.448932,0.350037,0.750222,0.268215,1.196843,1.332938,0.529645,1.607176
2,2015,38.605217,85.385392,8.599321,24.313099,17.848321,23.585853,10.516346,33.662239,44.178585,22.503191,7.922144,5.004129,13.927674,20.460581,103.658077,183.266127,98.305001,0.452129,0.353691,0.756738,0.284745,1.214002,1.321461,0.536595,1.615718
3,2016,39.195593,85.732428,9.689853,27.101638,17.9055,23.198441,10.17459,33.502784,43.677374,22.710847,7.732134,4.761281,13.422902,19.972978,105.986538,184.868174,98.954541,0.457185,0.357538,0.771841,0.316119,1.236248,1.295604,0.547724,1.691948
4,2017,39.481835,85.786225,10.457307,28.907422,16.574388,21.607932,9.680516,33.697028,43.377544,23.163136,7.69214,4.799737,13.630523,19.78934,105.995365,185.027922,99.733525,0.460235,0.361752,0.767051,0.33697,1.235576,1.303694,0.551772,1.699358
5,2018,39.747964,86.313438,10.994779,30.969446,17.10631,22.323078,10.011129,33.687561,43.69869,23.788131,7.385833,4.792394,13.094229,20.225595,107.597016,187.262064,102.649764,0.460507,0.35502,0.766306,0.358802,1.246585,1.304962,0.555099,1.816688
6,2019,39.431615,85.704422,11.782558,32.91754,17.233279,22.298111,9.722365,33.539425,43.26179,23.530947,7.380614,4.728149,13.42634,20.047935,107.879068,186.780568,102.874114,0.460088,0.357942,0.772858,0.384082,1.258734,1.293898,0.560143,1.752596
7,2020,40.333812,86.53193,12.426819,33.900882,16.605642,21.35626,9.619945,33.734145,43.35409,24.275047,7.410692,4.768288,12.943792,18.879686,109.700085,189.508202,101.569954,0.466115,0.366563,0.777554,0.391773,1.267741,1.286085,0.567359,1.87542
8,2021,40.138499,87.049892,12.290181,34.75818,16.732207,21.602007,10.211157,33.709832,43.920989,24.355429,7.540077,4.656917,12.900312,19.402885,109.299385,189.772797,100.632099,0.461098,0.353591,0.774567,0.39929,1.255595,1.291044,0.561599,1.887972
9,2022,40.962622,86.169472,12.043233,33.38775,17.94961,22.961045,10.182738,32.207189,42.389927,24.705705,7.113913,4.541353,13.106769,19.497113,111.918086,190.668984,101.704147,0.475373,0.360708,0.781742,0.387466,1.298814,1.279195,0.576446,1.884958


In [58]:
# percentage change over the years 
comparison_df = round(100*(trends_df_ps-trends_df_rs)/trends_df_rs,2)
comparison_df['Season_start'] = list(range(2013,2024))
comparison_df

Unnamed: 0,Season_start,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,production,poss_per_48,FG%,3P%,FT%,FG3A%,PTS/FGA,FTA/FGA,TRU%,AST_TOV
0,2013,-0.35,-0.37,6.92,7.28,9.06,8.1,-2.99,0.27,-0.56,-6.94,-4.66,-4.51,-6.6,9.86,1.87,-0.25,-3.49,0.02,-0.34,0.89,7.68,2.25,-0.88,1.23,-0.37
1,2014,-2.8,0.2,10.97,12.9,4.77,8.39,0.24,3.91,2.99,-2.19,-2.79,10.66,-7.64,7.19,-0.43,0.38,0.62,-2.99,-1.71,-3.34,12.68,-0.62,3.45,-1.55,5.9
2,2015,-1.67,1.1,10.32,10.08,2.42,3.72,3.48,0.28,1.04,-8.22,-4.03,2.94,-7.12,6.96,0.03,-0.84,-2.65,-2.74,0.22,-1.26,8.88,-1.06,1.27,-1.36,-1.18
3,2016,0.28,-0.42,10.64,9.54,5.43,4.81,-2.63,-2.38,-2.44,-0.42,-0.36,0.37,-3.25,6.12,2.09,0.57,-1.36,0.7,1.0,0.59,10.01,2.53,-0.58,1.92,2.92
4,2017,-1.08,-0.06,2.24,5.31,5.5,5.77,-3.4,1.63,0.51,-4.2,-3.36,2.26,-6.36,6.64,0.28,-0.33,-1.97,-1.02,-2.92,-0.25,5.38,0.34,0.25,-0.28,2.3
5,2018,-4.4,-0.55,1.95,4.99,11.8,9.3,0.66,1.99,1.68,-4.49,-5.37,0.37,-3.41,8.28,-1.18,-1.06,-2.33,-3.87,-2.89,2.29,5.57,-0.63,-2.24,-1.7,-1.12
6,2019,-2.34,-1.69,10.0,9.36,8.24,6.15,-10.18,1.53,-1.11,-3.65,-5.48,-10.69,-1.46,7.58,0.69,-0.8,-2.82,-0.67,0.59,1.96,11.24,2.42,-1.93,1.53,-2.22
7,2020,0.58,1.41,1.44,2.18,6.39,5.2,4.11,0.32,1.16,-8.62,-9.02,-6.81,-10.41,9.59,1.56,-0.46,-3.0,-0.81,-0.72,1.13,0.76,0.15,-1.12,-0.24,2.0
8,2021,-3.06,-1.89,1.82,1.46,7.99,6.57,-6.44,-1.3,-2.49,-4.17,-4.08,-0.2,2.45,12.73,-0.82,-1.75,-2.69,-1.19,0.35,1.33,3.42,1.1,-1.31,0.19,-6.47
9,2022,-1.15,2.0,0.77,3.51,-3.48,-5.03,2.15,2.56,2.46,-3.21,-3.45,5.94,-7.27,1.2,-1.31,-0.63,-2.93,-3.08,-2.65,1.63,1.48,-3.25,-1.61,-2.49,4.37


very insightful data, some highlights:
1. because of the tight defence the PTS, Personal Fouls, and free Throws go up. 
2. teams shhot more 3 pointers compared to the regular season be the gradually became less in recent years, for examle in 2014 and 2019 they shot 10% more, but in 2020 to 2023 all les than 4%
3. poss per 48 represnt pace and it goes down in the playoffs, by a small margin I'd say, from 3-6%.

In [59]:
fig = go.Figure()

for col in comparison_df.columns[1:]:
    fig.add_trace(go.Scatter(x=comparison_df['Season_start'],
                             y=comparison_df[col], name=col))

fig.show()

## Trying to Filter the Years Giannis started to play well
## being a fan of his I was trying to prove that he is still the best 


In [60]:
included_yrs = [2018, 2019, 2020, 2021, 2022, 2023]

In [61]:
df_since2018 = df[df['Season_start'].isin(included_yrs)]

In [62]:
df_since2018.shape

(4631, 30)

In [63]:
df_since2018.head()

Unnamed: 0,Year,Season_type,PLAYER_ID,PLAYER,TEAM_ID,TEAM,GP,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,AST_TOV,STL_TOV,Season_start,production
3527,2018-19,Regular Season,201935,James Harden,1610612745,HOU,78,2867,843,1909,0.442,378,1028,0.368,754,858,0.879,66,452,518,586,158,58,387,244,2818,1.51,0.41,2018,4138
3528,2018-19,Regular Season,202331,Paul George,1610612760,OKC,77,2841,707,1614,0.438,292,757,0.386,453,540,0.839,105,523,628,318,170,34,205,214,2159,1.55,0.83,2018,3309
3529,2018-19,Regular Season,202689,Kemba Walker,1610612766,CHA,82,2863,731,1684,0.434,260,731,0.356,380,450,0.844,52,309,361,484,102,34,211,131,2102,2.29,0.48,2018,3083
3530,2018-19,Regular Season,203078,Bradley Beal,1610612764,WAS,82,3028,764,1609,0.475,209,596,0.351,362,448,0.808,89,322,411,448,121,58,224,226,2099,2.0,0.54,2018,3137
3531,2018-19,Regular Season,203081,Damian Lillard,1610612757,POR,80,2838,681,1533,0.444,237,643,0.369,468,513,0.912,68,303,371,551,88,34,212,148,2067,2.6,0.41,2018,3111


In [64]:
addition_cols = ['MIN', 'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM','FTA', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF','PTS','production','GP','PLAYER']

In [65]:
df_giannis = df_since2018[addition_cols]

In [66]:
df_giannis_sum = df_giannis.groupby('PLAYER').sum().reset_index()

In [67]:
df_giannis_sum['PPM'] = df_giannis_sum['production']/df_giannis_sum['MIN'] # production per Minute
df_giannis_sum['PPG'] = df_giannis_sum['production']/df_giannis_sum['GP'] # production per Game

In [68]:
df_giannis50 = df_giannis_sum[df_giannis_sum['GP'] >= 50]

In [69]:
df_giannis50.sort_values(by='PPM', ascending=False).head(20).reset_index()

Unnamed: 0,index,PLAYER,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,production,GP,PPM,PPG
0,343,Giannis Antetokounmpo,15213,4902,8676,413,1463,3241,4836,1019,4546,5565,2687,490,550,1632,1406,13458,22750,459,1.495432,49.56427
1,516,Joel Embiid,12939,3673,7289,464,1375,3487,4167,872,3567,4439,1466,372,621,1323,1172,11297,18195,390,1.406214,46.653846
2,1027,Victor Wembanyama,2106,551,1186,128,394,292,367,161,594,755,274,88,254,260,153,1522,2893,71,1.373694,40.746479
3,794,Nikola Jokic,17900,4992,8910,630,1784,2336,2823,1452,4697,6149,4261,680,395,1717,1467,12950,24435,527,1.365084,46.366224
4,677,Luka Doncic,15890,4443,9476,1367,3936,2763,3712,421,3520,3941,3715,551,205,1810,1036,13016,21428,450,1.348521,47.617778
5,662,LeBron James,14108,4028,7820,894,2524,1737,2409,405,2826,3231,3181,474,266,1413,655,10687,17839,397,1.26446,44.934509
6,54,Anthony Davis,12881,3428,6443,237,805,2186,2731,1033,3099,4132,1211,476,815,807,898,9279,15913,373,1.235385,42.662198
7,449,James Harden,16326,3432,7843,1481,4084,3189,3660,348,2549,2897,3956,658,319,1757,1139,11534,19364,446,1.186084,43.41704
8,929,Stephen Curry,13299,3604,7771,1858,4555,1786,1955,225,1835,2060,2226,431,124,1181,821,10852,15693,388,1.180014,40.445876
9,596,Karl-Anthony Towns,11803,2934,5786,770,1933,1703,2028,879,2720,3599,1300,293,384,1071,1278,8341,13917,354,1.179107,39.313559


### My point is Valid, Giannis is the best player in the league, although The joker Leads him in production but he played in more games and more Minutes.
### National Players are Dominating and I'm Really impressed by Victor Wembynyama, The French Alien, I already predicted That he will Win the DPOY in 2024-25 Season. 

## Looking for who was the most productive Player in the Last 10 years.
### - James harden has scored the most points and played the most Games
### - Lebron Leads the Production value marginally over JH
### - 


In [70]:
df_sorting = df[addition_cols]

In [71]:

df_by_Production = df_sorting.groupby('PLAYER').sum().sort_values(by='production', ascending=False).reset_index()

In [72]:
df_by_Production['PPM'] = df_by_Production['production']/df_by_Production['MIN'] # production per Minute
df_by_Production['PPG'] = df_by_Production['production']/df_by_Production['GP'] # production per Game

In [73]:
df_addition_50GP = df_by_Production[df_by_Production['GP'] >= 50]

In [74]:
df_addition_50GP.sort_values(by='PTS', ascending=False).head(20)

Unnamed: 0,PLAYER,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,production,GP,PPM,PPG
1,James Harden,32827,7131,16247,2783,7749,7035,8115,728,4821,5549,7526,1424,572,3744,2309,24080,39151,891,1.192646,43.940516
0,LeBron James,32128,8879,16987,1665,4710,4261,5901,985,6039,7024,6850,1199,610,3238,1582,23684,39367,876,1.225317,44.939498
4,Stephen Curry,28620,7504,15909,3679,8813,3725,4105,561,3644,4205,5353,1250,218,2662,1810,22412,33438,833,1.168344,40.141657
5,Damian Lillard,29966,6804,15585,2641,7116,5034,5592,508,3096,3604,5528,806,257,2336,1586,21283,31478,825,1.050457,38.155152
2,Giannis Antetokounmpo,28799,7524,13852,589,2079,4970,7171,1638,7039,8677,4268,983,1091,2640,2635,20607,35626,871,1.237057,40.902411
9,DeMar DeRozan,31095,7211,15390,488,1623,5436,6409,598,3539,4137,4226,928,285,1938,1946,20346,29922,869,0.962277,34.432681
8,Kevin Durant,25995,6999,13614,1586,4075,4521,5118,405,4791,5196,3702,656,867,2270,1452,20105,30526,716,1.174303,42.634078
3,Russell Westbrook,28349,6901,15807,1150,3782,4273,5596,1376,5631,7007,7388,1359,270,3543,2258,19225,35249,845,1.243395,41.714793
7,Anthony Davis,25823,6855,13062,376,1254,4330,5407,1935,6073,8008,1935,975,1722,1511,1756,18416,31056,732,1.202649,42.42623
12,Paul George,26147,5815,13210,2180,5643,3485,4047,616,4267,4883,2987,1273,305,2148,2033,17295,26743,740,1.022794,36.139189


In [75]:
df_addition_50GP.sort_values(by='PPM', ascending=False).head(20)

Unnamed: 0,PLAYER,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,production,GP,PPM,PPG
30,Joel Embiid,15915,4443,8912,574,1716,4080,4939,1099,4374,5473,1755,446,822,1703,1524,13540,22036,492,1.384606,44.788618
514,Victor Wembanyama,2106,551,1186,128,394,292,367,161,594,755,274,88,254,260,153,1522,2893,71,1.373694,40.746479
32,Luka Doncic,15890,4443,9476,1367,3936,2763,3712,421,3520,3941,3715,551,205,1810,1036,13016,21428,450,1.348521,47.617778
6,Nikola Jokic,24114,6297,11374,814,2287,2944,3554,2040,6190,8230,5267,910,561,2202,2101,16352,31320,755,1.298831,41.483444
58,DeMarcus Cousins,13208,3259,7009,428,1262,2440,3267,1006,3580,4586,1504,598,554,1517,1609,9386,16628,454,1.258934,36.625551
3,Russell Westbrook,28349,6901,15807,1150,3782,4273,5596,1376,5631,7007,7388,1359,270,3543,2258,19225,35249,845,1.243395,41.714793
2,Giannis Antetokounmpo,28799,7524,13852,589,2079,4970,7171,1638,7039,8677,4268,983,1091,2640,2635,20607,35626,871,1.237057,40.902411
0,LeBron James,32128,8879,16987,1665,4710,4261,5901,985,6039,7024,6850,1199,610,3238,1582,23684,39367,876,1.225317,44.939498
7,Anthony Davis,25823,6855,13062,376,1254,4330,5407,1935,6073,8008,1935,975,1722,1511,1756,18416,31056,732,1.202649,42.42623
1,James Harden,32827,7131,16247,2783,7749,7035,8115,728,4821,5549,7526,1424,572,3744,2309,24080,39151,891,1.192646,43.940516


### top 5 players in Production per Minute last 10 years
- Embiid 
- Victor (Very impressive for a rookie)
- Luka
- Jokic
- DeMarcus Cousins (what a suprise) 





In [76]:
df_addition_50GP.sort_values(by='PPG', ascending=False).head(20)

Unnamed: 0,PLAYER,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,production,GP,PPM,PPG
32,Luka Doncic,15890,4443,9476,1367,3936,2763,3712,421,3520,3941,3715,551,205,1810,1036,13016,21428,450,1.348521,47.617778
0,LeBron James,32128,8879,16987,1665,4710,4261,5901,985,6039,7024,6850,1199,610,3238,1582,23684,39367,876,1.225317,44.939498
30,Joel Embiid,15915,4443,8912,574,1716,4080,4939,1099,4374,5473,1755,446,822,1703,1524,13540,22036,492,1.384606,44.788618
1,James Harden,32827,7131,16247,2783,7749,7035,8115,728,4821,5549,7526,1424,572,3744,2309,24080,39151,891,1.192646,43.940516
8,Kevin Durant,25995,6999,13614,1586,4075,4521,5118,405,4791,5196,3702,656,867,2270,1452,20105,30526,716,1.174303,42.634078
7,Anthony Davis,25823,6855,13062,376,1254,4330,5407,1935,6073,8008,1935,975,1722,1511,1756,18416,31056,732,1.202649,42.42623
3,Russell Westbrook,28349,6901,15807,1150,3782,4273,5596,1376,5631,7007,7388,1359,270,3543,2258,19225,35249,845,1.243395,41.714793
6,Nikola Jokic,24114,6297,11374,814,2287,2944,3554,2040,6190,8230,5267,910,561,2202,2101,16352,31320,755,1.298831,41.483444
2,Giannis Antetokounmpo,28799,7524,13852,589,2079,4970,7171,1638,7039,8677,4268,983,1091,2640,2635,20607,35626,871,1.237057,40.902411
514,Victor Wembanyama,2106,551,1186,128,394,292,367,161,594,755,274,88,254,260,153,1522,2893,71,1.373694,40.746479


### top 5 players in Production per Game
- Luka 
- Lebron
- Embiid
- Harden
- Durant
