# NBA Dashboard

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
NBA_data = pd.read_csv('../Data/Player.csv')
NBA_data.head()

Unnamed: 0,season,lg,player,player_id,age,team,pos,g,gs,mp,...,orb,drb,trb,ast,stl,blk,tov,pf,pts,trp_dbl
0,2025,NBA,Precious Achiuwa,achiupr01,25.0,NYK,C,57,10.0,1170.0,...,101.0,216.0,317.0,55,47.0,42.0,45.0,81.0,379,0.0
1,2025,NBA,Steven Adams,adamsst01,31.0,HOU,C,58,3.0,794.0,...,166.0,161.0,327.0,66,22.0,28.0,54.0,60.0,225,0.0
2,2025,NBA,Bam Adebayo,adebaba01,27.0,MIA,C,78,78.0,2674.0,...,185.0,564.0,749.0,337,98.0,53.0,161.0,162.0,1410,1.0
3,2025,NBA,Ochai Agbaji,agbajoc01,24.0,TOR,SG,64,45.0,1739.0,...,61.0,181.0,242.0,98,58.0,30.0,54.0,125.0,667,0.0
4,2025,NBA,Santi Aldama,aldamsa01,24.0,MEM,PF,65,16.0,1660.0,...,94.0,322.0,416.0,188,52.0,29.0,71.0,76.0,811,0.0


1. Remove all the players that is not from the NBA league and Drop Unnecessary Columns()

In [3]:
# Remove all the players that is not from the NBA league
print("Remove all the players that is not from the NBA league:")
print(NBA_data['lg'].unique())
NBA_data = NBA_data[NBA_data['lg'] == 'NBA']
print(NBA_data['lg'].unique())
print("Number of NBA players:", len(NBA_data))
print()

# Remove all the players that does not have a position
print("Remove all the players that does not have a position:")
print(NBA_data['pos'].unique())
NBA_data = NBA_data[NBA_data['pos'].notna()]
print(NBA_data['pos'].unique())
print("Number of players with a position:", len(NBA_data))
print()

# Remove all the players that is not in the current 30 teams
print("Remove all the players that is not in the current 30 teams:")
current_teams = ['ATL', 'BKN', 'BOS', 'CHA', 'CHI', 'CLE', 'DAL', 'DEN', 'DET', 'GSW',
                 'HOU', 'IND', 'LAC', 'LAL', 'MEM', 'MIA', 'MIL', 'MIN', 'NOP', 'NYK',
                 'OKC', 'ORL', 'PHI', 'PHX', 'POR', 'SAC', 'SAS', 'TOR', 'UTA', 'WAS']
NBA_data = NBA_data[NBA_data['team'].isin(current_teams)]
print(NBA_data['team'].unique())
print("Number of players in the current 30 teams:", len(NBA_data))
print()

# Drop Unnecessary Columns(lg, trp_dbl)
NBA_data = NBA_data.drop(columns=['lg'])

NBA_data.head()

Remove all the players that is not from the NBA league:
['NBA' 'ABA' 'BAA']
['NBA']
Number of NBA players: 30386

Remove all the players that does not have a position:
['C' 'SG' 'PF' 'PG' 'SF' nan]
['C' 'SG' 'PF' 'PG' 'SF']
Number of players with a position: 29820

Remove all the players that is not in the current 30 teams:
['NYK' 'HOU' 'MIA' 'TOR' 'MEM' 'MIN' 'DEN' 'CLE' 'NOP' 'GSW' 'MIL' 'ORL'
 'LAL' 'POR' 'WAS' 'PHI' 'LAC' 'CHI' 'ATL' 'SAS' 'DET' 'IND' 'BOS' 'DAL'
 'OKC' 'SAC' 'UTA' 'CHA']
Number of players in the current 30 teams: 21682



Unnamed: 0,season,player,player_id,age,team,pos,g,gs,mp,fg,...,orb,drb,trb,ast,stl,blk,tov,pf,pts,trp_dbl
0,2025,Precious Achiuwa,achiupr01,25.0,NYK,C,57,10.0,1170.0,164,...,101.0,216.0,317.0,55,47.0,42.0,45.0,81.0,379,0.0
1,2025,Steven Adams,adamsst01,31.0,HOU,C,58,3.0,794.0,91,...,166.0,161.0,327.0,66,22.0,28.0,54.0,60.0,225,0.0
2,2025,Bam Adebayo,adebaba01,27.0,MIA,C,78,78.0,2674.0,540,...,185.0,564.0,749.0,337,98.0,53.0,161.0,162.0,1410,1.0
3,2025,Ochai Agbaji,agbajoc01,24.0,TOR,SG,64,45.0,1739.0,266,...,61.0,181.0,242.0,98,58.0,30.0,54.0,125.0,667,0.0
4,2025,Santi Aldama,aldamsa01,24.0,MEM,PF,65,16.0,1660.0,313,...,94.0,322.0,416.0,188,52.0,29.0,71.0,76.0,811,0.0


2. Remove empty data and fill out empty data with 0 as the players had never done any shooting in 2-points/3-points/Free Throws during the season

In [4]:
NBA_data.isna().sum()

season             0
player             0
player_id          0
age                0
team               0
pos                0
g                  0
gs              2725
mp                16
fg                 0
fga                0
fg_percent       118
x3p             2597
x3pa            2597
x3p_percent     5592
x2p             2597
x2pa            2597
x2p_percent     2786
e_fg_percent    2706
ft                 0
fta                0
ft_percent      1008
orb             1433
drb             1433
trb                5
ast                0
stl             1433
blk             1433
tov             2154
pf                 0
pts                0
trp_dbl            1
dtype: int64

In [5]:
NBA_data = NBA_data[NBA_data['x2p'].notna()]
NBA_data = NBA_data[NBA_data['gs'].notna()]
NBA_data = NBA_data[NBA_data['trp_dbl'].notna()]

In [6]:
NBA_data.isna().sum()

season             0
player             0
player_id          0
age                0
team               0
pos                0
g                  0
gs                 0
mp                 0
fg                 0
fga                0
fg_percent       109
x3p                0
x3pa               0
x3p_percent     2896
x2p                0
x2pa               0
x2p_percent      189
e_fg_percent     109
ft                 0
fta                0
ft_percent       945
orb                0
drb                0
trb                0
ast                0
stl                0
blk                0
tov                0
pf                 0
pts                0
trp_dbl            0
dtype: int64

In [7]:
NBA_data = NBA_data.fillna({'fg_percent':0, 'x3p_percent':0, 'x2p_percent':0, 'e_fg_percent':0, 'ft_percent':0})
NBA_data.isna().sum()

season          0
player          0
player_id       0
age             0
team            0
pos             0
g               0
gs              0
mp              0
fg              0
fga             0
fg_percent      0
x3p             0
x3pa            0
x3p_percent     0
x2p             0
x2pa            0
x2p_percent     0
e_fg_percent    0
ft              0
fta             0
ft_percent      0
orb             0
drb             0
trb             0
ast             0
stl             0
blk             0
tov             0
pf              0
pts             0
trp_dbl         0
dtype: int64

3. Create New Columns for players stat and rename some colunms to be more informative
`mpg`: Minutes Per Game<br>
`fpg`: Field Goal Per Game<br>
`fapg`: Field Goal Attempt Per Game<br>
`3pg`: 3-pointer made Per Game<br>
`3apg`: 3-pointer Attempt Per Game<br>
`2pg`: 2-pointer made Per Game<br>
`2apg`: 2-pointer Attempt Per Game<br>
`ftpg`: Free Throw made Per Game<br>
`ftapg`: Free Throw Attempt Per Game<br>
`orbpg`: Offensive Rebound Per Game<br>
`drbpg`: Defensive Rebound Per Game<br>
`trbpg`: Total Rebound Per Game<br>
`astpg`: Assist Per Game<br>
`stlpg`: Steal Per Game<br>
`blkpg`: BLock Per Game<br>
`tovpg`: Turnover Per Game<br>
`pfpg`: Personal Foul Per Game<br>
`ppg`: Point Per Game<br>

In [8]:
# Rename some colunms to be more informative
print(NBA_data.columns)
NBA_data = NBA_data.rename(columns={
    'g': 'games',
    'gs' : 'games_started',
    'x3p' : '3p',
    'x3pa' : '3pa',
    'x3p_percent' : '3p%',
    'x2p' : '2p',
    'x2pa' : '2pa',
    'x2p_percent' : '2p%',
    'mp' : 'minutes',
    'e_fg_percent' : 'efg%',
    'ft_percent' : 'ft%',
    'fg_percent' : 'fg%',
})
print(NBA_data.columns)
print()

# Create New Columns for players stat
NBA_data['mpg'] = np.round(NBA_data['minutes'] / NBA_data['games'], 1)
NBA_data['fpg'] = np.round(NBA_data['fg'] / NBA_data['games'], 1)
NBA_data['fapg'] = np.round(NBA_data['fga'] / NBA_data['games'], 1)
NBA_data['3pg'] = np.round(NBA_data['3p'] / NBA_data['games'], 1)
NBA_data['3apg'] = np.round(NBA_data['3pa'] / NBA_data['games'], 1)
NBA_data['2pg'] = np.round(NBA_data['2p'] / NBA_data['games'], 1)
NBA_data['2apg'] = np.round(NBA_data['2pa'] / NBA_data['games'], 1)
NBA_data['ftpg'] = np.round(NBA_data['ft'] / NBA_data['games'], 1)
NBA_data['ftapg'] = np.round(NBA_data['fta'] / NBA_data['games'], 1)
NBA_data['orbpg'] = np.round(NBA_data['orb'] / NBA_data['games'], 1)
NBA_data['drbpg'] = np.round(NBA_data['drb'] / NBA_data['games'], 1)
NBA_data['trbpg'] = np.round(NBA_data['trb'] / NBA_data['games'], 1)
NBA_data['astpg'] = np.round(NBA_data['ast'] / NBA_data['games'], 1)
NBA_data['stlpg'] = np.round(NBA_data['stl'] / NBA_data['games'], 1)
NBA_data['blkpg'] = np.round(NBA_data['blk'] / NBA_data['games'], 1)
NBA_data['tovpg'] = np.round(NBA_data['tov'] / NBA_data['games'], 1)
NBA_data['pfpg'] = np.round(NBA_data['pf'] / NBA_data['games'], 1)
NBA_data['ppg'] = np.round(NBA_data['pts'] / NBA_data['games'], 1)

print(NBA_data.columns)


Index(['season', 'player', 'player_id', 'age', 'team', 'pos', 'g', 'gs', 'mp',
       'fg', 'fga', 'fg_percent', 'x3p', 'x3pa', 'x3p_percent', 'x2p', 'x2pa',
       'x2p_percent', 'e_fg_percent', 'ft', 'fta', 'ft_percent', 'orb', 'drb',
       'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts', 'trp_dbl'],
      dtype='object')
Index(['season', 'player', 'player_id', 'age', 'team', 'pos', 'games',
       'games_started', 'minutes', 'fg', 'fga', 'fg%', '3p', '3pa', '3p%',
       '2p', '2pa', '2p%', 'efg%', 'ft', 'fta', 'ft%', 'orb', 'drb', 'trb',
       'ast', 'stl', 'blk', 'tov', 'pf', 'pts', 'trp_dbl'],
      dtype='object')

Index(['season', 'player', 'player_id', 'age', 'team', 'pos', 'games',
       'games_started', 'minutes', 'fg', 'fga', 'fg%', '3p', '3pa', '3p%',
       '2p', '2pa', '2p%', 'efg%', 'ft', 'fta', 'ft%', 'orb', 'drb', 'trb',
       'ast', 'stl', 'blk', 'tov', 'pf', 'pts', 'trp_dbl', 'mpg', 'fpg',
       'fapg', '3pg', '3apg', '2pg', '2apg', 'ftpg', 'ftapg', 'orbpg', 'dr

In [9]:
NBA_data.head()

Unnamed: 0,season,player,player_id,age,team,pos,games,games_started,minutes,fg,...,ftapg,orbpg,drbpg,trbpg,astpg,stlpg,blkpg,tovpg,pfpg,ppg
0,2025,Precious Achiuwa,achiupr01,25.0,NYK,C,57,10.0,1170.0,164,...,1.2,1.8,3.8,5.6,1.0,0.8,0.7,0.8,1.4,6.6
1,2025,Steven Adams,adamsst01,31.0,HOU,C,58,3.0,794.0,91,...,1.6,2.9,2.8,5.6,1.1,0.4,0.5,0.9,1.0,3.9
2,2025,Bam Adebayo,adebaba01,27.0,MIA,C,78,78.0,2674.0,540,...,4.2,2.4,7.2,9.6,4.3,1.3,0.7,2.1,2.1,18.1
3,2025,Ochai Agbaji,agbajoc01,24.0,TOR,SG,64,45.0,1739.0,266,...,0.8,1.0,2.8,3.8,1.5,0.9,0.5,0.8,2.0,10.4
4,2025,Santi Aldama,aldamsa01,24.0,MEM,PF,65,16.0,1660.0,313,...,1.4,1.4,5.0,6.4,2.9,0.8,0.4,1.1,1.2,12.5


In [10]:
print(len(NBA_data))
print(len(NBA_data['player'].unique()))

18705
3411


In [11]:
NBA_data.to_csv('../Data/Player_cleaned.csv', index=False)