# Get NBA team data

In [13]:
from nba_api.stats.static import teams
# get_teams returns a list of 30 dictionaries, each an NBA team.
nba_teams = teams.get_teams()

first_team = nba_teams[0]
print(f'Team data format: {first_team}')
print('Number of teams fetched: {}'.format(len(nba_teams)))

team_abbreviations = [team['abbreviation'] for team in nba_teams]
team_abbreviations.sort()
print(team_abbreviations)

Team data format: {'id': 1610612737, 'full_name': 'Atlanta Hawks', 'abbreviation': 'ATL', 'nickname': 'Hawks', 'city': 'Atlanta', 'state': 'Atlanta', 'year_founded': 1949}
Number of teams fetched: 30
['ATL', 'BKN', 'BOS', 'CHA', 'CHI', 'CLE', 'DAL', 'DEN', 'DET', 'GSW', 'HOU', 'IND', 'LAC', 'LAL', 'MEM', 'MIA', 'MIL', 'MIN', 'NOP', 'NYK', 'OKC', 'ORL', 'PHI', 'PHX', 'POR', 'SAC', 'SAS', 'TOR', 'UTA', 'WAS']


# Generate mapping between team abbreviation to team id

In [19]:
team_id_mapping = dict()

for team in nba_teams:
    team_symbol = team['abbreviation']
    team_id = team['id']
    team_id_mapping[team_symbol] = team_id
    
team_id_mapping['GSW']

1610612744

# Get team roster

In [9]:
from nba_api.stats.endpoints import commonteamroster

In [29]:
roster = commonteamroster.CommonTeamRoster(team_id = team_id_mapping['GSW'])
roster_df = roster.get_data_frames()[0]
roster_df

Unnamed: 0,TeamID,SEASON,LeagueID,PLAYER,NUM,POSITION,HEIGHT,WEIGHT,BIRTH_DATE,AGE,EXP,SCHOOL,PLAYER_ID
0,1610612744,2019,0,D'Angelo Russell,0,G,6-4,193,"FEB 23, 1996",23.0,4,Ohio State,1626156
1,1610612744,2019,0,Damion Lee,1,G-F,6-5,210,"OCT 21, 1992",27.0,2,Louisville,1627814
2,1610612744,2019,0,Willie Cauley-Stein,2,C,7-0,240,"AUG 18, 1993",26.0,4,Kentucky,1626161
3,1610612744,2019,0,Jordan Poole,3,G,6-4,194,"JUN 19, 1999",20.0,R,Michigan,1629673
4,1610612744,2019,0,Omari Spellman,4,F,6-8,245,"JUL 21, 1997",22.0,1,Villanova,1629016
5,1610612744,2019,0,Kevon Looney,5,F,6-9,222,"FEB 06, 1996",23.0,4,UCLA,1626172
6,1610612744,2019,0,Alen Smailagic,6,F,6-10,215,"AUG 18, 2000",19.0,R,,1629346
7,1610612744,2019,0,Eric Paschall,7,F,6-6,255,"NOV 04, 1996",22.0,R,Villanova,1629672
8,1610612744,2019,0,Alec Burks,8,G,6-6,214,"JUL 20, 1991",28.0,8,Colorado,202692
9,1610612744,2019,0,Jacob Evans,10,G-F,6-4,210,"JUN 18, 1997",22.0,1,Cincinnati,1628980


# Get team game log

In [40]:
from nba_api.stats.endpoints import teamgamelog

In [42]:
gamelog = teamgamelog.TeamGameLog(team_id=team_id_mapping['GSW'])
gamelog.get_data_frames()[0]

Unnamed: 0,Team_ID,Game_ID,GAME_DATE,MATCHUP,WL,W,L,W_PCT,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,1610612744,21900016,"OCT 24, 2019",GSW vs. LAC,L,0,1,0.0,240,39,...,0.967,15,27,42,27,7,0,13,21,122


# Get player game log

In [None]:
from nba_api.stats.endpoints import playergamelog

In [43]:
player_gamelog = playergamelog.PlayerGameLog(player_id=roster_df[roster_df['PLAYER'] == 'Stephen Curry']['PLAYER_ID'])
player_gamelog.get_data_frames()[0]

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE
0,22019,201939,21900016,"OCT 24, 2019",GSW vs. LAC,L,30,8,20,0.4,...,3,4,4,1,0,8,3,23,-19,1


# Get all game player stat data to interpret player scoring demographics

In [81]:
import pandas
import time

In [82]:
failed_teams = list()
player_id_mapping = dict()

for team_abbr, team_id in sorted(team_id_mapping.items()):
    print(f'Processing team {team_abbr} - id: {team_id}')
    try:
        roster = commonteamroster.CommonTeamRoster(team_id = team_id)
        time.sleep(30)
    except Exception:
        print(f'Failed to retrieve data for Team {team_abbr} - id: {team_id}')
        failed_teams.append(team_abbr)
        continue
        
    roster_df = roster.get_data_frames()[0]
    player_dict = roster_df.to_dict('records')
    
    for player in player_dict:
        player_id_mapping[player['PLAYER_ID']] = player
print('player_id_mapping is DONE')

Processing team ATL - id: 1610612737
Processing team BOS - id: 1610612738
Processing team CLE - id: 1610612739
Processing team NOP - id: 1610612740
Processing team CHI - id: 1610612741
Processing team DAL - id: 1610612742
Processing team DEN - id: 1610612743
Processing team GSW - id: 1610612744
Processing team HOU - id: 1610612745
Processing team LAC - id: 1610612746
Processing team LAL - id: 1610612747
Processing team MIA - id: 1610612748
Processing team MIL - id: 1610612749
Processing team MIN - id: 1610612750
Processing team BKN - id: 1610612751
Processing team NYK - id: 1610612752
Processing team ORL - id: 1610612753
Processing team IND - id: 1610612754
Processing team PHI - id: 1610612755
Processing team PHX - id: 1610612756
Processing team POR - id: 1610612757
Processing team SAC - id: 1610612758
Processing team SAS - id: 1610612759
Processing team OKC - id: 1610612760
Processing team TOR - id: 1610612761
Processing team UTA - id: 1610612762
Processing team MEM - id: 1610612763
P

In [84]:
import pickle

# Save data to offload polling time
with open('player_id_mapping.pickle', 'wb') as handle:
    pickle.dump(player_id_mapping, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [230]:
player_id_mapping[list(player_id_mapping.keys())[0]]

{'TeamID': 1610612737,
 'SEASON': '2019',
 'LeagueID': '00',
 'PLAYER': 'Brandon Goodwin',
 'NUM': '0',
 'POSITION': 'G',
 'HEIGHT': '6-0',
 'WEIGHT': '180',
 'BIRTH_DATE': 'OCT 02, 1995',
 'AGE': 24.0,
 'EXP': '1',
 'SCHOOL': 'Florida Gulf Coast',
 'PLAYER_ID': 1629164}

In [88]:
# Number of players
len(player_id_mapping)

501

In [198]:
import time 
from nba_api.stats.endpoints import playergamelog

season = '2019-20'

def get_player_id_gamelog_mapping(season, player_id_mapping):
    player_id_gamelog_mapping = dict()
    
    ten_percent_cnt = round(len(player_id_mapping)*.1)
    
    progress_cnt = 0
    
    for player_id, player in sorted(player_id_mapping.items()):
        retry_cnt = 0
        
        while True:
            try:
                player_name = player['PLAYER']
                #print(f'Processing player {player_name} - id: {player_id}')
                player_gamelog = playergamelog.PlayerGameLog(player_id=player_id, season=season)
                time.sleep(2)
                break
            except Exception:
                if retry_cnt == 1:
                    print(f'Failed to retrieve data for player {player_name} - id: {player_id}')
                    raise
                    
                print(f'Retrying for player {player_name} - id: {player_id}')
                retry_cnt += 1
                time.sleep(10)

        player_gamelog_df = player_gamelog.get_data_frames()[0]
        player_gamelog_dict = player_gamelog_df.to_dict('records')

        player_id_gamelog_mapping[player_id] = player_gamelog_dict
        
        if progress_cnt % ten_percent_cnt == 0:
            print(progress_cnt)
        
        progress_cnt += 1
            
    return player_id_gamelog_mapping

player_id_gamelog_mapping = get_player_id_gamelog_mapping(season=season, player_id_mapping=player_id_mapping)

# Save data to offload polling time
with open(f'player_id_gamelog_mapping_{season}.pickle', 'wb') as handle:
    pickle.dump(player_id_gamelog_mapping, handle, protocol=pickle.HIGHEST_PROTOCOL)

0
50
100
150
200
250
300
350
400
450
500


In [4]:
import pickle

season = '2018-19'

with open(f'player_id_gamelog_mapping_{season}.pickle', 'rb') as handler:
    player_id_gamelog_mapping = pickle.load(handler)
    
with open(f'player_id_mapping.pickle', 'rb') as handler:
    player_id_mapping = pickle.load(handler)

In [5]:
player_id_gamelog_mapping[list(sorted(player_id_mapping.keys()))[0]][0]

{'SEASON_ID': '22018',
 'Player_ID': 1713,
 'Game_ID': '0021801220',
 'GAME_DATE': 'APR 10, 2019',
 'MATCHUP': 'ATL vs. IND',
 'WL': 'L',
 'MIN': 12,
 'FGM': 0,
 'FGA': 4,
 'FG_PCT': 0.0,
 'FG3M': 0,
 'FG3A': 3,
 'FG3_PCT': 0.0,
 'FTM': 1,
 'FTA': 2,
 'FT_PCT': 0.5,
 'OREB': 0,
 'DREB': 0,
 'REB': 0,
 'AST': 0,
 'STL': 0,
 'BLK': 1,
 'TOV': 0,
 'PF': 2,
 'PTS': 1,
 'PLUS_MINUS': -13,
 'VIDEO_AVAILABLE': 1}

In [6]:
player_id_mapping_list = list(sorted(player_id_mapping.keys()))
len(player_id_mapping_list), player_id_mapping_list[-1]

(501, 1629750)

In [7]:
player_id_gamelog_mapping_list = list(sorted(player_id_gamelog_mapping.keys()))
len(player_id_gamelog_mapping_list), player_id_gamelog_mapping_list[-1]

(501, 1629750)

In [8]:
missing_player_ids = set(player_id_mapping_list)-set(player_id_gamelog_mapping_list)
missing_players = list()
for player_id in missing_player_ids:
    name = player_id_mapping[player_id]['PLAYER']
    entry = f'{name} - {player_id}'
    missing_players.append(entry)
    
print(missing_players[:5])

[]


In [9]:
len(player_id_gamelog_mapping)

501

In [10]:
player_id_stats_mapping = dict()
idx = 0
for player_id, gamelog in player_id_gamelog_mapping.items():
    for game in gamelog:
        player_id_stats_mapping[idx] = game
        idx += 1

In [11]:
import pandas as pd

header = list(game.keys())
player_stat_df = pd.DataFrame.from_dict(player_id_stats_mapping, orient='index', columns=header)

In [30]:
player_id = 201939 # Stephen Curry
print(header)

['SEASON_ID', 'Player_ID', 'Game_ID', 'GAME_DATE', 'MATCHUP', 'WL', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'PLUS_MINUS', 'VIDEO_AVAILABLE']


In [39]:
player_stat_df[player_stat_df['Player_ID'] == player_id][:5]

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE
3390,22018,201939,21801215,2019-04-09,GSW @ NOP,W,9,2,4,0.5,...,1,1,1,0,0,2,0,5,1,1
3391,22018,201939,21801205,2019-04-07,GSW vs. LAC,W,30,11,20,0.55,...,5,5,4,1,1,0,3,27,35,1
3392,22018,201939,21801191,2019-04-05,GSW vs. CLE,W,36,12,21,0.571,...,5,6,7,0,1,4,1,40,5,1
3393,22018,201939,21801179,2019-04-04,GSW @ LAL,W,29,3,14,0.214,...,9,10,7,1,0,2,1,7,32,1
3394,22018,201939,21801164,2019-04-02,GSW vs. DEN,W,30,6,11,0.545,...,4,4,5,3,0,4,4,17,25,1


In [14]:
# Typecast game_date string to datetime type
player_stat_df['GAME_DATE'] = pd.to_datetime(player_stat_df['GAME_DATE'], format='%b %d, %Y')
player_stat_df[:5]

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE
0,22018,1713,21801220,2019-04-10,ATL vs. IND,L,12,0,4,0.0,...,0,0,0,0,1,0,2,1,-13,1
1,22018,1713,21801202,2019-04-07,ATL @ MIL,L,31,1,10,0.1,...,8,9,2,0,1,1,4,5,11,1
2,22018,1713,21801181,2019-04-05,ATL @ ORL,L,22,3,6,0.5,...,0,0,1,1,2,2,1,11,-3,1
3,22018,1713,21801162,2019-04-02,ATL @ SAS,L,28,3,5,0.6,...,1,2,5,0,0,1,0,8,-10,1
4,22018,1713,21801145,2019-03-31,ATL vs. MIL,W,19,3,11,0.273,...,2,2,0,1,1,0,1,9,-2,1


In [15]:
index_cols = ['SEASON_ID', 'Player_ID', 'Game_ID', 'GAME_DATE', 'MATCHUP', 'WL', 
              'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT',
             'PLUS_MINUS', 'VIDEO_AVAILABLE']
player_mean_stats_df = player_stat_df.set_index(index_cols).groupby('Player_ID').agg(['mean'])

In [16]:
# Groupby and rename columns
player_mean_stats_df.columns = ["_".join(x) for x in player_mean_stats_df.columns.ravel()]

In [17]:
player_mean_stats_df[:5]

Unnamed: 0_level_0,MIN_mean,OREB_mean,DREB_mean,REB_mean,AST_mean,STL_mean,BLK_mean,TOV_mean,PF_mean,PTS_mean
Player_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1713,17.447368,0.407895,2.144737,2.552632,1.144737,0.578947,0.355263,0.631579,1.855263,7.394737
2199,15.927273,1.672727,3.909091,5.581818,0.672727,0.381818,0.418182,0.763636,2.0,3.145455
2200,11.966667,0.733333,3.833333,4.566667,1.733333,0.166667,0.5,0.533333,0.966667,3.933333
2403,12.952381,0.809524,2.119048,2.928571,0.619048,0.428571,0.357143,0.261905,2.071429,3.595238
2544,35.2,1.036364,7.418182,8.454545,8.254545,1.309091,0.6,3.581818,1.709091,27.363636


In [18]:
player_mean_stats_df.sort_values(by=['AST_mean','PTS_mean'], ascending=False)
player_mean_stats_df[:5]

Unnamed: 0_level_0,MIN_mean,OREB_mean,DREB_mean,REB_mean,AST_mean,STL_mean,BLK_mean,TOV_mean,PF_mean,PTS_mean
Player_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1713,17.447368,0.407895,2.144737,2.552632,1.144737,0.578947,0.355263,0.631579,1.855263,7.394737
2199,15.927273,1.672727,3.909091,5.581818,0.672727,0.381818,0.418182,0.763636,2.0,3.145455
2200,11.966667,0.733333,3.833333,4.566667,1.733333,0.166667,0.5,0.533333,0.966667,3.933333
2403,12.952381,0.809524,2.119048,2.928571,0.619048,0.428571,0.357143,0.261905,2.071429,3.595238
2544,35.2,1.036364,7.418182,8.454545,8.254545,1.309091,0.6,3.581818,1.709091,27.363636


# Measure number of games attended - Evaluate Durability

In [19]:
player_id_gameplay_df = player_stat_df.set_index(header[:-1]).groupby('Player_ID').agg('count')
player_id_gameplay_df.rename(columns={ header[-1] : 'num_games'}, inplace=True)

# Calculate totals, percentages for FG, 3-pointers, FT

In [20]:
index_cols = ['SEASON_ID', 'Player_ID', 'Game_ID', 'GAME_DATE', 'MATCHUP', 'WL', 
              'MIN', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 
              'PLUS_MINUS', 'VIDEO_AVAILABLE','FG_PCT','FT_PCT', 'FG3_PCT']
player_sum_stats_df = player_stat_df.set_index(index_cols).groupby('Player_ID').agg(['sum'])
player_sum_stats_df.columns = ["_".join(x) for x in player_sum_stats_df.columns.ravel()]
player_sum_stats_df[:5]

Unnamed: 0_level_0,FGM_sum,FGA_sum,FG3M_sum,FG3A_sum,FTM_sum,FTA_sum
Player_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1713,196,468,123,316,47,66
2199,61,99,0,1,51,87
2200,42,94,6,13,28,40
2403,60,116,0,3,31,47
2544,558,1095,111,327,278,418


In [21]:
# Check for nulls
player_sum_stats_df.isnull().values.any()

False

In [22]:
player_sum_stats_df.columns

Index(['FGM_sum', 'FGA_sum', 'FG3M_sum', 'FG3A_sum', 'FTM_sum', 'FTA_sum'], dtype='object')

In [23]:
percent_pairs = list()

for idx in range(len(player_sum_stats_df.columns)):    
    if idx != 0 and (idx+1) % 2 == 0:
        percent_pairs.append(tuple(player_sum_stats_df.columns[idx-1:idx+1]))

print(f'Percent pairs: {percent_pairs}')

def generate_percentage(row, num, denom):
    if not row[denom]:
        return 0
    
    return round(row[num]/row[denom],3)

for num, denom in percent_pairs:
    key = num.split('_')[0] + '%'
    player_sum_stats_df[key] = player_sum_stats_df.apply (lambda row: generate_percentage(row, num, denom), axis=1)

player_sum_stats_df[:5]

Percent pairs: [('FGM_sum', 'FGA_sum'), ('FG3M_sum', 'FG3A_sum'), ('FTM_sum', 'FTA_sum')]


Unnamed: 0_level_0,FGM_sum,FGA_sum,FG3M_sum,FG3A_sum,FTM_sum,FTA_sum,FGM%,FG3M%,FTM%
Player_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1713,196,468,123,316,47,66,0.419,0.389,0.712
2199,61,99,0,1,51,87,0.616,0.0,0.586
2200,42,94,6,13,28,40,0.447,0.462,0.7
2403,60,116,0,3,31,47,0.517,0.0,0.66
2544,558,1095,111,327,278,418,0.51,0.339,0.665


In [24]:
player_sum_stats_df.drop(columns=[pair[0] for pair in percent_pairs], inplace=True)

## Combine all dataframes for a summarized view
#### Durability - Number of Games Attended and game duration
#### Talent - Offensive/Defense Averages
#### Accuracy - FG %, FT %, 3FG %

In [25]:
overall_df = player_mean_stats_df.join(player_id_gameplay_df).join(player_sum_stats_df)
overall_df[:5]

Unnamed: 0_level_0,MIN_mean,OREB_mean,DREB_mean,REB_mean,AST_mean,STL_mean,BLK_mean,TOV_mean,PF_mean,PTS_mean,num_games,FGA_sum,FG3A_sum,FTA_sum,FGM%,FG3M%,FTM%
Player_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1713,17.447368,0.407895,2.144737,2.552632,1.144737,0.578947,0.355263,0.631579,1.855263,7.394737,76,468,316,66,0.419,0.389,0.712
2199,15.927273,1.672727,3.909091,5.581818,0.672727,0.381818,0.418182,0.763636,2.0,3.145455,55,99,1,87,0.616,0.0,0.586
2200,11.966667,0.733333,3.833333,4.566667,1.733333,0.166667,0.5,0.533333,0.966667,3.933333,30,94,13,40,0.447,0.462,0.7
2403,12.952381,0.809524,2.119048,2.928571,0.619048,0.428571,0.357143,0.261905,2.071429,3.595238,42,116,3,47,0.517,0.0,0.66
2544,35.2,1.036364,7.418182,8.454545,8.254545,1.309091,0.6,3.581818,1.709091,27.363636,55,1095,327,418,0.51,0.339,0.665


In [26]:
player_id_mapping[list(player_id_mapping.keys())[0]]

{'TeamID': 1610612737,
 'SEASON': '2019',
 'LeagueID': '00',
 'PLAYER': 'Brandon Goodwin',
 'NUM': '0',
 'POSITION': 'G',
 'HEIGHT': '6-0',
 'WEIGHT': '180',
 'BIRTH_DATE': 'OCT 02, 1995',
 'AGE': 24.0,
 'EXP': '1',
 'SCHOOL': 'Florida Gulf Coast',
 'PLAYER_ID': 1629164}

In [27]:
# Replace player id with names
player_id_name_mapping = { player['PLAYER_ID'] : player['PLAYER'] for player in player_id_mapping.values() }
overall_df = overall_df.rename(index=player_id_name_mapping)

In [28]:
overall_df.describe()

Unnamed: 0,MIN_mean,OREB_mean,DREB_mean,REB_mean,AST_mean,STL_mean,BLK_mean,TOV_mean,PF_mean,PTS_mean,num_games,FGA_sum,FG3A_sum,FTA_sum,FGM%,FG3M%,FTM%
count,391.0,391.0,391.0,391.0,391.0,391.0,391.0,391.0,391.0,391.0,391.0,391.0,391.0,391.0,391.0,391.0,391.0
mean,22.096275,0.957684,3.221138,4.178823,2.220971,0.703975,0.462602,1.239665,1.933629,10.151223,57.841432,518.363171,185.647059,135.056266,0.452084,0.305524,0.739992
std,8.365206,0.835175,1.917193,2.583232,1.881055,0.407648,0.430003,0.815294,0.730748,6.170997,21.315347,376.473489,166.57658,129.568839,0.081217,0.117748,0.127188
min,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,15.853914,0.407651,1.909056,2.444272,0.950247,0.401923,0.165612,0.647473,1.436049,5.720759,44.0,229.5,41.5,46.0,0.412,0.2895,0.6885
50%,22.84127,0.708333,2.9,3.671875,1.554054,0.631579,0.358025,1.033898,1.946429,8.810345,64.0,457.0,152.0,92.0,0.444,0.339,0.763
75%,28.632382,1.242647,4.014062,5.225298,2.962477,0.903075,0.6,1.580446,2.435731,13.623821,75.0,729.5,289.5,185.5,0.487,0.3705,0.8225
max,36.935065,5.35443,11.109375,15.594937,10.739726,2.207792,2.689189,4.961538,3.793103,36.128205,82.0,1909.0,1028.0,858.0,0.716,0.529,1.0


In [29]:
overall_df.sort_values(by=['AST_mean','FTM%','FGM%','STL_mean','REB_mean','BLK_mean','num_games'], ascending=False).rank(pct=True).round(2)[:30]

Unnamed: 0_level_0,MIN_mean,OREB_mean,DREB_mean,REB_mean,AST_mean,STL_mean,BLK_mean,TOV_mean,PF_mean,PTS_mean,num_games,FGA_sum,FG3A_sum,FTA_sum,FGM%,FG3M%,FTM%
Player_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Russell Westbrook,0.99,0.82,0.99,0.97,1.0,0.99,0.64,1.0,0.97,0.95,0.68,0.98,0.9,0.97,0.39,0.26,0.19
John Wall,0.96,0.31,0.57,0.5,1.0,0.96,0.88,0.99,0.64,0.93,0.15,0.61,0.54,0.73,0.5,0.29,0.27
Kyle Lowry,0.94,0.43,0.78,0.7,0.99,0.93,0.68,0.95,0.8,0.79,0.52,0.76,0.94,0.77,0.24,0.56,0.79
LeBron James,0.98,0.7,0.96,0.93,0.99,0.9,0.75,0.98,0.38,0.99,0.36,0.9,0.8,0.95,0.82,0.51,0.2
Jeff Teague,0.79,0.23,0.32,0.28,0.99,0.81,0.61,0.9,0.61,0.7,0.21,0.44,0.4,0.69,0.36,0.46,0.67
Chris Paul,0.88,0.42,0.74,0.65,0.99,0.99,0.43,0.93,0.79,0.82,0.4,0.74,0.84,0.79,0.32,0.64,0.9
Trae Young,0.82,0.56,0.51,0.51,0.98,0.73,0.27,0.99,0.39,0.91,0.92,0.96,0.95,0.94,0.3,0.39,0.78
Rajon Rondo,0.78,0.52,0.81,0.76,0.98,0.89,0.23,0.94,0.63,0.53,0.27,0.47,0.47,0.19,0.21,0.65,0.17
Jrue Holiday,0.99,0.72,0.72,0.72,0.98,0.97,0.85,0.97,0.64,0.94,0.56,0.92,0.84,0.88,0.67,0.4,0.53
Ben Simmons,0.95,0.9,0.94,0.94,0.98,0.93,0.84,0.98,0.84,0.86,0.85,0.86,0.1,0.95,0.91,0.05,0.1
