In [2]:
import sys
sys.path.append('/Users/victor/Documents/code/nba_api')

from nba_py import player
import pandas as pd

## Getting Shots and Block Data

In [3]:
player_id = 1629116

# The PlayerShootingSplits NBA end point is the one we will be using
# It returns shooting and blocking stats grouped by location on court
player.PlayerShootingSplits(player_id).shot_areas()

# Sometimes a new player will not have stats. 
class NoDataError(Exception):
    pass

In [4]:
# Extracting shooting and blocking columns
player_shot_block_raw = player.PlayerShootingSplits(player_id).shot_areas()
if player_shot_block_raw.shape[0] == 0:
    raise NoDataError("No data for {}".format(player_id))

player_shot_and_block_by_area = player_shot_block_raw.T
player_shot_and_block_by_area.columns = player_shot_and_block_by_area.loc['GROUP_VALUE']
player_shot_and_block_by_area = player_shot_and_block_by_area.loc[['FGA','BLKA'], "Restricted Area":"Above the Break 3"]

In [5]:
player_shot_and_block_by_area

GROUP_VALUE,Restricted Area,In The Paint (Non-RA),Mid-Range,Left Corner 3,Above the Break 3
FGA,4,1,0,0,0
BLKA,1,1,0,0,0


In [6]:
player_shot_by_area = player_shot_and_block_by_area.loc[["FGA"],:]
player_block_by_area = player_shot_and_block_by_area.loc[["BLKA"],:]

In [9]:
# Use a dict to map old col name to new col name, using dict is required
# because the API sometimes returns inconsistent column order and number of cols
shot_col_map = {"Restricted Area":'shot_res',
               "In The Paint (Non-RA)":'shot_in_paint',
               "Mid-Range":'shot_mid_range',
               'Left Corner 3':'shot_lcorner_3',
               'Right Corner 3':'shot_rcorner_3',
               'Above the Break 3':'shot_above_3'}

block_col_map = {"Restricted Area":'block_res',
               "In The Paint (Non-RA)":'block_in_paint',
               "Mid-Range":'block_mid_range',
               'Left Corner 3':'block_lcorner_3',
               'Right Corner 3':'block_rcorner_3',
               'Above the Break 3':'block_above_3'}

# Applying the column name map
player_shot_by_area = player_shot_by_area.rename(columns=shot_col_map)
player_block_by_area = player_block_by_area.rename(columns=block_col_map)

# Total attempted shots and blocks
total_shots = player_shot_by_area.loc['FGA',:].sum()
total_blocks = player_block_by_area.loc['BLKA',:].sum()

# Normalize all stats by total shots to get relative frequencies for each player
player_shot_by_area /= total_shots + 1e-5
player_block_by_area /= total_blocks + 1e-5

SyntaxError: invalid syntax (<ipython-input-9-e3a27a9ed3c6>, line 7)

In [11]:
# Record total attempted shots and blocks
player_shot_by_area['fga'] = total_shots
player_block_by_area['blka'] = total_blocks

In [12]:
player_shot_by_area

Unnamed: 0,res_shot,in_paint_shot,mid_range_shot,l_corner3_shot,r_corner3_shot,above_break3_shot,fga
FGA,0.33333,0.190474,0.0952371,0,0.0952371,0.285711,1.0


In [13]:
player_block_by_area

Unnamed: 0,res_block,in_paint_block,mid_range_block,l_corner3_block,r_corner3_block,above_break3_block,blka
BLKA,0,0,0,0,0,0,0


## Getting Rebound, Assist and Steal Data
This one is a bit more tricky to normalize, because the stats has 2 components.

1) Player specific component: where a better player is going to get a higher stat regardless of the position they play

2) Position specific compoment: where the position a player plays has systemic influence on his rebound and assist stats. For example, a center will always have more rebounds than anyone else on the team. Similarily, a point guard who's job is to manage the offense, will often have the most assist in a match.

We will record the stats as-is, and preprocess them in a pipeline during training time.

In [23]:
# The point for other player stats, aside from shooting and blocking
player.PlayerYearOverYearSplits(player_id, per_mode="Per48").by_year()

Unnamed: 0,GROUP_SET,GROUP_VALUE,TEAM_ID,TEAM_ABBREVIATION,MAX_GAME_DATE,GP,W,L,W_PCT,MIN,...,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,NBA_FANTASY_PTS_RANK,DD2_RANK,TD3_RANK,CFID,CFPARAMS
0,By Year,2018-19,1610612743,DEN,2019-04-10T00:00:00,81,53,28,0.654,1878.723333,...,3,2,3,2,2,2,1,1,264,2018-19
1,By Year,2017-18,1610612743,DEN,2018-04-07T00:00:00,62,34,28,0.548,582.731667,...,2,1,1,3,3,3,1,1,264,2017-18
2,By Year,2016-17,1610612743,DEN,2017-04-12T00:00:00,22,12,10,0.545,164.895,...,1,3,2,1,1,1,1,1,264,2016-17


In [10]:
cols_of_interest = ['OREB', 'DREB', 'AST', 'STL', "MIN"]

# Take the most recent 2 years
rebound_assist = player.PlayerYearOverYearSplits(player_id, per_mode="Per48").by_year().iloc[:2,:]
rebound_assist = rebound_assist[cols_of_interest]
rebound_assist.columns = [c.lower() for c in rebound_assist.columns]

In [11]:
rebound_assist

Unnamed: 0,oreb,dreb,ast,stl,min
0,0.9,4.2,2.5,1.4,1878.723333
1,1.2,4.7,2.6,1.2,582.731667


In [12]:
rebound_assist_summed = pd.DataFrame(rebound_assist.sum()).T
rebound_assist_summed

Unnamed: 0,oreb,dreb,ast,stl,min
0,2.1,8.9,5.1,2.6,2461.455


In [18]:
player_stat = pd.concat([player_shot_by_area.reset_index(drop=True), 
                            player_block_by_area.reset_index(drop=True), 
                            rebound_assist_summed.reset_index(drop=True)],
                            axis=1)
player_stat = player_stat.rename({0:player_id})

In [19]:
display(player_stat)
print(player_stat.shape)

Unnamed: 0,res_shot,in_paint_shot,mid_range_shot,l_corner3_shot,r_corner3_shot,above_break3_shot,fga,res_block,in_paint_block,mid_range_block,l_corner3_block,r_corner3_block,above_break3_block,blka,oreb,dreb,ast,stl,min
1627736,0.237772,0.101902,0.118207,0.0747283,0.0380435,0.429348,736,0.588235,0.294118,0.0588235,0,0,0.0588235,34,2.1,8.9,5.1,2.6,2461.455


(1, 19)
