# Setup

In [1]:
import pandas as pd
import numpy as np
import nba_api
import json
import time

In [2]:
season = '2019-20'

## nba_api packages

In [3]:
from nba_api.stats.static.teams import get_teams, find_teams_by_nickname
from nba_api.stats.static.players import get_active_players, find_players_by_full_name
from nba_api.stats.endpoints import CommonTeamRoster

from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.endpoints import LeagueDashTeamShotLocations
from nba_api.stats.endpoints import ShotChartLineupDetail
from nba_api.stats.endpoints import ShotChartDetail
from nba_api.stats.endpoints import BoxScorePlayerTrackV2

## Get spurs test game

In [4]:
# Get Spurs' team info
all_teams = get_teams()
spurs_info = find_teams_by_nickname('spurs') # find spurs nickname
spurs_id = spurs_info[0]['id'] # get spurs team ID

# find all Spurs games
gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=spurs_id,
                                              season_nullable='2019-20')

time.sleep(3)
spurs_games_df = gamefinder.get_data_frames()[0]

# test - last 2019-20 regular season game vs Utah Jazz
test_game_id = spurs_games_df.iloc[0]['GAME_ID']

spurs_games_df.head(n=5)

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22019,1610612759,SAS,San Antonio Spurs,21901314,2020-08-13,SAS @ UTA,L,239,112,...,0.667,9,35,44,24,4,8,15,20,-6.0
1,22019,1610612759,SAS,San Antonio Spurs,21901298,2020-08-11,SAS vs. HOU,W,240,123,...,0.9,9,50,59,28,10,5,22,20,18.0
2,22019,1610612759,SAS,San Antonio Spurs,21901287,2020-08-09,SAS @ NOP,W,241,122,...,0.969,15,36,51,22,12,6,17,27,9.0
3,22019,1610612759,SAS,San Antonio Spurs,21901274,2020-08-07,SAS vs. UTA,W,240,119,...,0.769,8,39,47,27,10,5,13,18,8.0
4,22019,1610612759,SAS,San Antonio Spurs,21901264,2020-08-05,SAS vs. DEN,L,239,126,...,0.852,10,25,35,29,10,5,7,20,-6.0


# Functions

In [5]:
def get_playerid(name):
    return find_players_by_full_name(name)[0]['id']

In [6]:
def get_shot_chart(playerid, teamid, gameid):
    """
    This function takes in a player and returns his shot chart for a specific game.
    
    Inputs:
    playerid: Player's unique ID
    teamid: Associated team ID
    gameid: Specific game ID
    
    Output: Entire DataFrame with shots data
    """
    # First, set a sleep timer
    time.sleep(8)
    
    shots_df = ShotChartDetail(
        player_id = playerid,
        season_nullable = season,
        team_id = teamid,
        game_id_nullable = gameid,
        context_measure_simple = 'FGA'
    )
    
    return shots_df.data_sets[0].get_data_frame()

In [7]:
def gather_team_df(spurs_or_opp):
    """
    This function grabs an entire team's shot data separated by each shot. 
    It loops through a team's roster and grabs individual team's shot data.
    
    Input: 
    spurs_or_opp: True or False
    
    Output: concatted DataFrame of a team's game shot data
    """
    if spurs_or_opp == True:
        player_list = spurs_roster_list
        team_id = spurs_id
    else:
        player_list = opp_roster_list
        team_id = opp_teamid
        
    data = pd.DataFrame() # start with an empty dataframe
    
    for player in player_list:
        df = get_shot_chart(player_ids[player], team_id, test_game_id)
        data = pd.concat([data, df])
        print('Finished gathering data for {}'.format(player))
        
    return data

In [21]:
def clean_shots_df(shots_df):
    """
    This function aggregates shot dataframes to team levels and cleans it using
    function rename_cols(). 
    
    The output dateframe shows the shot zone area, shot zone range, number
    of shots made and attempted in each respective area. The final created
    column is the field goal percentage formatted to XX.X%.
    
    input: Concatenated player dataframe
    output: cleaned, aggregated dataframe
    """
    df = shots_df.copy()
    
    groupby_cols = ['GAME_ID', 'TEAM_NAME', 'SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE']
    out_cols = ['GAME_ID', 'TEAM_NAME', 'SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE',
               'SHOT_MADE_FLAG', 'SHOT_ATTEMPTED_FLAG']
    
    # aggregate shots 
    agg_df = df.groupby(groupby_cols)[out_cols].sum().reset_index()
    
    # add a fg pct column and format it
    # format by leaving one decimal place and adding '%' to the end
    agg_df['rounded_fg_pct'] = round((agg_df.SHOT_MADE_FLAG / agg_df.SHOT_ATTEMPTED_FLAG)* 100).astype(int)
    agg_df['formatted_fg_pct'] = agg_df.rounded_fg_pct.apply(lambda x: str(x) + '%')
    
    # format column names
    agg_df = rename_cols(agg_df)
    
    return agg_df

In [9]:
def rename_cols(df):
    """
    This function takes an aggregated team shots dataframe and formats
    the columns to lower case. It also renames two flagged columns for
    easier readability.
    """
    data = df.copy()
    
    shots_cols = {
        'SHOT_MADE_FLAG' : 'shots_made',
        'SHOT_ATTEMPTED_FLAG' : 'shots_attempted'
    }
    
    data.rename(columns = shots_cols, inplace = True)
    
    # lower column names
    cols = dict()
    
    for col in list(data.columns):
        cols[col] = col.lower()
        
    data.rename(columns = cols, inplace = True)
    
    return data

In [10]:
# format percentages
def format_pct(pct):
    pct = round(pct*100, 2)
    out_format = str(pct) + '%'
    return out_format

In [33]:
def combine_shooting_fields(df, makes_col, att_col, field_name):
    """
    This function combines FGM and FGA to FGM-FGA format.
    """
    df[field_name] = df[makes_col].astype(int).astype(str) + '-' + df[att_col].astype(int).astype(str)

In [93]:
def calc_fg_diffs(team_df, avg_df):
    """
    This function takes a team shots dataframe and the league average dataframe as inputs.
    The output is the difference between a team's fg percentage in a certain area
    versus the league average.
    """
    
    shot_area = 'shot_zone_area'
    shot_range = 'shot_zone_range'
    
    data = list()
    
    for i in range(len(team_df)):
        avg_pct = avg_df[
            (avg_df[shot_area] == team_df.iloc[i][shot_area]) &
            (avg_df[shot_range] == team_df.iloc[i][shot_range])
        ].iloc[0]['rounded_fg_pct']
        
        team_pct = team_df.iloc[i]['rounded_fg_pct']
        
        #print(avg_pct)
        #print(team_pct)
        
        diff = team_pct - avg_pct
        #print(diff)
        
        data.append(team_pct - avg_pct)
        
    return pd.Series(data)

# Get Spurs and opponent rosters

In [11]:
roster_data_df = BoxScorePlayerTrackV2(game_id=test_game_id).data_sets[0].get_data_frame()
time.sleep(5)
spurs_roster_list = list(roster_data_df[roster_data_df.TEAM_ABBREVIATION == 'SAS'].PLAYER_NAME)
opp_roster_list = list(roster_data_df[roster_data_df.TEAM_ABBREVIATION != 'SAS'].PLAYER_NAME)

In [12]:
# get opponent team ID
opp_teamid = roster_data_df[roster_data_df.TEAM_ABBREVIATION != 'SAS'].TEAM_ID.iloc[0]

## Create dictionary of Player:ID 

In [13]:
player_ids = dict()

for player in spurs_roster_list:
    player_ids[player] = get_playerid(player)
    
for player in opp_roster_list:
    player_ids[player] = get_playerid(player)

# Gather Team DataFrames

In [23]:
spurs_shots_df = gather_team_df(True)
opp_shots_df = gather_team_df(False)

Finished gathering data for Keldon Johnson
Finished gathering data for Luka Samanic
Finished gathering data for Jakob Poeltl
Finished gathering data for Lonnie Walker IV
Finished gathering data for Dejounte Murray
Finished gathering data for Marco Belinelli
Finished gathering data for Quinndary Weatherspoon
Finished gathering data for Drew Eubanks
Finished gathering data for Chimezie Metu
Finished gathering data for DeMar DeRozan
Finished gathering data for Rudy Gay
Finished gathering data for Patty Mills
Finished gathering data for Derrick White
Finished gathering data for Joe Ingles
Finished gathering data for Royce O'Neale
Finished gathering data for Tony Bradley
Finished gathering data for Jordan Clarkson
Finished gathering data for Donovan Mitchell
Finished gathering data for Georges Niang
Finished gathering data for Miye Oni
Finished gathering data for Ed Davis
Finished gathering data for Juwan Morgan
Finished gathering data for Jarrell Brantley
Finished gathering data for Rayjon

In [24]:
clean_spurs_shots_df = clean_shots_df(spurs_shots_df)
clean_opp_shots_df = clean_shots_df(opp_shots_df)

In [26]:
clean_spurs_shots_df

Unnamed: 0,game_id,team_name,shot_zone_area,shot_zone_range,shots_made,shots_attempted,rounded_fg_pct,formatted_fg_pct
0,21901314,San Antonio Spurs,Center(C),24+ ft.,2,2,100,100%
1,21901314,San Antonio Spurs,Center(C),8-16 ft.,3,10,30,30%
2,21901314,San Antonio Spurs,Center(C),Less Than 8 ft.,17,30,57,57%
3,21901314,San Antonio Spurs,Left Side Center(LC),24+ ft.,3,6,50,50%
4,21901314,San Antonio Spurs,Left Side(L),16-24 ft.,3,5,60,60%
5,21901314,San Antonio Spurs,Left Side(L),24+ ft.,1,3,33,33%
6,21901314,San Antonio Spurs,Left Side(L),8-16 ft.,1,1,100,100%
7,21901314,San Antonio Spurs,Right Side Center(RC),16-24 ft.,0,3,0,0%
8,21901314,San Antonio Spurs,Right Side Center(RC),24+ ft.,3,7,43,43%
9,21901314,San Antonio Spurs,Right Side(R),16-24 ft.,1,4,25,25%


In [27]:
clean_spurs_shots_df.dtypes

game_id             object
team_name           object
shot_zone_area      object
shot_zone_range     object
shots_made           int64
shots_attempted      int64
rounded_fg_pct       int64
formatted_fg_pct    object
dtype: object

# Get League Average Shot Chart

In [38]:
# get league average shot chart

name = get_playerid(spurs_roster_list[0])

league_avg = ShotChartDetail(
    season_nullable=season,
    team_id=spurs_id,
    player_id=name,
    context_measure_simple='FGA'
)

time.sleep(5)

league_avg_df = league_avg.data_sets[1].get_data_frame() # league averages is 1

In [41]:
league_avg_df.columns

Index(['GRID_TYPE', 'SHOT_ZONE_BASIC', 'SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE',
       'FGA', 'FGM', 'FG_PCT'],
      dtype='object')

In [42]:
groupby_cols = ['SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE']
out_cols = ['SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE', 'FG_PCT']

league_avg_df = league_avg_df.groupby(groupby_cols)[out_cols].sum().reset_index()

In [50]:
league_avg_df = league_avg.data_sets[1].get_data_frame() # league averages is 1
league_avg_df = rename_cols(league_avg_df)
league_avg_df['rounded_fg_pct'] = (league_avg_df.fg_pct * 100).astype(int)
league_avg_df['formatted_fg_pct'] = round(league_avg_df.rounded_fg_pct).astype(str) + '%'

# Calculate team fg pct minus league avg fg pct by area

In [94]:
# call function to calculate difference
clean_spurs_shots_df['fg_diff'] = calc_fg_diffs(clean_spurs_shots_df, league_avg_df)
clean_opp_shots_df['fg_diff'] = calc_fg_diffs(clean_opp_shots_df, league_avg_df)

In [114]:
# format by giving it a % marker
clean_spurs_shots_df['formatted_fgpct_diff'] = clean_spurs_shots_df.fg_diff.apply(lambda x: str(x) + '%')
clean_opp_shots_df['formatted_fgpct_diff'] = clean_opp_shots_df.fg_diff.apply(lambda x: str(x) + '%')

# Clean up columns formatting for outputs

In [115]:
# combine shooting fields 
combine_shooting_fields(clean_spurs_shots_df, 'shots_made', 'shots_attempted', 'fg')
combine_shooting_fields(clean_opp_shots_df, 'shots_made', 'shots_attempted', 'fg')

In [116]:
league_avg_df.columns

Index(['grid_type', 'shot_zone_basic', 'shot_zone_area', 'shot_zone_range',
       'fga', 'fgm', 'fg_pct', 'rounded_fg_pct', 'formatted_fg_pct'],
      dtype='object')

In [117]:
clean_spurs_shots_df.columns

Index(['game_id', 'team_name', 'shot_zone_area', 'shot_zone_range',
       'shots_made', 'shots_attempted', 'rounded_fg_pct', 'formatted_fg_pct',
       'fg_diff', 'formatted_fg_diff', 'formatted_fgpct_diff', 'fg'],
      dtype='object')

In [118]:
clean_opp_shots_df.columns

Index(['game_id', 'team_name', 'shot_zone_area', 'shot_zone_range',
       'shots_made', 'shots_attempted', 'rounded_fg_pct', 'formatted_fg_pct',
       'fg_diff', 'formatted_fgpct_diff', 'fg'],
      dtype='object')

In [119]:
# Filter out working columns
out_spurs_df = clean_spurs_shots_df[[
    'game_id', 'team_name', 'shot_zone_area', 'shot_zone_range', 'fg',
    'formatted_fg_pct', 'formatted_fgpct_diff'
]].copy()

out_opp_df = clean_opp_shots_df[[
    'game_id', 'team_name', 'shot_zone_area', 'shot_zone_range', 'fg',
    'formatted_fg_pct', 'formatted_fgpct_diff'
]].copy()

out_league_avg_df = league_avg_df[[
    'shot_zone_basic', 'shot_zone_area', 'shot_zone_range',
    'fgm', 'fga', 'fg_pct', 'formatted_fg_pct'
]].copy()

In [123]:
# rename column for formatting

col_renames = {
    'formatted_fg_pct' : 'fg_pct',
    'formatted_fgpct_diff': 'fg_pct_diff'
}

out_spurs_df.rename(columns=col_renames, inplace=True)
out_opp_df.rename(columns=col_renames, inplace=True)

In [124]:
out_spurs_df

Unnamed: 0,game_id,team_name,shot_zone_area,shot_zone_range,fg,fg_pct,fg_pct_diff
0,21901314,San Antonio Spurs,Center(C),24+ ft.,2-2,100%,66%
1,21901314,San Antonio Spurs,Center(C),8-16 ft.,3-10,30%,-12%
2,21901314,San Antonio Spurs,Center(C),Less Than 8 ft.,17-30,57%,19%
3,21901314,San Antonio Spurs,Left Side Center(LC),24+ ft.,3-6,50%,15%
4,21901314,San Antonio Spurs,Left Side(L),16-24 ft.,3-5,60%,21%
5,21901314,San Antonio Spurs,Left Side(L),24+ ft.,1-3,33%,-6%
6,21901314,San Antonio Spurs,Left Side(L),8-16 ft.,1-1,100%,60%
7,21901314,San Antonio Spurs,Right Side Center(RC),16-24 ft.,0-3,0%,-40%
8,21901314,San Antonio Spurs,Right Side Center(RC),24+ ft.,3-7,43%,8%
9,21901314,San Antonio Spurs,Right Side(R),16-24 ft.,1-4,25%,-14%


In [125]:
out_opp_df

Unnamed: 0,game_id,team_name,shot_zone_area,shot_zone_range,fg,fg_pct,fg_pct_diff
0,21901314,Utah Jazz,Center(C),16-24 ft.,0-1,0%,-41%
1,21901314,Utah Jazz,Center(C),24+ ft.,2-3,67%,33%
2,21901314,Utah Jazz,Center(C),8-16 ft.,2-3,67%,25%
3,21901314,Utah Jazz,Center(C),Less Than 8 ft.,28-45,62%,24%
4,21901314,Utah Jazz,Left Side Center(LC),24+ ft.,6-18,33%,-2%
5,21901314,Utah Jazz,Left Side(L),24+ ft.,0-3,0%,-39%
6,21901314,Utah Jazz,Left Side(L),8-16 ft.,0-1,0%,-40%
7,21901314,Utah Jazz,Right Side Center(RC),24+ ft.,5-16,31%,-4%
8,21901314,Utah Jazz,Right Side(R),16-24 ft.,1-2,50%,11%
9,21901314,Utah Jazz,Right Side(R),24+ ft.,2-6,33%,-5%


In [126]:
out_league_avg_df

Unnamed: 0,shot_zone_basic,shot_zone_area,shot_zone_range,fgm,fga,fg_pct,formatted_fg_pct
0,Above the Break 3,Back Court(BC),Back Court Shot,6,45,0.133,13%
1,Above the Break 3,Center(C),24+ ft.,5168,14955,0.346,34%
2,Above the Break 3,Left Side Center(LC),24+ ft.,7459,20895,0.357,35%
3,Above the Break 3,Right Side Center(RC),24+ ft.,6875,19624,0.35,35%
4,Backcourt,Back Court(BC),Back Court Shot,7,405,0.017,1%
5,In The Paint (Non-RA),Center(C),8-16 ft.,4159,9820,0.424,42%
6,In The Paint (Non-RA),Center(C),Less Than 8 ft.,6518,16915,0.385,38%
7,In The Paint (Non-RA),Left Side(L),8-16 ft.,714,1770,0.403,40%
8,In The Paint (Non-RA),Right Side(R),8-16 ft.,738,1871,0.394,39%
9,Left Corner 3,Left Side(L),24+ ft.,3329,8380,0.397,39%


In [127]:
# output files

out_spurs_df.to_json('data/shots_spurs.json', orient='records')
out_opp_df.to_json('data/shots_opp.json', orient='records')
out_league_avg_df.to_json('data/shots_league_avg.json', orient='records')