# Setup

In [1]:
import pandas as pd
import numpy as np
import nba_api
import json
import time

In [2]:
# NBA season identifier; read by get_shot_chart() and by the league-average
# ShotChartDetail request further down the notebook.
season = '2019-20'

## nba_api packages

In [3]:
from nba_api.stats.static.teams import get_teams, find_teams_by_nickname
from nba_api.stats.static.players import get_active_players, find_players_by_full_name
from nba_api.stats.endpoints import CommonTeamRoster

from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.endpoints import LeagueDashTeamShotLocations
from nba_api.stats.endpoints import ShotChartLineupDetail
from nba_api.stats.endpoints import ShotChartDetail
from nba_api.stats.endpoints import BoxScorePlayerTrackV2

## Get Spurs test game

In [4]:
# Look up the Spurs' static team record to obtain their team ID
all_teams = get_teams()
spurs_info = find_teams_by_nickname('spurs')  # search teams by nickname
spurs_id = spurs_info[0]['id']  # first match is taken as the Spurs

# Find all Spurs games for the configured season. The shared `season`
# constant is used instead of repeating the literal so this query cannot
# drift out of step with the shot chart requests, which also read `season`.
gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=spurs_id,
                                              season_nullable=season)
spurs_games_df = gamefinder.get_data_frames()[0]

# Test game: the first row returned by the game finder. For 2019-20 this is
# the last regular season game, SAS @ UTA on 2020-08-13.
test_game_id = spurs_games_df.iloc[0]['GAME_ID']

spurs_games_df.head(n=5)

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22019,1610612759,SAS,San Antonio Spurs,21901314,2020-08-13,SAS @ UTA,L,239,112,...,0.667,9,35,44,24,4,8,15,20,-6.0
1,22019,1610612759,SAS,San Antonio Spurs,21901298,2020-08-11,SAS vs. HOU,W,240,123,...,0.9,9,50,59,28,10,5,22,20,18.0
2,22019,1610612759,SAS,San Antonio Spurs,21901287,2020-08-09,SAS @ NOP,W,241,122,...,0.969,15,36,51,22,12,6,17,27,9.0
3,22019,1610612759,SAS,San Antonio Spurs,21901274,2020-08-07,SAS vs. UTA,W,240,119,...,0.769,8,39,47,27,10,5,13,18,8.0
4,22019,1610612759,SAS,San Antonio Spurs,21901264,2020-08-05,SAS vs. DEN,L,239,126,...,0.852,10,25,35,29,10,5,7,20,-6.0


# Functions

In [5]:
def get_playerid(name):
    """
    This function looks up a player's unique ID from the player's full name.

    The lookup helper can return several candidates for one search string
    (for example a name that is a prefix of another player's name), so an
    exact, case-insensitive full-name match is preferred. If no candidate
    matches exactly, the first candidate is used, as before.

    Input:
    name: Player's full name

    Output: Player's unique ID

    Raises:
    IndexError: if no player matches the given name
    """
    matches = find_players_by_full_name(name)

    # Fail with a message that names the player instead of a bare
    # "list index out of range" (same exception type as before).
    if not matches:
        raise IndexError('no player found matching name {!r}'.format(name))

    wanted = name.strip().lower()
    for match in matches:
        if match.get('full_name', '').strip().lower() == wanted:
            return match['id']

    # No exact match: fall back to the first candidate
    return matches[0]['id']

In [6]:
def get_shot_chart(playerid, teamid, gameid):
    """
    Fetch a single player's shot chart for one specific game.

    The season is not an argument; it is read from the notebook-level
    `season` variable.

    Inputs:
    playerid: Player's unique ID
    teamid: ID of the team the player is associated with
    gameid: ID of the game to pull shots for

    Output: DataFrame built from the first data set of the
    ShotChartDetail response
    """
    request_params = {
        'player_id': playerid,
        'team_id': teamid,
        'game_id_nullable': gameid,
        'season_nullable': season,
        'context_measure_simple': 'FGA',
    }

    response = ShotChartDetail(**request_params)
    first_data_set = response.data_sets[0]

    return first_data_set.get_data_frame()

In [7]:
def gather_team_df(spurs_or_opp, game_id=None, pause_seconds=8):
    """
    This function grabs an entire team's shot data separated by each shot.
    It loops through a team's roster and grabs each individual player's
    shot data for one game.

    Input:
    spurs_or_opp: True for the Spurs roster, False for the opponent roster
    game_id: game to pull shots for; defaults to the notebook-level
             test_game_id when omitted
    pause_seconds: how long to sleep once the roster has been fetched
                   (default 8)

    Output: concatted DataFrame of a team's game shot data (an empty
    DataFrame if the roster is empty)
    """
    if spurs_or_opp:
        player_list = spurs_roster_list
        team_id = spurs_id
    else:
        player_list = opp_roster_list
        team_id = opp_teamid

    if game_id is None:
        game_id = test_game_id

    # Collect one frame per player and concatenate once at the end, rather
    # than re-copying a growing frame (seeded from an empty DataFrame) on
    # every iteration.
    frames = [
        get_shot_chart(player_ids[player], team_id, game_id)
        for player in player_list
    ]

    # Sleep timer to prevent bombarding API.
    # NOTE(review): this pause runs once per call, after the whole roster
    # loop, not between individual player requests - confirm that this is
    # the intended throttling.
    time.sleep(pause_seconds)

    # pd.concat() rejects an empty list, so keep returning an empty frame
    # when there are no players.
    if not frames:
        return pd.DataFrame()

    return pd.concat(frames)

In [43]:
def clean_shots_df(shots_df):
    """
    This function aggregates shot dataframes to team levels and cleans it using
    function rename_cols().

    The output dataframe shows the shot zone area, shot zone range, number
    of shots made and attempted in each respective area. The final created
    column is the field goal percentage formatted to XX.X%.

    input: Concatenated player dataframe (left unmodified)
    output: cleaned, aggregated dataframe
    """
    groupby_cols = ['GAME_ID', 'TEAM_NAME', 'SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE']
    flag_cols = ['SHOT_MADE_FLAG', 'SHOT_ATTEMPTED_FLAG']

    # Aggregate shots. Only the two flag columns are summed: if the group
    # keys are also selected as value columns they get aggregated as well
    # whenever non-numeric columns are not dropped by sum(), and
    # reset_index() then fails because the key columns already exist.
    agg_df = shots_df.groupby(groupby_cols)[flag_cols].sum().reset_index()

    # add a fg pct column and format it
    # format by leaving one decimal place and adding '%' to the end
    agg_df['fg_pct'] = round((agg_df.SHOT_MADE_FLAG / agg_df.SHOT_ATTEMPTED_FLAG) * 100, 1)
    agg_df['fg_pct'] = agg_df.fg_pct.apply(lambda x: str(x) + '%')

    # format column names
    agg_df = rename_cols(agg_df)

    return agg_df

In [44]:
def rename_cols(df):
    """
    Return a copy of a shots dataframe with reader-friendly column names.

    The two flag columns are renamed to 'shots_made' and 'shots_attempted',
    after which every column name is converted to lower case. The dataframe
    passed in is not modified.
    """
    readable_names = {
        'SHOT_MADE_FLAG': 'shots_made',
        'SHOT_ATTEMPTED_FLAG': 'shots_attempted',
    }

    # rename() without inplace returns a new frame, so the input stays intact
    renamed = df.rename(columns=readable_names)

    # map each current column name to its lower-case form
    lowered = {label: label.lower() for label in renamed.columns}

    return renamed.rename(columns=lowered)

# Get Spurs and opponent rosters

In [8]:
# Player tracking box score for the test game; the first data set is used
# as the roster table for both teams.
roster_data_df = (
    BoxScorePlayerTrackV2(game_id=test_game_id)
    .data_sets[0]
    .get_data_frame()
)

# Split the player names by team abbreviation: Spurs vs. everyone else
spurs_roster_list = roster_data_df.loc[
    roster_data_df['TEAM_ABBREVIATION'] == 'SAS', 'PLAYER_NAME'
].tolist()
opp_roster_list = roster_data_df.loc[
    roster_data_df['TEAM_ABBREVIATION'] != 'SAS', 'PLAYER_NAME'
].tolist()

In [9]:
# Opponent team ID: TEAM_ID of the first roster row that is not a Spurs player
opp_teamid = roster_data_df.loc[
    roster_data_df['TEAM_ABBREVIATION'] != 'SAS', 'TEAM_ID'
].iloc[0]

## Create dictionary of Player:ID 

In [10]:
# Map every player name from both rosters (Spurs first, then opponent)
# to that player's ID
player_ids = {}

for player in spurs_roster_list + opp_roster_list:
    player_ids[player] = get_playerid(player)

# Gather Team DataFrames

In [37]:
# Raw per-shot frames for the test game: Spurs first, then the opponent
spurs_shots_df = gather_team_df(spurs_or_opp=True)
opp_shots_df = gather_team_df(spurs_or_opp=False)

In [46]:
# Replace each raw per-shot frame with its aggregated, renamed summary.
# NOTE: this rebinds the same names it reads, so re-running this cell without
# first re-running the gather cell feeds already-cleaned frames back into
# clean_shots_df().
spurs_shots_df = clean_shots_df(spurs_shots_df)
opp_shots_df = clean_shots_df(opp_shots_df)

In [71]:
# Display the cleaned Spurs summary (one row per shot zone area / range)
spurs_shots_df

Unnamed: 0,game_id,team_name,shot_zone_area,shot_zone_range,shots_made,shots_attempted,fg_pct
0,21901314,San Antonio Spurs,Center(C),24+ ft.,2,2,100.0%
1,21901314,San Antonio Spurs,Center(C),8-16 ft.,3,10,30.0%
2,21901314,San Antonio Spurs,Center(C),Less Than 8 ft.,17,30,56.7%
3,21901314,San Antonio Spurs,Left Side Center(LC),24+ ft.,3,6,50.0%
4,21901314,San Antonio Spurs,Left Side(L),16-24 ft.,3,5,60.0%
5,21901314,San Antonio Spurs,Left Side(L),24+ ft.,1,3,33.3%
6,21901314,San Antonio Spurs,Left Side(L),8-16 ft.,1,1,100.0%
7,21901314,San Antonio Spurs,Right Side Center(RC),16-24 ft.,0,3,0.0%
8,21901314,San Antonio Spurs,Right Side Center(RC),24+ ft.,3,7,42.9%
9,21901314,San Antonio Spurs,Right Side(R),16-24 ft.,1,4,25.0%


In [72]:
# Write one JSON file per team, each as a list of row records
spurs_shots_df.to_json(path_or_buf='data/shots_spurs.json', orient='records')
opp_shots_df.to_json(path_or_buf='data/shots_opp.json', orient='records')

# Get League Average Shot Chart

In [47]:
# Request a season-long shot chart for the first Spurs player on the roster.
# The league averages are read from this response's second data set in the
# next cell; presumably they do not depend on which player is requested -
# TODO confirm.

# NOTE: despite its name, `name` holds the player's ID, not a name
name = get_playerid(spurs_roster_list[0])

league_avg = ShotChartDetail(
    player_id=name,
    team_id=spurs_id,
    season_nullable=season,
    context_measure_simple='FGA',
)

In [69]:
# Data set index 1 of the response holds the league averages
league_avg_df = rename_cols(league_avg.data_sets[1].get_data_frame())

# Scale fg_pct by 100, keep one decimal place and append '%'
league_avg_df['fg_pct'] = (league_avg_df['fg_pct'] * 100).round(1).astype(str) + '%'

In [70]:
# Display the formatted league-average table
league_avg_df

Unnamed: 0,grid_type,shot_zone_basic,shot_zone_area,shot_zone_range,fga,fgm,fg_pct
0,League Averages,Above the Break 3,Back Court(BC),Back Court Shot,45,6,13.3%
1,League Averages,Above the Break 3,Center(C),24+ ft.,14955,5168,34.6%
2,League Averages,Above the Break 3,Left Side Center(LC),24+ ft.,20895,7459,35.7%
3,League Averages,Above the Break 3,Right Side Center(RC),24+ ft.,19624,6875,35.0%
4,League Averages,Backcourt,Back Court(BC),Back Court Shot,405,7,1.7%
5,League Averages,In The Paint (Non-RA),Center(C),8-16 ft.,9820,4159,42.4%
6,League Averages,In The Paint (Non-RA),Center(C),Less Than 8 ft.,16915,6518,38.5%
7,League Averages,In The Paint (Non-RA),Left Side(L),8-16 ft.,1770,714,40.3%
8,League Averages,In The Paint (Non-RA),Right Side(R),8-16 ft.,1871,738,39.4%
9,League Averages,Left Corner 3,Left Side(L),24+ ft.,8380,3329,39.7%


In [73]:
# Write the league averages to JSON as a list of row records
league_avg_df.to_json(path_or_buf='data/shots_league_avg.json', orient='records')