# Create rotations chart data

# Setup

In [1]:
import pandas as pd
import numpy as np
import nba_api
import re

In [2]:
from nba_api.stats.static.teams import get_teams, find_teams_by_nickname
from nba_api.stats.endpoints import LeagueGameFinder
from nba_api.stats.endpoints import PlayByPlayV2
from nba_api.stats.endpoints import PlayByPlay
from nba_api.stats.endpoints import BoxScorePlayerTrackV2

## Import data sheet for home/away

In [3]:
# Locations of the exported team box scores (one file per side)
spurs_file = 'data/team_box_scores_spurs.json'
opp_file = 'data/team_box_scores_opp.json'

# Read both files in the same way: Spurs first, opponent second
spurs_df, opp_df = (pd.read_json(path) for path in (spurs_file, opp_file))

# Placeholder value; the real home/away flag is derived from spurs_df further down
spurs_home = True

In [4]:
# Preview the first rows of the Spurs box score frame
spurs_df.head()

Unnamed: 0,TEAM_ABBREVIATION,home,PTS,FG,FG_PCT,3P,FG3_PCT,FT,FT_PCT,AST,REB,DREB,OREB,BLK,STL,TO,PF
0,SAS,away,112,39-79,49.4%,10-20,50.0%,24-36,66.7%,24,44,35,9,8,4,15,20


In [5]:
# Preview the first rows of the opponent box score frame
opp_df.head()

Unnamed: 0,TEAM_ABBREVIATION,home,PTS,FG,FG_PCT,3P,FG3_PCT,FT,FT_PCT,AST,REB,DREB,OREB,BLK,STL,TO,PF
0,UTA,home,118,47-101,46.5%,15-46,32.6%,9-10,90.0%,25,43,31,12,5,7,8,23


In [6]:
# The Spurs count as the home team unless the first box score row is flagged 'away'
spurs_home = spurs_df.home.iloc[0] != 'away'

In [7]:
# Look up the Spurs entry to obtain the team id used by the stats endpoints
spurs_info = find_teams_by_nickname('Spurs')
spurs_id = spurs_info[0]['id']

# find all Spurs games for the 2019-20 season
gamefinder = LeagueGameFinder(team_id_nullable=spurs_id,
                              season_nullable='2019-20')
team_box_scores_df = gamefinder.get_data_frames()[0]

spurs_games = list(team_box_scores_df.GAME_ID.unique()) # grab unique list of game IDs

# Work with the first game ID in the list for the rest of the notebook
test_game = spurs_games[0]

## Get list of active players and starters for each team

In [8]:
# Fetch the player-tracking box score for the test game; only PLAYER_NAME,
# TEAM_ABBREVIATION and START_POSITION are used from it below
roster_data_df = BoxScorePlayerTrackV2(game_id=test_game).data_sets[0].get_data_frame()

# Starters are the rows whose start position is F, G or C
starters_df = roster_data_df[roster_data_df.START_POSITION.isin(['F', 'G', 'C'])]

# Rows tagged 'SAS' belong to the Spurs; every other row is treated as the opponent
spurs_starters = starters_df.loc[starters_df.TEAM_ABBREVIATION == 'SAS', 'PLAYER_NAME'].tolist()
opp_starters = starters_df.loc[starters_df.TEAM_ABBREVIATION != 'SAS', 'PLAYER_NAME'].tolist()
spurs_roster = roster_data_df.loc[roster_data_df.TEAM_ABBREVIATION == 'SAS', 'PLAYER_NAME'].tolist()
opp_roster = roster_data_df.loc[roster_data_df.TEAM_ABBREVIATION != 'SAS', 'PLAYER_NAME'].tolist()

In [9]:
# Request the play-by-play data sets for the test game
pbp = PlayByPlayV2(game_id=test_game).data_sets

In [10]:
# The first data set is converted to a dataframe of play-by-play rows; preview ten of them
pbp_df = pbp[0].get_data_frame()
pbp_df.head(n=10)

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PLAYER2_TEAM_NICKNAME,PLAYER2_TEAM_ABBREVIATION,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG
0,21901314,2,12,0,1,6:38 PM,12:00,,,,...,,,0,0,,,,,,0
1,21901314,4,10,0,1,6:38 PM,12:00,Jump Ball Bradley vs. Poeltl: Tip to Ingles,,,...,Spurs,SAS,4,204060,Joe Ingles,1610613000.0,Utah,Jazz,UTA,1
2,21901314,7,1,47,1,6:38 PM,11:46,Clarkson 7' Turnaround Jump Shot (2 PTS) (Mitc...,,,...,Jazz,UTA,0,0,,,,,,1
3,21901314,9,2,78,1,6:38 PM,11:31,O'Neale BLOCK (1 BLK),,MISS Samanic 4' Floating Jump Shot,...,,,4,1626220,Royce O'Neale,1610613000.0,Utah,Jazz,UTA,1
4,21901314,11,4,0,1,6:38 PM,11:28,Bradley REBOUND (Off:0 Def:1),,,...,,,0,0,,,,,,1
5,21901314,12,2,1,1,6:38 PM,11:23,MISS O'Neale 25' 3PT Jump Shot,,,...,,,0,0,,,,,,1
6,21901314,13,4,0,1,6:38 PM,11:17,,,Samanic REBOUND (Off:0 Def:1),...,,,0,0,,,,,,1
7,21901314,14,2,78,1,6:38 PM,11:12,,,MISS Walker IV 14' Floating Jump Shot,...,,,0,0,,,,,,1
8,21901314,15,4,0,1,6:38 PM,11:10,Bradley REBOUND (Off:0 Def:2),,,...,,,0,0,,,,,,1
9,21901314,16,2,1,1,6:39 PM,10:58,MISS Clarkson 25' 3PT Jump Shot,,,...,,,0,0,,,,,,1


# Functions

In [11]:
def conv_time_to_sec(time, period_length=12*60):
    """
    Convert a period clock string (time remaining) into seconds elapsed within that period.

    time: 'MM:SS' string showing the time left on the period clock
    period_length: length of the period in seconds. Defaults to 12 minutes (720);
        pass another value for periods of a different length.

    Returns an integer number of seconds elapsed since the start of the period.

    Input example: '7:12'
    Output example: 288
    """
    split_time = time.split(':')
    # seconds still showing on the clock
    remaining = int(split_time[0]) * 60 + int(split_time[1])
    # elapsed = full period minus what is left
    return period_length - remaining

In [12]:
def get_col(spurs_or_opp):
    """
    Return the play-by-play description column and the compiled roster regex for one team.

    The column depends on which side the Spurs are on (module-level `spurs_home`):
    the home team's plays are in 'HOMEDESCRIPTION' and the visitor's plays are in
    'VISITORDESCRIPTION'.

    spurs_or_opp can be 'spurs' or 'opp'

    Returns a tuple (side, re_match) where side is the column name and re_match is
    `spurs_players_re` or `opp_players_re`.

    Raises ValueError for any other value of spurs_or_opp.
    """
    # Validate first so a typo fails with a clear message instead of an unbound local
    if spurs_or_opp not in ('spurs', 'opp'):
        raise ValueError(
            "spurs_or_opp must be 'spurs' or 'opp', got {!r}".format(spurs_or_opp)
        )

    wants_spurs = spurs_or_opp == 'spurs'
    re_match = spurs_players_re if wants_spurs else opp_players_re

    # The requested team is at home when "asking for the Spurs" and "Spurs are home" agree
    team_is_home = wants_spurs == bool(spurs_home)
    side = '{}DESCRIPTION'.format('HOME' if team_is_home else 'VISITOR')

    return side, re_match

In [13]:
def create_period_starters_list(period, df, spurs_or_opp):
    """
    Return up to 5 players taken to have started a given period for one team.

    It walks the team's play-by-play descriptions for that period in order,
    collects each distinct roster name the first time it is matched, and
    returns the first 5 names found.

    spurs_or_opp = 'spurs' or 'opp'
    period is an integer figure between 2-4
    df = play by play dataframe

    Returns a list of matched name strings (fewer than 5 if fewer were found).
    """
    side, re_match = get_col(spurs_or_opp)

    # Filter on the dataframe that was passed in (not a module-level frame) so the
    # boolean mask and the rows it selects always come from the same object
    descriptions = df.loc[(df.PERIOD == period) & df[side].notnull(), side]

    data = []
    for play_descrip in descriptions:
        # Skip anything that is not text rather than hiding errors behind a bare except
        if not isinstance(play_descrip, str):
            continue
        found = re_match.search(play_descrip)
        if found is None:
            # No roster name in this description
            continue
        player_match = found.group()
        if player_match not in data:
            data.append(player_match)
            if len(data) == 5:
                # Five distinct names collected; later rows cannot change the result
                break

    return data[:5]

In [61]:
def split_player_out(pbp):
    """
    Given a substitution play-by-play description, return the name of the player leaving the game.

    The name is the text that follows 'FOR', with surrounding whitespace trimmed.
    """
    after_for = pbp.split('FOR')[1]
    return after_for.strip()

In [76]:
def split_player_in(pbp):
    """
    Given a substitution play-by-play description, return the name of the player entering the game.

    The name is the text before 'FOR' with the leading 'SUB:' marker removed.
    """
    before_for = pbp.partition('FOR')[0]
    # Drop the first four characters ('SUB:'), then trim the remaining whitespace
    return before_for[4:].strip()

# Parse PBP data

## Gather quarterly starters

In [14]:
# Create regex of both rosters
# A "last name" is everything after the first space-separated token of the full name.
# Single-token names produce an empty last name; those are dropped because an empty
# alternative would match every description. Names are escaped so punctuation in a
# name is matched literally instead of being read as regex syntax.
spurs_last_names = [' '.join(l_name.split(' ')[1:]) for l_name in spurs_roster]
spurs_last_names = [l_name for l_name in spurs_last_names if l_name]
spurs_re = '(' + ')|('.join(re.escape(l_name) for l_name in spurs_last_names) + ')'

opp_last_names = [' '.join(l_name.split(' ')[1:]) for l_name in opp_roster]
opp_last_names = [l_name for l_name in opp_last_names if l_name]
opp_re = '(' + ')|('.join(re.escape(l_name) for l_name in opp_last_names) + ')'

In [16]:
# Compile each roster pattern once; get_col() returns these compiled matchers
spurs_players_re = re.compile(spurs_re)
opp_players_re = re.compile(opp_re)

In [19]:
# Get all quarter starters
# One call per period (2, 3, 4) for each team, unpacked in period order
(spurs_second_starters,
 spurs_third_starters,
 spurs_fourth_starters) = [create_period_starters_list(period, pbp_df, 'spurs') for period in (2, 3, 4)]

(opp_second_starters,
 opp_third_starters,
 opp_fourth_starters) = [create_period_starters_list(period, pbp_df, 'opp') for period in (2, 3, 4)]

## Get sub data

In [39]:
# Get appropriate column for each team
# get_col returns (description column name, roster regex); only the column name is needed here
spurs_col = get_col('spurs')[0]
opp_col = get_col('opp')[0]

In [43]:
# Grab sub column
# Keep only rows whose team description mentions 'SUB'; na=False treats rows with no
# description as non-matches. .copy() gives an independent frame that later cells can
# add columns to.
spurs_sub_df = pbp_df.loc[
    pbp_df[spurs_col].str.contains('SUB', na=False),
    ['GAME_ID', 'PERIOD', 'PCTIMESTRING', spurs_col],
].copy()

opp_sub_df = pbp_df.loc[
    pbp_df[opp_col].str.contains('SUB', na=False),
    ['GAME_ID', 'PERIOD', 'PCTIMESTRING', opp_col],
].copy()

In [52]:
# Assign total game time
# = 720 seconds for every completed period + seconds elapsed in the current period
# NOTE(review): treats every period as 12 minutes long — confirm how overtime periods should be handled
spurs_sub_df['game_time'] = (spurs_sub_df.PERIOD - 1) * (12*60) + spurs_sub_df.PCTIMESTRING.map(conv_time_to_sec)
opp_sub_df['game_time'] = (opp_sub_df.PERIOD - 1) * (12*60) + opp_sub_df.PCTIMESTRING.map(conv_time_to_sec)

In [77]:
# Assign columns for player in and out
# Each substitution description is parsed twice: once for the entering player, once for the exiting one
spurs_sub_df['player_in'] = spurs_sub_df[spurs_col].map(split_player_in)
spurs_sub_df['player_out'] = spurs_sub_df[spurs_col].map(split_player_out)

opp_sub_df['player_in'] = opp_sub_df[opp_col].map(split_player_in)
opp_sub_df['player_out'] = opp_sub_df[opp_col].map(split_player_out)

In [80]:
# Check which columns the Spurs substitution frame now holds
spurs_sub_df.columns

Index(['GAME_ID', 'PERIOD', 'PCTIMESTRING', 'VISITORDESCRIPTION', 'game_time',
       'player_in', 'player_out'],
      dtype='object')

In [82]:
# Filter out unnecessary columns
# The clock string and the raw description are no longer needed once game_time,
# player_in and player_out have been derived from them
spurs_sub_df = spurs_sub_df.loc[:, ['GAME_ID', 'PERIOD', 'game_time', 'player_in', 'player_out']]
opp_sub_df = opp_sub_df.loc[:, ['GAME_ID', 'PERIOD', 'game_time', 'player_in', 'player_out']]

# Create rotations data for visualization

In [None]:
# At the start of each quarter, track down the five players initially in the game