In [155]:
'''
Date: 2019-01-02
Contributor: Matthew Barlowe
Twitter: @matt_barlowe
Email: matt@barloweanalytics.com

This file contains the main functions to scrape the NBA api and compile the
play by play (pbp) of the provided game into a pandas dataframe
'''
import json
import requests
import bs4
import pandas as pd
import numpy as np
# show every column when a dataframe is displayed instead of truncating them
pd.set_option('display.max_columns', None)
# have to pass this to the requests function or the api will return a 403 code
user_agent = {'User-agent': 'Mozilla/5.0'}

#this will catalog the shot types recorded in the NBA play by play
#not sure how accurate this is it seems to change for the same shots
#NOTE(review): this literal used to list key 79 twice, first as
#'pullup jump shot' and then as '3pt shot'. Python keeps the last value for a
#repeated key so lookups always returned '3pt shot'; the shadowed entry has
#been dropped to make that explicit. Confirm which label is actually correct.
shot_type_dict = {
    58: 'turnaround hook shot', 5: 'layup', 6: 'driving layup',
    96: 'turnaround bank hook shot', 108: 'cutting dunk shot',
    79: '3pt shot', 72: 'putback layup', 1: 'jump shot',
    57: 'driving hook shot', 75: 'driving finger roll layup',
    76: 'running finger roll layup', 80: '3pt shot',
    2: '3pt shot', 3: 'hook shot', 98: 'cutting layup', 67: 'hook bank shot',
    101: 'driving floating jump shot', 102: 'driving floating bank shot jump shot',
    73: 'driving reverse layup', 63: 'fadeaway jump shot', 47: 'turnaround jump shot',
    52: 'alley oop dunk', 97: 'tip layup', 66: 'jump bank shot',
    50: 'running dunk shot', 41: 'running layup', 93: 'driving bank hook shot',
    87: 'putback dunk shot', 99: 'cutting finger roll layup',
    86: 'turnaround fadeaway', 78: 'floating jump shot', 9: 'driving dunk',
    74: 'running reverse layup', 44: 'reverse layup',
    71: 'finger roll layup', 43: 'alley oop layup', 7: 'dunk',
    103: 'running pull up jump shot', 110: 'running reverse dunk',
    107: 'tip dunk', 51: 'reverse dunk',
}

#maps the numeric event type codes found in the NBA play by play to a
#readable event name
event_type_dict = {
    1: 'shot',
    2: 'missed_shot',
    4: 'rebound',
    5: 'turnover',
    20: 'stoppage: out-of-bounds',
    6: 'foul',
    3: 'free-throw',
    8: 'substitution',
    12: 'period-start',
    10: 'jump-ball',
    9: 'team-timeout',
    18: 'instant-replay',
    13: 'period-end',
    7: 'goal-tending',
    0: 'game-end',
}

In [2]:
#create column for whether the shot was successful or not
def made_shot(row):
    '''
    function to determine whether shot was made or missed

    Input:
    row - pandas row; 'event_type_de' is always read, 'de' is only read when
          the event is a free throw

    Output:
    shot_made - 1 for a made field goal or free throw, 0 for a missed one and
                np.nan for every event that is not a shot attempt
    '''
    event_type = row['event_type_de']

    if event_type == 'shot':
        return 1
    if event_type == 'missed_shot':
        return 0
    if event_type == 'free-throw':
        # the description is only inspected for free throws. The previous
        # version combined the two tests with the non short-circuiting `&`
        # so `'Missed' in row['de']` also ran on non shot rows and raised a
        # TypeError whenever their description was missing (NaN)
        return 0 if 'Missed' in row['de'] else 1
    return np.nan

In [3]:
#parse the play description (de column) to get the type of shot being taken
def parse_shot_types(row):
    '''
    function to classify the kind of shot described by a play by play row

    Inputs:
    row - pandas row of play by play dataframe; reads 'shot_made' and 'de'

    Outputs:
    shot_type - one of layup, hook, dunk, free or jump for rows that have a
                shot_made value, np.nan for all other rows
    '''
    # rows without a shot_made value are not shot attempts
    if pd.isnull(row['shot_made']):
        return np.nan

    description = row['de']
    # checked in this order, the first keyword found in the description wins
    keyword_labels = (('Layup', 'layup'),
                      ('Hook', 'hook'),
                      ('Dunk', 'dunk'),
                      ('Free', 'free'))
    for keyword, label in keyword_labels:
        if keyword in description:
            return label

    # anything not matched above is treated as a jump shot
    return 'jump'

In [4]:
#Clean time to get a seconds elapsed column

def create_seconds_elapsed(row):
    '''
    this function parses the string time column and converts it into game
    seconds elapsed

    Inputs:
    row - row of play by play dataframe; reads 'pctimestring' (the time left
          in the period written as minutes:seconds) and 'period'

    Outputs:
    time_in_seconds - the elapsed game time expressed in seconds

    Raises:
    ValueError - if period is not 1 or greater. Previously such a row fell
                 through both branches and failed with an UnboundLocalError
    '''
    max_time = 720        # seconds in periods 1-4
    ot_max_time = 300     # seconds in every period after the fourth
    regulation_time = 4 * max_time

    time_list = row['pctimestring'].strip().split(':')
    seconds_remaining = int(time_list[0]) * 60 + int(time_list[1])
    period = row['period']

    if period in (1, 2, 3, 4):
        # time used in this period plus all the full periods before it
        return (max_time - seconds_remaining) + max_time * (int(period) - 1)
    if period > 4:
        # periods after the fourth are stacked on top of the first four
        return ((ot_max_time - seconds_remaining)
                + ot_max_time * (int(period) - 5)
                + regulation_time)

    raise ValueError(f'cannot compute seconds elapsed for period {period!r}')

In [5]:
def calc_points_made(row):
    '''
    function to calculate the points earned by a team with each shot made

    Inputs:
    row - row of pbp dataframe; reads 'shot_made', 'is_three' and 'shot_type'

    Outputs - value of shot made: 3 for a made three pointer, 1 for a made
              free throw, 2 for any other made shot and 0 for everything else
    '''
    # a miss or a non shot event (shot_made of 0 or NaN) never scores. The
    # previous version returned 1 for every free throw, including misses
    if row['shot_made'] != 1:
        return 0

    if row['is_three'] == 1:
        return 3
    if row['shot_type'] == 'free':
        return 1
    if row['is_three'] == 0:
        return 2
    return 0

In [6]:
#determine what type of fouls are being committed

def parse_foul(row):
    '''
    function to determine what type of foul is being committed by the player

    Input:
    row - row of nba play by play; reads the 'de' description

    Output:
    foul_type - label of the first foul keyword found in the description,
                np.nan when none of the keywords appear
    '''
    description = row['de']

    # checked in order, first match wins. 'Flagrant 2' has to be tested
    # before 'Flagrant': it was previously listed after it and could never be
    # returned because every 'Flagrant 2' description also contains 'Flagrant'
    foul_keywords = (('Shooting', 'shooting'),
                     ('Personal', 'personal'),
                     ('Loose Ball', 'loose_ball'),
                     ('Technical', 'technical'),
                     ('Charge', 'charge'),
                     ('Defense 3 Second', '3 second'),
                     ('Flagrant 2', 'flagrant 2'),
                     ('Flagrant', 'flagrant'))

    for keyword, foul_type in foul_keywords:
        if keyword in description:
            return foul_type
    return np.nan

In [164]:
def _lineup_api_url(game_date, opponent_team_id, period, api_season):
    '''
    builds the stats.nba.com leaguedashlineups url for the given date, period
    and season with OpponentTeamID set to opponent_team_id
    '''
    return ('https://stats.nba.com/stats/leaguedashlineups?Conference=&'
            f'DateFrom={game_date}&DateTo={game_date}&Division=&'
            'GameSegment=&GroupQuantity=5&LastNGames=0&LeagueID=&Location=&'
            f'MeasureType=Base&Month=0&OpponentTeamID={opponent_team_id}&Outcome=&PORound=&'
            f'PaceAdjust=N&PerMode=Totals&Period={period}&PlusMinus=N&Rank=N&'
            f'Season={api_season}&SeasonSegment=&SeasonType=Regular+'
            'Season&ShotClockRange=&TeamID=&VsConference=&VsDivision=')


def _fetch_lineup_ids(lineup_api):
    '''
    requests the lineup api url and returns every lineup in the response as a
    list of integer player ids
    '''
    lineup_dict = requests.get(lineup_api, headers=user_agent).json()
    # the second field of each row holds the ids of the lineup joined by '-'
    return [list(map(int, filter(None, lineup[1].split('-'))))
            for lineup in lineup_dict['resultSets'][0]['rowSet']]


def _players_seen_before_first_sub(period_df, team_abbrev, first_sub_index):
    '''
    returns the unique player1 ids credited with an event for team_abbrev from
    the start of the period up to and including the row of the team's first
    substitution, or over the whole period when first_sub_index is None
    '''
    team_events = period_df[(period_df.event_team == team_abbrev)
                            & (~pd.isnull(period_df['player1_name']))
                            & (period_df['player1_team_abbreviation'] == team_abbrev)
                            & (period_df.is_block == 0)
                            & (period_df.is_steal == 0)]
    if first_sub_index is not None:
        team_events = team_events.loc[:first_sub_index, :]
    return list(team_events['player1_id'].unique())


def _starting_ids_names(period_df, seen_players, subbed_players, lineup_api):
    '''
    returns the (player_id, player_name) pairs that start the period. When
    fewer than five players were seen before the first sub the lineup api is
    used to find a lineup that contains all of them
    '''
    starters = seen_players
    if len(seen_players) < 5:
        # the api is only queried when the play by play alone is not enough
        possible_lines = [line for line in _fetch_lineup_ids(lineup_api)
                          if set(seen_players).issubset(line)]
        if len(possible_lines) > 1:
            # drop the lineups holding a player who was subbed in during the
            # period without having been seen on the court before the first
            # sub; keep every candidate if that would leave nothing
            without_subs = [line for line in possible_lines
                            if not any(player in subbed_players
                                       and player not in seen_players
                                       for player in line)]
            if without_subs:
                possible_lines = without_subs
        if not possible_lines:
            raise IndexError('no lineup returned by the lineup api contains '
                             f'the players {seen_players}')
        starters = possible_lines[0]

    # NOTE(review): the name lookup only searches player1 events of this
    # period and raises IndexError for a player that has none
    return [(x, period_df[period_df['player1_id'] == x]['player1_name'].unique()[0])
            for x in starters]


def get_lineups(dataframe):
    '''
    This function gets the lineups for the game and creates columns
    for each player on the court for each event of the play by play

    The five starters of every period are taken from the players credited
    with an event before their team's first substitution. If that gives fewer
    than five players the stats.nba.com lineup api is requested to complete
    the lineup. The lineups are then updated at every substitution row by
    removing player1 and adding player2.

    Inputs:
    dataframe  - the nba dataframe that's been computed up to this point

    Outputs:
    lineup_df  - the dataframe with lineups computed. Each period is processed
                 on its own and the periods are concatenated and re-indexed,
                 which leaves the previous indexes behind as extra columns
    '''
    periods = []
    for period in range(1, dataframe['period'].max()+1):
        print(period)
        #subsets main dataframe by period and subsets into a home and away subs
        period_df = dataframe[dataframe['period'] == period].reset_index()
        subs_df = period_df[(period_df.event_type_de == 'substitution')]
        away_subs = subs_df[pd.isnull(subs_df['visitordescription']) == 0]
        home_subs = subs_df[pd.isnull(subs_df['homedescription']) == 0]

        #getting player ids of the players subbed into the game to check against later
        #to determine starting lineups
        away_subbed_players = list(away_subs['player2_id'].unique())
        home_subbed_players = list(home_subs['player2_id'].unique())

        #index of the first sub for home and away, None when the team makes
        #no sub in the period so that the whole period is searched instead of
        #failing on an empty list
        away_first_sub = away_subs.index[0] if len(away_subs.index) else None
        home_first_sub = home_subs.index[0] if len(home_subs.index) else None

        #team values are read from the dataframe being processed rather than
        #from a notebook level variable
        away_abbrev = period_df['away_team_abbrev'].unique()[0]
        home_abbrev = period_df['home_team_abbrev'].unique()[0]

        #create variables for the lineup API
        game_date = str(period_df.game_date.unique()[0])[:10]
        away_team_id = period_df.away_team_id.unique()[0]
        home_team_id = period_df.home_team_id.unique()[0]
        api_season = f'{period_df.season.unique()[0]-1}-{str(period_df.season.unique()[0])[2:]}'
        home_lineup_api = _lineup_api_url(game_date, away_team_id, period, api_season)
        away_lineup_api = _lineup_api_url(game_date, home_team_id, period, api_season)

        away_starting_line = _players_seen_before_first_sub(period_df, away_abbrev,
                                                            away_first_sub)
        home_starting_line = _players_seen_before_first_sub(period_df, home_abbrev,
                                                            home_first_sub)

        #theres a large possibility that my catching of possible lines might return
        #two possible lines that fit the criteria in extreme edge cases may have to
        #resort to brute forcing it if that happens often
        away_ids_names = _starting_ids_names(period_df, away_starting_line,
                                             away_subbed_players, away_lineup_api)
        home_ids_names = _starting_ids_names(period_df, home_starting_line,
                                             home_subbed_players, home_lineup_api)

        #name and id column for each of the five players of both teams
        for side in ('home', 'away'):
            for slot in range(1, 6):
                period_df[f'{side}_player_{slot}'] = ''
                period_df[f'{side}_player_{slot}_id'] = ''

        #period_df was re-indexed above so the label x is also the row position
        for x in range(period_df.shape[0]):
            if period_df.at[x, 'event_type_de'] == 'substitution':
                player_out = period_df.at[x, 'player1_id']
                player_in = (period_df.at[x, 'player2_id'], period_df.at[x, 'player2_name'])
                if pd.isnull(period_df.at[x, 'visitordescription']):
                    home_ids_names = [ids for ids in home_ids_names if ids[0] != player_out]
                    home_ids_names.append(player_in)
                elif pd.isnull(period_df.at[x, 'homedescription']):
                    away_ids_names = [ids for ids in away_ids_names if ids[0] != player_out]
                    away_ids_names.append(player_in)

            #every row gets the lineups on the court after the event, columns
            #are addressed by name instead of by hard coded position
            for side, ids_names in (('home', home_ids_names), ('away', away_ids_names)):
                for slot in range(5):
                    period_df.at[x, f'{side}_player_{slot + 1}_id'] = ids_names[slot][0]
                    period_df.at[x, f'{side}_player_{slot + 1}'] = ids_names[slot][1]
        periods.append(period_df)

    lineup_df = pd.concat(periods).reset_index()
    return lineup_df

In [160]:
def scrape_pbp(game_id, season, user_agent=user_agent):
    '''
    This function scrapes both of the pbp urls and returns a joined/cleaned
    pbp dataframe

    Inputs:
    game_id - id of the nba game you want to scrape, it is placed as is in
              both api urls (the notebook passes the string '0021800864')
    season - integer year the season ends in e.g. 2019 for the 2018-19 season
    user_agent - this is the user agent to pass to the requests function

    Outputs:
    clean_df - final cleaned dataframe with the lineup columns added by
               get_lineups
    '''

#v2 api will contain all the player info for each play in the game while the
#pbp_api_url will contain xy coords for each event
    v2_season = f'{season - 1}-{str(season)[2:]}'
    pbp_season = f'{season - 1}'
    print(v2_season)
    print(pbp_season)
    v2_api_url = f'https://stats.nba.com/stats/playbyplayv2?EndPeriod=10&EndRange=55800&GameID={game_id}&RangeType=2&Season={v2_season}&SeasonType=Regular+Season&StartPeriod=1&StartRange=0kk'
    pbp_api_url = f'https://data.nba.com/data/10s/v2015/json/mobile_teams/nba/{pbp_season}/scores/pbp/{game_id}_full_pbp.json'
    print(v2_api_url)
    print(pbp_api_url)
# have to pass this to the requests function or the api will return a 403 code
# certificate verification is left at the requests default (enabled)
    v2_rep = requests.get(v2_api_url, headers=user_agent)
    v2_dict = v2_rep.json()

#this pulls the v2 stats.nba play by play api
    pbp_v2_headers = v2_dict['resultSets'][0]['headers']
    pbp_v2_data = v2_dict['resultSets'][0]['rowSet']
    pbp_v2_df = pd.DataFrame(pbp_v2_data, columns=pbp_v2_headers)
    pbp_v2_df.columns = list(map(str.lower, pbp_v2_df.columns))
    print(pbp_v2_df.columns)

#this pulls the data.nba api end play by play
    pbp_rep = requests.get(pbp_api_url, headers=user_agent)
    pbp_dict = pbp_rep.json()

#this will be used to concat each quarter from the play by play
    pbp_df_list = [pd.DataFrame(quarter['pla']) for quarter in pbp_dict['g']['pd']]

#pulling the home and away team abbreviations and the game date
#gcode is split on '/' into the date and the two three letter abbreviations
#with the away team first
    gcode = pbp_dict['g']['gcode'].split('/')
    date = gcode[0]
    teams = gcode[1]
    home_team_abbrev = teams[3:]
    away_team_abbrev = teams[:3]
    pbp_df = pd.concat(pbp_df_list)

#joining the two dataframes together and only pulling in relevant columns
    clean_df = pbp_v2_df.merge(pbp_df[['evt', 'locX', 'locY', 'hs', 'vs', 'de']],
                               left_on='eventnum', right_on='evt')

#add date and team abbrev columns to dataframe
    clean_df['home_team_abbrev'] = home_team_abbrev
    clean_df['away_team_abbrev'] = away_team_abbrev
    #pd.to_datetime replaces astype('datetime64'): the unit-less dtype string
    #is rejected by current pandas
    clean_df['game_date'] = pd.to_datetime(date)
    #games played from October to December belong to the season ending the
    #following year
    clean_df['season'] = np.where(clean_df.game_date.dt.month.isin([10, 11, 12]),
                                  clean_df.game_date.dt.year + 1,
                                  clean_df.game_date.dt.year)
    #code to properly get the team ids as the scientific notation cuts off some digits
    #the single id is taken out of the unique() array so a scalar is assigned
    home_team_id = clean_df[clean_df['player1_team_abbreviation'] == home_team_abbrev]['player1_team_id'].astype(int).unique()[0]
    away_team_id = clean_df[clean_df['player1_team_abbreviation'] == away_team_abbrev]['player1_team_id'].astype(int).unique()[0]
    clean_df['home_team_id'] = home_team_id
    clean_df['away_team_id'] = away_team_id

#create an event type description column
    clean_df['event_type_de'] = clean_df[['eventmsgtype']].replace({'eventmsgtype': event_type_dict})

#create a shot type description column, only for made and missed shots
#(eventmsgtype 1 and 2). A shot action code missing from shot_type_dict gives
#NaN instead of a KeyError that would stop the whole scrape
    clean_df['shot_type_de'] = clean_df[['eventmsgtype', 'eventmsgactiontype']]\
                                .apply(lambda x: shot_type_dict.get(int(x['eventmsgactiontype']), np.nan)
                                       if np.isin(x['eventmsgtype'], [1, 2]) else np.nan, axis=1)

#create an event team column: home team when there is a home description,
#away team otherwise (an earlier assignment with the opposite condition was
#overwritten by this one and has been removed)
    clean_df['event_team'] = np.where(~clean_df['homedescription'].isnull(),
                                      clean_df['home_team_abbrev'],
                                      clean_df['away_team_abbrev'])

#creates column for whether shot is made or not
    clean_df['shot_made'] = clean_df.apply(made_shot, axis=1)

#create a column that says whether the shot was blocked or not
#na=False makes a missing description count as no match
    clean_df['is_block'] = np.where(clean_df['homedescription'].str.contains('BLOCK', na=False) |
                                    clean_df['visitordescription'].str.contains('BLOCK', na=False),
                                    1, 0)

#create a column for the type of shot taken
    clean_df['shot_type'] = clean_df.apply(parse_shot_types, axis=1)

#create seconds_elapsed column between plays
    clean_df['seconds_elapsed'] = clean_df.apply(create_seconds_elapsed, axis=1)

#calculate event length of each event in seconds
    clean_df['event_length'] = clean_df['seconds_elapsed'] - clean_df['seconds_elapsed'].shift(1)

#determine whether shot was a three pointer
    clean_df['is_three'] = np.where(clean_df['de'].str.contains('3pt', na=False), 1, 0)

#determine points earned
    clean_df['points_made'] = clean_df.apply(calc_points_made, axis=1)

#create columns that determine if rebound is offensive or defensive by
#comparing the event team with the event team of the previous row
    clean_df['is_d_rebound'] = np.where((clean_df['event_type_de'] == 'rebound') &
                                        (clean_df['event_team'] != clean_df['event_team'].shift(1)), 1, 0)

    clean_df['is_o_rebound'] = np.where((clean_df['event_type_de'] == 'rebound') &
                                        (clean_df['event_team'] == clean_df['event_team'].shift(1))
                                        & (clean_df['event_type_de'].shift(1) != 'free-throw'), 1, 0)

#create columns to determine turnovers and steals
    clean_df['is_turnover'] = np.where(clean_df['de'].str.contains('Turnover', na=False), 1, 0)
    clean_df['is_steal'] = np.where(clean_df['de'].str.contains('Steal', na=False), 1, 0)

#determine what type of fouls are being committed
    clean_df['foul_type'] = clean_df.apply(parse_foul, axis=1)

# determine if a shot is a putback off an offensive rebound: the previous row
# is an offensive rebound and this event came at most 3 seconds after it
    clean_df['is_putback'] = np.where((clean_df['is_o_rebound'].shift(1) == 1) &
                                      (clean_df['event_length'] <= 3), 1, 0)

#pull lineups
    clean_df = get_lineups(clean_df)

    return clean_df



In [165]:
# scrape game 0021800864 of the 2018-19 season (season argument 2019); this
# sends live requests to the NBA apis
test = scrape_pbp('0021800864', 2019)

2018-19
2018
https://stats.nba.com/stats/playbyplayv2?EndPeriod=10&EndRange=55800&GameID=0021800864&RangeType=2&Season=2018-19&SeasonType=Regular+Season&StartPeriod=1&StartRange=0kk
https://data.nba.com/data/10s/v2015/json/mobile_teams/nba/2018/scores/pbp/0021800864_full_pbp.json




Index(['game_id', 'eventnum', 'eventmsgtype', 'eventmsgactiontype', 'period',
       'wctimestring', 'pctimestring', 'homedescription', 'neutraldescription',
       'visitordescription', 'score', 'scoremargin', 'person1type',
       'player1_id', 'player1_name', 'player1_team_id', 'player1_team_city',
       'player1_team_nickname', 'player1_team_abbreviation', 'person2type',
       'player2_id', 'player2_name', 'player2_team_id', 'player2_team_city',
       'player2_team_nickname', 'player2_team_abbreviation', 'person3type',
       'player3_id', 'player3_name', 'player3_team_id', 'player3_team_city',
       'player3_team_nickname', 'player3_team_abbreviation'],
      dtype='object')




1
2
3
4


In [166]:
test

Unnamed: 0,level_0,index,game_id,eventnum,eventmsgtype,eventmsgactiontype,period,wctimestring,pctimestring,homedescription,neutraldescription,visitordescription,score,scoremargin,person1type,player1_id,player1_name,player1_team_id,player1_team_city,player1_team_nickname,player1_team_abbreviation,person2type,player2_id,player2_name,player2_team_id,player2_team_city,player2_team_nickname,player2_team_abbreviation,person3type,player3_id,player3_name,player3_team_id,player3_team_city,player3_team_nickname,player3_team_abbreviation,evt,locX,locY,hs,vs,de,home_team_abbrev,away_team_abbrev,game_date,season,home_team_id,away_team_id,event_team,event_type_de,shot_type_de,shot_made,is_block,shot_type,seconds_elapsed,event_length,is_three,points_made,is_d_rebound,is_o_rebound,is_turnover,is_steal,foul_type,is_putback,home_player_1,home_player_1_id,home_player_2,home_player_2_id,home_player_3,home_player_3_id,home_player_4,home_player_4_id,home_player_5,home_player_5_id,away_player_1,away_player_1_id,away_player_2,away_player_2_id,away_player_3,away_player_3_id,away_player_4,away_player_4_id,away_player_5,away_player_5_id
0,0,0,0021800864,2,12,0,1,7:12 PM,12:00,,,,,,0,0,,,,,,0,0,,,,,,0,0,,,,,,2,0,-80,0,0,Start Period,ORL,CHA,2019-02-14,2019,1610612753,1610612766,CHA,period-start,,,0,,0,,0,0,0,0,0,0,,0,D.J. Augustin,201571,Nikola Vucevic,202696,Evan Fournier,203095,Jonathan Isaac,1628371,Wes Iwundu,1628411,Marvin Williams,101107,Nicolas Batum,201587,Kemba Walker,202689,Jeremy Lamb,203087,Cody Zeller,203469
1,1,1,0021800864,4,10,0,1,7:12 PM,12:00,Jump Ball Vucevic vs. Zeller: Tip to Williams,,,,,4,202696,Nikola Vucevic,1.610613e+09,Orlando,Magic,ORL,5,203469,Cody Zeller,1.610613e+09,Charlotte,Hornets,CHA,5,101107,Marvin Williams,1.610613e+09,Charlotte,Hornets,CHA,4,0,-80,0,0,Jump Ball Zeller vs Vucevic (Williams gains po...,ORL,CHA,2019-02-14,2019,1610612753,1610612766,ORL,jump-ball,,,0,,0,0.0,0,0,0,0,0,0,,0,D.J. Augustin,201571,Nikola Vucevic,202696,Evan Fournier,203095,Jonathan Isaac,1628371,Wes Iwundu,1628411,Marvin Williams,101107,Nicolas Batum,201587,Kemba Walker,202689,Jeremy Lamb,203087,Cody Zeller,203469
2,2,2,0021800864,7,2,1,1,7:12 PM,11:40,,,MISS Williams 21' Jump Shot,,,5,101107,Marvin Williams,1.610613e+09,Charlotte,Hornets,CHA,0,0,,,,,,0,0,,,,,,7,183,103,0,0,[CHA] Williams Jump Shot: Missed,ORL,CHA,2019-02-14,2019,1610612753,1610612766,CHA,missed_shot,jump shot,0.0,0,jump,20,20.0,0,0,0,0,0,0,,0,D.J. Augustin,201571,Nikola Vucevic,202696,Evan Fournier,203095,Jonathan Isaac,1628371,Wes Iwundu,1628411,Marvin Williams,101107,Nicolas Batum,201587,Kemba Walker,202689,Jeremy Lamb,203087,Cody Zeller,203469
3,3,3,0021800864,8,4,0,1,7:12 PM,11:37,Vucevic REBOUND (Off:0 Def:1),,,,,4,202696,Nikola Vucevic,1.610613e+09,Orlando,Magic,ORL,0,0,,,,,,0,0,,,,,,8,183,103,0,0,[ORL] Vucevic Rebound (Off:0 Def:1),ORL,CHA,2019-02-14,2019,1610612753,1610612766,ORL,rebound,,,0,,23,3.0,0,0,1,0,0,0,,0,D.J. Augustin,201571,Nikola Vucevic,202696,Evan Fournier,203095,Jonathan Isaac,1628371,Wes Iwundu,1628411,Marvin Williams,101107,Nicolas Batum,201587,Kemba Walker,202689,Jeremy Lamb,203087,Cody Zeller,203469
4,4,4,0021800864,9,6,1,1,7:12 PM,11:24,,,Lamb P.FOUL (P1.T1) (T.Brothers),,,5,203087,Jeremy Lamb,1.610613e+09,Charlotte,Hornets,CHA,4,203932,Aaron Gordon,1.610613e+09,Orlando,Magic,ORL,1,0,,,,,,9,79,26,0,0,[CHA] Lamb Foul: Personal (1 PF) (T Brothers),ORL,CHA,2019-02-14,2019,1610612753,1610612766,CHA,foul,,,0,,36,13.0,0,0,0,0,0,0,personal,0,D.J. Augustin,201571,Nikola Vucevic,202696,Evan Fournier,203095,Jonathan Isaac,1628371,Wes Iwundu,1628411,Marvin Williams,101107,Nicolas Batum,201587,Kemba Walker,202689,Jeremy Lamb,203087,Cody Zeller,203469
5,5,5,0021800864,12,2,1,1,7:13 PM,11:14,MISS Fournier 26' 3PT Jump Shot,,,,,4,203095,Evan Fournier,1.610613e+09,Orlando,Magic,ORL,0,0,,,,,,0,0,,,,,,12,-79,246,0,0,[ORL] Fournier 3pt Shot: Missed,ORL,CHA,2019-02-14,2019,1610612753,1610612766,ORL,missed_shot,jump shot,0.0,0,jump,46,10.0,1,0,0,0,0,0,,0,D.J. Augustin,201571,Nikola Vucevic,202696,Evan Fournier,203095,Jonathan Isaac,1628371,Wes Iwundu,1628411,Marvin Williams,101107,Nicolas Batum,201587,Kemba Walker,202689,Jeremy Lamb,203087,Cody Zeller,203469
6,6,6,0021800864,13,4,0,1,7:13 PM,11:11,,,Batum REBOUND (Off:0 Def:1),,,5,201587,Nicolas Batum,1.610613e+09,Charlotte,Hornets,CHA,0,0,,,,,,0,0,,,,,,13,-79,246,0,0,[CHA] Batum Rebound (Off:0 Def:1),ORL,CHA,2019-02-14,2019,1610612753,1610612766,CHA,rebound,,,0,,49,3.0,0,0,1,0,0,0,,0,D.J. Augustin,201571,Nikola Vucevic,202696,Evan Fournier,203095,Jonathan Isaac,1628371,Wes Iwundu,1628411,Marvin Williams,101107,Nicolas Batum,201587,Kemba Walker,202689,Jeremy Lamb,203087,Cody Zeller,203469
7,7,7,0021800864,14,5,1,1,7:13 PM,11:03,Fournier STEAL (1 STL),,Batum Bad Pass Turnover (P1.T1),,,5,201587,Nicolas Batum,1.610613e+09,Charlotte,Hornets,CHA,4,203095,Evan Fournier,1.610613e+09,Orlando,Magic,ORL,0,0,,,,,,14,131,51,0,0,[CHA] Batum Turnover : Bad Pass (1 TO) Steal:F...,ORL,CHA,2019-02-14,2019,1610612753,1610612766,ORL,turnover,,,0,,57,8.0,0,0,0,0,1,1,,0,D.J. Augustin,201571,Nikola Vucevic,202696,Evan Fournier,203095,Jonathan Isaac,1628371,Wes Iwundu,1628411,Marvin Williams,101107,Nicolas Batum,201587,Kemba Walker,202689,Jeremy Lamb,203087,Cody Zeller,203469
8,8,8,0021800864,16,1,1,1,7:13 PM,10:58,Augustin 26' 3PT Jump Shot (3 PTS) (Fournier 1...,,,0 - 3,3,4,201571,D.J. Augustin,1.610613e+09,Orlando,Magic,ORL,4,203095,Evan Fournier,1.610613e+09,Orlando,Magic,ORL,0,0,,,,,,16,136,228,3,0,[ORL 3-0] Augustin 3pt Shot: Made (3 PTS) Assi...,ORL,CHA,2019-02-14,2019,1610612753,1610612766,ORL,shot,jump shot,1.0,0,jump,62,5.0,1,3,0,0,0,0,,0,D.J. Augustin,201571,Nikola Vucevic,202696,Evan Fournier,203095,Jonathan Isaac,1628371,Wes Iwundu,1628411,Marvin Williams,101107,Nicolas Batum,201587,Kemba Walker,202689,Jeremy Lamb,203087,Cody Zeller,203469
9,9,9,0021800864,18,2,1,1,7:13 PM,10:40,,,MISS Walker 3PT Jump Shot,,,5,202689,Kemba Walker,1.610613e+09,Charlotte,Hornets,CHA,0,0,,,,,,0,0,,,,,,18,-229,40,3,0,[CHA] Walker 3pt Shot: Missed,ORL,CHA,2019-02-14,2019,1610612753,1610612766,CHA,missed_shot,jump shot,0.0,0,jump,80,18.0,1,0,0,0,0,0,,0,D.J. Augustin,201571,Nikola Vucevic,202696,Evan Fournier,203095,Jonathan Isaac,1628371,Wes Iwundu,1628411,Marvin Williams,101107,Nicolas Batum,201587,Kemba Walker,202689,Jeremy Lamb,203087,Cody Zeller,203469


In [87]:
# scratch cell: works out the away players seen before the first away sub,
# this time over the whole game dataframe instead of a single period

# keep only the substitution rows and split them by the side whose
# description is filled in
subs_df = test[test['event_type_de'] == 'substitution']
away_subs = subs_df[subs_df['visitordescription'].notnull()]
home_subs = subs_df[subs_df['homedescription'].notnull()]

# ids of the players subbed into the game, to check against later when
# determining the starting lineups
away_subbed_players = list(away_subs['player2_id'].unique())
home_subbed_players = list(home_subs['player2_id'].unique())

# index labels of every sub, the first one marks the end of the stretch in
# which only starters can have recorded an event
away_indexes = list(away_subs.index)
home_indexes = list(home_subs.index)

away_abbrev = test['away_team_abbrev'].unique()[0]
away_events = test[(test['event_team'] == away_abbrev)
                   & test['player1_name'].notnull()
                   & (test['player1_team_abbreviation'] == away_abbrev)
                   & (test['is_block'] == 0)
                   & (test['is_steal'] == 0)]
away_starting_line = list(away_events.loc[:away_indexes[0], :]['player1_id'].unique())



In [88]:
away_starting_line
    
    

[1628378, 203497, 202324, 204060, 201937]

In [73]:
# Exploratory subset used while debugging the starting-lineup filter: keeps
# the rows whose event_team is the away team and that have a non-null
# player1_name. The remaining filter clauses are disabled for now.
blah = test[(test['event_team'] == test['away_team_abbrev'].unique()[0]) 
            & (~pd.isnull(test['player1_name'])) 
           
            #& (test['player1_team_abbreviation'] == test['away_team_abbrev'].unique()[0])
            ]
# The bare string below parks the disabled filter clauses. It is not a
# comment: as the last expression of the cell it is evaluated and echoed as
# the cell's output.
'''
& (test['player1_team_abbreviation'] == test['away_team_abbrev'].unique()[0])
& (test.is_block == 0)
& (test.is_steal == 0)
 & (test.is_block == 0)
            & (test.is_steal == 0)]
'''
    
    
    
    
    

"\n& (test['player1_team_abbreviation'] == test['away_team_abbrev'].unique()[0])\n& (test.is_block == 0)\n& (test.is_steal == 0)\n & (test.is_block == 0)\n            & (test.is_steal == 0)]\n"

In [74]:
blah

Unnamed: 0,game_id,eventnum,eventmsgtype,eventmsgactiontype,period,wctimestring,pctimestring,homedescription,neutraldescription,visitordescription,score,scoremargin,person1type,player1_id,player1_name,player1_team_id,player1_team_city,player1_team_nickname,player1_team_abbreviation,person2type,player2_id,player2_name,player2_team_id,player2_team_city,player2_team_nickname,player2_team_abbreviation,person3type,player3_id,player3_name,player3_team_id,player3_team_city,player3_team_nickname,player3_team_abbreviation,evt,locX,locY,hs,vs,de,home_team_abbrev,away_team_abbrev,game_date,season,home_team_id,away_team_id,event_team,event_type_de,shot_type_de,shot_made,is_block,shot_type,seconds_elapsed,event_length,is_three,points_made,is_d_rebound,is_o_rebound,is_turnover,is_steal,foul_type,is_putback
1,0021800549,4,10,0,1,7:41 PM,12:00,Jump Ball Ibaka vs. Gobert: Tip to Siakam,,,,,4,201586,Serge Ibaka,1.610613e+09,Toronto,Raptors,TOR,5,203497,Rudy Gobert,1.610613e+09,Utah,Jazz,UTA,4,1627783,Pascal Siakam,1.610613e+09,Toronto,Raptors,TOR,4,0,-80,0,0,Jump Ball Ibaka vs Gobert (Siakam gains posses...,TOR,UTA,2019-01-01,2019,1610612761,1610612762,UTA,jump-ball,,,0,,0,0.0,0,0,0,0,0,0,,0
2,0021800549,7,1,58,1,7:41 PM,11:36,Ibaka 2' Turnaround Hook Shot (2 PTS),,,0 - 2,2,4,201586,Serge Ibaka,1.610613e+09,Toronto,Raptors,TOR,0,0,,,,,,0,0,,,,,,7,1,20,2,0,[TOR 2-0] Ibaka Turnaround Hook Shot: Made (2 ...,TOR,UTA,2019-01-01,2019,1610612761,1610612762,UTA,shot,turnaround hook shot,1.0,0,hook,24,24.0,0,2,0,0,0,0,,0
4,0021800549,9,2,86,1,7:42 PM,11:03,MISS Ibaka 11' Turnaround Fadeaway Shot,,,,,4,201586,Serge Ibaka,1.610613e+09,Toronto,Raptors,TOR,0,0,,,,,,0,0,,,,,,9,107,-3,2,2,[TOR] Ibaka Turnaround Fadeaway shot: Missed,TOR,UTA,2019-01-01,2019,1610612761,1610612762,UTA,missed_shot,turnaround fadeaway,0.0,0,jump,57,20.0,0,0,0,0,0,0,,0
7,0021800549,13,1,96,1,7:42 PM,10:34,Green 1' Turnaround Bank Hook Shot (2 PTS) (Va...,,,4 - 4,TIE,4,201980,Danny Green,1.610613e+09,Toronto,Raptors,TOR,4,1627832,Fred VanVleet,1.610613e+09,Toronto,Raptors,TOR,0,0,,,,,,13,12,2,4,4,[TOR 4-4] Green Turnaround Bank Hook Shot: Mad...,TOR,UTA,2019-01-01,2019,1610612761,1610612762,UTA,shot,turnaround bank hook shot,1.0,0,hook,86,16.0,0,2,0,0,0,0,,0
8,0021800549,15,6,2,1,7:43 PM,10:17,Siakam S.FOUL (P1.T1) (M.Lindsay),,,,,4,1627783,Pascal Siakam,1.610613e+09,Toronto,Raptors,TOR,5,202324,Derrick Favors,1.610613e+09,Utah,Jazz,UTA,1,0,,,,,,15,8,10,4,4,[TOR] Siakam Foul: Shooting (1 PF) (2 FTA) (M ...,TOR,UTA,2019-01-01,2019,1610612761,1610612762,UTA,foul,,,0,,103,17.0,0,0,0,0,0,0,shooting,0
12,0021800549,20,4,0,1,7:44 PM,10:14,Siakam REBOUND (Off:0 Def:1),,,,,4,1627783,Pascal Siakam,1.610613e+09,Toronto,Raptors,TOR,0,0,,,,,,0,0,,,,,,20,0,-80,4,4,[TOR] Siakam Rebound (Off:0 Def:1),TOR,UTA,2019-01-01,2019,1610612761,1610612762,UTA,rebound,,,0,,106,3.0,0,0,1,0,0,0,,0
13,0021800549,21,1,108,1,7:44 PM,10:05,Siakam Cutting Dunk Shot (2 PTS) (VanVleet 2 ...,,,4 - 6,2,4,1627783,Pascal Siakam,1.610613e+09,Toronto,Raptors,TOR,4,1627832,Fred VanVleet,1.610613e+09,Toronto,Raptors,TOR,0,0,,,,,,21,2,3,6,4,[TOR 6-4] Siakam Cutting Dunk Shot: Made (2 PT...,TOR,UTA,2019-01-01,2019,1610612761,1610612762,UTA,shot,cutting dunk shot,1.0,0,dunk,115,9.0,0,2,0,0,0,0,,0
15,0021800549,24,4,0,1,7:44 PM,9:49,VanVleet REBOUND (Off:0 Def:1),,,,,4,1627832,Fred VanVleet,1.610613e+09,Toronto,Raptors,TOR,0,0,,,,,,0,0,,,,,,24,231,50,6,4,[TOR] VanVleet Rebound (Off:0 Def:1),TOR,UTA,2019-01-01,2019,1610612761,1610612762,UTA,rebound,,,0,,131,3.0,0,0,1,0,0,0,,0
16,0021800549,25,5,45,1,7:44 PM,9:39,Leonard Out of Bounds - Bad Pass Turnover Turn...,,,,,4,202695,Kawhi Leonard,1.610613e+09,Toronto,Raptors,TOR,0,0,,,,,,1,0,,,,,,25,28,82,6,4,[TOR] Leonard Turnover : Out of Bounds - Bad P...,TOR,UTA,2019-01-01,2019,1610612761,1610612762,UTA,turnover,,,0,,141,10.0,0,0,0,0,1,0,,0
17,0021800549,26,2,6,1,7:45 PM,9:25,Ibaka BLOCK (1 BLK),,MISS Rubio 1' Driving Layup,,,5,201937,Ricky Rubio,1.610613e+09,Utah,Jazz,UTA,0,0,,,,,,4,201586,Serge Ibaka,1.610613e+09,Toronto,Raptors,TOR,26,-7,6,6,4,[UTA] Rubio Driving Layup Shot: Missed,TOR,UTA,2019-01-01,2019,1610612761,1610612762,UTA,missed_shot,driving layup,0.0,1,layup,155,14.0,0,0,0,0,0,0,,0


In [16]:
v2_dict

{'resource': 'playbyplay',
 'parameters': {'GameID': '0021800549', 'StartPeriod': 1, 'EndPeriod': 10},
 'resultSets': [{'name': 'PlayByPlay',
   'headers': ['GAME_ID',
    'EVENTNUM',
    'EVENTMSGTYPE',
    'EVENTMSGACTIONTYPE',
    'PERIOD',
    'WCTIMESTRING',
    'PCTIMESTRING',
    'HOMEDESCRIPTION',
    'NEUTRALDESCRIPTION',
    'VISITORDESCRIPTION',
    'SCORE',
    'SCOREMARGIN',
    'PERSON1TYPE',
    'PLAYER1_ID',
    'PLAYER1_NAME',
    'PLAYER1_TEAM_ID',
    'PLAYER1_TEAM_CITY',
    'PLAYER1_TEAM_NICKNAME',
    'PLAYER1_TEAM_ABBREVIATION',
    'PERSON2TYPE',
    'PLAYER2_ID',
    'PLAYER2_NAME',
    'PLAYER2_TEAM_ID',
    'PLAYER2_TEAM_CITY',
    'PLAYER2_TEAM_NICKNAME',
    'PLAYER2_TEAM_ABBREVIATION',
    'PERSON3TYPE',
    'PLAYER3_ID',
    'PLAYER3_NAME',
    'PLAYER3_TEAM_ID',
    'PLAYER3_TEAM_CITY',
    'PLAYER3_TEAM_NICKNAME',
    'PLAYER3_TEAM_ABBREVIATION'],
   'rowSet': [['0021800549',
     2,
     12,
     0,
     1,
     '7:41 PM',
     '12:00',
     None,
    

In [32]:










#this part fetches the lineups; it should probably be moved into a function
#since it has to be repeated for each period and the results matched up

    # Query the leaguedashlineups endpoint for the five-man groups
    # (GroupQuantity=5) recorded on game_date, in the given period, against
    # the opponent identified by team_id.
    home_lineup_api = (
        'https://stats.nba.com/stats/leaguedashlineups?Conference=&'
        f'DateFrom={game_date}&DateTo={game_date}&Division=&'
        'GameSegment=&GroupQuantity=5&LastNGames=0&LeagueID=&Location=&'
        f'MeasureType=Base&Month=0&OpponentTeamID={team_id}&Outcome=&PORound=&'
        f'PaceAdjust=N&PerMode=Totals&Period={period}&PlusMinus=N&Rank=N&'
        'Season=2018-19&SeasonSegment=&SeasonType=Regular+'
        'Season&ShotClockRange=&TeamID=&VsConference=&VsDivision='
    )

    # the user agent header is required or the api answers with a 403
    home_lineup_req = requests.get(home_lineup_api, headers=user_agent)
    home_lineup_dict = home_lineup_req.json()

    # Column 1 of every returned row holds the dash-separated player ids of
    # one lineup.
    group_id_strings = [row[1] for row in home_lineup_dict['resultSets'][0]['rowSet']]

    # Echo each raw id string, then break it into its non-empty id tokens
    # (the ids are kept as strings here).
    lineups = []
    for group_ids in group_id_strings:
        print(group_ids)
        lineups.append([token for token in group_ids.split('-') if token])

#TODO parse mtype column to get all the shot types being taken

#this pulls out the starting lineups from the play by play if every player
#on the court has done something that is recorded by the play by play
#if not then I will need to check the players against the lineups returned
#from the api and weed out which one doesn't fit. This needs to be repeated
#for every period
    periods = []
    for period in range(1, clean_df['PERIOD'].max()+1):
        #subsets main dataframe by period and subsets into a home and away subs
        period_df = clean_df[clean_df['PERIOD'] == period].reset_index()
        subs_df = period_df[(period_df.event_type_de == 'substitution')]
        away_subs = subs_df[pd.isnull(subs_df.VISITORDESCRIPTION) == 0]
        home_subs = subs_df[pd.isnull(subs_df.HOMEDESCRIPTION) == 0]

        #getting player ids of the players subbed into the game to check against later
        #to determine starting lineups
        away_subbed_players = list(away_subs['PLAYER2_ID'].unique())
        home_subbed_players = list(home_subs['PLAYER2_ID'].unique())
        #gets the index of the first sub for home and away to get the players who started
        #the period by subsetting the dataframe to all actions before the first sub for
        #each team
        away_indexes = list(away_subs.index)
        home_indexes = list(home_subs.index)
        #create variables for the lineup API in case just looking at
        game_date = str(period_df.game_date.unique()[0])[:10]
        away_team_id = period_df.away_team_id.unique()[0]
        home_team_id = period_df.home_team_id.unique()[0]
        api_season = f'{period_df.season.unique()[0]-1}-{str(period_df.season.unique()[0])[2:]}'
        home_lineup_api = ('https://stats.nba.com/stats/leaguedashlineups?Conference=&'
                           f'DateFrom={game_date}&DateTo={game_date}&Division=&'
                           'GameSegment=&GroupQuantity=5&LastNGames=0&LeagueID=&Location=&'
                           f'MeasureType=Base&Month=0&OpponentTeamID={away_team_id}&Outcome=&PORound=&'
                           f'PaceAdjust=N&PerMode=Totals&Period={period}&PlusMinus=N&Rank=N&'
                           f'Season={api_season}&SeasonSegment=&SeasonType=Regular+'
                           'Season&ShotClockRange=&TeamID=&VsConference=&VsDivision=')

        away_lineup_api = ('https://stats.nba.com/stats/leaguedashlineups?Conference=&'
                           f'DateFrom={game_date}&DateTo={game_date}&Division=&'
                           'GameSegment=&GroupQuantity=5&LastNGames=0&LeagueID=&Location=&'
                           f'MeasureType=Base&Month=0&OpponentTeamID={home_team_id}&Outcome=&PORound=&'
                           f'PaceAdjust=N&PerMode=Totals&Period={period}&PlusMinus=N&Rank=N&'
                           f'Season={api_season}&SeasonSegment=&SeasonType=Regular+'
                           'Season&ShotClockRange=&TeamID=&VsConference=&VsDivision=')


        home_lineup_req = requests.get(home_lineup_api, headers=user_agent)

        home_lineup_dict = home_lineup_req.json()

        #extract the player ids of each lineup
        home_lineups = []
        for lineup in home_lineup_dict['resultSets'][0]['rowSet']:
            home_lineups.append([lineup[1]])

        #clean the id strings into a list of ids for each lineup and convert them to ints
        for x in range(len(home_lineups)):
            home_lineups[x] = list(map(int,list(filter(None,home_lineups[x][0].split('-')))))

        away_lineup_req = requests.get(away_lineup_api, headers=user_agent)
        away_lineup_dict = away_lineup_req.json()

        #extract the player ids of each lineup
        away_lineups = []
        for lineup in away_lineup_dict['resultSets'][0]['rowSet']:
            away_lineups.append([lineup[1]])

        #clean the id strings into a list of ids for each lineup and convert them to ints
        for x in range(len(away_lineups)):
            away_lineups[x] = list(map(int,list(filter(None,away_lineups[x][0].split('-')))))
        #looking at the people before the first sub and if
        #it doesn't equal five then continue till next sub excluding the id of the first
        #subbed player and etc. until a list of five players is achieved if five is never
        #achieved by end of period then goto lineup api



        away_starting_line = list(period_df[(period_df.event_team == away_team_abbrev)
                                       & (~pd.isnull(period_df.PLAYER1_NAME))
                                       & (period_df.PLAYER1_TEAM_ABBREVIATION == away_team_abbrev)
                                       & (period_df.is_block == 0)
                                       & (period_df.is_steal == 0)]
                                        .loc[:away_indexes[0], :]['PLAYER1_ID'].unique())

        home_starting_line = list(period_df[(period_df.event_team == home_team_abbrev)
                                       & (~pd.isnull(period_df.PLAYER1_NAME))
                                       & (period_df.PLAYER1_TEAM_ABBREVIATION == home_team_abbrev)
                                       & (period_df.is_block == 0)
                                       & (period_df.is_steal == 0)]
                                        .loc[:home_indexes[0], :]['PLAYER1_ID'].unique())
#theres a large possibility that my catching of posssible lines might return
#two possible lines that fit the criteria in extreme edge cases may have to
#resort to brute forcing it if that happens often
        if len(away_starting_line) < 5:
            possible_away_lines = []
            for x in away_lineups:
                if set(away_starting_line).issubset(x):
                    possible_away_lines.append(x)
            if len(possible_away_lines) > 1:
                index = 0
                for line in possible_away_lines:
                    for player in line:
                        if player in away_subs and player not in away_starting_line:
                            index = possible_home_lines.index(line)
                    possible_home_lines.pop(index)
            away_ids_names = [(x, period_df[period_df['PLAYER1_ID'] == x]['PLAYER1_NAME'].unique()[0]) for x in possible_away_lines[0]]
        else:
            away_ids_names = [(x, period_df[period_df['PLAYER1_ID'] == x]['PLAYER1_NAME'].unique()[0]) for x in away_starting_line]
        #repeating the process for home players
        if len(home_starting_line) < 5:
            possible_home_lines = []
            for x in home_lineups:
                if set(home_starting_line).issubset(x):
                    possible_home_lines.append(x)
            if len(possible_home_lines) > 1:
                index = 0
                for line in possible_home_lines:
                    for player in line:
                        if player in home_subs and player not in home_starting_line:
                            index = possible_home_lines.index(line)
                    possible_home_lines.pop(index)
            home_ids_names = [(x, period_df[period_df['PLAYER1_ID'] == x]['PLAYER1_NAME'].unique()[0]) for x in possible_home_lines[0]]
        else:
            home_ids_names = [(x, period_df[period_df['PLAYER1_ID'] == x]['PLAYER1_NAME'].unique()[0]) for x in home_starting_line]




        for x in range(period_df.shape[0]):
            if period_df.iloc[x, :]['event_type_de'] == 'substitution' and pd.isnull(period_df.iloc[x, :]['VISITORDESCRIPTION']) == 1:
                home_ids_names = [ids for ids in home_ids_names if ids[0] != period_df.iloc[x, :]['PLAYER1_ID']]
                home_ids_names.append((period_df.iloc[x, 21], period_df.iloc[x,22]))
                period_df.iloc[x, 63] = home_ids_names[0][0]
                period_df.iloc[x, 62] = home_ids_names[0][1]
                period_df.iloc[x, 65] = home_ids_names[1][0]
                period_df.iloc[x, 64] = home_ids_names[1][1]
                period_df.iloc[x, 67] = home_ids_names[2][0]
                period_df.iloc[x, 66] = home_ids_names[2][1]
                period_df.iloc[x, 69] = home_ids_names[3][0]
                period_df.iloc[x, 68] = home_ids_names[3][1]
                period_df.iloc[x, 71] = home_ids_names[4][0]
                period_df.iloc[x, 70] = home_ids_names[4][1]
                period_df.iloc[x, 73] = away_ids_names[0][0]
                period_df.iloc[x, 72] = away_ids_names[0][1]
                period_df.iloc[x, 75] = away_ids_names[1][0]
                period_df.iloc[x, 74] = away_ids_names[1][1]
                period_df.iloc[x, 77] = away_ids_names[2][0]
                period_df.iloc[x, 76] = away_ids_names[2][1]
                period_df.iloc[x, 79] = away_ids_names[3][0]
                period_df.iloc[x, 78] = away_ids_names[3][1]
                period_df.iloc[x, 81] = away_ids_names[4][0]
                period_df.iloc[x, 80] = away_ids_names[4][1]
            elif period_df.iloc[x, :]['event_type_de'] == 'substitution' and pd.isnull(period_df.iloc[x, :]['HOMEDESCRIPTION']) == 1:
                away_ids_names = [ids for ids in away_ids_names if ids[0] != period_df.iloc[x, :]['PLAYER1_ID']]
                away_ids_names.append((period_df.iloc[x,21], period_df.iloc[x,22]))
                period_df.iloc[x, 63] = home_ids_names[0][0]
                period_df.iloc[x, 62] = home_ids_names[0][1]
                period_df.iloc[x, 65] = home_ids_names[1][0]
                period_df.iloc[x, 64] = home_ids_names[1][1]
                period_df.iloc[x, 67] = home_ids_names[2][0]
                period_df.iloc[x, 66] = home_ids_names[2][1]
                period_df.iloc[x, 69] = home_ids_names[3][0]
                period_df.iloc[x, 68] = home_ids_names[3][1]
                period_df.iloc[x, 71] = home_ids_names[4][0]
                period_df.iloc[x, 70] = home_ids_names[4][1]
                period_df.iloc[x, 73] = away_ids_names[0][0]
                period_df.iloc[x, 72] = away_ids_names[0][1]
                period_df.iloc[x, 75] = away_ids_names[1][0]
                period_df.iloc[x, 74] = away_ids_names[1][1]
                period_df.iloc[x, 77] = away_ids_names[2][0]
                period_df.iloc[x, 76] = away_ids_names[2][1]
                period_df.iloc[x, 79] = away_ids_names[3][0]
                period_df.iloc[x, 78] = away_ids_names[3][1]
                period_df.iloc[x, 81] = away_ids_names[4][0]
                period_df.iloc[x, 80] = away_ids_names[4][1]
            else:
                period_df.iloc[x, 63] = home_ids_names[0][0]
                period_df.iloc[x, 62] = home_ids_names[0][1]
                period_df.iloc[x, 65] = home_ids_names[1][0]
                period_df.iloc[x, 64] = home_ids_names[1][1]
                period_df.iloc[x, 67] = home_ids_names[2][0]
                period_df.iloc[x, 66] = home_ids_names[2][1]
                period_df.iloc[x, 69] = home_ids_names[3][0]
                period_df.iloc[x, 68] = home_ids_names[3][1]
                period_df.iloc[x, 71] = home_ids_names[4][0]
                period_df.iloc[x, 70] = home_ids_names[4][1]
                period_df.iloc[x, 73] = away_ids_names[0][0]
                period_df.iloc[x, 72] = away_ids_names[0][1]
                period_df.iloc[x, 75] = away_ids_names[1][0]
                period_df.iloc[x, 74] = away_ids_names[1][1]
                period_df.iloc[x, 77] = away_ids_names[2][0]
                period_df.iloc[x, 76] = away_ids_names[2][1]
                period_df.iloc[x, 79] = away_ids_names[3][0]
                period_df.iloc[x, 78] = away_ids_names[3][1]
                period_df.iloc[x, 81] = away_ids_names[4][0]
                period_df.iloc[x, 80] = away_ids_names[4][1]
        periods.append(period_df)

    new_df = pd.concat(periods).reset_index()
    return new_df

