In [1]:
import pandas as pd
import requests
import re

In [2]:
# Identifier of the game whose play-by-play feed this notebook downloads;
# it is interpolated into the feed URL by url_for_game().
GAME_ID = "6060646"

In [3]:
def url_for_game(game):
    """Return the play-by-play JSON URL for the given game identifier.

    Args:
        game: Game identifier; it is formatted into the URL path as-is.

    Returns:
        The URL string for that game's ``pbp.json`` resource.
    """
    template = "https://data.ncaa.com/casablanca/game/{game}/pbp.json"
    return template.format(game=game)
# Download the play-by-play feed for the configured game.
pbp_url = url_for_game(GAME_ID)
# The timeout keeps a stalled connection from hanging the notebook forever, and
# raise_for_status() reports HTTP errors (for example an unknown game id) right
# here instead of as a confusing JSON decoding failure further down.
pbp_response = requests.get(pbp_url, timeout=30)
pbp_response.raise_for_status()
game_json = pbp_response.json()

In [4]:
def list_to_string(list):
    """Join the given strings into one string, separated by ``', '``.

    Args:
        list: Iterable of strings to join.

    Returns:
        The joined string; an empty iterable gives ``''``.
    """
    separator = ', '
    return separator.join(list)

def string_to_list(string):
    """Split a ``', '``-separated string into a list of its parts.

    Args:
        string: The joined string, or a missing-value marker
            (``None``, float NaN) or ``''`` for "no entries".

    Returns:
        A new list of the parts; ``[]`` for ``None``, NaN or ``''``.
    """
    # Identity comparison is the correct (and safe) way to test for None.
    if string is None or string == '':
        return []
    # NaN is the only float that is not equal to itself; pandas uses it as a
    # missing-value marker, and it has no .split(), so treat it as empty too.
    if isinstance(string, float) and string != string:
        return []
    return string.split(', ')

def find_player(text):
    """Extract the player named in a play description as ``"FIRST LAST"``.

    The description is searched for ``"Last, First"`` pairs (for example
    ``"Jumper MISSED by UR's Armstead, Khyree"``). When several pairs are
    present, the last one in the text is used.

    Args:
        text: The play description. Non-string values (``None``, NaN) are
            treated as containing no player.

    Returns:
        The upper-cased name in ``"FIRST LAST"`` order, or ``""`` when no
        ``"Last, First"`` pair is found.
    """
    if not isinstance(text, str):
        return ""

    # A name part is a run of letters, optionally continued by apostrophe- or
    # hyphen-joined runs, so "O'Neal" and "Smith-Jones" are kept whole instead
    # of being cut down to the fragment after the punctuation.
    name_part = r"[a-zA-Z]+(?:['-][a-zA-Z]+)*"
    pattern = rf"\b({name_part}), ({name_part})\b"
    matches = re.findall(pattern, text)

    if not matches:
        return ""

    # With two capture groups, findall yields (last, first) tuples.
    last, first = matches[-1]
    return f"{first.upper()} {last.upper()}"

def get_df_for_period(period=1):
    """Build the play-by-play DataFrame for one period of the loaded game.

    Reads ``game_json["periods"][period - 1]["playStats"]``, so ``period=1``
    selects the first entry of ``"periods"``.

    Args:
        period: Period number; it is reduced by one to index ``"periods"``.

    Returns:
        A DataFrame of that period's plays with four added columns:
        ``homePlayer`` / ``visitorPlayer`` (``find_player`` applied to
        ``homeText`` / ``visitorText``) and ``homePlayers`` /
        ``visitorPlayers`` (initialised to ``None``).
    """
    play_stats = game_json["periods"][period - 1]["playStats"]
    plays = pd.DataFrame(play_stats)

    # Derive both name columns, then append them in a fixed column order.
    home_names = plays["homeText"].apply(find_player)
    visitor_names = plays["visitorText"].apply(find_player)
    plays["homePlayer"] = home_names
    plays["visitorPlayer"] = visitor_names

    # Lineup columns start out empty.
    for lineup_column in ("homePlayers", "visitorPlayers"):
        plays[lineup_column] = None

    return plays

In [5]:
def add_player_to_list_retro(row, player, is_home, idx):
    """Add ``player`` to the lineup string stored in ``row`` if it is missing.

    Rows whose label (``row.name``) is greater than ``idx`` are skipped.

    Args:
        row: A row Series holding ``homePlayers`` / ``visitorPlayers``.
        player: Player name to add.
        is_home: Selects the ``homePlayers`` column when true, otherwise
            ``visitorPlayers``.
        idx: Highest row label that should be updated.

    Returns:
        ``None`` for skipped rows; otherwise the same ``row`` object, whose
        lineup entry has been rewritten in place.
    """
    # NOTE(review): the assignment below changes the `row` object that was
    # passed in. Whether that also changes the DataFrame the row came from
    # depends on how the caller obtained `row` - confirm before relying on it.
    if row.name > idx:
        return None

    lineup_column = "homePlayers" if is_home else "visitorPlayers"

    names = string_to_list(row[lineup_column])
    if player not in names:
        names.append(player)

    # Written back even when nothing was added, so the entry is always a string.
    row[lineup_column] = list_to_string(names)
    return row

In [6]:
def fill_in_lineups(df):
    """Fill ``homePlayers`` / ``visitorPlayers`` with the lineup at every play.

    For each team the plays are walked in row order while a running lineup is
    carried from one play to the next:

    * a play whose ``<team>Text`` contains ``"Subbing in"`` adds the play's
      ``<team>Player`` to the lineup from that play onwards;
    * any other play by a player who is not yet in the lineup adds that player
      to the current play and to every earlier play that does not already
      list them (the player is taken to have been on the floor all along);
    * a play whose ``<team>Text`` contains ``"Subbing out"`` removes the
      player from that play onwards.

    Each play's lineup is stored as a ``', '``-joined string.

    The lineups are tracked in plain Python lists and written to the frame
    once per team. pandas documents mutating the row passed to
    ``DataFrame.apply`` as unsupported and the rows yielded by ``iterrows`` as
    copies, so edits made through such rows are not guaranteed to reach the
    frame. The first play is processed like every other play, so a player who
    appears only there is still recorded.

    Args:
        df: Play-by-play frame with ``homeText``, ``visitorText``,
            ``homePlayer`` and ``visitorPlayer`` columns. It is modified in
            place; an empty frame is left untouched.

    Returns:
        None.
    """
    if len(df) == 0:
        return

    for team in ("home", "visitor"):
        player_col = f"{team}Player"
        lineup_col = f"{team}Players"
        text_col = f"{team}Text"

        lineups = []   # lineups[k] holds the names on the floor at play k
        on_court = []  # running lineup carried from play to play

        for player, text in zip(df[player_col], df[text_col]):
            # Every play gets its own copy so that later substitutions do not
            # rewrite the lineups already recorded for earlier plays.
            on_court = list(on_court)
            lineups.append(on_court)

            if not isinstance(player, str) or player == "":
                continue
            if not isinstance(text, str):
                text = ""

            if "Subbing in" in text:
                if player not in on_court:
                    on_court.append(player)
                continue

            if player not in on_court:
                # Backfill: `on_court` is the last element of `lineups`, so
                # this also adds the player to the current play.
                for recorded in lineups:
                    if player not in recorded:
                        recorded.append(player)

            if "Subbing out" in text:
                # The backfill above guarantees the player is present here.
                on_court.remove(player)

        df[lineup_col] = [", ".join(names) for names in lineups]

In [7]:
# Raise the per-column display width to 300 characters so that long text
# cells are not cut short when a frame is rendered.
pd.set_option('display.max_colwidth', 300)

In [8]:
# Build the play-by-play frame for period 1.
df = get_df_for_period(1)
# Fill the lineup columns; the function works on df itself and its return
# value is not used.
fill_in_lineups(df)

In [9]:
# Show the frame, including the filled-in lineup columns, as the cell output.
df

Unnamed: 0,score,time,visitorText,homeText,homePlayer,visitorPlayer,homePlayers,visitorPlayers
0,,19:47,"Jumper MISSED by UR's Armstead, Khyree",,,KHYREE ARMSTEAD,"KEVIN CHARLES, JAYDEN WILLIAMS, RYAN ELZY, AARON GREGG, DARRY MOORE","NATHANIEL JOHNSON, NEIL OWENS, CHOZEN AMADI, ROBERT POWER, KHYREE ARMSTEAD"
1,,19:45,,"ETBU Defensive REBOUND by Charles, Kevin",KEVIN CHARLES,,"KEVIN CHARLES, JAYDEN WILLIAMS, RYAN ELZY, AARON GREGG, DARRY MOORE","NATHANIEL JOHNSON, NEIL OWENS, CHOZEN AMADI, ROBERT POWER, KHYREE ARMSTEAD"
2,0-3,19:35,,"3 Pointer GOOD by ETBU's Williams, Jayden",JAYDEN WILLIAMS,,"KEVIN CHARLES, JAYDEN WILLIAMS, RYAN ELZY, AARON GREGG, DARRY MOORE","NATHANIEL JOHNSON, NEIL OWENS, CHOZEN AMADI, ROBERT POWER, KHYREE ARMSTEAD"
3,,19:35,,"ETBU Assist by Elzy, Ryan",RYAN ELZY,,"KEVIN CHARLES, JAYDEN WILLIAMS, RYAN ELZY, AARON GREGG, DARRY MOORE","NATHANIEL JOHNSON, NEIL OWENS, CHOZEN AMADI, ROBERT POWER, KHYREE ARMSTEAD"
4,,19:10,"Jumper MISSED by UR's Johnson, Nathaniel",,,NATHANIEL JOHNSON,"KEVIN CHARLES, JAYDEN WILLIAMS, RYAN ELZY, AARON GREGG, DARRY MOORE","NATHANIEL JOHNSON, NEIL OWENS, CHOZEN AMADI, ROBERT POWER, KHYREE ARMSTEAD"
...,...,...,...,...,...,...,...,...
368,,00:16,,"ETBU Steal by Gregg, Aaron",AARON GREGG,,"KEVIN CHARLES, AARON GREGG, SKYLER STUTTS, KURT LABEAUD, JAYDEN WILLIAMS","ROBERT POWER, CHOZEN AMADI, JAKOB POULTON, NEIL OWENS, NATHAN ANDERSON"
369,41-60,00:09,,"Layup GOOD by ETBU's Gregg, Aaron",AARON GREGG,,"KEVIN CHARLES, AARON GREGG, SKYLER STUTTS, KURT LABEAUD, JAYDEN WILLIAMS","ROBERT POWER, CHOZEN AMADI, JAKOB POULTON, NEIL OWENS, NATHAN ANDERSON"
370,,00:05,"UR Turnover by Owens, Neil",,,NEIL OWENS,"KEVIN CHARLES, AARON GREGG, SKYLER STUTTS, KURT LABEAUD, JAYDEN WILLIAMS","ROBERT POWER, CHOZEN AMADI, JAKOB POULTON, NEIL OWENS, NATHAN ANDERSON"
371,,00:05,,"ETBU Steal by Labeaud, Kurt",KURT LABEAUD,,"KEVIN CHARLES, AARON GREGG, SKYLER STUTTS, KURT LABEAUD, JAYDEN WILLIAMS","ROBERT POWER, CHOZEN AMADI, JAKOB POULTON, NEIL OWENS, NATHAN ANDERSON"


In [256]:
# Print the index of every play whose home lineup does not hold exactly five
# players.
for i, lineup_text in df["homePlayers"].items():
    players = string_to_list(lineup_text)
    if len(players) == 5:
        continue
    print(i)

51
52
53
64
65
66
78
89
105
117
118
119
133
137
138
139
145
161
174
180
192
207
216
224
225
226
236
237
238
