In [23]:
#load libraries
import numpy as np
import pandas as pd
pd.set_option('display.max_colwidth', None)
from datetime import datetime
from copy import deepcopy
import re, ast, os

In [24]:
current_file_path = os.path.dirname(os.path.abspath("__file__"))
data_path = os.path.join(current_file_path, "..", "data")
filename = "Ottawa_Carleton_12_06_2023.csv"
file_path = os.path.join(data_path, filename)
raw_data = pd.read_csv(file_path)

In [25]:
player_event_df = raw_data.copy()

In [26]:
event_list = ['made layup','missed layup','Assist','Turnover','defensive rebound','enters the game','goes to the bench','missed 3-pt. jump shot','Foul','Steal','made free throw','missed free throw','made jump shot','made 3-pt. jump shot','missed jump shot','offensive rebound']

In [27]:
def make_swap_uppernames(ls):
    formatted_players = []
    for player in ls:
        name_parts = player.split(" ")
        first_name = " ".join(name_parts[:-1])
        last_name = name_parts[-1]
        formatted_name = last_name.upper() + "," + first_name.upper()
        formatted_name = formatted_name.replace(".", "")
        formatted_players.append(formatted_name)

    return formatted_players


def players_list_and_starters(df:pd.DataFrame, quarter_index:int, HorV:str):
    p_dict = ast.literal_eval(df.iloc[quarter_index][HorV])
    p_list = p_dict["starters"].copy()
    p_list.extend(p_dict["reserves"])
    p_list.remove("Team")
    
    sts = p_dict["starters"].copy()

    p_list = make_swap_uppernames(p_list)
    sts = make_swap_uppernames(sts)

    return p_list, sts

### Simply building a dataframe from main one and getting kinds of event with respect to their player

In [28]:
pattern = "([A-Z]+\W*[A-Z]+,[A-Z]+\W*[A-Z]+)"
player_event_df["H-event"] = player_event_df["H-event"].fillna("No Event")
player_event_df["V-event"] = player_event_df["V-event"].fillna("No Event")
for index, row in player_event_df.iterrows():
    for side in ["H", "V"]:
        player = re.search(pattern, row[f"{side}-event"])
        if player:
            player = player[0].strip()
            player_event_df.loc[index, f"{side}_player"] = player
        
        for event in event_list:
            if event in row[f"{side}-event"]:
                player_event_df.loc[index, f"{side}_exactevent"] = event

player_event_df["V_player"] = player_event_df["V_player"].fillna("No Player")
player_event_df["V_player"] = player_event_df["V_player"].fillna("No Player")
player_event_df["V_exactevent"] = player_event_df["V_exactevent"].fillna("No Event")
player_event_df["V_exactevent"] = player_event_df["V_exactevent"].fillna("No Event")

In [29]:
player_event_df.head(n=20)

Unnamed: 0.1,Unnamed: 0,Time,Home,H-event,Score,V-event,Visitor,V_player,V_exactevent,H_player,H_exactevent
0,0,Quarter 1,"{'starters': ['Kevin Otoo', 'Dragan Stajic', 'Justin Ndjock-Tadjore', 'Brock Newton', 'Jacques-Mélaine Guemeta'], 'reserves': ['Cid Ruhamyandekwe', 'Liban Abdalla', 'Khalifa Koulamallah', 'Thomas Armstrong', 'Team']}",Quarter 1,Quarter 1,Quarter 1,"{'starters': ['Emmanuel Ugbah', 'Xavier Spencer', 'Wazir Latiff', 'Augustas Brazdeikis', 'Noah Horobetz Simpson'], 'reserves': ['Marjok Okado', 'Aubrey Dorey-Havens', 'Emanuel Milon', 'Reginald Jean Seraphin', 'Daniel Smith', 'Team']}",No Player,No Event,,
1,1,10:00,,No Event,0 - 3,"SPENCER,XAVIER made 3-pt. jump shot",Carleton,"SPENCER,XAVIER",made 3-pt. jump shot,,
2,2,10:00,,No Event,0 - 3,"Assist by SIMPSON,NOAH HOROBETZ",Carleton,"SIMPSON,NOAH HOROBETZ",Assist,,
3,3,10:00,Ottawa,"GUEMETA,JACQUES-M&EACUTE;LAINE missed jump shot",0 - 3,No Event,,No Player,No Event,"GUEMETA,JACQUES-M",missed jump shot
4,4,10:00,,No Event,0 - 3,"LATIFF,WAZIR defensive rebound",Carleton,"LATIFF,WAZIR",defensive rebound,,
5,5,09:03,,No Event,0 - 3,Turnover by TEAM,Carleton,No Player,Turnover,,
6,6,08:41,Ottawa,"GUEMETA,JACQUES-M&EACUTE;LAINE missed 3-pt. jump shot",0 - 3,No Event,,No Player,No Event,"GUEMETA,JACQUES-M",missed 3-pt. jump shot
7,7,08:41,,No Event,0 - 3,"SIMPSON,NOAH HOROBETZ defensive rebound",Carleton,"SIMPSON,NOAH HOROBETZ",defensive rebound,,
8,8,08:30,,No Event,0 - 3,"Turnover by UGBAH,EMMANUEL",Carleton,"UGBAH,EMMANUEL",Turnover,,
9,9,08:14,,No Event,0 - 3,"Foul by LATIFF,WAZIR",Carleton,"LATIFF,WAZIR",Foul,,


### Calculating in-game time of each player on each quarter and events count for each player also

In [42]:
# ------------------------------------------------------------------------------------------------------------------------------------------------------------
# dependencies
players_list, _ = players_list_and_starters(player_event_df, 0, "Visitor")

player_event_df["Time"] = pd.to_datetime(player_event_df["Time"], format="%M:%S", errors="coerce")
under5min_df = player_event_df.loc[player_event_df["Time"] < datetime.strptime("05:00", "%M:%S")]

quarter_indices5min = list(reversed(under5min_df["Time"].nlargest(4).index))
quarter_indices = list(player_event_df[player_event_df['Score'].str.contains('Quarter')].index)
quarter_indices.append(len(player_event_df) - 1)

quarter_dict = {"player":players_list, "time1":[], "score1":[],
                "time2":[], "score2":[], "time3":[], "score3":[],
                "time4":[], "score4":[]}

v = list(quarter_dict.keys())
v.remove("player")
reorder_ls = ["lineup"] + v
lineup_quarter_dict = {key:[] for key in reorder_ls}

# Last 5 minutes statistics of quarters 2 and 4
quarter_dict5min = {"player":players_list, "time2":[],
                    "score2":[], "time4":[], "score4":[]}

quarter = 1
in_lineup = []

not_changed_list = ["not_changed" for n in range(len(players_list))]
event_num_dict = {k:[] for k in ["player"] + event_list}
event_num_dict5min = {k:[] for k in ["player"] + [e + "2" for e in event_list] + [e + "4" for e in event_list]}
time_dict = {"player":players_list, 
            "seconds":list(np.zeros(len(players_list))),
            "points_conceded":list(np.zeros(len(players_list))),
            "timecache":not_changed_list.copy(),
            "scorecache":not_changed_list.copy(),
            "seconds5min":list(np.zeros(len(players_list))),
            "points_conceded5min":list(np.zeros(len(players_list))),
            "timecache5min":not_changed_list.copy(),
            "scorecache5min":not_changed_list.copy(),}

# each custom minutes
custom_minute = 5
each_ls = list(reversed([int(m) for m in range(1, int((10/custom_minute) + 1))]))
event_df_columns = []
eff_columns = [] # will be used for efficiency later on
for qu in range(1, 5):
    for each in list(reversed(each_ls)):
        eff_columns.append((f"quarter{qu}", f"{custom_minute}minute{each}"))
        for event in event_list:
            event_df_columns.append((f"quarter{qu}", f"{custom_minute}minute{each}", event))

event_df_columns = [("player", "player", "player")] + event_df_columns
init_data = {}
for c in event_df_columns:
    if "player" in c:
        init_data[c] = players_list
    else:
        init_data[c] = list(np.zeros(len(players_list)))

event_df_columns = pd.MultiIndex.from_tuples(event_df_columns)
event_df = pd.DataFrame(init_data, columns=event_df_columns)
minutes_ls = list(np.array(each_ls) * custom_minute) + [0]

lineup_time_dict = {k:[] for k in list(time_dict.keys())}
lineup_time_dict["lineup"] = lineup_time_dict.pop("player")
lineup_event_dict = {"lineup":[]}

events_1min = pd.DataFrame(columns=[event_list])

for ind, row in player_event_df.iterrows():
    # 5min checking needs these constants
    cur_time = deepcopy(row["Time"])
    threshold_time = datetime.strptime("05:00", "%M:%S")
# ------------------------------------------------------------------------------------------------------------------------------------------------------------
    # calculating in-game time of each player
    # -------------------------------------------
    # each quarter end calculations
    if ind in quarter_indices:
        
        if ind != len(player_event_df) - 1:
            _, starters = players_list_and_starters(player_event_df, ind, "Visitor")

        if ind != 0:
            for player in in_lineup:
                player_ind = time_dict["player"].index(player)
                cached_time = time_dict["timecache"][player_ind]
                cached_score = time_dict["scorecache"][player_ind]
                if cached_time == "not_changed":
                    enter_time = datetime.strptime("10:00", "%M:%S")
                    enter_score_index = quarter_indices[quarter - 1] + 1
                    enter_score = int(player_event_df.iloc[enter_score_index]["Score"].split("-")[0])
                else:
                    enter_time = cached_time
                    enter_score = cached_score
                
                seconds = enter_time - datetime.strptime("00:00", "%M:%S")
                seconds = seconds.total_seconds()

                points = int(player_event_df.iloc[ind - 1]["Score"].split("-")[0]) - enter_score

                time_dict["seconds"][player_ind] += seconds
                time_dict["points_conceded"][player_ind] += points
            
            # ---------------------
            # lineup quarter calculations
            lineup_cached_score = lineup_time_dict["scorecache"][-1]
            if lineup_time_dict["timecache"][-1] == "not_changed":
                lineup_time_dict["seconds"] += 600
                enter_score_index = quarter_indices[quarter - 1] + 1
                enter_score = int(player_event_df.iloc[enter_score_index]["Score"].split("-")[0])
            else:
                cached_time = lineup_time_dict["timecache"][-1]
                enter_time = cached_time
                seconds = enter_time - datetime.strptime("00:00", "%M:%S")
                seconds = seconds.total_seconds()
                lineup_time_dict["seconds"][-1] += seconds
                enter_score = int(lineup_cached_score.split("-")[0])
            
            points = int(player_event_df.iloc[ind - 1]["Score"].split("-")[0]) - enter_score

            lineup_time_dict["seconds"][-1] += seconds
            lineup_time_dict["points_conceded"][-1] += points
            # ---------------------

            quarter_dict["player"] = time_dict["player"]
            quarter_dict[f"time{quarter}"] = time_dict["seconds"]
            quarter_dict[f"score{quarter}"] = time_dict["points_conceded"]
            if quarter in (2, 4):
                quarter_dict5min["player"] = time_dict["player"]
                quarter_dict5min[f"time{quarter}"] = time_dict["seconds5min"]
                quarter_dict5min[f"score{quarter}"] = time_dict["points_conceded5min"]

            for key in list(lineup_quarter_dict.keys()):
                if key != "lineup":
                    if int(key[-1]) == quarter:
                        if "time" in key:
                            lineup_quarter_dict[f"time{quarter}"].extend(lineup_time_dict["seconds"])
                        else:    
                            lineup_quarter_dict[f"score{quarter}"].extend(lineup_time_dict["points_conceded"])
                    else:
                        length = len(lineup_time_dict["lineup"])
                        zero_list = list(np.zeros(length))
                        lineup_quarter_dict[key].extend(zero_list)
                else:
                    lineup_quarter_dict[key].extend(lineup_time_dict["lineup"])

            quarter += 1
            time_dict["timecache"] = not_changed_list.copy()
            time_dict["scorecache"] = not_changed_list.copy()
            time_dict["timecache5min"] = not_changed_list.copy()
            time_dict["scorecache5min"] = not_changed_list.copy()
            time_dict["seconds"] = list(np.zeros(len(players_list)))
            time_dict["points_conceded"] = list(np.zeros(len(players_list)))
            time_dict["seconds5min"] = list(np.zeros(len(players_list)))
            time_dict["points_conceded5min"] = list(np.zeros(len(players_list)))
            lineup_time_dict = {key:[] for key in lineup_time_dict}

        in_lineup = starters.copy()
        lineup_time_dict["lineup"].append(sorted(in_lineup.copy()))
        for key in lineup_time_dict:
            if key != "lineup":
                if "cache" in key:
                    lineup_time_dict[key].append("not_changed")
                else:
                    lineup_time_dict[key].append(0)

        for key in list(lineup_event_dict.keys()):
                if key == "lineup":
                    lineup_event_dict[key].append(sorted(in_lineup))
                else:
                    lineup_event_dict[key].append(0)
        continue
    # -------------------------------------------
    # iterating rows calculation
    if row["V_player"] not in ("No Player", np.nan, "nan") and row["V_exactevent"] not in ("No Event", np.nan, "nan") and not pd.isna(row["V_player"]) and not pd.isna(row["V_exactevent"]):
        player_index = time_dict["player"].index(row["V_player"])
        cached_time = time_dict["timecache"][player_index]
        cached_score = time_dict["scorecache"][player_index]
        if "goes to the bench" in row["V_exactevent"]:
            in_lineup.remove(row["V_player"])
            if cached_time == "not_changed":
                enter_time = datetime.strptime("10:00", "%M:%S")
                enter_score_index = quarter_indices[quarter - 1] + 1
                enter_score = int(player_event_df.iloc[enter_score_index]["Score"].split("-")[0])
            else:
                enter_time = cached_time
                enter_score = cached_score
            

            seconds = enter_time - row["Time"]
            seconds = seconds.total_seconds()

            points = int(row["Score"].split("-")[0]) - enter_score


            time_dict["seconds"][player_index] += seconds
            time_dict["points_conceded"][player_index] += points

            ## if player goes to bench in last 5min of quarters 2 an 4 
            ## (considering not to exceed to to much memory and calculate them when needed)
            if cur_time < threshold_time and quarter in (2, 4):
                cached_time5min = time_dict["timecache5min"][player_index]
                cached_score5min = time_dict["scorecache5min"][player_index]
                if cached_time5min == "not_changed":
                    enter_time5min = threshold_time
                    enter_score_index5min = quarter_indices5min[quarter - 1] + 1
                    enter_score5min = int(player_event_df.iloc[enter_score_index5min]["Score"].split("-")[0])
                else:
                    if cached_time5min > threshold_time:
                        enter_time5min = threshold_time
                        enter_score_index5min = quarter_indices5min[quarter - 1] + 1
                        enter_score5min = int(player_event_df.iloc[enter_score_index5min]["Score"].split("-")[0])
                    else:
                        enter_time5min = cached_time5min
                        enter_score5min = cached_score5min
                
                seconds5min = enter_time5min - row["Time"]
                seconds5min = seconds5min.total_seconds()
                points5min = int(row["Score"].split("-")[0]) - enter_score5min
                time_dict["seconds5min"][player_index] += seconds5min
                time_dict["points_conceded5min"][player_index] += points5min

        elif "enters the game" in row["V_exactevent"]:
            in_lineup.append(row["V_player"])
            time_dict["timecache"][player_index] = row["Time"]
            time_dict["scorecache"][player_index] = int(row["Score"].split("-")[0])
            
            ## time and score cache for under 5 min assessment would be catched if the the time is below 05:00 
            ## else i would not change it and it would be the initial list that i made before main loop for it
            if cur_time < threshold_time:
                time_dict["timecache5min"][player_index] = row["Time"]
                time_dict["scorecache5min"][player_index] = int(row["Score"].split("-")[0])
        
        # ---------------------
        # lineup iterating rows calculation
        last_lineup = lineup_time_dict["lineup"][-1]
        if sorted(in_lineup) != sorted(last_lineup) and len(in_lineup) == 5:
            cached_time = lineup_time_dict["timecache"][-1]
            if lineup_time_dict["timecache"][-1] == "not_changed":
                enter_time = datetime.strptime("10:00", "%M:%S")
                enter_score = int(player_event_df.iloc[enter_score_index]["Score"].split("-")[0])
            else:
                enter_time = cached_time
                enter_score = int(lineup_time_dict["scorecache"][-1].split("-")[0])

            seconds = enter_time - row["Time"]
            seconds = seconds.total_seconds()

            curr_score = int(row["Score"].split("-")[0])
            points = curr_score - enter_score

            lineup_time_dict["seconds"][-1] += seconds
            lineup_time_dict["points_conceded"][-1] += points

            lineup_time_dict["lineup"].append(sorted(in_lineup.copy()))
            for k in lineup_time_dict:
                if k != "lineup":
                    if "cache" in k:
                        lineup_time_dict[k].append("not_changed")
                    else:
                        lineup_time_dict[k].append(0)

            lineup_time_dict["timecache"].append(row["Time"])
            lineup_time_dict["scorecache"].append(row["Score"])
        # ---------------------
    # -------------------------------------------
# ------------------------------------------------------------------------------------------------------------------------------------------------------------
        # # filling event_num_dict for meaesuring how many times each event occured
        if row["V_player"] not in event_num_dict["player"]:
            event_num_dict["player"].append(row["V_player"])
            for key in event_num_dict:
                if key != "player":
                    event_num_dict[key].append(0)
        
        player_index = event_num_dict["player"].index(row["V_player"])
        event_num_dict[row["V_exactevent"]][player_index] += 1

        if cur_time < threshold_time and quarter in (2, 4):
            if row["V_player"] not in event_num_dict5min["player"]:
                event_num_dict5min["player"].append(row["V_player"])
                for key in event_num_dict5min:
                    if key != "player":
                        event_num_dict5min[key].append(0)
            
            player_index = event_num_dict5min["player"].index(row["V_player"])
            event_num_dict5min[row["V_exactevent"] + str(quarter)][player_index] += 1
        

        # each custom minutes
        minute_integer = row["Time"].minute + row["Time"].second / 60
        if minute_integer == 10:
            which_minute = 1
        else:
            for minute_index, minute in enumerate(minutes_ls):
                if minute <= minute_integer:
                    which_minute = minute_index
                    break
        
        event_df.loc[event_df["player", "player", "player"] == row["V_player"], (f"quarter{quarter}", f"{custom_minute}minute{which_minute}", row["V_exactevent"])] += 1

        # ---------------------
        # lineup event calculations
        if len(lineup_event_dict["lineup"]) == 0:
            lineup_event_dict["lineup"].append(sorted(in_lineup))

        last_lineup = lineup_event_dict["lineup"][-1]
        new_lineup = sorted(in_lineup.copy())
        if row["V_exactevent"] not in lineup_event_dict:
            lineup_event_dict[row["V_exactevent"]] = list(np.zeros(len(lineup_event_dict["lineup"])))
        
        if new_lineup == last_lineup:
            lineup_event_dict[row["V_exactevent"]][-1] += 1
        elif new_lineup != last_lineup and len(new_lineup) == 5:
            for key in list(lineup_event_dict.keys()):
                if key == "lineup":
                    lineup_event_dict[key].append(new_lineup)
                else:
                    lineup_event_dict[key].append(0)
        # ---------------------
# ------------------------------------------------------------------------------------------------------------------------------------------------------------

time_score_df = pd.DataFrame(quarter_dict)
time_score_df5min = pd.DataFrame(quarter_dict5min)
lineup_time_score_df = pd.DataFrame(lineup_quarter_dict)
event_num_df = pd.DataFrame(event_num_dict)
event_num_df5min = pd.DataFrame(event_num_dict5min)
lineup_event_df = pd.DataFrame(lineup_event_dict)

time_columns = []
for i in range(1, 5):
    for sub in ["time", "score"]:
        time_columns.append((f"quarter{i}", sub))

event_columns = []
for i in [2, 4]:
    for sub in event_list:
        event_columns.append((f"quarter{i}", sub))

time_score_df.columns = pd.MultiIndex.from_tuples([("player", "player")] + time_columns)
time_score_df5min.columns = pd.MultiIndex.from_tuples([("player", "player")] + [(k, v) for k, v in time_columns if k[-1] not in ("1", "3")])
lineup_time_score_df.columns = pd.MultiIndex.from_tuples([("lineup", "lineup")] + time_columns)
event_num_df5min.columns = pd.MultiIndex.from_tuples([("player", "player")] + event_columns)

time_sum_ls = [(f"quarter{i}", "time") for i in range(1, 5)]
score_sum_ls = [(f"quarter{i}", "score") for i in range(1, 5)]
time_score_df[("total", "time")] = time_score_df[time_sum_ls].sum(axis=1)
time_score_df[("total", "score")] = time_score_df[score_sum_ls].sum(axis=1)
lineup_time_score_df[("total", "time")] = lineup_time_score_df[time_sum_ls].sum(axis=1)
lineup_time_score_df[("total", "score")] = lineup_time_score_df[score_sum_ls].sum(axis=1)

In [43]:
event_df

Unnamed: 0_level_0,player,quarter1,quarter1,quarter1,quarter1,quarter1,quarter1,quarter1,quarter1,quarter1,...,quarter4,quarter4,quarter4,quarter4,quarter4,quarter4,quarter4,quarter4,quarter4,quarter4
Unnamed: 0_level_1,player,5minute1,5minute1,5minute1,5minute1,5minute1,5minute1,5minute1,5minute1,5minute1,...,5minute2,5minute2,5minute2,5minute2,5minute2,5minute2,5minute2,5minute2,5minute2,5minute2
Unnamed: 0_level_2,player,made layup,missed layup,Assist,Turnover,defensive rebound,enters the game,goes to the bench,missed 3-pt. jump shot,Foul,...,goes to the bench,missed 3-pt. jump shot,Foul,Steal,made free throw,missed free throw,made jump shot,made 3-pt. jump shot,missed jump shot,offensive rebound
0,"UGBAH,EMMANUEL",0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"SPENCER,XAVIER",0.0,0.0,0.0,1.0,1.0,0.0,1.0,2.0,0.0,...,0.0,2.0,0.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0
2,"LATIFF,WAZIR",0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,...,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,2.0,0.0
3,"BRAZDEIKIS,AUGUSTAS",0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,...,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0
4,"SIMPSON,NOAH HOROBETZ",0.0,0.0,1.0,0.0,2.0,0.0,1.0,0.0,0.0,...,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0
5,"OKADO,MARJOK",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"DOREY-HAVENS,AUBREY",0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"MILON,EMANUEL",0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"SERAPHIN,REGINALD JEAN",0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"SMITH,DANIEL",0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,1.0


In [44]:
time_score_df

Unnamed: 0_level_0,player,quarter1,quarter1,quarter2,quarter2,quarter3,quarter3,quarter4,quarter4,total,total
Unnamed: 0_level_1,player,time,score,time,score,time,score,time,score,time,score
0,"UGBAH,EMMANUEL",392.0,12.0,196.0,7.0,274.0,6.0,180.0,13.0,1042.0,38.0
1,"SPENCER,XAVIER",439.0,13.0,558.0,16.0,398.0,14.0,535.0,21.0,1930.0,64.0
2,"LATIFF,WAZIR",407.0,14.0,212.0,8.0,367.0,10.0,490.0,19.0,1476.0,51.0
3,"BRAZDEIKIS,AUGUSTAS",334.0,10.0,44.0,0.0,414.0,17.0,496.0,19.0,1288.0,46.0
4,"SIMPSON,NOAH HOROBETZ",358.0,14.0,472.0,14.0,460.0,17.0,540.0,19.0,1830.0,64.0
5,"OKADO,MARJOK",193.0,2.0,388.0,8.0,233.0,9.0,127.0,7.0,941.0,26.0
6,"DOREY-HAVENS,AUBREY",161.0,3.0,109.0,0.0,202.0,5.0,68.0,5.0,540.0,13.0
7,"MILON,EMANUEL",242.0,2.0,195.0,2.0,140.0,2.0,60.0,7.0,637.0,13.0
8,"SERAPHIN,REGINALD JEAN",266.0,6.0,447.0,16.0,161.0,2.0,44.0,0.0,918.0,24.0
9,"SMITH,DANIEL",0.0,0.0,379.0,9.0,351.0,13.0,460.0,20.0,1190.0,42.0


In [45]:
time_score_df5min

Unnamed: 0_level_0,player,quarter2,quarter2,quarter4,quarter4
Unnamed: 0_level_1,player,time,score,time,score
0,"UGBAH,EMMANUEL",101.0,2.0,50.0,5.0
1,"SPENCER,XAVIER",50.0,11.0,0.0,0.0
2,"LATIFF,WAZIR",0.0,0.0,0.0,0.0
3,"BRAZDEIKIS,AUGUSTAS",0.0,0.0,240.0,4.0
4,"SIMPSON,NOAH HOROBETZ",0.0,0.0,240.0,4.0
5,"OKADO,MARJOK",278.0,17.0,17.0,0.0
6,"DOREY-HAVENS,AUBREY",92.0,11.0,3.0,0.0
7,"MILON,EMANUEL",50.0,11.0,50.0,5.0
8,"SERAPHIN,REGINALD JEAN",0.0,0.0,0.0,0.0
9,"SMITH,DANIEL",59.0,1.0,270.0,9.0


In [46]:
event_num_df

Unnamed: 0,player,made layup,missed layup,Assist,Turnover,defensive rebound,enters the game,goes to the bench,missed 3-pt. jump shot,Foul,Steal,made free throw,missed free throw,made jump shot,made 3-pt. jump shot,missed jump shot,offensive rebound
0,"SPENCER,XAVIER",0,0,6,7,7,4,4,8,0,1,1,0,3,4,4,1
1,"SIMPSON,NOAH HOROBETZ",0,0,1,4,13,4,5,0,1,0,0,0,2,0,2,4
2,"LATIFF,WAZIR",0,0,3,1,4,5,5,3,4,0,1,1,6,2,4,0
3,"UGBAH,EMMANUEL",0,0,1,1,2,8,9,0,2,0,0,3,0,0,0,1
4,"BRAZDEIKIS,AUGUSTAS",0,0,1,0,2,5,9,0,4,0,1,1,1,0,4,2
5,"SERAPHIN,REGINALD JEAN",0,0,0,0,0,7,5,1,4,0,0,0,1,0,5,2
6,"SMITH,DANIEL",0,0,1,2,3,9,6,2,2,0,7,0,1,0,3,2
7,"DOREY-HAVENS,AUBREY",0,0,3,2,1,5,5,3,3,0,0,0,1,1,2,1
8,"MILON,EMANUEL",0,0,0,0,1,5,4,0,3,0,0,0,1,0,1,0
9,"OKADO,MARJOK",0,0,0,2,3,6,6,2,1,2,0,1,2,3,1,0


In [47]:
event_num_df5min

Unnamed: 0_level_0,player,quarter2,quarter2,quarter2,quarter2,quarter2,quarter2,quarter2,quarter2,quarter2,...,quarter4,quarter4,quarter4,quarter4,quarter4,quarter4,quarter4,quarter4,quarter4,quarter4
Unnamed: 0_level_1,player,made layup,missed layup,Assist,Turnover,defensive rebound,enters the game,goes to the bench,missed 3-pt. jump shot,Foul,...,goes to the bench,missed 3-pt. jump shot,Foul,Steal,made free throw,missed free throw,made jump shot,made 3-pt. jump shot,missed jump shot,offensive rebound
0,"SIMPSON,NOAH HOROBETZ",0,0,0,1,3,0,0,0,0,...,2,0,0,0,0,0,0,0,1,3
1,"DOREY-HAVENS,AUBREY",0,0,1,0,0,0,1,0,1,...,1,1,0,0,0,0,0,0,0,0
2,"OKADO,MARJOK",0,0,0,1,1,0,1,1,0,...,1,0,0,0,0,0,0,0,0,0
3,"UGBAH,EMMANUEL",0,0,0,0,0,2,2,0,1,...,1,0,0,0,0,0,0,0,0,0
4,"SMITH,DANIEL",0,0,0,0,0,2,1,0,1,...,1,0,0,0,4,0,0,0,0,1
5,"MILON,EMANUEL",0,0,0,0,0,0,1,0,0,...,1,0,1,0,0,0,0,0,0,0
6,"SPENCER,XAVIER",0,0,2,1,1,1,1,1,0,...,0,2,0,0,0,0,1,2,0,0
7,"SERAPHIN,REGINALD JEAN",0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,"LATIFF,WAZIR",0,0,0,0,0,1,0,0,0,...,0,1,0,0,1,1,1,0,2,0
9,"BRAZDEIKIS,AUGUSTAS",0,0,0,0,0,0,0,0,0,...,2,0,0,0,0,0,0,0,2,1


In [48]:
lineup_event_df.iloc[:-1] # starter lineup will be added automatically at end of the match so we ignore it cause no event will happen there(try proving it by removing .iloc)

Unnamed: 0,lineup,made 3-pt. jump shot,Assist,defensive rebound,Turnover,Foul,missed 3-pt. jump shot,made jump shot,enters the game,goes to the bench,missed jump shot,made free throw,Steal,offensive rebound,missed free throw
0,"[BRAZDEIKIS,AUGUSTAS, LATIFF,WAZIR, SIMPSON,NOAH HOROBETZ, SPENCER,XAVIER, UGBAH,EMMANUEL]",2.0,2.0,5.0,2.0,2.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"[DOREY-HAVENS,AUBREY, LATIFF,WAZIR, SERAPHIN,REGINALD JEAN, SIMPSON,NOAH HOROBETZ, SMITH,DANIEL]",0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"[DOREY-HAVENS,AUBREY, LATIFF,WAZIR, MILON,EMANUEL, SERAPHIN,REGINALD JEAN, SMITH,DANIEL]",0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,"[DOREY-HAVENS,AUBREY, MILON,EMANUEL, OKADO,MARJOK, SERAPHIN,REGINALD JEAN, SMITH,DANIEL]",0.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,"[BRAZDEIKIS,AUGUSTAS, DOREY-HAVENS,AUBREY, MILON,EMANUEL, OKADO,MARJOK, SMITH,DANIEL]",1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"[BRAZDEIKIS,AUGUSTAS, MILON,EMANUEL, OKADO,MARJOK, SMITH,DANIEL, SPENCER,XAVIER]",0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
6,"[MILON,EMANUEL, OKADO,MARJOK, SERAPHIN,REGINALD JEAN, SMITH,DANIEL, SPENCER,XAVIER]",0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0
7,"[MILON,EMANUEL, OKADO,MARJOK, SERAPHIN,REGINALD JEAN, SPENCER,XAVIER, UGBAH,EMMANUEL]",0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0
8,"[LATIFF,WAZIR, SERAPHIN,REGINALD JEAN, SIMPSON,NOAH HOROBETZ, SPENCER,XAVIER, UGBAH,EMMANUEL]",1.0,2.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"[BRAZDEIKIS,AUGUSTAS, LATIFF,WAZIR, SIMPSON,NOAH HOROBETZ, SPENCER,XAVIER, UGBAH,EMMANUEL]",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [49]:
lineup_time_score_df

Unnamed: 0_level_0,lineup,quarter1,quarter1,quarter2,quarter2,quarter3,quarter3,quarter4,quarter4,total,total
Unnamed: 0_level_1,lineup,time,score,time,score,time,score,time,score,time,score
0,"[BRAZDEIKIS,AUGUSTAS, LATIFF,WAZIR, SIMPSON,NOAH HOROBETZ, SPENCER,XAVIER, UGBAH,EMMANUEL]",261.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,261.0,9.0
1,"[DOREY-HAVENS,AUBREY, LATIFF,WAZIR, SERAPHIN,REGINALD JEAN, SIMPSON,NOAH HOROBETZ, SMITH,DANIEL]",25.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,3.0
2,"[DOREY-HAVENS,AUBREY, LATIFF,WAZIR, MILON,EMANUEL, SERAPHIN,REGINALD JEAN, SMITH,DANIEL]",49.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,49.0,0.0
3,"[DOREY-HAVENS,AUBREY, MILON,EMANUEL, OKADO,MARJOK, SERAPHIN,REGINALD JEAN, SMITH,DANIEL]",40.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,0.0
4,"[BRAZDEIKIS,AUGUSTAS, DOREY-HAVENS,AUBREY, MILON,EMANUEL, OKADO,MARJOK, SMITH,DANIEL]",47.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,47.0,0.0
5,"[BRAZDEIKIS,AUGUSTAS, MILON,EMANUEL, OKADO,MARJOK, SMITH,DANIEL, SPENCER,XAVIER]",26.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,26.0,1.0
6,"[MILON,EMANUEL, OKADO,MARJOK, SERAPHIN,REGINALD JEAN, SMITH,DANIEL, SPENCER,XAVIER]",21.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,21.0,0.0
7,"[MILON,EMANUEL, OKADO,MARJOK, SERAPHIN,REGINALD JEAN, SPENCER,XAVIER, UGBAH,EMMANUEL]",59.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,59.0,1.0
8,"[LATIFF,WAZIR, SERAPHIN,REGINALD JEAN, SIMPSON,NOAH HOROBETZ, SPENCER,XAVIER, UGBAH,EMMANUEL]",144.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,144.0,2.0
9,"[BRAZDEIKIS,AUGUSTAS, LATIFF,WAZIR, SIMPSON,NOAH HOROBETZ, SPENCER,XAVIER, UGBAH,EMMANUEL]",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [50]:
for index, row in lineup_event_df.iterrows():
    if index < 31:
        time_lineup = lineup_time_score_df.iloc[index]["lineup"].to_list()[0]
    else:
        time_lineup = []
    event_lineup = lineup_event_df.iloc[index]["lineup"]
    if time_lineup != event_lineup:
        print(event_lineup, time_lineup, index)

['BRAZDEIKIS,AUGUSTAS', 'DOREY-HAVENS,AUBREY', 'OKADO,MARJOK', 'SIMPSON,NOAH HOROBETZ', 'SMITH,DANIEL'] [] 31
['LATIFF,WAZIR', 'SERAPHIN,REGINALD JEAN', 'SIMPSON,NOAH HOROBETZ', 'SMITH,DANIEL', 'SPENCER,XAVIER'] [] 32
['BRAZDEIKIS,AUGUSTAS', 'LATIFF,WAZIR', 'SIMPSON,NOAH HOROBETZ', 'SMITH,DANIEL', 'SPENCER,XAVIER'] [] 33
['LATIFF,WAZIR', 'MILON,EMANUEL', 'SMITH,DANIEL', 'SPENCER,XAVIER', 'UGBAH,EMMANUEL'] [] 34
['BRAZDEIKIS,AUGUSTAS', 'LATIFF,WAZIR', 'SIMPSON,NOAH HOROBETZ', 'SMITH,DANIEL', 'SPENCER,XAVIER'] [] 35
['BRAZDEIKIS,AUGUSTAS', 'LATIFF,WAZIR', 'OKADO,MARJOK', 'SIMPSON,NOAH HOROBETZ', 'SPENCER,XAVIER'] [] 36
['BRAZDEIKIS,AUGUSTAS', 'DOREY-HAVENS,AUBREY', 'LATIFF,WAZIR', 'SIMPSON,NOAH HOROBETZ', 'SPENCER,XAVIER'] [] 37
['LATIFF,WAZIR', 'MILON,EMANUEL', 'SMITH,DANIEL', 'SPENCER,XAVIER', 'UGBAH,EMMANUEL'] [] 38
['BRAZDEIKIS,AUGUSTAS', 'LATIFF,WAZIR', 'SIMPSON,NOAH HOROBETZ', 'SPENCER,XAVIER', 'UGBAH,EMMANUEL'] [] 39


off possession = made and miss

def possession = Turnover, defensive rebound,	Foul, Steal

efficiency = point, rebound, assist, steal, block, -missed free throw, -turnover, -points conceded

What type of game was it? (Pre-season, Regular Season, or Playoffs)

In [51]:
def cal_eff(offense, defense, time):
    eff = ((offense - defense) * 60) / time
    return float(eff)

In [52]:
pos_contrib = ['Assist', 'defensive rebound', 'made 3-pt. jump shot', 'made free throw',
                               'made jump shot', 'made layup', 'offensive rebound']
neg_contrib = ['Turnover', 'missed 3-pt. jump shot', 'missed free throw',
                               'missed jump shot', 'missed layup']

eff_df = pd.DataFrame({key:[] for key in [("player", "player")] + eff_columns})
eff_df.columns = pd.MultiIndex.from_tuples([("player", "player")] + eff_columns)
for index, row in event_df.iterrows():
    data = {("player", "player"):[row["player", "player", "player"]]}
    for col in eff_columns:
        alter = []
        for event in pos_contrib:
            alter.append(col + (event,))

        offense = row[alter].sum()

        for event in neg_contrib:
            alter.append(col + (event,))

        defense = row[alter].sum()

        eff = cal_eff(offense, defense, custom_minute * 60)
        data[col] = [eff]
    
    new_df = pd.DataFrame(data)
    eff_df = pd.concat([eff_df, new_df], ignore_index=True, axis=0)

In [53]:
eff_df

Unnamed: 0_level_0,player,quarter1,quarter1,quarter2,quarter2,quarter3,quarter3,quarter4,quarter4
Unnamed: 0_level_1,player,5minute1,5minute2,5minute1,5minute2,5minute1,5minute2,5minute1,5minute2
0,"UGBAH,EMMANUEL",-0.2,0.0,0.0,0.0,0.0,0.0,-0.6,0.0
1,"SPENCER,XAVIER",-0.6,-0.2,-0.6,-0.6,-0.6,-0.2,-0.6,-0.4
2,"LATIFF,WAZIR",-0.2,-0.2,-0.4,0.0,0.0,0.0,-0.2,-0.8
3,"BRAZDEIKIS,AUGUSTAS",0.0,-0.2,0.0,0.0,-0.4,0.0,0.0,-0.4
4,"SIMPSON,NOAH HOROBETZ",0.0,0.0,-0.2,-0.4,0.0,0.0,-0.4,-0.2
5,"OKADO,MARJOK",0.0,0.0,-0.2,-0.6,-0.4,0.0,0.0,0.0
6,"DOREY-HAVENS,AUBREY",0.0,-0.2,-0.2,-0.2,-0.2,-0.2,-0.2,-0.2
7,"MILON,EMANUEL",0.0,-0.2,0.0,0.0,0.0,0.0,0.0,0.0
8,"SERAPHIN,REGINALD JEAN",0.0,0.0,-0.2,-0.2,-0.2,-0.4,-0.2,0.0
9,"SMITH,DANIEL",0.0,-0.2,-0.2,0.0,-0.2,-0.6,0.0,-0.2


In [54]:
scoring_values = {'made layup': 2, 'made free throw': 1, 'made jump shot': 2, 'made 3-pt. jump shot': 3}

neg_contrib2 = []
neg_contrib4 = []
pos_contrib2 = []
pos_contrib4 = []
for q in [2, 4]:
    for n in neg_contrib:
        globals()[f"neg_contrib{q}"].append((f"quarter{q}", n))
    for p in pos_contrib:
        globals()[f"pos_contrib{q}"].append((f"quarter{q}", n))

final_columns = ['Player Name', 'PtScored', 'ptsconceded', "OffRtg", "DefRtg", "NetRtg",
                'total off possession', 'total def possession', "global efficiency",
                "quarter2 last 5min efficiency", "quarter4 last 5min efficiency",
                'minutes', 'home/visitor', 'opponent', 'date', 'game_type']

player_final_table = pd.DataFrame(columns=final_columns)
for index, row in event_num_df.iterrows():
    points_scored = 0
    for key in scoring_values:
        score_value = scoring_values[key]
        points_scored += event_num_df.loc[index, key] * score_value

    points_conceded = float(time_score_df.loc[time_score_df[("player", "player")] == row["player"]][("total", "score")])

    seconds = time_score_df.loc[time_score_df[("player", "player")] == row["player"]][("total", "time")]
    time = seconds.iloc[0]
    global_off_possession = row[pos_contrib].sum()
    global_def_possession = row[neg_contrib].sum()
    global_efficiency = cal_eff(global_off_possession, global_def_possession, time)

    if row["player"] in event_num_df5min["player", "player"].tolist():
        time_row5min = time_score_df5min.loc[time_score_df5min["player", "player"] == row["player"]]
        event_row5min = event_num_df5min.loc[event_num_df5min["player", "player"] == row["player"]]
        for q in [2, 4]:
            time = time_row5min[f"quarter{q}", "time"].iloc[0]
            offense = float(event_row5min[globals()[f"pos_contrib{q}"]].sum(axis=1).iloc[0])
            defense = float(event_row5min[globals()[f"neg_contrib{q}"]].sum(axis=1).iloc[0])
            globals()[f"quarter{q}_5min_eff"] = cal_eff(offense, defense, time)

            if time == 0:
                globals()[f"quarter{q}_5min_eff"] = "Not in the time"
    else:
        quarter2_5min_eff = "Not in the time"
        quarter4_5min_eff = "Not in the time"
    
    minutes = seconds / 60
    minutes = "{:.2f}".format(minutes.to_list()[0])
    hv_df = player_event_df.loc[player_event_df["V_player"] == row["player"]].iloc[1]
    if pd.isna(hv_df['Home']) == False:
        hv = "Home"
    else:
        hv = "Visitor"
    
    opponent_df = player_event_df.loc[pd.isna(player_event_df[hv]) == True]
    opponent = opponent_df.iloc[1]["Home"] if hv == "Visitor" else opponent_df.iloc[1]["Visitor"]

    try:
        off_rtg = 100 * (points_scored / global_off_possession)
        def_rtg = 100 * (points_conceded / global_def_possession)
    except ZeroDivisionError:
        off_rtg = 0
        def_rtg = 0        

    net_rtg = off_rtg - def_rtg
    off_rtg = "{:.3f}".format(off_rtg)
    def_rtg = "{:.3f}".format(def_rtg)
    net_rtg = "{:.3f}".format(net_rtg)

    new_row = {"Player Name":[row["player"]], "PtScored":[points_scored], "OffRtg":[off_rtg], "DefRtg":[def_rtg],
               "NetRtg":[net_rtg], "ptsconceded":[points_conceded], 'total off possession':[global_off_possession], 
               'total def possession':[global_def_possession], "global efficiency":[global_efficiency], 
               "quarter2 last 5min efficiency":[quarter2_5min_eff], "quarter4 last 5min efficiency":[quarter4_5min_eff],
               "minutes":minutes, "home/visitor":hv, "opponent":opponent}
    
    new_df = pd.DataFrame(new_row)
    player_final_table = pd.concat([player_final_table, new_df], ignore_index=True, axis=0)

player_final_table = player_final_table.reindex(columns=final_columns)

  points_conceded = float(time_score_df.loc[time_score_df[("player", "player")] == row["player"]][("total", "score")])
  eff = ((offense - defense) * 60) / time


KeyError: 'player'

In [92]:
player_final_table

Unnamed: 0,Player Name,PtScored,ptsconceded,OffRtg,DefRtg,NetRtg,total off possession,total def possession,global efficiency,quarter2 last 5min efficiency,quarter4 last 5min efficiency,minutes,home/visitor,opponent,date,game_type
0,"MILON,EMANUEL",9,40.0,100.0,2000.0,-1900.0,9,2,0.252252,-0.304569,Not in the time,27.75,Home,Canisius,,
1,"OKADO,MARJOK",9,41.0,90.0,341.667,-251.667,10,12,-0.073126,Not in the time,-0.461538,27.35,Home,Canisius,,
2,"RANDRIASALAMA,TO",2,16.0,66.667,400.0,-333.333,3,4,-0.077821,Not in the time,Not in the time,12.85,Home,Canisius,,
3,"RILEY,BEN",1,18.0,50.0,450.0,-400.0,2,4,-0.167131,-0.618557,Not in the time,11.97,Home,Canisius,,
4,"J-SERAPHIN,REGINALD",0,7.0,0.0,233.333,-233.333,1,3,-0.198675,Not in the time,0.0,10.07,Home,Canisius,,
5,"SPENCER,XAVIER",4,35.0,44.444,700.0,-655.556,9,5,0.229226,Not in the time,-1.081081,17.45,Home,Canisius,,
6,"BRAZDEIKIS,AUGUSTAS",18,46.0,105.882,460.0,-354.118,17,10,0.275953,Not in the time,-1.875,25.37,Home,Canisius,,
7,"SMITH,DANIEL",9,53.0,90.0,757.143,-667.143,10,7,0.101868,Not in the time,Not in the time,29.45,Home,Canisius,,
8,"DOREY-HAVENS,AUBREY",10,42.0,66.667,300.0,-233.333,15,14,0.042463,-1.5,Not in the time,23.55,Home,Canisius,,
9,"SIMPSON,NOAH",2,26.0,25.0,520.0,-495.0,8,5,0.244898,Not in the time,Not in the time,12.25,Home,Canisius,,


## Let's go to lineup

In [30]:
if "Player Name" in final_columns:
    final_columns.remove("Player Name")
lineup_final_columns = ["Lineup"].extend(final_columns)
lineup_final_table = pd.DataFrame(columns=[lineup_final_columns])
for index, row in lineup_event_df.iloc[:-1].iterrows():
    
    points_scored = 0
    for key in scoring_values:
        score_value = scoring_values[key]
        points_scored += lineup_event_df.loc[index, key] * score_value

    points_conceded = float(lineup_time_score_df[("total", "score")].iloc[index])

    efficiency = sum([points_scored, row["offensive rebound"], row["defensive rebound"], row["Assist"], row["Steal"]]) - \
    sum([row["missed free throw"], row["Turnover"], points_conceded])
    efficiency = float(efficiency)

    minutes = lineup_time_score_df[("total", "time")].iloc[index] / 60
    minutes = "{:.2f}".format(minutes)

    hv_df = player_event_df.loc[player_event_df["player"] == row["lineup"][0]].iloc[1]
    if pd.isna(hv_df['Home']) == False:
        hv = "Home"
    else:
        hv = "Visitor"

    opponent_df = player_event_df.loc[pd.isna(player_event_df[hv]) == True]
    opponent = opponent_df.iloc[1]["Home"] if hv == "Visitor" else opponent_df.iloc[1]["Visitor"]

    new_row = {"Lineup":[row["lineup"]], "PtScored":[points_scored],
               "ptsconceded":[points_conceded], "efficiency":[efficiency], 
               "minutes":minutes, "home/visitor":hv, "opponent":opponent}
    
    new_df = pd.DataFrame(new_row)
    lineup_final_table = pd.concat([lineup_final_table, new_df], ignore_index=True, axis=0)

lineup_final_table = lineup_final_table.reindex(columns=lineup_final_columns)

In [31]:
lineup_final_table

Unnamed: 0,None,Lineup,PtScored,ptsconceded,efficiency,minutes,home/visitor,opponent
0,,"[BRAZDEIKIS,AUGUSTAS, OKADO,MARJOK, SIMPSON,NOAH, SPENCER,XAVIER, UGBAH,EMMANUEL]",8.0,7.0,4.0,3.48,Visitor,Guelph
1,,"[BRAZDEIKIS,AUGUSTAS, DOREY-HAVENS,AUBREY, MILON,EMANUEL, OKADO,MARJOK, UGBAH,EMMANUEL]",2.0,0.0,3.0,0.42,Visitor,Guelph
2,,"[BRAZDEIKIS,AUGUSTAS, DOREY-HAVENS,AUBREY, LATIFF,WAZIR, MILON,EMANUEL, UGBAH,EMMANUEL]",5.0,0.0,9.0,1.6,Visitor,Guelph
3,,"[DOREY-HAVENS,AUBREY, LATIFF,WAZIR, MILON,EMANUEL, RILEY,BEN, SERAPHIN,REGINALD JEAN]",0.0,2.0,-2.0,0.52,Visitor,Guelph
4,,"[LATIFF,WAZIR, MILON,EMANUEL, RILEY,BEN, SERAPHIN,REGINALD JEAN, SPENCER,XAVIER]",2.0,1.0,3.0,1.18,Visitor,Guelph
5,,"[BRAZDEIKIS,AUGUSTAS, RANDRIASALAMA,TO, RILEY,BEN, SIMPSON,NOAH, SPENCER,XAVIER]",4.0,4.0,0.0,1.45,Visitor,Guelph
6,,"[BRAZDEIKIS,AUGUSTAS, RANDRIASALAMA,TO, SIMPSON,NOAH, SPENCER,XAVIER, UGBAH,EMMANUEL]",0.0,2.0,-1.0,1.05,Visitor,Guelph
7,,"[BRAZDEIKIS,AUGUSTAS, DOREY-HAVENS,AUBREY, OKADO,MARJOK, SIMPSON,NOAH, UGBAH,EMMANUEL]",2.0,0.0,2.0,0.6,Visitor,Guelph
8,,"[BRAZDEIKIS,AUGUSTAS, OKADO,MARJOK, SIMPSON,NOAH, SPENCER,XAVIER, UGBAH,EMMANUEL]",0.0,0.0,0.0,0.17,Visitor,Guelph
9,,"[BRAZDEIKIS,AUGUSTAS, DOREY-HAVENS,AUBREY, OKADO,MARJOK, SIMPSON,NOAH, UGBAH,EMMANUEL]",7.0,3.0,8.0,1.97,Visitor,Guelph


In [37]:
import pandas as pd

# Create a sample dataframe
data = {'A': [1, 2, 3], 'B': [4, 5, 6], "C":[4, 5, 6]}
df = pd.DataFrame(data)

# Create multi-level columns
df.columns = pd.MultiIndex.from_tuples([('Column 1', 'time'), ('Column 1', 'score'), ('Column 2', 'time')])

# Display the dataframe
print(df)

  Column 1       Column 2
      time score     time
0        1     4        4
1        2     5        5
2        3     6        6
