# Cleaning & Categorizing Debates

Here we turn our dataset into a more usable dataset.

In [1]:
import os
import json
import pandas as pd
from collections import defaultdict, Counter
import re

## Old Debates

In [2]:
DATA_DIRECTORY = "data_dir"

INCUMBENT_PAIRS = {
    ("ford", "1976"), ("carter", "1980"), ("reagan", "1984"), ("bush", "1984"),
    ("bush", "1992"), ("quayle", "1992"), ("clinton", "1996"), ("gore", "1996"),
    ("bush", "2004"), ("cheney", "2004"), ("obama", "2012"), ("biden", "2012"),
    ("trump", "2020"), ("pence", "2020")
}

WINNER_PAIRS = {
    ("kennedy", "1960"), ("carter", "1976"), ("reagan", "1980"), ("bush", "1980"),
    ("reagan", "1984"), ("bush", "1984"), ("bush", "1988"), ("quayle", "1988"),
    ("clinton", "1992"), ("gore", "1992"), ("clinton", "1996"), ("gore", "1996"),
    ("bush", "2000"), ("cheney", "2000"), ("bush", "2004"), ("cheney", "2004"),
    ("obama", "2008"), ("biden", "2008"), ("obama", "2012"), ("biden", "2012"),
    ("trump", "2016"), ("pence", "2016"), ("biden", "2020"), ("harris", "2020")
}

CANDIDATES = {
    # Presidential
    "kennedy": "Democratic", "nixon": "Republican", "ford": "Republican",
    "carter": "Democratic", "reagan": "Republican", "anderson": "Independent",
    "mondale": "Democratic", "bush": "Republican", "dukakis": "Democratic",
    "clinton": "Democratic", "perot": "Independent", "dole": "Republican",
    "gore": "Democratic", "kerry": "Democratic", "obama": "Democratic",
    "mccain": "Republican", "romney": "Republican", "trump": "Republican",
    "biden": "Democratic",

    # Vice-Presidential
    "ferraro": "Democratic", "quayle": "Republican", "bentsen": "Democratic",
    "kemp": "Republican", "stockdale": "Independent", "lieberman": "Democratic", "cheney": "Republican",
    "edwards": "Democratic", "palin": "Republican", "ryan": "Republican",
    "kaine": "Democratic", "pence": "Republican", "harris": "Democratic",
    "vance": "Republican"
}

VP_CORRECTIONS = ["2000-10-11", "2008-09-26", "2012-10-03"]

In [3]:
def normalize_last_name(full_name):
    """Extracts the last name and applies capitalization."""
    if not full_name or full_name == "UNKNOWN":
        return "UNKNOWN"
    return full_name.strip().split()[-1].capitalize()

def check_incumbent(last_name, year):
    """Returns True if candidate is incumbent that year."""
    return (last_name.lower(), str(year)) in INCUMBENT_PAIRS

def check_winner(last_name, year):
    """Returns True if candidate won in that year."""
    return (last_name.lower(), str(year)) in WINNER_PAIRS

def check_candidate(last_name):
    """Returns whether person was a candidate and their party if applicable."""
    key = last_name.lower()
    return (key in CANDIDATES), CANDIDATES.get(key)

def get_json_files(directory):
    """Retrieves JSON files excluding partials from a directory."""
    return [
        os.path.join(directory, f)
        for f in os.listdir(directory)
        if f.endswith(".json") and not f.startswith("part")
    ]

def is_vp_debate(content):
    """Determines if a debate is a Vice-Presidential debate."""
    dialogues = [entry.get("dialogue", "").lower() for entry in content[:5]]
    return any("vice presidential" in d for d in dialogues)

def parse_date(date_list):
    """Converts a date list into a datetime.date object or returns 'UNKNOWN'."""
    try:
        return pd.to_datetime(" ".join(date_list)).date()
    except Exception:
        return pd.NaT

def fix_duplicate_names(last_name, year, is_candidate, party):
    """Fixes duplicate names for specific cases."""
    if last_name.lower() == "bush":
        last_name = "Bush Sr" if str(year) in ["1984", "1988", "1992"] else "Bush Jr"
    elif last_name.lower() == "clinton":
        last_name = "Clinton Hillary" if str(year) == "2016" else "Clinton Bill"
    elif last_name.lower() == "edwards" and str(year) == "1960":
        is_candidate = False
        party = None
    return last_name, is_candidate, party

def process_debate_file(file_path):
    """Processes a single JSON debate file and returns a list of parsed rows."""
    rows = []
    with open(file_path, "r", encoding="utf-8") as f:
        debate = json.load(f)
        content = debate.get("content", [])
        date = parse_date(debate.get("date", []))
        year = date.year if pd.notnull(date) else "UNKNOWN"
        vp_flag = is_vp_debate(content)

        for entry in content:
            actor_raw = entry.get("actor", "UNKNOWN")
            dialogue = entry.get("dialogue", "")
            last_name = normalize_last_name(actor_raw)

            is_candidate, party = check_candidate(last_name)
            is_incumbent = check_incumbent(last_name, year)
            is_winner = check_winner(last_name, year)

            # Fix duplicate names and candidate status
            last_name, is_candidate, party = fix_duplicate_names(last_name, year, is_candidate, party)

            rows.append({
                "debate_title" : None,
                "date": date,
                "year": year,
                "actor": last_name,
                "dialogue": dialogue,
                "is_candidate": is_candidate,
                "party": party,
                "is_winner": is_winner,
                "VP_debate": vp_flag,
                "is_incumbent": is_incumbent
            })
    return rows

def correct_vp_debate_flags(df):
    """Manually corrects VP debate flags for known false positives."""
    for d in VP_CORRECTIONS:
        df.loc[df["date"] == pd.to_datetime(d).date(), "VP_debate"] = False
    return df

def generate_debate_titles(df):
    """Add a 'debate_title' column to the DataFrame."""
    debate_titles = {}
    debate_counter = defaultdict(Counter)

    for date, group in df.groupby("date"):
        year = group["year"].iloc[0]
        is_vp = group["VP_debate"].iloc[0]
        
        # Get sorted unique candidate last names
        candidate_names = sorted(set(group[group["is_candidate"]]["actor"]))
        title_base = f"{year}_" + "_".join(candidate_names)

        if is_vp:
            full_title = f"{title_base}_VP"
        else:
            # Number the debate among similar candidate sets in same year
            debate_counter[year][title_base] += 1
            count = debate_counter[year][title_base]
            full_title = f"{title_base}_{count}"

        debate_titles[date] = full_title

    df["debate_title"] = df["date"].map(debate_titles)
    return df

In [4]:
def debates_to_dataframe(directory):
    """Converts debate JSON files to a DataFrame."""
    
    all_rows = []
    json_files = get_json_files(directory)

    for file_path in json_files:
        all_rows.extend(process_debate_file(file_path))

    df = pd.DataFrame(all_rows)
    df = correct_vp_debate_flags(df)
    df = generate_debate_titles(df)
    return df

df_debates=debates_to_dataframe(DATA_DIRECTORY)
df_debates.head()

Unnamed: 0,debate_title,date,year,actor,dialogue,is_candidate,party,is_winner,VP_debate,is_incumbent
0,2020_Harris_Pence_VP,2020-10-07,2020,Participants,Senator Kamala Harris (D-CA) and,False,,False,True,False
1,2020_Harris_Pence_VP,2020-10-07,2020,Moderator,Susan Page (USA Today),False,,False,True,False
2,2020_Harris_Pence_VP,2020-10-07,2020,Page,Good evening. From the University of Utah in S...,False,,False,True,False
3,2020_Harris_Pence_VP,2020-10-07,2020,Pence,Thank you.,True,Republican,False,True,True
4,2020_Harris_Pence_VP,2020-10-07,2020,Page,Senator Harris and Vice President Pence thank ...,False,,False,True,False


In [5]:
def summarize_debate_actors(
    df,
    only_candidates=True
):
    """
    Summarize actors in a debate DataFrame, counting statements per actor per debate.

    Args:
        df (pd.DataFrame): Debate DataFrame.
        only_candidates (bool): If True, include only candidates.

    Returns:
        pd.DataFrame: Summary with one row per actor per debate.
    """
    # Ensure required columns exist
    required_cols = {"debate_title", "date", "actor", "VP_debate", "is_incumbent", "is_candidate", "party", "dialogue", "is_winner"}
    if not required_cols.issubset(df.columns):
        raise ValueError(f"Input DataFrame must contain columns: {required_cols}")

    # Drop duplicates to get one row per actor per debate
    unique_actors = df.drop_duplicates(subset=["date", "actor"])[
        ["debate_title", "date", "actor",  "is_candidate", "party", "is_winner", "VP_debate", "is_incumbent",]
    ]

    # Count number of statements per actor per debate
    statement_counts = df.groupby(["date", "actor"]).size().reset_index(name="statement_count")

    # Merge counts into unique_actors
    unique_actors = unique_actors.merge(statement_counts, on=["date", "actor"], how="left")

    # Optionally filter only candidates
    if only_candidates:
        unique_actors = unique_actors[unique_actors["is_candidate"]]

    # Sort by date and actor
    unique_actors = unique_actors.sort_values(by=["date", "actor"]).reset_index(drop=True)

    return unique_actors


summarize_debate_actors(df_debates)

Unnamed: 0,debate_title,date,actor,is_candidate,party,is_winner,VP_debate,is_incumbent,statement_count
0,1960_Kennedy_Nixon_1,1960-09-26,Kennedy,True,Democratic,True,False,False,17
1,1960_Kennedy_Nixon_1,1960-09-26,Nixon,True,Republican,False,False,False,10
2,1960_Kennedy_Nixon_2,1960-10-07,Kennedy,True,Democratic,True,False,False,13
3,1960_Kennedy_Nixon_2,1960-10-07,Nixon,True,Republican,False,False,False,12
4,1960_Kennedy_Nixon_3,1960-10-13,Kennedy,True,Democratic,True,False,False,14
...,...,...,...,...,...,...,...,...,...
89,2020_Biden_Trump_1,2020-09-29,Trump,True,Republican,False,False,True,341
90,2020_Harris_Pence_VP,2020-10-07,Harris,True,Democratic,True,True,False,62
91,2020_Harris_Pence_VP,2020-10-07,Pence,True,Republican,False,True,True,89
92,2020_Biden_Trump_2,2020-10-22,Biden,True,Democratic,True,False,False,84


## Add txt based debates

In [6]:
def extract_debate_txt(file_path, title, year, date, vp_debate, candidate_info):
    """Extracts structured debate data from a transcript text file.
    Args:
        file_path (str): Path to the transcript text file.
        title (str): Title of the debate.
        year (int): Year of the debate.
        date (str): Date of the debate in 'YYYY-MM-DD' format.
        vp_debate (bool): Whether this is a vice-presidential debate.
        candidate_info (dict): Dictionary mapping speaker last names to:
            {"is_candidate": bool, "party": str, "is_winner": bool, "is_incumbent": bool}
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = [line.strip() for line in f if line.strip()]

    date = pd.to_datetime(date, errors="coerce").date() if date else None
    pattern = re.compile(r'^([A-Z][A-Z\s.\-]*)(?:, [A-Z\s.]+)?:\s*(.*)')

    data, current_actor, current_text = [], None, []

    def append_block(actor, text):
        if not actor or not text:
            return
        info = candidate_info.get(actor, {
            'is_candidate': False, 'party': None,
            'is_winner': False, 'is_incumbent': False
        })
        data.append({
            "debate_title": title,  "date": date, "year": year,
            "actor": actor, "dialogue": ' '.join(text).strip(),
            "is_candidate": info['is_candidate'], "party": info['party'],
            "is_winner": info['is_winner'], "VP_debate": vp_debate,
            "is_incumbent": info['is_incumbent']
        })

    for line in lines:
        match = pattern.match(line)
        if match:
            append_block(current_actor, current_text)
            current_actor = match.group(1).split()[-1].title()
            current_text = [match.group(2)] if match.group(2) else []
        else:
            current_text.append(line)

    append_block(current_actor, current_text)
    return pd.DataFrame(data)


In [7]:
debate_1992_first_half=extract_debate_txt(
    file_path="data_dir/transcript_1992_oct_15_first_half.txt",
    title="1992_Bush Sr_Clinton Bill_Perot_2",
    year=1992, date="1992-10-15", vp_debate=False,
    candidate_info={
        "Bush": {"is_candidate": True, "party": "Republican","is_winner": False,"is_incumbent": True},
        "Clinton": {"is_candidate": True,"party": "Democratic","is_winner": True,"is_incumbent": False},
        "Perot": {"is_candidate": True,"party": "Independent","is_winner": False,"is_incumbent": False}
    }
)

#Rename Bush to Bush Sr and Clinton to Clinton (Bill)
debate_1992_first_half.loc[debate_1992_first_half["actor"] == "Bush", "actor"] = "Bush Sr"
debate_1992_first_half.loc[debate_1992_first_half["actor"] == "Clinton", "actor"] = "Clinton Bill"
debate_1992_first_half.head(20)

Unnamed: 0,debate_title,date,year,actor,dialogue,is_candidate,party,is_winner,VP_debate,is_incumbent
0,1992_Bush Sr_Clinton Bill_Perot_2,1992-10-15,1992,Simpson,Good evening and welcome to this second of thr...,False,,False,False,False
1,1992_Bush Sr_Clinton Bill_Perot_2,1992-10-15,1992,Bush Sr,Let’s go.,True,Republican,False,False,True
2,1992_Bush Sr_Clinton Bill_Perot_2,1992-10-15,1992,Simpson,And I think the first question is over here.,False,,False,False,False
3,1992_Bush Sr_Clinton Bill_Perot_2,1992-10-15,1992,Question,Yes. I’d like to direct my question to Mr. Per...,False,,False,False,False
4,1992_Bush Sr_Clinton Bill_Perot_2,1992-10-15,1992,Perot,That’s right at the top of my agenda. We’ve sh...,True,Independent,False,False,False
5,1992_Bush Sr_Clinton Bill_Perot_2,1992-10-15,1992,Simpson,"Thank you, Mr. Perot. I see that the president...",False,,False,False,False
6,1992_Bush Sr_Clinton Bill_Perot_2,1992-10-15,1992,Bush Sr,"Carole, the thing that saved us in this global...",True,Republican,False,False,True
7,1992_Bush Sr_Clinton Bill_Perot_2,1992-10-15,1992,Simpson,Governor Clinton.,False,,False,False,False
8,1992_Bush Sr_Clinton Bill_Perot_2,1992-10-15,1992,Clinton Bill,"I’d like to answer the question, because I’ve ...",True,Democratic,True,False,False
9,1992_Bush Sr_Clinton Bill_Perot_2,1992-10-15,1992,Simpson,Thank you. I think we have a question over here.,False,,False,False,False


In [8]:
#concat with the rest of the data
df_debates = pd.concat([ debate_1992_first_half, df_debates], ignore_index=True)
summary=summarize_debate_actors(df_debates)
summary[summary["date"]==pd.to_datetime("1992-10-15").date()]

Unnamed: 0,debate_title,date,actor,is_candidate,party,is_winner,VP_debate,is_incumbent,statement_count
36,1992_Bush Sr_Clinton Bill_Perot_2,1992-10-15,Bush Sr,True,Republican,False,False,True,47
37,1992_Bush Sr_Clinton Bill_Perot_2,1992-10-15,Clinton Bill,True,Democratic,True,False,False,24
38,1992_Bush Sr_Clinton Bill_Perot_2,1992-10-15,Perot,True,Independent,False,False,False,45


In [9]:
debate_2024_biden=extract_debate_txt(
    file_path="data_dir/transcript_2024_Trump_Biden.txt",
    title="2024_Trump_Biden",
    year=2024,date="2024-07-27",vp_debate=False,
    candidate_info={
        "Trump": {"is_candidate": True,"party": "Republican","is_winner": True,"is_incumbent": False},
        "Biden": {"is_candidate": True,"party": "Democratic","is_winner": False,"is_incumbent": True}
    }
)

df_debates = pd.concat([df_debates, debate_2024_biden], ignore_index=True)
debate_2024_biden.head(20)

Unnamed: 0,debate_title,date,year,actor,dialogue,is_candidate,party,is_winner,VP_debate,is_incumbent
0,2024_Trump_Biden,2024-07-27,2024,Tapper,"We’re live from Georgia, a key battleground st...",False,,False,False,False
1,2024_Trump_Biden,2024-07-27,2024,Bash,This debate is being produced by CNN and it’s ...,False,,False,False,False
2,2024_Trump_Biden,2024-07-27,2024,Tapper,"I’m Jake Tapper, anchor of CNN’s “The Lead” an...",False,,False,False,False
3,2024_Trump_Biden,2024-07-27,2024,Bash,"When it’s time for a candidate to speak, his m...",False,,False,False,False
4,2024_Trump_Biden,2024-07-27,2024,Tapper,Now please welcome the 46th president of the U...,False,,False,False,False
5,2024_Trump_Biden,2024-07-27,2024,Biden,How are you? Good to be here. Thank you.,True,Democratic,False,False,True
6,2024_Trump_Biden,2024-07-27,2024,Tapper,And please welcome the 45th president of the U...,False,,False,False,False
7,2024_Trump_Biden,2024-07-27,2024,Biden,You have to take a look at what I was left whe...,True,Democratic,False,False,True
8,2024_Trump_Biden,2024-07-27,2024,Tapper,Thank you. President Trump?,False,,False,False,False
9,2024_Trump_Biden,2024-07-27,2024,Trump,We had the greatest economy in the history of ...,True,Republican,True,False,False


In [10]:
debate_2024_harris=extract_debate_txt(
    file_path="data_dir/transcript_2024_Trump_Harris.txt",
    title="2024_Trump_Harris",
    year=2024,date="2024-09-10",vp_debate=False,
    candidate_info={
        "Trump": {"is_candidate": True,"party": "Republican","is_winner": True,"is_incumbent": False},
        "Harris": {"is_candidate": True,"party": "Democratic","is_winner": False,"is_incumbent": False}
    }
)

df_debates = pd.concat([df_debates, debate_2024_harris], ignore_index=True)
debate_2024_harris.head(20)

Unnamed: 0,debate_title,date,year,actor,dialogue,is_candidate,party,is_winner,VP_debate,is_incumbent
0,2024_Trump_Harris,2024-09-10,2024,Muir,"Tonight, the high-stakes showdown here in Phil...",False,,False,False,False
1,2024_Trump_Harris,2024-09-10,2024,Davis,A historic race for president upended just wee...,False,,False,False,False
2,2024_Trump_Harris,2024-09-10,2024,Muir,The candidates separated by the smallest of ma...,False,,False,False,False
3,2024_Trump_Harris,2024-09-10,2024,Muir,"Good evening, I'm David Muir. And thank you fo...",False,,False,False,False
4,2024_Trump_Harris,2024-09-10,2024,Davis,And I'm Linsey Davis. Tonight's meeting could ...,False,,False,False,False
5,2024_Trump_Harris,2024-09-10,2024,Muir,And that brings us to the rules of tonight's d...,False,,False,False,False
6,2024_Trump_Harris,2024-09-10,2024,Davis,President Trump won the coin toss. He chose to...,False,,False,False,False
7,2024_Trump_Harris,2024-09-10,2024,Muir,So let's now welcome the candidates to the sta...,False,,False,False,False
8,2024_Trump_Harris,2024-09-10,2024,Harris,Kamala Harris. Let's have a good debate.,True,Democratic,False,False,False
9,2024_Trump_Harris,2024-09-10,2024,Trump,Nice to see you. Have fun.,True,Republican,True,False,False


In [11]:
debate_2024_vp=extract_debate_txt(
    file_path="data_dir/transcript_2024_Vance_Walz.txt",
    title="2024_Vance_Walz_VP",
    year=2024,date="2024-10-01",vp_debate=True,
    candidate_info={
        "Jdv": {"is_candidate": True,"party": "Republican","is_winner": True,"is_incumbent": False},
        "Tw": {"is_candidate": True,"party": "Democratic","is_winner": False,"is_incumbent": False}
    }
)

#Rename Jdv to Vance, Tw to Walz, No to O'Donnell, and Mb to Brennan
debate_2024_vp.loc[debate_2024_vp["actor"] == "Jdv", "actor"] = "Vance"
debate_2024_vp.loc[debate_2024_vp["actor"] == "Tw", "actor"] = "Walz"
debate_2024_vp.loc[debate_2024_vp["actor"] == "No", "actor"] = "O'Donnell"
debate_2024_vp.loc[debate_2024_vp["actor"] == "Mb", "actor"] = "Brennan"


df_debates = pd.concat([df_debates, debate_2024_vp], ignore_index=True)
debate_2024_vp.head(20)

Unnamed: 0,debate_title,date,year,actor,dialogue,is_candidate,party,is_winner,VP_debate,is_incumbent
0,2024_Vance_Walz_VP,2024-10-01,2024,O'Donnell,Good evening. I'm Norah O'Donnell and thank yo...,False,,False,True,False
1,2024_Vance_Walz_VP,2024-10-01,2024,Brennan,I'm Margaret Brennan. In order to have a thoug...,False,,False,True,False
2,2024_Vance_Walz_VP,2024-10-01,2024,Brennan,"Thank you, Norah. Earlier today, Iran launched...",False,,False,True,False
3,2024_Vance_Walz_VP,2024-10-01,2024,Walz,"Well, thank you. And thank you for those joini...",True,Democratic,False,True,False
4,2024_Vance_Walz_VP,2024-10-01,2024,Brennan,"Governor, your time is up. Senator Vance, the ...",False,,False,True,False
5,2024_Vance_Walz_VP,2024-10-01,2024,Vance,"So, Margaret, I want to answer the question. F...",True,Republican,True,True,False
6,2024_Vance_Walz_VP,2024-10-01,2024,Brennan,"Thank you, Senator. Governor Walz, do you care...",False,,False,True,False
7,2024_Vance_Walz_VP,2024-10-01,2024,Walz,"Well, look, Donald Trump was in office. We'll ...",True,Democratic,False,True,False
8,2024_Vance_Walz_VP,2024-10-01,2024,Brennan,"Senator Vance, the U.S. did have a diplomatic ...",False,,False,True,False
9,2024_Vance_Walz_VP,2024-10-01,2024,Vance,"Well, first of all, Margaret, diplomacy is not...",True,Republican,True,True,False


In [12]:
df_debates.sort_values(by=["year", "date"], inplace=True)
df_debates.reset_index(drop=True, inplace=True)
df_debates

Unnamed: 0,debate_title,date,year,actor,dialogue,is_candidate,party,is_winner,VP_debate,is_incumbent
0,1960_Kennedy_Nixon_1,1960-09-26,1960,Kennedy,"Mr. Smith, Mr. Nixon. In the election of 1860,...",True,Democratic,True,False,False
1,1960_Kennedy_Nixon_1,1960-09-26,1960,Smith,And now the opening statement by Vice Presiden...,False,,False,False,False
2,1960_Kennedy_Nixon_1,1960-09-26,1960,Nixon,"Mr. Smith, Senator Kennedy. The things that Se...",True,Republican,False,False,False
3,1960_Kennedy_Nixon_1,1960-09-26,1960,Smith,"Thank you, Mr. Nixon. That completes the openi...",False,,False,False,False
4,1960_Kennedy_Nixon_1,1960-09-26,1960,Fleming,"Senator, the Vice President in his campaign ha...",False,,False,False,False
...,...,...,...,...,...,...,...,...,...,...
9928,2024_Vance_Walz_VP,2024-10-01,2024,Walz,"Well, thank you, Senator Vance. Thank you to C...",True,Democratic,False,True,False
9929,2024_Vance_Walz_VP,2024-10-01,2024,Brennan,"Governor Walz. Thank you. Senator Vance, your ...",False,,False,True,False
9930,2024_Vance_Walz_VP,2024-10-01,2024,Vance,"Well, I want to thank Governor Walz, you folks...",True,Republican,True,True,False
9931,2024_Vance_Walz_VP,2024-10-01,2024,Brennan,"Senator Vance, thank you. And thank you both f...",False,,False,True,False


In [13]:
# Save the final DataFrame to a CSV file
df_debates.to_csv("debate_transcripts_cleaned.csv", index=False, encoding="utf-8")
print(f"Data saved")

Data saved


In [14]:
df_debates.describe(include="all")

Unnamed: 0,debate_title,date,year,actor,dialogue,is_candidate,party,is_winner,VP_debate,is_incumbent
count,9933,9933,9933.0,9933,9933,9933,5651,9933,9933,9933
unique,48,48,,171,8563,2,3,2,2,2
top,2020_Biden_Trump_1,2020-09-29,,Trump,(CROSSTALK),True,Republican,False,False,False
freq,858,858,,930,163,5651,2990,7187,7334,8332
mean,,,2004.906876,,,,,,,
std,,,14.783792,,,,,,,
min,,,1960.0,,,,,,,
25%,,,1992.0,,,,,,,
50%,,,2008.0,,,,,,,
75%,,,2016.0,,,,,,,


In [15]:
summarize_debate_actors(df_debates, only_candidates=True)

Unnamed: 0,debate_title,date,actor,is_candidate,party,is_winner,VP_debate,is_incumbent,statement_count
0,1960_Kennedy_Nixon_1,1960-09-26,Kennedy,True,Democratic,True,False,False,17
1,1960_Kennedy_Nixon_1,1960-09-26,Nixon,True,Republican,False,False,False,10
2,1960_Kennedy_Nixon_2,1960-10-07,Kennedy,True,Democratic,True,False,False,13
3,1960_Kennedy_Nixon_2,1960-10-07,Nixon,True,Republican,False,False,False,12
4,1960_Kennedy_Nixon_3,1960-10-13,Kennedy,True,Democratic,True,False,False,14
...,...,...,...,...,...,...,...,...,...
95,2024_Trump_Biden,2024-07-27,Trump,True,Republican,True,False,False,49
96,2024_Trump_Harris,2024-09-10,Harris,True,Democratic,False,False,False,35
97,2024_Trump_Harris,2024-09-10,Trump,True,Republican,True,False,False,76
98,2024_Vance_Walz_VP,2024-10-01,Vance,True,Republican,True,True,False,48
