# Imports

In [1]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import ticker
from matplotlib.ticker import MaxNLocator

# Load Data

In [None]:
# Load the data
DATA_DIR = Path("data")


def _find_data_file(file_name):
    """Return the path of ``file_name`` inside the data directory.

    Raises:
        FileNotFoundError: if the data directory holds no file of that name.
            (A bare ``[0]`` on the glob result would raise an uninformative
            IndexError instead.)
    """
    match = next(DATA_DIR.glob(file_name), None)
    if match is None:
        raise FileNotFoundError(f"Expected data file not found: {DATA_DIR / file_name}")
    return match


# Locate the five Excel workbooks. The full-text paths get their own names so
# they are not silently rebound to DataFrames further down.
input_file_foras = _find_data_file("PTSS_Data_Foras.xlsx")
input_file_synergy = _find_data_file("PTSS_Data_Synergy.xlsx")
fulltext_foras_file = _find_data_file("PTSS_Data_Foras_Fulltext.xlsx")
fulltext_foras_2nd_file = _find_data_file("PTSS_Data_Foras_Fulltext_2ndscreener.xlsx")
fulltext_synergy_file = _find_data_file("PTSS_Data_Synergy_Fulltext.xlsx")

# Print the file names
print(
    "Results based on file: ",
    input_file_foras,
    input_file_synergy,
    fulltext_foras_file,
    fulltext_foras_2nd_file,
    fulltext_synergy_file,
)

# Read the foras file and filter out the duplicates.
# .copy() makes the filtered frame independent of foras_unfiltered, so adding
# columns to it later does not raise SettingWithCopyWarning.
foras_unfiltered = pd.read_excel(input_file_foras)
foras_filtered = foras_unfiltered[foras_unfiltered["filter_duplicate"] != 1].copy()

# Read the other files
synergy = pd.read_excel(input_file_synergy)
fulltext_foras = pd.read_excel(fulltext_foras_file)
fulltext_foras_2nd = pd.read_excel(fulltext_foras_2nd_file)
fulltext_synergy = pd.read_excel(fulltext_synergy_file)

# Print the number of rows in each file
print("Number of records in original FORAS file: ", foras_unfiltered.shape[0])
print(
    "Number of records in FORAS after filtering duplicates: ", foras_filtered.shape[0]
)
print("Number of records in SYNERGY", synergy.shape[0])
print("Number of records in FORAS fulltext", fulltext_foras.shape[0])
print("Number of records in FORAS fulltext 2nd screener", fulltext_foras_2nd.shape[0])
print("Number of records in SYNERGY fulltext", fulltext_synergy.shape[0])

# Background variables

## Duplicates

In [None]:
# Count the records of the unfiltered FORAS file that carry a value in the
# duplicate_record_identifier column
has_duplicate_id = foras_unfiltered["duplicate_record_identifier"].notnull()
duplicates = len(foras_unfiltered[has_duplicate_id])
print(
    "Number of duplicates in FORAS file, which will be ignored in the remainder of the analyses: ",
    duplicates,
)

## Number of PIDs, Titles and Abstracts

In [None]:
# Function to calculate missing data and plot for a given dataset
def analyze_missing_data(dataset, dataset_name):
    """Count, print and plot the missing identifiers, titles and abstracts.

    Args:
        dataset: DataFrame containing the columns "doi", "openalex_id",
            "title" and "abstract".
        dataset_name: Label used in the printed header and the chart title.

    Returns:
        dict mapping each checked column, plus "Both Missing" (records that
        lack both a DOI and an OpenAlex ID), to the number of affected records.
    """
    # Define columns to check for missing values
    columns_to_check = ["doi", "openalex_id", "title", "abstract"]
    is_missing = dataset[columns_to_check].isnull()
    missing_data_counts = {col: is_missing[col].sum() for col in columns_to_check}
    # Combine the row-wise masks BEFORE counting. AND-ing the two column totals
    # would be a bitwise operation on two integers, not a count of records.
    missing_data_counts["Both Missing"] = (
        is_missing["doi"] & is_missing["openalex_id"]
    ).sum()

    # Print the number of records without certain values
    print(f"Missing Data Analysis for {dataset_name}:")
    for category, count in missing_data_counts.items():
        print(f"  - Number of records without {category}: {count}")

    # Calculate percentages for visualization (0% across the board for an
    # empty dataset, avoiding a division by zero)
    total_records = dataset.shape[0]
    percentages = {
        key: (value / total_records) * 100 if total_records else 0.0
        for key, value in missing_data_counts.items()
    }

    # Plotting
    plt.figure(figsize=(10, 6))
    categories = list(percentages.keys())
    values = list(percentages.values())
    bars = plt.bar(categories, values, color=plt.cm.tab10(range(len(categories))))

    # Add title and labels
    plt.title(f"Percentage of Missing Data in Each Category ({dataset_name})")
    plt.xlabel("Missing Data Category")
    plt.ylabel("Percentage of Total Records")

    # Annotate each bar with its absolute count and percentage
    for bar, category in zip(bars, categories):
        height = bar.get_height()
        count = missing_data_counts[category]
        plt.text(
            bar.get_x() + bar.get_width() / 2.0,
            height,
            f"{count} ({height:.1f}%)",
            ha="center",
            va="bottom",
        )

    # Display the chart
    plt.show()

    # Return results for further programmatic use
    return missing_data_counts


# Run the missing-data analysis on both datasets; each call prints the counts,
# shows a bar chart and returns the counts as a dict
foras_results = analyze_missing_data(foras_filtered, "Foras")
synergy_results = analyze_missing_data(synergy, "Synergy")

# Frequencies

## Frequencies for search results

In [None]:
def generate_frequency_overview(dataset, variable_list):
    """Print a count/percentage table for each requested column.

    Args:
        dataset: DataFrame holding the columns to tabulate.
        variable_list: Column names to report; names that are not columns of
            ``dataset`` are reported as skipped.

    Missing values are tabulated as their own category, and percentages are
    rounded to two decimals.
    """
    separator = "\n" + "-" * 50 + "\n"

    print("Frequency Overview:\n")
    for variable in variable_list:
        if variable in dataset.columns:
            print(f"Variable: {variable}")
            counts = dataset[variable].value_counts(dropna=False)
            shares = (counts / counts.sum() * 100).round(2)
            print(pd.DataFrame({"Count": counts, "Percentage": shares}))
            print(separator)
        else:
            print(f"Variable '{variable}' not found in the dataset. Skipping...\n")


# Search-related columns (plus "batch") to tabulate
variable_list_additional = [
    "search_replication",
    "search_comprehensive",
    "search_snowballing",
    "search_fulltext",
    "search_openalex_inlusion_criteria",
    "search_openalex_inlusion_criteria_long",
    "search_openalex_logistic",
    "search_openalex_logistic_long",
    "search_openalex_all_abstracts",
    "search_openalex_all_abstracts_long",
    "batch",
]

# Print a count/percentage table per column for the de-duplicated FORAS records
generate_frequency_overview(foras_filtered, variable_list_additional)

## Frequencies for labeling decisions

In [None]:
# Labeling columns to tabulate: title (TI), title-abstract (TI-AB) and
# full-text (FT) columns, followed by the two label_included_* columns.
# Names missing from the dataset are reported and skipped by the function.
variable_list = [
    "title_eligible_Bruno",
    "TI-AB_IC1_Bruno",
    "TI-AB_IC2_Bruno",
    "TI-AB_IC3_Bruno",
    "TI-AB_IC4_Bruno",
    "TI-AB_final_label_Bruno",
    "title_eligible_Rutger",
    "TI_final_label",
    "TI-AB_final_label_Rutger",
    "TI-AB_disagreement_human-human",
    "TI-AB_IC1_LLM",
    "TI-AB_IC2_LLM",
    "TI-AB_IC3_LLM",
    "TI-AB_IC4_LLM",
    "TI-AB_final_label_LLM",
    "LLM_re-assessed",
    "TI-AB_disagreement_human-LLM",
    "TI-AB_IC1_joint",
    "TI-AB_IC2_joint",
    "TI-AB_IC3_joint",
    "TI-AB_IC4_joint",
    "TI-AB_IC1_final",
    "TI-AB_IC2_final",
    "TI-AB_IC3_final",
    "TI-AB_IC4_final",
    "TI-AB_final_label",
    "full_text_available",
    "FT_IC1_Bruno",
    "FT_IC2_Bruno",
    "FT_IC3_Bruno",
    "FT_IC4_Bruno",
    "FT_exlusion_reason_Bruno",
    "FT_inclusion_Bruno",
    "FT_inclusion_Rutger",
    "FT_exlusion_reason_Rutger",
    "FT_disagreements_Bruno-Rutger",
    "FT_IC1_joint",
    "FT_IC2_joint",
    "FT_IC3_joint",
    "FT_IC4_joint",
    "FT_IC1_final",
    "FT_IC2_final",
    "FT_IC3_final",
    "FT_IC4_final",
    "FT_final_label",
    "label_included_TIAB",
    "label_included_FT",
]

# Call the function to generate the frequency overview
generate_frequency_overview(foras_filtered, variable_list)

## Labeling combinations

In [7]:
def analyze_inclusions(dataset, final_label, label_1, label_2):
    """Print three cross-tabulations (with margins) relating three label columns.

    Args:
        dataset: DataFrame containing the three label columns.
        final_label: Name of the column holding the final decision.
        label_1: Name of the first label column to compare.
        label_2: Name of the second label column to compare.

    The tables are printed in this order: ``label_1`` vs ``label_2``,
    ``final_label`` vs ``label_1``, ``final_label`` vs ``label_2``.
    """
    print(f"Inclusion Analysis for {final_label}:\n")

    comparisons = (
        (label_1, label_2),
        (final_label, label_1),
        (final_label, label_2),
    )
    for row_label, column_label in comparisons:
        print(f"Cross-tabulation of {row_label} vs {column_label}:\n")
        table = pd.crosstab(
            dataset[row_label], dataset[column_label], margins=True, dropna=True
        )
        print(table, "\n")

In [None]:
# Cross-tabulate the final label against both screeners' labels, once per
# screening stage: title, title-abstract, full text
stage_label_columns = (
    ("TI_final_label", "title_eligible_Rutger", "title_eligible_Bruno"),
    ("TI-AB_final_label", "TI-AB_final_label_Rutger", "TI-AB_final_label_Bruno"),
    ("FT_final_label", "FT_inclusion_Rutger", "FT_inclusion_Bruno"),
)
for stage_final, stage_rutger, stage_bruno in stage_label_columns:
    analyze_inclusions(foras_filtered, stage_final, stage_rutger, stage_bruno)

In [None]:
# Cross-tabulate final vs. Bruno vs. joint labels for each of the four
# title-abstract inclusion criteria
tiab_criteria_columns = (
    ("TI-AB_IC1_final", "TI-AB_IC1_Bruno", "TI-AB_IC1_joint"),
    ("TI-AB_IC2_final", "TI-AB_IC2_Bruno", "TI-AB_IC2_joint"),
    ("TI-AB_IC3_final", "TI-AB_IC3_Bruno", "TI-AB_IC3_joint"),
    ("TI-AB_IC4_final", "TI-AB_IC4_Bruno", "TI-AB_IC4_joint"),
)
for tiab_final, tiab_bruno, tiab_joint in tiab_criteria_columns:
    analyze_inclusions(foras_filtered, tiab_final, tiab_bruno, tiab_joint)

In [None]:
# Cross-tabulate final vs. Bruno vs. joint labels for each of the four
# full-text inclusion criteria
ft_criteria_columns = (
    ("FT_IC1_final", "FT_IC1_Bruno", "FT_IC1_joint"),
    ("FT_IC2_final", "FT_IC2_Bruno", "FT_IC2_joint"),
    ("FT_IC3_final", "FT_IC3_Bruno", "FT_IC3_joint"),
    ("FT_IC4_final", "FT_IC4_Bruno", "FT_IC4_joint"),
)
for ft_final, ft_bruno, ft_joint in ft_criteria_columns:
    analyze_inclusions(foras_filtered, ft_final, ft_bruno, ft_joint)

# Plots

## Search results

### Frequencies

In [None]:
# Bar chart: number of FORAS records found by each search method.
# Search columns to sum, one bar per column
search_columns = [
    "search_replication",
    "search_comprehensive",
    "search_snowballing",
    "search_fulltext",
    "search_openalex_inlusion_criteria",
    "search_openalex_inlusion_criteria_long",
    "search_openalex_logistic",
    "search_openalex_logistic_long",
    "search_openalex_all_abstracts",
    "search_openalex_all_abstracts_long",
]

# Shorter names for plotting
# NOTE(review): the bar labels come from this dict's insertion order while the
# counts follow search_columns, so the two must list the columns in the same
# order to stay correctly paired.
short_names = {
    "search_replication": "Replication",
    "search_comprehensive": "Comprehensive",
    "search_snowballing": "Snowballing",
    "search_fulltext": "Fulltext",
    "search_openalex_inlusion_criteria": "OpenAlex-Inclusion",
    "search_openalex_inlusion_criteria_long": "OpenAlex-Inclusion-Long",
    "search_openalex_logistic": "OpenAlex-Logistic",
    "search_openalex_logistic_long": "OpenAlex-Logistic-Long",
    "search_openalex_all_abstracts": "OpenAlex-All-Abstracts",
    "search_openalex_all_abstracts_long": "OpenAlex-All-Abstracts-Long",
}

# Define a color scheme
bar_color = "#009739"

# Sum each search column over the de-duplicated FORAS records
counts_foras = [foras_filtered[column].sum() for column in search_columns]

# Set up the figure and draw one bar per search method
plt.figure(figsize=(12, 6))  # Fixed 12 x 6 inch canvas
bars_foras = plt.bar(short_names.values(), counts_foras, color=bar_color)

# Add title and labels
plt.title("Number of Records Found via the Different Search Methods", fontsize=14)
plt.ylabel("Count", fontsize=12)
plt.xlabel("Search Method", fontsize=12)
plt.xticks(rotation=45, ha="right")

# Add gridlines for better readability
plt.grid(axis="y", linestyle="--", alpha=0.7)

# Annotate bars with their values, lifted above the bar top by 5% of the
# tallest bar
for bar in bars_foras:
    yval = bar.get_height()
    plt.text(
        bar.get_x() + bar.get_width() / 2,
        yval + 0.05 * max(counts_foras),
        f"{int(yval)}",
        ha="center",
        va="bottom",
        fontsize=10,
    )

# Display the plot
plt.tight_layout()
plt.show()

### Old versus New

In [None]:
# 'old-school' flag: 1 when at least one of the replication, comprehensive or
# snowballing search columns is set for the record, else 0
old_school_columns = ["search_replication", "search_comprehensive", "search_snowballing"]
foras_filtered["old-school"] = (
    foras_filtered[old_school_columns].any(axis=1).astype(int)
)

# 'new-school' flag: 1 when at least one of the long OpenAlex search columns
# or the full-text search column is set for the record, else 0
new_school_columns = [
    "search_openalex_inlusion_criteria_long",
    "search_openalex_logistic_long",
    "search_openalex_all_abstracts_long",
    "search_fulltext",
]
foras_filtered["new-school"] = (
    foras_filtered[new_school_columns].any(axis=1).astype(int)
)

# Number of records per category; the third category counts records carrying
# both flags
found_by_old = foras_filtered["old-school"] == 1
found_by_new = foras_filtered["new-school"] == 1
categories = ["Old-school", "New-school", "Both old and new"]
counts = [
    foras_filtered["old-school"].sum(),
    foras_filtered["new-school"].sum(),
    len(foras_filtered[found_by_old & found_by_new]),
]

# Print the counts for validation
print("Total records: ", foras_filtered.shape[0])
print("Old-school: ", counts[0])
print("New-school: ", counts[1])
print("Both: ", counts[2])

# Bar chart of the old-school / new-school / both counts computed above
plt.figure(figsize=(12, 6))  # Adjusted figure size for better spacing
bar_color = "#009739"  # Consistent color for inclusion-related metrics
bars = plt.bar(categories, counts, color=bar_color)

# Add title and labels
plt.title("Number of Records Found via Different Search Methods in Foras", fontsize=14)
plt.ylabel("Count", fontsize=12)
plt.xlabel("Search Method", fontsize=12)
plt.xticks(rotation=45, ha="right")  # Rotate labels at 45 degrees for readability

# Annotate each bar with its count
for bar in bars:
    yval = bar.get_height()
    plt.text(
        bar.get_x() + bar.get_width() / 2,
        yval + 0.05 * max(counts),  # Offset of 5% of the tallest bar above the bar top
        f"{int(yval)}",
        ha="center",
        va="bottom",
        fontsize=10,
    )

# Add gridlines for easier visualization
plt.grid(axis="y", linestyle="--", alpha=0.7)

# Show the plot
plt.tight_layout()
plt.show()

### Bar chart with unique search results

In [None]:
# Build one row per observed combination of search strategies, with the number
# of records and of inclusions found through exactly that combination.
# Define search columns (their order fixes the digit order of the permutation)
search_columns = [
    "search_replication",
    "search_comprehensive",
    "search_snowballing",
    "search_fulltext",
    "search_openalex_inlusion_criteria_long",
    "search_openalex_logistic_long",
    "search_openalex_all_abstracts_long",
]

# Generate permutation column: each record's search flags, cast to int and
# concatenated into one string with one character per search column
# NOTE(review): the int cast presumably requires these columns to be free of
# missing values - confirm against the data
foras_filtered = foras_filtered.copy()  # Avoid SettingWithCopyWarning
foras_filtered["permutation"] = foras_filtered[search_columns].apply(
    lambda row: "".join(row.values.astype(int).astype(str)), axis=1
)

# Number of records per permutation
value_counts = foras_filtered["permutation"].value_counts().reset_index()
value_counts.columns = ["permutation", "count"]

# Sum of the TI-AB and FT final labels per permutation
sum_inclusions = (
    foras_filtered.groupby("permutation")[["TI-AB_final_label", "FT_final_label"]]
    .sum()
    .reset_index()
)

# Merge results
result = pd.merge(value_counts, sum_inclusions, on="permutation")

# Convert permutation back to one integer column per search strategy
for index, col in enumerate(search_columns):
    result[col] = result["permutation"].str[index].astype(int)

result.drop(columns="permutation", inplace=True)

# Reorder columns so the search flags come first and the totals last
columns_to_move = ["count", "TI-AB_final_label", "FT_final_label"]
new_order = [
    col for col in result.columns if col not in columns_to_move
] + columns_to_move
result = result[new_order]

# Keep only combinations whose record count and TI-AB label sum are non-zero
df_filtered = result[(result["count"] != 0) & (result["TI-AB_final_label"] != 0)].copy()

# Combination column: the search flags joined with "-", used as the x-axis label
df_filtered["combination"] = (
    df_filtered[search_columns].astype(str).agg("-".join, axis=1)
)

# Aggregate per combination: summed TI-AB labels plus the summed search flags
df_relevant_filtered_agg = (
    df_filtered.groupby("combination")
    .agg(
        relevant_count=("TI-AB_final_label", "sum"),
        **{col: (col, "sum") for col in search_columns},
    )
    .reset_index()
)

# Largest combinations first
df_relevant_filtered_agg.sort_values(by="relevant_count", ascending=False, inplace=True)

# Classify every combination of search strategies into a colour-coded group
openalex_columns = [
    "search_openalex_inlusion_criteria_long",
    "search_openalex_logistic_long",
    "search_openalex_all_abstracts_long",
]
classic_columns = ["search_replication", "search_comprehensive"]
all_but_fulltext = classic_columns + ["search_snowballing"] + openalex_columns


def _hits(columns):
    """Return, per combination, the sum of the given search columns."""
    return df_relevant_filtered_agg[columns].sum(axis=1)


snowballing_flag = df_relevant_filtered_agg["search_snowballing"]
fulltext_flag = df_relevant_filtered_agg["search_fulltext"]

conditions = [
    # Uniquely via Snowballing
    (snowballing_flag == 1)
    & (_hits(classic_columns + ["search_fulltext"] + openalex_columns) == 0),
    # Unique via Old-School (Replication or Comprehensive)
    (_hits(classic_columns) > 0)
    & (_hits(["search_snowballing", "search_fulltext"] + openalex_columns) == 0),
    # Unique via OpenAlex (Inclusion Criteria, Logistic, All Abstracts)
    (_hits(openalex_columns) > 0)
    & (_hits(classic_columns + ["search_snowballing", "search_fulltext"]) == 0),
    # Always Found (All methods combined, excluding Fulltext)
    (fulltext_flag == 0) & (_hits(all_but_fulltext) == len(search_columns) - 1),
    # Unique via Fulltext
    (fulltext_flag == 1) & (_hits(all_but_fulltext) == 0),
]

# One colour per condition, in the same order; the last colour is the fallback
# for combinations matching none of the conditions
colors = ["#ff7f0e", "#8B4513", "#4682B4", "#2ca02c", "#ffcc00", "#d3d3d3"]
df_relevant_filtered_agg["color"] = np.select(
    conditions, colors[:-1], default=colors[-1]
)

# Plot the data: one bar per combination, coloured by its assigned group
plt.figure(figsize=(14, 10))
bars = plt.bar(
    df_relevant_filtered_agg["combination"],
    df_relevant_filtered_agg["relevant_count"],
    color=df_relevant_filtered_agg["color"],
)

plt.title(
    "Overview of Included Records per Combination of Search Strategies", fontsize=14
)
plt.xlabel("Combination", fontsize=12)
plt.ylabel("Relevant Count", fontsize=12)
plt.xticks(rotation=90)

# Dynamic legend with k= and italicized k: k is the summed relevant count of
# all bars sharing that colour (0 when no bar uses the colour)
condition_counts = df_relevant_filtered_agg.groupby("color")["relevant_count"].sum()
legend_labels = [
    f"{label} $k={int(condition_counts.get(color, 0))}$"
    for label, color in zip(
        [
            "Uniquely via Snowballing",
            "Unique via Old-School",
            "Unique via OpenAlex",
            "Always Found",
            "Unique via Fulltext",
            "Other Cases",
        ],
        colors,
    )
]
# Proxy handles: one coloured circular marker per colour
handles = [
    plt.Line2D([0], [0], marker="o", color="w", markerfacecolor=color, markersize=10)
    for color in colors
]
plt.legend(
    handles,
    legend_labels,
    title="Conditions",
    bbox_to_anchor=(1.05, 1),
    loc="upper left",
)

# Integer-only ticks on the y-axis
plt.gca().yaxis.set_major_locator(MaxNLocator(integer=True))

# Add bullet list explaining which position of the combination label belongs
# to which search strategy
datasets = [
    "x...... Replication",
    ".x..... Comprehensive",
    "..x.... Snowballing",
    "...x... Fulltext",
    "....x.. Inclusion Criteria",
    ".....x. Logistic",
    "......x All Abstracts",
]
plt.text(
    0.8,
    0.1,
    "Order of Datasets:\n" + "\n".join(f"• {dataset}" for dataset in datasets),
    horizontalalignment="left",
    verticalalignment="center",
    transform=plt.gcf().transFigure,  # Position given in figure coordinates
    bbox=dict(facecolor="none", edgecolor="black"),
)


# Adjust layout
plt.tight_layout()
plt.show()

In [None]:
# Summed relevant count over all combinations
relevant_counts = df_relevant_filtered_agg["relevant_count"]
total_relevant_count = relevant_counts.sum()

# Relevant count of the combinations whose search columns sum to exactly 1
found_by_single_strategy = df_relevant_filtered_agg[search_columns].sum(axis=1) == 1
unique_single_strategy_count = relevant_counts[found_by_single_strategy].sum()
unique_single_strategy_percentage = (
    unique_single_strategy_count / total_relevant_count
) * 100
print(
    f"Total number of records uniquely identified with only one search strategy: {unique_single_strategy_count}"
)
print(
    f"Percentage of records uniquely identified with only one search strategy: {unique_single_strategy_percentage:.2f}%"
)

# Relevant count of the combinations that received the fallback colour
is_other_case = df_relevant_filtered_agg["color"] == "#d3d3d3"
other_cases_count = relevant_counts[is_other_case].sum()
other_cases_percentage = (other_cases_count / total_relevant_count) * 100
print(f"Number of other cases: {other_cases_count}")
print(f"Percentage of other cases: {other_cases_percentage:.2f}%")

# Remainder: everything that is not an "other case"
minus_other_cases_count = total_relevant_count - other_cases_count
minus_other_cases_percentage = (minus_other_cases_count / total_relevant_count) * 100
print(f"Total number of records minus other cases: {minus_other_cases_count}")
print(f"Percentage of records minus other cases: {minus_other_cases_percentage:.2f}%")

## Pie chart with TI-AB inclusions for Foras and Synergy

In [None]:
# Helper that builds the slice-label formatter used by the pie charts
def autopct_format(values):
    """Return a callback that renders a percentage as "<count> (<pct>%)".

    Args:
        values: The slice sizes; their sum is used to turn a percentage back
            into an absolute count.

    Returns:
        A function taking a percentage and returning the rounded absolute
        count (with thousands separators above 999) followed by the
        percentage with one decimal in parentheses.
    """

    def inner_autopct(pct):
        absolute = round(pct * sum(values) / 100.0)
        shown = f"{absolute:,.0f}" if absolute > 999 else f"{absolute}"
        return f"{shown} ({pct:.1f}%)"

    return inner_autopct


# Draw one pie chart in a 1 x 3 grid of subplots
def plot_pie_chart(data, title, subplot_position, labels):
    """Draw a labelled pie chart in the given slot of a 1 x 3 subplot grid.

    Args:
        data: Slice sizes; must offer ``.sum()`` for the total in the title.
        title: First line of the subplot title; the total is added below it.
        subplot_position: 1-based index of the subplot within the grid.
        labels: One label per slice, in the same order as ``data``.

    Slices are coloured with the module-level ``brazilian_colors``. The value
    text of the slice labelled "Included" is right-aligned; every other value
    text is centred.
    """
    plt.subplot(1, 3, subplot_position)
    _, _, value_texts = plt.pie(
        data,
        labels=labels,
        colors=brazilian_colors,
        autopct=autopct_format(data),
        startangle=90,
    )
    plt.title(f"{title}\nTotal: {data.sum():,}", loc="center")

    # Adjust alignment of the value text per slice
    for label, value_text in zip(labels, value_texts):
        alignment = "right" if label == "Included" else "center"
        value_text.set_horizontalalignment(alignment)


# Brazilian flag colors
brazilian_colors = [
    "#FEDD00",  # yellow
    "#009739",  # green
]

# Data preparation
# value_counts() orders by frequency, but the slice labels below are assigned
# by position. Sorting by label value keeps "Excluded" first and "Included"
# second even if inclusions ever outnumber exclusions.
# NOTE(review): assumes the excluded code sorts before the included code
# (e.g. 0 < 1) - confirm against the data.
foras_tiab_records = foras_filtered["label_included_TIAB"].value_counts().sort_index()
synergy_tiab_records = synergy["TI-AB-corrected"].value_counts().sort_index()
combined_tiab_records = foras_tiab_records.add(
    synergy_tiab_records, fill_value=0
).sort_index()

# Plot setup
plt.figure(figsize=(18, 6))
plt.subplots_adjust(right=0.85)

# Plotting
plot_pie_chart(foras_tiab_records, "FORAS TI-AB", 1, ["Excluded", "Included"])
plot_pie_chart(synergy_tiab_records, "Synergy TI-AB", 2, ["Excluded", "Included"])
plot_pie_chart(combined_tiab_records, "Total TI-AB", 3, ["Excluded", "Included"])

# Legend
plt.legend(
    ["Excluded", "Included"], loc="center left", bbox_to_anchor=(1, 0.5), frameon=False
)

plt.show()

## Pie chart with FT inclusions for Foras and Synergy

In [None]:
# Data preparation for FT labels
# value_counts() orders by frequency, but the slice labels below are assigned
# by position. Sorting by label value keeps "Excluded" first and "Included"
# second even if inclusions ever outnumber exclusions.
# NOTE(review): assumes the excluded code sorts before the included code
# (e.g. 0 < 1) - confirm against the data.
foras_ft_records = foras_filtered["label_included_FT"].value_counts().sort_index()
synergy_ft_records = synergy["FT-corrected"].value_counts().sort_index()
combined_ft_records = foras_ft_records.add(
    synergy_ft_records, fill_value=0
).sort_index()

# Plot setup for FT labels
plt.figure(figsize=(18, 6))
plt.subplots_adjust(right=0.85)

# Plotting for FT labels
plot_pie_chart(foras_ft_records, "FORAS FT", 1, labels=["Excluded", "Included"])
plot_pie_chart(synergy_ft_records, "Synergy FT", 2, labels=["Excluded", "Included"])
plot_pie_chart(combined_ft_records, "Total FT", 3, labels=["Excluded", "Included"])

# Legend
plt.legend(
    ["Excluded", "Included"], loc="center left", bbox_to_anchor=(1, 0.5), frameon=False
)

plt.show()