In [12]:
import pandas as pd

ods = (
    pd.read_excel(
        "../DB AF Completo 1.ods",
    )
    .loc[:, ["ID", "Phenotype", "Low Voltage / Scar area", "Rhythm during Mapping"]]
    .rename(
        columns={
            "ID": "Patient ID",
            "Low Voltage / Scar area": "Substrate",
            "Rhythm during Mapping": "Rhythm",
        }
    )
    .dropna(axis=0, how="any")
    .astype({"Patient ID": int})
)
csv = (
    pd.read_csv("thrs_<15_filt/lase.csv", sep=",")
    .dropna(axis=0, how="any")
    .astype({"Patient ID": int})
)
display(ods)
csv

Unnamed: 0,Patient ID,Phenotype,Substrate,Rhythm
3,9,Persistent,Abnormal,AF
4,10,Paroxysmal,Normal,SR
5,11,Persistent,Normal,AF
6,12,Persistent,Abnormal,SR
7,13,Persistent,Abnormal,AF
...,...,...,...,...
171,154,Parossistica,Normale,RS
177,160,Parossistica,Normale,RS
178,161,Parossistica,Normale,RS
179,162,Parossistica,Normale,RS


Unnamed: 0,Patient ID,Entropy,LASE
3,100,0,2.088307
4,101,0,1.328165
5,102,0,1.093049
6,103,0,1.873493
7,104,0,1.958372
...,...,...,...
95,94,0,1.992618
96,95,0,1.387183
97,96,0,1.947167
98,97,0,1.720367


In [13]:
# Join the two dataframes on the "ID" column
merged_df = pd.merge(
    ods.loc[:, ["Patient ID", "Phenotype", "Substrate", "Rhythm"]],
    csv,
    on="Patient ID",
    how="inner",
)

phenotype_mapping = {
    "Long standing persistent": "Persistente",
    "Persistente": "Persistente",
    "Parossistica": "Parossistica",
    "Parossistico": "Parossistica",
}

substrate_mapping = {
    "Normale": "Normale",
    "Abnormal": "Anormale",
    "Patologia": "Anormale",
    "Patologia ": "Anormale",
    "Patologia altri distretti": "Anormale",
    "Patologia parete posteriore": "Anormale",
    "Patologia parete posteriore e anteriore": "Anormale",
    "Patologia posteriore + altri": "Anormale",
    "Patologia posteriore e+ altri": "Anormale",
}

rhythm_mapping = {
    "AF": "FA",
    "FA": "FA",
    "SR": "RS",
    "RS": "RS",
}

merged_df["Phenotype"] = merged_df["Phenotype"].map(phenotype_mapping)
merged_df["Substrate"] = merged_df["Substrate"].map(substrate_mapping)
merged_df["Rhythm"] = merged_df["Rhythm"].map(rhythm_mapping)

merged_df

Unnamed: 0,Patient ID,Phenotype,Substrate,Rhythm,Entropy,LASE
0,40,Parossistica,Anormale,RS,0,1.798542
1,41,Parossistica,Normale,RS,0,2.019792
2,42,Parossistica,Normale,RS,0,1.240099
3,43,Persistente,Anormale,FA,0,1.247075
4,45,Persistente,Anormale,FA,0,1.467762
...,...,...,...,...,...,...
87,147,Parossistica,Normale,RS,0,1.774546
88,148,Parossistica,Normale,RS,0,1.539108
89,149,Parossistica,Normale,RS,0,1.121666
90,150,Parossistica,Normale,RS,0,2.180600


In [14]:
x = merged_df.loc[:, ["Phenotype"]]
y = merged_df.loc[:, ["Rhythm"]]

confusion_matrix = pd.crosstab(
    x["Phenotype"],
    y["Rhythm"],
    rownames=["Phenotype"],
    colnames=["Rhythm"],
    margins=True,
)
confusion_matrix = confusion_matrix.rename(
    index={"All": "Totale"}, columns={"All": "Totale"}
)
confusion_matrix

Rhythm,FA,RS,Totale
Phenotype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Parossistica,3,53,56
Persistente,31,0,31
Totale,34,53,87


In [15]:
merged_df[
    (merged_df.loc[:, "Phenotype"] == "Parossistica")
    & (merged_df.loc[:, "Rhythm"] == "FA")
]

Unnamed: 0,Patient ID,Phenotype,Substrate,Rhythm,Entropy,LASE
55,100,Parossistica,Normale,FA,0,2.088307
84,144,Parossistica,Normale,FA,0,1.90041
85,145,Parossistica,Normale,FA,0,1.581234


# Plots

In [16]:
from scipy.stats import ttest_ind
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt


filtered = [True, False]
threshold = ["15"]

data = {
    "threshold": [],
    "filtered": [],
    "phenotype_pvalue": [],
    "phenotype_auc": [],
    "substrate_pvalue": [],
    "substrate_auc": [],
    "rhythm_pvalue": [],
    "rhythm_auc": [],
}

for f in filtered:
    for t in threshold:

        if t == "no":
            first_part = "no_thrs"
        else:
            first_part = f"thrs_<{t}"
        if f:
            second_part = "filt"
        else:
            second_part = "no_filt"

        dir_name = "_".join([first_part, second_part])

        df = (
            pd.read_csv(dir_name + "/lase.csv", sep=",")
            .dropna(axis=0, how="any")
            .astype({"Patient ID": int})
        )

        df = pd.merge(
            ods.loc[:, ["Patient ID", "Phenotype", "Substrate", "Rhythm"]],
            df,
            on="Patient ID",
            how="inner",
        )

        df["Phenotype"] = df["Phenotype"].map(phenotype_mapping)
        df["Substrate"] = df["Substrate"].map(substrate_mapping)
        df["Rhythm"] = df["Rhythm"].map(rhythm_mapping)

        paroxysmal_lase = df[df["Phenotype"] == "Parossistica"]["LASE"]
        persistent_lase = df[df["Phenotype"] == "Persistente"]["LASE"]
        abnormal_lase = df[df["Substrate"] == "Anormale"]["LASE"]
        normal_lase = df[df["Substrate"] == "Normale"]["LASE"]
        sinus_lase = df[df["Rhythm"] == "RS"]["LASE"]
        fibril_lase = df[df["Rhythm"] == "FA"]["LASE"]

        # Perform t-test
        t_stat, phenoype_p_value = ttest_ind(
            paroxysmal_lase, persistent_lase, equal_var=False, alternative="greater"
        )
        t_stat, substrate_p_value = ttest_ind(
            abnormal_lase, normal_lase, equal_var=False, alternative="less"
        )
        t_stat, rhythm_p_value = ttest_ind(
            sinus_lase, fibril_lase, equal_var=False, alternative="greater"
        )
        if phenoype_p_value > 0.5:
            phenoype_p_value = 1 - phenoype_p_value
        if substrate_p_value > 0.5:
            substrate_p_value = 1 - substrate_p_value
        if rhythm_p_value > 0.5:
            rhythm_p_value = 1 - rhythm_p_value

        # Calculate AUC
        y = df["LASE"]
        y_true = df["Phenotype"].apply(lambda x: 1 if x == "Parossistica" else 0)
        if t == "3,5":
            y_true = df["Phenotype"].apply(lambda x: 0 if x == "Parossistica" else 1)
        y_pred = df["LASE"]
        phenotype_auc = roc_auc_score(y_true, y_pred)
        if phenotype_auc < 0.5:
            phenotype_auc = 1 - phenotype_auc

        y = df["LASE"]
        y_true = df["Substrate"].apply(lambda x: 0 if x == "Anormale" else 1)
        y_pred = df["LASE"]
        substrate_auc = roc_auc_score(y_true, y_pred)
        if substrate_auc < 0.5:
            substrate_auc = 1 - substrate_auc

        y = df["LASE"]
        y_true = df["Rhythm"].apply(lambda x: 0 if x == "FA" else 1)
        y_pred = df["LASE"]
        rhythm_auc = roc_auc_score(y_true, y_pred)
        if rhythm_auc < 0.5:
            rhythm_auc = 1 - rhythm_auc

        # Create boxplots
        plt.ioff()
        plt.figure(figsize=(10, 6))
        plt.boxplot(
            [
                df[df["Phenotype"] == "Parossistica"]["LASE"],
                df[df["Phenotype"] == "Persistente"]["LASE"],
            ],
            tick_labels=["Paroxysmal", "Persistent"],
        )
        plt.title(
            f"Threshold: {t} mV, Filtered: {f}; p-value: {phenoype_p_value:.3f} AUC: {phenotype_auc:.3f}"
        )
        plt.ylabel("LASE")
        plt.xlabel("Phenotype")
        plt.grid()
        plt.savefig(dir_name + "/phenotype_boxplot.png", dpi=300, bbox_inches="tight")
        plt.close()

        plt.figure(figsize=(10, 6))
        plt.boxplot(
            [
                df[df["Substrate"] == "Anormale"]["LASE"],
                df[df["Substrate"] == "Normale"]["LASE"],
            ],
            tick_labels=["Abnormal", "Normal"],
        )
        plt.title(
            f"Threshold: {t} mV, Filtered: {f}; p-value: {substrate_p_value:.3f} AUC: {substrate_auc:.3f}"
        )
        plt.ylabel("LASE")
        plt.xlabel("Substrate")
        plt.grid()
        plt.savefig(dir_name + "/substrate_boxplot.png", dpi=300, bbox_inches="tight")
        plt.close()

        plt.figure(figsize=(10, 6))
        plt.boxplot(
            [
                df[df["Rhythm"] == "RS"]["LASE"],
                df[df["Rhythm"] == "FA"]["LASE"],
            ],
            tick_labels=["RS", "FA"],
        )
        plt.title(
            f"Threshold: {t} mV, Filtered: {f}; p-value: {rhythm_p_value:.3f} AUC: {rhythm_auc:.3f}"
        )
        plt.ylabel("LASE")
        plt.xlabel("Rhythm")
        plt.grid()
        plt.savefig(dir_name + "/rhythm_boxplot.png", dpi=300, bbox_inches="tight")
        plt.close()

        # Append results to the data dictionary
        data["threshold"].append(t)
        data["filtered"].append(f)
        data["phenotype_pvalue"].append(phenoype_p_value)
        data["phenotype_auc"].append(phenotype_auc)
        data["substrate_pvalue"].append(substrate_p_value)
        data["substrate_auc"].append(substrate_auc)
        data["rhythm_pvalue"].append(rhythm_p_value)
        data["rhythm_auc"].append(rhythm_auc)

# Create a DataFrame from the data dictionary
results_df = pd.DataFrame(data).round(3)
results_df.to_csv("results.csv", index=False)
results_df

Unnamed: 0,threshold,filtered,phenotype_pvalue,phenotype_auc,substrate_pvalue,substrate_auc,rhythm_pvalue,rhythm_auc
0,15,True,0.0,0.823,0.0,0.801,0.0,0.8
1,15,False,0.0,0.815,0.0,0.799,0.0,0.801
