# 1. Analysis of results from ichorCNA

Before running this notebook, you need to run all the scripts in `../../scripts/cfDNA/*.sh`.

In order to generate figures 2A and 2B, run `../../scripts/plotting_cfDNA_ONT_fig2.R`.

In order to generate supplementary figure 3A, run `../../scripts/plotting_cfDNA_ONT_suppfig3A.R`.

In [None]:
import os
import sys
import warnings

# The local helper directory has to be on sys.path BEFORE anything is imported
# from it; with the insert placed after the imports, a fresh kernel cannot
# resolve `plotting_settings` (presumably located in ../../helper_functions).
# Keep this statement above the third-party/local imports even if an import
# sorter wants to move it.
sys.path.insert(1, "../../helper_functions")

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from plotting_settings import PLOTTING_PARAMS
from scipy.stats import fisher_exact, mannwhitneyu

# Silences every warning for the rest of the notebook.
warnings.filterwarnings("ignore")

In [None]:
# Load the cohort metadata; the first CSV column becomes the index.
df = pd.read_csv("../../bladder_metadata.csv", index_col=0)

# Sample IDs taken from the raw index labels with every space removed.
# NOTE(review): the index itself only has trailing whitespace stripped (next
# statement), so the two only agree when labels contain no other spaces.
samples = [label.replace(" ", "") for label in df.index.to_list()]

# Remove trailing whitespace from the index labels.
df.index = df.index.str.rstrip()

In [None]:
# Samples whose ichorCNA files are stored under a different identifier:
# metadata sample ID -> identifier used in the ichorCNA file paths.
alternative_ids = {
    "B3": "F10",
    "B1": "C1",
    "B17": "H11",
}

In [None]:
# Directory holding one sub-directory of ichorCNA output per sample.
ICHOR_DIR = "chromothripsis/bladder_cancer/liquid_biopsy/bam_files"


def read_tumor_fraction(params_path):
    """Read the tumor fraction from an ichorCNA ``*.params.txt`` file.

    Parameters
    ----------
    params_path : str
        Path of the params file to read.

    Returns
    -------
    float or None
        Numeric value of the last line that starts with ``"Tumor Fraction:"``,
        or ``None`` if the file contains no such line.

    Raises
    ------
    ValueError
        If the text after ``"Tumor Fraction:"`` cannot be parsed as a number.
    """
    tumor_fraction = None
    with open(params_path) as fin:
        for line in fin:
            if line.startswith("Tumor Fraction:"):
                # The value follows a tab after the colon.
                tumor_fraction = float(line.split(":\t")[-1].rstrip())
    return tumor_fraction


fraction = {}
for sample in samples:
    # Some samples are stored under a different identifier on disk.
    file_id = alternative_ids.get(sample, sample)
    param = f"{ICHOR_DIR}/{file_id}/ichorCNA_{file_id}.params.txt"
    if not os.path.isfile(param):
        print(f"{file_id} doesnt exist")
        continue
    tumor_fraction = read_tumor_fraction(param)
    if tumor_fraction is not None:
        fraction[file_id] = tumor_fraction

# Build the frame from a float Series so the sort is numeric (the values used
# to be strings and were ordered lexicographically) and so an empty result
# still has column 0 instead of raising KeyError in sort_values.
full = pd.DataFrame({0: pd.Series(fraction, dtype=float)}).sort_values(
    by=0, ascending=False
)

In [None]:
# Reverse lookup of alternative_ids: file identifier -> metadata sample ID.
mapping = {file_id: sample_id for sample_id, file_id in alternative_ids.items()}

In [None]:
# Translate the alternative identifiers in the index back to the metadata
# sample IDs and give the tumor-fraction column (label 0) its final name.
full = full.rename(index=mapping, columns={0: "cfDNA"})

In [None]:
# Tumor fraction needs to be more than 3% in order to classify a sample as
# tumor positive.
TUMOR_FRACTION_THRESHOLD = 0.03

# Drop columns left over from an earlier run of this cell before joining, so
# re-executing the cell does not fail on overlapping column names.
df = df.drop(columns=full.columns, errors="ignore").join(full)

# The comparison already yields booleans; NaN compares as False.
# NOTE(review): samples without a tumor-fraction estimate are therefore counted
# as "not detected" in the per-grade counts - confirm this is intended.
df["cfDNA_detection"] = df["cfDNA"].astype(float) > TUMOR_FRACTION_THRESHOLD

In [None]:
# Count detected / not-detected samples within each grade.
grade_order = ["Benign", "Low Grade", "High Grade"]
detection_counts = df.groupby("Grade")["cfDNA_detection"].value_counts()

# Rows: grade, columns: detection status; benign samples are left out.
detected = (
    detection_counts.unstack(fill_value=0).reindex(grade_order).drop(index="Benign")
)

In [None]:
sns.set_theme(style="white", rc=PLOTTING_PARAMS)

# Grouped (non-stacked) bars with horizontal tick labels.
ax = detected.plot.bar(stacked=False, figsize=(8, 6), rot=0)
for side in ("top", "right"):
    ax.spines[side].set_visible(False)

# Write each bar's height at its outer edge.
for bars in ax.containers:
    ax.bar_label(bars, label_type="edge", size=12)

ax.set_ylabel("Number of samples")
ax.legend(title="Detected tumor DNA", loc="center left", bbox_to_anchor=(1, 0.5))

plt.savefig("suppfig_3B_detected_tumor_cfDNA.svg", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Display the per-grade detection count table.
detected

In [None]:
# One-sided Fisher's exact test on the grade x detection count table.
contingency = detected.to_numpy()
fisher_exact(contingency, alternative="greater")

In [None]:
# Ordered grade categories; any other value in "Grade" (e.g. "Benign") becomes
# NaN after the conversion.
grade_dtype = pd.CategoricalDtype(["Low Grade", "High Grade"], ordered=True)

# Make the tumor fraction numeric and the grade an ordered categorical.
df = df.astype({"cfDNA": float, "Grade": grade_dtype})

In [None]:
# Keep only the graded tumors. The grades are selected explicitly because a
# `!= "Benign"` comparison no longer removes anything once "Grade" has been
# converted to a categorical without a "Benign" category (those rows are NaN
# and compare as not-equal). `isin` works for both the raw and the categorical
# column, and the selection is computed once for both plot layers.
graded = df[df["Grade"].isin(["Low Grade", "High Grade"])]

ax = sns.boxplot(
    graded,
    y="cfDNA",
    x="Grade",
    hue="Grade",
    palette=["#7C5E87", "#F392DA"],
    legend=False,
)
# Overlay the individual samples on the boxes.
sns.stripplot(graded, y="cfDNA", x="Grade", color="black", ax=ax)
sns.despine(top=True, right=True, left=False, bottom=False)
ax.set_ylabel("Estimated tumor fraction")
ax.set_xlabel("Grade")

plt.savefig("suppfig_3C_cfDNA_tumor_fraction.svg", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Mann-Whitney U test (default settings) on the tumor fractions of the two
# grades. Samples without a tumor-fraction estimate carry NaN, which would
# otherwise propagate into the test result, so they are dropped first.
high_grade = df.loc[df["Grade"] == "High Grade", "cfDNA"].dropna()
low_grade = df.loc[df["Grade"] == "Low Grade", "cfDNA"].dropna()
mannwhitneyu(high_grade, low_grade)