In [1]:
import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
from tqdm import tqdm

In [309]:
# Apply seaborn's "white" style with fonts scaled by 1.5.
# NOTE(review): the next cell calls sns.reset_defaults(), which undoes this — confirm which is intended.
sns.set(style="white", font_scale=1.5)

In [310]:
# Restore matplotlib rc parameters to their default values.
# NOTE(review): this cancels the sns.set(...) styling from the previous cell — confirm which is intended.
sns.reset_defaults()

In [None]:
# Ten-colour "colorblind" palette; later cells pick per-assay colours from it by index.
colors = sns.color_palette("colorblind", 10)
colors  # bare expression so the notebook renders the palette

# Coverage

In [312]:
# Per-cell coverage for every assay. `assay`, `paths` and `color_order` are
# index-aligned: entry k of each list describes the same dataset.
assay = [
    "HIPSD&R-seq (PDX)",
    "HIPSD&R-seq (LFS041_LFS087_1:1)",
    "HIPSD-seq (LFS041_62)",
    "sciHIPSD-seq (LFS041_LFS087_spikein)",
    "DEFND-seq (LFS041_LFS087_1:1)",
    "10x (PDX)",
]
paths = [
    "multiome",
    "multiome_fibroblasts",
    "matac",
    "sci_fibroblasts",
    "multiome_fibroblasts_DEFND",
    "10x",
]
color_order = [colors[idx] for idx in (4, 1, 2, 7, 9, 0)]

# One Series per dataset: column "0" of the QC table with missing values removed.
coverage_cell = [
    pd.read_csv(f"QC/{qc_dir}/cell_coverage.csv", index_col=0)["0"].dropna()
    for qc_dir in paths
]

In [None]:
# Ranked per-cell coverage: one curve per assay, cells ordered from highest to
# lowest coverage along a log-scaled x axis.
fig, ax = plt.subplots(figsize=(10, 5))
for label, color, coverage in zip(assay, color_order, coverage_cell):
    ax.plot(sorted(coverage, reverse=True), label=label, color=color, lw=2)

# Set the log scale *before* sizing tick labels and computing the layout:
# tick_params also covers ticks created later, and tight_layout() then
# measures the labels that are actually drawn.
ax.set_xscale("log")
ax.set_xlabel("Cells: best to worse (log)", fontsize=14)
ax.set_ylabel("Coverage(X)", fontsize=14)
ax.tick_params(axis="both", labelsize=12)
ax.legend(fontsize=14)
fig.tight_layout()

fig.savefig("final_figures/qc/coverage_cell.png", dpi=300)
fig.savefig("final_figures/qc/coverage_cell.svg", dpi=300)

plt.show()

In [314]:
# Coverage per metacell for the assays listed in `paths`.
assay = [
    "HIPSD&R-seq (PDX)",
    "HIPSD&R-seq (LFS041_LFS087_1:1)",
    "HIPSD-seq (LFS041_62)",
    "DEFND-seq (LFS041_LFS087_1:1)",
]
paths = ["multiome", "multiome_fibroblasts", "matac", "multiome_fibroblasts_DEFND"]
color_order = [colors[idx] for idx in (4, 1, 2, 9)]

coverage_meta = []
for qc_dir in paths:
    # "10x" would fall back to the per-cell table; it is not in `paths` here,
    # so every dataset currently reads meta_coverage.csv.
    d_type = "cell_coverage" if qc_dir == "10x" else "meta_coverage"
    tmp = pd.read_csv(f"QC/{qc_dir}/{d_type}.csv", index_col=0)
    coverage_meta.append(tmp["0"].dropna())

In [None]:
# Ranked per-metacell coverage: one curve per assay, metacells ordered from
# highest to lowest coverage along a log-scaled x axis.
fig, ax = plt.subplots(figsize=(10, 5))
for label, color, coverage in zip(assay, color_order, coverage_meta):
    ax.plot(sorted(coverage, reverse=True), label=label, color=color, lw=2)

# Set the log scale *before* sizing tick labels and computing the layout:
# tick_params also covers ticks created later, and tight_layout() then
# measures the labels that are actually drawn.
ax.set_xscale("log")
ax.set_xlabel("Metacells: best to worse (log)", fontsize=14)
ax.set_ylabel("Coverage(X)", fontsize=14)
ax.tick_params(axis="both", labelsize=12)
ax.legend(fontsize=14)
fig.tight_layout()

fig.savefig("final_figures/qc/coverage_meta.png", dpi=300)
fig.savefig("final_figures/qc/coverage_meta.svg", dpi=300)

plt.show()

# Counts

In [None]:
# Row totals of each per-cell count table (one total per row of cell_counts.csv).
assay = [
    "HIPSD&R-seq (PDX)",
    "HIPSD&R-seq (LFS041_LFS087_1:1)",
    "HIPSD-seq (LFS041_62)",
    "sciHIPSD-seq (LFS041_LFS087_spikein)",
    "DEFND-seq (LFS041_LFS087_1:1)",
    "10x (PDX)",
]
paths = [
    "multiome",
    "multiome_fibroblasts",
    "matac",
    "sci_fibroblasts",
    "multiome_fibroblasts_DEFND",
    "10x",
]
color_order = [colors[idx] for idx in (4, 1, 2, 7, 9, 0)]

counts_cell = []
for qc_dir in tqdm(paths):
    count_table = pd.read_csv(f"QC/{qc_dir}/cell_counts.csv", index_col=0)
    counts_cell.append(count_table.sum(axis=1))

In [None]:
# Ranked per-cell read totals, one curve per assay.
plt.figure(figsize=(10, 4))
for label, color, per_cell in zip(assay, color_order, counts_cell):
    ranked = sorted(per_cell, reverse=True)
    plt.plot(ranked, label=label, color=color, lw=2)

plt.xlabel("Cells: best to worse (log)")
plt.ylabel("Reads")
plt.xscale("log")

for ext in ("png", "svg"):
    plt.savefig(f"final_figures/qc/counts_cell.{ext}", dpi=300)

plt.show()

In [None]:
# Cumulative read totals over cells ranked from highest to lowest count.
plt.figure(figsize=(10, 5))
for label, color, per_cell in zip(assay, color_order, counts_cell):
    cumulative = np.cumsum(sorted(per_cell, reverse=True))
    plt.plot(cumulative, label=label, color=color, lw=2)

plt.xlabel("Cells(log)", fontsize=14)
plt.ylabel("Counts", fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.xscale("log")
plt.tight_layout()
for ext in ("png", "svg"):
    plt.savefig(f"final_figures/qc/counts_cumsum_cell.{ext}", dpi=300)

plt.show()

In [None]:
# Row totals of each metacell count table, falling back to the per-cell table
# when a dataset has no meta_counts.csv.
counts_meta = []
assay = [
    "HIPSD&R-seq (PDX)",
    "HIPSD&R-seq (LFS041_LFS087_1:1)",
    "HIPSD-seq (LFS041_62)",
    "DEFND-seq (LFS041_LFS087_1:1)",
]
paths = ["multiome", "multiome_fibroblasts", "matac", "multiome_fibroblasts_DEFND"]
color_order = [colors[4], colors[1], colors[2], colors[9]]
for path in tqdm(paths):
    try:
        tmp = pd.read_csv(f"QC/{path}/meta_counts.csv", index_col=0)
    except FileNotFoundError:
        # Only a missing file triggers the fallback; parse errors and
        # interrupts now surface instead of being silently swallowed.
        print(path)
        tmp = pd.read_csv(f"QC/{path}/cell_counts.csv", index_col=0)
    counts_meta.append(tmp.sum(axis=1))

In [None]:
# Ranked read totals per (meta)cell, one curve per assay.
plt.figure(figsize=(10, 5))
for label, color, per_meta in zip(assay, color_order, counts_meta):
    plt.plot(sorted(per_meta, reverse=True), label=label, color=color, lw=2)

plt.xlabel("(meta)cells: best to worse (log)", fontsize=14)
plt.ylabel("Reads", fontsize=14)
plt.xscale("log")
# No legend is drawn on this panel. The earlier code built an anchored legend
# and then immediately replaced it with a hidden one, so the rendered figure
# is the same without either call.

plt.savefig("final_figures/qc/counts_meta.png", dpi=300)
plt.savefig("final_figures/qc/counts_meta.svg", dpi=300)

plt.show()

In [None]:
# Cumulative read totals over metacells ranked from highest to lowest count.
plt.figure(figsize=(10, 5))
for label, color, per_meta in zip(assay, color_order, counts_meta):
    cumulative = np.cumsum(sorted(per_meta, reverse=True))
    plt.plot(cumulative, label=label, color=color, lw=2)

plt.xlabel("Metacells (log)", fontsize=14)
plt.ylabel("Cumulative counts", fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.xscale("log")
plt.tight_layout()
# A legend is created and then hidden, so the panel shows no legend.
hidden_legend = plt.legend()
hidden_legend.set_visible(False)

for ext in ("png", "svg"):
    plt.savefig(f"final_figures/qc/counts_cumsum_meta.{ext}", dpi=300)

plt.show()

# Duplication rate

In [None]:
# Duplication rate per metacell (column "0"), falling back to the per-cell
# table when a dataset has no meta_dup_rate.csv. Prints each dataset's median.
dr_meta = []
assay = [
    "HIPSD&R-seq (PDX)",
    "HIPSD&R-seq (LFS041_LFS087_1:1)",
    "HIPSD-seq (LFS041_62)",
    "DEFND-seq (LFS041_LFS087_1:1)",
]
paths = ["multiome", "multiome_fibroblasts", "matac", "multiome_fibroblasts_DEFND"]
color_order = [colors[4], colors[1], colors[2], colors[9]]
for path in paths:
    try:
        tmp = pd.read_csv(f"QC/{path}/meta_dup_rate.csv", index_col=0)
    except FileNotFoundError:
        # Only a missing file triggers the fallback; parse errors and
        # interrupts now surface instead of being silently swallowed.
        print(path)
        tmp = pd.read_csv(f"QC/{path}/cell_dup_rate.csv", index_col=0)
    print(path, tmp["0"].median())
    dr_meta.append(tmp["0"])

In [None]:
# Duplication rates from `dr_meta`, sorted ascending, one curve per assay.
plt.figure(figsize=(10, 4))
for label, color, rates in zip(assay, color_order, dr_meta):
    ascending = sorted(rates)
    plt.plot(ascending, label=label, color=color, lw=2)

plt.xlabel("Cells: best to worse (log)")
plt.ylabel("Duplication rate")
plt.tight_layout()
plt.xscale("log")

for ext in ("png", "svg"):
    plt.savefig(f"final_figures/qc/dr_meta.{ext}", dpi=300)

plt.show()

In [None]:
# Box plot of the duplication rates in `dr_meta`, one box per assay.
fig, ax = plt.subplots(figsize=(10, 5))

bp = ax.boxplot(dr_meta, patch_artist=True)
# Iterate the boxes directly so every box is styled, however many there are
# (zipping with the 10-colour palette would stop after ten boxes).
for patch in bp["boxes"]:
    patch.set_facecolor("white")

for whisker in bp["whiskers"]:
    whisker.set(color="blue", linewidth=1.5, linestyle=":")
for median in bp["medians"]:
    median.set(color="blue", linewidth=3)

ax.set_xticklabels(assay)
plt.ylabel("Duplication rate")
plt.xticks(rotation=45)
plt.tight_layout()


plt.savefig("final_figures/qc/boxplot_meta.png", dpi=300)
plt.savefig("final_figures/qc/boxplot_meta.svg", dpi=300)

plt.show()

In [325]:
# Per-cell duplication rate (column "0" of cell_dup_rate.csv) for every assay.
assay = [
    "HIPSD&R-seq (PDX)",
    "HIPSD&R-seq (LFS041_LFS087_1:1)",
    "HIPSD-seq (LFS041_62)",
    "sciHIPSD-seq (LFS041_LFS087_spikein)",
    "DEFND-seq (LFS041_LFS087_1:1)",
    "10x (PDX)",
]
paths = [
    "multiome",
    "multiome_fibroblasts",
    "matac",
    "sci_fibroblasts",
    "multiome_fibroblasts_DEFND",
    "10x",
]
color_order = [colors[idx] for idx in (4, 1, 2, 7, 9, 0)]

# Missing values are kept here (no dropna), unlike the coverage tables.
dr_cell = [
    pd.read_csv(f"QC/{qc_dir}/cell_dup_rate.csv", index_col=0)["0"]
    for qc_dir in paths
]

In [None]:
# Box plot of the per-cell duplication rates in `dr_cell`, one box per assay.
fig, ax = plt.subplots(figsize=(10, 5))

bp = ax.boxplot(dr_cell, patch_artist=True)
# Iterate the boxes directly so every box is styled, however many there are
# (zipping with the 10-colour palette would stop after ten boxes).
for patch in bp["boxes"]:
    patch.set_facecolor("white")

for whisker in bp["whiskers"]:
    whisker.set(color="blue", linewidth=1.5, linestyle=":")
for median in bp["medians"]:
    median.set(color="blue", linewidth=3)

ax.set_xticklabels(assay)
plt.ylabel("Duplication rate", fontsize=14)
plt.xticks(rotation=45)
plt.yticks(fontsize=12)
plt.tight_layout()


plt.savefig("final_figures/qc/boxplot_dup.png", dpi=300)
plt.savefig("final_figures/qc/boxplot_dup.svg", dpi=300)

plt.show()

In [None]:
# Per-cell duplication rates, sorted ascending, one curve per assay.
plt.figure(figsize=(10, 4))
for label, color, rates in zip(assay, color_order, dr_cell):
    ascending = sorted(rates)
    plt.plot(ascending, label=label, color=color)

plt.xlabel("Cells: best to worse (log)")
plt.ylabel("Duplication rate")
plt.xscale("log")

for ext in ("png", "svg"):
    plt.savefig(f"final_figures/qc/dr_cell.{ext}", dpi=300)

plt.show()

In [None]:
# Lorenz curve of the per-column count totals for every entry in `paths`,
# preferring the metacell table when it exists.
# NOTE(review): `lorenz` is only defined in the later "Lorenz curve" section, so
# this cell raises NameError on a fresh kernel unless that cell has been run.
# NOTE(review): the next cell rebuilds `lorenz_meta` from cell_counts.csv only,
# discarding what is computed here.
lorenz_meta = []
for path in paths:
    try:
        tmp = pd.read_csv(f"QC/{path}/meta_counts.csv", index_col=0)
    except FileNotFoundError:
        # Only a missing file triggers the fallback; parse errors and
        # interrupts now surface instead of being silently swallowed.
        print(path)
        tmp = pd.read_csv(f"QC/{path}/cell_counts.csv", index_col=0)
    lorenz_meta.append(lorenz(np.sum(tmp, axis=0)))

In [50]:
# Lorenz curve of the per-column totals of each dataset's per-cell count table.
# This overwrites the `lorenz_meta` list built by the previous cell.
# NOTE(review): `lorenz` is only defined in the later "Lorenz curve" section, so
# this cell raises NameError on a fresh kernel unless that cell has been run.
lorenz_meta = []
for path in paths:
    tmp = pd.read_csv(f"QC/{path}/cell_counts.csv", index_col=0)
    # axis=0 sums down the rows, giving one total per column.
    lorenz_meta.append(lorenz(np.sum(tmp, axis=0)))

In [None]:
# Lorenz curves (cumulative fraction of reads vs. fraction of genome), one per
# assay, with the perfectly uniform case as a dashed reference.
fig, ax = plt.subplots(figsize=(5, 5))
for label, color, curve in zip(assay, color_order, lorenz_meta):
    # x runs from 0 to 1 inclusive across the points of the curve.
    genome_fraction = np.arange(len(curve)) / (len(curve) - 1)
    ax.plot(genome_fraction, curve, label=label, color=color, lw=2)

# Uniform coverage is the exact diagonal y = x. Drawing it from its two end
# points avoids the mismatched denominators (n vs. n - 1) that left the line
# short of (1, 1), and no longer depends on a variable leaked from the loop.
ax.plot([0, 1], [0, 1], label="uniform", c="black", ls="--")

plt.xlabel("Fraction of genome")
plt.ylabel("Fraction of reads")
plt.grid()

plt.tight_layout()

# NOTE(review): the filtered Lorenz figure later in the notebook is saved under
# these same file names and will overwrite these files.
plt.savefig("final_figures/qc/lorenz.png", dpi=300)
plt.savefig("final_figures/qc/lorenz.svg", dpi=300)
plt.show()

# Cells per metacell

In [327]:
path_meta = "aurelie_data/pdx_paired/mc_files/*.txt"


def mc2cell(path_meta):
    """Count the entries listed in each metacell file matched by a glob pattern.

    Each matched file is read as a header-less CSV; the number of rows in its
    first column is taken as the number of cells in that metacell.

    Parameters
    ----------
    path_meta : str
        Glob pattern selecting the metacell files.

    Returns
    -------
    list of int
        One count per matched file, in sorted file-name order. Empty when the
        pattern matches nothing.
    """
    # Sorting makes the result reproducible; glob order is otherwise arbitrary.
    # Only the row count is needed, so entries are not parsed and non-string
    # rows (e.g. numeric IDs) are counted rather than raising.
    return [
        len(pd.read_csv(file, header=None)[0])
        for file in sorted(glob.glob(path_meta))
    ]

In [328]:
# Cells per metacell for the pdx_paired files; plotted below as "HIPSD&R-seq (PDX)".
mome_meta = mc2cell(path_meta)

In [329]:
# Cells per metacell for the LFS041_p63 ATAC-only files; plotted below as "HIPSD-seq (LFS041_62)".
matac_meta = mc2cell("aurelie_data/LFS041_p63/atac_only/metacells_old/*.txt")

In [330]:
# Cells per metacell for the 100k files.
# NOTE(review): `sci_meta` is not passed to the boxplot cell below — confirm whether it is still needed.
sci_meta = mc2cell("aurelie_data/100k/mc_files/*.txt")

In [331]:
# Cells per metacell for the HIPSDR-seq 100kb files; plotted below as "HIPSD&R-seq (LFS041_LFS087_1:1)".
# NOTE(review): the variable is spelled `hispdr` here but `hipsdr` elsewhere in the notebook.
hispdr_meta = mc2cell("HIPSDR-seq/metacells_100kb/*.txt")

In [332]:
# Cells per metacell for the DEFND-seq files; plotted below as "DEFND-seq (LFS041_LFS087_1:1)".
defnd_meta = mc2cell("DEFND-seq/metacells/*.txt")

In [None]:
# Box plot of cells per metacell. The mapping keeps each tick label next to
# the data it describes; insertion order fixes the left-to-right box order.
cells_per_metacell = {
    "HIPSD-seq (LFS041_62)": matac_meta,
    "HIPSD&R-seq (PDX)": mome_meta,
    "HIPSD&R-seq (LFS041_LFS087_1:1)": hispdr_meta,
    "DEFND-seq (LFS041_LFS087_1:1)": defnd_meta,
}

fig, ax = plt.subplots(figsize=(10, 5))
bp = ax.boxplot(list(cells_per_metacell.values()), patch_artist=True)

# White boxes with blue dotted whiskers and thick blue medians.
for box in bp["boxes"]:
    box.set_facecolor("white")
for line in bp["whiskers"]:
    line.set(color="blue", linewidth=1.5, linestyle=":")
for line in bp["medians"]:
    line.set(color="blue", linewidth=3)

ax.set_xticklabels(list(cells_per_metacell))
plt.ylabel("Cells per metacell", fontsize=14)
plt.xticks(rotation=45)
plt.tight_layout()

for ext in ("png", "svg"):
    plt.savefig(f"final_figures/qc/boxplot_meta_count.{ext}", dpi=300)

plt.show()

## Unfiltered vs. filtered

In [10]:
# Observation names of the processed HIPSDR-seq CNV AnnData; used below to
# select the "- filtered" subset of the multiome_fibroblasts tables.
bars_hipsdr = sc.read_h5ad("HIPSDR-seq/adata_cnv_processed.h5ad").obs_names

In [None]:
# Display how many entries the HIPSDR-seq filtered set contains.
len(bars_hipsdr)

In [12]:
# Row index of the filtered sci CNV table; used below to select the
# "- filtered" subset of the sci_fibroblasts tables.
bars_sci = pd.read_csv("sci/CNVs_sci_filtered.csv", index_col=0).index

In [None]:
# Display how many entries the sci filtered set contains.
len(bars_sci)

In [14]:
# Row index of the filtered DEFND-seq CNV table; used below to select the
# "- filtered" subset of the multiome_fibroblasts_DEFND tables.
bars_defnd = pd.read_csv("DEFND-seq/CNVs_filtered.csv", index_col=0).index

In [None]:
# Display how many entries the DEFND-seq filtered set contains.
len(bars_defnd)

In [341]:
# Full six-assay label / directory / colour configuration.
# NOTE(review): the next cell reassigns `assay`, `paths` and `color_order`
# before any of them is read, so this cell currently has no effect.
assay = [
    "HIPSD&R-seq (PDX)",
    "HIPSD&R-seq (LFS041_LFS087_1:1)",
    "HIPSD-seq (LFS041_62)",
    "sciHIPSD-seq (LFS041_LFS087_spikein)",
    "DEFND-seq (LFS041_LFS087_1:1)",
    "10x (PDX)",
]
paths = [
    "multiome",
    "multiome_fibroblasts",
    "matac",
    "sci_fibroblasts",
    "multiome_fibroblasts_DEFND",
    "10x",
]
color_order = [colors[4], colors[1], colors[2], colors[7], colors[9], colors[0]]

In [342]:
# Per-cell coverage before and after filtering. For each dataset the full
# series is appended first and its filtered subset second, so `coverage_cell`
# lines up with the alternating "unfiltered / filtered" labels in `assay`.
assay = [
    "HIPSD&R-seq (LFS041_LFS087_1:1)",
    "HIPSD&R-seq (LFS041_LFS087_1:1) - filtered",
    "sciHIPSD-seq (LFS041_LFS087_spikein)",
    "sciHIPSD-seq (LFS041_LFS087_spikein) - filtered",
    "DEFND-seq (LFS041_LFS087_1:1)",
    "DEFND-seq (LFS041_LFS087_1:1) - filtered",
]
paths = ["multiome_fibroblasts", "sci_fibroblasts", "multiome_fibroblasts_DEFND"]
color_order = [colors[idx] for idx in (1, 7, 9)]

# Row labels retained by filtering, keyed by QC directory.
kept_barcodes = {
    "multiome_fibroblasts": bars_hipsdr,
    "sci_fibroblasts": bars_sci,
    "multiome_fibroblasts_DEFND": bars_defnd,
}

coverage_cell = []
for path in paths:
    tmp = pd.read_csv(f"QC/{path}/cell_coverage.csv", index_col=0)
    coverage_cell.append(tmp["0"].dropna())
    if path in kept_barcodes:
        coverage_cell.append(tmp.loc[kept_barcodes[path]]["0"].dropna())

In [None]:
# Ranked per-cell coverage, unfiltered (thin solid) vs. filtered (thick dashed).
# Entries alternate unfiltered / filtered, so each pair shares color_order[i // 2].
fig, ax = plt.subplots(figsize=(10, 5))
for i, (label, coverage) in enumerate(zip(assay, coverage_cell)):
    is_filtered = i % 2 == 1
    line_style = {"lw": 2, "linestyle": "dashed"} if is_filtered else {"lw": 1}
    ax.plot(
        sorted(coverage, reverse=True),
        label=label,
        color=color_order[i // 2],
        **line_style,
    )

# Set the log scale *before* sizing tick labels and computing the layout:
# tick_params also covers ticks created later, and tight_layout() then
# measures the labels that are actually drawn.
ax.set_xscale("log")
ax.set_xlabel("Cells: best to worse (log)", fontsize=14)
ax.set_ylabel("Coverage(X)", fontsize=14)
ax.tick_params(axis="both", labelsize=12)
ax.legend(fontsize=14)
fig.tight_layout()

fig.savefig("final_figures/qc/coverage_cell_filtered.png", dpi=300)
fig.savefig("final_figures/qc/coverage_cell_filtered.svg", dpi=300)

plt.show()

# Counts (unfiltered vs. filtered)

In [None]:
# Per-cell read totals before and after filtering. For each dataset the full
# row totals are appended first and the filtered subset second, matching the
# alternating "unfiltered / filtered" labels in `assay`.
assay = [
    "HIPSD&R-seq (LFS041_LFS087_1:1)",
    "HIPSD&R-seq (LFS041_LFS087_1:1) - filtered",
    "sciHIPSD-seq (LFS041_LFS087_spikein)",
    "sciHIPSD-seq (LFS041_LFS087_spikein) - filtered",
    "DEFND-seq (LFS041_LFS087_1:1)",
    "DEFND-seq (LFS041_LFS087_1:1) - filtered",
]
paths = ["multiome_fibroblasts", "sci_fibroblasts", "multiome_fibroblasts_DEFND"]
color_order = [colors[idx] for idx in (1, 7, 9)]

# Row labels retained by filtering, keyed by QC directory.
kept_barcodes = {
    "multiome_fibroblasts": bars_hipsdr,
    "sci_fibroblasts": bars_sci,
    "multiome_fibroblasts_DEFND": bars_defnd,
}

counts_cell = []
for path in tqdm(paths):
    tmp = pd.read_csv(f"QC/{path}/cell_counts.csv", index_col=0)
    counts_cell.append(tmp.sum(axis=1))
    if path in kept_barcodes:
        counts_cell.append(tmp.loc[kept_barcodes[path]].sum(axis=1))

In [None]:
# Ranked per-cell read totals, unfiltered (thin solid) vs. filtered (thick dashed).
# This figure is shown only; it is not written to disk.
plt.figure(figsize=(10, 5))
for idx, (label, per_cell) in enumerate(zip(assay, counts_cell)):
    # Odd positions hold the filtered subset of the preceding dataset.
    line_style = {"lw": 2, "linestyle": "dashed"} if idx % 2 else {"lw": 1}
    plt.plot(
        sorted(per_cell, reverse=True),
        label=label,
        color=color_order[idx // 2],
        **line_style,
    )

plt.xlabel("Cells: best to worse (log)", fontsize=14)
plt.ylabel("Reads", fontsize=14)
plt.legend(fontsize=14)
plt.xscale("log")

plt.show()

In [None]:
# Cumulative read totals over ranked cells, unfiltered (thin solid) vs.
# filtered (thick dashed).
plt.figure(figsize=(10, 5))
for idx, (label, per_cell) in enumerate(zip(assay, counts_cell)):
    # Odd positions hold the filtered subset of the preceding dataset.
    line_style = {"lw": 2, "linestyle": "dashed"} if idx % 2 else {"lw": 1}
    plt.plot(
        np.cumsum(sorted(per_cell, reverse=True)),
        label=label,
        color=color_order[idx // 2],
        **line_style,
    )

plt.xlabel("Cells(log)", fontsize=14)
plt.ylabel("Counts", fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.xscale("log")
plt.tight_layout()
for ext in ("png", "svg"):
    plt.savefig(f"final_figures/qc/counts_cumsum_cell_filtered.{ext}", dpi=300)

plt.show()

## Lorenz curve

In [3]:
def lorenz(arr):
    """Return the Lorenz curve of `arr` as an array of cumulative shares.

    The values are sorted ascending, accumulated, and divided by their total,
    so the shares rise to 1.0. A leading 0 is prepended (0% of the items hold
    0% of the total), making the result one element longer than the input.
    """
    ordered = np.sort(arr)
    cumulative_share = np.cumsum(ordered) / np.sum(ordered)
    return np.insert(cumulative_share, 0, 0)

In [4]:
# Full count tables, one per assay, with the first CSV column as the row index.
# Rows are later selected with the bars_* label sets and passed row by row to
# lorenz(). NOTE(review): presumably rows are cells and columns genomic bins — confirm.
hipsdr = pd.read_csv("HIPSDR-seq/full_counts.csv", index_col=0)
defnd = pd.read_csv("DEFND-seq/full_counts.csv", index_col=0)
sci = pd.read_csv("sci/full_counts.csv", index_col=0)

In [6]:
# Dataset keys for the Lorenz comparison; the next cell zips them with
# [hipsdr, sci, defnd] in this order.
paths = ["multiome_fibroblasts", "sci_fibroblasts", "multiome_fibroblasts_DEFND"]

In [16]:
def _median_lorenz(counts):
    """Element-wise median (ignoring NaNs) of the row-wise Lorenz curves of `counts`."""
    per_row = counts.apply(lambda row: lorenz(row), axis=1).values
    stacked = np.array([np.array(curve) for curve in per_row])
    return np.nanmedian(stacked, axis=0)


# Row labels retained by filtering, keyed by dataset.
kept_barcodes = {
    "multiome_fibroblasts": bars_hipsdr,
    "sci_fibroblasts": bars_sci,
    "multiome_fibroblasts_DEFND": bars_defnd,
}

# For each dataset append the curve over all rows, then the curve over the
# filtered rows, giving the alternating unfiltered / filtered order.
lorenz_meta = []
for df, path in zip([hipsdr, sci, defnd], paths):
    lorenz_meta.append(_median_lorenz(df))
    if path in kept_barcodes:
        lorenz_meta.append(_median_lorenz(df.loc[kept_barcodes[path]]))

In [17]:
# Legend labels for the Lorenz figure, alternating unfiltered / filtered per
# assay to match the order in which `lorenz_meta` was filled.
assay = [
    "HIPSD&R-seq (LFS041_LFS087_1:1)",
    "HIPSD&R-seq (LFS041_LFS087_1:1) - filtered",
    "sciHIPSD-seq (LFS041_LFS087_spikein)",
    "sciHIPSD-seq (LFS041_LFS087_spikein) - filtered",
    "DEFND-seq (LFS041_LFS087_1:1)",
    "DEFND-seq (LFS041_LFS087_1:1) - filtered",
]

In [None]:
# Median Lorenz curves, unfiltered (solid) vs. filtered (dashed), with the
# perfectly uniform case as a dashed black reference.
# Entries alternate unfiltered / filtered, so each pair shares color_order[i // 2].
fig, ax = plt.subplots(figsize=(6, 8))
for i, (label, curve) in enumerate(zip(assay, lorenz_meta)):
    line_style = {"linestyle": "dashed"} if i % 2 == 1 else {}
    ax.plot(
        # x runs from 0 to 1 inclusive across the points of the curve.
        np.arange(len(curve)) / (len(curve) - 1),
        curve,
        label=label,
        color=color_order[i // 2],
        lw=2,
        **line_style,
    )

# Uniform coverage is the exact diagonal y = x. Drawing it from its two end
# points avoids the mismatched denominators (n vs. n - 1) that left the line
# short of (1, 1), and no longer depends on a variable leaked from the loop.
ax.plot([0, 1], [0, 1], label="uniform", c="black", ls="--")

plt.xlabel("Fraction of genome", fontsize=14)
plt.ylabel("Fraction of reads", fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid()

# The legend sits below the axes; bbox_inches="tight" keeps it inside the saved image.
plt.legend(fontsize=14, loc="upper center", bbox_to_anchor=(0.5, -0.2))
plt.tight_layout()

# NOTE(review): the earlier, unfiltered Lorenz figure is saved under these same
# file names, so this call overwrites it.
plt.savefig("final_figures/qc/lorenz.png", dpi=300, bbox_inches="tight")
plt.savefig("final_figures/qc/lorenz.svg", dpi=300, bbox_inches="tight")
plt.show()

## RNA comparison

# CNV kit

In [None]:
# RNA AnnData from the pdx_10x_old directory; plotted as "10x (PDX)" in the
# combined scatter at the end of the notebook.
adata_cnv = sc.read_h5ad("aurelie_data/pdx_10x_old/rna_seurat_raw.h5ad")

In [None]:
# De-duplicate variable names, compute per-observation QC metrics in place,
# and plot total counts against the number of genes with counts.
adata_cnv.var_names_make_unique()
sc.pp.calculate_qc_metrics(adata_cnv, inplace=True)
sc.pl.scatter(
    adata_cnv,
    x="total_counts",
    y="n_genes_by_counts",
    size=100,
)

# multiome

In [None]:
# Filtered feature-barcode matrix from the pdx_paired Cell Ranger output;
# plotted as "HIPSD&R-seq (PDX)" in the combined scatter below.
adata_multiome = sc.read_10x_h5(
    "aurelie_data/pdx_paired/rna_cellranger/outs/filtered_feature_bc_matrix.h5"
)

In [None]:
# De-duplicate variable names, compute per-observation QC metrics in place,
# and plot total counts against the number of genes with counts.
adata_multiome.var_names_make_unique()
sc.pp.calculate_qc_metrics(adata_multiome, inplace=True)
sc.pl.scatter(
    adata_multiome,
    x="total_counts",
    y="n_genes_by_counts",
    size=100,
)

# all together

In [None]:
# Same ten-colour "colorblind" palette as at the top of the notebook, redefined
# so the scatter below can run without the earlier palette cell.
colors = sns.color_palette("colorblind", 10)

In [None]:
# Scatter of per-row total counts (x) against the number of entries > 0 (y),
# with both RNA datasets on shared log-log axes.
fig, ax = plt.subplots(figsize=(5, 5))

rna_datasets = (
    (adata_multiome, colors[4], "HIPSD&R-seq (PDX)"),
    (adata_cnv, colors[0], "10x (PDX)"),
)
for adata, color, label in rna_datasets:
    x = np.asarray(adata.X.sum(axis=1))
    y = np.asarray(np.sum(adata.X > 0, axis=1))
    ax.scatter(x, y, color=color, alpha=0.3, label=label)

ax.set_xlabel("UMI Counts", fontsize=14)
ax.set_ylabel("Genes Detected", fontsize=14)
ax.set_xscale("log")
ax.set_yscale("log")

plt.tight_layout()
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
# The legend sits below the axes; bbox_inches="tight" keeps it inside the saved image.
plt.legend(fontsize=14, loc="upper center", bbox_to_anchor=(0.5, -0.2))
for ext in ("png", "svg"):
    plt.savefig(f"final_figures/qc/rna_qc.{ext}", dpi=300, bbox_inches="tight")
plt.show()