In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scanpy as sc
import os

In [None]:
# Load the dataset that feeds the guide-composition pie chart further down.
# NOTE(review): absolute, machine-specific path — consider deriving it from a
# configurable data directory so the notebook runs elsewhere.
adata = sc.read("/mnt/sata2/Analysis_Alex_2/perturb1/final_filtered_on_leiden.h5ad")

In [None]:
# Directory (relative to the current working directory) that receives the
# figure PDFs saved by the plotting cells of this notebook.
output_dir = "figures/guide_rna_stats"

In [None]:
# Create the figure directory (including missing parents). exist_ok=True makes
# re-running this cell a no-op when the directory is already there, while still
# raising FileExistsError if the path exists but is not a directory.
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Keep only the cells whose obs["guide_rnas"] value is one of the three sgRNAs
# listed here; P14s is the row subset of adata used for the pie chart.
corresponding_guide_rnas = ["sgCd19", "sgThy1", "sgCxcr3"]
P14s = adata[adata.obs["guide_rnas"].isin(corresponding_guide_rnas)]

In [None]:
# Index-parallel lists: guide_rna_genes[i] is the name looked up in
# adata.var.index, and corresponding_guide_rnas[i] is the sgRNA label reported
# for it (the label list repeats the definition from the previous cell).
guide_rna_genes = ["Muc5ac", "Neurog3", "Fer1l6"]
corresponding_guide_rnas = ["sgCd19", "sgThy1", "sgCxcr3"]

In [None]:
# NOTE: because of the `~`, these are the column positions of every gene that
# is NOT in guide_rna_genes, despite the variable name. Nothing else in the
# visible cells reads this variable.
guide_rna_indices = np.where(~adata.var.index.isin(guide_rna_genes))[0]

In [None]:
# Column positions of the guide genes in adata.var.index. np.where returns them
# in ascending column order, which is not necessarily the order of
# guide_rna_genes — the pie-chart labels are re-mapped for that reason.
true_guide_rna_indices = np.where(adata.var.index.isin(guide_rna_genes))[0]

In [None]:
# Number of cells in P14s with a non-zero count for each guide gene (one entry
# per selected column, in var order). `!= 0` followed by `.sum` replaces
# np.count_nonzero(np.array(X)) so the cell also works when .X is a scipy
# sparse matrix, for which np.array(X) does not yield a 2-D numeric array.
# np.asarray(...).ravel() flattens the (1, n) matrix a sparse sum returns.
count_values = np.asarray(
    (P14s[:, true_guide_rna_indices].X != 0).sum(axis=0)
).ravel()

In [None]:
import matplotlib.pyplot as plt

# Pie inputs: one wedge per guide gene column, sized by the number of cells in
# which that gene was detected.
sizes = count_values
# The selected columns are in var order, so translate each gene name back to
# the sgRNA label stored at the same position in the parallel lists.
labels = [
    corresponding_guide_rnas[guide_rna_genes.index(gene)]
    for gene in P14s.var.index[true_guide_rna_indices]
]

# Create the pie chart with both percentage and quantity


# Define a function to format the labels
def func(pct, allvals):
    """Build a two-line pie-wedge label: the percentage, then the absolute count.

    Parameters
    ----------
    pct : float
        Percentage of the whole represented by the wedge (0-100), as passed by
        matplotlib's ``autopct`` callback.
    allvals : array-like of numbers
        All wedge sizes; their sum is the total the percentage refers to.

    Returns
    -------
    str
        The percentage with one decimal and a percent sign, a newline, and the
        absolute count in parentheses.
    """
    # The count is reconstructed from a floating-point percentage, so it can
    # land just below the true integer (e.g. 2.9999999). Round to the nearest
    # integer instead of truncating, otherwise the label is off by one.
    absolute = int(np.round(pct / 100.0 * np.sum(allvals)))
    return "{:.1f}%\n({:d})".format(pct, absolute)


# Render the guide composition as a pie chart; every wedge is annotated with
# its percentage and its absolute cell count via func.
fig, ax = plt.subplots()
ax.pie(
    sizes,
    startangle=90,
    autopct=lambda pct: func(pct, allvals=sizes),
    colors=["#92c5de", "#f57f20", "#0d5cb6"],
    labels=labels,
)

# Equal scaling on both axes keeps the pie circular.
ax.axis("equal")

ax.set_title("Guide composition of transferred cells")
fig.savefig(os.path.join(output_dir, "panel_s2_guide_composition.pdf"))
plt.show()

Number of cells with multiple guides

In [None]:
# Folder holding the .h5ad file read in the next cell. Despite the name it is
# used here as an input location; figures still go to output_dir.
# NOTE(review): absolute, machine-specific path.
output_folder = r"/mnt/sata2/Analysis_Alex_2/perturb1"

In [None]:
# Rebinds `adata` to a different file; every cell below works on this object,
# not on the one loaded at the top of the notebook.
adata = sc.read(os.path.join(output_folder, "final_celltyped_and_axes.h5ad"))

In [None]:
# Same index-parallel lists as defined earlier in the notebook, repeated so this
# section can be read on its own: guide_rna_genes[i] <-> corresponding_guide_rnas[i].
guide_rna_genes = ["Muc5ac", "Neurog3", "Fer1l6"]
corresponding_guide_rnas = ["sgCd19", "sgThy1", "sgCxcr3"]

In [None]:
def transcript_thresholding_for_P14s(
    adata,
    gene_lists,
    minimum_counts,
    exclusion_genes=("Muc2",),
    max_exclusion_counts=1,
):
    """Select cells by summed transcript counts over several gene sets.

    The filters are applied one after another:

    1. For every ``k``, keep cells whose summed ``.X`` over the genes in
       ``gene_lists[k]`` is at least ``minimum_counts[k]``.
    2. Keep cells whose summed ``.X`` over ``exclusion_genes`` is at most
       ``max_exclusion_counts``.

    Genes that are absent from ``adata.var.index`` contribute a sum of zero.

    Parameters
    ----------
    adata : AnnData-like
        Object exposing ``var.index``, ``X`` and row/column indexing. It is
        not modified.
    gene_lists : sequence of list-likes of str
        One gene set per inclusion filter.
    minimum_counts : sequence of numbers
        Threshold for each entry of ``gene_lists`` (same order).
    exclusion_genes : list-like of str, optional
        Genes used for the final exclusion filter. Defaults to ``("Muc2",)``.
    max_exclusion_counts : number, optional
        Largest summed count over ``exclusion_genes`` a cell may have.
        Defaults to ``1``.

    Returns
    -------
    AnnData-like
        An independent copy of the surviving cells with ``X`` cast to float64.
    """

    def _summed_counts(subset, genes):
        # Per-cell total over the requested genes. Summing on .X directly and
        # flattening afterwards works for dense arrays as well as scipy sparse
        # matrices (whose row sums come back with shape (n, 1)).
        columns = np.where(subset.var.index.isin(genes))[0]
        return np.asarray(subset[:, columns].X.sum(axis=1)).ravel()

    # Filter on views of the input and copy only the survivors at the end,
    # instead of duplicating the whole dataset before any cell is dropped.
    ctrl = adata
    for k in range(len(gene_lists)):
        ctrl = ctrl[_summed_counts(ctrl, gene_lists[k]) >= minimum_counts[k]]

    ctrl = ctrl[_summed_counts(ctrl, list(exclusion_genes)) <= max_exclusion_counts]

    # Materialise the selection before touching .X so the caller's object is
    # never written to and no implicit view-to-copy conversion is needed.
    ctrl = ctrl.copy()
    ctrl.X = ctrl.X.astype(np.float64)

    return ctrl


def remove_cells_multiple_guides(adata, guide_rna_genes):
    """Drop cells in which more than one guide gene has a non-zero count.

    Parameters
    ----------
    adata : AnnData-like
        Object exposing ``var.index``, ``X`` and row/column indexing.
    guide_rna_genes : list-like of str
        Names in ``adata.var.index`` that identify the guide genes.

    Returns
    -------
    AnnData-like
        ``adata`` indexed by the positions of the cells with zero or one
        detected guide gene (whatever ``adata[rows, :]`` returns).
    """
    guide_columns = np.where(adata.var.index.isin(guide_rna_genes))[0]
    # `!= 0` + `.sum` counts non-zero entries for dense arrays and scipy sparse
    # matrices alike; np.count_nonzero cannot be applied to a sparse matrix.
    guides_per_cell = np.asarray(
        (adata[:, guide_columns].X != 0).sum(axis=1)
    ).ravel()
    return adata[np.flatnonzero(guides_per_cell <= 1), :]


def assign_guide_rnas(adata, guide_rna_genes, corresponding_guide_rnas):
    """Label every cell with the guide gene that has the highest count.

    For each cell the guide-gene column with the largest ``.X`` value is
    selected (``argmax``; on ties, or when all guide counts are zero, the
    earliest entry of ``guide_rna_genes`` wins). Two columns are written to
    ``adata.obs``:

    * ``"guide_rna_genes"`` – the winning entry of ``guide_rna_genes``
    * ``"guide_rnas"`` – the entry of ``corresponding_guide_rnas`` at the same
      position

    Parameters
    ----------
    adata : AnnData-like
        Object exposing ``var.index``, ``obs``, ``X`` and column indexing. It
        is modified in place.
    guide_rna_genes : sequence of str
        Guide gene names; each must occur exactly once in ``adata.var.index``.
    corresponding_guide_rnas : sequence of str
        Guide label for each entry of ``guide_rna_genes`` (same order).

    Returns
    -------
    AnnData-like
        The same ``adata`` object that was passed in.

    Raises
    ------
    ValueError
        If a guide gene is missing from, or duplicated in, ``adata.var.index``,
        or if there are fewer guide labels than guide genes.
    """
    if len(corresponding_guide_rnas) < len(guide_rna_genes):
        raise ValueError(
            "corresponding_guide_rnas must provide a label for every entry of "
            "guide_rna_genes"
        )

    var_names = np.asarray(adata.var.index)
    guide_columns = []
    for gene in guide_rna_genes:
        matches = np.flatnonzero(var_names == gene)
        # Exactly one column per gene keeps column k aligned with
        # guide_rna_genes[k]; a missing or repeated gene would silently shift
        # the argmax positions onto the wrong labels.
        if matches.size != 1:
            raise ValueError(
                f"guide gene {gene!r} must occur exactly once in "
                f"adata.var.index (found {matches.size} matches)"
            )
        guide_columns.append(matches[0])
    guide_columns = np.array(guide_columns, dtype=int)

    # Compute the winner once. A sparse .X returns an (n, 1) matrix from
    # argmax, so flatten to a 1-D index vector before using it for lookup.
    top_guide = np.asarray(adata[:, guide_columns].X.argmax(axis=1)).ravel()

    adata.obs["guide_rna_genes"] = list(np.array(guide_rna_genes)[top_guide])
    adata.obs["guide_rnas"] = list(np.array(corresponding_guide_rnas)[top_guide])
    return adata

In [None]:
# Inclusion filters for transcript_thresholding_for_P14s: gene_lists[k] is
# paired with minimum_counts[k], i.e. a cell needs a summed count of at least 1
# over the guide genes and at least 3 over Cd8a/Cd8b1/Cd3e.
gene_lists = [guide_rna_genes, ["Cd8a", "Cd8b1", "Cd3e"]]

minimum_counts = [1, 3]

In [None]:
# Filter the cells, then label each surviving cell with its guide.
# assign_guide_rnas writes its obs columns onto the object it receives and
# returns that same object, so assigned_adata and filtered_adata are one object.
filtered_adata = transcript_thresholding_for_P14s(adata, gene_lists, minimum_counts)
assigned_adata = assign_guide_rnas(
    filtered_adata, guide_rna_genes, corresponding_guide_rnas
)

In [None]:
# Column position of every guide gene in assigned_adata, collected in the order
# of guide_rna_genes and flattened into a single 1-D index array.
true_guide_rna_indices = np.array(
    [np.where(assigned_adata.var.index == gene)[0] for gene in guide_rna_genes]
).flatten()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# For every cell, count how many guide genes have a non-zero value, then tally
# how many cells share each of those counts. `!= 0` + `.sum` is used so the
# cell works for dense and scipy sparse .X alike.
number, count = np.unique(
    np.asarray(
        (assigned_adata[:, true_guide_rna_indices].X != 0).sum(axis=1)
    ).ravel(),
    return_counts=True,
)

# Define the data
sizes = count
labels = number

# Create the bar chart
fig, ax = plt.subplots(dpi=400)
bars = ax.bar(labels, sizes, color=["#7fb3d5", "#f3c683", "#9bd8d3"])

# Adding labels and title; wrap=True keeps the long title inside the figure.
ax.set_xlabel("Number of unique sgRNA types per cell")
ax.set_ylabel("Count")
ax.set_title(
    "Number of unique sgRNA types per cell among all cells passing perturbed filtering criteria",
    wrap=True,
)

# Ensure x-ticks are integers
ax.set_xticks(np.arange(labels.min(), labels.max() + 1, 1))

# Add absolute number and percentage of the whole over each bar. The text is
# built from the integer count itself rather than from the bar height, so it
# never renders as a float.
total = sizes.sum()
for bar, size in zip(bars, sizes):
    ax.annotate(
        f"{int(size)}\n({size / total * 100:.2f}%)",  # "absolute\n(percentage%)"
        xy=(bar.get_x() + bar.get_width() / 2, bar.get_height()),
        xytext=(0, 3),  # 3 points vertical offset
        textcoords="offset points",
        ha="center",
        va="bottom",
    )

# Keep the original 4500 ceiling as a minimum, but grow it when a bar is taller
# so neither the bar nor its two-line annotation is clipped.
ax.set_ylim(0, max(4500, 1.15 * sizes.max()))

# Save the figure
fig.savefig(os.path.join(output_dir, "panel_s2_unique_guide_number_bar.pdf"))

# Show the plot
plt.show()