In [None]:
import scanpy as sc
import numpy as np
import pandas as pd
import scipy.stats as stats
import os
import matplotlib.pyplot as plt
import pickle

Load the spatial perturb AnnData object

In [None]:
# Directory the input .h5ad file is read from in this notebook.
output_folder = r"/mnt/sata2/Analysis_Alex_2/perturb1"

In [None]:
# Load the AnnData object stored as final_celltyped_and_axes.h5ad in output_folder.
adata = sc.read(os.path.join(output_folder, "final_celltyped_and_axes.h5ad"))

In [None]:
# Make cell names unique in place: the sgRNA labels are joined back onto
# adata.obs by index later in this notebook, which needs unambiguous names.
adata.obs_names_make_unique()

Define the variables (genes) that act as guide RNA barcodes in the spatial perturb data and the sgRNA each one corresponds to

In [None]:
# The two lists are positionally paired: guide_rna_genes[i] is the variable in
# adata whose counts indicate the sgRNA named corresponding_guide_rnas[i].
guide_rna_genes = ["Muc5ac", "Neurog3", "Fer1l6"]
corresponding_guide_rnas = ["sgCd19", "sgThy1", "sgCxcr3"]

Helper functions

In [None]:
def transcript_thresholding_for_P14s(
    adata,
    gene_lists,
    minimum_counts,
    exclusion_genes=("Muc2",),
    exclusion_max_counts=1,
):
    """Keep the cells that pass every summed-count threshold.

    A cell is kept when, for every k, its counts summed over the genes in
    ``gene_lists[k]`` are ``>= minimum_counts[k]`` and its counts summed over
    ``exclusion_genes`` are ``<= exclusion_max_counts``.

    Parameters
    ----------
    adata
        AnnData-like object; ``adata.X`` may be a dense array or a scipy
        sparse matrix. It is not modified.
    gene_lists : sequence of list of str
        One list of variable names per threshold. Names that are not present
        in ``adata.var`` simply contribute no counts.
    minimum_counts : sequence of number
        Minimum summed count for the matching entry of ``gene_lists``.
    exclusion_genes : iterable of str or None, default ("Muc2",)
        Genes whose summed count must stay at or below
        ``exclusion_max_counts``. Pass ``None`` to skip this filter.
    exclusion_max_counts : number, default 1
        Upper bound applied to the summed ``exclusion_genes`` counts.

    Returns
    -------
    An independent copy of the passing cells, with ``X`` cast to float64.

    Raises
    ------
    ValueError
        If ``gene_lists`` and ``minimum_counts`` differ in length.
    """
    if len(gene_lists) != len(minimum_counts):
        raise ValueError(
            "gene_lists and minimum_counts must have the same length "
            f"(got {len(gene_lists)} and {len(minimum_counts)})"
        )

    def _summed_counts(genes):
        # Per-cell total over the requested genes.
        if isinstance(genes, str):
            genes = [genes]
        gene_mask = adata.var.index.isin(list(genes))
        per_cell = adata[:, gene_mask].X.sum(axis=1)
        # Sparse matrices return an (n, 1) np.matrix here; flatten either way.
        return np.asarray(per_cell).ravel()

    # Each criterion is evaluated per cell, so the filters can be combined
    # into one mask instead of repeatedly subsetting a full copy of the data.
    keep = np.ones(adata.n_obs, dtype=bool)
    for gene_list, min_count in zip(gene_lists, minimum_counts):
        keep &= _summed_counts(gene_list) >= min_count

    if exclusion_genes is not None:
        keep &= _summed_counts(exclusion_genes) <= exclusion_max_counts

    # Copy only the surviving cells so the result is independent of the input.
    ctrl = adata[keep].copy()
    ctrl.X = ctrl.X.astype(np.float64)

    return ctrl


def remove_cells_multiple_guides(adata, guide_rna_genes):
    """Drop cells in which more than one guide-RNA gene has a non-zero count.

    Cells with zero or exactly one detected guide gene are kept.

    Parameters
    ----------
    adata
        AnnData-like object; ``adata.X`` may be dense or scipy sparse.
    guide_rna_genes : list of str
        Variable names treated as guide-RNA readouts. Names not present in
        ``adata.var`` are ignored.

    Returns
    -------
    The row subset ``adata[keep, :]``. The input is not copied or modified
    here, so with AnnData the result is whatever row indexing returns.
    """
    guide_mask = adata.var.index.isin(guide_rna_genes)
    guide_counts = adata[:, guide_mask].X
    # `!= 0` followed by a row sum works for both dense arrays and sparse
    # matrices (np.count_nonzero with an axis does not accept sparse input).
    n_guides_detected = np.asarray((guide_counts != 0).sum(axis=1)).ravel()
    return adata[n_guides_detected <= 1, :]


def assign_guide_rnas(adata, guide_rna_genes, corresponding_guide_rnas):
    """Label each cell with the guide whose readout gene has the highest count.

    Adds two columns to ``adata.obs``:

    * ``"guide_rna_genes"``: the entry of ``guide_rna_genes`` with the largest
      count in that cell.
    * ``"guide_rnas"``: the entry of ``corresponding_guide_rnas`` at the same
      position.

    Ties, including cells with no guide counts at all, resolve to the first
    entry (``argmax`` semantics), so filter such cells out beforehand.

    Parameters
    ----------
    adata
        AnnData-like object; ``adata.X`` may be dense or scipy sparse. Its
        ``obs`` is modified in place.
    guide_rna_genes : list of str
        Variable names used as guide readouts; each must occur exactly once
        in ``adata.var``.
    corresponding_guide_rnas : list of str
        sgRNA names, positionally paired with ``guide_rna_genes``.

    Returns
    -------
    The same ``adata`` object, for chaining.

    Raises
    ------
    ValueError
        If the two name lists differ in length.
    KeyError
        If a guide gene is missing from, or duplicated in, ``adata.var``.
    """
    if len(guide_rna_genes) != len(corresponding_guide_rnas):
        raise ValueError(
            "guide_rna_genes and corresponding_guide_rnas must have the same "
            f"length (got {len(guide_rna_genes)} and "
            f"{len(corresponding_guide_rnas)})"
        )

    # Resolve one column per guide, in the order given, so that argmax
    # positions map straight back onto the two name lists. A missing or
    # duplicated gene would silently shift that mapping, so reject it.
    var_names = np.asarray(adata.var.index)
    guide_columns = []
    for gene in guide_rna_genes:
        hits = np.flatnonzero(var_names == gene)
        if hits.size != 1:
            raise KeyError(
                f"expected exactly one variable named {gene!r}, found {hits.size}"
            )
        guide_columns.append(hits[0])

    guide_counts = adata[:, np.asarray(guide_columns, dtype=int)].X
    # Sparse argmax returns an (n, 1) matrix; flatten so it indexes cleanly.
    top_guide = np.asarray(guide_counts.argmax(axis=1)).ravel()

    adata.obs["guide_rna_genes"] = np.asarray(guide_rna_genes)[top_guide].tolist()
    adata.obs["guide_rnas"] = np.asarray(corresponding_guide_rnas)[top_guide].tolist()
    return adata

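Filter for cells with at least one guide RNA barcode count and at least 3 combined counts of Cd8a, Cd8b1 and Cd3e; cells with more than one Muc2 count or more than one detected guide are also removed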

In [None]:
# gene_lists[k] is paired with minimum_counts[k]: a cell must have at least
# minimum_counts[k] counts summed across the genes in gene_lists[k].
gene_lists = [guide_rna_genes, ["Cd8a", "Cd8b1", "Cd3e"]]

# >= 1 guide-barcode count and >= 3 combined Cd8a/Cd8b1/Cd3e counts.
minimum_counts = [1, 3]

In [None]:
# 1) keep cells passing the count thresholds (and the Muc2 cutoff inside the helper),
# 2) drop cells with more than one guide gene detected,
# 3) label every remaining cell with its guide in .obs.
filtered_adata = transcript_thresholding_for_P14s(adata, gene_lists, minimum_counts)
filtered_adata = remove_cells_multiple_guides(filtered_adata, guide_rna_genes)
assigned_adata = assign_guide_rnas(
    filtered_adata, guide_rna_genes, corresponding_guide_rnas
)

Compute PCA, the neighbor graph, UMAP and Leiden clusters on the guide-assigned cells

In [None]:
# PCA -> neighbor graph -> UMAP -> Leiden clustering, all with scanpy defaults;
# each call writes its result onto assigned_adata.
# NOTE(review): nothing in this notebook normalizes or log-transforms
# assigned_adata.X before the PCA — confirm the loaded matrix is already in the
# intended scale.
sc.tl.pca(assigned_adata)
sc.pp.neighbors(assigned_adata)
sc.tl.umap(assigned_adata)
sc.tl.leiden(assigned_adata)
# NOTE(review): both colored columns are labels rather than continuous values,
# so vmax=10 presumably has no visible effect — confirm.
sc.pl.umap(assigned_adata, color=["guide_rnas", "leiden"], vmax=10)

Process only the cells that passed the filtering constraints

In [None]:
# Work on a copy so assigned_adata keeps its un-normalized matrix.
p14s = assigned_adata.copy()

# Rebuild X as a dense float64 NumPy array.
temp_X = np.array(p14s.X).astype(np.float64)

# X is cleared before being reassigned.
# NOTE(review): presumably this forces the replacement to take effect instead
# of an in-place write — confirm whether a direct assignment would suffice.
p14s.X = None

p14s.X = temp_X

# Per-cell total-count normalization followed by log1p, with scanpy defaults.
sc.pp.normalize_total(p14s)
sc.pp.log1p(p14s)

# Drop genes detected in fewer than 10 cells, then drop the guide-barcode genes.
# The guide genes are removed only after normalization, so their counts still
# contributed to each cell's total.
sc.pp.filter_genes(p14s, min_cells=10)
p14s = p14s[:, ~p14s.var.index.isin(guide_rna_genes)]

Transform epithelial distance

In [None]:
def transformation(x, a=0.1, b=0.1, c=0.5, d=2.5, f=4, w=1):
    """Return ``a*exp(b*(x - w)) - c*exp(-d*(x - w)) + f`` element-wise.

    ``x`` is converted to a NumPy array first, so lists, scalars and pandas
    Series are accepted and the result is a NumPy value of matching shape.
    """
    offset = np.asarray(x) - w
    rising_term = a * np.exp(b * offset)
    falling_term = c * np.exp(-d * offset)
    return rising_term - falling_term + f


# Apply the transformation with its default parameters to every cell of the
# full object (not only the guide-assigned subset).
adata.obs["epithelial_distance_transformed"] = transformation(
    adata.obs["epithelial_distance_clipped"]
)

Add sgRNA assignments to the object

In [None]:
# Bring the per-cell sgRNA label from the filtered subset onto the full object,
# matching on cell name; cells that were filtered out get "Other cells".
# Reindexing (rather than merging into adata.obs) keeps this cell re-runnable:
# a second merge would produce guide_rnas_x / guide_rnas_y columns instead.
# The cast to object lets the fill value be used even if the source column is
# categorical.
array_vals = (
    assigned_adata.obs["guide_rnas"]
    .astype(object)
    .reindex(adata.obs_names)
    .fillna("Other cells")
    .to_numpy()
)
adata.obs["guide_rnas"] = array_vals

In [None]:
# Save next to the input file; build the path from output_folder so the
# location is defined in one place only.
adata.write(os.path.join(output_folder, "adata_with_guides.h5ad"))