In [None]:
import scvi
import scanpy as sc
import os
import pandas as pd
from scvi.model.utils import mde

Assigning cell types to the perturb spatial data

Enter the path to the data

In [None]:
# Folder that holds the perturb input file read below ("final_filtered_on_leiden.h5ad").
# NOTE(review): absolute, machine-specific path — edit before running elsewhere.
output_folder = r"/mnt/sata2/Analysis_Alex_2/perturb1"

In [None]:
# Load the perturb replicate AnnData stored in the output folder.
replicates_path = os.path.join(output_folder, "final_filtered_on_leiden.h5ad")
replicates_xenium_data = sc.read(replicates_path)

Read in the time course adata

In [None]:
# Relative path: resolved against the kernel's current working directory.
# The same path is read a second time later to build the cell type hierarchy.
timecourse_path = "timecourse.h5ad"
reference_xenium_data = sc.read(timecourse_path)

In [None]:
# Drop the unstructured `.uns` annotations from both objects before they are combined.
for adata in (replicates_xenium_data, reference_xenium_data):
    del adata.uns

Combine the two AnnData objects

In [None]:
# Keep handles on the full per-cell tables: the next cell replaces `.obs` on both
# objects with a batch-only frame, and these originals are needed again later.
replicate_obs, reference_obs = (
    replicates_xenium_data.obs,
    reference_xenium_data.obs,
)

In [None]:
# Reduce `.obs` on both objects to the single "batch" column, keeping the cell index.
for adata in (replicates_xenium_data, reference_xenium_data):
    adata.obs = pd.DataFrame(adata.obs["batch"], index=adata.obs.index.values)

In [None]:
# Combine the datasets, perturb replicates first and the time course second.
# Later cells rebuild per-cell metadata by position and depend on this order.
concatenated_xenium = sc.concat([replicates_xenium_data, reference_xenium_data])

Run scVI

In [None]:
# Register the data with scVI: read from the "raw" layer, use obs["batch"] as the batch key.
scvi.model.SCVI.setup_anndata(concatenated_xenium, batch_key="batch", layer="raw")

In [None]:
# Fix the scvi-tools global seed so that model initialisation and training are
# repeatable; without it the latent space (and every clustering derived from it)
# differs on each Restart & Run All.
scvi.settings.seed = 0

# Two hidden layers, a 30-dimensional latent space, negative-binomial likelihood.
scvi_ref = scvi.model.SCVI(
    concatenated_xenium, n_layers=2, n_latent=30, gene_likelihood="nb"
)
scvi_ref.train()

In [None]:
# Store the trained model's latent representation under obsm["X_scVI"].
concatenated_xenium.obsm["X_scVI"] = scvi_ref.get_latent_representation()

Make a 2D projection

In [None]:
# Build the neighbor graph from the "X_scVI" representation.
sc.pp.neighbors(concatenated_xenium, use_rep="X_scVI")

In [None]:
# Embed the scVI latent space with scvi's `mde` helper and keep it under obsm["X_mde"].
concatenated_xenium.obsm["X_mde"] = mde(concatenated_xenium.obsm["X_scVI"])

Add necessary metadata to the concatenated object

In [None]:
# Columns carried over from the original per-cell tables of both datasets.
carried_columns = [
    "Subtype",
    "Type",
    "Immunocentric_Type",
    "Class",
    "leiden",
    "epithelial_distance",
    "crypt_villi_axis",
    "epithelial_distance_clipped",
    "batch",
]

# Same order as the concatenated object: replicates first, then the time course.
observations = pd.concat(
    [replicate_obs[carried_columns], reference_obs[carried_columns]]
)

In [None]:
# One string label per cell: "0", "1", ..., used as a shared positional index.
numerical_indices = list(map(str, range(len(concatenated_xenium.obs))))

In [None]:
# Give both tables the same positional labels so the merge in the next cell
# pairs them row by row. This assumes both are in the same row order.
concatenated_xenium.obs.index = numerical_indices
observations.index = numerical_indices

In [None]:
# Attach the carried-over metadata by index. Both sides contain a "batch" column,
# so pandas' default suffixes rename them to "batch_x" (from the concatenated
# object) and "batch_y" (from `observations`); later cells use "batch_x".
concatenated_xenium.obs = concatenated_xenium.obs.merge(
    observations, how="left", left_index=True, right_index=True
)

Cluster and subcluster the data

In [None]:
# Cluster the combined data. The result is read back from obs["leiden"] below,
# which replaces the "leiden" column merged in from the original tables.
sc.tl.leiden(concatenated_xenium)

In [None]:
from tqdm.notebook import tqdm
import numpy as np


def get_celltype(celltype, ad_sp):
    """Subcluster the cells of one Leiden cluster.

    Parameters
    ----------
    celltype
        Label in ``ad_sp.obs["leiden"]`` selecting the cluster to refine.
    ad_sp
        AnnData with a ``"leiden"`` column in ``.obs`` and an ``"X_scVI"``
        representation in ``.obsm``.

    Returns
    -------
    A new AnnData holding only the selected cells, with a neighbor graph built
    on ``"X_scVI"``, a fresh ``"leiden"`` clustering at resolution 1.2, and a
    UMAP embedding.
    """
    # Take an explicit copy: slicing alone yields a view of `ad_sp`, and the
    # scanpy calls below write results into the object they are given.
    ctype = ad_sp[ad_sp.obs.leiden.isin([celltype])].copy()
    sc.pp.neighbors(ctype, use_rep="X_scVI")
    sc.tl.leiden(ctype, resolution=1.2)
    sc.tl.umap(ctype)
    return ctype


def reunite_with_ad(ad_sp, subset_ad, celltype):
    """Write the subcluster labels of one cluster back into the full object.

    Parameters
    ----------
    ad_sp
        Object whose ``.obs`` has ``"leiden"`` and ``"Sub_leiden"`` columns.
        ``"Sub_leiden"`` is updated in place.
    subset_ad
        Object whose ``.obs["leiden"]`` holds the refined labels, indexed by
        the same cell labels as ``ad_sp.obs``.
    celltype
        Cluster label; only rows with ``ad_sp.obs["leiden"] == celltype`` change.

    Returns
    -------
    ``ad_sp`` (the same object that was passed in).

    Raises
    ------
    KeyError
        If a cell of the selected cluster is missing from ``subset_ad.obs``.
    """
    obs = ad_sp.obs
    in_cluster = (obs["leiden"] == celltype).to_numpy()

    # Work on a plain object array so new label strings can be written even
    # when "Sub_leiden" is categorical and does not yet contain them.
    merged = obs["Sub_leiden"].astype(object).to_numpy(copy=True)

    # One label-based lookup for the whole cluster instead of one per cell;
    # `.loc` returns the rows in the requested order, matching the mask.
    refined = subset_ad.obs.loc[obs.index[in_cluster], "leiden"]
    merged[in_cluster] = refined.astype(object).to_numpy()

    # Item assignment always targets the column (attribute assignment would
    # silently create a Python attribute if the column were absent).
    obs["Sub_leiden"] = merged
    return ad_sp


# Start from the coarse clustering, then refine one Leiden cluster at a time.
concatenated_xenium.obs["Sub_leiden"] = concatenated_xenium.obs["leiden"]
leiden_labels = np.unique(concatenated_xenium.obs["leiden"].values)
for leiden_to_subset in tqdm(leiden_labels):
    cluster_subset = get_celltype(leiden_to_subset, concatenated_xenium)
    # Prefix each subcluster with its parent label, e.g. "3" + "_" + "1".
    cluster_subset.obs["leiden"] = [
        "_".join((leiden_to_subset, sub_label))
        for sub_label in cluster_subset.obs.leiden
    ]
    concatenated_xenium = reunite_with_ad(
        concatenated_xenium, cluster_subset, leiden_to_subset
    )

Assign subtypes to the perturb data based on the most common time course subtype per cluster

In [None]:
# Map each subcluster to its most frequent "Subtype" label.
# Every row of the subcluster is counted; no filtering by batch happens here.
dictionary_sub = {}
for sub_leiden_label, members in concatenated_xenium.obs.groupby("Sub_leiden"):
    subtype_counts = members["Subtype"].value_counts()
    dictionary_sub[sub_leiden_label] = subtype_counts.idxmax()

In [None]:
# Look up the winning subtype for every cell; unknown subclusters give None.
reassigned_subtypes = [
    dictionary_sub.get(sub_leiden_label)
    for sub_leiden_label in concatenated_xenium.obs["Sub_leiden"].values
]

In [None]:
# Store the per-cell reassigned subtype (a plain Python list) as a new obs column.
concatenated_xenium.obs["New_Subtype"] = reassigned_subtypes

Create the cell type hierarchy

In [None]:
# Count co-occurrences of annotation levels in the time course; the tables are
# used below to find the parent label of each child label.
types = sc.read(timecourse_path)
timecourse_obs = types.obs
types_ = pd.crosstab(timecourse_obs["Type"], timecourse_obs["Subtype"])
class_ = pd.crosstab(timecourse_obs["Class"], timecourse_obs["Type"])
immuno_ = pd.crosstab(
    timecourse_obs["Immunocentric_Type"], timecourse_obs["Subtype"]
)

In [None]:
def _first_parent(crosstab, child):
    """Return the first row label of `crosstab` with a positive count in column `child`."""
    return crosstab.index.values[np.where(crosstab[child].values > 0)[0]][0]


# "New_Subtype" was assigned from a Python list, so it is not guaranteed to be
# categorical; `.unique()` works for both plain and categorical columns, whereas
# the `.cat` accessor raises AttributeError on a non-categorical column.
assigned_subtypes = concatenated_xenium.obs["New_Subtype"].dropna().unique()

# Subtype -> Type
type_dictionary = {i: _first_parent(types_, i) for i in assigned_subtypes}
all_types = [
    type_dictionary.get(k) for k in concatenated_xenium.obs["New_Subtype"].values
]
concatenated_xenium.obs["Type"] = all_types

# Subtype -> Immunocentric_Type. Subtypes with no immunocentric parent are
# printed and left unmapped (their cells get None).
itype_dictionary = {}
for i in assigned_subtypes:
    try:
        itype_dictionary[i] = _first_parent(immuno_, i)
    except (KeyError, IndexError):
        # KeyError: subtype is not a column of the table.
        # IndexError: column exists but no row has a positive count.
        print(i)
all_itypes = [
    itype_dictionary.get(k) for k in concatenated_xenium.obs["New_Subtype"].values
]
concatenated_xenium.obs["Immunocentric_Type"] = all_itypes

# Type -> Class
class_dictionary = {
    i: _first_parent(class_, i)
    for i in np.unique(concatenated_xenium.obs["Type"].values)
}
all_classes = [class_dictionary.get(k) for k in concatenated_xenium.obs["Type"].values]
concatenated_xenium.obs["Class"] = all_classes

In [None]:
# Overwrite the carried-over "Subtype" column with the reassigned labels.
concatenated_xenium.obs["Subtype"] = concatenated_xenium.obs["New_Subtype"]

Store previously calculated metadata

In [None]:
# Per-cell measurements computed earlier that should survive into the final table.
stored_columns = [
    "total_transcripts",
    "nuclear_transcripts",
    "cytoplasmic_transcripts",
    "nuclear_transcript_percentage",
    "cell",
    "x",
    "y",
    "epithelial_distance",
    "crypt_villi_axis",
    "epithelial_distance_clipped",
]

# Replicates first, then the time course — the same order in which the two
# datasets were concatenated. This frame is re-indexed by position afterwards,
# so any other order would attach the measurements to the wrong cells.
first_df = pd.concat([replicate_obs, reference_obs])[stored_columns]

In [None]:
# Positional relabel: this is only correct if the rows of `first_df` are in the
# same order as the cells of `concatenated_xenium`.
first_df.index = concatenated_xenium.obs.index.values

Add the previously calculated metadata to the newly calculated metadata

In [None]:
# Keep only the newly derived annotation columns, then join the stored
# measurements back on by index.
kept_columns = [
    "batch_x",
    "leiden",
    "Sub_leiden",
    "Subtype",
    "Type",
    "Class",
    "Immunocentric_Type",
]
new_annotations = concatenated_xenium.obs[kept_columns]
concatenated_xenium.obs = new_annotations.merge(
    first_df, how="left", left_index=True, right_index=True
)

In [None]:
# Keep only the cells whose batch is "SI_2" or "SI_3".
in_selected_batches = concatenated_xenium.obs["batch_x"].isin(["SI_2", "SI_3"])
concatenated_xenium = concatenated_xenium[in_selected_batches]

In [None]:
# Re-read the perturb file to get an unmodified object; the copy loaded at the
# top of the notebook has had its `.uns` deleted and its `.obs` replaced.
original_perturb = sc.read(os.path.join(output_folder, "final_filtered_on_leiden.h5ad"))

In [None]:
# Copy the new annotations onto the original object. `.values` drops the index,
# so the copy is positional and relies on both objects holding the same cells
# in the same order.
for label_column in ("Subtype", "Type", "Class", "Immunocentric_Type"):
    original_perturb.obs[label_column] = concatenated_xenium.obs[label_column].values

Write out the final processed perturb data

In [None]:
# Write next to the input file. Building the path from `output_folder` keeps the
# output location in step with the configured folder instead of repeating it.
original_perturb.write(os.path.join(output_folder, "figures_adata.h5ad"))