In [None]:
import scvi
import scanpy as sc
import os
import pandas as pd
from scvi.model.utils import mde

##### Put the replicate 2 processed data path here

In [None]:
# Folder holding the processed replicate 2 data; the replicate 2 AnnData is
# read from this folder in the next cell.
output_folder = r"/mnt/sata1/Analysis_Alex/timecourse_replicates/analysis/cleaned"

In [None]:
# Load the processed replicate 2 AnnData from the output folder.
replicates_xenium_data = sc.read(
    os.path.join(output_folder, "final_celltyped_and_axes.h5ad")
)

##### Put the replicate 1 processed data path here

In [None]:
# Load the processed replicate 1 AnnData, used as the reference throughout.
reference_xenium_data = sc.read(
    "/mnt/sata1/Analysis_Alex/timecourse_final/analysis/cleaned/final_celltyped_and_axes.h5ad"
)

Merging the two objects replicate 1 and replicate 2 together

In [None]:
# Rename every replicate 2 cell to "rep2_<batch>_<original id>" ahead of
# merging the two replicates.
rep2_batches = replicates_xenium_data.obs["batch"].values
rep2_cell_ids = replicates_xenium_data.obs.index.values
replicates_xenium_data.obs.index = [
    "rep2_" + batch + "_" + cell_id
    for batch, cell_id in zip(rep2_batches, rep2_cell_ids)
]

In [None]:
# Rename every replicate 1 cell to "rep1_<batch>_<original id>" ahead of
# merging the two replicates.
rep1_batches = reference_xenium_data.obs["batch"].values
rep1_cell_ids = reference_xenium_data.obs.index.values
reference_xenium_data.obs.index = [
    "rep1_" + batch + "_" + cell_id
    for batch, cell_id in zip(rep1_batches, rep1_cell_ids)
]

In [None]:
# Discard the unstructured (.uns) annotations of both objects before merging.
del replicates_xenium_data.uns
del reference_xenium_data.uns

In [None]:
# Keep handles on the full per-cell metadata tables: .obs is replaced by a
# batch-only table below, and these tables are used later to bring the
# annotation columns back.
replicate_obs = replicates_xenium_data.obs
reference_obs = reference_xenium_data.obs

In [None]:
# Reduce the replicate 2 metadata to the "batch" column only.
replicates_xenium_data.obs = pd.DataFrame(
    replicates_xenium_data.obs["batch"], index=replicates_xenium_data.obs.index.values
)

In [None]:
# Reduce the replicate 1 metadata to the "batch" column only.
reference_xenium_data.obs = pd.DataFrame(
    reference_xenium_data.obs["batch"], index=reference_xenium_data.obs.index.values
)

In [None]:
# Stack the cells of replicate 2 and replicate 1 into one AnnData.
# NOTE(review): relies on the default join of sc.concat — confirm that both
# objects share the intended genes and the "raw" layer used further down.
concatenated_xenium = sc.concat([replicates_xenium_data, reference_xenium_data])

Running scvi joint integration on the objects

In [None]:
# Register the merged object with scVI: obs["batch"] is the batch covariate
# and the "raw" layer is the input matrix.
scvi.model.SCVI.setup_anndata(concatenated_xenium, batch_key="batch", layer="raw")

In [None]:
# Build the scVI model (2 layers, 30 latent dimensions, "nb" gene likelihood)
# and train it with the default training settings.
# NOTE(review): no random seed is set anywhere in this notebook — consider
# fixing one so the latent space and the clusters derived from it are
# reproducible.
scvi_ref = scvi.model.SCVI(
    concatenated_xenium, n_layers=2, n_latent=30, gene_likelihood="nb"
)
scvi_ref.train()

In [None]:
# Store the model's latent representation of every cell under obsm["X_scVI"].
concatenated_xenium.obsm["X_scVI"] = scvi_ref.get_latent_representation()

#### Projecting into UMAP, clustering, and subsetting the data to relevant observations

In [None]:
# Build the neighbour graph from the scVI latent representation.
sc.pp.neighbors(concatenated_xenium, use_rep="X_scVI")

In [None]:
# Compute an MDE embedding of the latent representation; it is the basis
# ("mde") of the embedding plots below.
concatenated_xenium.obsm["X_mde"] = mde(concatenated_xenium.obsm["X_scVI"])

In [None]:
# Annotation columns taken from the original per-replicate metadata tables.
annotation_columns = [
    "predicted_longitudinal",
    "not_removed_from_longitudinal",
    "Subtype",
    "Type",
    "Immunocentric_Type",
    "Class",
    "leiden",
    "epithelial_distance",
    "crypt_villi_axis",
    "epithelial_distance_clipped",
    "batch",
]

# One table covering all cells: replicate 2 rows first, then replicate 1 rows.
observations = pd.concat(
    [replicate_obs[annotation_columns], reference_obs[annotation_columns]]
)

In [None]:
# Attach the per-cell annotations to the merged object, matching on cell ID.
# Columns that already exist on the merged object (notably "batch") are left
# out of the right-hand table: merging them would make pandas rename both
# copies to "batch_x" / "batch_y", so the later selection of the "batch"
# column would fail with a KeyError. Dropping them also makes this cell safe
# to re-run.
already_present = observations.columns.intersection(concatenated_xenium.obs.columns)
concatenated_xenium.obs = concatenated_xenium.obs.merge(
    observations.drop(columns=already_present),
    how="left",
    left_index=True,
    right_index=True,
)

In [None]:
# Leiden clustering of the merged object with default settings; the cluster
# labels are read from obs["leiden"] by the cells below.
sc.tl.leiden(concatenated_xenium)

In [None]:
import matplotlib.pyplot as plt

# MDE embedding coloured by joint Leiden cluster, cropped to [-3, 3] on both axes.
fig = sc.pl.embedding(
    concatenated_xenium,
    basis="mde",
    color="leiden",
    vmax=1,
    return_fig=True,
)
ax = plt.gca()
ax.set_xlim(-3, 3)
ax.set_ylim(-3, 3)
plt.show()

#### Subclustering all leiden clusters

In [None]:
from tqdm.notebook import tqdm
import numpy as np


def get_celltype(celltype, ad_sp):
    """Subcluster the cells belonging to one Leiden cluster.

    Parameters
    ----------
    celltype
        Label in ``ad_sp.obs["leiden"]`` selecting the cells to subcluster.
    ad_sp
        AnnData with a ``leiden`` column in ``.obs`` and an ``X_scVI`` entry
        in ``.obsm``.

    Returns
    -------
    A standalone AnnData of the selected cells with a neighbour graph built
    on ``X_scVI``, Leiden labels (resolution 1.2) in ``.obs["leiden"]`` and a
    UMAP embedding.
    """
    # Slicing yields a view of ``ad_sp``; take an explicit copy because the
    # scanpy calls below write their results into the object.
    ctype = ad_sp[ad_sp.obs.leiden.isin([celltype])].copy()
    sc.pp.neighbors(ctype, use_rep="X_scVI")
    sc.tl.leiden(ctype, resolution=1.2)
    sc.tl.umap(ctype)
    return ctype


def reunite_with_ad(ad_sp, subset_ad, celltype):
    """Write the subcluster labels of one cluster back into the full object.

    Parameters
    ----------
    ad_sp
        Object whose ``.obs`` table has ``leiden`` and ``Sub_leiden`` columns.
    subset_ad
        Object whose ``.obs["leiden"]`` holds the new label of every cell of
        the cluster, indexed by the same cell IDs as ``ad_sp.obs``.
    celltype
        The ``leiden`` label whose cells receive their new labels.

    Returns
    -------
    ``ad_sp`` itself, with ``.obs["Sub_leiden"]`` replaced by an object-dtype
    column: cells of ``celltype`` carry their label from ``subset_ad``, all
    other cells keep their previous ``Sub_leiden`` value.

    Raises
    ------
    KeyError
        If a cell of ``celltype`` is missing from ``subset_ad.obs``.
    """
    in_cluster = (ad_sp.obs["leiden"] == celltype).to_numpy()

    # Work on an object-dtype copy so new label strings can be written freely,
    # whatever dtype the column currently has.
    new_labels = ad_sp.obs["Sub_leiden"].to_numpy(dtype=object, copy=True)

    # One label-aligned lookup for the whole cluster instead of one ``.loc``
    # call per cell; the selected IDs are in the same order as the mask.
    cluster_cell_ids = ad_sp.obs.index[in_cluster]
    new_labels[in_cluster] = subset_ad.obs["leiden"].loc[cluster_cell_ids].to_numpy()

    ad_sp.obs["Sub_leiden"] = new_labels
    return ad_sp


# Start from the joint Leiden labels, then subcluster every cluster on its own
# and relabel its cells as "<cluster>_<subcluster>".
concatenated_xenium.obs["Sub_leiden"] = concatenated_xenium.obs["leiden"]
leiden_clusters = np.unique(concatenated_xenium.obs["leiden"].values)
for leiden_to_subset in tqdm(leiden_clusters):
    mac = get_celltype(leiden_to_subset, concatenated_xenium)
    mac.obs["leiden"] = [
        leiden_to_subset + "_" + sub_label for sub_label in mac.obs.leiden
    ]
    concatenated_xenium = reunite_with_ad(
        concatenated_xenium, mac, leiden_to_subset
    )

#### Reassigning the cell types of all the replicate 2 samples based on what cell type 1 annotation is most prevalent in each joint cluster 

In [None]:
# Map every subcluster to the Subtype label carried by most of its cells.
dictionary_sub = {
    sub_cluster: cells["Subtype"].value_counts().idxmax()
    for sub_cluster, cells in concatenated_xenium.obs.groupby("Sub_leiden")
}

In [None]:
# Per-cell majority subtype of the cell's subcluster (None if unmapped).
reassigned_subtypes = [
    dictionary_sub.get(sub_cluster)
    for sub_cluster in concatenated_xenium.obs["Sub_leiden"].values
]

In [None]:
# Store the reassigned labels in their own column; the original "Subtype"
# column is left untouched at this point.
concatenated_xenium.obs["New_Subtype"] = reassigned_subtypes

In [None]:
import matplotlib.pyplot as plt

# MDE embedding coloured by subcluster with the labels drawn on the data,
# cropped to [-3, 3] on both axes.
fig = sc.pl.embedding(
    concatenated_xenium,
    basis="mde",
    color="Sub_leiden",
    legend_loc="on data",
    vmax=1,
    return_fig=True,
)
ax = plt.gca()
ax.set_xlim(-3, 3)
ax.set_ylim(-3, 3)
plt.show()

In [None]:
import matplotlib.pyplot as plt

# MDE embedding coloured by the reassigned subtype, cropped to [-3, 3] on both axes.
fig = sc.pl.embedding(
    concatenated_xenium,
    basis="mde",
    color="New_Subtype",
    vmax=1,
    return_fig=True,
)
ax = plt.gca()
ax.set_xlim(-3, 3)
ax.set_ylim(-3, 3)
plt.show()

#### Assigning Type, Class and Immunocentric Type based on hierarchy
##### Put the path to the final replicate 1 adata

In [None]:
# Final replicate 1 AnnData; its annotations provide the
# Subtype -> Type -> Class (and Immunocentric_Type) hierarchy used below.
rep1_path = "/mnt/sata1/Analysis_Alex/timecourse_final/analysis/cleaned/final_celltyped_and_axes.h5ad"

In [None]:
# Count tables of the replicate 1 annotations. In each table the rows are the
# parent level and the columns the child level, so a non-zero entry marks a
# parent/child pair that occurs in replicate 1.
types = sc.read(rep1_path)
types_ = pd.crosstab(types.obs["Type"], types.obs["Subtype"])  # Type x Subtype
class_ = pd.crosstab(types.obs["Class"], types.obs["Type"])  # Class x Type
immuno_ = pd.crosstab(types.obs["Immunocentric_Type"], types.obs["Subtype"])  # Immunocentric_Type x Subtype

In [None]:
# Derive Type, Immunocentric_Type and Class for every cell from its reassigned
# subtype. The parent of a label is the first row of the matching replicate 1
# crosstab that has a non-zero count in that label's column.

# Collect the labels from the values themselves rather than through ``.cat``:
# "New_Subtype" was assigned from a plain Python list, so it is only
# categorical if something else converted it in the meantime.
subtype_labels = concatenated_xenium.obs["New_Subtype"].dropna().unique()

# Subtype -> Type. A subtype absent from the replicate 1 crosstab raises here.
type_dictionary = {}
for i in subtype_labels:
    type_dictionary[i] = types_.index.values[np.where(types_[i].values > 0)[0]][0]
all_types = [
    type_dictionary.get(k) for k in concatenated_xenium.obs["New_Subtype"].values
]
concatenated_xenium.obs["Type"] = all_types

# Subtype -> Immunocentric_Type. Subtypes without a match are allowed here.
itype_dictionary = {}
for i in subtype_labels:
    try:
        itype_dictionary[i] = immuno_.index.values[np.where(immuno_[i].values > 0)[0]][
            0
        ]
    except (KeyError, IndexError):
        # No immunocentric parent in the replicate 1 crosstab: report the
        # subtype and leave it unmapped, so its cells get None below.
        print(i)
all_itypes = [
    itype_dictionary.get(k) for k in concatenated_xenium.obs["New_Subtype"].values
]
concatenated_xenium.obs["Immunocentric_Type"] = all_itypes

# Type -> Class.
class_dictionary = {}
for i in concatenated_xenium.obs["Type"].dropna().unique():
    class_dictionary[i] = class_.index.values[np.where(class_[i].values > 0)[0]][0]
all_classes = [class_dictionary.get(k) for k in concatenated_xenium.obs["Type"].values]
concatenated_xenium.obs["Class"] = all_classes

# sc.pl.embedding(concatenated_xenium, basis='mde', color=['New_Subtype', 'Type', 'Class', 'Immunocentric_Type'])

In [None]:
import matplotlib.pyplot as plt

# MDE embedding coloured by the derived Type, cropped to [-3, 3] on both axes.
fig = sc.pl.embedding(
    concatenated_xenium,
    basis="mde",
    color="Type",
    vmax=1,
    return_fig=True,
)
ax = plt.gca()
ax.set_xlim(-3, 3)
ax.set_ylim(-3, 3)
plt.show()

In [None]:
# Adopt the reassigned labels as the "Subtype" annotation, overwriting the
# values carried over from the per-replicate metadata.
concatenated_xenium.obs["Subtype"] = concatenated_xenium.obs["New_Subtype"]

In [None]:
# Columns restored unchanged from the original per-replicate metadata tables.
restored_columns = [
    "total_transcripts",
    "nuclear_transcripts",
    "cytoplasmic_transcripts",
    "nuclear_transcript_percentage",
    "cell",
    "x",
    "y",
    "predicted_longitudinal",
    "not_removed_from_longitudinal",
    "epithelial_distance",
    "crypt_villi_axis",
    "epithelial_distance_clipped",
]

# Replicate 1 rows first, then replicate 2 rows.
first_df = pd.concat([reference_obs, replicate_obs])[restored_columns]

In [None]:
# Final metadata table: the batch, clustering and cell-type columns computed
# in this notebook, joined by cell ID with the columns restored from the
# original per-replicate tables.
concatenated_xenium.obs = concatenated_xenium.obs[
    ["batch", "leiden", "Sub_leiden", "Subtype", "Type", "Class", "Immunocentric_Type"]
].merge(first_df, how="left", left_index=True, right_index=True)

##### Writing out the object with all replicates

In [None]:
# Save the merged, re-annotated object to disk.
concatenated_xenium.write(
    r"/mnt/sata1/Analysis_Alex/timecourse_replicates/analysis/cleaned/full_xenium_replicates_and_reference.h5ad"
)

In [None]:
import scvi
import scanpy as sc
import os
import pandas as pd
from scvi.model.utils import mde

In [None]:
# Reload the merged object written above.
concatenated_xenium = sc.read(
    r"/mnt/sata1/Analysis_Alex/timecourse_replicates/analysis/cleaned/full_xenium_replicates_and_reference.h5ad"
)

#### Some manual correction of cell type clusters

In [None]:
# Manual relabelling rules, applied per cell in this order of priority:
#   1. "ILC" cells in subclusters 3_0 / 3_6  -> "NK-Cell"
#   2. any other "ILC" cell                  -> stays "ILC"
#   3. cells labelled "NK-Cell"              -> "DC2"
#   4. cells in subcluster 11_0              -> "Paneth"
#   5. everything else keeps its current subtype
# NOTE: the rules are not idempotent once the result is written back to
# obs["Subtype"]: a cell relabelled "NK-Cell" by rule 1 would match rule 3 on
# a second pass. Reload the saved object before re-running this cell.

# Fetch both columns once and read the values positionally from the arrays.
# Integer positions on the string-indexed Series (``obs["Subtype"][i]``) are
# deprecated in pandas.
current_subtypes = concatenated_xenium.obs["Subtype"].to_numpy()
sub_leiden_labels = concatenated_xenium.obs["Sub_leiden"].to_numpy()

new_cell_types = []
for subtype, sub_leiden in zip(current_subtypes, sub_leiden_labels):
    if subtype == "ILC":
        new_cell_types.append("NK-Cell" if sub_leiden in ["3_0", "3_6"] else "ILC")
    elif subtype == "NK-Cell":
        new_cell_types.append("DC2")
    elif sub_leiden in ["11_0"]:
        new_cell_types.append("Paneth")
    else:
        new_cell_types.append(subtype)

In [None]:
# Overwrite "Subtype" with the manually corrected labels (a plain Python list,
# so the column is no longer categorical after this assignment).
concatenated_xenium.obs["Subtype"] = new_cell_types

In [None]:
# Path to the final replicate 1 AnnData. It is defined here as well so that
# this part of the notebook, which re-imports its dependencies and reloads the
# saved object, also runs in a fresh kernel.
rep1_path = "/mnt/sata1/Analysis_Alex/timecourse_final/analysis/cleaned/final_celltyped_and_axes.h5ad"

# Count tables of the replicate 1 annotations. In each table the rows are the
# parent level and the columns the child level, so a non-zero entry marks a
# parent/child pair that occurs in replicate 1.
types = sc.read(rep1_path)
types_ = pd.crosstab(types.obs["Type"], types.obs["Subtype"])
class_ = pd.crosstab(types.obs["Class"], types.obs["Type"])
immuno_ = pd.crosstab(types.obs["Immunocentric_Type"], types.obs["Subtype"])

In [None]:
import numpy as np

# Re-derive Type, Immunocentric_Type and Class after the manual subtype
# correction. The parent of a label is the first row of the matching
# replicate 1 crosstab that has a non-zero count in that label's column.

# Collect the labels from the values themselves rather than through ``.cat``:
# "Subtype" was overwritten with a plain Python list, so it has no
# categorical accessor at this point.
subtype_labels = concatenated_xenium.obs["Subtype"].dropna().unique()

# Subtype -> Type. A subtype absent from the replicate 1 crosstab raises here.
type_dictionary = {}
for i in subtype_labels:
    type_dictionary[i] = types_.index.values[np.where(types_[i].values > 0)[0]][0]
all_types = [type_dictionary.get(k) for k in concatenated_xenium.obs["Subtype"].values]
concatenated_xenium.obs["Type"] = all_types

# Subtype -> Immunocentric_Type. Subtypes without a match are allowed here.
itype_dictionary = {}
for i in subtype_labels:
    try:
        itype_dictionary[i] = immuno_.index.values[np.where(immuno_[i].values > 0)[0]][
            0
        ]
    except (KeyError, IndexError):
        # No immunocentric parent in the replicate 1 crosstab: report the
        # subtype and leave it unmapped, so its cells get None below.
        print(i)
all_itypes = [itype_dictionary.get(k) for k in concatenated_xenium.obs["Subtype"].values]
concatenated_xenium.obs["Immunocentric_Type"] = all_itypes

# Type -> Class.
class_dictionary = {}
for i in concatenated_xenium.obs["Type"].dropna().unique():
    class_dictionary[i] = class_.index.values[np.where(class_[i].values > 0)[0]][0]
all_classes = [class_dictionary.get(k) for k in concatenated_xenium.obs["Type"].values]
concatenated_xenium.obs["Class"] = all_classes

# sc.pl.embedding(concatenated_xenium, basis='mde', color=['Subtype', 'Type', 'Class', 'Immunocentric_Type'])

In [None]:
# Save the manually corrected object, overwriting the file it was loaded from.
concatenated_xenium.write(
    r"/mnt/sata1/Analysis_Alex/timecourse_replicates/analysis/cleaned/full_xenium_replicates_and_reference.h5ad"
)