## Notebook for assigning cell types to clusters after manual annotation

In [None]:
import scanpy as sc
import numpy as np
from tqdm.notebook import tqdm
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scvi
import glob

In [None]:
# Locations of the two AnnData inputs used by this notebook.
# MERSCOPE: the integrated, clustered object written by the previous script.
input_folders = "/projects/2023_Spatial_Paper/Analysis_Alex/merscope_final/analysis/final/integrated_clustered.h5ad"
# Xenium: the final object produced after replicate 2 processing.
input_folders_x = "/mnt/sata1/Analysis_Alex/timecourse_replicates/analysis/cleaned/full_xenium_replicates_and_reference.h5ad"

# Draw every scanpy figure in this notebook on a 4 x 4 canvas.
sc.set_figure_params(figsize=(4, 4))

# `merscope` is the object being annotated; `types` holds the Xenium data.
merscope = sc.read(input_folders)
types = sc.read(input_folders_x)

### Getting the cell type hierarchies from the Xenium data

In [None]:
# Contingency tables between annotation levels of the Xenium object. Each
# cell of a table counts the observations carrying that row/column pairing.
# Rows: Type, columns: Subtype.
types_ = pd.crosstab(index=types.obs["Type"], columns=types.obs["Subtype"])
# Rows: Class, columns: Type.
class_ = pd.crosstab(index=types.obs["Class"], columns=types.obs["Type"])
# Rows: Immunocentric_Type, columns: Subtype.
immuno_ = pd.crosstab(
    index=types.obs["Immunocentric_Type"], columns=types.obs["Subtype"]
)

#### Reading in a CSV file with manual annotations of the sub-Leiden clusters, which we derived through an ensemble of methods and manual annotation. This CSV is provided in this directory.

In [None]:
# Manual annotation table: the index (first CSV column) is matched against the
# Sub_leiden cluster ids, and the first remaining column supplies the Subtype.
celllabels = pd.read_csv("merscope_annotations.csv", index_col=0)

# Resolve each distinct cluster once instead of once per cell; a pandas `.loc`
# lookup per observation is needlessly slow when many cells share a cluster.
# A cluster missing from the CSV still raises KeyError, as it did before.
cluster_to_subtype = {
    cluster: celllabels.loc[cluster].values[0]
    for cluster in pd.unique(merscope.obs["Sub_leiden"].values)
}
celltypes = [
    cluster_to_subtype[cluster] for cluster in merscope.obs["Sub_leiden"].values
]
merscope.obs["Subtype"] = celltypes

### Fixing nomenclature of the Subtype annotations

In [None]:
# Store Subtype as a categorical column; the final annotation cell iterates
# over its `.cat.categories`.
merscope.obs["Subtype"] = merscope.obs["Subtype"].astype("category")

In [None]:
# (old label, new label) pairs applied to the Subtype column one after the
# other, in this order.
subtype_renames = (
    ("Enterocyte 1", "Enterocyte_1"),
    ("Enterocyte 2", "Enterocyte_2"),
    ("Enterocyte 3", "Enterocyte_3"),
    ("Eosinophils", "Eosinophil"),
    # NOTE(review): the new label ends with a trailing space. Presumably this
    # mirrors the spelling used in the Xenium reference - confirm.
    ("Fibroblast_Pdgfrb+", "Fibroblast_Pdgfrb+ "),
    ("Resting_Fibroblast", "Resting Fibroblast"),
)
for old_label, new_label in subtype_renames:
    merscope.obs["Subtype"] = merscope.obs["Subtype"].replace(old_label, new_label)

### Fixing nomenclature of type and class annotations.

In [None]:
# Give the Type x Subtype table an all-zero "Unknown" row. The row is added as
# a column of the transposed table, which is then transposed back.
types_flip = types_.transpose().assign(Unknown=0)
types_ = types_flip.transpose()

In [None]:
# Indicator that is 1 on the "Unknown" row and 0 on every other row, so the
# "Unknown_1" and "Unknown_2" subtypes both point at the "Unknown" type.
unknown_row_flags = [
    1 if row_label == "Unknown" else 0 for row_label in types_.index.values
]
types_["Unknown_1"] = unknown_row_flags
types_["Unknown_2"] = unknown_row_flags

In [None]:
# Give the Class x Type table an all-zero "Unknown" row. The row is added as a
# column of the transposed table, which is then transposed back.
class_flip = class_.transpose().assign(Unknown=0)
class_ = class_flip.transpose()

In [None]:
# "Unknown" type column: 1 on the "Unknown" class row, 0 on every other row.
class_["Unknown"] = [
    1 if row_label == "Unknown" else 0 for row_label in class_.index.values
]

In [None]:
# "Fibroblast_Apoe+" subtype column: 1 on the "Fibroblast" type row, 0 on
# every other row.
types_["Fibroblast_Apoe+"] = [
    1 if row_label == "Fibroblast" else 0 for row_label in types_.index.values
]

### Final annotation placement and plotting

In [None]:
def _first_parent_label(table, child):
    """Return the first row label of *table* with a positive count in column *child*.

    Raises KeyError when *child* is not a column of *table*, and IndexError
    when no row has a positive count in that column.
    """
    positive_rows = np.where(table[child].values > 0)[0]
    return table.index.values[positive_rows][0]


# The Xenium object is loaded under the name `types` in this notebook; the
# name `xenium` used here previously is never defined and raised NameError.
types.obs["indices"] = list(range(len(types.obs.index)))

# Subtype -> Type, read from the Type x Subtype table.
type_dictionary = {
    subtype: _first_parent_label(types_, subtype)
    for subtype in merscope.obs["Subtype"].cat.categories
}
all_types = [type_dictionary.get(k) for k in merscope.obs["Subtype"].values]
merscope.obs["Type"] = all_types

# Subtype -> Immunocentric_Type. Subtypes that are not a column of the table,
# or have no positive row in it, get the literal string "None".
itype_dictionary = {}
for subtype in merscope.obs["Subtype"].cat.categories:
    try:
        itype_dictionary[subtype] = _first_parent_label(immuno_, subtype)
    except (KeyError, IndexError):
        # Only "no such column" and "no positive row" mean "no immunocentric
        # type"; any other error should surface rather than be swallowed.
        itype_dictionary[subtype] = "None"
all_itypes = [itype_dictionary.get(k) for k in merscope.obs["Subtype"].values]
merscope.obs["Immunocentric_Type"] = all_itypes

# Type -> Class, read from the Class x Type table.
class_dictionary = {
    type_label: _first_parent_label(class_, type_label)
    for type_label in np.unique(merscope.obs["Type"].values)
}
all_classes = [class_dictionary.get(k) for k in merscope.obs["Type"].values]
merscope.obs["Class"] = all_classes

# One panel per annotation level on the "mde" embedding.
sc.pl.embedding(merscope, basis="mde", color=["Sub_leiden", "Type", "Class"], ncols=1)

### Write out the data at the desired path

In [None]:
# Destination of the annotated MERSCOPE object.
output_path = "/projects/2023_Spatial_Paper/Analysis_Alex/merscope_final/analysis/final/full_celltypes_and_leiden.h5ad"
merscope.write(output_path)