In [None]:
import scanpy as sc
import numpy as np
from tqdm.notebook import tqdm
import scipy.stats as stats
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt
import shapely
import glob
from sklearn.neighbors import NearestNeighbors
from PIL import Image, ImageDraw
import numpy as np
from scipy.spatial import cKDTree
import json
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from tensorflow.keras.models import Model
from sklearn.preprocessing import OneHotEncoder
from core_functions.unrolling import *
from core_functions.initial_neighborhoods import *

#### This notebook finalizes the spatial axes in the human data, assigns final cell types, and identifies CD8AB+ T cells

Set `output_folder` to the directory that contains the human AnnData file (`final_human_adata.h5ad`).

In [None]:
# Directory that holds the cleaned human dataset; the final object is also
# written back into this folder at the end of the notebook.
output_folder = r"/mnt/sata1/Analysis_Alex/human_r1/analysis/cleaned"

# Load the AnnData object that every following cell annotates.
adata_path = os.path.join(output_folder, "final_human_adata.h5ad")
adata = sc.read(adata_path)

Set the path to the final cluster-annotation Excel sheet (`MiguelTypist_human.xlsx`), which is included in this repository.

In [None]:
# Read the cluster-annotation sheet. ``header=1`` takes the column names from
# the second row of the sheet, and ``index_col=0`` uses the first column as the
# row index. Later cells read the columns "sub_leiden", "Class", "Type",
# "Subtype" and "Immunocentric_Type" from this table.
miguel_cell = pd.read_excel(
    "MiguelTypist_human.xlsx",
    index_col=0,
    header=1,
)

#### Adding the annotations to the cells in the object

In [None]:
# Keep only the obs columns whose names contain neither "UCell" nor "Topic".
kept_columns = [
    name
    for name in adata.obs.columns
    if "UCell" not in name and "Topic" not in name
]
adata.obs = adata.obs[kept_columns]

In [None]:
# Lookup table: sub-cluster label -> that sheet row's four annotation fields.
# If a label occurs on several rows, the last row wins.
annotation_fields = ["Class", "Type", "Subtype", "Immunocentric_Type"]
miguel_dict = {
    sheet_row["sub_leiden"]: sheet_row[annotation_fields]
    for _, sheet_row in miguel_cell.iterrows()
}

In [None]:
# Fail early, with a readable message, if any sub-cluster present in the data
# has no row in the annotation sheet. Without this check the lookup below
# yields ``None`` for those cells, and the problem only surfaces later as an
# opaque shape error when the annotation DataFrame is built.
unmapped_labels = sorted(
    {str(label) for label in adata.obs["Sub_leiden"].unique() if label not in miguel_dict}
)
if unmapped_labels:
    raise ValueError(
        "No annotation found for Sub_leiden label(s): " + ", ".join(unmapped_labels)
    )

# One annotation row per cell, in the same order as adata.obs.
total_annotations = [miguel_dict.get(label) for label in adata.obs["Sub_leiden"]]

In [None]:
# Stack the per-cell annotation rows into a table with one row per cell and
# one column per annotation field.
annotation_columns = ["Class", "Type", "Subtype", "Immunocentric_Type"]
annotation_matrix = np.array(total_annotations)
annotations = pd.DataFrame(annotation_matrix, columns=annotation_columns)

In [None]:
# Copy each annotation field onto the cells as a categorical obs column.
# ``.values`` strips the index so assignment is positional.
for field in ("Class", "Type", "Subtype", "Immunocentric_Type"):
    adata.obs[field] = pd.Categorical(annotations[field].values)

#### Identifying CD8AB+ T cells

In [None]:
# Positional indices of cells with a non-zero value for both CD8A and CD8B.
cd8_gene_mask = adata.var.index.isin(["CD8A", "CD8B"])
cd8_nonzero_counts = np.count_nonzero(adata.X[:, cd8_gene_mask], axis=1)
ab_cells = np.flatnonzero(cd8_nonzero_counts == 2)

In [None]:
# Positional indices of cells with a non-zero value for CD3D and/or CD3G.
cd3_gene_mask = adata.var.index.isin(["CD3D", "CD3G"])
cd3_nonzero_counts = np.count_nonzero(adata.X[:, cd3_gene_mask], axis=1)
dg_cells = np.flatnonzero(cd3_nonzero_counts >= 1)

In [None]:
# Positional indices of cells with no non-zero CD4 value.
cd4_gene_mask = adata.var.index.isin(["CD4"])
cd4_nonzero_counts = np.count_nonzero(adata.X[:, cd4_gene_mask], axis=1)
no_cd4 = np.flatnonzero(cd4_nonzero_counts == 0)

In [None]:
# Positional indices of cells whose "leiden" label is "2" or "3".
in_cluster_2_or_3 = adata.obs["leiden"].isin(["2", "3"]).to_numpy()
cluster_2_or_3 = np.flatnonzero(in_cluster_2_or_3)

In [None]:
# Cells that satisfy all four criteria at once. The result is a list of
# positional indices in no particular order.
intersection_result = list(
    set(ab_cells).intersection(dg_cells, cluster_2_or_3, no_cd4)
)

In [None]:
# Per-cell flag vector: 0.0 everywhere, 1.0 at the selected cell positions.
cd8_col = np.zeros(adata.obs.shape[0])
cd8_col[intersection_result] = 1

In [None]:
# Store the flag vector (1.0 = selected as CD8AB+ T cell, 0.0 = not) as a
# per-cell obs column.
adata.obs["CD8_column"] = cd8_col

##### Finalizing the crypt-villus axis

In [None]:
def custom_percentile_scale(data, min_val, max_val):
    """Linearly rescale ``data`` so that ``min_val`` maps to 0 and ``max_val`` to 1.

    Values outside ``[min_val, max_val]`` are not clipped: they map below 0 or
    above 1. ``data`` may be a scalar or anything that supports elementwise
    arithmetic (NumPy array, pandas Series).
    """
    value_range = max_val - min_val
    return (data - min_val) / value_range


# Rescale the crypt-villus axis separately within each batch, using that
# batch's 0.1th and 99.9th percentiles as the 0 and 1 anchors. The results are
# collected in a fresh array and written back only once every batch is done,
# so each batch's percentiles are computed from the un-normalised values.
batch_labels = adata.obs["batch"]
axis_values = adata.obs["crypt_villi_axis"]

min_maxs = {}
normalized_cv = np.zeros(len(adata.obs))
for cat in batch_labels.cat.categories:
    ids = np.flatnonzero((batch_labels == cat).to_numpy())
    batch_values = axis_values.iloc[ids]

    min_val = np.percentile(batch_values, 0.1)
    max_val = np.percentile(batch_values, 99.9)
    min_maxs[cat] = [min_val, max_val]

    normalized_cv[ids] = custom_percentile_scale(batch_values, min_val, max_val)

adata.obs["crypt_villi_axis"] = normalized_cv

##### Smoothing the crypt-villus axis

In [None]:
# Smooth the crypt-villus axis: within each batch, replace every cell's value
# with the mean over its nearest spatial neighbours. The tree is queried with
# its own points, so a cell is normally among its own neighbours.
neighbor_kernel = 100  # number of nearest neighbours averaged per cell
query_chunk_size = 50_000  # cells per KD-tree query; bounds peak memory

crypt_vill = np.zeros(len(adata))
for unique in np.unique(adata.obs["batch"]):
    batch_id = np.where(adata.obs["batch"] == unique)[0]
    batch_axis = adata.obs["crypt_villi_axis"].values[batch_id]
    spatial_points = np.asarray(adata.obsm["X_spatial"])[batch_id, :2]

    # When k exceeds the number of points, the tree pads the result with the
    # out-of-range index n, which would raise an IndexError below. Cap k at
    # the batch size so small batches average over all of their cells.
    k = min(neighbor_kernel, len(batch_id))

    # cKDTree is the tree class this notebook imports explicitly.
    tree = cKDTree(spatial_points)

    smoothed = np.empty(len(batch_id))
    for start in tqdm(range(0, len(batch_id), query_chunk_size)):
        stop = start + query_chunk_size
        _, neighbors = tree.query(spatial_points[start:stop], k=k)
        # With k == 1 the query returns a 1-D array; force (cells, k).
        neighbors = neighbors.reshape(len(neighbors), -1)
        smoothed[start:stop] = batch_axis[neighbors].mean(axis=1)

    crypt_vill[batch_id] = smoothed
adata.obs["crypt_villi_axis"] = crypt_vill

##### Clipping the epithelial axis to account for the high LP values of Peyer's patches

In [None]:
# Cap the epithelial distance to the range [0, 0.1] before rescaling.
adata.obs["epithelial_distance_clipped"] = np.clip(
    adata.obs["epithelial_distance"], 0, 0.1
)

# Rescale the clipped distance separately within each batch, using that
# batch's 0.1th and 99.9th percentiles as the 0 and 1 anchors. Values beyond
# those percentiles therefore fall slightly outside [0, 1].
#
# custom_percentile_scale is the helper defined in the crypt-villus
# normalisation cell earlier in this notebook. It is deliberately not
# redefined here, so the notebook holds a single definition.
batch_labels = adata.obs["batch"]
clipped_values = adata.obs["epithelial_distance_clipped"]

min_maxs = {}
normalized_cv = np.zeros(len(adata.obs))
for cat in batch_labels.cat.categories:
    ids = np.where(batch_labels == cat)[0]
    # Index the obs column directly rather than building an AnnData view of
    # the whole object just to read one column.
    batch_values = clipped_values.iloc[ids]

    min_val = np.percentile(batch_values, 0.1)
    max_val = np.percentile(batch_values, 99.9)
    min_maxs[cat] = [min_val, max_val]

    normalized_cv[ids] = custom_percentile_scale(batch_values, min_val, max_val)

adata.obs["epithelial_distance_clipped"] = normalized_cv

##### Writing out the final object

In [None]:
# Save the annotated object into the same folder under a new file name, so the
# input file ("final_human_adata.h5ad") is not overwritten.
adata.write(os.path.join(output_folder, "final_human_adata_newest.h5ad"))