Cellchat preparation for VisiumHD

In [None]:
import scanpy as sc
import matplotlib.pyplot as plt
import numpy as np
import warnings
import pandas as pd
from scipy.spatial import KDTree

warnings.filterwarnings("ignore")

Read in the adatas for the experiments you want to use

In [None]:
experiments = ['SI_d8pi']

# Coordinate offset applied per experiment (experiment 0 is left unshifted).
# Presumably this keeps sections from different experiments spatially separated
# after concatenation -- TODO confirm the value exceeds the extent of one section.
SPATIAL_OFFSET_PER_EXPERIMENT = 300000

adatas = []
for ct, experiment in enumerate(experiments):
    adata = sc.read_h5ad(f'visium_hd/segmentation/segmentation_outputs/{experiment}_visium_adata.h5ad')
    # Remember which experiment every cell came from.
    adata.obs['batch'] = experiment
    # The scalar offset is added to every column of X_spatial.
    adata.obsm['X_spatial'] = adata.obsm['X_spatial'] + SPATIAL_OFFSET_PER_EXPERIMENT * ct
    adatas.append(adata)
adata = sc.concat(adatas)

# Keep only cells that carry a Subtype annotation. Boolean indexing returns a
# view of the concatenated object; later cells add columns to .obs, so take an
# explicit copy here instead of relying on the implicit view-to-copy conversion.
adata = adata[~pd.isna(adata.obs['Subtype'])].copy()

Read in the Xenium timecourse data to get the cell type hierarchy

In [None]:
# Load the annotated reference dataset and keep a handle on its per-cell table.
types = sc.read('timecourse.h5ad')
reference_obs = types.obs

# Co-occurrence tables between annotation levels: rows are the first argument's
# labels, columns the second argument's labels, values are cell counts.
types_ = pd.crosstab(reference_obs['Type'], reference_obs['Subtype'])
class_ = pd.crosstab(reference_obs['Class'], reference_obs['Type'])
immuno_ = pd.crosstab(reference_obs['Immunocentric_Type'], reference_obs['Subtype'])

Create a cell type hierarchy dictionary

In [None]:


def _first_parent(crosstab, child):
    """Return the first row label of `crosstab` with a positive count in column `child`.

    Raises:
        KeyError: `child` is not a column of `crosstab`.
        IndexError: no row has a positive count for `child`.
    """
    return crosstab.index.values[np.where(crosstab[child].values > 0)[0]][0]


# Subtype -> Type, looked up in the reference crosstab. A Subtype that is absent
# from the reference raises here instead of being silently left unmapped.
type_dictionary = {
    subtype: _first_parent(types_, subtype)
    for subtype in adata.obs['Subtype'].cat.categories
}
all_types = [type_dictionary.get(k) for k in adata.obs['Subtype'].values]
adata.obs['Type'] = all_types

# Subtype -> Immunocentric_Type. Subtypes with no match in the reference get the
# string 'None'. Only the two lookup failures are caught, so unrelated errors
# (and KeyboardInterrupt) are no longer swallowed.
itype_dictionary = {}
for subtype in adata.obs['Subtype'].cat.categories:
    try:
        itype_dictionary[subtype] = _first_parent(immuno_, subtype)
    except (KeyError, IndexError):
        itype_dictionary[subtype] = 'None'
all_itypes = [itype_dictionary.get(k) for k in adata.obs['Subtype'].values]
adata.obs['Immunocentric_Type'] = all_itypes

# Type -> Class, using the Type labels assigned above.
class_dictionary = {
    type_label: _first_parent(class_, type_label)
    for type_label in np.unique(adata.obs['Type'].values)
}
all_classes = [class_dictionary.get(k) for k in adata.obs['Type'].values]
adata.obs['Class'] = all_classes


Getting an idea of the distance to the nearest cells to get parameters for cellchat

In [None]:
def kdtree_nearest_neighbor_distances(points):
    """Distance from every point to its closest other point.

    Args:
        points: array-like of point coordinates, one row per point.

    Returns:
        1-D numpy array holding, for each input row, the distance to its
        nearest neighbour among the remaining rows.
    """
    coords = np.array(points)

    # Ask for two neighbours per point: the closest hit is the query point
    # itself (distance 0), so the second hit is the actual nearest neighbour.
    neighbor_distances = KDTree(coords).query(coords, k=2)[0]

    return neighbor_distances[:, 1]
# Restrict the estimate to the cells whose first two spatial coordinates are
# both below 20000, then measure nearest-neighbour distances inside that window.
spatial_coords = adata.obsm['X_spatial']
in_window = (spatial_coords[:, 0] < 20000) & (spatial_coords[:, 1] <20000)
near_dist = kdtree_nearest_neighbor_distances(spatial_coords[in_window])

In [None]:
import matplotlib.pyplot as plt
# Histogram of the nearest-neighbour distances computed above. The axes are
# labelled so the figure can be read on its own when skimming the notebook.
fig, ax = plt.subplots()
ax.hist(near_dist, bins=100)
ax.set_xlabel("Distance to nearest neighbouring cell (X_spatial units)")
ax.set_ylabel("Number of cells")
ax.set_title("Nearest-neighbour distances")
plt.show()

Normalize gene expression counts

In [None]:
# Per-cell count normalisation with scanpy's default arguments, followed by a
# log1p transform. Both calls are used for their effect on `adata`; their return
# values are discarded.
# NOTE(review): the scanpy documentation lists normalize_per_cell as deprecated in
# favour of sc.pp.normalize_total -- confirm against the installed version before
# switching, since the two may not treat low-count cells identically.
sc.pp.normalize_per_cell(adata)
sc.pp.log1p(adata)

Defining gates for top, crypt, and muscularis

In [None]:
# Coordinates of the gates
# Each entry describes one polygonal gate. "edges" is the list of [x, y] polygon
# vertices; classify_cells passes each vertex's x value through `transformation`
# and tests cells at (epithelial_distance_transformed, crypt_villi_axis) against
# the resulting polygons.
# "label_position", "fill" and "stroke" are not read anywhere in this notebook --
# presumably plotting metadata; verify before removing.
d8_gates = {
    # "Top" and "Crypt" share the edge y = 0.6 between x = 0.15 and x = 0.8.
    "Top": {
        "edges": [
            [0.15, 0.6],
            [0.8, 0.6],
            [0.8, 1.03],
            [0.15, 1.03],
        ],
        "label_position": {"x": 0.16, "y": 0.9},
        "fill": "#3A9AB244",
        "stroke": "#3A9AB2",
    },
    "Crypt": {
        "edges": [
            [0.15, 0.6],
            [0.8, 0.6],
            [0.8, 0],
            [0.15, 0],
        ],
        "label_position": {"x": 0.16, "y": 0.05},
        "fill": "#F11B0044",
        "stroke": "#F11B00",
    },
    # Unlike the two gates above, this vertex list repeats its first vertex at the
    # end. It shares the edge x = 0.8 (y from 0 to 0.4) with "Crypt".
    "Muscularis": {
        "edges": [[0.8, 0], [0.8, 0.4], [6, 0.4], [6, 0], [0.8, 0]],
        "label_position": {"x": 0.6, "y": 0.05},
        "fill": "#BDC88155",
        "stroke": "#BDC881",
    },
}


In [None]:
# Custom biexponential transformation. Maybe not needed for IF data
def transformation(x, a=0.1, b=0.1, c=0.5, d=2.5, f=4, w=1):
    """Evaluate a*exp(b*(x - w)) - c*exp(-d*(x - w)) + f element-wise.

    Args:
        x: scalar or array-like input; it is converted with np.array first.
        a, b: scale and rate of the growing exponential term.
        c, d: scale and rate of the decaying exponential term.
        f: constant added to the result.
        w: shift subtracted from x before both exponentials.

    Returns:
        numpy array (0-d for scalar input) of transformed values.
    """
    shifted = np.array(x) - w
    growing_term = a * np.exp(b * shifted)
    decaying_term = c * np.exp(-d * shifted)
    return growing_term - decaying_term + f


def classify_cells(adata, gates, transformation=transformation):
    """
    Classify cells based on the gates.

    Each gate's "edges" vertices are turned into a polygon (x values passed
    through `transformation`), every cell becomes a point at
    (epithelial_distance_transformed, crypt_villi_axis), and the two are
    spatially joined.

    Side effects on `adata.obs`: writes "epithelial_distance_transformed" and
    resets "gate" to False (the caller is expected to fill "gate" from the
    returned frame).

    Args:
        adata: object whose `.obs` table has "epithelial_distance" and
            "crypt_villi_axis" columns.
        gates: mapping of gate name -> dict with an "edges" list of [x, y]
            vertices.
        transformation: callable applied to the cells' epithelial distance and
            to the x coordinate of every gate vertex.

    Returns:
        GeoDataFrame with exactly one row per cell, carrying the cell point in
        "cells" and the matched gate name in "index_right" (missing when the
        cell lies in no gate). A cell matching several gates, e.g. one lying on
        an edge shared by two gates, keeps only the first match returned by the
        join.
    """
    from shapely.geometry.polygon import Polygon
    import geopandas as gpd

    adata.obs["epithelial_distance_transformed"] = transformation(
        adata.obs["epithelial_distance"]
    )
    adata.obs["gate"] = False

    print("Creating polygons")
    polygons = {}
    for gate in gates:
        # Apply transformation to x values; float() turns the 0-d array the
        # default transformation returns into a plain coordinate.
        points = [
            [float(transformation(element[0]))] + list(element[1:])
            for element in gates[gate]["edges"]
        ]
        polygons[gate] = Polygon(points)
    polygons = gpd.GeoSeries(polygons)
    gpd_poly = gpd.GeoDataFrame({"gates": polygons}, geometry="gates")

    print("Creating cells")
    cells = gpd.GeoSeries.from_xy(
        adata.obs["epithelial_distance_transformed"], adata.obs["crypt_villi_axis"]
    )
    gpd_cells = gpd.GeoDataFrame({"cells": cells}, geometry="cells")
    # Positional key used to collapse multi-gate matches back to one row per
    # cell; positions are used rather than the index because observation names
    # are not guaranteed to be unique at this point.
    position_column = "_cell_position"
    gpd_cells[position_column] = np.arange(len(gpd_cells))

    print("Joining cells and polygons")
    result = gpd.sjoin(
        gpd_cells,
        gpd_poly,
        how="left",
    )
    # A left join emits one row per (cell, matching gate) pair, so a cell on a
    # shared gate boundary would appear more than once and break the per-cell
    # assignment done by the caller.
    result = result.drop_duplicates(subset=position_column, keep="first").drop(
        columns=position_column
    )
    return result

Assigning all cells to the gate they lie within

In [None]:
# Run the gate classification. Besides returning the joined table, classify_cells
# also writes the "epithelial_distance_transformed" and "gate" columns of adata.obs.
classification = classify_cells(adata, d8_gates)
# Display the join result; "index_right" holds the name of the matched gate.
classification

In [None]:
# Store the matched gate name for every cell, replacing the False placeholder
# written by classify_cells.
# NOTE(review): this is a pandas Series assignment and so aligns on the index --
# it assumes `classification` has one row per cell with the same index as
# adata.obs; confirm this when combining several experiments.
adata.obs["gate"] = classification["index_right"]
adata

Defining CD8ab+ cells and adding their positional information

In [None]:
# Collapse the two CD8 T-cell subtypes into a single 'Cd8_T-Cell' label in a
# scratch array, leaving adata.obs['Subtype'] itself untouched.
is_cd8_subtype = adata.obs['Subtype'].isin(['Cd8_T-Cell_P14', 'Cd8_T-Cell_ab+'])
temp_subtype_array = np.array(list(adata.obs['Subtype'].values))
temp_subtype_array[is_cd8_subtype] = 'Cd8_T-Cell'

In [None]:
def make_name(gate, cell):
    """Build the gate-aware label for one cell.

    Args:
        gate: name of the gate the cell fell into ("Top", "Crypt",
            "Muscularis") or any other value when it matched none.
        cell: the cell's (collapsed) subtype label.

    Returns:
        `cell` unchanged unless it is "Cd8_T-Cell"; for those cells, a label
        naming the gate, or the "undeterminded" label when the gate is not one
        of the three known names.
    """
    if cell != "Cd8_T-Cell":
        return cell

    gate_labels = {
        "Top": "Cd8 ab+ top",
        "Crypt": "Cd8 ab+ crypt",
        "Muscularis": "Cd8 ab+ muscularis",
    }
    return gate_labels.get(gate, "Cd8 ab+ undeterminded")


# Gate-aware label per cell: CD8 T cells get their gate appended, every other
# cell keeps its subtype.
adata.obs["Subtype_gate"] = [
    make_name(gate, cell) for gate, cell in zip(adata.obs["gate"], temp_subtype_array)
]
# Drop CD8 T cells that did not fall into any gate. The slice is a view, and
# the next line writes into .obs, so take an explicit copy rather than relying
# on the implicit view-to-copy conversion.
adata = adata[adata.obs["Subtype_gate"] != "Cd8 ab+ undeterminded"].copy()
adata.obs["Subtype_gate"] = adata.obs["Subtype_gate"].astype("category")
adata

Saving out adata

In [None]:
# De-duplicate observation names before writing the object to disk.
# NOTE(review): this runs after the index-aligned "gate" assignment above --
# confirm names are already unique at that point when loading several experiments.
adata.obs_names_make_unique()

In [None]:
import os

# Record whether the output folder was already there (only for the message),
# then create it. exist_ok=True avoids the check-then-create race, and a plain
# file named "tmp" now fails here rather than later in write_h5ad.
tmp_dir_existed = os.path.isdir("tmp")
os.makedirs("tmp", exist_ok=True)
if tmp_dir_existed:
    print("Folder 'tmp' already exists.")
else:
    print("Folder 'tmp' created successfully!")

# Persist the gated, annotated object for the downstream CellChat step.
adata.write_h5ad("tmp/adata_gated.h5ad")