Integrating the Xenium timecourse with VisiumHD

In [None]:
import anndata
import matplotlib.pyplot as plt
import numpy as np
import scanpy as sc
from scipy.stats import spearmanr
from scvi.data import cortex, smfish
from scvi.external import GIMVI
import os
import pandas as pd
import numpy as np
from sklearn.neighbors import KDTree
from tqdm.notebook import tqdm
import torch

In [None]:
# Fraction of the shared genes counted as training genes; it is only read when
# n_train_genes is computed in the gimVI preparation cell.
train_size = 1

# Inline-figure display options: "w" (white) figure face colour and the
# "retina" figure format.
%config InlineBackend.print_figure_kwargs={'facecolor' : "w"}
%config InlineBackend.figure_format='retina'

Add paths to the Visium Experiment

In [None]:
# Sample identifier; it is interpolated into the segmentation-output file names
# read and written by the cells below.
experiment = "SI_d8pi"
# Location of the VisiumHD experiment data (not referenced again by the cells
# shown in this notebook).
visium_path = "VisiumHD_data/LJI_001_visiumhd_SI"

Read in the Visium data

In [None]:
# Load the per-cell VisiumHD AnnData for this experiment.
visium_h5ad = (
    f"visium_hd/segmentation/segmentation_outputs/{experiment}_single_cell_adata.h5ad"
)
visium_data = sc.read(visium_h5ad)

# Make gene names unique, then drop genes detected in fewer than 10 cells and
# cells with fewer than 60 detected genes. Both filters modify visium_data in
# place, and the gene filter runs first.
visium_data.var_names_make_unique()
sc.pp.filter_genes(visium_data, min_cells=10)
sc.pp.filter_cells(visium_data, min_genes=60)

Read in the Xenium timecourse data

In [None]:
# Xenium timecourse AnnData; the path is relative to the working directory.
xenium_file = "timecourse.h5ad"
xenium_data = sc.read(xenium_file)

Subset the Xenium data to day 8 and keep only the genes shared with Visium to prepare for integration

In [None]:
# Keep only the cells whose batch label is the day-8 control sample.
# Note: this rebinds xenium_data, so the full timecourse is no longer
# available under that name after this cell runs.
is_day8_ctrl = xenium_data.obs["batch"] == "day8_SI_Ctrl"
xenium_data = xenium_data[is_day8_ctrl]

In [None]:
# Split the Visium genes by whether they are also present in the Xenium data,
# keeping the Visium gene order in both lists.
shared_gene_mask = visium_data.var_names.isin(xenium_data.var_names)
intersection = visium_data.var_names[shared_gene_mask].tolist()
non_intersecting = visium_data.var_names[~shared_gene_mask].tolist()

# Restrict both datasets to the shared genes, in the same order; .copy()
# turns the resulting views into standalone AnnData objects.
visium_data = visium_data[:, intersection].copy()
xenium_data = xenium_data[:, intersection].copy()

Prepare data for integration with gimVI

In [None]:
# Bookkeeping on the shared gene panel (train_size is set in the config cell).
xenium_gene_names = xenium_data.var_names
n_genes = xenium_data.n_vars
n_train_genes = int(train_size * n_genes)

# Positional split of the Visium gene index: everything except the last two
# genes, and the last two genes.
# NOTE(review): despite the "rand_" names nothing here is randomised, and
# neither array is used by the cells shown in this notebook.
visium_gene_order = visium_data.var.index.values
rand_train_genes = visium_gene_order[:-2]
rand_test_genes = visium_gene_order[-2:]


# Work on a copy so the batch label and the filtering below do not modify
# visium_data itself.
visium_data_partial = visium_data.copy()
visium_data_partial.obs["batch"] = "visium"

# Drop cells with no counts from both datasets (filtered in place).
for modality_adata in (visium_data_partial, xenium_data):
    sc.pp.filter_cells(modality_adata, min_counts=1)

# Register both datasets with gimVI: Visium uses the constant "batch" column
# as its labels, Xenium uses its "Subtype" annotation.
GIMVI.setup_anndata(visium_data_partial, labels_key="batch")
GIMVI.setup_anndata(xenium_data, labels_key="Subtype")

Run gimVI

In [None]:
# Replace X with the "raw" layer (presumably raw counts) before the model is
# built and trained.
# NOTE(review): the filter_cells(min_counts=1) and GIMVI.setup_anndata calls in
# the preceding cell ran against the previous contents of X — confirm that this
# ordering is intended.
xenium_data.X = xenium_data.layers["raw"]

In [None]:
# Set PyTorch's float32 matrix-multiplication precision to "medium"; see the
# torch.set_float32_matmul_precision documentation for the precision/speed trade-off.
torch.set_float32_matmul_precision("medium")

In [None]:
# Build the gimVI model with Xenium passed as the first dataset and Visium as
# the second, sharing a 10-dimensional latent space.
# NOTE(review): no random seed is set in the cells shown, so training may not
# be reproducible run to run.
latent_dim = 10
model = GIMVI(xenium_data, visium_data_partial, n_latent=latent_dim)

# Train for 200 epochs.
model.train(max_epochs=200)

Process the joint latent space created by gimVI

In [None]:
# gimVI returns one latent matrix per dataset, in the order the datasets were
# passed to the model: Xenium first, then Visium.
latent_xenium, latent_visium = model.get_latent_representation()
n_xenium_cells = latent_xenium.shape[0]
n_visium_cells = latent_visium.shape[0]

# Stack both modalities (Xenium rows on top) into a single AnnData.
latent_representation = np.vstack((latent_xenium, latent_visium))
latent_adata = anndata.AnnData(latent_representation)

# Record which technology each row came from, matching the stacking order.
latent_labels = ["xenium"] * n_xenium_cells + ["visium"] * n_visium_cells
latent_adata.obs["labels"] = latent_labels

# Neighbour graph and UMAP computed directly on the latent coordinates.
sc.pp.neighbors(latent_adata, use_rep="X")
sc.tl.umap(latent_adata)

Store projections

In [None]:
# Copy the joint UMAP coordinates back onto the original Xenium and Visium
# objects, selecting each modality's rows through its label.
umap_coords = latent_adata.obsm["X_umap"]
is_xenium_row = (latent_adata.obs["labels"] == "xenium").to_numpy()
is_visium_row = (latent_adata.obs["labels"] == "visium").to_numpy()

xenium_data.obsm["X_umap_gimvi"] = umap_coords[is_xenium_row]
visium_data_partial.obsm["X_umap_gimvi"] = umap_coords[is_visium_row]

In [None]:
# Tag every cell with the technology it was measured with.
xenium_data.obs["modality"] = "xenium"
visium_data_partial.obs["modality"] = "visium"

In [None]:
# Run latent inference once and unpack both modalities; the result is ordered
# like the model's inputs (Xenium first, then Visium). The previous version
# called get_latent_representation() twice, repeating the full inference pass.
xenium_latent, visium_latent = model.get_latent_representation()

# Keep each modality's latent coordinates on its own AnnData for the
# nearest-neighbour label transfer further down.
xenium_data.obsm["X_gimvi"] = xenium_latent
visium_data_partial.obsm["X_gimvi"] = visium_latent

In [None]:
# Recompute the neighbour graph with 30 neighbours and a tighter UMAP
# (min_dist=0.1); this replaces the UMAP stored in latent_adata by the earlier cell.
umap_n_neighbors = 30
umap_min_dist = 0.1
sc.pp.neighbors(latent_adata, n_neighbors=umap_n_neighbors, use_rep="X")
sc.tl.umap(latent_adata, min_dist=umap_min_dist)

In [None]:
# Copy the recomputed UMAP back onto the original objects. Rows of latent_adata
# are Xenium cells followed by Visium cells, so split at the Xenium cell count.
n_xenium_rows = xenium_data.shape[0]
joint_umap = latent_adata.obsm["X_umap"]

xenium_data.obsm["X_umap_gimvi"] = joint_umap[:n_xenium_rows]
visium_data_partial.obsm["X_umap_gimvi"] = joint_umap[n_xenium_rows:]

Impute cv axis and epithelial distance values into VisiumHD from Xenium

In [None]:
# Transfer the Xenium crypt-villus axis and epithelial distance onto the Visium
# cells: each Visium cell receives the mean value of its nearest Xenium cells
# in the shared gimVI latent space.
n_neighbors = 3  # number of Xenium neighbours averaged per Visium cell

# KD-tree over the Xenium latent coordinates, queried with every Visium cell.
xenium_gimvi = xenium_data.obsm["X_gimvi"]
kdtree = KDTree(xenium_gimvi)
distances, indices = kdtree.query(visium_data_partial.obsm["X_gimvi"], k=n_neighbors)

# indices holds one row of Xenium row positions per Visium cell. Indexing the
# Xenium columns with it yields a (n_visium_cells, n_neighbors) array that is
# averaged row-wise in one vectorised step. This replaces the per-cell Python
# loop and the deep AnnData copies that were only used for obs lookups.
xenium_cv_axis = xenium_data.obs["crypt_villi_axis"].to_numpy()
xenium_epithelial = xenium_data.obs["epithelial_distance"].to_numpy()

averaged_expression = np.asarray(
    xenium_cv_axis[indices].mean(axis=1), dtype=np.float64
)
averaged_epithelial = np.asarray(
    xenium_epithelial[indices].mean(axis=1), dtype=np.float64
)

visium_data_partial.obs["crypt_villi_axis"] = averaged_expression
visium_data_partial.obs["epithelial_distance"] = averaged_epithelial

Impute cell type into Visium

In [None]:
from collections import Counter

# Transfer the Xenium "Subtype" annotation onto the Visium cells by majority
# vote among each Visium cell's nearest Xenium cells in the gimVI latent space.
n_neighbors = 3  # number of Xenium neighbours that vote per Visium cell

# KD-tree over the Xenium latent coordinates, queried with every Visium cell.
xenium_gimvi = xenium_data.obsm["X_gimvi"]
kdtree = KDTree(xenium_gimvi)
distances, indices = kdtree.query(visium_data_partial.obsm["X_gimvi"], k=n_neighbors)

# Read the labels straight from xenium_data; the deep AnnData copies made
# previously were only used for this lookup and for the cell count.
xenium_subtypes = xenium_data.obs["Subtype"].to_numpy()

# Counter.most_common(1) returns the most frequent label among the neighbours;
# the Counter-based vote is kept so that ties are resolved as before.
predicted_subtypes = [
    Counter(xenium_subtypes[neighbor_ids]).most_common(1)[0][0]
    for neighbor_ids in tqdm(indices)
]

visium_data_partial.obs["Subtype"] = predicted_subtypes

Smoothing the crypt-villus axis

In [None]:
def _neighborhood_percentile(tree, coords, values, k, q=75):
    """Return, for every point, the q-th percentile of `values` over its k nearest points.

    Parameters
    ----------
    tree : KDTree built on `coords`.
    coords : spatial coordinates used both to build and to query the tree, so
        each point's neighbourhood includes the point itself.
    values : one value per row of `coords`.
    k : number of nearest neighbours in each neighbourhood.
    q : percentile to take within each neighbourhood (default 75).

    Returns
    -------
    float64 array of shape (n_points, 1).
    """
    _, neighbor_ids = tree.query(coords, k=k)
    smoothed = np.percentile(np.asarray(values)[neighbor_ids], q, axis=1)
    return np.asarray(smoothed, dtype=np.float64).reshape(-1, 1)


visium_spatial_coords = visium_data_partial.obsm["X_spatial"]
kdtree = KDTree(visium_spatial_coords)

# Crypt-villus axis: 75th percentile over the 60 spatially nearest cells.
n_neighbors = 60
averaged_expression = _neighborhood_percentile(
    kdtree,
    visium_spatial_coords,
    visium_data_partial.obs["crypt_villi_axis"].to_numpy(),
    k=n_neighbors,
)

# Epithelial distance: 75th percentile over the 10 spatially nearest cells.
# Fix: the query previously reused n_neighbors (60), so the dedicated
# n_neighbors_epithelial setting was defined but never applied.
n_neighbors_epithelial = 10
averaged_epithelial = _neighborhood_percentile(
    kdtree,
    visium_spatial_coords,
    visium_data_partial.obs["epithelial_distance"].to_numpy(),
    k=n_neighbors_epithelial,
)

In [None]:
# Store the spatially smoothed values as new obs columns, one value per cell.
for smoothed_column, smoothed_values in (
    ("crypt_villi_axis_smoothed", averaged_expression),
    ("epithelial_distance_smoothed", averaged_epithelial),
):
    visium_data_partial.obs[smoothed_column] = smoothed_values.flatten()

Adding smoothed predictions to the adata

In [None]:
# Keep the current (unsmoothed) imputed values under "*_predicted" names before
# the next cell overwrites the base columns.
# NOTE(review): re-running this cell after the next one would store the
# smoothed values here instead — run the cells in order.
for predicted_column, source_column in (
    ("crypt_villi_axis_predicted", "crypt_villi_axis"),
    ("epithelial_distance_predicted", "epithelial_distance"),
):
    visium_data_partial.obs[predicted_column] = visium_data_partial.obs[source_column]

In [None]:
# Make the smoothed values the default: the base columns now hold the smoothed
# versions.
for base_column, smoothed_source in (
    ("crypt_villi_axis", "crypt_villi_axis_smoothed"),
    ("epithelial_distance", "epithelial_distance_smoothed"),
):
    visium_data_partial.obs[base_column] = visium_data_partial.obs[smoothed_source]

Redefining epithelial distance as the distance to the nearest epithelial cells, because this is more trustworthy than the imputed epithelial distance

In [None]:
# Subtype labels that are grouped into the "Epithelial" class; every other
# label becomes "Other".
epithelial_subtypes = [
    "Enterocyte_3",
    "Transit_Amplifying",
    "Early_Enterocyte",
    "Enterocyte_1",
    "Enterocyte_2",
    "ISC",
    "Goblet",
    "Enteroendocrine",
    "Paneth",
    "Tuft",
]
is_epithelial = visium_data_partial.obs["Subtype"].isin(epithelial_subtypes).to_numpy()
visium_data_partial.obs["Class"] = np.where(is_epithelial, "Epithelial", "Other")

# Spatial coordinates of all cells and of the epithelial cells only.
spatial_coords = visium_data_partial.obsm["X_spatial"]
points_epi = spatial_coords[is_epithelial]

# Five nearest cells of any class, and five nearest epithelial cells, for every
# cell. all_tree is queried with the same points it was built from.
all_tree = KDTree(spatial_coords)
epi_tree = KDTree(points_epi)
distances_all, neighbors_all = all_tree.query(spatial_coords, k=5)
distances, neighbors = epi_tree.query(spatial_coords, k=5)

# Mean distance to the epithelial neighbours relative to the mean distance to
# neighbours of any class (despite the name, these are ratios of means, not medians).
distance_medians = distances.mean(axis=1) / distances_all.mean(axis=1)

# Rescale so that the 99th percentile of the ratio equals 1.
scale_p99 = np.percentile(distance_medians, 99)
visium_data_partial.obs["epithelial_distance"] = distance_medians / scale_p99

In [None]:
# Save the annotated, shared-gene Visium object.
partial_output_path = (
    f"visium_hd/segmentation/segmentation_outputs/{experiment}_visium_adata.h5ad"
)
visium_data_partial.write(partial_output_path)

Adding the imputed results to the original visium adata

In [None]:
# File holding the annotated, shared-gene Visium object; it is read here and
# then overwritten at the end of this cell with the full-gene object.
annotated_path = (
    f"visium_hd/segmentation/segmentation_outputs/{experiment}_visium_adata.h5ad"
)
# Original per-cell Visium object with all genes.
single_cell_path = (
    f"visium_hd/segmentation/segmentation_outputs/{experiment}_single_cell_adata.h5ad"
)

visium_data_partial = sc.read(annotated_path)
visium_data = sc.read(single_cell_path)

# Apply the same gene/cell filters as when the data was first loaded.
visium_data.var_names_make_unique()
sc.pp.filter_genes(visium_data, min_cells=10)
sc.pp.filter_cells(visium_data, min_genes=60)

# Left-join the imputed columns onto the full object by cell index; cells that
# are missing from visium_data_partial get missing values in these columns.
imputed_columns = ["crypt_villi_axis", "epithelial_distance", "Subtype"]
visium_data.obs = visium_data.obs.merge(
    visium_data_partial.obs[imputed_columns],
    how="left",
    left_index=True,
    right_index=True,
)

visium_data.write(annotated_path)