Creating the heatmap of cytokine expression along the crypt-villus axis

In [None]:
import os
# important for gpd.sjoin
os.environ['USE_PYGEOS'] = '0'

import scanpy as sc
import scvelo as scv
import geopandas as gpd

import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib.gridspec import GridSpecFromSubplotSpec
from mpl_toolkits.axes_grid1 import AxesGrid
from matplotlib.patches import Rectangle
#import mpl_scatter_density # adds projection='scatter_density'
import numpy as np
import warnings
import seaborn as sns
import igraph
import random
from shapely.geometry import Polygon, Point
from scipy.spatial import KDTree
from tqdm.notebook import tqdm
from PIL import Image, ImageDraw
warnings.filterwarnings('ignore')

In [2]:
# Identifier of the Visium HD sample analysed in this notebook; it is interpolated
# into the input path built in the data-loading cell.
experiment = 'SI_d8pi'

We want to use all capture spots, even the ones that don't fall within a cell nucleus. So we need to load in all the capture spots.

In [3]:
# Folder holding the square_002um binned outputs for the selected experiment.
dir_base = f'VisiumHD_data/LJI_001_visiumhd_SI/count_outputs/visium_hd_count_{experiment}/outs/binned_outputs/square_002um/'

# Count matrix of all capture spots.
raw_h5_file = dir_base + 'filtered_feature_bc_matrix.h5'
adata = sc.read_10x_h5(raw_h5_file)

# Spatial coordinates, indexed by barcode so they can be joined onto `adata.obs`.
tissue_position_file = dir_base + 'spatial/tissue_positions.parquet'
df_tissue_positions = pd.read_parquet(tissue_position_file).set_index('barcode')

# Keep the barcode as a regular column as well, so joins can be checked afterwards.
df_tissue_positions['index'] = df_tissue_positions.index

# Attach the positions to the per-spot metadata (index-on-index join).
adata.obs = adata.obs.merge(df_tissue_positions, left_index=True, right_index=True)

# One shapely point per row of the position table, given as (column, row) = (x, y).
geometry = [
    Point(x, y)
    for x, y in zip(df_tissue_positions['pxl_col_in_fullres'], df_tissue_positions['pxl_row_in_fullres'])
]
gdf_coordinates = gpd.GeoDataFrame(df_tissue_positions, geometry=geometry)



##### Making the Visium figures - gene expression trends and spatial axes

In [4]:
import matplotlib.colors as clr

# "Zissou" palette: hex colours running from blue through yellow to red.
zissou = [
    "#3A9AB2", "#6FB2C1", "#91BAB6", "#A5C2A3", "#BDC881", "#DCCB4E",
    "#E3B710", "#E79805", "#EC7A05", "#EF5703", "#F11B00",
]

# Continuous colormaps interpolated from the palette, in forward and reversed order.
colormap = clr.LinearSegmentedColormap.from_list("Zissou", zissou)
colormap_r = clr.LinearSegmentedColormap.from_list("Zissou", list(reversed(zissou)))

In [5]:
# Load the segmentation-derived Visium object of every experiment and stack them.
experiments = ['SI_d8pi']
adatas = []
for ct, experiment in enumerate(experiments):
    adata_ = sc.read_h5ad(f'/mnt/sata1/Analysis_Alex/visium_hd/segmentation/segmentation_outputs/{experiment}_visium_adata.h5ad')
    # Remember which experiment each row came from.
    adata_.obs['batch'] = experiment
    # Shift the coordinates by 300000 per position in the list
    # (presumably to keep batches spatially separated — confirm).
    adata_.obsm['X_spatial'] = adata_.obsm['X_spatial'] + int(300000 * ct)
    adatas.append(adata_)
# Leave the counter at the number of loaded experiments, as the manual counter did.
ct = len(adatas)
adata_ = sc.concat(adatas)

In [6]:
# Keep only rows that carry a value for both spatial axes; the two filters are
# applied one after the other.
for axis_column in ('crypt_villi_axis', 'epithelial_distance'):
    adata_ = adata_[adata_.obs[axis_column].notna()]

Taking the adata with all capture spots, not just the ones that fall within a nucleus:

In [7]:
# Total counts per capture spot (row sums of the count matrix), stored before any
# gene filtering or normalisation. Despite the column name, rows of `adata` are the
# square_002um capture spots loaded above, not segmented cells.
adata.obs['transcripts_per_cell'] = adata.X.sum(1)

In [8]:
# Keep only genes detected in at least 10 capture spots.
sc.pp.filter_genes(adata, min_cells=10)

# Per-spot count normalisation followed by log(1 + x); both calls modify `adata`.
# NOTE(review): `normalize_per_cell` is flagged as deprecated in recent scanpy releases
# (superseded by `normalize_total`) and may also drop zero-count spots — confirm the
# intended behaviour before swapping it.
sc.pp.normalize_per_cell(adata)
sc.pp.log1p(adata)

Creating a KD-tree over the segmented cells to find the nearest cells to each capture spot

In [9]:
# Spatial index over the coordinates of the segmentation-derived object (`adata_`);
# it is queried below with the capture-spot positions.
tree = KDTree(adata_.obsm['X_spatial'])

In [10]:
# Containers for the per-spot averages computed in the next cell.
cv = []
epi = []
# Per-row annotations of `adata_`, in the same row order as the points the tree was built from.
cv_values = adata_.obs['crypt_villi_axis'].values
epi_values = adata_.obs['epithelial_distance'].values

# Indices (element [1] of the query result) of the 5 nearest `adata_` rows for every capture spot.
# NOTE(review): the query passes (row, col) coordinates, whereas `adata.obsm['X_spatial']` is later
# built as (col, row) — confirm that `adata_.obsm['X_spatial']` is stored in (row, col) order too.
ids = tree.query(adata.obs[['pxl_row_in_fullres', 'pxl_col_in_fullres']].values, k = 5)[1]
    


For each capture spot, compute the mean crypt-villus axis and epithelial distance values of its nearest neighbors

In [11]:
# For every capture spot, average the crypt-villus axis and epithelial distance values of
# its nearest neighbours. `ids` holds one row of neighbour indices per spot, so fancy
# indexing gives an (n_spots, k) table that is reduced along the neighbour axis.
# Assigning the result (instead of appending to lists created in another cell) keeps this
# cell idempotent: re-running it no longer doubles the length of `cv` / `epi`, and it
# avoids a Python-level loop over every capture spot.
cv = np.asarray(cv_values)[ids].mean(axis=1)
epi = np.asarray(epi_values)[ids].mean(axis=1)

In [12]:
# Store the neighbour-averaged values on the capture spots; `cv` and `epi` hold one
# entry per row of `adata`, in row order.
adata.obs['crypt_villi_axis'] = cv
adata.obs['epithelial_distance'] = epi

Load in list of cytokines

In [None]:
# list from https://www.genome.jp/pathway/mmu04060
# Read the table and keep only the rows whose `type` is 'ligand'.
df_cytokines = (
    pd.read_csv("kegg_cytokines.csv")
    .loc[lambda table: table['type'] == 'ligand']
)
df_cytokines

In [14]:
# Ligand genes that are present among the genes of `adata`, in table order.
cytokine_genes = (
    df_cytokines
    .loc[df_cytokines['gene'].isin(adata.var_names), 'gene']
    .to_list()
)

Define functions for creating the scVelo heatmap

In [15]:
from typing import List

def filter_adata_expressed_in_n_cells(adata, percent=0.05):
    """
    Keep only the genes detected in more than a given fraction of observations.

    Parameters:
    - adata: object exposing a count matrix `.X` (observations x genes) and supporting
      `adata[:, mask]` column selection.
    - percent (float, optional): minimum fraction of observations (exclusive) in which
      a gene must have a value above zero (default: 0.05).

    Returns:
    - The column-subsetted object produced by `adata[:, mask]`.
    """
    detected = adata.X > 0
    # Column-wise mean of the boolean matrix = fraction of observations expressing each gene.
    fraction_per_gene = np.mean(detected, axis=0)
    return adata[:, fraction_per_gene > percent]

def scvelo_heatmap(
    adata: sc.AnnData,
    batches: List[str],
    sortby: str,
    highlight: List[str],
    n_bins: int = 5,
):
    """
    Create a heatmap of gene expression trends along an observation-level variable,
    restricted to selected batches and with selected gene labels shown.

    Parameters:
    - adata (sc.AnnData): Annotated data object; `adata.obs` must contain a "batch"
      column and the column named by `sortby`. All genes in `adata.var_names` are plotted.
    - batches (List[str]): Batch identifiers to keep.
    - sortby (str): Observation variable to order the heatmap by (e.g., "crypt_villi_axis").
    - highlight (List[str]): Gene labels to keep visible on the y-axis; all other
      y tick labels are hidden.
    - n_bins (int, optional): Number of bins; the smoothing window `n_convolve` is the
      number of observations divided by `n_bins`, with a minimum of 1 (default: 5).

    Returns:
    - s: the object returned by `scv.pl.heatmap(..., show=False)`; it exposes
      `ax_heatmap` and `savefig`.

    Side effects:
    - Prints the selected batches and the chosen `n_convolve`.
    - Saves the figure to 'figures/Visium_Cytokines.pdf' (the folder is created if missing).

    Notes:
    - Genes listed in the notebook-level `in_xenium` array get a red tick label, so
      `in_xenium` must be defined before this function is called.
    - The notebook-level `colormap` is used as the colour map.

    Example:
    ```
    scvelo_heatmap(adata, batches=list(batches.keys())[0:2],
               sortby="crypt_villi_axis",
               highlight=highlight,
               n_bins=20)
    ```
    """
    print("Creating Heatmap for batches", " + ".join(batches))
    # Subset to the requested batches; copy so the caller's object is never modified.
    adata = adata[adata.obs["batch"].isin(batches)].copy()

    # Width of the smoothing window. Clamp to 1 so that fewer observations than bins
    # does not produce an empty (zero-length) convolution kernel.
    n_convolve = max(1, len(adata) // n_bins)
    print(f"Setting `n_convolve` to {n_convolve} ({n_bins} bins, {len(adata)} cells) ")

    # Plot, ordering observations by the requested variable (previously hard-coded).
    s = scv.pl.heatmap(
        adata,
        var_names=adata.var_names,
        sortby=sortby,
        n_convolve=n_convolve,
        show=False,
        yticklabels=True,
        rasterized=True,
        color_map=colormap,
        figsize = (8, 16)
    )
    ax = s.ax_heatmap
    ax.tick_params(axis='both', labelsize=10)  # Adjust font size of tick labels

    highlighted = set(highlight)
    xenium_genes = set(in_xenium)
    tick_lines = ax.get_yticklines()

    # Walk the y-axis tick labels: hide the ones not in `highlight` and colour the
    # ones that are also part of the Xenium panel.
    for i, label in enumerate(ax.get_yticklabels()):
        gene = label.get_text()
        if gene not in highlighted:
            label.set_visible(False)
            tick_lines[2 * i + 1].set_visible(False)
        # The first tick line of each pair is always hidden.
        tick_lines[2 * i].set_visible(False)
        if gene in xenium_genes:
            label.set_color('red')  # Set color to red

    ax.set_xlabel("")
    ax.set_title(f"Visium Gene Expression Trends Along {sortby}")
    ax.grid(False)
    # Make sure the output folder exists so saving does not fail on a fresh checkout.
    os.makedirs('figures', exist_ok=True)
    s.savefig('figures/Visium_Cytokines.pdf')
    return s

Get the list of genes imaged in the Xenium data

In [16]:
# Gene names (var index) of the Xenium dataset stored in 'timecourse.h5ad'.
# `scvelo_heatmap` reads this notebook-level variable to colour the tick labels of
# those genes red, so this cell must run before the heatmap is drawn.
in_xenium  = sc.read('timecourse.h5ad').var.index.values

Create an image of the spatial data to mark what section we should calculate gene trends for. This is important because some sections have very low transcript detection efficiency, which can lead to misleading trends.

In [17]:
# Spatial coordinates of every capture spot as (column, row) full-resolution pixel
# positions; used below for rendering the image and for the polygon test.
adata.obsm['X_spatial'] = adata.obs[['pxl_col_in_fullres', 'pxl_row_in_fullres']].values

In [19]:

# Coordinates of every capture spot.
all_spatial = adata.obsm['X_spatial']
# One colour per spot; every spot is drawn with the same red hex code.
image_colors = ['#FF0000'] * len(adata.obs.index)

#D6, D7, D30 #downsize = 20
# Factor by which the coordinates are shrunk to fit the canvas; the same factor is
# applied in reverse when the annotated polygons are read back.
downsize = 50
points = all_spatial / downsize

# Canvas size in pixels (adjust as needed).
image_width = 2000
image_height = 2000

# Start from a white RGB canvas.
base_image = Image.new('RGB', (image_width, image_height), (255, 255, 255))

# Draw each spot as a small filled ellipse centred on its down-scaled position.
draw = ImageDraw.Draw(base_image)
point_size = 1  # half-width of each drawn point
ct = 0
for x, y in points:
    bounding_box = (x - point_size, y - point_size, x + point_size, y + point_size)
    draw.ellipse(bounding_box, fill=image_colors[ct])
    ct += 1

# Write the image that is annotated in the next step.
file_path = 'Subtype_image.png'
base_image.save(file_path)

In the labelme label editor, create a polygon around the section of the tissue to remove. Save the annotation as a JSON file.

In [20]:
import json

# Barcodes flagged for removal are collected here by a later cell.
remove_ids = []
# Work on a copy so the full capture-spot object stays untouched.
current_adata = adata.copy()

json_file_path = 'Subtype_image.json'
all_spatial = np.asarray(current_adata.obsm['X_spatial'])

# Load the annotation file saved from the label editor.
with open(json_file_path, 'r') as json_file:
    data = json.load(json_file)

# Every annotated shape contributes one polygon (list of vertices in image pixels).
shapes = data['shapes']
removals = [shape['points'] for shape in shapes]

# Collect the row positions of all spots lying inside any polygon. A set is used so
# that spots covered by overlapping polygons are only counted once.
flagged_positions = set()
x_coords = all_spatial[:, 0]
y_coords = all_spatial[:, 1]
for ir in removals:
    # Undo the down-scaling applied when the image was rendered.
    poly = Polygon(np.array(ir) * downsize)

    # Cheap exact prefilter: a point inside the polygon must lie inside its bounding
    # box, so only those candidates need the (slow) per-point shapely test.
    min_x, min_y, max_x, max_y = poly.bounds
    candidates = np.flatnonzero(
        (x_coords >= min_x) & (x_coords <= max_x)
        & (y_coords >= min_y) & (y_coords <= max_y)
    )
    flagged_positions.update(
        int(i) for i in candidates if Point(all_spatial[i]).within(poly)
    )

# Unique row positions (into `current_adata`) of the spots to remove.
index_set = sorted(flagged_positions)
total_indices = index_set


In [21]:
# Row positions of `current_adata` that are NOT in `index_set`, i.e. the capture
# spots that lie outside every annotated polygon.
keep_inds = np.setdiff1d(np.arange(len(current_adata.obs.index)), index_set)


In [None]:
if len(index_set) > 0:
    # Record the barcodes inside the annotated polygons BEFORE subsetting. Previously
    # this was done after subsetting, which stored the kept barcodes instead.
    remove_ids.extend(current_adata.obs.index[index_set].values)
    # Keep only the capture spots outside the annotated polygons.
    # NOTE: `index_set` / `keep_inds` are positions in the un-subsetted copy, so this
    # cell must not be re-run without re-running the polygon-selection cell first.
    current_adata = current_adata[keep_inds, :]
# Number of capture spots flagged for removal.
print(len(remove_ids))

In [23]:
# `scvelo_heatmap` subsets on `obs["batch"]`, so label every remaining capture spot
# with the sample identifier.
current_adata.obs['batch'] = 'SI_d8pi'

Create the gene trend heatmap on the subsetted object

In [None]:
# Restrict the subsetted object to the cytokine ligand genes and draw their expression
# trends; all of them are passed as `highlight`, so every gene label stays visible.
s = scvelo_heatmap(
    current_adata[:, current_adata.var.index.isin(cytokine_genes)],
    batches=['SI_d8pi'],
    sortby="crypt_villi_axis",
    highlight=cytokine_genes,
    n_bins=7
)

Plot the crypt-villus axis on the Visium data

In [None]:
sc.set_figure_params(dpi=300)

# `adata` (all capture spots) never received a 'batch' column — only the segmented
# object and the subsetted copy did — so the loop below raised a KeyError on a fresh
# run. Label the spots with the sample identifier when the column is missing.
if 'batch' not in adata.obs:
    adata.obs['batch'] = experiment

# Make sure the output folder exists before saving.
os.makedirs('figures', exist_ok=True)

# One spatial plot per batch, coloured by the crypt-villus axis value of each spot.
for batch in np.unique(adata.obs['batch']):
    fig = sc.pl.embedding(adata[adata.obs['batch'] == batch], basis='spatial', color = 'crypt_villi_axis', title=f'{batch} Visium Crypt Villi Axis', return_fig=True)
    ax = fig.gca()
    # Equal axis scaling so the tissue is not distorted.
    ax.axis('equal')
    fig.savefig(f'figures/{batch}_visium_cvaxis.pdf')