# GraphST

- **Creator**: Sebastian Birk (<sebastian.birk@helmholtz-munich.de>).
- **Affiliation:** Helmholtz Munich, Institute of Computational Biology (ICB), Talavera-López Lab
- **Date of Creation:** 11.01.2023
- **Date of Last Modification:** 12.01.2023

- The GraphST source code is available at https://github.com/JinmiaoChenLab/GraphST.
- The corresponding preprint is Long, Y. et al. DeepST: A versatile graph contrastive learning framework for spatially informed clustering, integration, and deconvolution of spatial transcriptomics. Preprint at https://doi.org/10.1101/2022.08.02.502407.

- The workflow of this notebook follows the tutorial from https://deepst-tutorials.readthedocs.io/en/latest/Tutorial%201_10X%20Visium.html.
- The authors use raw counts as input to GraphST (stored in adata.X). Therefore, we also use raw counts.
- To define the spatial neighborhood graph, the original GraphST paper connects each cell to its 3 nearest neighbors and uses the union of all neighbor relations as the final spatial neighborhood graph (i.e. the adjacency matrix is made symmetric). In this notebook, the number of neighbors is instead varied across runs (4, 8, 12, 16 and 20; see Section 3).

## 1. Setup

### 1.1 Import Libraries

In [1]:
import multiprocessing as mp
import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import squidpy as sq
import torch
from GraphST import GraphST
from sklearn import metrics

### 1.2 Define Parameters

In [2]:
# Name of the dataset; used to build the input .h5ad file name and the output paths
dataset = "seqfish_mouse_organogenesis_embryo2"
# Passed as `color` to the latent-space UMAP plots
cell_type_key = "celltype_mapped_refined"
# Prefix of the `adata_original.obsm` keys under which each run's embedding is stored
latent_key = "graphst_latent"
# Resolution passed to `sc.tl.leiden`; also part of the name of the resulting cluster key
leiden_resolution = 0.3
# NOTE(review): not referenced in any cell shown here — confirm whether it is still needed
n_clusters = 7
# Random state of the Leiden clustering (the model seeds are defined separately per run)
random_seed = 0

### 1.3 Run Notebook Setup

In [3]:
# Set scanpy's figure size to 6 x 6 for the plots of this notebook
sc.set_figure_params(figsize=(6, 6))

In [4]:
# Record when the notebook was executed; the formatted value (DDMMYYYY_HHMMSS)
# is embedded in the file names of saved figures
now = datetime.now()
current_timestamp = f"{now:%d%m%Y_%H%M%S}"

In [5]:
# Run device: use the first CUDA GPU when one is available and fall back to the CPU otherwise.
# GraphST runs on 'cpu' by default; using a GPU is recommended.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Location of R, which is necessary for the mclust algorithm. R_HOME must be the R home
# *directory* (the path printed by `R RHOME`, e.g. `<conda env>/lib/R`), not the `bin/R`
# executable inside it. Please replace the path below with the local R installation path.
os.environ["R_HOME"] = "/home/cartalop/anaconda3/envs/graphst/lib/R"

### 1.4 Configure Paths and Directories

In [6]:
# Folder that holds the input `{dataset}.h5ad` file and receives the `{dataset}_graphst.h5ad` result.
# Both paths end with "/", so code that joins them with an extra "/" produces a
# (harmless) double slash.
data_folder_path = "../datasets/srt_data/gold/"
# Output folder for the figures generated for this dataset
figure_folder_path = f"../figures/method_benchmarking/{dataset}/"

In [7]:
# Create the figure output directory (including missing parents); an already
# existing directory is not an error because of `exist_ok=True`
os.makedirs(figure_folder_path, exist_ok=True)

## 2. Data

In [8]:
# Load data. This object is not passed to the model itself: each run reloads its
# own copy from disk, and `adata_original` only collects the latent embedding of
# every run before it is written back to disk.
adata_original = sc.read_h5ad(data_folder_path + f"{dataset}.h5ad")

## 3. GraphST Model

In [None]:
# Benchmark configuration: every neighborhood size is trained with two model
# seeds (0 and 1), which gives 10 runs in total.
n_neighbors_per_run = [4, 4, 8, 8, 12, 12, 16, 16, 20, 20]
model_seeds = [0, 1] * 5

# Key under which the Leiden clustering of the GraphST latent space is stored
leiden_key = f"latent_graphst_leiden_{str(leiden_resolution)}"

for run_number, (n_neighbors, model_seed) in enumerate(zip(n_neighbors_per_run, model_seeds)):
    # Reload the dataset so that every run starts from an unmodified object
    adata = sc.read_h5ad(data_folder_path + f"{dataset}.h5ad")

    # Store raw counts in adata.X
    adata.X = adata.layers["counts"].toarray()

    # Compute spatial neighborhood graph
    sq.gr.spatial_neighbors(adata,
                            coord_type="generic",
                            spatial_key="spatial",
                            n_neighs=n_neighbors)

    # squidpy stores the connectivities as a SciPy sparse matrix, on which
    # `np.where` cannot operate elementwise. Densify first so that the graph and
    # its symmetrized version are plain NumPy arrays.
    # NOTE: this allocates dense (n_cells x n_cells) arrays.
    graph_neigh = adata.obsp["spatial_connectivities"].toarray()
    adata.obsm["graph_neigh"] = graph_neigh

    # Make the adjacency matrix symmetric (union of all neighbor relations);
    # entries that became 2 because both directions were present are reset to 1
    adj = graph_neigh + graph_neigh.T
    adata.obsm["adj"] = np.where(adj > 1, 1, adj)

    # Define model
    model = GraphST.GraphST(adata,
                            device=device,
                            random_seed=model_seed)

    # Train model
    adata = model.train()

    # Use GraphST latent space for UMAP generation
    sc.pp.neighbors(adata, use_rep="emb", n_neighbors=n_neighbors)
    sc.tl.umap(adata, min_dist=0.3)
    fig = sc.pl.umap(adata,
                     color=[cell_type_key],
                     title="Latent Space with Cell Types: GraphST",
                     return_fig=True)
    fig.savefig(f"{figure_folder_path}/latent_graphst_cell_types_run_{run_number + 1}_{current_timestamp}.png",
                bbox_inches="tight")

    # Compute latent Leiden clustering
    sc.tl.leiden(adata=adata,
                 resolution=leiden_resolution,
                 random_state=random_seed,
                 key_added=leiden_key)

    # Create subplot of latent Leiden cluster annotations in physical and latent space
    fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(6, 12))
    title = fig.suptitle(t="Latent and Physical Space with Leiden Clusters: GraphST")
    sc.pl.umap(adata=adata,
               color=[leiden_key],
               title="Latent Space with Leiden Clusters",
               ax=axs[0],
               show=False)
    sc.pl.spatial(adata=adata,
                  color=[leiden_key],
                  spot_size=0.03,
                  title="Physical Space with Leiden Clusters",
                  ax=axs[1],
                  show=False)

    # Create and position shared legend
    handles, labels = axs[0].get_legend_handles_labels()
    lgd = fig.legend(handles, labels, bbox_to_anchor=(1.1, 0.90))
    axs[0].get_legend().remove()
    axs[1].get_legend().remove()

    # Adjust, save and display plot
    plt.subplots_adjust(wspace=0, hspace=0.2)
    fig.savefig(f"{figure_folder_path}/latent_physical_comparison_graphst_leiden_run_{run_number + 1}_{current_timestamp}.png",
                bbox_extra_artists=(lgd, title),
                bbox_inches="tight")
    plt.show()

    # Collect the embedding of this run (runs are numbered starting at 1)
    adata_original.obsm[f"{latent_key}_run{run_number + 1}"] = adata.obsm["emb"]

# Label all 'gene programs' as active gene programs for subsequent benchmarking.
# The number of latent dimensions is read from the first run's embedding.
n_latent_dims = adata_original.obsm[f"{latent_key}_run1"].shape[1]
adata_original.uns["graphst_active_gp_names"] = np.array([f"latent_{i}" for i in range(n_latent_dims)])

# Store data to disk
adata_original.write(f"{data_folder_path}/{dataset}_graphst.h5ad")

In [38]:
import numpy as np
def check_symmetric(a, rtol=1e-05, atol=1e-08):
    """Return whether the matrix ``a`` equals its transpose within tolerance.

    Args:
        a: Dense array-like or an object exposing ``toarray()`` (e.g. a SciPy
            sparse matrix), which is densified before the comparison.
        rtol: Relative tolerance forwarded to ``np.allclose``.
        atol: Absolute tolerance forwarded to ``np.allclose``.

    Returns:
        ``True`` if ``a`` is a square 2-D matrix with ``a == a.T`` elementwise
        within the tolerances, ``False`` otherwise (including every input that
        is not a square 2-D matrix).
    """
    if hasattr(a, "toarray"):
        a = a.toarray()
    a = np.asarray(a)
    # Without this guard, ``np.allclose`` would broadcast ``a`` against ``a.T``:
    # an all-equal (1, n) array would be reported as symmetric and other
    # non-square shapes would raise instead of returning False.
    if a.ndim != 2 or a.shape[0] != a.shape[1]:
        return False
    return np.allclose(a, a.T, rtol=rtol, atol=atol)

In [39]:
# NOTE(review): `test` is not defined in any cell shown here — presumably a dense
# adjacency matrix left over from an earlier session; confirm before re-running.
check_symmetric(test)

False

In [34]:
# Display the transpose of `test` (`test` is not defined in any cell shown here)
test.T

array([[0., 1., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.]])

In [35]:
# Display `test` itself (`test` is not defined in any cell shown here)
test

array([[0., 1., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.]])

In [None]:
    # NOTE(review): this cell is an indented fragment without an enclosing
    # definition, and `interaction` is not defined in any cell shown here —
    # presumably copied from a helper function for reference; it cannot run as is.
    # Store the (directed) neighbor graph
    adata.obsm['graph_neigh'] = interaction
    
    # Transform adj into a symmetric adjacency matrix: add the transpose and
    # reset entries greater than 1 (present in both directions) to 1
    adj = interaction
    adj = adj + adj.T
    adj = np.where(adj>1, 1, adj)
    
    # Store the symmetric adjacency matrix
    adata.obsm['adj'] = adj

In [22]:
# Define model with a fixed random seed of 0.
# NOTE(review): the recorded output of this cell is `KeyError: 'graph_neigh'` —
# presumably `adata.obsm['graph_neigh']` had not been set when it was run; confirm.
model = GraphST.GraphST(adata, device=device, random_seed=0)

KeyError: 'graph_neigh'

In [None]:
# Train model; the object returned by `train()` replaces `adata`, and the
# following cell reads the "emb" representation from it
adata = model.train()

In [None]:
# Display the object returned by training
adata

In [None]:
# Build a 10-nearest-neighbor graph on the "emb" representation, compute a UMAP
# from it and draw the UMAP colored by `cell_type_key`.
# NOTE(review): the plot is titled 'Predicted labels' although it is colored by
# `cell_type_key` — confirm that this title is intended.
sc.pp.neighbors(adata=adata,
                n_neighbors=10,
                use_rep="emb")
sc.tl.umap(adata=adata)
sc.pl.umap(adata=adata,
           color=cell_type_key,
           title=['Predicted labels'],
           show=False)