# Tutorial 3: Stereo-seq

In this tutorial, we show how to apply DeepST to Stereo-seq data for spatial domain identification. We take the mouse olfactory bulb data as an example and set the number of clusters to 7.

The preprocessed data can be accessed via https://zenodo.org/record/6925603#.YuM5WXZBwuU.

In [25]:
import os
import torch
import pandas as pd
import scanpy as sc
from sklearn import metrics
import multiprocessing as mp

In [26]:
from DeepST import DeepST

In [27]:
# dataset identifier; it matches the folder name in the data path used by the read-data cell
dataset = 'Mouse_Olfactory'

In [28]:
# set random seed
# The seed value is handed to DeepST.fix_seed before any data is loaded or any model is built.
# NOTE(review): which libraries fix_seed actually seeds is not visible here - confirm in DeepST.
random_seed = 50 
DeepST.fix_seed(random_seed)

In [29]:
# Run device
# Prefer GPU 3 (the original setting) when the machine really has that many GPUs;
# otherwise fall back to the first GPU, and to the CPU when CUDA is unavailable.
# torch.cuda.is_available() alone does not guarantee that device index 3 exists.
preferred_gpu = 3
if torch.cuda.is_available():
    gpu_index = preferred_gpu if preferred_gpu < torch.cuda.device_count() else 0
    device = torch.device('cuda', gpu_index)
else:
    device = torch.device('cpu')

# the location of R
# This is a machine-specific path; change it to your own R installation.
os.environ['R_HOME'] = '/scbio4/tools/R/R-4.0.3_openblas/R-4.0.3'

In [30]:
# the number of clusters
# This is the value passed to the clustering call further down (that call is currently commented out).
n_clusters = 7

In [31]:
# read data
# data_root is machine-specific: point it at the directory that holds your copy of
# the preprocessed data. The dataset folder is taken from `dataset` instead of being
# repeated in the path, so the two cannot drift apart.
data_root = '/home/yahui/anaconda3/work/CellCluster_DEC/data'
# Joining with a trailing '' keeps the final path separator, so file_path has the
# same value as before ('<data_root>/<dataset>/').
file_path = os.path.join(data_root, dataset, '')
adata = sc.read_h5ad(os.path.join(file_path, 'filtered_feature_bc_matrix.h5ad'))
# De-duplicate gene names so they can be used as unique identifiers.
adata.var_names_make_unique()

In [32]:
# filter out genes expressed in fewer than 50 beads
sc.pp.filter_genes(adata, min_cells=50)

# preprocessing for ST data
# NOTE(review): the steps performed inside DeepST.preprocess are not visible here;
# the AnnData printed later lists 'hvg' and 'log1p' in uns - presumably set by this call, confirm.
DeepST.preprocess(adata)

# build graph
# Both calls take adata and return nothing that is used here, so their results are
# presumably stored on adata itself (the later printout shows 'graph_neigh', 'adj'
# and 'label_CSL' in obsm) - TODO confirm which call writes which key.
DeepST.construct_interaction(adata)
DeepST.add_contrastive_label(adata)



In [38]:
# get features
# Called for its effect on adata; the return value is not used.
# Presumably this fills obsm['feat'] / obsm['feat_a'] shown in the printout below - TODO confirm.
DeepST.get_feature(adata)

In [39]:
# Display the AnnData summary before training.
# NOTE(review): the saved output already lists 'emb' in obsm, which is only assigned by the
# training cell below - the stored output likely comes from an out-of-order run; re-run from a fresh kernel.
adata

AnnData object with n_obs × n_vars = 19109 × 14376
    obs: 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes'
    var: 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'mean', 'std'
    uns: 'hvg', 'log1p'
    obsm: 'spatial', 'distance_matrix', 'graph_neigh', 'adj', 'label_CSL', 'feat', 'feat_a', 'emb'

In [40]:
# define and run model
# Build the trainer on the prepared adata and the selected torch device.
model = DeepST.Train(adata, device=device)
# Store whatever train() returns as the embedding under obsm['emb'] for downstream steps.
# The recorded run shows a 600-step progress bar that took about 1.5 minutes.
adata.obsm['emb'] = model.train()

Begin to train ST data...


100%|████████████████████████████████████████████████████████████████████████████████████████| 600/600 [01:28<00:00,  6.76it/s]


Optimization finished for ST data!


In [41]:
# Display the AnnData summary again; obsm['emb'] now holds the value returned by model.train().
adata

AnnData object with n_obs × n_vars = 19109 × 14376
    obs: 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes'
    var: 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'mean', 'std'
    uns: 'hvg', 'log1p'
    obsm: 'spatial', 'distance_matrix', 'graph_neigh', 'adj', 'label_CSL', 'feat', 'feat_a', 'emb'

In [43]:
# clustering
# NOTE(review): this step is disabled, so no cluster labels are produced by the notebook as saved
# and n_clusters is never consumed. Uncomment the two lines below to run it - requirements of
# the clustering helper are not visible here, verify before enabling.
#from DeepST.utils import clustering
#clustering(adata, n_clusters, refinement=False)

In [44]:
# plotting spatial clustering result
# NOTE(review): this step is disabled as well. Before enabling it, note that `plt` is not imported
# in the import cell shown at the top of this notebook, and that color="label" presumably needs the
# label column written by the (also disabled) clustering step - confirm.
#plt.rcParams["figure.figsize"] = (4, 3)
#sc.pl.embedding(adata, basis="spatial", color="label",s=6, show=False, title='DeepST')