In [7]:
datasets=["b004","be_ton","mpb"]

In [8]:
import argparse

from dance.datasets.spatial import SpatialLIBDDataset
from dance.modules.spatial.spatial_domain.louvain import Louvain
from dance.transforms.preprocess import set_seed

Louvain_scores=[]
parser = argparse.ArgumentParser()
parser.add_argument("--cache", action="store_true", help="Cache processed data.")
parser.add_argument("--sample_number", type=str, default="151673",
                    help="12 human dorsolateral prefrontal cortex datasets for the spatial domain task.")
parser.add_argument("--seed", type=int, default=202, help="Random seed.")
parser.add_argument("--n_components", type=int, default=50, help="Number of PC components.")
parser.add_argument("--neighbors", type=int, default=17, help="Number of neighbors.")
for dataset in datasets:
    try:
        args = parser.parse_args(['--sample_number',dataset, '--seed','5',"--n_components","30"])
        set_seed(args.seed)

        # Initialize model and get model specific preprocessing pipeline
        model = Louvain(resolution=1)
        preprocessing_pipeline = model.preprocessing_pipeline(dim=args.n_components, n_neighbors=args.neighbors)

        # Load data and perform necessary preprocessing
        dataloader = SpatialLIBDDataset(data_id=args.sample_number,data_dir="/home/zyxing/data/spatial")
        data = dataloader.load_data(transform=preprocessing_pipeline, cache=args.cache)
        adj, y = data.get_data(return_type="default")

        # Train and evaluate model
        model = Louvain(resolution=1)
        score = model.fit_score(adj, y.values.ravel())
    except Exception as e:
        score=e
    finally:
        print(score)
        Louvain_scores.append(score)
""" To reproduce louvain on other samples, please refer to command lines belows:
NOTE: you have to run multiple times to get best performance.

human dorsolateral prefrontal cortex sample 151673:
python louvain.py --sample_number=151673 --seed=5
# 0.305

human dorsolateral prefrontal cortex sample 151676:
python louvain.py --sample_number=151676 --seed=203
# 0.288

human dorsolateral prefrontal cortex sample 151507:
python louvain.py --sample_number=151507 --seed=10
# 0.285
"""


[INFO][2023-10-25 23:52:38,401][dance][_load_raw_data] Loading expression data from /home/zyxing/data/spatial/b004/b004_raw_feature_bc_matrix.h5


[INFO][2023-10-25 23:52:38,664][dance][_load_raw_data] Loading spatial info from /home/zyxing/data/spatial/b004/tissue_positions_list.txt
[INFO][2023-10-25 23:52:38,940][dance][_load_raw_data] Loading label info from /home/zyxing/data/spatial/b004/cluster_labels.csv
[INFO][2023-10-25 23:52:39,185][dance][_load_raw_data] Loading image data from /home/zyxing/data/spatial/b004/b004_full_image.tif
[INFO][2023-10-25 23:52:39,186][dance][_load_raw_data] image doesn't exist,use louvain
[ WARN:0@4703.187] global loadsave.cpp:248 findDecoder imread_('/home/zyxing/data/spatial/b004/b004_full_image.tif'): can't open/read file: check file path/integrity
[INFO][2023-10-25 23:52:39,188][dance][_load_raw_data] image doesn't exist,use louvain
[INFO][2023-10-25 23:52:39,278][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 248285 × 48
    obs: 'Unnamed: 0', 'ground_truth', 'label'
    uns: 'image', 'dance_config'
    obsm: 'spatial', 'spatial_pixel

0.25355328180154496


[INFO][2023-10-26 00:15:19,341][dance][_load_raw_data] Loading spatial info from /home/zyxing/data/spatial/be_ton/tissue_positions_list.txt
[INFO][2023-10-26 00:15:19,521][dance][_load_raw_data] Loading label info from /home/zyxing/data/spatial/be_ton/cluster_labels.csv
[INFO][2023-10-26 00:15:19,650][dance][_load_raw_data] Loading image data from /home/zyxing/data/spatial/be_ton/be_ton_full_image.tif
[INFO][2023-10-26 00:15:19,652][dance][_load_raw_data] image doesn't exist,use louvain
[INFO][2023-10-26 00:15:19,652][dance][_load_raw_data] image doesn't exist,use louvain
[ WARN:0@6063.652] global loadsave.cpp:248 findDecoder imread_('/home/zyxing/data/spatial/be_ton/be_ton_full_image.tif'): can't open/read file: check file path/integrity
[INFO][2023-10-26 00:15:19,747][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 219926 × 44
    obs: 'Unnamed: 0', 'ground_truth', 'label'
    uns: 'image', 'dance_config'
    obsm: 'spatial', 's

0.12017291301674463


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
[INFO][2023-10-26 00:30:00,367][dance][_load_raw_data] Loading spatial info from /home/zyxing/data/spatial/mpb/tissue_positions_list.txt
[INFO][2023-10-26 00:30:00,379][dance][_load_raw_data] Loading label info from /home/zyxing/data/spatial/mpb/cluster_labels.csv
[INFO][2023-10-26 00:30:00,386][dance][_load_raw_data] Loading image data from /home/zyxing/data/spatial/mpb/mpb_full_image.tif
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
[INFO][2023-10-26 00:30:00,506][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 3353 × 31053
    obs: 'Barcode', 'ground_truth', 'label'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'image', 'dance_config'
    obsm: 'spatial', 'spatial_pixel'
[INFO][2023-10-26 00:30:00,507][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesMatch(prefixes=['ERCC', 'MT-'], suf

0.48856056584053464


' To reproduce louvain on other samples, please refer to command lines belows:\nNOTE: you have to run multiple times to get best performance.\n\nhuman dorsolateral prefrontal cortex sample 151673:\npython louvain.py --sample_number=151673 --seed=5\n# 0.305\n\nhuman dorsolateral prefrontal cortex sample 151676:\npython louvain.py --sample_number=151676 --seed=203\n# 0.288\n\nhuman dorsolateral prefrontal cortex sample 151507:\npython louvain.py --sample_number=151507 --seed=10\n# 0.285\n'

In [9]:
Louvain_scores

[0.25355328180154496, 0.12017291301674463, 0.48856056584053464]

In [10]:
import argparse

from dance.datasets.spatial import SpatialLIBDDataset
from dance.modules.spatial.spatial_domain.spagcn import SpaGCN, refine
from dance.utils import set_seed

SpaGCN_scores=[]
parser = argparse.ArgumentParser()
parser.add_argument("--cache", action="store_true", help="Cache processed data.")
parser.add_argument("--sample_number", type=str, default="151673",
                    help="12 human dorsolateral prefrontal cortex datasets for the spatial domain task.")
parser.add_argument("--beta", type=int, default=49, help="")
parser.add_argument("--alpha", type=int, default=1, help="")
parser.add_argument("--p", type=float, default=0.05,
                    help="percentage of total expression contributed by neighborhoods.")
parser.add_argument("--l", type=float, default=0.5, help="the parameter to control percentage p.")
parser.add_argument("--start", type=float, default=0.01, help="starting value for searching l.")
parser.add_argument("--end", type=float, default=1000, help="ending value for searching l.")
parser.add_argument("--tol", type=float, default=5e-3, help="tolerant value for searching l.")
parser.add_argument("--max_run", type=int, default=200, help="max runs.")
parser.add_argument("--epochs", type=int, default=200, help="Number of epochs.")
parser.add_argument("--n_clusters", type=int, default=7, help="the number of clusters")
parser.add_argument("--step", type=float, default=0.1, help="")
parser.add_argument("--lr", type=float, default=0.05, help="learning rate")
parser.add_argument("--random_state", type=int, default=100, help="")
args = parser.parse_args(['--sample_number',"mpb",'--lr','0.1'])
set_seed(args.random_state)

# Initialize model and get model specific preprocessing pipeline
model = SpaGCN()
preprocessing_pipeline = model.preprocessing_pipeline(alpha=args.alpha, beta=args.beta)

# Load data and perform necessary preprocessing
dataloader = SpatialLIBDDataset(data_id=args.sample_number,data_dir="/home/zyxing/data/spatial")
data = dataloader.load_data(transform=preprocessing_pipeline, cache=args.cache)
(x, adj, adj_2d), y = data.get_train_data()

# Train and evaluate model
l = model.search_l(args.p, adj, start=args.start, end=args.end, tol=args.tol, max_run=args.max_run)
model.set_l(l)
res = model.search_set_res((x, adj), l=l, target_num=args.n_clusters, start=0.4, step=args.step, tol=args.tol,
                        lr=args.lr, epochs=args.epochs, max_run=args.max_run)

pred = model.fit_predict((x, adj), init_spa=True, init="louvain", tol=args.tol, lr=args.lr, epochs=args.epochs,
                        res=res)
score = model.default_score_func(y, pred)
print(f"ARI: {score:.4f}")

refined_pred = refine(sample_id=data.data.obs_names.tolist(), pred=pred.tolist(), dis=adj_2d, shape="hexagon")
score_refined = model.default_score_func(y, refined_pred)
""" To reproduce SpaGCN on other samples, please refer to command lines belows:

human dorsolateral prefrontal cortex sample 151673:
python spagcn.py --sample_number=151673 --lr=0.1

human dorsolateral prefrontal cortex sample 151676:
python spagcn.py --sample_number=151676  --lr=0.02

human dorsolateral prefrontal cortex sample 151507:
python spagcn.py --sample_number=151507  --lr=0.009
"""


[INFO][2023-10-26 00:30:04,440][dance][set_seed] Setting global random seed to 100
[INFO][2023-10-26 00:30:04,441][dance][_load_raw_data] Loading expression data from /home/zyxing/data/spatial/mpb/mpb_raw_feature_bc_matrix.h5


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
[INFO][2023-10-26 00:30:05,421][dance][_load_raw_data] Loading spatial info from /home/zyxing/data/spatial/mpb/tissue_positions_list.txt
[INFO][2023-10-26 00:30:05,432][dance][_load_raw_data] Loading label info from /home/zyxing/data/spatial/mpb/cluster_labels.csv
[INFO][2023-10-26 00:30:05,439][dance][_load_raw_data] Loading image data from /home/zyxing/data/spatial/mpb/mpb_full_image.tif
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
[INFO][2023-10-26 00:30:05,566][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 3353 × 31053
    obs: 'Barcode', 'ground_truth', 'label'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'image', 'dance_config'
    obsm: 'spatial', 'spatial_pixel'
[INFO][2023-10-26 00:30:05,567][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesMatch(prefixes=['ERCC', 'MT-'], suf

ARI: 0.4882


' To reproduce SpaGCN on other samples, please refer to command lines belows:\n\nhuman dorsolateral prefrontal cortex sample 151673:\npython spagcn.py --sample_number=151673 --lr=0.1\n\nhuman dorsolateral prefrontal cortex sample 151676:\npython spagcn.py --sample_number=151676  --lr=0.02\n\nhuman dorsolateral prefrontal cortex sample 151507:\npython spagcn.py --sample_number=151507  --lr=0.009\n'

In [11]:
import argparse

import numpy as np

from dance.datasets.spatial import SpatialLIBDDataset
from dance.modules.spatial.spatial_domain.stagate import Stagate
from dance.transforms.preprocess import set_seed


parser = argparse.ArgumentParser()
parser.add_argument("--cache", action="store_true", help="Cache processed data.")
parser.add_argument("--sample_number", type=str, default="151673",
                    help="12 human dorsolateral prefrontal cortex datasets for the spatial domain task.")
parser.add_argument("--hidden_dims", type=list, default=[512, 32], help="hidden dimensions")
parser.add_argument("--rad_cutoff", type=int, default=150, help="")
parser.add_argument("--seed", type=int, default=3, help="")
parser.add_argument("--epochs", type=int, default=1000, help="epochs")
parser.add_argument("--high_variable_genes", type=int, default=3000, help="")
args = parser.parse_args(['--sample_number','mpb','--seed','16'])
set_seed(args.seed)

# Initialize model and get model specific preprocessing pipeline
model = Stagate([args.high_variable_genes] + args.hidden_dims)
preprocessing_pipeline = model.preprocessing_pipeline(n_top_hvgs=args.high_variable_genes, radius=args.rad_cutoff)

# Load data and perform necessary preprocessing
dataloader = SpatialLIBDDataset(data_id=args.sample_number,data_dir="/home/zyxing/data/spatial")
data = dataloader.load_data(transform=preprocessing_pipeline, cache=args.cache)
adj, y = data.get_data(return_type="default")
x = data.data.X.A
edge_list_array = np.vstack(np.nonzero(adj))

# Train and evaluate model
model = Stagate([args.high_variable_genes] + args.hidden_dims)
score = model.fit_score((x, edge_list_array), y, epochs=args.epochs, random_state=args.seed)
print(f"ARI: {score:.4f}")
""" To reproduce Stagate on other samples, please refer to command lines belows:
NOTE: since the stagate method is unstable, you have to run at least 5 times to get
      best performance. (same with original Stagate paper)

human dorsolateral prefrontal cortex sample 151673:
python stagate.py --sample_number=151673 --seed=16

human dorsolateral prefrontal cortex sample 151676:
python stagate.py --sample_number=151676 --seed=2030

human dorsolateral prefrontal cortex sample 151507:
python stagate.py --sample_number=151507 --seed=2021
"""


[INFO][2023-10-26 00:30:19,818][dance][_load_raw_data] Loading expression data from /home/zyxing/data/spatial/mpb/mpb_raw_feature_bc_matrix.h5
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
[INFO][2023-10-26 00:30:20,838][dance][_load_raw_data] Loading spatial info from /home/zyxing/data/spatial/mpb/tissue_positions_list.txt
[INFO][2023-10-26 00:30:20,852][dance][_load_raw_data] Loading label info from /home/zyxing/data/spatial/mpb/cluster_labels.csv
[INFO][2023-10-26 00:30:20,860][dance][_load_raw_data] Loading image data from /home/zyxing/data/spatial/mpb/mpb_full_image.tif
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
[INFO][2023-10-26 00:30:21,026][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 3353 × 31053
    obs: 'Barcode', 'ground_truth', 'label'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'image', 'dance_config'
    obsm: 'spatial', 'spatial_pixel'
[INFO]

ARI: 0.3814


' To reproduce Stagate on other samples, please refer to command lines belows:\nNOTE: since the stagate method is unstable, you have to run at least 5 times to get\n      best performance. (same with original Stagate paper)\n\nhuman dorsolateral prefrontal cortex sample 151673:\npython stagate.py --sample_number=151673 --seed=16\n\nhuman dorsolateral prefrontal cortex sample 151676:\npython stagate.py --sample_number=151676 --seed=2030\n\nhuman dorsolateral prefrontal cortex sample 151507:\npython stagate.py --sample_number=151507 --seed=2021\n'

In [12]:
import argparse

from dance.datasets.spatial import SpatialLIBDDataset
from dance.modules.spatial.spatial_domain.stlearn import StKmeans, StLouvain
from dance.transforms.preprocess import set_seed

MODES = ["louvain", "kmeans"]

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--cache", action="store_true", help="Cache processed data.")
    parser.add_argument("--sample_number", type=str, default="151673",
                        help="12 human dorsolateral prefrontal cortex datasets for the spatial domain task.")
    parser.add_argument("--mode", type=str, default="louvain", choices=MODES)
    parser.add_argument("--n_clusters", type=int, default=17, help="the number of clusters")
    parser.add_argument("--seed", type=int, default=2)
    parser.add_argument("--n_components", type=int, default=50, help="the number of components in PCA")
    parser.add_argument("--device", type=str, default="cuda", help="device for resnet extract feature")
    args = parser.parse_args(['--n_clusters','20','--sample_number','mpb','--seed','93'])
    set_seed(args.seed)

    # Initialize model and get model specific preprocessing pipeline
    if args.mode == "kmeans":
        model = StKmeans(n_clusters=args.n_clusters)
    elif args.mode == "louvain":
        model = StLouvain(resolution=0.6)
    else:
        raise ValueError(f"Unknown mode {args.mode!r}, available options are {MODES}")
    preprocessing_pipeline = model.preprocessing_pipeline()

    # Load data and perform necessary preprocessing
    dataloader = SpatialLIBDDataset(data_id=args.sample_number,data_dir="/home/zyxing/data/spatial")
    data = dataloader.load_data(transform=preprocessing_pipeline, cache=args.cache)
    x, y = data.get_data(return_type="default")

    # Train and evaluate model
    score = model.fit_score(x, y.values.ravel())
    print(f"ARI: {score:.4f}")
""" To reproduce stlearn on other samples, please refer to command lines belows:
NOTE: since the stlearn method is unstable, you have to run multiple times to get
      best performance.

human dorsolateral prefrontal cortex sample 151673:
python stlearn.py --n_clusters=20 --sample_number=151673 --seed=93

human dorsolateral prefrontal cortex sample 151676:
python stlearn.py --n_clusters=20 --sample_number=151676 --seed=11

human dorsolateral prefrontal cortex sample 151507:
python stlearn.py --n_clusters=20 --sample_number=151507 --seed=0
"""


[INFO][2023-10-26 00:31:04,278][dance][_load_raw_data] Loading expression data from /home/zyxing/data/spatial/mpb/mpb_raw_feature_bc_matrix.h5
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
[INFO][2023-10-26 00:31:05,244][dance][_load_raw_data] Loading spatial info from /home/zyxing/data/spatial/mpb/tissue_positions_list.txt
[INFO][2023-10-26 00:31:05,257][dance][_load_raw_data] Loading label info from /home/zyxing/data/spatial/mpb/cluster_labels.csv
[INFO][2023-10-26 00:31:05,264][dance][_load_raw_data] Loading image data from /home/zyxing/data/spatial/mpb/mpb_full_image.tif
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
[INFO][2023-10-26 00:31:05,427][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 3353 × 31053
    obs: 'Barcode', 'ground_truth', 'label'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'image', 'dance_config'
    obsm: 'spatial', 'spatial_pixel'
[INFO]

ARI: 0.5191


' To reproduce stlearn on other samples, please refer to command lines belows:\nNOTE: since the stlearn method is unstable, you have to run multiple times to get\n      best performance.\n\nhuman dorsolateral prefrontal cortex sample 151673:\npython stlearn.py --n_clusters=20 --sample_number=151673 --seed=93\n\nhuman dorsolateral prefrontal cortex sample 151676:\npython stlearn.py --n_clusters=20 --sample_number=151676 --seed=11\n\nhuman dorsolateral prefrontal cortex sample 151507:\npython stlearn.py --n_clusters=20 --sample_number=151507 --seed=0\n'

In [13]:
datasets=["sub_pancreatic_cancer","sub_human_breast_cancer"]

In [14]:
import argparse

from dance.datasets.spatial import SpatialLIBDDataset
from dance.modules.spatial.spatial_domain.louvain import Louvain
from dance.transforms.preprocess import set_seed

Louvain_scores=[]
parser = argparse.ArgumentParser()
parser.add_argument("--cache", action="store_true", help="Cache processed data.")
parser.add_argument("--sample_number", type=str, default="151673",
                    help="12 human dorsolateral prefrontal cortex datasets for the spatial domain task.")
parser.add_argument("--seed", type=int, default=202, help="Random seed.")
parser.add_argument("--n_components", type=int, default=50, help="Number of PC components.")
parser.add_argument("--neighbors", type=int, default=17, help="Number of neighbors.")
for dataset in datasets:
    try:
        args = parser.parse_args(['--sample_number',dataset, '--seed','5',"--n_components","30"])
        set_seed(args.seed)

        # Initialize model and get model specific preprocessing pipeline
        model = Louvain(resolution=1)
        preprocessing_pipeline = model.preprocessing_pipeline(dim=args.n_components, n_neighbors=args.neighbors)

        # Load data and perform necessary preprocessing
        dataloader = SpatialLIBDDataset(data_id=args.sample_number,data_dir="/home/zyxing/data/spatial")
        data = dataloader.load_data(transform=preprocessing_pipeline, cache=args.cache)
        adj, y = data.get_data(return_type="default")

        # Train and evaluate model
        model = Louvain(resolution=1)
        score = model.fit_score(adj, y.values.ravel())
    except Exception as e:
        score=e
    finally:
        print(score)
        Louvain_scores.append(score)
""" To reproduce louvain on other samples, please refer to command lines belows:
NOTE: you have to run multiple times to get best performance.

human dorsolateral prefrontal cortex sample 151673:
python louvain.py --sample_number=151673 --seed=5
# 0.305

human dorsolateral prefrontal cortex sample 151676:
python louvain.py --sample_number=151676 --seed=203
# 0.288

human dorsolateral prefrontal cortex sample 151507:
python louvain.py --sample_number=151507 --seed=10
# 0.285
"""


[INFO][2023-10-26 00:33:17,857][dance][_load_raw_data] Loading expression data from /home/zyxing/data/spatial/sub_pancreatic_cancer/sub_pancreatic_cancer_raw_feature_bc_matrix.h5
[INFO][2023-10-26 00:33:17,874][dance][_load_raw_data] Loading spatial info from /home/zyxing/data/spatial/sub_pancreatic_cancer/tissue_positions_list.txt
[INFO][2023-10-26 00:33:17,879][dance][_load_raw_data] Loading label info from /home/zyxing/data/spatial/sub_pancreatic_cancer/cluster_labels.csv
[INFO][2023-10-26 00:33:17,885][dance][_load_raw_data] Loading image data from /home/zyxing/data/spatial/sub_pancreatic_cancer/sub_pancreatic_cancer_full_image.tif


[INFO][2023-10-26 00:33:30,198][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 2000 × 474
    obs: '0', 'ground_truth', 'label'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'image', 'dance_config'
    obsm: 'spatial', 'spatial_pixel'
[INFO][2023-10-26 00:33:30,200][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesMatch(prefixes=['ERCC', 'MT-'], suffixes=[]),
  AnnDataTransform(func=scanpy.preprocessing._normalization.normalize_total, func_kwargs={'target_sum': 10000.0}),
  AnnDataTransform(func=scanpy.preprocessing._simple.scale, func_kwargs={}),
  CellPCA(n_components=30),
  NeighborGraph(n_neighbors=17, n_pcs=None, knn=True, random_state=0, method='umap', metric='euclidean'),
  SetConfig(config_dict={'feature_channel': 'NeighborGraph', 'feature_channel_type': 'obsp', 'label_channel': 'label', 'label_channel_type': 'obs'}),
)
[INFO][2023-10-26 00:33:30,201][dance.FilterGenesMatch][__ca

0.4245974721439729


[INFO][2023-10-26 00:33:46,541][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 2000 × 313
    obs: '0', 'ground_truth', 'label'
    uns: 'image', 'dance_config'
    obsm: 'spatial', 'spatial_pixel'
[INFO][2023-10-26 00:33:46,543][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesMatch(prefixes=['ERCC', 'MT-'], suffixes=[]),
  AnnDataTransform(func=scanpy.preprocessing._normalization.normalize_total, func_kwargs={'target_sum': 10000.0}),
  AnnDataTransform(func=scanpy.preprocessing._simple.scale, func_kwargs={}),
  CellPCA(n_components=30),
  NeighborGraph(n_neighbors=17, n_pcs=None, knn=True, random_state=0, method='umap', metric='euclidean'),
  SetConfig(config_dict={'feature_channel': 'NeighborGraph', 'feature_channel_type': 'obsp', 'label_channel': 'label', 'label_channel_type': 'obs'}),
)
[INFO][2023-10-26 00:33:46,544][dance.FilterGenesMatch][__call__] 0 number of genes will be removed due to 

0.5051484277373146


' To reproduce louvain on other samples, please refer to command lines belows:\nNOTE: you have to run multiple times to get best performance.\n\nhuman dorsolateral prefrontal cortex sample 151673:\npython louvain.py --sample_number=151673 --seed=5\n# 0.305\n\nhuman dorsolateral prefrontal cortex sample 151676:\npython louvain.py --sample_number=151676 --seed=203\n# 0.288\n\nhuman dorsolateral prefrontal cortex sample 151507:\npython louvain.py --sample_number=151507 --seed=10\n# 0.285\n'

In [15]:
Louvain_scores

[0.4245974721439729, 0.5051484277373146]

In [16]:
import argparse

from dance.datasets.spatial import SpatialLIBDDataset
from dance.modules.spatial.spatial_domain.spagcn import SpaGCN, refine
from dance.utils import set_seed

SpaGCN_scores=[]
parser = argparse.ArgumentParser()
parser.add_argument("--cache", action="store_true", help="Cache processed data.")
parser.add_argument("--sample_number", type=str, default="151673",
                    help="12 human dorsolateral prefrontal cortex datasets for the spatial domain task.")
parser.add_argument("--beta", type=int, default=500, help="")
parser.add_argument("--alpha", type=int, default=500, help="")
parser.add_argument("--p", type=float, default=0.05,
                    help="percentage of total expression contributed by neighborhoods.")
parser.add_argument("--l", type=float, default=0.5, help="the parameter to control percentage p.")
parser.add_argument("--start", type=float, default=0.01, help="starting value for searching l.")
parser.add_argument("--end", type=float, default=1000, help="ending value for searching l.")
parser.add_argument("--tol", type=float, default=5e-3, help="tolerant value for searching l.")
parser.add_argument("--max_run", type=int, default=200, help="max runs.")
parser.add_argument("--epochs", type=int, default=200, help="Number of epochs.")
parser.add_argument("--n_clusters", type=int, default=7, help="the number of clusters")
parser.add_argument("--step", type=float, default=0.1, help="")
parser.add_argument("--lr", type=float, default=0.05, help="learning rate")
parser.add_argument("--random_state", type=int, default=100, help="")
for dataset in datasets:
        args = parser.parse_args(['--sample_number',dataset,'--lr','0.1'])
        set_seed(args.random_state)

        # Initialize model and get model specific preprocessing pipeline
        model = SpaGCN()
        preprocessing_pipeline = model.preprocessing_pipeline(alpha=args.alpha, beta=args.beta)

        # Load data and perform necessary preprocessing
        dataloader = SpatialLIBDDataset(data_id=args.sample_number,data_dir="/home/zyxing/data/spatial")
        data = dataloader.load_data(transform=preprocessing_pipeline, cache=args.cache)
        (x, adj, adj_2d), y = data.get_train_data()

        # Train and evaluate model
        l = model.search_l(args.p, adj, start=args.start, end=args.end, tol=args.tol, max_run=args.max_run)
        model.set_l(l)
        res = model.search_set_res((x, adj), l=l, target_num=args.n_clusters, start=0.4, step=args.step, tol=args.tol,
                                lr=args.lr, epochs=args.epochs, max_run=args.max_run)

        pred = model.fit_predict((x, adj), init_spa=True, init="louvain", tol=args.tol, lr=args.lr, epochs=args.epochs,
                                res=res,device="cuda")
        score = model.default_score_func(y, pred)
        SpaGCN_scores.append(score)

        refined_pred = refine(sample_id=data.data.obs_names.tolist(), pred=pred.tolist(), dis=adj_2d, shape="hexagon")
        score_refined = model.default_score_func(y, refined_pred)
""" To reproduce SpaGCN on other samples, please refer to command lines belows:

    human dorsolateral prefrontal cortex sample 151673:
    python spagcn.py --sample_number=151673 --lr=0.1

    human dorsolateral prefrontal cortex sample 151676:
    python spagcn.py --sample_number=151676  --lr=0.02

    human dorsolateral prefrontal cortex sample 151507:
    python spagcn.py --sample_number=151507  --lr=0.009
    """


[INFO][2023-10-26 00:33:51,526][dance][set_seed] Setting global random seed to 100
[INFO][2023-10-26 00:33:51,529][dance][_load_raw_data] Loading expression data from /home/zyxing/data/spatial/sub_pancreatic_cancer/sub_pancreatic_cancer_raw_feature_bc_matrix.h5
[INFO][2023-10-26 00:33:51,552][dance][_load_raw_data] Loading spatial info from /home/zyxing/data/spatial/sub_pancreatic_cancer/tissue_positions_list.txt
[INFO][2023-10-26 00:33:51,561][dance][_load_raw_data] Loading label info from /home/zyxing/data/spatial/sub_pancreatic_cancer/cluster_labels.csv
[INFO][2023-10-26 00:33:51,569][dance][_load_raw_data] Loading image data from /home/zyxing/data/spatial/sub_pancreatic_cancer/sub_pancreatic_cancer_full_image.tif


[INFO][2023-10-26 00:34:00,579][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 2000 × 474
    obs: '0', 'ground_truth', 'label'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'image', 'dance_config'
    obsm: 'spatial', 'spatial_pixel'
[INFO][2023-10-26 00:34:00,580][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesMatch(prefixes=['ERCC', 'MT-'], suffixes=[]),
  AnnDataTransform(func=scanpy.preprocessing._normalization.normalize_total, func_kwargs={'target_sum': 10000.0}),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
  SpaGCNGraph(alpha=500, beta=500),
  SpaGCNGraph2D(),
  CellPCA(n_components=50),
  SetConfig(config_dict={'feature_channel': ['CellPCA', 'SpaGCNGraph', 'SpaGCNGraph2D'], 'feature_channel_type': ['obsm', 'obsp', 'obsp'], 'label_channel': 'label', 'label_channel_type': 'obs'}),
)
[INFO][2023-10-26 00:34:00,582][dance.FilterGenesMatch][__call__]

' To reproduce SpaGCN on other samples, please refer to command lines belows:\n\n    human dorsolateral prefrontal cortex sample 151673:\n    python spagcn.py --sample_number=151673 --lr=0.1\n\n    human dorsolateral prefrontal cortex sample 151676:\n    python spagcn.py --sample_number=151676  --lr=0.02\n\n    human dorsolateral prefrontal cortex sample 151507:\n    python spagcn.py --sample_number=151507  --lr=0.009\n    '

In [17]:
SpaGCN_scores

[0.27378139574017296, 0.559193437234145]

In [18]:
import argparse

import numpy as np

from dance.datasets.spatial import SpatialLIBDDataset
from dance.modules.spatial.spatial_domain.stagate import Stagate
from dance.transforms.preprocess import set_seed
Stagate_scores=[]

parser = argparse.ArgumentParser()
parser.add_argument("--cache", action="store_true", help="Cache processed data.")
parser.add_argument("--sample_number", type=str, default="151673",
                    help="12 human dorsolateral prefrontal cortex datasets for the spatial domain task.")
parser.add_argument("--hidden_dims", type=list, default=[512, 32], help="hidden dimensions")
parser.add_argument("--rad_cutoff", type=int, default=150, help="")
parser.add_argument("--seed", type=int, default=3, help="")
parser.add_argument("--epochs", type=int, default=1000, help="epochs")
parser.add_argument("--high_variable_genes", type=int, default=313, help="")
for dataset in datasets:
      args = parser.parse_args(['--sample_number',dataset,'--seed','16'])
      set_seed(args.seed)

      # Initialize model and get model specific preprocessing pipeline
      model = Stagate([args.high_variable_genes] + args.hidden_dims)
      preprocessing_pipeline = model.preprocessing_pipeline(n_top_hvgs=args.high_variable_genes, radius=args.rad_cutoff)

      # Load data and perform necessary preprocessing
      dataloader = SpatialLIBDDataset(data_id=args.sample_number,data_dir="/home/zyxing/data/spatial")
      data = dataloader.load_data(transform=preprocessing_pipeline, cache=args.cache)
      adj, y = data.get_data(return_type="default")
      x = data.data.X.A
      edge_list_array = np.vstack(np.nonzero(adj))

      # Train and evaluate model
      model = Stagate([args.high_variable_genes] + args.hidden_dims,device="cuda:2")
      score = model.fit_score((x, edge_list_array), y, epochs=args.epochs, random_state=args.seed)
      Stagate_scores.append(score)
      """ To reproduce Stagate on other samples, please refer to command lines belows:
      NOTE: since the stagate method is unstable, you have to run at least 5 times to get
            best performance. (same with original Stagate paper)

      human dorsolateral prefrontal cortex sample 151673:
      python stagate.py --sample_number=151673 --seed=16

      human dorsolateral prefrontal cortex sample 151676:
      python stagate.py --sample_number=151676 --seed=2030

      human dorsolateral prefrontal cortex sample 151507:
      python stagate.py --sample_number=151507 --seed=2021
      """


[INFO][2023-10-26 00:34:50,943][dance][_load_raw_data] Loading expression data from /home/zyxing/data/spatial/sub_pancreatic_cancer/sub_pancreatic_cancer_raw_feature_bc_matrix.h5
[INFO][2023-10-26 00:34:50,965][dance][_load_raw_data] Loading spatial info from /home/zyxing/data/spatial/sub_pancreatic_cancer/tissue_positions_list.txt


[INFO][2023-10-26 00:34:50,973][dance][_load_raw_data] Loading label info from /home/zyxing/data/spatial/sub_pancreatic_cancer/cluster_labels.csv
[INFO][2023-10-26 00:34:50,981][dance][_load_raw_data] Loading image data from /home/zyxing/data/spatial/sub_pancreatic_cancer/sub_pancreatic_cancer_full_image.tif
[INFO][2023-10-26 00:35:00,354][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 2000 × 474
    obs: '0', 'ground_truth', 'label'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'image', 'dance_config'
    obsm: 'spatial', 'spatial_pixel'
[INFO][2023-10-26 00:35:00,356][dance.Compose][__call__] Applying composed transformations:
Compose(
  AnnDataTransform(func=scanpy.preprocessing._highly_variable_genes.highly_variable_genes, func_kwargs={'flavor': 'seurat_v3', 'n_top_genes': 313, 'subset': True}),
  AnnDataTransform(func=scanpy.preprocessing._normalization.normalize_total, func_kwargs={'target_sum': 10000.0}),
  AnnDa

In [19]:
Stagate_scores

[0.17421365641942377, 0.3703528419733546]

In [20]:
import argparse

from dance.datasets.spatial import SpatialLIBDDataset
from dance.modules.spatial.spatial_domain.stlearn import StKmeans, StLouvain
from dance.transforms.preprocess import set_seed

MODES = ["louvain", "kmeans"]

St_ModesScores=[]
parser = argparse.ArgumentParser()
parser.add_argument("--cache", action="store_true", help="Cache processed data.")
parser.add_argument("--sample_number", type=str, default="151673",
                    help="12 human dorsolateral prefrontal cortex datasets for the spatial domain task.")
parser.add_argument("--mode", type=str, default="louvain", choices=MODES)
parser.add_argument("--n_clusters", type=int, default=17, help="the number of clusters")
parser.add_argument("--seed", type=int, default=2)
parser.add_argument("--n_components", type=int, default=50, help="the number of components in PCA")
parser.add_argument("--device", type=str, default="cuda", help="device for resnet extract feature")
for dataset in datasets:
    args = parser.parse_args(['--n_clusters','20','--sample_number',dataset,'--seed','93'])
    set_seed(args.seed)

    # Initialize model and get model specific preprocessing pipeline
    if args.mode == "kmeans":
        model = StKmeans(n_clusters=args.n_clusters)
    elif args.mode == "louvain":
        model = StLouvain(resolution=0.6)
    else:
        raise ValueError(f"Unknown mode {args.mode!r}, available options are {MODES}")
    preprocessing_pipeline = model.preprocessing_pipeline(crop_size=10,target_size=299)

    # Load data and perform necessary preprocessing
    dataloader = SpatialLIBDDataset(data_id=args.sample_number,data_dir="/home/zyxing/data/spatial")
    data = dataloader.load_data(transform=preprocessing_pipeline, cache=args.cache)
    x, y = data.get_data(return_type="default")

    # Train and evaluate model
    score = model.fit_score(x, y.values.ravel())
    St_ModesScores.append(score)
""" To reproduce stlearn on other samples, please refer to command lines belows:
NOTE: since the stlearn method is unstable, you have to run multiple times to get
      best performance.

human dorsolateral prefrontal cortex sample 151673:
python stlearn.py --n_clusters=20 --sample_number=151673 --seed=93

human dorsolateral prefrontal cortex sample 151676:
python stlearn.py --n_clusters=20 --sample_number=151676 --seed=11

human dorsolateral prefrontal cortex sample 151507:
python stlearn.py --n_clusters=20 --sample_number=151507 --seed=0
"""


[INFO][2023-10-26 00:35:35,992][dance][_load_raw_data] Loading expression data from /home/zyxing/data/spatial/sub_pancreatic_cancer/sub_pancreatic_cancer_raw_feature_bc_matrix.h5
[INFO][2023-10-26 00:35:36,011][dance][_load_raw_data] Loading spatial info from /home/zyxing/data/spatial/sub_pancreatic_cancer/tissue_positions_list.txt
[INFO][2023-10-26 00:35:36,017][dance][_load_raw_data] Loading label info from /home/zyxing/data/spatial/sub_pancreatic_cancer/cluster_labels.csv
[INFO][2023-10-26 00:35:36,025][dance][_load_raw_data] Loading image data from /home/zyxing/data/spatial/sub_pancreatic_cancer/sub_pancreatic_cancer_full_image.tif
[INFO][2023-10-26 00:35:45,041][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 2000 × 474
    obs: '0', 'ground_truth', 'label'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'image', 'dance_config'
    obsm: 'spatial', 'spatial_pixel'
[INFO][2023-10-26 00:35:45,043][dance.Compose][__call_

' To reproduce stlearn on other samples, please refer to command lines belows:\nNOTE: since the stlearn method is unstable, you have to run multiple times to get\n      best performance.\n\nhuman dorsolateral prefrontal cortex sample 151673:\npython stlearn.py --n_clusters=20 --sample_number=151673 --seed=93\n\nhuman dorsolateral prefrontal cortex sample 151676:\npython stlearn.py --n_clusters=20 --sample_number=151676 --seed=11\n\nhuman dorsolateral prefrontal cortex sample 151507:\npython stlearn.py --n_clusters=20 --sample_number=151507 --seed=0\n'

In [21]:
St_ModesScores

[0.21686632960908456, 0.24452581195775105]