In [1]:
datasets=["lung6","lung9_1","lung9_2","hcc_liver","normal_liver","kidney_1139","kidney_10838","kidney_3323","kidney_642",
        "kidney_8693","kidney_2566","kidney_213","kidney_4061","kidney_1098","kidney_8471"]

In [2]:
import argparse
from pprint import pprint

from dance.datasets.spatial import CellTypeDeconvoDataset
from dance.modules.spatial.cell_type_deconvo.card import Card
card_scores=[]
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("--cache", action="store_true", help="Cache processed data.")
parser.add_argument("--dataset", default="CARD_synthetic", choices=CellTypeDeconvoDataset.AVAILABLE_DATA)
parser.add_argument("--datadir", default="../../../../data/spatial", help="Directory to save the data.")
parser.add_argument("--max_iter", type=int, default=10, help="Maximum optimization iteration.")
parser.add_argument("--epsilon", type=float, default=1e-10, help="Optimization threshold.")
parser.add_argument("--location_free", action="store_true", help="Do not supply spatial location if set.")
for dataset in datasets:
    try:
        args = parser.parse_args(['--dataset',dataset])
        pprint(vars(args))

        # Load dataset
        preprocessing_pipeline = Card.preprocessing_pipeline()
        dataset = CellTypeDeconvoDataset(data_dir=args.datadir, data_id=args.dataset)
        data = dataset.load_data(transform=preprocessing_pipeline, cache=args.cache)

        # inputs: x_count, x_spatial
        inputs, y = data.get_data(split_name="test", return_type="numpy")
        basis = data.get_feature(return_type="default", channel="CellTopicProfile", channel_type="varm")

        # Train and evaluate model
        model = Card(basis)
        score = model.fit_score(inputs, y, max_iter=args.max_iter, epsilon=args.epsilon, location_free=args.location_free)
    except Exception as e:
        score=e
    finally:
        card_scores.append(score)
"""To reproduce CARD benchmarks, please refer to command lines belows:

CARD synthetic $ python card.py --dataset CARD_synthetic

GSE174746 $ python card.py --dataset GSE174746 --location_free

SPOTLight synthetic $ python card.py --dataset SPOTLight_synthetic --location_free

"""


{'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'lung6',
 'epsilon': 1e-10,
 'location_free': False,
 'max_iter': 10}


[INFO][2023-10-18 09:15:51,908][dance][_load_raw_data] Number of cell types: reference = 15, real = 22
[INFO][2023-10-18 09:15:51,909][dance][_load_raw_data] Subsetting to common cell types (n=11):
['B_cell',
 'Endothelial',
 'Epithelial',
 'Fibroblast',
 'Macrophage',
 'Mast',
 'Monocyte',
 'Myeloid',
 'NK_cell',
 'Neutrophil',
 'Plasma']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:15:52,036][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 17465 × 923
    obs: 'cellname', 'sample', 'cellType', 'malignant', 'source', 'cell_subtype', 'complexity', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:15:52,037][dance.Compose][__call__] Applying composed transformations:
Compose(
  CellTopicProfile(ct_select='auto', ct_key='cellType', split_name='ref', method='mean'),
  FilterGenesMatch(prefixes=['mt-'], suffixes=[]),
  FilterGenesCommon(batch_key=None, split_keys=['ref', 'test']

True


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:15:52,339][dance.FilterGenesMarker][get_marker_genes] Found 14 marker genes for cell type 'B_cell'
[INFO][2023-10-18 09:15:52,340][dance.FilterGenesMarker][get_marker_genes] Found 224 marker genes for cell type 'Endothelial'
[INFO][2023-10-18 09:15:52,342][dance.FilterGenesMarker][get_marker_genes] Found 168 marker genes for cell type 'Epithelial'
[INFO][2023-10-18 09:15:52,343][dance.FilterGenesMarker][get_marker_genes] Found 215 marker genes for cell type 'Fibroblast'
[INFO][2023-10-18 09:15:52,344][dance.FilterGenesMarker][get_marker_genes] Found 122 marker genes for cell type 'Macrophage'
[INFO][2023-10-18 09:15:52,346][dance.FilterGenesMarker][get_marker_genes] Found 16 marker genes for cell type 'Mast'
[INFO][2023-10-18 09:15:52,347][dance.FilterGenesMarker][get_marker_genes] Found 18 marker genes for cell type 'Monocyte'
[INFO][2023-10-18 09:15:52,348][dance.FilterGenesMarker][get_mark

True
True
{'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'lung9_1',
 'epsilon': 1e-10,
 'location_free': False,
 'max_iter': 10}


[INFO][2023-10-18 09:15:53,097][dance][_load_raw_data] Number of cell types: reference = 15, real = 22
[INFO][2023-10-18 09:15:53,099][dance][_load_raw_data] Subsetting to common cell types (n=11):
['B_cell',
 'Endothelial',
 'Epithelial',
 'Fibroblast',
 'Macrophage',
 'Mast',
 'Monocyte',
 'Myeloid',
 'NK_cell',
 'Neutrophil',
 'Plasma']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:15:53,249][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 17265 × 923
    obs: 'cellname', 'sample', 'cellType', 'malignant', 'source', 'cell_subtype', 'complexity', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:15:53,250][dance.Compose][__call__] Applying composed transformations:
Compose(
  CellTopicProfile(ct_select='auto', ct_key='cellType', split_name='ref', method='mean'),
  FilterGenesMatch(prefixes=['mt-'], suffixes=[]),
  FilterGenesCommon(batch_key=None, split_keys=['ref', 'test']

True


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:15:53,579][dance.FilterGenesMarker][get_marker_genes] Found 14 marker genes for cell type 'B_cell'
[INFO][2023-10-18 09:15:53,581][dance.FilterGenesMarker][get_marker_genes] Found 224 marker genes for cell type 'Endothelial'
[INFO][2023-10-18 09:15:53,582][dance.FilterGenesMarker][get_marker_genes] Found 168 marker genes for cell type 'Epithelial'
[INFO][2023-10-18 09:15:53,584][dance.FilterGenesMarker][get_marker_genes] Found 215 marker genes for cell type 'Fibroblast'
[INFO][2023-10-18 09:15:53,585][dance.FilterGenesMarker][get_marker_genes] Found 122 marker genes for cell type 'Macrophage'
[INFO][2023-10-18 09:15:53,586][dance.FilterGenesMarker][get_marker_genes] Found 16 marker genes for cell type 'Mast'
[INFO][2023-10-18 09:15:53,588][dance.FilterGenesMarker][get_marker_genes] Found 18 marker genes for cell type 'Monocyte'
[INFO][2023-10-18 09:15:53,589][dance.FilterGenesMarker][get_mark

True
True
{'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'lung9_2',
 'epsilon': 1e-10,
 'location_free': False,
 'max_iter': 10}


[INFO][2023-10-18 09:15:54,167][dance][_load_raw_data] Number of cell types: reference = 15, real = 22
[INFO][2023-10-18 09:15:54,169][dance][_load_raw_data] Subsetting to common cell types (n=11):
['B_cell',
 'Endothelial',
 'Epithelial',
 'Fibroblast',
 'Macrophage',
 'Mast',
 'Monocyte',
 'Myeloid',
 'NK_cell',
 'Neutrophil',
 'Plasma']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:15:54,301][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 17765 × 923
    obs: 'cellname', 'sample', 'cellType', 'malignant', 'source', 'cell_subtype', 'complexity', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:15:54,302][dance.Compose][__call__] Applying composed transformations:
Compose(
  CellTopicProfile(ct_select='auto', ct_key='cellType', split_name='ref', method='mean'),
  FilterGenesMatch(prefixes=['mt-'], suffixes=[]),
  FilterGenesCommon(batch_key=None, split_keys=['ref', 'test']

True


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:15:54,602][dance.FilterGenesMarker][get_marker_genes] Found 14 marker genes for cell type 'B_cell'
[INFO][2023-10-18 09:15:54,603][dance.FilterGenesMarker][get_marker_genes] Found 224 marker genes for cell type 'Endothelial'
[INFO][2023-10-18 09:15:54,605][dance.FilterGenesMarker][get_marker_genes] Found 168 marker genes for cell type 'Epithelial'
[INFO][2023-10-18 09:15:54,606][dance.FilterGenesMarker][get_marker_genes] Found 215 marker genes for cell type 'Fibroblast'
[INFO][2023-10-18 09:15:54,607][dance.FilterGenesMarker][get_marker_genes] Found 122 marker genes for cell type 'Macrophage'
[INFO][2023-10-18 09:15:54,608][dance.FilterGenesMarker][get_marker_genes] Found 16 marker genes for cell type 'Mast'
[INFO][2023-10-18 09:15:54,609][dance.FilterGenesMarker][get_marker_genes] Found 18 marker genes for cell type 'Monocyte'
[INFO][2023-10-18 09:15:54,611][dance.FilterGenesMarker][get_mark

True
True
{'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'hcc_liver',
 'epsilon': 1e-10,
 'location_free': False,
 'max_iter': 10}


[INFO][2023-10-18 09:15:55,406][dance][_load_raw_data] Number of cell types: reference = 10, real = 14
[INFO][2023-10-18 09:15:55,408][dance][_load_raw_data] Subsetting to common cell types (n=6):
['B_cell', 'Endothelial', 'Epithelial', 'HSC', 'NK_cell', 'T_cell']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:15:55,498][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 13785 × 977
    obs: 'cell_name', 'sample', 'source', 'disease', 'cellType', 'cell_subtype_clusters', 'complexity', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:15:55,499][dance.Compose][__call__] Applying composed transformations:
Compose(
  CellTopicProfile(ct_select='auto', ct_key='cellType', split_name='ref', method='mean'),
  FilterGenesMatch(prefixes=['mt-'], suffixes=[]),
  FilterGenesCommon(batch_key=None, split_keys=['ref', 'test']),
  FilterGenesMarker(ct_profile_channel='CellTopicProfile', subset=

True


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:15:55,738][dance.FilterGenesMarker][get_marker_genes] Found 50 marker genes for cell type 'B_cell'
[INFO][2023-10-18 09:15:55,740][dance.FilterGenesMarker][get_marker_genes] Found 149 marker genes for cell type 'Endothelial'
[INFO][2023-10-18 09:15:55,741][dance.FilterGenesMarker][get_marker_genes] Found 98 marker genes for cell type 'Epithelial'
[INFO][2023-10-18 09:15:55,743][dance.FilterGenesMarker][get_marker_genes] Found 170 marker genes for cell type 'HSC'
[INFO][2023-10-18 09:15:55,744][dance.FilterGenesMarker][get_marker_genes] Found 50 marker genes for cell type 'NK_cell'
[INFO][2023-10-18 09:15:55,745][dance.FilterGenesMarker][get_marker_genes] Found 48 marker genes for cell type 'T_cell'
[INFO][2023-10-18 09:15:55,747][dance.FilterGenesMarker][get_marker_genes] Total number of marker genes found: 535
  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2

True
True
{'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'normal_liver',
 'epsilon': 1e-10,
 'location_free': False,
 'max_iter': 10}


[INFO][2023-10-18 09:16:05,048][dance.CellTopicProfile][get_ct_profile] Generating cell-type profiles ('mean' aggregation) for ['alpha-beta T cell', 'central venous liver sinusoidal endothelial cell', 'cholangiocyte', 'erythroid cell', 'gamma-delta T cell', 'hepatic stellate cell', 'hepatocyte', 'inflammatory macrophage', 'mature B cell', 'natural killer cell', 'periportal liver sinusoidal endothelial cell', 'portal liver sinusoidal endothelial cell']
[INFO][2023-10-18 09:16:05,052][dance.CellTopicProfile][get_ct_profile] Aggregating 'alpha-beta T cell' profiles over 961 samples
[INFO][2023-10-18 09:16:05,054][dance.CellTopicProfile][get_ct_profile] Number of 'alpha-beta T cell' cells in batch 0: 961
[INFO][2023-10-18 09:16:05,055][dance.CellTopicProfile][get_ct_profile] Aggregating 'central venous liver sinusoidal endothelial cell' profiles over 306 samples
[INFO][2023-10-18 09:16:05,057][dance.CellTopicProfile][get_ct_profile] Number of 'central venous liver sinusoidal endothelial ce

True


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:05,282][dance.FilterGenesPercentile][__call__] Filtering genes based on rv expression percentiles in layer None
[INFO][2023-10-18 09:16:05,284][dance.FilterGenesPercentile][__call__] 16 genes removed
  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:05,311][dance.SetConfig][__call__] Updating the dance data object config options:
{'feature_channel': [None, 'spatial'],
 'feature_channel_type': ['X', 'obsm'],
 'label_channel': 'cell_type_portion'}
[INFO][2023-10-18 09:16:05,312][dance][set_config_from_dict] Setting config 'feature_channel' to [None, 'spatial']
[INFO][2023-10-18 09:16:05,313][dance][set_config_from_dict] Setting config 'feature_channel_type' to ['X', 'obsm']
[INFO][2023-10-18 09:16:05,314][dance][set_config_from_dict] Setting config 'label_channel' to 'cell_type_portion'
[INFO][2023-10-18 09:16:05,315][dance][load_data] Data tra

True
True
{'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_1139',
 'epsilon': 1e-10,
 'location_free': False,
 'max_iter': 10}


  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:10,159][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 39763 × 952
    obs: 'cellType', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:16:10,160][dance.Compose][__call__] Applying composed transformations:
Compose(
  CellTopicProfile(ct_select='auto', ct_key='cellType', split_name='ref', method='mean'),
  FilterGenesMatch(prefixes=['mt-'], suffixes=[]),
  FilterGenesCommon(batch_key=None, split_keys=['ref', 'test']),
  FilterGenesMarker(ct_profile_channel='CellTopicProfile', subset=True, threshold=1.25, eps=1e-06),
  FilterGenesPercentile(min_val=1, max_val=99, mode='rv'),
  SetConfig(config_dict={'feature_channel': [None, 'spatial'], 'feature_channel_type': ['X', 'obsm'], 'label_channel': 'cell_type_portion'}),
)
[INFO][2023-10-18 09:16:10,246][dance.CellTopicProfile][get_ct_profile] Generating cell-type profiles ('mean' a

True


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:10,735][dance.FilterGenesCommon][__call__] 933 genes found in 'ref'
[INFO][2023-10-18 09:16:10,737][dance.FilterGenesCommon][__call__] 952 genes found in 'test'
[INFO][2023-10-18 09:16:10,738][dance.FilterGenesCommon][__call__] Found 933 common genes out of 952 total genes.
  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:11,014][dance.FilterGenesMarker][get_marker_genes] Found 115 marker genes for cell type 'Ascending vasa recta endothelium'
[INFO][2023-10-18 09:16:11,015][dance.FilterGenesMarker][get_marker_genes] Found 53 marker genes for cell type 'B cell'
[INFO][2023-10-18 09:16:11,017][dance.FilterGenesMarker][get_marker_genes] Found 67 marker genes for cell type 'CD4 T cell'
[INFO][2023-10-18 09:16:11,018][dance.FilterGenesMarker][get_marker_genes] Found 84 marker genes for cell type 'CD8 T cell'
[INFO][2023-10-18 09:16:11,019][dance.

True
True
{'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_10838',
 'epsilon': 1e-10,
 'location_free': False,
 'max_iter': 10}


  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:12,091][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 39668 × 952
    obs: 'cellType', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:16:12,092][dance.Compose][__call__] Applying composed transformations:
Compose(
  CellTopicProfile(ct_select='auto', ct_key='cellType', split_name='ref', method='mean'),
  FilterGenesMatch(prefixes=['mt-'], suffixes=[]),
  FilterGenesCommon(batch_key=None, split_keys=['ref', 'test']),
  FilterGenesMarker(ct_profile_channel='CellTopicProfile', subset=True, threshold=1.25, eps=1e-06),
  FilterGenesPercentile(min_val=1, max_val=99, mode='rv'),
  SetConfig(config_dict={'feature_channel': [None, 'spatial'], 'feature_channel_type': ['X', 'obsm'], 'label_channel': 'cell_type_portion'}),
)
[INFO][2023-10-18 09:16:12,177][dance.CellTopicProfile][get_ct_profile] Generating cell-type profiles ('mean' a

True


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:12,656][dance.FilterGenesCommon][__call__] 933 genes found in 'ref'
[INFO][2023-10-18 09:16:12,658][dance.FilterGenesCommon][__call__] 952 genes found in 'test'
[INFO][2023-10-18 09:16:12,659][dance.FilterGenesCommon][__call__] Found 933 common genes out of 952 total genes.
  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:12,924][dance.FilterGenesMarker][get_marker_genes] Found 121 marker genes for cell type 'Ascending vasa recta endothelium'
[INFO][2023-10-18 09:16:12,926][dance.FilterGenesMarker][get_marker_genes] Found 56 marker genes for cell type 'B cell'
[INFO][2023-10-18 09:16:12,927][dance.FilterGenesMarker][get_marker_genes] Found 71 marker genes for cell type 'CD4 T cell'
[INFO][2023-10-18 09:16:12,929][dance.FilterGenesMarker][get_marker_genes] Found 89 marker genes for cell type 'CD8 T cell'
[INFO][2023-10-18 09:16:12,930][dance.

True
True
{'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_3323',
 'epsilon': 1e-10,
 'location_free': False,
 'max_iter': 10}


[INFO][2023-10-18 09:16:13,762][dance][_load_raw_data] Number of cell types: reference = 31, real = 35
[INFO][2023-10-18 09:16:13,763][dance][_load_raw_data] Subsetting to common cell types (n=29):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Neutrophil',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO

True


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:14,556][dance.FilterGenesCommon][__call__] 933 genes found in 'ref'
[INFO][2023-10-18 09:16:14,558][dance.FilterGenesCommon][__call__] 952 genes found in 'test'
[INFO][2023-10-18 09:16:14,559][dance.FilterGenesCommon][__call__] Found 933 common genes out of 952 total genes.
  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:14,816][dance.FilterGenesMarker][get_marker_genes] Found 121 marker genes for cell type 'Ascending vasa recta endothelium'
[INFO][2023-10-18 09:16:14,818][dance.FilterGenesMarker][get_marker_genes] Found 56 marker genes for cell type 'B cell'
[INFO][2023-10-18 09:16:14,819][dance.FilterGenesMarker][get_marker_genes] Found 71 marker genes for cell type 'CD4 T cell'
[INFO][2023-10-18 09:16:14,821][dance.FilterGenesMarker][get_marker_genes] Found 89 marker genes for cell type 'CD8 T cell'
[INFO][2023-10-18 09:16:14,822][dance.

True
True
{'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_642',
 'epsilon': 1e-10,
 'location_free': False,
 'max_iter': 10}


[INFO][2023-10-18 09:16:15,638][dance][_load_raw_data] Number of cell types: reference = 31, real = 32
[INFO][2023-10-18 09:16:15,638][dance][_load_raw_data] Subsetting to common cell types (n=27):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'NK cell',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:15,882][da

True


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:16,437][dance.FilterGenesCommon][__call__] 933 genes found in 'ref'
[INFO][2023-10-18 09:16:16,440][dance.FilterGenesCommon][__call__] 952 genes found in 'test'
[INFO][2023-10-18 09:16:16,441][dance.FilterGenesCommon][__call__] Found 933 common genes out of 952 total genes.
  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:16,716][dance.FilterGenesMarker][get_marker_genes] Found 114 marker genes for cell type 'Ascending vasa recta endothelium'
[INFO][2023-10-18 09:16:16,717][dance.FilterGenesMarker][get_marker_genes] Found 57 marker genes for cell type 'B cell'
[INFO][2023-10-18 09:16:16,718][dance.FilterGenesMarker][get_marker_genes] Found 77 marker genes for cell type 'CD4 T cell'
[INFO][2023-10-18 09:16:16,719][dance.FilterGenesMarker][get_marker_genes] Found 87 marker genes for cell type 'CD8 T cell'
[INFO][2023-10-18 09:16:16,720][dance.

True
True
{'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_8693',
 'epsilon': 1e-10,
 'location_free': False,
 'max_iter': 10}


  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:17,695][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 39722 × 952
    obs: 'cellType', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:16:17,696][dance.Compose][__call__] Applying composed transformations:
Compose(
  CellTopicProfile(ct_select='auto', ct_key='cellType', split_name='ref', method='mean'),
  FilterGenesMatch(prefixes=['mt-'], suffixes=[]),
  FilterGenesCommon(batch_key=None, split_keys=['ref', 'test']),
  FilterGenesMarker(ct_profile_channel='CellTopicProfile', subset=True, threshold=1.25, eps=1e-06),
  FilterGenesPercentile(min_val=1, max_val=99, mode='rv'),
  SetConfig(config_dict={'feature_channel': [None, 'spatial'], 'feature_channel_type': ['X', 'obsm'], 'label_channel': 'cell_type_portion'}),
)
[INFO][2023-10-18 09:16:17,782][dance.CellTopicProfile][get_ct_profile] Generating cell-type profiles ('mean' a

True


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:18,271][dance.FilterGenesCommon][__call__] 933 genes found in 'ref'
[INFO][2023-10-18 09:16:18,274][dance.FilterGenesCommon][__call__] 952 genes found in 'test'
[INFO][2023-10-18 09:16:18,276][dance.FilterGenesCommon][__call__] Found 933 common genes out of 952 total genes.
  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:18,573][dance.FilterGenesMarker][get_marker_genes] Found 115 marker genes for cell type 'Ascending vasa recta endothelium'
[INFO][2023-10-18 09:16:18,575][dance.FilterGenesMarker][get_marker_genes] Found 56 marker genes for cell type 'B cell'
[INFO][2023-10-18 09:16:18,577][dance.FilterGenesMarker][get_marker_genes] Found 70 marker genes for cell type 'CD4 T cell'
[INFO][2023-10-18 09:16:18,579][dance.FilterGenesMarker][get_marker_genes] Found 89 marker genes for cell type 'CD8 T cell'
[INFO][2023-10-18 09:16:18,580][dance.

True
True
{'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_2566',
 'epsilon': 1e-10,
 'location_free': False,
 'max_iter': 10}


[INFO][2023-10-18 09:16:19,541][dance][_load_raw_data] Number of cell types: reference = 31, real = 35
[INFO][2023-10-18 09:16:19,542][dance][_load_raw_data] Subsetting to common cell types (n=29):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Neutrophil',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO

True


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:20,322][dance.FilterGenesCommon][__call__] 933 genes found in 'ref'
[INFO][2023-10-18 09:16:20,324][dance.FilterGenesCommon][__call__] 952 genes found in 'test'
[INFO][2023-10-18 09:16:20,325][dance.FilterGenesCommon][__call__] Found 933 common genes out of 952 total genes.
  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:20,581][dance.FilterGenesMarker][get_marker_genes] Found 121 marker genes for cell type 'Ascending vasa recta endothelium'
[INFO][2023-10-18 09:16:20,582][dance.FilterGenesMarker][get_marker_genes] Found 56 marker genes for cell type 'B cell'
[INFO][2023-10-18 09:16:20,584][dance.FilterGenesMarker][get_marker_genes] Found 71 marker genes for cell type 'CD4 T cell'
[INFO][2023-10-18 09:16:20,585][dance.FilterGenesMarker][get_marker_genes] Found 89 marker genes for cell type 'CD8 T cell'
[INFO][2023-10-18 09:16:20,586][dance.

True
True
{'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_213',
 'epsilon': 1e-10,
 'location_free': False,
 'max_iter': 10}


[INFO][2023-10-18 09:16:21,401][dance][_load_raw_data] Number of cell types: reference = 31, real = 33
[INFO][2023-10-18 09:16:21,402][dance][_load_raw_data] Subsetting to common cell types (n=27):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:21,641][dance][load_data

True


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:22,205][dance.FilterGenesCommon][__call__] 933 genes found in 'ref'
[INFO][2023-10-18 09:16:22,208][dance.FilterGenesCommon][__call__] 952 genes found in 'test'
[INFO][2023-10-18 09:16:22,209][dance.FilterGenesCommon][__call__] Found 933 common genes out of 952 total genes.
  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:22,483][dance.FilterGenesMarker][get_marker_genes] Found 112 marker genes for cell type 'Ascending vasa recta endothelium'
[INFO][2023-10-18 09:16:22,484][dance.FilterGenesMarker][get_marker_genes] Found 56 marker genes for cell type 'B cell'
[INFO][2023-10-18 09:16:22,486][dance.FilterGenesMarker][get_marker_genes] Found 68 marker genes for cell type 'CD4 T cell'
[INFO][2023-10-18 09:16:22,488][dance.FilterGenesMarker][get_marker_genes] Found 86 marker genes for cell type 'CD8 T cell'
[INFO][2023-10-18 09:16:22,489][dance.

True
True
{'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_4061',
 'epsilon': 1e-10,
 'location_free': False,
 'max_iter': 10}


[INFO][2023-10-18 09:16:23,345][dance][_load_raw_data] Number of cell types: reference = 31, real = 35
[INFO][2023-10-18 09:16:23,346][dance][_load_raw_data] Subsetting to common cell types (n=29):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Neutrophil',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO

True


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:24,142][dance.FilterGenesCommon][__call__] 933 genes found in 'ref'
[INFO][2023-10-18 09:16:24,144][dance.FilterGenesCommon][__call__] 952 genes found in 'test'
[INFO][2023-10-18 09:16:24,145][dance.FilterGenesCommon][__call__] Found 933 common genes out of 952 total genes.
  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:24,400][dance.FilterGenesMarker][get_marker_genes] Found 121 marker genes for cell type 'Ascending vasa recta endothelium'
[INFO][2023-10-18 09:16:24,401][dance.FilterGenesMarker][get_marker_genes] Found 56 marker genes for cell type 'B cell'
[INFO][2023-10-18 09:16:24,402][dance.FilterGenesMarker][get_marker_genes] Found 71 marker genes for cell type 'CD4 T cell'
[INFO][2023-10-18 09:16:24,404][dance.FilterGenesMarker][get_marker_genes] Found 89 marker genes for cell type 'CD8 T cell'
[INFO][2023-10-18 09:16:24,405][dance.

True
True
{'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_1098',
 'epsilon': 1e-10,
 'location_free': False,
 'max_iter': 10}


[INFO][2023-10-18 09:16:25,184][dance][_load_raw_data] Number of cell types: reference = 31, real = 35
[INFO][2023-10-18 09:16:25,185][dance][_load_raw_data] Subsetting to common cell types (n=29):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Neutrophil',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO

True


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:25,975][dance.FilterGenesCommon][__call__] 933 genes found in 'ref'
[INFO][2023-10-18 09:16:25,978][dance.FilterGenesCommon][__call__] 952 genes found in 'test'
[INFO][2023-10-18 09:16:25,979][dance.FilterGenesCommon][__call__] Found 933 common genes out of 952 total genes.
  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:26,238][dance.FilterGenesMarker][get_marker_genes] Found 121 marker genes for cell type 'Ascending vasa recta endothelium'
[INFO][2023-10-18 09:16:26,239][dance.FilterGenesMarker][get_marker_genes] Found 56 marker genes for cell type 'B cell'
[INFO][2023-10-18 09:16:26,241][dance.FilterGenesMarker][get_marker_genes] Found 71 marker genes for cell type 'CD4 T cell'
[INFO][2023-10-18 09:16:26,242][dance.FilterGenesMarker][get_marker_genes] Found 89 marker genes for cell type 'CD8 T cell'
[INFO][2023-10-18 09:16:26,243][dance.

True
True
{'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_8471',
 'epsilon': 1e-10,
 'location_free': False,
 'max_iter': 10}


[INFO][2023-10-18 09:16:27,094][dance][_load_raw_data] Number of cell types: reference = 31, real = 34
[INFO][2023-10-18 09:16:27,095][dance][_load_raw_data] Subsetting to common cell types (n=28):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09

True


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:27,881][dance.FilterGenesCommon][__call__] 933 genes found in 'ref'
[INFO][2023-10-18 09:16:27,883][dance.FilterGenesCommon][__call__] 952 genes found in 'test'
[INFO][2023-10-18 09:16:27,884][dance.FilterGenesCommon][__call__] Found 933 common genes out of 952 total genes.
  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:28,147][dance.FilterGenesMarker][get_marker_genes] Found 115 marker genes for cell type 'Ascending vasa recta endothelium'
[INFO][2023-10-18 09:16:28,148][dance.FilterGenesMarker][get_marker_genes] Found 56 marker genes for cell type 'B cell'
[INFO][2023-10-18 09:16:28,150][dance.FilterGenesMarker][get_marker_genes] Found 70 marker genes for cell type 'CD4 T cell'
[INFO][2023-10-18 09:16:28,151][dance.FilterGenesMarker][get_marker_genes] Found 89 marker genes for cell type 'CD8 T cell'
[INFO][2023-10-18 09:16:28,152][dance.

True
True


'To reproduce CARD benchmarks, please refer to command lines belows:\n\nCARD synthetic $ python card.py --dataset CARD_synthetic\n\nGSE174746 $ python card.py --dataset GSE174746 --location_free\n\nSPOTLight synthetic $ python card.py --dataset SPOTLight_synthetic --location_free\n\n'

In [3]:
card_scores

[0.010272752447378514,
 0.009862226044343612,
 0.03963365760350732,
 0.04203772165249851,
 0.07915342912237518,
 0.0025627811712195204,
 0.0026267280082778836,
 0.004993524984865446,
 0.007193671054587352,
 0.009280539696023327,
 0.004437053861375017,
 0.0035000743026642857,
 0.007056872704951607,
 0.00508543313727727,
 0.005383125487452269]

In [4]:
import argparse
from pprint import pprint

import numpy as np
import torch

from dance.datasets.spatial import CellTypeDeconvoDataset
from dance.modules.spatial.cell_type_deconvo import DSTG
from dance.utils import set_seed

parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("--cache", action="store_true", help="Cache processed data.")
parser.add_argument("--dataset", default="CARD_synthetic", choices=CellTypeDeconvoDataset.AVAILABLE_DATA)
parser.add_argument("--datadir", default="../../../../data/spatial", help="Directory to save the data.")
parser.add_argument("--sc_ref", type=bool, default=True, help="Reference scRNA (True) or cell-mixtures (False).")
parser.add_argument("--num_pseudo", type=int, default=500, help="Number of pseudo mixtures to generate.")
parser.add_argument("--n_hvg", type=int, default=2000, help="Number of HVGs.")
parser.add_argument("--lr", type=float, default=1e-2, help="Learning rate.")
parser.add_argument("--wd", type=float, default=1e-4, help="Weight decay.")
parser.add_argument("--k_filter", type=int, default=200, help="Graph node filter.")
parser.add_argument("--num_cc", type=int, default=30, help="Dimension of canonical correlation analysis.")
parser.add_argument("--bias", type=bool, default=False, help="Include/Exclude bias term.")
parser.add_argument("--nhid", type=int, default=16, help="Number of neurons in latent layer.")
parser.add_argument("--dropout", type=float, default=0., help="Dropout rate.")
parser.add_argument("--epochs", type=int, default=25, help="Number of epochs to train the model.")
parser.add_argument("--seed", type=int, default=17, help="Random seed.")
parser.add_argument("--device", default="auto", help="Computation device.")
DSTG_scores=[]
for dataset in datasets:
    try:
        args = parser.parse_args(["--dataset",dataset,"--nhid","16","--lr", ".001","--k_filter","50"])
        set_seed(args.seed)
        pprint(vars(args))

        # Load dataset
        preprocessing_pipeline = DSTG.preprocessing_pipeline(
            n_pseudo=args.num_pseudo,
            n_top_genes=args.n_hvg,
            k_filter=args.k_filter,
            num_cc=args.num_cc,
        )
        dataset = CellTypeDeconvoDataset(data_dir=args.datadir, data_id=args.dataset)
        data = dataset.load_data(transform=preprocessing_pipeline, cache=args.cache)

        (adj, x), y = data.get_data(return_type="default")
        x, y = torch.FloatTensor(x), torch.FloatTensor(y.values)
        adj = torch.sparse.FloatTensor(torch.LongTensor([adj.row.tolist(), adj.col.tolist()]),
                                    torch.FloatTensor(adj.data.astype(np.int32)))
        train_mask = data.get_split_mask("pseudo", return_type="torch")
        inputs = (adj, x, train_mask)

        # Train and evaluate model
        model = DSTG(nhid=args.nhid, bias=args.bias, dropout=args.dropout, device=args.device)
        pred = model.fit_predict(inputs, y, lr=args.lr, max_epochs=args.epochs, weight_decay=args.wd)
        test_mask = data.get_split_mask("test", return_type="torch")
        score = model.default_score_func(y[test_mask], pred[test_mask])
    except Exception as e:
        score=e
    finally:
        DSTG_scores.append(score)
        """To reproduce DSTG benchmarks, please refer to command lines belows:

        CARD synthetic $ python dstg.py --dataset CARD_synthetic --nhid 16 --lr .001 --k_filter
        50

        GSE174746 $ python dstg.py --dataset GSE174746 --nhid 16 --lr .0001 --k_filter 50

        SPOTLight synthetic $ python dstg.py --dataset SPOTLight_synthetic --nhid 32 --lr .1
        --epochs 25

        """


[INFO][2023-10-18 09:16:28,759][dance][set_seed] Setting global random seed to 17


{'bias': False,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'lung6',
 'device': 'auto',
 'dropout': 0.0,
 'epochs': 25,
 'k_filter': 50,
 'lr': 0.001,
 'n_hvg': 2000,
 'nhid': 16,
 'num_cc': 30,
 'num_pseudo': 500,
 'sc_ref': True,
 'seed': 17,
 'wd': 0.0001}


[INFO][2023-10-18 09:16:29,025][dance][_load_raw_data] Number of cell types: reference = 15, real = 22
[INFO][2023-10-18 09:16:29,026][dance][_load_raw_data] Subsetting to common cell types (n=11):
['B_cell',
 'Endothelial',
 'Epithelial',
 'Fibroblast',
 'Macrophage',
 'Mast',
 'Monocyte',
 'Myeloid',
 'NK_cell',
 'Neutrophil',
 'Plasma']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:29,149][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 17465 × 923
    obs: 'cellname', 'sample', 'cellType', 'malignant', 'source', 'cell_subtype', 'complexity', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:16:29,150][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesCommon(batch_key=None, split_keys=['ref', 'test']),
  PseudoMixture(n_pseudo=500, nc_min=2, nc_max=10, ct_select='auto'),
  RemoveSplit(split_name='ref'),
  AnnDataTransform(func=scanpy.prep

True
True
{'bias': False,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'lung9_1',
 'device': 'auto',
 'dropout': 0.0,
 'epochs': 25,
 'k_filter': 50,
 'lr': 0.001,
 'n_hvg': 2000,
 'nhid': 16,
 'num_cc': 30,
 'num_pseudo': 500,
 'sc_ref': True,
 'seed': 17,
 'wd': 0.0001}


[INFO][2023-10-18 09:16:33,205][dance][_load_raw_data] Number of cell types: reference = 15, real = 22
[INFO][2023-10-18 09:16:33,206][dance][_load_raw_data] Subsetting to common cell types (n=11):
['B_cell',
 'Endothelial',
 'Epithelial',
 'Fibroblast',
 'Macrophage',
 'Mast',
 'Monocyte',
 'Myeloid',
 'NK_cell',
 'Neutrophil',
 'Plasma']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:33,333][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 17265 × 923
    obs: 'cellname', 'sample', 'cellType', 'malignant', 'source', 'cell_subtype', 'complexity', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:16:33,334][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesCommon(batch_key=None, split_keys=['ref', 'test']),
  PseudoMixture(n_pseudo=500, nc_min=2, nc_max=10, ct_select='auto'),
  RemoveSplit(split_name='ref'),
  AnnDataTransform(func=scanpy.prep

True
True
{'bias': False,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'lung9_2',
 'device': 'auto',
 'dropout': 0.0,
 'epochs': 25,
 'k_filter': 50,
 'lr': 0.001,
 'n_hvg': 2000,
 'nhid': 16,
 'num_cc': 30,
 'num_pseudo': 500,
 'sc_ref': True,
 'seed': 17,
 'wd': 0.0001}


[INFO][2023-10-18 09:16:35,447][dance][_load_raw_data] Number of cell types: reference = 15, real = 22
[INFO][2023-10-18 09:16:35,449][dance][_load_raw_data] Subsetting to common cell types (n=11):
['B_cell',
 'Endothelial',
 'Epithelial',
 'Fibroblast',
 'Macrophage',
 'Mast',
 'Monocyte',
 'Myeloid',
 'NK_cell',
 'Neutrophil',
 'Plasma']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:35,576][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 17765 × 923
    obs: 'cellname', 'sample', 'cellType', 'malignant', 'source', 'cell_subtype', 'complexity', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:16:35,577][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesCommon(batch_key=None, split_keys=['ref', 'test']),
  PseudoMixture(n_pseudo=500, nc_min=2, nc_max=10, ct_select='auto'),
  RemoveSplit(split_name='ref'),
  AnnDataTransform(func=scanpy.prep

True
True
{'bias': False,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'hcc_liver',
 'device': 'auto',
 'dropout': 0.0,
 'epochs': 25,
 'k_filter': 50,
 'lr': 0.001,
 'n_hvg': 2000,
 'nhid': 16,
 'num_cc': 30,
 'num_pseudo': 500,
 'sc_ref': True,
 'seed': 17,
 'wd': 0.0001}


[INFO][2023-10-18 09:16:37,627][dance][_load_raw_data] Number of cell types: reference = 10, real = 14
[INFO][2023-10-18 09:16:37,629][dance][_load_raw_data] Subsetting to common cell types (n=6):
['B_cell', 'Endothelial', 'Epithelial', 'HSC', 'NK_cell', 'T_cell']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:37,719][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 13785 × 977
    obs: 'cell_name', 'sample', 'source', 'disease', 'cellType', 'cell_subtype_clusters', 'complexity', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:16:37,720][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesCommon(batch_key=None, split_keys=['ref', 'test']),
  PseudoMixture(n_pseudo=500, nc_min=2, nc_max=10, ct_select='auto'),
  RemoveSplit(split_name='ref'),
  AnnDataTransform(func=scanpy.preprocessing._normalization.normalize_total, func_kwargs={'target_sum': 

True
True
{'bias': False,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'normal_liver',
 'device': 'auto',
 'dropout': 0.0,
 'epochs': 25,
 'k_filter': 50,
 'lr': 0.001,
 'n_hvg': 2000,
 'nhid': 16,
 'num_cc': 30,
 'num_pseudo': 500,
 'sc_ref': True,
 'seed': 17,
 'wd': 0.0001}


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:41,001][dance.RemoveSplit][__call__] Popping split: {self.split_name!r}
[INFO][2023-10-18 09:16:41,003][dance][pop] Updating split index for 'pseudo'. 500 -> 500
[INFO][2023-10-18 09:16:41,004][dance][pop] Updating split index for 'test'. 2,709 -> 2,709
  view_to_actual(adata)
[INFO][2023-10-18 09:16:41,074][dance.ScaleFeature][__call__] Scaling split:pseudo (n=500)
[INFO][2023-10-18 09:16:41,077][dance.ScaleFeature][__call__] Scaling split:test (n=2,709)
[INFO][2023-10-18 09:16:43,324][dance.SetConfig][__call__] Updating the dance data object config options:
{'feature_channel': ['DSTGraph', None],
 'feature_channel_type': ['obsp', 'X'],
 'label_channel': 'cell_type_portion'}
[INFO][2023-10-18 09:16:43,325][dance][set_config_from_dict] Setting config 'feature_channel' to ['DSTGraph', None]
[INFO][2023-10-18 09:16:43,326][dance][set_config_from_dict] Set

True
True
{'bias': False,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_1139',
 'device': 'auto',
 'dropout': 0.0,
 'epochs': 25,
 'k_filter': 50,
 'lr': 0.001,
 'n_hvg': 2000,
 'nhid': 16,
 'num_cc': 30,
 'num_pseudo': 500,
 'sc_ref': True,
 'seed': 17,
 'wd': 0.0001}


  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:43,834][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 39763 × 952
    obs: 'cellType', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:16:43,835][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesCommon(batch_key=None, split_keys=['ref', 'test']),
  PseudoMixture(n_pseudo=500, nc_min=2, nc_max=10, ct_select='auto'),
  RemoveSplit(split_name='ref'),
  AnnDataTransform(func=scanpy.preprocessing._normalization.normalize_total, func_kwargs={'target_sum': 10000.0}),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
  AnnDataTransform(func=scanpy.preprocessing._highly_variable_genes.highly_variable_genes, func_kwargs={'flavor': 'seurat', 'n_top_genes': 2000, 'batch_key': 'batch', 'subset': True}),
  ScaleFeature(axis=0, mode='standardize', eps=-1, split_names='ALL', batch

True
True
{'bias': False,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_10838',
 'device': 'auto',
 'dropout': 0.0,
 'epochs': 25,
 'k_filter': 50,
 'lr': 0.001,
 'n_hvg': 2000,
 'nhid': 16,
 'num_cc': 30,
 'num_pseudo': 500,
 'sc_ref': True,
 'seed': 17,
 'wd': 0.0001}


  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:46,223][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 39668 × 952
    obs: 'cellType', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:16:46,223][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesCommon(batch_key=None, split_keys=['ref', 'test']),
  PseudoMixture(n_pseudo=500, nc_min=2, nc_max=10, ct_select='auto'),
  RemoveSplit(split_name='ref'),
  AnnDataTransform(func=scanpy.preprocessing._normalization.normalize_total, func_kwargs={'target_sum': 10000.0}),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
  AnnDataTransform(func=scanpy.preprocessing._highly_variable_genes.highly_variable_genes, func_kwargs={'flavor': 'seurat', 'n_top_genes': 2000, 'batch_key': 'batch', 'subset': True}),
  ScaleFeature(axis=0, mode='standardize', eps=-1, split_names='ALL', batch

True
True
{'bias': False,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_3323',
 'device': 'auto',
 'dropout': 0.0,
 'epochs': 25,
 'k_filter': 50,
 'lr': 0.001,
 'n_hvg': 2000,
 'nhid': 16,
 'num_cc': 30,
 'num_pseudo': 500,
 'sc_ref': True,
 'seed': 17,
 'wd': 0.0001}


[INFO][2023-10-18 09:16:48,265][dance][_load_raw_data] Number of cell types: reference = 31, real = 35
[INFO][2023-10-18 09:16:48,266][dance][_load_raw_data] Subsetting to common cell types (n=29):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Neutrophil',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO

True
True
{'bias': False,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_642',
 'device': 'auto',
 'dropout': 0.0,
 'epochs': 25,
 'k_filter': 50,
 'lr': 0.001,
 'n_hvg': 2000,
 'nhid': 16,
 'num_cc': 30,
 'num_pseudo': 500,
 'sc_ref': True,
 'seed': 17,
 'wd': 0.0001}


[INFO][2023-10-18 09:16:50,604][dance][_load_raw_data] Number of cell types: reference = 31, real = 32
[INFO][2023-10-18 09:16:50,605][dance][_load_raw_data] Subsetting to common cell types (n=27):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'NK cell',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:50,847][da

True
True
{'bias': False,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_8693',
 'device': 'auto',
 'dropout': 0.0,
 'epochs': 25,
 'k_filter': 50,
 'lr': 0.001,
 'n_hvg': 2000,
 'nhid': 16,
 'num_cc': 30,
 'num_pseudo': 500,
 'sc_ref': True,
 'seed': 17,
 'wd': 0.0001}


[INFO][2023-10-18 09:16:53,149][dance][_load_raw_data] Number of cell types: reference = 31, real = 34
[INFO][2023-10-18 09:16:53,150][dance][_load_raw_data] Subsetting to common cell types (n=28):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09

True
True
{'bias': False,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_2566',
 'device': 'auto',
 'dropout': 0.0,
 'epochs': 25,
 'k_filter': 50,
 'lr': 0.001,
 'n_hvg': 2000,
 'nhid': 16,
 'num_cc': 30,
 'num_pseudo': 500,
 'sc_ref': True,
 'seed': 17,
 'wd': 0.0001}


[INFO][2023-10-18 09:16:56,062][dance][_load_raw_data] Number of cell types: reference = 31, real = 35
[INFO][2023-10-18 09:16:56,063][dance][_load_raw_data] Subsetting to common cell types (n=29):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Neutrophil',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO

True
True
{'bias': False,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_213',
 'device': 'auto',
 'dropout': 0.0,
 'epochs': 25,
 'k_filter': 50,
 'lr': 0.001,
 'n_hvg': 2000,
 'nhid': 16,
 'num_cc': 30,
 'num_pseudo': 500,
 'sc_ref': True,
 'seed': 17,
 'wd': 0.0001}


  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:16:58,576][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 39617 × 952
    obs: 'cellType', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:16:58,577][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesCommon(batch_key=None, split_keys=['ref', 'test']),
  PseudoMixture(n_pseudo=500, nc_min=2, nc_max=10, ct_select='auto'),
  RemoveSplit(split_name='ref'),
  AnnDataTransform(func=scanpy.preprocessing._normalization.normalize_total, func_kwargs={'target_sum': 10000.0}),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
  AnnDataTransform(func=scanpy.preprocessing._highly_variable_genes.highly_variable_genes, func_kwargs={'flavor': 'seurat', 'n_top_genes': 2000, 'batch_key': 'batch', 'subset': True}),
  ScaleFeature(axis=0, mode='standardize', eps=-1, split_names='ALL', batch

True
True
{'bias': False,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_4061',
 'device': 'auto',
 'dropout': 0.0,
 'epochs': 25,
 'k_filter': 50,
 'lr': 0.001,
 'n_hvg': 2000,
 'nhid': 16,
 'num_cc': 30,
 'num_pseudo': 500,
 'sc_ref': True,
 'seed': 17,
 'wd': 0.0001}


[INFO][2023-10-18 09:17:00,671][dance][_load_raw_data] Number of cell types: reference = 31, real = 35
[INFO][2023-10-18 09:17:00,672][dance][_load_raw_data] Subsetting to common cell types (n=29):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Neutrophil',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO

True
True
{'bias': False,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_1098',
 'device': 'auto',
 'dropout': 0.0,
 'epochs': 25,
 'k_filter': 50,
 'lr': 0.001,
 'n_hvg': 2000,
 'nhid': 16,
 'num_cc': 30,
 'num_pseudo': 500,
 'sc_ref': True,
 'seed': 17,
 'wd': 0.0001}


[INFO][2023-10-18 09:17:02,938][dance][_load_raw_data] Number of cell types: reference = 31, real = 35
[INFO][2023-10-18 09:17:02,939][dance][_load_raw_data] Subsetting to common cell types (n=29):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Neutrophil',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO

True
True
{'bias': False,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_8471',
 'device': 'auto',
 'dropout': 0.0,
 'epochs': 25,
 'k_filter': 50,
 'lr': 0.001,
 'n_hvg': 2000,
 'nhid': 16,
 'num_cc': 30,
 'num_pseudo': 500,
 'sc_ref': True,
 'seed': 17,
 'wd': 0.0001}


[INFO][2023-10-18 09:17:06,054][dance][_load_raw_data] Number of cell types: reference = 31, real = 34
[INFO][2023-10-18 09:17:06,055][dance][_load_raw_data] Subsetting to common cell types (n=28):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09

True
True


In [5]:
DSTG_scores

[0.00932844,
 0.008754343,
 0.008421534,
 0.023887137,
 0.046285834,
 0.0049878345,
 0.0030663814,
 0.003820763,
 0.004870248,
 0.0055336705,
 0.0040534544,
 0.004075728,
 0.004991771,
 0.0042319656,
 0.0041610063]

In [6]:
import argparse
from pprint import pprint

from dance.datasets.spatial import CellTypeDeconvoDataset
from dance.modules.spatial.cell_type_deconvo.spatialdecon import SpatialDecon
from dance.utils import set_seed

parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("--cache", action="store_true", help="Cache processed data.")
parser.add_argument("--dataset", default="CARD_synthetic", choices=CellTypeDeconvoDataset.AVAILABLE_DATA)
parser.add_argument("--datadir", default="../../../../data/spatial", help="Directory to save the data.")
parser.add_argument("--lr", type=float, default=1e-4, help="Learning rate.")
parser.add_argument("--bias", type=bool, default=False, help="Include/Exclude bias term.")
parser.add_argument("--max_iter", type=int, default=10000, help="Maximum optimization iteration.")
parser.add_argument("--seed", type=int, default=17, help="Random seed.")
parser.add_argument("--device", default="auto", help="Computation device.")

SpatialDecon_scores=[]
for dataset in datasets:
    try:
        args = parser.parse_args(["--dataset",dataset, "--lr", ".01","--max_iter","2250","--bias","1"])
        set_seed(args.seed)
        pprint(vars(args))

        # Load dataset
        preprocessing_pipeline = SpatialDecon.preprocessing_pipeline()
        dataset = CellTypeDeconvoDataset(data_dir=args.datadir, data_id=args.dataset)
        data = dataset.load_data(transform=preprocessing_pipeline, cache=args.cache)
        cell_types = data.data.obsm["cell_type_portion"].columns.tolist()

        x, y = data.get_data(split_name="test", return_type="torch")
        ct_profile = data.get_feature(return_type="torch", channel="CellTopicProfile", channel_type="varm")

        # Train and evaluate model
        spaDecon = SpatialDecon(ct_profile, ct_select=cell_types, bias=args.bias, device=args.device)
        score = spaDecon.fit_score(x, y, lr=args.lr, max_iter=args.max_iter, print_period=100)
    except Exception as e:
        score=e
    finally:
        SpatialDecon_scores.append(score)
"""To reproduce SpatialDecon benchmarks, please refer to command lines belows:

CARD synthetic $ python spatialdecon.py --dataset CARD_synthetic --lr .01 --max_iter
2250 --bias 1

GSE174746 $ python spatialdecon.py --dataset GSE174746 --lr .0001 --max_iter 20000
--bias 1

SPOTLight synthetic $ python spatialdecon.py --dataset SPOTLight_synthetic --lr .01
--max_iter 500 --bias 1

"""


[INFO][2023-10-18 09:17:08,178][dance][set_seed] Setting global random seed to 17


{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'lung6',
 'device': 'auto',
 'lr': 0.01,
 'max_iter': 2250,
 'seed': 17}


[INFO][2023-10-18 09:17:08,418][dance][_load_raw_data] Number of cell types: reference = 15, real = 22
[INFO][2023-10-18 09:17:08,419][dance][_load_raw_data] Subsetting to common cell types (n=11):
['B_cell',
 'Endothelial',
 'Epithelial',
 'Fibroblast',
 'Macrophage',
 'Mast',
 'Monocyte',
 'Myeloid',
 'NK_cell',
 'Neutrophil',
 'Plasma']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:17:08,539][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 17465 × 923
    obs: 'cellname', 'sample', 'cellType', 'malignant', 'source', 'cell_subtype', 'complexity', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:17:08,539][dance.Compose][__call__] Applying composed transformations:
Compose(
  CellTopicProfile(ct_select='auto', ct_key='cellType', split_name='ref', method='median'),
  SetConfig(config_dict={'label_channel': 'cell_type_portion'}),
)
[INFO][2023-10-18 09:17:08,573][dance.CellTo

True


[INFO][2023-10-18 09:17:08,871][dance][fit] Epoch: 100/2250 Loss: 4.49833e+00
[INFO][2023-10-18 09:17:08,965][dance][fit] Epoch: 200/2250 Loss: 3.57504e+00
[INFO][2023-10-18 09:17:09,065][dance][fit] Epoch: 300/2250 Loss: 3.07046e+00
[INFO][2023-10-18 09:17:09,168][dance][fit] Epoch: 400/2250 Loss: 2.73843e+00
[INFO][2023-10-18 09:17:09,275][dance][fit] Epoch: 500/2250 Loss: 2.49880e+00
[INFO][2023-10-18 09:17:09,390][dance][fit] Epoch: 600/2250 Loss: 2.31574e+00
[INFO][2023-10-18 09:17:09,499][dance][fit] Epoch: 700/2250 Loss: 2.17017e+00
[INFO][2023-10-18 09:17:09,611][dance][fit] Epoch: 800/2250 Loss: 2.05105e+00
[INFO][2023-10-18 09:17:09,721][dance][fit] Epoch: 900/2250 Loss: 1.95139e+00
[INFO][2023-10-18 09:17:09,832][dance][fit] Epoch: 1000/2250 Loss: 1.86654e+00
[INFO][2023-10-18 09:17:09,944][dance][fit] Epoch: 1100/2250 Loss: 1.79329e+00
[INFO][2023-10-18 09:17:10,055][dance][fit] Epoch: 1200/2250 Loss: 1.72929e+00
[INFO][2023-10-18 09:17:10,164][dance][fit] Epoch: 1300/2250 

True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'lung9_1',
 'device': 'auto',
 'lr': 0.01,
 'max_iter': 2250,
 'seed': 17}


[INFO][2023-10-18 09:17:11,644][dance][_load_raw_data] Number of cell types: reference = 15, real = 22
[INFO][2023-10-18 09:17:11,645][dance][_load_raw_data] Subsetting to common cell types (n=11):
['B_cell',
 'Endothelial',
 'Epithelial',
 'Fibroblast',
 'Macrophage',
 'Mast',
 'Monocyte',
 'Myeloid',
 'NK_cell',
 'Neutrophil',
 'Plasma']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:17:11,797][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 17265 × 923
    obs: 'cellname', 'sample', 'cellType', 'malignant', 'source', 'cell_subtype', 'complexity', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:17:11,798][dance.Compose][__call__] Applying composed transformations:
Compose(
  CellTopicProfile(ct_select='auto', ct_key='cellType', split_name='ref', method='median'),
  SetConfig(config_dict={'label_channel': 'cell_type_portion'}),
)
[INFO][2023-10-18 09:17:11,843][dance.CellTo

True


[INFO][2023-10-18 09:17:12,060][dance.CellTopicProfile][get_ct_profile] Number of 'Monocyte' cells in batch 0: 1,593
[INFO][2023-10-18 09:17:12,061][dance.CellTopicProfile][get_ct_profile] Aggregating 'Myeloid' profiles over 1,413 samples
[INFO][2023-10-18 09:17:12,078][dance.CellTopicProfile][get_ct_profile] Number of 'Myeloid' cells in batch 0: 1,413
[INFO][2023-10-18 09:17:12,080][dance.CellTopicProfile][get_ct_profile] Aggregating 'NK_cell' profiles over 1,638 samples
[INFO][2023-10-18 09:17:12,097][dance.CellTopicProfile][get_ct_profile] Number of 'NK_cell' cells in batch 0: 1,638
[INFO][2023-10-18 09:17:12,100][dance.CellTopicProfile][get_ct_profile] Aggregating 'Neutrophil' profiles over 89 samples
[INFO][2023-10-18 09:17:12,102][dance.CellTopicProfile][get_ct_profile] Number of 'Neutrophil' cells in batch 0: 89
[INFO][2023-10-18 09:17:12,103][dance.CellTopicProfile][get_ct_profile] Aggregating 'Plasma' profiles over 895 samples
[INFO][2023-10-18 09:17:12,113][dance.CellTopicPro

True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'lung9_2',
 'device': 'auto',
 'lr': 0.01,
 'max_iter': 2250,
 'seed': 17}


[INFO][2023-10-18 09:17:15,715][dance][_load_raw_data] Number of cell types: reference = 15, real = 22
[INFO][2023-10-18 09:17:15,717][dance][_load_raw_data] Subsetting to common cell types (n=11):
['B_cell',
 'Endothelial',
 'Epithelial',
 'Fibroblast',
 'Macrophage',
 'Mast',
 'Monocyte',
 'Myeloid',
 'NK_cell',
 'Neutrophil',
 'Plasma']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:17:15,870][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 17765 × 923
    obs: 'cellname', 'sample', 'cellType', 'malignant', 'source', 'cell_subtype', 'complexity', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:17:15,871][dance.Compose][__call__] Applying composed transformations:
Compose(
  CellTopicProfile(ct_select='auto', ct_key='cellType', split_name='ref', method='median'),
  SetConfig(config_dict={'label_channel': 'cell_type_portion'}),
)
[INFO][2023-10-18 09:17:15,917][dance.CellTo

True


[INFO][2023-10-18 09:17:16,133][dance.CellTopicProfile][get_ct_profile] Number of 'Monocyte' cells in batch 0: 1,593
[INFO][2023-10-18 09:17:16,136][dance.CellTopicProfile][get_ct_profile] Aggregating 'Myeloid' profiles over 1,413 samples
[INFO][2023-10-18 09:17:16,155][dance.CellTopicProfile][get_ct_profile] Number of 'Myeloid' cells in batch 0: 1,413
[INFO][2023-10-18 09:17:16,157][dance.CellTopicProfile][get_ct_profile] Aggregating 'NK_cell' profiles over 1,638 samples
[INFO][2023-10-18 09:17:16,174][dance.CellTopicProfile][get_ct_profile] Number of 'NK_cell' cells in batch 0: 1,638
[INFO][2023-10-18 09:17:16,176][dance.CellTopicProfile][get_ct_profile] Aggregating 'Neutrophil' profiles over 89 samples
[INFO][2023-10-18 09:17:16,179][dance.CellTopicProfile][get_ct_profile] Number of 'Neutrophil' cells in batch 0: 89
[INFO][2023-10-18 09:17:16,181][dance.CellTopicProfile][get_ct_profile] Aggregating 'Plasma' profiles over 895 samples
[INFO][2023-10-18 09:17:16,191][dance.CellTopicPro

True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'hcc_liver',
 'device': 'auto',
 'lr': 0.01,
 'max_iter': 2250,
 'seed': 17}


[INFO][2023-10-18 09:17:20,227][dance][_load_raw_data] Number of cell types: reference = 10, real = 14
[INFO][2023-10-18 09:17:20,228][dance][_load_raw_data] Subsetting to common cell types (n=6):
['B_cell', 'Endothelial', 'Epithelial', 'HSC', 'NK_cell', 'T_cell']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:17:20,316][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 13785 × 977
    obs: 'cell_name', 'sample', 'source', 'disease', 'cellType', 'cell_subtype_clusters', 'complexity', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:17:20,318][dance.Compose][__call__] Applying composed transformations:
Compose(
  CellTopicProfile(ct_select='auto', ct_key='cellType', split_name='ref', method='median'),
  SetConfig(config_dict={'label_channel': 'cell_type_portion'}),
)
[INFO][2023-10-18 09:17:20,339][dance.CellTopicProfile][get_ct_profile] Generating cell-type profiles ('median' a

True


[INFO][2023-10-18 09:17:20,597][dance][fit] Epoch: 100/2250 Loss: 6.85403e+00
[INFO][2023-10-18 09:17:20,691][dance][fit] Epoch: 200/2250 Loss: 5.48648e+00
[INFO][2023-10-18 09:17:20,790][dance][fit] Epoch: 300/2250 Loss: 4.71783e+00
[INFO][2023-10-18 09:17:20,891][dance][fit] Epoch: 400/2250 Loss: 4.19782e+00
[INFO][2023-10-18 09:17:21,004][dance][fit] Epoch: 500/2250 Loss: 3.81337e+00
[INFO][2023-10-18 09:17:21,114][dance][fit] Epoch: 600/2250 Loss: 3.51343e+00
[INFO][2023-10-18 09:17:21,220][dance][fit] Epoch: 700/2250 Loss: 3.27070e+00
[INFO][2023-10-18 09:17:21,328][dance][fit] Epoch: 800/2250 Loss: 3.06897e+00
[INFO][2023-10-18 09:17:21,433][dance][fit] Epoch: 900/2250 Loss: 2.89789e+00
[INFO][2023-10-18 09:17:21,539][dance][fit] Epoch: 1000/2250 Loss: 2.75047e+00
[INFO][2023-10-18 09:17:21,644][dance][fit] Epoch: 1100/2250 Loss: 2.62178e+00
[INFO][2023-10-18 09:17:21,750][dance][fit] Epoch: 1200/2250 Loss: 2.50825e+00
[INFO][2023-10-18 09:17:21,856][dance][fit] Epoch: 1300/2250 

True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'normal_liver',
 'device': 'auto',
 'lr': 0.01,
 'max_iter': 2250,
 'seed': 17}


[INFO][2023-10-18 09:17:23,093][dance.CellTopicProfile][get_ct_profile] Generating cell-type profiles ('median' aggregation) for ['alpha-beta T cell', 'central venous liver sinusoidal endothelial cell', 'cholangiocyte', 'erythroid cell', 'gamma-delta T cell', 'hepatic stellate cell', 'hepatocyte', 'inflammatory macrophage', 'mature B cell', 'natural killer cell', 'periportal liver sinusoidal endothelial cell', 'portal liver sinusoidal endothelial cell']
[INFO][2023-10-18 09:17:23,099][dance.CellTopicProfile][get_ct_profile] Aggregating 'alpha-beta T cell' profiles over 961 samples
[INFO][2023-10-18 09:17:23,107][dance.CellTopicProfile][get_ct_profile] Number of 'alpha-beta T cell' cells in batch 0: 961
[INFO][2023-10-18 09:17:23,108][dance.CellTopicProfile][get_ct_profile] Aggregating 'central venous liver sinusoidal endothelial cell' profiles over 306 samples
[INFO][2023-10-18 09:17:23,112][dance.CellTopicProfile][get_ct_profile] Number of 'central venous liver sinusoidal endothelial 

True


[INFO][2023-10-18 09:17:23,389][dance][fit] Epoch: 200/2250 Loss: 2.85876e+00
[INFO][2023-10-18 09:17:23,492][dance][fit] Epoch: 300/2250 Loss: 2.36984e+00
[INFO][2023-10-18 09:17:23,598][dance][fit] Epoch: 400/2250 Loss: 2.05215e+00
[INFO][2023-10-18 09:17:23,703][dance][fit] Epoch: 500/2250 Loss: 1.82592e+00
[INFO][2023-10-18 09:17:23,814][dance][fit] Epoch: 600/2250 Loss: 1.65545e+00
[INFO][2023-10-18 09:17:23,920][dance][fit] Epoch: 700/2250 Loss: 1.52199e+00
[INFO][2023-10-18 09:17:24,027][dance][fit] Epoch: 800/2250 Loss: 1.41450e+00
[INFO][2023-10-18 09:17:24,133][dance][fit] Epoch: 900/2250 Loss: 1.32607e+00
[INFO][2023-10-18 09:17:24,243][dance][fit] Epoch: 1000/2250 Loss: 1.25207e+00
[INFO][2023-10-18 09:17:24,351][dance][fit] Epoch: 1100/2250 Loss: 1.18928e+00
[INFO][2023-10-18 09:17:24,463][dance][fit] Epoch: 1200/2250 Loss: 1.13541e+00
[INFO][2023-10-18 09:17:24,575][dance][fit] Epoch: 1300/2250 Loss: 1.08874e+00
[INFO][2023-10-18 09:17:24,684][dance][fit] Epoch: 1400/2250

True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_1139',
 'device': 'auto',
 'lr': 0.01,
 'max_iter': 2250,
 'seed': 17}


[INFO][2023-10-18 09:17:25,979][dance][_load_raw_data] Number of cell types: reference = 31, real = 33
[INFO][2023-10-18 09:17:25,980][dance][_load_raw_data] Subsetting to common cell types (n=28):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Neutrophil',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:17:26,219][d

True


[INFO][2023-10-18 09:17:26,733][dance.CellTopicProfile][get_ct_profile] Number of 'Proximal tubule' cells in batch 0: 27,497
[INFO][2023-10-18 09:17:26,735][dance.CellTopicProfile][get_ct_profile] Aggregating 'Thick ascending limb of Loop of Henle' profiles over 675 samples
[INFO][2023-10-18 09:17:26,739][dance.CellTopicProfile][get_ct_profile] Number of 'Thick ascending limb of Loop of Henle' cells in batch 0: 675
[INFO][2023-10-18 09:17:26,741][dance.CellTopicProfile][get_ct_profile] Aggregating 'Type A intercalated cell' profiles over 275 samples
[INFO][2023-10-18 09:17:26,743][dance.CellTopicProfile][get_ct_profile] Number of 'Type A intercalated cell' cells in batch 0: 275
[INFO][2023-10-18 09:17:26,745][dance.CellTopicProfile][get_ct_profile] Aggregating 'Type B intercalated cell' profiles over 125 samples
[INFO][2023-10-18 09:17:26,747][dance.CellTopicProfile][get_ct_profile] Number of 'Type B intercalated cell' cells in batch 0: 125
[INFO][2023-10-18 09:17:26,748][dance.CellTop

True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_10838',
 'device': 'auto',
 'lr': 0.01,
 'max_iter': 2250,
 'seed': 17}


[INFO][2023-10-18 09:17:29,310][dance][_load_raw_data] Number of cell types: reference = 31, real = 35
[INFO][2023-10-18 09:17:29,311][dance][_load_raw_data] Subsetting to common cell types (n=29):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Neutrophil',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO

True


[INFO][2023-10-18 09:17:30,080][dance.CellTopicProfile][get_ct_profile] Number of 'Proximal tubule' cells in batch 0: 27,497
[INFO][2023-10-18 09:17:30,082][dance.CellTopicProfile][get_ct_profile] Aggregating 'Thick ascending limb of Loop of Henle' profiles over 675 samples
[INFO][2023-10-18 09:17:30,086][dance.CellTopicProfile][get_ct_profile] Number of 'Thick ascending limb of Loop of Henle' cells in batch 0: 675
[INFO][2023-10-18 09:17:30,088][dance.CellTopicProfile][get_ct_profile] Aggregating 'Transitional urothelium' profiles over 45 samples
[INFO][2023-10-18 09:17:30,089][dance.CellTopicProfile][get_ct_profile] Number of 'Transitional urothelium' cells in batch 0: 45
[INFO][2023-10-18 09:17:30,091][dance.CellTopicProfile][get_ct_profile] Aggregating 'Type A intercalated cell' profiles over 275 samples
[INFO][2023-10-18 09:17:30,094][dance.CellTopicProfile][get_ct_profile] Number of 'Type A intercalated cell' cells in batch 0: 275
[INFO][2023-10-18 09:17:30,095][dance.CellTopicPr

True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_3323',
 'device': 'auto',
 'lr': 0.01,
 'max_iter': 2250,
 'seed': 17}


[INFO][2023-10-18 09:17:32,692][dance][_load_raw_data] Number of cell types: reference = 31, real = 35
[INFO][2023-10-18 09:17:32,693][dance][_load_raw_data] Subsetting to common cell types (n=29):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Neutrophil',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO

True


[INFO][2023-10-18 09:17:33,525][dance.CellTopicProfile][get_ct_profile] Number of 'Proximal tubule' cells in batch 0: 27,497
[INFO][2023-10-18 09:17:33,528][dance.CellTopicProfile][get_ct_profile] Aggregating 'Thick ascending limb of Loop of Henle' profiles over 675 samples
[INFO][2023-10-18 09:17:33,534][dance.CellTopicProfile][get_ct_profile] Number of 'Thick ascending limb of Loop of Henle' cells in batch 0: 675
[INFO][2023-10-18 09:17:33,536][dance.CellTopicProfile][get_ct_profile] Aggregating 'Transitional urothelium' profiles over 45 samples
[INFO][2023-10-18 09:17:33,537][dance.CellTopicProfile][get_ct_profile] Number of 'Transitional urothelium' cells in batch 0: 45
[INFO][2023-10-18 09:17:33,539][dance.CellTopicProfile][get_ct_profile] Aggregating 'Type A intercalated cell' profiles over 275 samples
[INFO][2023-10-18 09:17:33,542][dance.CellTopicProfile][get_ct_profile] Number of 'Type A intercalated cell' cells in batch 0: 275
[INFO][2023-10-18 09:17:33,544][dance.CellTopicPr

True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_642',
 'device': 'auto',
 'lr': 0.01,
 'max_iter': 2250,
 'seed': 17}


[INFO][2023-10-18 09:17:37,314][dance][_load_raw_data] Number of cell types: reference = 31, real = 32
[INFO][2023-10-18 09:17:37,315][dance][_load_raw_data] Subsetting to common cell types (n=27):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'NK cell',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:17:37,552][da

True


[INFO][2023-10-18 09:17:38,058][dance.CellTopicProfile][get_ct_profile] Number of 'Proximal tubule' cells in batch 0: 27,497
[INFO][2023-10-18 09:17:38,060][dance.CellTopicProfile][get_ct_profile] Aggregating 'Thick ascending limb of Loop of Henle' profiles over 675 samples
[INFO][2023-10-18 09:17:38,065][dance.CellTopicProfile][get_ct_profile] Number of 'Thick ascending limb of Loop of Henle' cells in batch 0: 675
[INFO][2023-10-18 09:17:38,067][dance.CellTopicProfile][get_ct_profile] Aggregating 'Transitional urothelium' profiles over 45 samples
[INFO][2023-10-18 09:17:38,068][dance.CellTopicProfile][get_ct_profile] Number of 'Transitional urothelium' cells in batch 0: 45
[INFO][2023-10-18 09:17:38,070][dance.CellTopicProfile][get_ct_profile] Aggregating 'Type A intercalated cell' profiles over 275 samples
[INFO][2023-10-18 09:17:38,072][dance.CellTopicProfile][get_ct_profile] Number of 'Type A intercalated cell' cells in batch 0: 275
[INFO][2023-10-18 09:17:38,073][dance.CellTopicPr

True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_8693',
 'device': 'auto',
 'lr': 0.01,
 'max_iter': 2250,
 'seed': 17}


[INFO][2023-10-18 09:17:41,370][dance][_load_raw_data] Number of cell types: reference = 31, real = 34
[INFO][2023-10-18 09:17:41,371][dance][_load_raw_data] Subsetting to common cell types (n=28):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09

True


[INFO][2023-10-18 09:17:42,300][dance.CellTopicProfile][get_ct_profile] Number of 'Proximal tubule' cells in batch 0: 27,497
[INFO][2023-10-18 09:17:42,302][dance.CellTopicProfile][get_ct_profile] Aggregating 'Thick ascending limb of Loop of Henle' profiles over 675 samples
[INFO][2023-10-18 09:17:42,309][dance.CellTopicProfile][get_ct_profile] Number of 'Thick ascending limb of Loop of Henle' cells in batch 0: 675
[INFO][2023-10-18 09:17:42,311][dance.CellTopicProfile][get_ct_profile] Aggregating 'Transitional urothelium' profiles over 45 samples
[INFO][2023-10-18 09:17:42,312][dance.CellTopicProfile][get_ct_profile] Number of 'Transitional urothelium' cells in batch 0: 45
[INFO][2023-10-18 09:17:42,314][dance.CellTopicProfile][get_ct_profile] Aggregating 'Type A intercalated cell' profiles over 275 samples
[INFO][2023-10-18 09:17:42,317][dance.CellTopicProfile][get_ct_profile] Number of 'Type A intercalated cell' cells in batch 0: 275
[INFO][2023-10-18 09:17:42,319][dance.CellTopicPr

True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_2566',
 'device': 'auto',
 'lr': 0.01,
 'max_iter': 2250,
 'seed': 17}


[INFO][2023-10-18 09:17:45,908][dance][_load_raw_data] Number of cell types: reference = 31, real = 35
[INFO][2023-10-18 09:17:45,909][dance][_load_raw_data] Subsetting to common cell types (n=29):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Neutrophil',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO

True


[INFO][2023-10-18 09:17:46,833][dance.CellTopicProfile][get_ct_profile] Number of 'Proximal tubule' cells in batch 0: 27,497
[INFO][2023-10-18 09:17:46,836][dance.CellTopicProfile][get_ct_profile] Aggregating 'Thick ascending limb of Loop of Henle' profiles over 675 samples
[INFO][2023-10-18 09:17:46,842][dance.CellTopicProfile][get_ct_profile] Number of 'Thick ascending limb of Loop of Henle' cells in batch 0: 675
[INFO][2023-10-18 09:17:46,844][dance.CellTopicProfile][get_ct_profile] Aggregating 'Transitional urothelium' profiles over 45 samples
[INFO][2023-10-18 09:17:46,846][dance.CellTopicProfile][get_ct_profile] Number of 'Transitional urothelium' cells in batch 0: 45
[INFO][2023-10-18 09:17:46,847][dance.CellTopicProfile][get_ct_profile] Aggregating 'Type A intercalated cell' profiles over 275 samples
[INFO][2023-10-18 09:17:46,850][dance.CellTopicProfile][get_ct_profile] Number of 'Type A intercalated cell' cells in batch 0: 275
[INFO][2023-10-18 09:17:46,852][dance.CellTopicPr

True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_213',
 'device': 'auto',
 'lr': 0.01,
 'max_iter': 2250,
 'seed': 17}


[INFO][2023-10-18 09:17:50,014][dance][_load_raw_data] Number of cell types: reference = 31, real = 33
[INFO][2023-10-18 09:17:50,015][dance][_load_raw_data] Subsetting to common cell types (n=27):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:17:50,303][dance][load_data

True


[INFO][2023-10-18 09:17:50,962][dance.CellTopicProfile][get_ct_profile] Number of 'Proximal tubule' cells in batch 0: 27,497
[INFO][2023-10-18 09:17:50,964][dance.CellTopicProfile][get_ct_profile] Aggregating 'Thick ascending limb of Loop of Henle' profiles over 675 samples
[INFO][2023-10-18 09:17:50,970][dance.CellTopicProfile][get_ct_profile] Number of 'Thick ascending limb of Loop of Henle' cells in batch 0: 675
[INFO][2023-10-18 09:17:50,972][dance.CellTopicProfile][get_ct_profile] Aggregating 'Type A intercalated cell' profiles over 275 samples
[INFO][2023-10-18 09:17:50,975][dance.CellTopicProfile][get_ct_profile] Number of 'Type A intercalated cell' cells in batch 0: 275
[INFO][2023-10-18 09:17:50,977][dance.CellTopicProfile][get_ct_profile] Aggregating 'Type B intercalated cell' profiles over 125 samples
[INFO][2023-10-18 09:17:50,979][dance.CellTopicProfile][get_ct_profile] Number of 'Type B intercalated cell' cells in batch 0: 125
[INFO][2023-10-18 09:17:50,980][dance.CellTop

True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_4061',
 'device': 'auto',
 'lr': 0.01,
 'max_iter': 2250,
 'seed': 17}


[INFO][2023-10-18 09:17:54,482][dance][_load_raw_data] Number of cell types: reference = 31, real = 35
[INFO][2023-10-18 09:17:54,483][dance][_load_raw_data] Subsetting to common cell types (n=29):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Neutrophil',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO

True


[INFO][2023-10-18 09:17:55,416][dance.CellTopicProfile][get_ct_profile] Number of 'Proximal tubule' cells in batch 0: 27,497
[INFO][2023-10-18 09:17:55,419][dance.CellTopicProfile][get_ct_profile] Aggregating 'Thick ascending limb of Loop of Henle' profiles over 675 samples
[INFO][2023-10-18 09:17:55,425][dance.CellTopicProfile][get_ct_profile] Number of 'Thick ascending limb of Loop of Henle' cells in batch 0: 675
[INFO][2023-10-18 09:17:55,427][dance.CellTopicProfile][get_ct_profile] Aggregating 'Transitional urothelium' profiles over 45 samples
[INFO][2023-10-18 09:17:55,429][dance.CellTopicProfile][get_ct_profile] Number of 'Transitional urothelium' cells in batch 0: 45
[INFO][2023-10-18 09:17:55,431][dance.CellTopicProfile][get_ct_profile] Aggregating 'Type A intercalated cell' profiles over 275 samples
[INFO][2023-10-18 09:17:55,435][dance.CellTopicProfile][get_ct_profile] Number of 'Type A intercalated cell' cells in batch 0: 275
[INFO][2023-10-18 09:17:55,437][dance.CellTopicPr

True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_1098',
 'device': 'auto',
 'lr': 0.01,
 'max_iter': 2250,
 'seed': 17}


[INFO][2023-10-18 09:17:58,455][dance][_load_raw_data] Number of cell types: reference = 31, real = 35
[INFO][2023-10-18 09:17:58,456][dance][_load_raw_data] Subsetting to common cell types (n=29):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Neutrophil',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO

True


[INFO][2023-10-18 09:17:59,380][dance.CellTopicProfile][get_ct_profile] Number of 'Proximal tubule' cells in batch 0: 27,497
[INFO][2023-10-18 09:17:59,382][dance.CellTopicProfile][get_ct_profile] Aggregating 'Thick ascending limb of Loop of Henle' profiles over 675 samples
[INFO][2023-10-18 09:17:59,388][dance.CellTopicProfile][get_ct_profile] Number of 'Thick ascending limb of Loop of Henle' cells in batch 0: 675
[INFO][2023-10-18 09:17:59,390][dance.CellTopicProfile][get_ct_profile] Aggregating 'Transitional urothelium' profiles over 45 samples
[INFO][2023-10-18 09:17:59,392][dance.CellTopicProfile][get_ct_profile] Number of 'Transitional urothelium' cells in batch 0: 45
[INFO][2023-10-18 09:17:59,394][dance.CellTopicProfile][get_ct_profile] Aggregating 'Type A intercalated cell' profiles over 275 samples
[INFO][2023-10-18 09:17:59,397][dance.CellTopicProfile][get_ct_profile] Number of 'Type A intercalated cell' cells in batch 0: 275
[INFO][2023-10-18 09:17:59,399][dance.CellTopicPr

True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_8471',
 'device': 'auto',
 'lr': 0.01,
 'max_iter': 2250,
 'seed': 17}


[INFO][2023-10-18 09:18:02,808][dance][_load_raw_data] Number of cell types: reference = 31, real = 34
[INFO][2023-10-18 09:18:02,809][dance][_load_raw_data] Subsetting to common cell types (n=28):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09

True


[INFO][2023-10-18 09:18:03,729][dance.CellTopicProfile][get_ct_profile] Number of 'Proximal tubule' cells in batch 0: 27,497
[INFO][2023-10-18 09:18:03,731][dance.CellTopicProfile][get_ct_profile] Aggregating 'Thick ascending limb of Loop of Henle' profiles over 675 samples
[INFO][2023-10-18 09:18:03,737][dance.CellTopicProfile][get_ct_profile] Number of 'Thick ascending limb of Loop of Henle' cells in batch 0: 675
[INFO][2023-10-18 09:18:03,739][dance.CellTopicProfile][get_ct_profile] Aggregating 'Transitional urothelium' profiles over 45 samples
[INFO][2023-10-18 09:18:03,742][dance.CellTopicProfile][get_ct_profile] Number of 'Transitional urothelium' cells in batch 0: 45
[INFO][2023-10-18 09:18:03,744][dance.CellTopicProfile][get_ct_profile] Aggregating 'Type A intercalated cell' profiles over 275 samples
[INFO][2023-10-18 09:18:03,748][dance.CellTopicProfile][get_ct_profile] Number of 'Type A intercalated cell' cells in batch 0: 275
[INFO][2023-10-18 09:18:03,750][dance.CellTopicPr

True
True


'To reproduce SpatialDecon benchmarks, please refer to command lines belows:\n\nCARD synthetic $ python spatialdecon.py --dataset CARD_synthetic --lr .01 --max_iter\n2250 --bias 1\n\nGSE174746 $ python spatialdecon.py --dataset GSE174746 --lr .0001 --max_iter 20000\n--bias 1\n\nSPOTLight synthetic $ python spatialdecon.py --dataset SPOTLight_synthetic --lr .01\n--max_iter 500 --bias 1\n\n'

In [7]:
SpatialDecon_scores

[0.020546548,
 0.012112096,
 0.024561495,
 0.029576628,
 0.037299387,
 0.004355375,
 0.002765877,
 0.006421732,
 0.005125333,
 0.0069700954,
 0.005393536,
 0.003511426,
 0.0067862123,
 0.0071246694,
 0.0050303894]

In [8]:
import argparse
from pprint import pprint

from dance.datasets.spatial import CellTypeDeconvoDataset
from dance.modules.spatial.cell_type_deconvo.spotlight import SPOTlight
from dance.utils import set_seed

parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("--cache", action="store_true", help="Cache processed data.")
parser.add_argument("--dataset", default="CARD_synthetic", choices=CellTypeDeconvoDataset.AVAILABLE_DATA)
parser.add_argument("--datadir", default="../../../../data/spatial", help="Directory to save the data.")
parser.add_argument("--lr", type=float, default=1e-3, help="Learning rate.")
parser.add_argument("--rank", type=int, default=2, help="Rank of the NMF module.")
parser.add_argument("--bias", type=bool, default=False, help="Include/Exclude bias term.")
parser.add_argument("--max_iter", type=int, default=4000, help="Maximum optimization iteration.")
parser.add_argument("--seed", type=int, default=17, help="Random seed.")
parser.add_argument("--device", default="auto", help="Computation device.")
SPOTlight_scores=[]
for dataset in datasets:
    try:
        args = parser.parse_args(["--dataset",dataset,"--lr",".1","--max_iter","100","--rank","8","--bias","0"])
        set_seed(args.seed)
        pprint(vars(args))

        # Load dataset
        preprocessing_pipeline = SPOTlight.preprocessing_pipeline()
        dataset = CellTypeDeconvoDataset(data_dir=args.datadir, data_id=args.dataset)
        data = dataset.load_data(transform=preprocessing_pipeline, cache=args.cache)
        cell_types = data.data.obsm["cell_type_portion"].columns.tolist()

        x, y = data.get_data(split_name="test", return_type="torch")
        ref_count = data.get_feature(split_name="ref", return_type="numpy")
        ref_annot = data.get_feature(split_name="ref", return_type="numpy", channel="cellType", channel_type="obs")

        # Train and evaluate model
        model = SPOTlight(ref_count, ref_annot, cell_types, rank=args.rank, bias=args.bias, device=args.device)
        score = model.fit_score(x, y, lr=args.lr, max_iter=args.max_iter)
    except Exception as e:
        score=e
    finally:
        SPOTlight_scores.append(score)
"""To reproduce SpatialDecon benchmarks, please refer to command lines belows:

CARD_synthetic $ python spotlight.py --dataset CARD_synthetic --lr .1 --max_iter 100
--rank 8 --bias 0

GSE174746 $ python spotlight.py --dataset GSE174746 --lr .1 --max_iter 15000 --rank 4
--bias 0

SPOTLight synthetic $ python spotlight.py --dataset SPOTLight_synthetic --lr .1
--max_iter 150 --rank 10 --bias 0

"""


[INFO][2023-10-18 09:18:07,142][dance][set_seed] Setting global random seed to 17


{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'lung6',
 'device': 'auto',
 'lr': 0.1,
 'max_iter': 100,
 'rank': 8,
 'seed': 17}


[INFO][2023-10-18 09:18:07,419][dance][_load_raw_data] Number of cell types: reference = 15, real = 22
[INFO][2023-10-18 09:18:07,420][dance][_load_raw_data] Subsetting to common cell types (n=11):
['B_cell',
 'Endothelial',
 'Epithelial',
 'Fibroblast',
 'Macrophage',
 'Mast',
 'Monocyte',
 'Myeloid',
 'NK_cell',
 'Neutrophil',
 'Plasma']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:18:07,543][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 17465 × 923
    obs: 'cellname', 'sample', 'cellType', 'malignant', 'source', 'cell_subtype', 'complexity', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:18:07,544][dance.SetConfig][__call__] Updating the dance data object config options:
{'label_channel': 'cell_type_portion'}
[INFO][2023-10-18 09:18:07,545][dance][set_config_from_dict] Setting config 'label_channel' to 'cell_type_portion'
[INFO][2023-10-18 09:18:07,546][dance][load_

True
True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'lung9_1',
 'device': 'auto',
 'lr': 0.1,
 'max_iter': 100,
 'rank': 8,
 'seed': 17}


[INFO][2023-10-18 09:18:08,162][dance][_load_raw_data] Number of cell types: reference = 15, real = 22
[INFO][2023-10-18 09:18:08,165][dance][_load_raw_data] Subsetting to common cell types (n=11):
['B_cell',
 'Endothelial',
 'Epithelial',
 'Fibroblast',
 'Macrophage',
 'Mast',
 'Monocyte',
 'Myeloid',
 'NK_cell',
 'Neutrophil',
 'Plasma']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:18:08,290][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 17265 × 923
    obs: 'cellname', 'sample', 'cellType', 'malignant', 'source', 'cell_subtype', 'complexity', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:18:08,291][dance.SetConfig][__call__] Updating the dance data object config options:
{'label_channel': 'cell_type_portion'}
[INFO][2023-10-18 09:18:08,292][dance][set_config_from_dict] Setting config 'label_channel' to 'cell_type_portion'
[INFO][2023-10-18 09:18:08,292][dance][load_

True
True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'lung9_2',
 'device': 'auto',
 'lr': 0.1,
 'max_iter': 100,
 'rank': 8,
 'seed': 17}


[INFO][2023-10-18 09:18:08,883][dance][_load_raw_data] Number of cell types: reference = 15, real = 22
[INFO][2023-10-18 09:18:08,884][dance][_load_raw_data] Subsetting to common cell types (n=11):
['B_cell',
 'Endothelial',
 'Epithelial',
 'Fibroblast',
 'Macrophage',
 'Mast',
 'Monocyte',
 'Myeloid',
 'NK_cell',
 'Neutrophil',
 'Plasma']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:18:09,006][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 17765 × 923
    obs: 'cellname', 'sample', 'cellType', 'malignant', 'source', 'cell_subtype', 'complexity', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:18:09,007][dance.SetConfig][__call__] Updating the dance data object config options:
{'label_channel': 'cell_type_portion'}
[INFO][2023-10-18 09:18:09,007][dance][set_config_from_dict] Setting config 'label_channel' to 'cell_type_portion'
[INFO][2023-10-18 09:18:09,008][dance][load_

True
True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'hcc_liver',
 'device': 'auto',
 'lr': 0.1,
 'max_iter': 100,
 'rank': 8,
 'seed': 17}


[INFO][2023-10-18 09:18:09,601][dance][_load_raw_data] Number of cell types: reference = 10, real = 14
[INFO][2023-10-18 09:18:09,602][dance][_load_raw_data] Subsetting to common cell types (n=6):
['B_cell', 'Endothelial', 'Epithelial', 'HSC', 'NK_cell', 'T_cell']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:18:09,707][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 13785 × 977
    obs: 'cell_name', 'sample', 'source', 'disease', 'cellType', 'cell_subtype_clusters', 'complexity', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:18:09,708][dance.SetConfig][__call__] Updating the dance data object config options:
{'label_channel': 'cell_type_portion'}
[INFO][2023-10-18 09:18:09,709][dance][set_config_from_dict] Setting config 'label_channel' to 'cell_type_portion'
[INFO][2023-10-18 09:18:09,710][dance][load_data] Data transformed:
Data object that wraps (.data):
AnnData objec

True
True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'normal_liver',
 'device': 'auto',
 'lr': 0.1,
 'max_iter': 100,
 'rank': 8,
 'seed': 17}


[INFO][2023-10-18 09:18:10,069][dance][_load_raw_data] Number of cell types: reference = 14, real = 15
[INFO][2023-10-18 09:18:10,070][dance][_load_raw_data] Subsetting to common cell types (n=12):
['alpha-beta T cell',
 'central venous liver sinusoidal endothelial cell',
 'cholangiocyte',
 'erythroid cell',
 'gamma-delta T cell',
 'hepatic stellate cell',
 'hepatocyte',
 'inflammatory macrophage',
 'mature B cell',
 'natural killer cell',
 'periportal liver sinusoidal endothelial cell',
 'portal liver sinusoidal endothelial cell']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:18:10,146][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 10263 × 933
    obs: 'cellType', 'batch'
    uns: 'dance_config'
    obsm: 'cell_type_portion', 'spatial'
[INFO][2023-10-18 09:18:10,147][dance.SetConfig][__call__] Updating the dance data object config options:
{'label_channel': 'cell_type_portion'}
[INFO][2023-10-18 09:18:10,147][dance]

True
True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_1139',
 'device': 'auto',
 'lr': 0.1,
 'max_iter': 100,
 'rank': 8,
 'seed': 17}


[INFO][2023-10-18 09:18:10,604][dance][_load_raw_data] Number of cell types: reference = 31, real = 33
[INFO][2023-10-18 09:18:10,605][dance][_load_raw_data] Subsetting to common cell types (n=28):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Neutrophil',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:18:10,848][d

True
True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_10838',
 'device': 'auto',
 'lr': 0.1,
 'max_iter': 100,
 'rank': 8,
 'seed': 17}


[INFO][2023-10-18 09:18:11,644][dance][_load_raw_data] Number of cell types: reference = 31, real = 35
[INFO][2023-10-18 09:18:11,645][dance][_load_raw_data] Subsetting to common cell types (n=29):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Neutrophil',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO

True
True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_3323',
 'device': 'auto',
 'lr': 0.1,
 'max_iter': 100,
 'rank': 8,
 'seed': 17}


[INFO][2023-10-18 09:18:12,771][dance][_load_raw_data] Number of cell types: reference = 31, real = 35
[INFO][2023-10-18 09:18:12,772][dance][_load_raw_data] Subsetting to common cell types (n=29):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Neutrophil',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO

True
True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_642',
 'device': 'auto',
 'lr': 0.1,
 'max_iter': 100,
 'rank': 8,
 'seed': 17}


[INFO][2023-10-18 09:18:13,910][dance][_load_raw_data] Number of cell types: reference = 31, real = 32
[INFO][2023-10-18 09:18:13,911][dance][_load_raw_data] Subsetting to common cell types (n=27):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'NK cell',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:18:14,147][da

True
True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_8693',
 'device': 'auto',
 'lr': 0.1,
 'max_iter': 100,
 'rank': 8,
 'seed': 17}


[INFO][2023-10-18 09:18:15,057][dance][_load_raw_data] Number of cell types: reference = 31, real = 34
[INFO][2023-10-18 09:18:15,058][dance][_load_raw_data] Subsetting to common cell types (n=28):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09

True
True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_2566',
 'device': 'auto',
 'lr': 0.1,
 'max_iter': 100,
 'rank': 8,
 'seed': 17}


[INFO][2023-10-18 09:18:16,130][dance][_load_raw_data] Number of cell types: reference = 31, real = 35
[INFO][2023-10-18 09:18:16,132][dance][_load_raw_data] Subsetting to common cell types (n=29):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Neutrophil',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO

True
True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_213',
 'device': 'auto',
 'lr': 0.1,
 'max_iter': 100,
 'rank': 8,
 'seed': 17}


[INFO][2023-10-18 09:18:17,300][dance][_load_raw_data] Number of cell types: reference = 31, real = 33
[INFO][2023-10-18 09:18:17,301][dance][_load_raw_data] Subsetting to common cell types (n=27):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09:18:17,551][dance][load_data

True
True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_4061',
 'device': 'auto',
 'lr': 0.1,
 'max_iter': 100,
 'rank': 8,
 'seed': 17}


[INFO][2023-10-18 09:18:18,509][dance][_load_raw_data] Number of cell types: reference = 31, real = 35
[INFO][2023-10-18 09:18:18,510][dance][_load_raw_data] Subsetting to common cell types (n=29):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Neutrophil',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO

True
True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_1098',
 'device': 'auto',
 'lr': 0.1,
 'max_iter': 100,
 'rank': 8,
 'seed': 17}


[INFO][2023-10-18 09:18:19,672][dance][_load_raw_data] Number of cell types: reference = 31, real = 35
[INFO][2023-10-18 09:18:19,673][dance][_load_raw_data] Subsetting to common cell types (n=29):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Neutrophil',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO

True


[INFO][2023-10-18 09:18:20,611][dance][set_seed] Setting global random seed to 17


True
True
{'bias': True,
 'cache': False,
 'datadir': '../../../../data/spatial',
 'dataset': 'kidney_8471',
 'device': 'auto',
 'lr': 0.1,
 'max_iter': 100,
 'rank': 8,
 'seed': 17}


[INFO][2023-10-18 09:18:20,841][dance][_load_raw_data] Number of cell types: reference = 31, real = 34
[INFO][2023-10-18 09:18:20,841][dance][_load_raw_data] Subsetting to common cell types (n=28):
['Ascending vasa recta endothelium',
 'B cell',
 'CD4 T cell',
 'CD8 T cell',
 'Connecting tubule',
 'Descending vasa recta endothelium',
 'Distinct proximal tubule 1',
 'Distinct proximal tubule 2',
 'Epithelial progenitor cell',
 'Fibroblast',
 'Glomerular endothelium',
 'Indistinct intercalated cell',
 'MNP-a/classical monocyte derived',
 'MNP-b/non-classical monocyte derived',
 'Mast cell',
 'NK cell',
 'Pelvic epithelium',
 'Peritubular capillary endothelium 1',
 'Peritubular capillary endothelium 2',
 'Podocyte',
 'Principal cell',
 'Proliferating Proximal Tubule',
 'Proximal tubule',
 'Thick ascending limb of Loop of Henle',
 'Transitional urothelium',
 'Type A intercalated cell',
 'Type B intercalated cell',
 'dendritic cell']
  utils.warn_names_duplicates("obs")
[INFO][2023-10-18 09

True
True
True


'To reproduce SpatialDecon benchmarks, please refer to command lines belows:\n\nCARD_synthetic $ python spotlight.py --dataset CARD_synthetic --lr .1 --max_iter 100\n--rank 8 --bias 0\n\nGSE174746 $ python spotlight.py --dataset GSE174746 --lr .1 --max_iter 15000 --rank 4\n--bias 0\n\nSPOTLight synthetic $ python spotlight.py --dataset SPOTLight_synthetic --lr .1\n--max_iter 150 --rank 10 --bias 0\n\n'

In [9]:
SPOTlight_scores

[0.024866568,
 0.018249623,
 0.04081243,
 0.032837987,
 0.024835482,
 0.0042338525,
 0.0025953983,
 0.0055743875,
 0.005686653,
 0.0083912825,
 0.0047201323,
 0.0035900192,
 0.0053713303,
 0.0060201143,
 0.004751296]