In [1]:
datasets= ["pbmc_data", "mouse_brain_data", "mouse_embryo_data", "human_breast_TGFb_data","human_breast_Dox_data","human_melanoma_data","mouse_visual_data"]

In [2]:
import argparse
from pprint import pformat

import scanpy as sc

from dance import logger
from dance.datasets.singlemodality import ImputationDataset
from dance.modules.single_modality.imputation.scgnn2 import ScGNN2
from dance.transforms import (AnnDataTransform, CellwiseMaskData, Compose, FilterCellsScanpy, FilterGenesScanpy,
                              FilterGenesTopK)
from dance.utils import set_seed
ScGNN2_scores=[]

# Parse arguments
parser = argparse.ArgumentParser(description="Main program for scGNN v2")
parser.add_argument("--dataset", default='mouse_brain_data', type=str, help="dataset id")

# Program related
parser.add_argument(
    "--use_bulk", action="store_true", default=False,
    help="(boolean, default False) If True, expect a bulk expression file and will run "
    "deconvolution and imputation")
parser.add_argument(
    "--given_cell_type_labels", action="store_true", default=False,
    help="(boolean, default False) If True, expect a cell type label file and will compute ARI "
    "against those labels")
parser.add_argument("--run_LTMG", action="store_true", default=False,
                    help="(boolean, default False) Not fully implemented")
parser.add_argument("--use_CCC", action="store_true", default=False,
                    help="(boolean, default False) Not fully implemented")
parser.add_argument(
    "--dropout_prob", type=float, default=0.1,
    help="(float, default 0.1) Probability that a non-zero value in the sc expression matrix will "
    "be set to zero. If this is set to 0, will not perform dropout or compute imputation error ")
parser.add_argument("--seed", type=int, default=1,
                    help="(int, default 1) Seed for torch and numpy random generators")
parser.add_argument("--total_epoch", type=int, default=31, help="(int, default 10) Total EM epochs")
parser.add_argument("--ari_threshold", type=float, default=0.95, help="(float, default 0.95) The threshold for ari")
parser.add_argument("--graph_change_threshold", type=float, default=0.01,
                    help="(float, default 0.01) The threshold for graph change")
parser.add_argument("--alpha", type=float, default=0.5, help="(float, default 0.5)")

# Data loading related
parser.add_argument(
    "--load_dataset_dir", type=str, default="/fs/ess/PCON0022/Edison/datasets",
    help="(str) Folder that stores all your datasets. For example, if your expression matrix is in "
    "/fs/ess/PCON1234/Brutus/datasets/12.Klein/T2000_expression.csv, this should be set to "
    "/fs/ess/PCON1234/Brutus/datasets")
parser.add_argument(
    "--load_dataset_name", type=str, default="12.Klein",
    help="(str) Folder that contains all the relevant input files. For example, if your expression "
    "matrix is in /fs/ess/PCON1234/Brutus/datasets/12.Klein/T2000_expression.csv, this should be "
    "set to 12.Klein")
parser.add_argument(
    "--load_use_benchmark", action="store_true", default=False,
    help="(boolean, default False) If True, expect the following files (replace DATASET_NAME with "
    "the input to the --load_dataset_name argument): `ind.DATASET_NAME.{x, tx, allx}`, "
    "`T2000_expression.csv`, `T2000_LTMG.txt`, `DATASET_NAME_cell_label.csv` if providing "
    "ground-truth cell type labels, and `DATASET_NAME_bulk.csv` if using bulk data")
parser.add_argument("--load_sc_dataset", type=str, default="", help="Not needed if using benchmark")
parser.add_argument("--load_bulk_dataset", type=str, default="", help="Not needed if using benchmark")
parser.add_argument("--load_cell_type_labels", type=str, default="", help="Not needed if using benchmark")
parser.add_argument("--load_LTMG", type=str, default=None, help="Not needed if using benchmark")

# Preprocess related
parser.add_argument("--preprocess_cell_cutoff", type=float, default=0.9, help="Not needed if using benchmark")
parser.add_argument("--preprocess_gene_cutoff", type=float, default=0.9, help="Not needed if using benchmark")
parser.add_argument("--preprocess_top_gene_select", type=int, default=2000, help="Not needed if using benchmark")

# Feature AE related
parser.add_argument(
    "--feature_AE_epoch", nargs=2, type=int, default=[500, 300],
    help="(two integers separated by a space, default 500 200) First number being non-EM epochs, "
    "second number being EM epochs")
parser.add_argument("--feature_AE_batch_size", type=int, default=12800, help="(int, default 12800) Batch size")
parser.add_argument("--feature_AE_learning_rate", type=float, default=1e-3,
                    help="(float, default 1e-3) Learning rate")
parser.add_argument(
    "--feature_AE_regu_strength", type=float, default=0.9,
    help="(float, default 0.9) In loss function, this is the weight on the LTMG regularization "
    "matrix")
parser.add_argument("--feature_AE_dropout_prob", type=float, default=0,
                    help="(float, default 0) The dropout probability for feature autoencoder")
parser.add_argument("--feature_AE_concat_prev_embed", type=str, default=None,
                    help="(str, default None) Choose from {'feature', 'graph'}")

# Graph AE related
parser.add_argument("--graph_AE_epoch", type=int, default=200,
                    help="(int, default 200) The epoch or graph autoencoder")
parser.add_argument(
    "--graph_AE_use_GAT", action="store_true", default=False,
    help="(boolean, default False) If true, will use GAT for GAE layers; otherwise will use GCN "
    "layers")
parser.add_argument("--graph_AE_GAT_dropout", type=float, default=0,
                    help="(int, default 0) The dropout probability for GAT")
parser.add_argument("--graph_AE_learning_rate", type=float, default=1e-2,
                    help="(float, default 1e-2) Learning rate")
parser.add_argument("--graph_AE_embedding_size", type=int, default=16,
                    help="(int, default 16) Graphh AE embedding size")
parser.add_argument(
    "--graph_AE_concat_prev_embed", action="store_true", default=False,
    help="(boolean, default False) If true, will concat GAE embed at t-1 with the inputted Feature "
    "AE embed at t for graph construction; else will construct graph using Feature AE embed only")
parser.add_argument("--graph_AE_normalize_embed", type=str, default=None,
                    help="(str, default None) Choose from {None, 'sum1', 'binary'}")
parser.add_argument("--graph_AE_graph_construction", type=str, default="v2",
                    help="(str, default v0) Choose from {'v0', 'v1', 'v2'}")
parser.add_argument("--graph_AE_neighborhood_factor", type=float, default=0.05, help="(int, default 10)")
parser.add_argument("--graph_AE_retain_weights", action="store_true", default=False,
                    help="(boolean, default False)")
parser.add_argument("--gat_multi_heads", type=int, default=2, help="(int, default 2)")
parser.add_argument("--gat_hid_embed", type=int, default=64, help="(int, default 64) The dim for hid_embed")

# Clustering related
parser.add_argument(
    "--clustering_louvain_only", action="store_true", default=False,
    help="(boolean, default False) If true, will use Louvain clustering only; otherwise, first use "
    "Louvain to determine clusters count (k), then perform KMeans.")
parser.add_argument(
    "--clustering_use_flexible_k", action="store_true", default=False,
    help="(boolean, default False) If true, will determine k using Louvain every epoch; otherwise, "
    "will rely on the k in the first epoch")
parser.add_argument("--clustering_embed", type=str, default="graph",
                    help="(str, default 'graph') Choose from {'feature', 'graph', 'both'}")
parser.add_argument("--clustering_method", type=str, default="KMeans",
                    help="(str, default 'KMeans') Choose from {'KMeans', 'AffinityPropagation'}")

# Cluster AE related
parser.add_argument("--cluster_AE_epoch", type=int, default=200, help="(int, default 200) The epoch for cluster AE")
parser.add_argument("--cluster_AE_batch_size", type=int, default=12800, help="(int, default 12800) Batch size")
parser.add_argument("--cluster_AE_learning_rate", type=float, default=1e-3,
                    help="(float, default 1e-3) Learning rate")
parser.add_argument(
    "--cluster_AE_regu_strength", type=float, default=0.9,
    help="(float, default 0.9) In loss function, this is the weight on the LTMG regularization "
    "matrix")
parser.add_argument("--cluster_AE_dropout_prob", type=float, default=0,
                    help="(float, default 0) The dropout probability for cluster AE")

# Deconvolution related
parser.add_argument("--deconv_opt1_learning_rate", type=float, default=1e-3,
                    help="(float, default 1e-3) learning rate")
parser.add_argument("--deconv_opt1_epoch", type=int, default=5000, help="(int, default 5000) epoch")
parser.add_argument("--deconv_opt1_epsilon", type=float, default=1e-4, help="(float, default 1e-4) epsilon")
parser.add_argument("--deconv_opt1_regu_strength", type=float, default=1e-2, help="(float, default 1e-2) strength")

parser.add_argument("--deconv_opt2_learning_rate", type=float, default=1e-1,
                    help="(float, default 1e-1) learning rate")
parser.add_argument("--deconv_opt2_epoch", type=int, default=500, help="(int, default 500) epoch")
parser.add_argument("--deconv_opt2_epsilon", type=float, default=1e-4, help="(float, default 1e-4) epsilon")
parser.add_argument("--deconv_opt2_regu_strength", type=float, default=1e-2, help="(float, default 1e-2) strength")

parser.add_argument("--deconv_opt3_learning_rate", type=float, default=1e-1, help="(float, default 1e-1)")
parser.add_argument("--deconv_opt3_epoch", type=int, default=150, help="(int, default 150) epoch")
parser.add_argument("--deconv_opt3_epsilon", type=float, default=1e-4, help="(float, default 1e-4) epsilon")
parser.add_argument("--deconv_opt3_regu_strength_1", type=float, default=0.8,
                    help="(float, default 0.8) strength_1")
parser.add_argument("--deconv_opt3_regu_strength_2", type=float, default=1e-2,
                    help="(float, default 1e-2) strength_2")
parser.add_argument("--deconv_opt3_regu_strength_3", type=float, default=1, help="(float, default 1) strength_3")

parser.add_argument("--deconv_tune_learning_rate", type=float, default=1e-2,
                    help="(float, default 1e-2) learning rate")
parser.add_argument("--deconv_tune_epoch", type=int, default=20, help="(int, default 20) epoch")
parser.add_argument("--deconv_tune_epsilon", type=float, default=1e-4, help="(float, default) epsilon")
for dataset in datasets:
    args = parser.parse_args(['--dataset',dataset,'--feature_AE_epoch','20','10','--cluster_AE_epoch','20','--total_epoch','2'])
    set_seed(args.seed)
    logger.info(pformat(vars(args)))

    preprocessing_pipeline = Compose(
        FilterGenesScanpy(min_cells=0.05),
        FilterCellsScanpy(min_genes=1),
        FilterGenesTopK(num_genes=200, mode="var"),
        CellwiseMaskData(),
        AnnDataTransform(sc.pp.log1p),
        log_level="INFO",
    )
    dataloader = ImputationDataset(data_dir="/home/zyxing/dance/data", dataset="mouse_visual_data")
    data = dataloader.load_data(transform=preprocessing_pipeline)

    x_train = data.data.X.A * data.data.layers["train_mask"]
    test_mask = data.data.layers["valid_mask"]

    model = ScGNN2(args)

    model.fit(x_train)
    test_mse = ((data.data.X.A[test_mask] - model.predict()[test_mask])**2).mean()
    ScGNN2_scores.append(test_mse)
    print(f"MSE: {test_mse:.4f}")
"""

Mouse Brain
$ python scgnn2.py --dataset mouse_brain_data --feature_AE_epoch 20 10 --cluster_AE_epoch 20 --total_epoch 2

Mouse Embryo
$ python scgnn2.py --dataset mouse_embryo_data --feature_AE_epoch 20 10 --cluster_AE_epoch 20 --total_epoch 2

PBMC
$ python scgnn2.py --dataset pbmc_data --feature_AE_epoch 20 10 --cluster_AE_epoch 20 --total_epoch 2

"""


[INFO][2023-09-08 21:48:42,758][dance][set_seed] Setting global random seed to 1
[INFO][2023-09-08 21:48:42,760][dance][<module>] {'alpha': 0.5,
 'ari_threshold': 0.95,
 'cluster_AE_batch_size': 12800,
 'cluster_AE_dropout_prob': 0,
 'cluster_AE_epoch': 20,
 'cluster_AE_learning_rate': 0.001,
 'cluster_AE_regu_strength': 0.9,
 'clustering_embed': 'graph',
 'clustering_louvain_only': False,
 'clustering_method': 'KMeans',
 'clustering_use_flexible_k': False,
 'dataset': 'pbmc_data',
 'deconv_opt1_epoch': 5000,
 'deconv_opt1_epsilon': 0.0001,
 'deconv_opt1_learning_rate': 0.001,
 'deconv_opt1_regu_strength': 0.01,
 'deconv_opt2_epoch': 500,
 'deconv_opt2_epsilon': 0.0001,
 'deconv_opt2_learning_rate': 0.1,
 'deconv_opt2_regu_strength': 0.01,
 'deconv_opt3_epoch': 150,
 'deconv_opt3_epsilon': 0.0001,
 'deconv_opt3_learning_rate': 0.1,
 'deconv_opt3_regu_strength_1': 0.8,
 'deconv_opt3_regu_strength_2': 0.01,
 'deconv_opt3_regu_strength_3': 1,
 'deconv_tune_epoch': 20,
 'deconv_tune_epsilo

MSE: 0.7639


[INFO][2023-09-08 21:53:44,519][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 8950 × 25202
    obs: 'time'
    uns: 'dance_config'
[INFO][2023-09-08 21:53:44,522][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None),
  FilterCellsScanpy(min_counts=None, min_genes=1, max_counts=None, max_genes=None, split_name=None),
  FilterGenesTopK(num_genes=200, top=True, mode='var'),
  CellwiseMaskData(distr='exp', mask_rate=0.1, seed=None),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
)
[INFO][2023-09-08 21:53:44,561][dance.FilterGenesScanpy][__call__] Subsetting genes (-1,790 removed) due to FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None)
[INFO][2023-09-08 21:53:44,671][dance.FilterGenesTopK][__call__] Filtering genes based on var expressi

MSE: 0.8400


[INFO][2023-09-08 21:58:38,030][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 8950 × 25202
    obs: 'time'
    uns: 'dance_config'
[INFO][2023-09-08 21:58:38,032][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None),
  FilterCellsScanpy(min_counts=None, min_genes=1, max_counts=None, max_genes=None, split_name=None),
  FilterGenesTopK(num_genes=200, top=True, mode='var'),
  CellwiseMaskData(distr='exp', mask_rate=0.1, seed=None),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
)
[INFO][2023-09-08 21:58:38,071][dance.FilterGenesScanpy][__call__] Subsetting genes (-1,790 removed) due to FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None)
[INFO][2023-09-08 21:58:38,186][dance.FilterGenesTopK][__call__] Filtering genes based on var expressi

MSE: 0.8589


[INFO][2023-09-08 22:03:32,391][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 8950 × 25202
    obs: 'time'
    uns: 'dance_config'
[INFO][2023-09-08 22:03:32,394][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None),
  FilterCellsScanpy(min_counts=None, min_genes=1, max_counts=None, max_genes=None, split_name=None),
  FilterGenesTopK(num_genes=200, top=True, mode='var'),
  CellwiseMaskData(distr='exp', mask_rate=0.1, seed=None),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
)
[INFO][2023-09-08 22:03:32,431][dance.FilterGenesScanpy][__call__] Subsetting genes (-1,790 removed) due to FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None)
[INFO][2023-09-08 22:03:32,539][dance.FilterGenesTopK][__call__] Filtering genes based on var expressi

MSE: 0.7915


[INFO][2023-09-08 22:08:19,387][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 8950 × 25202
    obs: 'time'
    uns: 'dance_config'
[INFO][2023-09-08 22:08:19,390][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None),
  FilterCellsScanpy(min_counts=None, min_genes=1, max_counts=None, max_genes=None, split_name=None),
  FilterGenesTopK(num_genes=200, top=True, mode='var'),
  CellwiseMaskData(distr='exp', mask_rate=0.1, seed=None),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
)
[INFO][2023-09-08 22:08:19,435][dance.FilterGenesScanpy][__call__] Subsetting genes (-1,790 removed) due to FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None)
[INFO][2023-09-08 22:08:19,537][dance.FilterGenesTopK][__call__] Filtering genes based on var expressi

MSE: 0.7971


[INFO][2023-09-08 22:13:08,653][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 8950 × 25202
    obs: 'time'
    uns: 'dance_config'
[INFO][2023-09-08 22:13:08,656][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None),
  FilterCellsScanpy(min_counts=None, min_genes=1, max_counts=None, max_genes=None, split_name=None),
  FilterGenesTopK(num_genes=200, top=True, mode='var'),
  CellwiseMaskData(distr='exp', mask_rate=0.1, seed=None),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
)
[INFO][2023-09-08 22:13:08,694][dance.FilterGenesScanpy][__call__] Subsetting genes (-1,790 removed) due to FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None)
[INFO][2023-09-08 22:13:08,801][dance.FilterGenesTopK][__call__] Filtering genes based on var expressi

MSE: 0.9097


[INFO][2023-09-08 22:17:56,429][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 8950 × 25202
    obs: 'time'
    uns: 'dance_config'
[INFO][2023-09-08 22:17:56,432][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None),
  FilterCellsScanpy(min_counts=None, min_genes=1, max_counts=None, max_genes=None, split_name=None),
  FilterGenesTopK(num_genes=200, top=True, mode='var'),
  CellwiseMaskData(distr='exp', mask_rate=0.1, seed=None),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
)
[INFO][2023-09-08 22:17:56,472][dance.FilterGenesScanpy][__call__] Subsetting genes (-1,790 removed) due to FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None)
[INFO][2023-09-08 22:17:56,575][dance.FilterGenesTopK][__call__] Filtering genes based on var expressi

MSE: 0.7767


'\n\nMouse Brain\n$ python scgnn2.py --dataset mouse_brain_data --feature_AE_epoch 20 10 --cluster_AE_epoch 20 --total_epoch 2\n\nMouse Embryo\n$ python scgnn2.py --dataset mouse_embryo_data --feature_AE_epoch 20 10 --cluster_AE_epoch 20 --total_epoch 2\n\nPBMC\n$ python scgnn2.py --dataset pbmc_data --feature_AE_epoch 20 10 --cluster_AE_epoch 20 --total_epoch 2\n\n'

In [3]:
ScGNN2_scores

[0.7639198474801978,
 0.8399611694867586,
 0.8588636453426658,
 0.7914635506133476,
 0.7970538281640861,
 0.9097019615164369,
 0.7767488242892576]

In [4]:
import argparse

import numpy as np
import torch

from dance.datasets.singlemodality import ImputationDataset
from dance.modules.single_modality.imputation.graphsci import GraphSCI
from dance.utils import set_seed
GraphSCI_scores=[]

parser = argparse.ArgumentParser()
parser.add_argument("--random_seed", type=int, default=10)
parser.add_argument("--dropout", type=float, default=0.1, help="dropout probability")
parser.add_argument("--gpu", type=int, default=-1, help="GPU id, -1 for cpu")
parser.add_argument("--lr", type=float, default=1e-3, help="learning rate")
parser.add_argument("--train_size", type=float, default=0.9, help="proportion of testing set")
parser.add_argument("--le", type=float, default=1, help="parameter of expression loss")
parser.add_argument("--la", type=float, default=1e-9, help="parameter of adjacency loss")
parser.add_argument("--ke", type=float, default=1e2, help="parameter of KL divergence of expression")
parser.add_argument("--ka", type=float, default=1, help="parameter of KL divergence of adjacency")
parser.add_argument("--n_epochs", type=int, default=300, help="number of training epochs")
parser.add_argument("--data_dir", type=str, default='/home/zyxing/dance/data', help='test directory')
parser.add_argument("--save_dir", type=str, default='result', help='save directory')
parser.add_argument("--filetype", type=str, default='h5', choices=['csv', 'gz', 'h5'],
                    help='data file type, csv, csv.gz, or h5')
parser.add_argument("--dataset", default='mouse_brain_data', type=str, help="dataset id")
parser.add_argument("--weight_decay", type=float, default=1e-6, help="Weight decay for exponential LR decay.")
parser.add_argument("--threshold", type=float, default=.3,
                    help="Lower bound for correlation between genes to determine edges in graph.")
parser.add_argument("--mask_rate", type=float, default=.1, help="Masking rate.")
parser.add_argument("--min_cells", type=float, default=.05,
                    help="Minimum proportion of cells expressed required for a gene to pass filtering")
parser.add_argument("--cache", action="store_true", help="Cache processed data.")
parser.add_argument("--mask", type=bool, default=True, help="Mask data for validation.")
for dataset in datasets:
    params = parser.parse_args(['--dataset',dataset])
    print(vars(params))
    set_seed(params.random_seed)

    dataloader = ImputationDataset(data_dir=params.data_dir, dataset=params.dataset, train_size=params.train_size)
    preprocessing_pipeline = GraphSCI.preprocessing_pipeline(min_cells=params.min_cells, threshold=params.threshold,
                                                            mask=params.mask, seed=params.random_seed,
                                                            mask_rate=params.mask_rate)
    data = dataloader.load_data(transform=preprocessing_pipeline, cache=params.cache)

    device = "cpu" if params.gpu == -1 else f"cuda:{params.gpu}"
    if params.mask:
        X, X_raw, g, mask = data.get_x(return_type="default")
    else:
        mask = None
        X, X_raw, g = data.get_x(return_type="default")
    X = torch.tensor(X.toarray()).to(device)
    X_raw = torch.tensor(X_raw.toarray()).to(device)
    g = g.to(device)
    train_idx = data.train_idx
    test_idx = data.test_idx
    X,X_raw=X.float(),X_raw.float()
    # g=g.double()
    model = GraphSCI(num_cells=X.shape[0], num_genes=X.shape[1], dataset=params.dataset, dropout=params.dropout,
                    gpu=params.gpu, seed=params.random_seed)
    model.fit(X, X_raw, g, train_idx, mask, params.le, params.la, params.ke, params.ka, params.n_epochs, params.lr,
            params.weight_decay)
    model.load_model()
    imputed_data = model.predict(X, X_raw, g, mask)
    score = model.score(X_raw, imputed_data, test_idx, mask, metric='RMSE')
    GraphSCI_scores.append(score)
    print("RMSE: %.4f" % score)
"""To reproduce GraphSCI benchmarks, please refer to command lines belows:

Mouse Brain:
$ python graphsci.py --dataset mouse_brain_data

Mouse Embryo:
$ python graphsci.py --dataset mouse_embryo_data

PBMC
$ python graphsci.py --dataset pbmc_data

"""


[INFO][2023-09-08 22:22:18,607][dance][set_seed] Setting global random seed to 10


{'random_seed': 10, 'dropout': 0.1, 'gpu': -1, 'lr': 0.001, 'train_size': 0.9, 'le': 1, 'la': 1e-09, 'ke': 100.0, 'ka': 1, 'n_epochs': 300, 'data_dir': '/home/zyxing/dance/data', 'save_dir': 'result', 'filetype': 'h5', 'dataset': 'pbmc_data', 'weight_decay': 1e-06, 'threshold': 0.3, 'mask_rate': 0.1, 'min_cells': 0.05, 'cache': False, 'mask': True}


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
[INFO][2023-09-08 22:22:19,326][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 5247 × 33538
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'dance_config'
[INFO][2023-09-08 22:22:19,327][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None),
  FilterCellsScanpy(min_counts=1, min_genes=None, max_counts=None, max_genes=None, split_name=None),
  SaveRaw(),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
  FeatureFeatureGraph(),
  CellwiseMaskData(distr='exp', mask_rate=0.1, seed=10),
  SetConfig(config_dict={'feature_channel': [None, None, 'FeatureFeatureGraph', 'train_mask'], 'feature_channel_type': ['X', 'raw_X', 'uns', 'layers'], 'label_channel': [None, None], 'label_channel_type': ['X', 'raw_X']}),
)
[IN

[Epoch0], train_loss 47344.816406, adj_loss 0.014825, express_loss 1.157790, kl_loss 47343.644531, valid_loss 6.832741
[Epoch1], train_loss 731.127197, adj_loss 0.015873, express_loss 1.145124, kl_loss 729.966187, valid_loss 2.236674
[Epoch2], train_loss 472.215729, adj_loss 0.014780, express_loss 1.132826, kl_loss 471.068115, valid_loss 2.052536
[Epoch3], train_loss 486.115295, adj_loss 0.014475, express_loss 1.123887, kl_loss 484.976929, valid_loss 1.955121
[Epoch4], train_loss 1759.833618, adj_loss 0.014344, express_loss 1.115230, kl_loss 1758.704102, valid_loss 1.851869
[Epoch5], train_loss 264.094299, adj_loss 0.014269, express_loss 1.102830, kl_loss 262.977203, valid_loss 1.831721
[Epoch6], train_loss 30.310125, adj_loss 0.014243, express_loss 1.098976, kl_loss 29.196905, valid_loss 1.805441
[Epoch7], train_loss 378.257843, adj_loss 0.014234, express_loss 1.094610, kl_loss 377.148987, valid_loss 1.801302
[Epoch8], train_loss 1260.535522, adj_loss 0.014223, express_loss 1.087275, 

[INFO][2023-09-08 22:25:05,736][dance][set_seed] Setting global random seed to 10


[Epoch299], train_loss 1.064121, adj_loss 0.014231, express_loss 0.927409, kl_loss 0.122482, valid_loss 1.789749
RMSE: 0.3099
{'random_seed': 10, 'dropout': 0.1, 'gpu': -1, 'lr': 0.001, 'train_size': 0.9, 'le': 1, 'la': 1e-09, 'ke': 100.0, 'ka': 1, 'n_epochs': 300, 'data_dir': '/home/zyxing/dance/data', 'save_dir': 'result', 'filetype': 'h5', 'dataset': 'mouse_brain_data', 'weight_decay': 1e-06, 'threshold': 0.3, 'mask_rate': 0.1, 'min_cells': 0.05, 'cache': False, 'mask': True}


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
[INFO][2023-09-08 22:25:07,249][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 11843 × 31053
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'dance_config'
[INFO][2023-09-08 22:25:07,250][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None),
  FilterCellsScanpy(min_counts=1, min_genes=None, max_counts=None, max_genes=None, split_name=None),
  SaveRaw(),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
  FeatureFeatureGraph(),
  CellwiseMaskData(distr='exp', mask_rate=0.1, seed=10),
  SetConfig(config_dict={'feature_channel': [None, None, 'FeatureFeatureGraph', 'train_mask'], 'feature_channel_type': ['X', 'raw_X', 'uns', 'layers'], 'label_channel': [None, None], 'label_channel_type': ['X', 'raw_X']}),
)
[I

[Epoch0], train_loss 142.122360, adj_loss 0.225158, express_loss 0.780675, kl_loss 141.116531, valid_loss 2.313598
[Epoch1], train_loss 1992.881592, adj_loss 0.227119, express_loss 0.772148, kl_loss 1991.882324, valid_loss 9.224771
[Epoch2], train_loss 1996.984497, adj_loss 0.224754, express_loss 0.762343, kl_loss 1995.997437, valid_loss 5.562654
[Epoch3], train_loss 486.087189, adj_loss 0.223800, express_loss 0.752110, kl_loss 485.111267, valid_loss 2.001688
[Epoch4], train_loss 311.223907, adj_loss 0.223886, express_loss 0.742100, kl_loss 310.257935, valid_loss 1.804444
[Epoch5], train_loss 641.744751, adj_loss 0.223759, express_loss 0.737683, kl_loss 640.783325, valid_loss 1.821292
[Epoch6], train_loss 171.983246, adj_loss 0.224115, express_loss 0.732511, kl_loss 171.026627, valid_loss 1.941072
[Epoch7], train_loss 478.464478, adj_loss 0.224129, express_loss 0.727074, kl_loss 477.513275, valid_loss 1.895053
[Epoch8], train_loss 1153.240967, adj_loss 0.223802, express_loss 0.718098, 

[INFO][2023-09-08 23:16:43,251][dance][set_seed] Setting global random seed to 10


RMSE: 0.3232
{'random_seed': 10, 'dropout': 0.1, 'gpu': -1, 'lr': 0.001, 'train_size': 0.9, 'le': 1, 'la': 1e-09, 'ke': 100.0, 'ka': 1, 'n_epochs': 300, 'data_dir': '/home/zyxing/dance/data', 'save_dir': 'result', 'filetype': 'h5', 'dataset': 'mouse_embryo_data', 'weight_decay': 1e-06, 'threshold': 0.3, 'mask_rate': 0.1, 'min_cells': 0.05, 'cache': False, 'mask': True}


[INFO][2023-09-08 23:16:46,641][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 2717 × 24175
    obs: 'time'
    uns: 'dance_config'
[INFO][2023-09-08 23:16:46,642][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None),
  FilterCellsScanpy(min_counts=1, min_genes=None, max_counts=None, max_genes=None, split_name=None),
  SaveRaw(),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
  FeatureFeatureGraph(),
  CellwiseMaskData(distr='exp', mask_rate=0.1, seed=10),
  SetConfig(config_dict={'feature_channel': [None, None, 'FeatureFeatureGraph', 'train_mask'], 'feature_channel_type': ['X', 'raw_X', 'uns', 'layers'], 'label_channel': [None, None], 'label_channel_type': ['X', 'raw_X']}),
)
[INFO][2023-09-08 23:16:46,760][dance.FilterGenesScanpy][__call__] Subsetting genes (-6,372 removed) due to 

[Epoch0], train_loss 48.327068, adj_loss 1.399954, express_loss 2.024753, kl_loss 44.902363, valid_loss 6.867118
[Epoch1], train_loss 17.255718, adj_loss 1.398991, express_loss 1.989055, kl_loss 13.867674, valid_loss 9.129341
[Epoch2], train_loss 49.152317, adj_loss 1.393713, express_loss 1.972008, kl_loss 45.786598, valid_loss 10.765398
[Epoch3], train_loss 17.587646, adj_loss 1.391904, express_loss 1.948984, kl_loss 14.246758, valid_loss 5.682721
[Epoch4], train_loss 12.581965, adj_loss 1.391425, express_loss 1.913260, kl_loss 9.277281, valid_loss 5.023032
[Epoch5], train_loss 5.980544, adj_loss 1.391212, express_loss 1.896446, kl_loss 2.692886, valid_loss 4.641704
[Epoch6], train_loss 10.307402, adj_loss 1.391123, express_loss 1.880461, kl_loss 7.035818, valid_loss 4.575951
[Epoch7], train_loss 4.642825, adj_loss 1.390918, express_loss 1.861529, kl_loss 1.390378, valid_loss 4.664849
[Epoch8], train_loss 5.494483, adj_loss 1.390948, express_loss 1.849736, kl_loss 2.253798, valid_loss

[INFO][2023-09-08 23:57:56,370][dance][set_seed] Setting global random seed to 10


RMSE: 1.5261
{'random_seed': 10, 'dropout': 0.1, 'gpu': -1, 'lr': 0.001, 'train_size': 0.9, 'le': 1, 'la': 1e-09, 'ke': 100.0, 'ka': 1, 'n_epochs': 300, 'data_dir': '/home/zyxing/dance/data', 'save_dir': 'result', 'filetype': 'h5', 'dataset': 'human_breast_TGFb_data', 'weight_decay': 1e-06, 'threshold': 0.3, 'mask_rate': 0.1, 'min_cells': 0.05, 'cache': False, 'mask': True}


[INFO][2023-09-08 23:58:14,416][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 7523 × 28910
    obs: 'nums'
    uns: 'dance_config'
[INFO][2023-09-08 23:58:14,417][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None),
  FilterCellsScanpy(min_counts=1, min_genes=None, max_counts=None, max_genes=None, split_name=None),
  SaveRaw(),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
  FeatureFeatureGraph(),
  CellwiseMaskData(distr='exp', mask_rate=0.1, seed=10),
  SetConfig(config_dict={'feature_channel': [None, None, 'FeatureFeatureGraph', 'train_mask'], 'feature_channel_type': ['X', 'raw_X', 'uns', 'layers'], 'label_channel': [None, None], 'label_channel_type': ['X', 'raw_X']}),
)
[INFO][2023-09-08 23:58:14,488][dance.FilterGenesScanpy][__call__] Subsetting genes (-3,721 removed) due to 

[Epoch0], train_loss 109.159615, adj_loss 0.060968, express_loss 1.236347, kl_loss 107.862297, valid_loss 7.610323
[Epoch1], train_loss 199.382065, adj_loss 0.061472, express_loss 1.233208, kl_loss 198.087387, valid_loss 18.891907
[Epoch2], train_loss 700.330688, adj_loss 0.060643, express_loss 1.226632, kl_loss 699.043396, valid_loss 740.770386
[Epoch3], train_loss 346.383575, adj_loss 0.060467, express_loss 1.212549, kl_loss 345.110565, valid_loss 3.860542
[Epoch4], train_loss 97.836800, adj_loss 0.060568, express_loss 1.203452, kl_loss 96.572777, valid_loss 3.067867
[Epoch5], train_loss 17.418636, adj_loss 0.060430, express_loss 1.202729, kl_loss 16.155478, valid_loss 3.040006
[Epoch6], train_loss 167.272354, adj_loss 0.060620, express_loss 1.195780, kl_loss 166.015961, valid_loss 3.030973
[Epoch7], train_loss 33.457390, adj_loss 0.060540, express_loss 1.187871, kl_loss 32.208981, valid_loss 3.016978
[Epoch8], train_loss 21.384785, adj_loss 0.060548, express_loss 1.180869, kl_loss 2

[INFO][2023-09-09 00:16:25,109][dance][set_seed] Setting global random seed to 10


RMSE: 1.3779
{'random_seed': 10, 'dropout': 0.1, 'gpu': -1, 'lr': 0.001, 'train_size': 0.9, 'le': 1, 'la': 1e-09, 'ke': 100.0, 'ka': 1, 'n_epochs': 300, 'data_dir': '/home/zyxing/dance/data', 'save_dir': 'result', 'filetype': 'h5', 'dataset': 'human_breast_Dox_data', 'weight_decay': 1e-06, 'threshold': 0.3, 'mask_rate': 0.1, 'min_cells': 0.05, 'cache': False, 'mask': True}


[INFO][2023-09-09 00:16:30,493][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 3496 × 27059
    obs: 'nums'
    uns: 'dance_config'
[INFO][2023-09-09 00:16:30,494][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None),
  FilterCellsScanpy(min_counts=1, min_genes=None, max_counts=None, max_genes=None, split_name=None),
  SaveRaw(),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
  FeatureFeatureGraph(),
  CellwiseMaskData(distr='exp', mask_rate=0.1, seed=10),
  SetConfig(config_dict={'feature_channel': [None, None, 'FeatureFeatureGraph', 'train_mask'], 'feature_channel_type': ['X', 'raw_X', 'uns', 'layers'], 'label_channel': [None, None], 'label_channel_type': ['X', 'raw_X']}),
)
[INFO][2023-09-09 00:16:30,531][dance.FilterGenesScanpy][__call__] Subsetting genes (-1,487 removed) due to 

[Epoch0], train_loss 12.457875, adj_loss 0.009458, express_loss 2.090623, kl_loss 10.357794, valid_loss 10.560386
[Epoch1], train_loss 7.189975, adj_loss 0.009928, express_loss 2.084590, kl_loss 5.095457, valid_loss 6.931870
[Epoch2], train_loss 5.535420, adj_loss 0.008950, express_loss 2.070924, kl_loss 3.455547, valid_loss 6.822077
[Epoch3], train_loss 4.892448, adj_loss 0.008775, express_loss 2.052985, kl_loss 2.830688, valid_loss 6.533391
[Epoch4], train_loss 4.833032, adj_loss 0.008729, express_loss 2.053712, kl_loss 2.770590, valid_loss 6.502020
[Epoch5], train_loss 4.833717, adj_loss 0.008763, express_loss 2.045866, kl_loss 2.779089, valid_loss 6.453347
[Epoch6], train_loss 4.723823, adj_loss 0.008776, express_loss 2.034218, kl_loss 2.680829, valid_loss 6.457279
[Epoch7], train_loss 4.743715, adj_loss 0.008698, express_loss 2.026917, kl_loss 2.708101, valid_loss 6.528263
[Epoch8], train_loss 4.631662, adj_loss 0.008746, express_loss 2.017220, kl_loss 2.605696, valid_loss 6.53762

[INFO][2023-09-09 00:18:14,904][dance][set_seed] Setting global random seed to 10


[Epoch299], train_loss 3.915101, adj_loss 0.008750, express_loss 1.607210, kl_loss 2.299142, valid_loss 6.256895
RMSE: 2.8833
{'random_seed': 10, 'dropout': 0.1, 'gpu': -1, 'lr': 0.001, 'train_size': 0.9, 'le': 1, 'la': 1e-09, 'ke': 100.0, 'ka': 1, 'n_epochs': 300, 'data_dir': '/home/zyxing/dance/data', 'save_dir': 'result', 'filetype': 'h5', 'dataset': 'human_melanoma_data', 'weight_decay': 1e-06, 'threshold': 0.3, 'mask_rate': 0.1, 'min_cells': 0.05, 'cache': False, 'mask': True}


[INFO][2023-09-09 00:18:51,370][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 8640 × 32287
    obs: 'nums'
    uns: 'dance_config'
[INFO][2023-09-09 00:18:51,371][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None),
  FilterCellsScanpy(min_counts=1, min_genes=None, max_counts=None, max_genes=None, split_name=None),
  SaveRaw(),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
  FeatureFeatureGraph(),
  CellwiseMaskData(distr='exp', mask_rate=0.1, seed=10),
  SetConfig(config_dict={'feature_channel': [None, None, 'FeatureFeatureGraph', 'train_mask'], 'feature_channel_type': ['X', 'raw_X', 'uns', 'layers'], 'label_channel': [None, None], 'label_channel_type': ['X', 'raw_X']}),
)
[INFO][2023-09-09 00:18:51,419][dance.FilterGenesScanpy][__call__] Subsetting genes (-1,463 removed) due to 

[Epoch0], train_loss 2235549184.000000, adj_loss 0.302729, express_loss 1.316340, kl_loss 2235549184.000000, valid_loss 15605080.000000
[Epoch1], train_loss 2551160320.000000, adj_loss 0.329245, express_loss 1.292535, kl_loss 2551160320.000000, valid_loss 555483.812500
[Epoch2], train_loss 1646016256.000000, adj_loss 0.307278, express_loss 1.279371, kl_loss 1646016256.000000, valid_loss 761305.500000
[Epoch3], train_loss 1615203584.000000, adj_loss 0.300764, express_loss 1.254008, kl_loss 1615203584.000000, valid_loss 136025.937500
[Epoch4], train_loss 1076629120.000000, adj_loss 0.298972, express_loss 1.230010, kl_loss 1076629120.000000, valid_loss 1467.901001
[Epoch5], train_loss 881550528.000000, adj_loss 0.298323, express_loss 1.210757, kl_loss 881550528.000000, valid_loss 46.542286
[Epoch6], train_loss 790020800.000000, adj_loss 0.298061, express_loss 1.201183, kl_loss 790020800.000000, valid_loss 12.179887
[Epoch7], train_loss 1307978752.000000, adj_loss 0.297937, express_loss 1.

[INFO][2023-09-09 00:27:03,347][dance][set_seed] Setting global random seed to 10


RMSE: 3.2824
{'random_seed': 10, 'dropout': 0.1, 'gpu': -1, 'lr': 0.001, 'train_size': 0.9, 'le': 1, 'la': 1e-09, 'ke': 100.0, 'ka': 1, 'n_epochs': 300, 'data_dir': '/home/zyxing/dance/data', 'save_dir': 'result', 'filetype': 'h5', 'dataset': 'mouse_visual_data', 'weight_decay': 1e-06, 'threshold': 0.3, 'mask_rate': 0.1, 'min_cells': 0.05, 'cache': False, 'mask': True}


[INFO][2023-09-09 00:27:29,369][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 8950 × 25202
    obs: 'time'
    uns: 'dance_config'
[INFO][2023-09-09 00:27:29,370][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None),
  FilterCellsScanpy(min_counts=1, min_genes=None, max_counts=None, max_genes=None, split_name=None),
  SaveRaw(),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
  FeatureFeatureGraph(),
  CellwiseMaskData(distr='exp', mask_rate=0.1, seed=10),
  SetConfig(config_dict={'feature_channel': [None, None, 'FeatureFeatureGraph', 'train_mask'], 'feature_channel_type': ['X', 'raw_X', 'uns', 'layers'], 'label_channel': [None, None], 'label_channel_type': ['X', 'raw_X']}),
)
[INFO][2023-09-09 00:27:29,412][dance.FilterGenesScanpy][__call__] Subsetting genes (-1,790 removed) due to 

[Epoch0], train_loss 6988152.000000, adj_loss 0.026756, express_loss 0.741626, kl_loss 6988151.000000, valid_loss 293.024048
[Epoch1], train_loss 8377392.500000, adj_loss 0.028067, express_loss 0.736401, kl_loss 8377391.500000, valid_loss 2364.984863
[Epoch2], train_loss 4835464.000000, adj_loss 0.026987, express_loss 0.721079, kl_loss 4835463.500000, valid_loss 111.216049
[Epoch3], train_loss 2899924.250000, adj_loss 0.026715, express_loss 0.707728, kl_loss 2899923.500000, valid_loss 9.066483
[Epoch4], train_loss 1911128.625000, adj_loss 0.026612, express_loss 0.693549, kl_loss 1911127.875000, valid_loss 5.384711
[Epoch5], train_loss 759895.625000, adj_loss 0.026574, express_loss 0.684215, kl_loss 759894.937500, valid_loss 2.663148
[Epoch6], train_loss 590757.812500, adj_loss 0.026554, express_loss 0.674209, kl_loss 590757.125000, valid_loss 2.301640
[Epoch7], train_loss 421032.093750, adj_loss 0.026540, express_loss 0.670513, kl_loss 421031.406250, valid_loss 2.153551
[Epoch8], train

'To reproduce GraphSCI benchmarks, please refer to command lines belows:\n\nMouse Brain:\n$ python graphsci.py --dataset mouse_brain_data\n\nMouse Embryo:\n$ python graphsci.py --dataset mouse_embryo_data\n\nPBMC\n$ python graphsci.py --dataset pbmc_data\n\n'

In [5]:
GraphSCI_scores

[0.3098731019260862,
 0.32321838798380964,
 1.5260799086360861,
 1.3778591340708928,
 2.883251291184602,
 3.2824177435209183,
 0.06893598355620925]

In [6]:
import argparse

import numpy as np
import torch

from dance.datasets.singlemodality import ImputationDataset
from dance.modules.single_modality.imputation.deepimpute import DeepImpute
from dance.utils import set_seed
DeepImpute_scores=[]
output_dims=[459,3299,277,1860,743,731,1]
parser = argparse.ArgumentParser()
parser.add_argument("--random_seed", type=int, default=10)
parser.add_argument("--dropout", type=float, default=0.1, help="dropout probability")
parser.add_argument("--gpu", type=int, default=0, help="GPU id, -1 for cpu")
parser.add_argument("--lr", type=float, default=1e-5, help="learning rate")
parser.add_argument("--n_epochs", type=int, default=500, help="number of training epochs")
parser.add_argument("--batch_size", type=int, default=64, help="Batch size.")
parser.add_argument("--sub_outputdim", type=int, default=512,
                    help="Output dimension - number of genes being imputed per AE.")
parser.add_argument("--hidden_dim", type=int, default=256,
                    help="Hidden layer dimension - number of neurons in the dense layer.")
parser.add_argument("--patience", type=int, default=20, help="Early stopping patience")
parser.add_argument("--min_cells", type=float, default=0.05,
                    help="Minimum proportion of cells expressed required for a gene to pass filtering")
parser.add_argument("--data_dir", type=str, default='/home/zyxing/dance/data', help='test directory')
parser.add_argument("--dataset", default='mouse_brain_data', type=str, help="dataset id")
parser.add_argument("--n_top", type=int, default=5, help="Number of predictors.")
parser.add_argument("--train_size", type=float, default=0.9, help="proportion of testing set")
parser.add_argument("--mask_rate", type=float, default=.1, help="Masking rate.")
parser.add_argument("--cache", action="store_true", help="Cache processed data.")
parser.add_argument("--mask", type=bool, default=True, help="Mask data for validation.")
for i,dataset in enumerate(datasets):
    params = parser.parse_args(['--dataset',dataset,'--sub_outputdim',str(output_dims[i])]) #注意需要调参
    print(vars(params))
    set_seed(params.random_seed)

    dataloader = ImputationDataset(data_dir=params.data_dir, dataset=params.dataset, train_size=params.train_size)
    preprocessing_pipeline = DeepImpute.preprocessing_pipeline(min_cells=params.min_cells, n_top=params.n_top,
                                                            sub_outputdim=params.sub_outputdim, mask=params.mask,
                                                            seed=params.random_seed, mask_rate=params.mask_rate)
    data = dataloader.load_data(transform=preprocessing_pipeline, cache=params.cache)

    if params.mask:
        X, X_raw, targets, predictors, mask = data.get_x(return_type="default")
    else:
        mask = None
        X, X_raw, targets, predictors = data.get_x(return_type="default")
    X = torch.tensor(X.toarray())
    X_raw = torch.tensor(X_raw.toarray())
    train_idx = data.train_idx
    test_idx = data.test_idx
    X,X_raw=X.float(),X_raw.float()
    model = DeepImpute(predictors, targets, params.dataset, params.sub_outputdim, params.hidden_dim, params.dropout,
                    params.random_seed, params.gpu)
    model.fit(X[train_idx], X[train_idx], train_idx, mask, params.batch_size, params.lr, params.n_epochs,
            params.patience)
    imputed_data = model.predict(X[test_idx], test_idx, mask)
    score = model.score(X_raw[test_idx], imputed_data, test_idx, mask, metric='RMSE')
    DeepImpute_scores.append(score)
    print("RMSE: %.4f" % score)
"""To reproduce deepimpute benchmarks, please refer to command lines belows:

Mouse Brain
$ python deepimpute.py --dataset mouse_brain_data

Mouse Embryo
$ python deepimpute.py --dataset mouse_embryo_data

PBMC
$ python graphsci.py --dataset pbmc_data

"""

[INFO][2023-09-09 00:37:21,401][dance][set_seed] Setting global random seed to 10


{'random_seed': 10, 'dropout': 0.1, 'gpu': 0, 'lr': 1e-05, 'n_epochs': 500, 'batch_size': 64, 'sub_outputdim': 459, 'hidden_dim': 256, 'patience': 20, 'min_cells': 0.05, 'data_dir': '/home/zyxing/dance/data', 'dataset': 'pbmc_data', 'n_top': 5, 'train_size': 0.9, 'mask_rate': 0.1, 'cache': False, 'mask': True}


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
[INFO][2023-09-09 00:37:21,877][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 5247 × 33538
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'dance_config'
[INFO][2023-09-09 00:37:21,879][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None),
  FilterCellsScanpy(min_counts=1, min_genes=None, max_counts=None, max_genes=None, split_name=None),
  SaveRaw(),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
  GeneHoldout(batch_size=459, n_top=5),
  CellwiseMaskData(distr='exp', mask_rate=0.1, seed=10),
  SetConfig(config_dict={'feature_channel': [None, None, 'targets', 'predictors', 'train_mask'], 'feature_channel_type': ['X', 'raw_X', 'uns', 'uns', 'layers'], 'label_channel': [None, None], 'label_channel_type':

Model 0, epoch 0, train loss: 2.224626, valid loss: 1.155449.
Model 0, epoch 1, train loss: 2.150255, valid loss: 1.104380.
Model 0, epoch 2, train loss: 2.065887, valid loss: 1.050077.
Model 0, epoch 3, train loss: 1.973156, valid loss: 0.993720.
Model 0, epoch 4, train loss: 1.874573, valid loss: 0.935785.
Model 0, epoch 5, train loss: 1.769029, valid loss: 0.877176.
Model 0, epoch 6, train loss: 1.653118, valid loss: 0.819302.
Model 0, epoch 7, train loss: 1.535020, valid loss: 0.763615.
Model 0, epoch 8, train loss: 1.413292, valid loss: 0.711868.
Model 0, epoch 9, train loss: 1.289273, valid loss: 0.665870.
Model 0, epoch 10, train loss: 1.168248, valid loss: 0.626326.
Model 0, epoch 11, train loss: 1.053733, valid loss: 0.593280.
Model 0, epoch 12, train loss: 0.948026, valid loss: 0.565489.
Model 0, epoch 13, train loss: 0.854841, valid loss: 0.541822.
Model 0, epoch 14, train loss: 0.776838, valid loss: 0.520865.
Model 0, epoch 15, train loss: 0.715278, valid loss: 0.503817.
Mo

[INFO][2023-09-09 00:38:12,355][dance][set_seed] Setting global random seed to 10


Model 2, epoch 80, train loss: 0.230373, valid loss: 0.235030.
Early stopped
RMSE: 7.5722
{'random_seed': 10, 'dropout': 0.1, 'gpu': 0, 'lr': 1e-05, 'n_epochs': 500, 'batch_size': 64, 'sub_outputdim': 3299, 'hidden_dim': 256, 'patience': 20, 'min_cells': 0.05, 'data_dir': '/home/zyxing/dance/data', 'dataset': 'mouse_brain_data', 'n_top': 5, 'train_size': 0.9, 'mask_rate': 0.1, 'cache': False, 'mask': True}


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
[INFO][2023-09-09 00:38:14,736][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 11843 × 31053
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'dance_config'
[INFO][2023-09-09 00:38:14,738][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None),
  FilterCellsScanpy(min_counts=1, min_genes=None, max_counts=None, max_genes=None, split_name=None),
  SaveRaw(),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
  GeneHoldout(batch_size=3299, n_top=5),
  CellwiseMaskData(distr='exp', mask_rate=0.1, seed=10),
  SetConfig(config_dict={'feature_channel': [None, None, 'targets', 'predictors', 'train_mask'], 'feature_channel_type': ['X', 'raw_X', 'uns', 'uns', 'layers'], 'label_channel': [None, None], 'label_channel_type

Model 0, epoch 0, train loss: 0.341862, valid loss: 0.352759.
Model 0, epoch 1, train loss: 0.286932, valid loss: 0.283165.
Model 0, epoch 2, train loss: 0.213250, valid loss: 0.230794.
Model 0, epoch 3, train loss: 0.146899, valid loss: 0.231127.
Model 0, epoch 4, train loss: 0.118248, valid loss: 0.238698.
Model 0, epoch 5, train loss: 0.110658, valid loss: 0.203553.
Model 0, epoch 6, train loss: 0.098929, valid loss: 0.166110.
Model 0, epoch 7, train loss: 0.090200, valid loss: 0.149739.
Model 0, epoch 8, train loss: 0.088908, valid loss: 0.147759.
Model 0, epoch 9, train loss: 0.090885, valid loss: 0.147490.
Model 0, epoch 10, train loss: 0.090095, valid loss: 0.144509.
Model 0, epoch 11, train loss: 0.083996, valid loss: 0.141205.
Model 0, epoch 12, train loss: 0.075663, valid loss: 0.144305.
Model 0, epoch 13, train loss: 0.070974, valid loss: 0.154619.
Model 0, epoch 14, train loss: 0.070603, valid loss: 0.156240.
Model 0, epoch 15, train loss: 0.067965, valid loss: 0.142131.
Mo

[INFO][2023-09-09 00:39:04,239][dance][set_seed] Setting global random seed to 10


Model 1, epoch 49, train loss: 0.069254, valid loss: 0.126369.
Early stopped
RMSE: 1.5337
{'random_seed': 10, 'dropout': 0.1, 'gpu': 0, 'lr': 1e-05, 'n_epochs': 500, 'batch_size': 64, 'sub_outputdim': 277, 'hidden_dim': 256, 'patience': 20, 'min_cells': 0.05, 'data_dir': '/home/zyxing/dance/data', 'dataset': 'mouse_embryo_data', 'n_top': 5, 'train_size': 0.9, 'mask_rate': 0.1, 'cache': False, 'mask': True}


[INFO][2023-09-09 00:39:07,535][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 2717 × 24175
    obs: 'time'
    uns: 'dance_config'
[INFO][2023-09-09 00:39:07,536][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None),
  FilterCellsScanpy(min_counts=1, min_genes=None, max_counts=None, max_genes=None, split_name=None),
  SaveRaw(),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
  GeneHoldout(batch_size=277, n_top=5),
  CellwiseMaskData(distr='exp', mask_rate=0.1, seed=10),
  SetConfig(config_dict={'feature_channel': [None, None, 'targets', 'predictors', 'train_mask'], 'feature_channel_type': ['X', 'raw_X', 'uns', 'uns', 'layers'], 'label_channel': [None, None], 'label_channel_type': ['X', 'raw_X']}),
)
[INFO][2023-09-09 00:39:07,641][dance.FilterGenesScanpy][__call__] Subsetting genes 

Model 0, epoch 0, train loss: 0.781402, valid loss: 0.565500.
Model 0, epoch 1, train loss: 0.758835, valid loss: 0.545172.
Model 0, epoch 2, train loss: 0.732583, valid loss: 0.523376.
Model 0, epoch 3, train loss: 0.704303, valid loss: 0.500871.
Model 0, epoch 4, train loss: 0.675249, valid loss: 0.477896.
Model 0, epoch 5, train loss: 0.643650, valid loss: 0.454576.
Model 0, epoch 6, train loss: 0.612593, valid loss: 0.431137.
Model 0, epoch 7, train loss: 0.578999, valid loss: 0.407996.
Model 0, epoch 8, train loss: 0.544795, valid loss: 0.385501.
Model 0, epoch 9, train loss: 0.510199, valid loss: 0.364256.
Model 0, epoch 10, train loss: 0.476761, valid loss: 0.345050.
Model 0, epoch 11, train loss: 0.443346, valid loss: 0.328647.
Model 0, epoch 12, train loss: 0.410184, valid loss: 0.315863.
Model 0, epoch 13, train loss: 0.379198, valid loss: 0.307250.
Model 0, epoch 14, train loss: 0.351781, valid loss: 0.303027.
Model 0, epoch 15, train loss: 0.328755, valid loss: 0.303036.
Mo

[INFO][2023-09-09 00:42:29,901][dance][set_seed] Setting global random seed to 10


RMSE: 3.3490
{'random_seed': 10, 'dropout': 0.1, 'gpu': 0, 'lr': 1e-05, 'n_epochs': 500, 'batch_size': 64, 'sub_outputdim': 1860, 'hidden_dim': 256, 'patience': 20, 'min_cells': 0.05, 'data_dir': '/home/zyxing/dance/data', 'dataset': 'human_breast_TGFb_data', 'n_top': 5, 'train_size': 0.9, 'mask_rate': 0.1, 'cache': False, 'mask': True}


[INFO][2023-09-09 00:42:48,273][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 7523 × 28910
    obs: 'nums'
    uns: 'dance_config'
[INFO][2023-09-09 00:42:48,275][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None),
  FilterCellsScanpy(min_counts=1, min_genes=None, max_counts=None, max_genes=None, split_name=None),
  SaveRaw(),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
  GeneHoldout(batch_size=1860, n_top=5),
  CellwiseMaskData(distr='exp', mask_rate=0.1, seed=10),
  SetConfig(config_dict={'feature_channel': [None, None, 'targets', 'predictors', 'train_mask'], 'feature_channel_type': ['X', 'raw_X', 'uns', 'uns', 'layers'], 'label_channel': [None, None], 'label_channel_type': ['X', 'raw_X']}),
)
[INFO][2023-09-09 00:42:48,346][dance.FilterGenesScanpy][__call__] Subsetting genes

Model 0, epoch 0, train loss: 0.313372, valid loss: 0.317438.
Model 0, epoch 1, train loss: 0.290540, valid loss: 0.288394.
Model 0, epoch 2, train loss: 0.260275, valid loss: 0.254902.
Model 0, epoch 3, train loss: 0.222093, valid loss: 0.223957.
Model 0, epoch 4, train loss: 0.180509, valid loss: 0.207042.
Model 0, epoch 5, train loss: 0.143577, valid loss: 0.207436.
Model 0, epoch 6, train loss: 0.118284, valid loss: 0.209563.
Model 0, epoch 7, train loss: 0.104163, valid loss: 0.200513.
Model 0, epoch 8, train loss: 0.098679, valid loss: 0.186393.
Model 0, epoch 9, train loss: 0.097288, valid loss: 0.171710.
Model 0, epoch 10, train loss: 0.095432, valid loss: 0.157271.
Model 0, epoch 11, train loss: 0.091817, valid loss: 0.146003.
Model 0, epoch 12, train loss: 0.088406, valid loss: 0.138836.
Model 0, epoch 13, train loss: 0.086486, valid loss: 0.135147.
Model 0, epoch 14, train loss: 0.085671, valid loss: 0.133595.
Model 0, epoch 15, train loss: 0.085659, valid loss: 0.133203.
Mo

[INFO][2023-09-09 00:43:17,039][dance][set_seed] Setting global random seed to 10


Model 1, epoch 40, train loss: 0.070254, valid loss: 0.122858.
Early stopped
RMSE: 2.6038
{'random_seed': 10, 'dropout': 0.1, 'gpu': 0, 'lr': 1e-05, 'n_epochs': 500, 'batch_size': 64, 'sub_outputdim': 743, 'hidden_dim': 256, 'patience': 20, 'min_cells': 0.05, 'data_dir': '/home/zyxing/dance/data', 'dataset': 'human_breast_Dox_data', 'n_top': 5, 'train_size': 0.9, 'mask_rate': 0.1, 'cache': False, 'mask': True}


[INFO][2023-09-09 00:43:24,387][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 3496 × 27059
    obs: 'nums'
    uns: 'dance_config'
[INFO][2023-09-09 00:43:24,388][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None),
  FilterCellsScanpy(min_counts=1, min_genes=None, max_counts=None, max_genes=None, split_name=None),
  SaveRaw(),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
  GeneHoldout(batch_size=743, n_top=5),
  CellwiseMaskData(distr='exp', mask_rate=0.1, seed=10),
  SetConfig(config_dict={'feature_channel': [None, None, 'targets', 'predictors', 'train_mask'], 'feature_channel_type': ['X', 'raw_X', 'uns', 'uns', 'layers'], 'label_channel': [None, None], 'label_channel_type': ['X', 'raw_X']}),
)
[INFO][2023-09-09 00:43:24,423][dance.FilterGenesScanpy][__call__] Subsetting genes 

Model 0, epoch 0, train loss: 0.818032, valid loss: 0.543828.
Model 0, epoch 1, train loss: 0.784196, valid loss: 0.516791.
Model 0, epoch 2, train loss: 0.744830, valid loss: 0.487329.
Model 0, epoch 3, train loss: 0.701108, valid loss: 0.455978.
Model 0, epoch 4, train loss: 0.652505, valid loss: 0.423424.
Model 0, epoch 5, train loss: 0.599790, valid loss: 0.390919.
Model 0, epoch 6, train loss: 0.544223, valid loss: 0.360208.
Model 0, epoch 7, train loss: 0.486619, valid loss: 0.333742.
Model 0, epoch 8, train loss: 0.430241, valid loss: 0.314354.
Model 0, epoch 9, train loss: 0.376962, valid loss: 0.304424.
Model 0, epoch 10, train loss: 0.332192, valid loss: 0.304584.
Model 0, epoch 11, train loss: 0.297612, valid loss: 0.312665.
Model 0, epoch 12, train loss: 0.273848, valid loss: 0.323317.
Model 0, epoch 13, train loss: 0.258711, valid loss: 0.329874.
Model 0, epoch 14, train loss: 0.249743, valid loss: 0.328173.
Model 0, epoch 15, train loss: 0.242705, valid loss: 0.318551.
Mo

[INFO][2023-09-09 00:43:41,514][dance][set_seed] Setting global random seed to 10


Model 1, epoch 50, train loss: 0.169685, valid loss: 0.212270.
Early stopped
RMSE: 8.7708
{'random_seed': 10, 'dropout': 0.1, 'gpu': 0, 'lr': 1e-05, 'n_epochs': 500, 'batch_size': 64, 'sub_outputdim': 731, 'hidden_dim': 256, 'patience': 20, 'min_cells': 0.05, 'data_dir': '/home/zyxing/dance/data', 'dataset': 'human_melanoma_data', 'n_top': 5, 'train_size': 0.9, 'mask_rate': 0.1, 'cache': False, 'mask': True}


[INFO][2023-09-09 00:44:18,945][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 8640 × 32287
    obs: 'nums'
    uns: 'dance_config'
[INFO][2023-09-09 00:44:18,946][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None),
  FilterCellsScanpy(min_counts=1, min_genes=None, max_counts=None, max_genes=None, split_name=None),
  SaveRaw(),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
  GeneHoldout(batch_size=731, n_top=5),
  CellwiseMaskData(distr='exp', mask_rate=0.1, seed=10),
  SetConfig(config_dict={'feature_channel': [None, None, 'targets', 'predictors', 'train_mask'], 'feature_channel_type': ['X', 'raw_X', 'uns', 'uns', 'layers'], 'label_channel': [None, None], 'label_channel_type': ['X', 'raw_X']}),
)
[INFO][2023-09-09 00:44:18,990][dance.FilterGenesScanpy][__call__] Subsetting genes 

Model 0, epoch 0, train loss: 0.605952, valid loss: 0.549341.
Model 0, epoch 1, train loss: 0.561468, valid loss: 0.495695.
Model 0, epoch 2, train loss: 0.509224, valid loss: 0.440520.
Model 0, epoch 3, train loss: 0.455227, valid loss: 0.387398.
Model 0, epoch 4, train loss: 0.398957, valid loss: 0.345315.
Model 0, epoch 5, train loss: 0.347221, valid loss: 0.322864.
Model 0, epoch 6, train loss: 0.307131, valid loss: 0.322254.
Model 0, epoch 7, train loss: 0.282381, valid loss: 0.335313.
Model 0, epoch 8, train loss: 0.268414, valid loss: 0.342799.
Model 0, epoch 9, train loss: 0.258433, valid loss: 0.332026.
Model 0, epoch 10, train loss: 0.245519, valid loss: 0.305408.
Model 0, epoch 11, train loss: 0.231202, valid loss: 0.276840.
Model 0, epoch 12, train loss: 0.218731, valid loss: 0.254258.
Model 0, epoch 13, train loss: 0.210704, valid loss: 0.239287.
Model 0, epoch 14, train loss: 0.204751, valid loss: 0.231364.
Model 0, epoch 15, train loss: 0.201449, valid loss: 0.227684.
Mo

[INFO][2023-09-09 00:44:50,001][dance][set_seed] Setting global random seed to 10


Model 1, epoch 37, train loss: 0.081871, valid loss: 0.175998.
Early stopped
RMSE: 8.0286
{'random_seed': 10, 'dropout': 0.1, 'gpu': 0, 'lr': 1e-05, 'n_epochs': 500, 'batch_size': 64, 'sub_outputdim': 1, 'hidden_dim': 256, 'patience': 20, 'min_cells': 0.05, 'data_dir': '/home/zyxing/dance/data', 'dataset': 'mouse_visual_data', 'n_top': 5, 'train_size': 0.9, 'mask_rate': 0.1, 'cache': False, 'mask': True}


[INFO][2023-09-09 00:45:17,703][dance][load_data] Raw data loaded:
Data object that wraps (.data):
AnnData object with n_obs × n_vars = 8950 × 25202
    obs: 'time'
    uns: 'dance_config'
[INFO][2023-09-09 00:45:17,705][dance.Compose][__call__] Applying composed transformations:
Compose(
  FilterGenesScanpy(min_counts=None, min_cells=0.05, max_counts=None, max_cells=None, split_name=None),
  FilterCellsScanpy(min_counts=1, min_genes=None, max_counts=None, max_genes=None, split_name=None),
  SaveRaw(),
  AnnDataTransform(func=scanpy.preprocessing._simple.log1p, func_kwargs={}),
  GeneHoldout(batch_size=1, n_top=5),
  CellwiseMaskData(distr='exp', mask_rate=0.1, seed=10),
  SetConfig(config_dict={'feature_channel': [None, None, 'targets', 'predictors', 'train_mask'], 'feature_channel_type': ['X', 'raw_X', 'uns', 'uns', 'layers'], 'label_channel': [None, None], 'label_channel_type': ['X', 'raw_X']}),
)
[INFO][2023-09-09 00:45:17,743][dance.FilterGenesScanpy][__call__] Subsetting genes (-

Model 0, epoch 0, train loss: 0.865665, valid loss: 0.969123.
Model 0, epoch 1, train loss: 0.752551, valid loss: 0.835795.
Model 0, epoch 2, train loss: 0.649959, valid loss: 0.715583.
Model 0, epoch 3, train loss: 0.547700, valid loss: 0.618707.
Model 0, epoch 4, train loss: 0.462675, valid loss: 0.548217.
Model 0, epoch 5, train loss: 0.387137, valid loss: 0.507282.
Model 0, epoch 6, train loss: 0.341014, valid loss: 0.497098.
Model 0, epoch 7, train loss: 0.304158, valid loss: 0.517474.
Model 0, epoch 8, train loss: 0.289677, valid loss: 0.566419.
Model 0, epoch 9, train loss: 0.292122, valid loss: 0.640662.
Model 0, epoch 10, train loss: 0.312265, valid loss: 0.735480.
Model 0, epoch 11, train loss: 0.336033, valid loss: 0.844579.
Model 0, epoch 12, train loss: 0.378265, valid loss: 0.960569.
Model 0, epoch 13, train loss: 0.415517, valid loss: 1.074270.
Model 0, epoch 14, train loss: 0.456819, valid loss: 1.174674.
Model 0, epoch 15, train loss: 0.489478, valid loss: 1.250478.
Mo

'To reproduce deepimpute benchmarks, please refer to command lines belows:\n\nMouse Brain\n$ python deepimpute.py --dataset mouse_brain_data\n\nMouse Embryo\n$ python deepimpute.py --dataset mouse_embryo_data\n\nPBMC\n$ python graphsci.py --dataset pbmc_data\n\n'

In [7]:
DeepImpute_scores

[7.572206935305832,
 1.5337278597973962,
 3.3490265186224675,
 2.6038380428231656,
 8.77081525956298,
 8.028613893583755,
 0.15002921882800682]