In [None]:
import scvelo as scv
import scanpy as sc
import cell2fate as c2f
import pickle as pickle
from eval_utils import cross_boundary_correctness
from datetime import datetime
import pandas as pd
import numpy as np
from os.path import exists
import matplotlib.pyplot as plt
import torch
import unitvelo as utv
import torch
# Benchmark configuration: name of the method under test, where the input
# datasets live, where results are written, and which dataset to run on.
method = 'cell2fate'
data_dir = '/nfs/team283/aa16/data/fate_benchmarking/benchmarking_datasets/'
save_dir = '/nfs/team283/aa16/data/fate_benchmarking/benchmarking_results/'
dataset = 'MouseErythroid'

# Preprocessing grid: values passed as `n_top_genes` and `min_shared_counts`
# to scv.pp.filter_and_normalize in the benchmarking loop below.
n_genes_list = np.array([2000, 3000])
n_counts_list = np.array([20, 40, 10])

# Model params: multipliers applied to c2f.utils.get_max_modules(adata) to
# obtain the number of modules for each run.
mod_list = [0.5, 1, 1.5, 2.0]

def robust_optimization(mod, save_dir, max_epochs = (200, 400), lr = (0.01, 0.01), **model_kwargs):
    """Train the model in both time directions and return the better fit.

    The function performs three training runs:

    1. ``mod`` is trained for ``max_epochs[0]`` epochs with learning rate
       ``lr[0]`` and its posterior is exported to obtain the posterior mean
       of ``t_c``.
    2. A fresh model is initialised with ``t_c`` set to the normalised
       ordering derived from run 1 and trained for ``max_epochs[1]`` epochs
       with learning rate ``lr[1]``. It is saved to disk and dropped from
       memory.
    3. Another fresh model is initialised with the reversed ordering and
       trained with the same settings as run 2.

    The training curves of runs 2 and 3 are plotted side by side (full range
    and last 20% of epochs), and the model whose mean ``elbo_train`` over the
    last 50 recorded epochs is lower is returned.

    Parameters
    ----------
    mod
        An already constructed model; must expose ``module.model.n_modules``,
        ``adata``, ``train``, ``export_posterior`` and ``samples``.
    save_dir
        Path prefix (string-concatenated, so it should end with a path
        separator) under which ``c2f_model`` and ``c2f_model_anndata.h5ad``
        are written for the second run.
    max_epochs
        Two-element sequence: epochs for the first run, and epochs for the
        second and third runs.
    lr
        Two-element sequence: learning rate for the first run, and learning
        rate for the second and third runs.
    **model_kwargs
        Extra keyword arguments forwarded to
        ``c2f.Cell2fate_DynamicalModel_PreprocessedCounts`` for runs 2 and 3.

    Returns
    -------
    The third-run model if it reached the lower mean ``elbo_train``,
    otherwise the second-run model reloaded from ``save_dir``.
    """
    n_modules = mod.module.model.n_modules
    adata = mod.adata

    print('First optimization run.')
    mod.train(use_gpu=True, max_epochs = max_epochs[0], lr = lr[0])
    sample_kwarg = {"num_samples": 1, "batch_size" : 1000,
                     "use_gpu" : True, 'return_samples': False}
    mod.adata = mod.export_posterior(mod.adata, sample_kwargs=sample_kwarg)

    # Turn the posterior mean of t_c into values in [0, 1) and build the
    # mirrored version used to test the opposite time direction.
    # NOTE(review): np.argsort returns the indices that would sort t_c, not
    # each cell's rank (which would be argsort of argsort) -- confirm this is
    # the intended initialisation.
    t_c_posterior = np.array(mod.samples['post_sample_means']['t_c'])
    t_c = np.argsort(t_c_posterior.flatten())/len(t_c_posterior)
    t_c_reversed = -1*(t_c - np.max(t_c))

    print('Second optimization run.')
    del mod
    mod1 = c2f.Cell2fate_DynamicalModel_PreprocessedCounts(adata, n_modules = n_modules,
                                        init_vals = {'t_c': torch.tensor(t_c).reshape([len(t_c), 1, 1])},
                                       **model_kwargs)
    mod1.train(use_gpu=True, max_epochs = max_epochs[1], lr = lr[1])
    history1 = mod1.history
    # Persist the second-run model so it can be reloaded later if it wins;
    # the in-memory reference is dropped before the third run starts.
    mod1.save(save_dir+'c2f_model', overwrite=True)
    mod1.adata.write(save_dir+"c2f_model_anndata.h5ad")
    del mod1

    print('Third optimization run.')
    mod2 = c2f.Cell2fate_DynamicalModel_PreprocessedCounts(adata, n_modules = n_modules,
                                        init_vals = {'t_c': torch.tensor(t_c_reversed).reshape([len(t_c_reversed), 1, 1])},
                                        **model_kwargs)
    del adata
    mod2.train(use_gpu=True, max_epochs = max_epochs[1], lr = lr[1])
    history2 = mod2.history

    # Both curves are truncated to the length of the second run's history.
    n_epochs = len(history1["elbo_train"])
    labelled_histories = (
        (history1, "Original Direction"),
        (history2, "Reversed Direction"),
    )

    fig, ax = plt.subplots(1,2, figsize = (15,5))
    # Left panel: whole training curve. Right panel: last 20% of epochs.
    for axis, x_start in zip(ax, (0, np.round(0.8*n_epochs))):
        for history, label in labelled_histories:
            axis.plot(
                history["elbo_train"].index[:n_epochs],
                np.array(history["elbo_train"].values.flatten())[:n_epochs],
                label=label,
            )
        axis.legend()
        axis.set_xlim(x_start, n_epochs)
        axis.set_xlabel("Training epochs")
        axis.set_ylabel("-ELBO loss")
    plt.tight_layout()
    plt.show()

    # Compare the two directions on the mean of the last 50 recorded values.
    final_elbo1 = np.mean(np.array(history1['elbo_train'][-50:]))
    final_elbo2 = np.mean(np.array(history2['elbo_train'][-50:]))
    print('-ELBO second run:', final_elbo1)
    print('-ELBO third run:', final_elbo2)

    if final_elbo1 > final_elbo2:
        return mod2

    # The second run was at least as good: discard the third-run model and
    # restore the second-run model from disk.
    del mod2
    adata = sc.read_h5ad(save_dir+"c2f_model_anndata.h5ad")
    mod1 = c2f.Cell2fate_DynamicalModel_PreprocessedCounts.load(save_dir+'c2f_model', adata)
    return mod1

# Grid search over preprocessing settings (number of genes, minimum shared
# counts) and module-count multipliers. Each combination trains one model,
# scores its velocities against the ground truth and appends one row of
# metrics and posterior summaries to a shared CSV. Combinations already
# present in the CSV are skipped, so the cell can be re-run to resume.
save_name = method + '_' + dataset
results_path = save_dir + save_name + '_CBDC_And_Posterior_withBatch_preprocessed_oneRun.csv'

# Entries of mod.samples['post_sample_means'] to record, in output column
# order. True: store np.mean and np.var of the entry as '<name>_mean' and
# '<name>_var'. False: store the entry itself under '<name>'.
posterior_columns = [
    ('splicing_alpha', False),
    ('splicing_mean', False),
    ('beta_g', True),
    ('degredation_alpha', False),
    ('degredation_mean', False),
    ('gamma_g', True),
    ('factor_level_g', True),
    ('g_fg', True),
    ('A_mgON', True),
    ('lam_mu', False),
    ('lam_sd', False),
    ('lam_m_mu', True),
    ('lam_mi', True),
    ('Tmax', False),
    ('t_c_loc', False),
    ('t_c_scale', False),
    ('T_c', True),
    ('t_delta', True),
    ('T_mON', True),
    ('T_mOFF', True),
    ('mu_expression', True),
]

for i, n_genes in enumerate(n_genes_list):
    for j, min_counts in enumerate(n_counts_list):
        for k, module_factor in enumerate(mod_list):
            print(i)
            print(j)
            print(k)
            model_index = str(i) + '-' + str(j) + '-' + str(k)

            # Skip combinations that already have a row in the results table.
            if exists(results_path):
                tab = pd.read_csv(results_path, index_col = 0)
                if model_index in tab.index:
                    continue

            # Load and preprocess the data for this combination.
            adata = sc.read_h5ad(data_dir + dataset + '/' + dataset + '_anndata.h5ad')
            scv.pp.filter_and_normalize(adata, min_shared_counts=min_counts, n_top_genes=n_genes)
            scv.pp.moments(adata, n_pcs=30, n_neighbors=30)
            c2f.Cell2fate_DynamicalModel_PreprocessedCounts.setup_anndata(adata, spliced_label='spliced', unspliced_label='unspliced',
                                                      batch_key = 'sequencing.batch')

            # Train a single model and export its posterior and velocities.
            n_modules = int(np.round(c2f.utils.get_max_modules(adata)*module_factor))
            mod = c2f.Cell2fate_DynamicalModel_PreprocessedCounts(adata,
                                               n_modules = n_modules)
            mod.train()
            sample_kwarg = {"num_samples": 3, "batch_size" : adata.n_obs,
                 "use_gpu" : True, 'return_samples': False}
            adata = mod.export_posterior(adata, sample_kwargs = sample_kwarg)
            mod.compute_and_plot_total_velocity_scvelo(adata, save = False, delete = False)

            # Calculate performance metrics:
            # NOTE: pickle.load can execute arbitrary code; the ground-truth
            # file must come from a trusted source.
            with open(data_dir + dataset + '/' + dataset + '_groundTruth.pickle', 'rb') as file:
                ground_truth = pickle.load(file)
            metrics = utv.evaluate(adata, ground_truth, 'clusters', 'velocity')
            cbdc_by_transition = metrics['Cross-Boundary Direction Correctness (A->B)']
            cb_score = [np.mean(cbdc_by_transition[x]) for x in cbdc_by_transition.keys()]

            # Re-read the table right before writing so the row is appended to
            # whatever is currently on disk.
            if exists(results_path):
                tab = pd.read_csv(results_path, index_col = 0)
            else:
                c_names = ['CBDC']
                tab = pd.DataFrame(columns = c_names)

            tab.loc[model_index, 'CBDC'] = np.mean(cb_score)
            posterior_means = mod.samples['post_sample_means']
            for name, summarize in posterior_columns:
                if summarize:
                    tab.loc[model_index, name + '_mean'] = np.mean(posterior_means[name])
                    tab.loc[model_index, name + '_var'] = np.var(posterior_means[name])
                else:
                    tab.loc[model_index, name] = posterior_means[name]
            tab.to_csv(results_path)

Global seed set to 0


(Running UniTVelo 0.2.5)
2023-10-18 12:28:37
0
0
0
Filtered out 47456 genes that are detected 20 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 2000 highly variable genes.
Logarithmized X.
computing neighbors
    finished (0:03:36) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:01) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
Leiden clustering ...
Number of Leiden Clusters: 9
Maximal Number of Modules: 10


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 500/500: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [11:19<00:00,  1.36s/it, v_num=1, elbo_train=1.69e+7]
sample_kwargs['batch_size'] 9815
Sampling local variables, batch: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.25s/it]
Sampling global variables, sample: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.23it/s]
Computing total RNAvelocity ...


  0%|          | 0/9815 [00:00<?, ?cells/s]

# Cross-Boundary Direction Correctness (A->B)
{('Blood progenitors 1', 'Blood progenitors 2'): 0.4015152195894096, ('Blood progenitors 2', 'Erythroid1'): 0.7064251272761252, ('Erythroid1', 'Erythroid2'): 0.8108220849756597, ('Erythroid2', 'Erythroid3'): 0.7878059911601162}
Total Mean: 0.6766421057503277
# In-cluster Coherence
{'Blood progenitors 1': 0.87499374, 'Blood progenitors 2': 0.9089133, 'Erythroid1': 0.8405186, 'Erythroid2': 0.84556776, 'Erythroid3': 0.9672953}
Total Mean: 0.8874577283859253
0
0
1
Filtered out 47456 genes that are detected 20 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 2000 highly variable genes.
Logarithmized X.
computing neighbors
    finished (0:00:04) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:01) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Number of Leiden Clusters: 9
Maximal Number of Modules: 10
Epoch 500/500: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [15:27<00:00,  1.85s/it, v_num=1, elbo_train=1.7e+7]
sample_kwargs['batch_size'] 9815
Sampling local variables, batch: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.67s/it]
Sampling global variables, sample: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.81it/s]
Computing total RNAvelocity ...


  0%|          | 0/9815 [00:00<?, ?cells/s]

# Cross-Boundary Direction Correctness (A->B)
{('Blood progenitors 1', 'Blood progenitors 2'): 0.7987811060501351, ('Blood progenitors 2', 'Erythroid1'): 0.6661831853625334, ('Erythroid1', 'Erythroid2'): 0.8315865762077614, ('Erythroid2', 'Erythroid3'): 0.8190823071675725}
Total Mean: 0.7789082936970005
# In-cluster Coherence
{'Blood progenitors 1': 0.97691196, 'Blood progenitors 2': 0.8836922, 'Erythroid1': 0.9880362, 'Erythroid2': 0.9926175, 'Erythroid3': 0.99496466}
Total Mean: 0.9672445058822632
0
0
2
Filtered out 47456 genes that are detected 20 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 2000 highly variable genes.
Logarithmized X.
computing neighbors
    finished (0:00:05) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:01) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Number of Leiden Clusters: 9
Maximal Number of Modules: 10
Epoch 500/500: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [19:32<00:00,  2.34s/it, v_num=1, elbo_train=1.69e+7]
sample_kwargs['batch_size'] 9815
Sampling local variables, batch: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.07s/it]
Sampling global variables, sample: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.52it/s]
Computing total RNAvelocity ...


  0%|          | 0/9815 [00:00<?, ?cells/s]

# Cross-Boundary Direction Correctness (A->B)
{('Blood progenitors 1', 'Blood progenitors 2'): 0.19590265362426912, ('Blood progenitors 2', 'Erythroid1'): 0.47827094935724407, ('Erythroid1', 'Erythroid2'): 0.61212009928755, ('Erythroid2', 'Erythroid3'): 0.7537237184564237}
Total Mean: 0.5100043551813717
# In-cluster Coherence
{'Blood progenitors 1': 0.5544065, 'Blood progenitors 2': 0.708443, 'Erythroid1': 0.8171845, 'Erythroid2': 0.82493055, 'Erythroid3': 0.85858965}
Total Mean: 0.7527108192443848
0
0
3
Filtered out 47456 genes that are detected 20 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 2000 highly variable genes.
Logarithmized X.
computing neighbors
    finished (0:00:05) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:01) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Number of Leiden Clusters: 9
Maximal Number of Modules: 10
Epoch 500/500: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [23:40<00:00,  2.84s/it, v_num=1, elbo_train=1.69e+7]
sample_kwargs['batch_size'] 9815
Sampling local variables, batch: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.51s/it]
Sampling global variables, sample: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.31it/s]
Computing total RNAvelocity ...


  0%|          | 0/9815 [00:00<?, ?cells/s]

# Cross-Boundary Direction Correctness (A->B)
{('Blood progenitors 1', 'Blood progenitors 2'): 0.4010234886802227, ('Blood progenitors 2', 'Erythroid1'): 0.5823364646627314, ('Erythroid1', 'Erythroid2'): 0.5769706841225687, ('Erythroid2', 'Erythroid3'): 0.754833521694438}
Total Mean: 0.5787910397899901
# In-cluster Coherence
{'Blood progenitors 1': 0.7651291, 'Blood progenitors 2': 0.7878145, 'Erythroid1': 0.86862636, 'Erythroid2': 0.858849, 'Erythroid3': 0.9458287}
Total Mean: 0.8452495336532593
0
1
0
Filtered out 48372 genes that are detected 40 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 2000 highly variable genes.
Logarithmized X.
computing neighbors
    finished (0:00:04) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:01) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Number of Leiden Clusters: 9
Maximal Number of Modules: 10
Epoch 500/500: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [11:15<00:00,  1.35s/it, v_num=1, elbo_train=2.09e+7]
sample_kwargs['batch_size'] 9815
Sampling local variables, batch: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.34s/it]
Sampling global variables, sample: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.22it/s]
Computing total RNAvelocity ...


  0%|          | 0/9815 [00:00<?, ?cells/s]

# Cross-Boundary Direction Correctness (A->B)
{('Blood progenitors 1', 'Blood progenitors 2'): 0.4784021386224449, ('Blood progenitors 2', 'Erythroid1'): 0.7401547744232011, ('Erythroid1', 'Erythroid2'): 0.663515446773122, ('Erythroid2', 'Erythroid3'): 0.8186287082279885}
Total Mean: 0.6751752670116892
# In-cluster Coherence
{'Blood progenitors 1': 0.8694388, 'Blood progenitors 2': 0.91197264, 'Erythroid1': 0.8614514, 'Erythroid2': 0.9125346, 'Erythroid3': 0.97371024}
Total Mean: 0.9058215022087097
0
1
1
Filtered out 48372 genes that are detected 40 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 2000 highly variable genes.
Logarithmized X.
computing neighbors
    finished (0:00:04) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:01) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Number of Leiden Clusters: 9
Maximal Number of Modules: 10
Epoch 500/500: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [15:29<00:00,  1.86s/it, v_num=1, elbo_train=2.1e+7]
sample_kwargs['batch_size'] 9815
Sampling local variables, batch: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.66s/it]
Sampling global variables, sample: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.81it/s]
Computing total RNAvelocity ...


  0%|          | 0/9815 [00:00<?, ?cells/s]

# Cross-Boundary Direction Correctness (A->B)
{('Blood progenitors 1', 'Blood progenitors 2'): 0.42986848426315977, ('Blood progenitors 2', 'Erythroid1'): 0.6602539203191057, ('Erythroid1', 'Erythroid2'): 0.7411115314304522, ('Erythroid2', 'Erythroid3'): 0.772440689020475}
Total Mean: 0.6509186562582981
# In-cluster Coherence
{'Blood progenitors 1': 0.64355206, 'Blood progenitors 2': 0.82264215, 'Erythroid1': 0.9472717, 'Erythroid2': 0.7380858, 'Erythroid3': 0.98244876}
Total Mean: 0.8268000483512878
0
1
2
Filtered out 48372 genes that are detected 40 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 2000 highly variable genes.
Logarithmized X.
computing neighbors
    finished (0:00:04) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:01) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Number of Leiden Clusters: 9
Maximal Number of Modules: 10
Epoch 500/500: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [19:41<00:00,  2.36s/it, v_num=1, elbo_train=2.09e+7]
sample_kwargs['batch_size'] 9815
Sampling local variables, batch: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.10s/it]
Sampling global variables, sample: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.52it/s]
Computing total RNAvelocity ...


  0%|          | 0/9815 [00:00<?, ?cells/s]

# Cross-Boundary Direction Correctness (A->B)
{('Blood progenitors 1', 'Blood progenitors 2'): 0.36828848060152053, ('Blood progenitors 2', 'Erythroid1'): 0.6122270773046179, ('Erythroid1', 'Erythroid2'): 0.7646287243823384, ('Erythroid2', 'Erythroid3'): 0.7561951925857385}
Total Mean: 0.6253348687185538
# In-cluster Coherence
{'Blood progenitors 1': 0.8682632, 'Blood progenitors 2': 0.7287186, 'Erythroid1': 0.7975939, 'Erythroid2': 0.7949317, 'Erythroid3': 0.92709655}
Total Mean: 0.823320746421814
0
1
3
Filtered out 48372 genes that are detected 40 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 2000 highly variable genes.
Logarithmized X.
computing neighbors
    finished (0:00:05) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:02) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Number of Leiden Clusters: 9
Maximal Number of Modules: 10
Epoch 500/500: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [23:44<00:00,  2.85s/it, v_num=1, elbo_train=2.1e+7]
sample_kwargs['batch_size'] 9815
Sampling local variables, batch: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.55s/it]
Sampling global variables, sample: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.30it/s]
Computing total RNAvelocity ...


  0%|          | 0/9815 [00:00<?, ?cells/s]

# Cross-Boundary Direction Correctness (A->B)
{('Blood progenitors 1', 'Blood progenitors 2'): 0.3568800821948874, ('Blood progenitors 2', 'Erythroid1'): 0.5988871505066992, ('Erythroid1', 'Erythroid2'): 0.6091472043605416, ('Erythroid2', 'Erythroid3'): 0.7495051655509729}
Total Mean: 0.5786049006532753
# In-cluster Coherence
{'Blood progenitors 1': 0.6516025, 'Blood progenitors 2': 0.8038841, 'Erythroid1': 0.8631212, 'Erythroid2': 0.8657759, 'Erythroid3': 0.9549516}
Total Mean: 0.8278670310974121
0
2
0
Filtered out 46616 genes that are detected 10 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 2000 highly variable genes.
Logarithmized X.
computing neighbors
    finished (0:00:05) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:01) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Number of Leiden Clusters: 9
Maximal Number of Modules: 10
Epoch 500/500: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [11:15<00:00,  1.35s/it, v_num=1, elbo_train=1.27e+7]
sample_kwargs['batch_size'] 9815
Sampling local variables, batch: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.35s/it]
Sampling global variables, sample: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.23it/s]
Computing total RNAvelocity ...


  0%|          | 0/9815 [00:00<?, ?cells/s]

# Cross-Boundary Direction Correctness (A->B)
{('Blood progenitors 1', 'Blood progenitors 2'): 0.8191006711034708, ('Blood progenitors 2', 'Erythroid1'): 0.7688344044255953, ('Erythroid1', 'Erythroid2'): 0.837853446519417, ('Erythroid2', 'Erythroid3'): 0.8244632853895272}
Total Mean: 0.8125629518595026
# In-cluster Coherence
{'Blood progenitors 1': 0.9803896, 'Blood progenitors 2': 0.9931595, 'Erythroid1': 0.9954231, 'Erythroid2': 0.9969314, 'Erythroid3': 0.99196726}
Total Mean: 0.9915741682052612
0
2
1
Filtered out 46616 genes that are detected 10 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 2000 highly variable genes.
Logarithmized X.
computing neighbors
    finished (0:00:04) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:01) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Number of Leiden Clusters: 9
Maximal Number of Modules: 10
Epoch 500/500: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [15:28<00:00,  1.86s/it, v_num=1, elbo_train=1.24e+7]
sample_kwargs['batch_size'] 9815
Sampling local variables, batch: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.66s/it]
Sampling global variables, sample: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.81it/s]
Computing total RNAvelocity ...


  0%|          | 0/9815 [00:00<?, ?cells/s]

# Cross-Boundary Direction Correctness (A->B)
{('Blood progenitors 1', 'Blood progenitors 2'): 0.2383099044702216, ('Blood progenitors 2', 'Erythroid1'): 0.6904365843834226, ('Erythroid1', 'Erythroid2'): 0.8008089091637645, ('Erythroid2', 'Erythroid3'): 0.7791222071428205}
Total Mean: 0.6271694012900573
# In-cluster Coherence
{'Blood progenitors 1': 0.687281, 'Blood progenitors 2': 0.78778774, 'Erythroid1': 0.9015007, 'Erythroid2': 0.8220028, 'Erythroid3': 0.9202796}
Total Mean: 0.8237704038619995
0
2
2
Filtered out 46616 genes that are detected 10 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 2000 highly variable genes.
Logarithmized X.
computing neighbors
    finished (0:00:04) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:01) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Number of Leiden Clusters: 9
Maximal Number of Modules: 10
Epoch 500/500: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [19:24<00:00,  2.33s/it, v_num=1, elbo_train=1.25e+7]
sample_kwargs['batch_size'] 9815
Sampling local variables, batch: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.06s/it]
Sampling global variables, sample: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.52it/s]
Computing total RNAvelocity ...


  0%|          | 0/9815 [00:00<?, ?cells/s]

# Cross-Boundary Direction Correctness (A->B)
{('Blood progenitors 1', 'Blood progenitors 2'): 0.38011665515190374, ('Blood progenitors 2', 'Erythroid1'): 0.717595486852391, ('Erythroid1', 'Erythroid2'): 0.810984297192927, ('Erythroid2', 'Erythroid3'): 0.7431697067665335}
Total Mean: 0.6629665364909387
# In-cluster Coherence
{'Blood progenitors 1': 0.5539245, 'Blood progenitors 2': 0.83756113, 'Erythroid1': 0.860124, 'Erythroid2': 0.7526244, 'Erythroid3': 0.9638705}
Total Mean: 0.7936209440231323
0
2
3
Filtered out 46616 genes that are detected 10 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 2000 highly variable genes.
Logarithmized X.
computing neighbors
    finished (0:00:04) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:01) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Number of Leiden Clusters: 9
Maximal Number of Modules: 10
Epoch 500/500: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [23:38<00:00,  2.84s/it, v_num=1, elbo_train=1.25e+7]
sample_kwargs['batch_size'] 9815
Sampling local variables, batch: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.50s/it]
Sampling global variables, sample: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.31it/s]
Computing total RNAvelocity ...


  0%|          | 0/9815 [00:00<?, ?cells/s]

# Cross-Boundary Direction Correctness (A->B)
{('Blood progenitors 1', 'Blood progenitors 2'): 0.5513532791917938, ('Blood progenitors 2', 'Erythroid1'): 0.7555109313143316, ('Erythroid1', 'Erythroid2'): 0.8070558570684808, ('Erythroid2', 'Erythroid3'): 0.7641889021616042}
Total Mean: 0.7195272424340526
# In-cluster Coherence
{'Blood progenitors 1': 0.8523523, 'Blood progenitors 2': 0.8770883, 'Erythroid1': 0.8890606, 'Erythroid2': 0.8575194, 'Erythroid3': 0.98202014}
Total Mean: 0.8916081190109253
1
0
0
Filtered out 47456 genes that are detected 20 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 3000 highly variable genes.
Logarithmized X.
computing neighbors
    finished (0:00:04) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:02) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Number of Leiden Clusters: 9
Maximal Number of Modules: 10
Epoch 500/500: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [12:03<00:00,  1.45s/it, v_num=1, elbo_train=2.33e+7]
sample_kwargs['batch_size'] 9815
Sampling local variables, batch: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.38s/it]
Sampling global variables, sample: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.51it/s]
Computing total RNAvelocity ...


  0%|          | 0/9815 [00:00<?, ?cells/s]

# Cross-Boundary Direction Correctness (A->B)
{('Blood progenitors 1', 'Blood progenitors 2'): 0.43895111833140865, ('Blood progenitors 2', 'Erythroid1'): 0.7398975163670144, ('Erythroid1', 'Erythroid2'): 0.6258374560521788, ('Erythroid2', 'Erythroid3'): 0.7869990658870823}
Total Mean: 0.647921289159421
# In-cluster Coherence
{'Blood progenitors 1': 0.9318937, 'Blood progenitors 2': 0.89943135, 'Erythroid1': 0.8614888, 'Erythroid2': 0.7859925, 'Erythroid3': 0.95790917}
Total Mean: 0.8873431086540222
1
0
1
Filtered out 47456 genes that are detected 20 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 3000 highly variable genes.
Logarithmized X.
computing neighbors
    finished (0:00:04) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:02) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Number of Leiden Clusters: 9
Maximal Number of Modules: 10
Epoch 500/500: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [16:56<00:00,  2.03s/it, v_num=1, elbo_train=2.32e+7]
sample_kwargs['batch_size'] 9815
Sampling local variables, batch: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.94s/it]
Sampling global variables, sample: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.22it/s]
Computing total RNAvelocity ...


  0%|          | 0/9815 [00:00<?, ?cells/s]

# Cross-Boundary Direction Correctness (A->B)
{('Blood progenitors 1', 'Blood progenitors 2'): 0.29993446911221094, ('Blood progenitors 2', 'Erythroid1'): 0.34266846425826747, ('Erythroid1', 'Erythroid2'): 0.7825606302056307, ('Erythroid2', 'Erythroid3'): 0.8018760470667065}
Total Mean: 0.5567599026607039
# In-cluster Coherence
{'Blood progenitors 1': 0.6701197, 'Blood progenitors 2': 0.6962647, 'Erythroid1': 0.7866288, 'Erythroid2': 0.8633455, 'Erythroid3': 0.93196994}
Total Mean: 0.7896657586097717
1
0
2
Filtered out 47456 genes that are detected 20 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 3000 highly variable genes.
Logarithmized X.
computing neighbors
    finished (0:00:04) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:02) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Number of Leiden Clusters: 9
Maximal Number of Modules: 10
Epoch 500/500: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [22:08<00:00,  2.66s/it, v_num=1, elbo_train=2.33e+7]
sample_kwargs['batch_size'] 9815
Sampling local variables, batch: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.55s/it]
Sampling global variables, sample: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.03it/s]
Computing total RNAvelocity ...


  0%|          | 0/9815 [00:00<?, ?cells/s]

# Cross-Boundary Direction Correctness (A->B)
{('Blood progenitors 1', 'Blood progenitors 2'): 0.4453066757820847, ('Blood progenitors 2', 'Erythroid1'): 0.557590618951337, ('Erythroid1', 'Erythroid2'): 0.6495025410231053, ('Erythroid2', 'Erythroid3'): 0.7091302589515536}
Total Mean: 0.5903825236770202
# In-cluster Coherence
{'Blood progenitors 1': 0.6871128, 'Blood progenitors 2': 0.7676731, 'Erythroid1': 0.74039173, 'Erythroid2': 0.68774444, 'Erythroid3': 0.73996717}
Total Mean: 0.724577784538269
1
0
3
Filtered out 47456 genes that are detected 20 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 3000 highly variable genes.
Logarithmized X.
computing neighbors
    finished (0:00:04) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:02) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Number of Leiden Clusters: 9
Maximal Number of Modules: 10
Epoch 197/500:  39%|████████████████████████████████████████████████████████████▎                                                                                             | 196/500 [10:40<17:11,  3.39s/it, v_num=1, elbo_train=2.34e+7]

In [None]:
# Load a previously saved benchmarking results table from disk into `tab`.
# NOTE(review): the file name suggests cell2fate / MouseErythroid CBDC and
# posterior metrics; the column layout is not visible here -- confirm
# against the cell that writes this CSV.
tab = pd.read_csv(
    filepath_or_buffer='/nfs/team283/aa16/data/fate_benchmarking/benchmarking_results/cell2fate_MouseErythroid_CBDC_And_Posterior_withBatch_preprocessed.csv',
)

In [None]:
# Bare expression as the cell's only statement: in a notebook this renders
# the `tab` DataFrame as the cell output (it has no effect in a plain script).
tab