In [1]:
import anndata
import numpy as np
import scvelo as scv
import scanpy as sc
import sys
import torch
import os.path
import deepvelo as dv
import pickle as pickle
import matplotlib.pyplot as plt
import pandas as pd
import unitvelo as utv
from os.path import exists
# Name of the velocity method benchmarked in this notebook. It is used as the
# row label in the per-dataset score tables and in the saved UMAP file name.
method = 'DeepVelo'

(Running UniTVelo 0.2.5)
2023-05-16 14:55:44


In [2]:
# Benchmark datasets to process. Each name is used both as a sub-directory of
# `data_dir` and as the prefix of the files stored inside it.
datasets = [
    'Pancreas_with_cc',
    'DentateGyrus',
    'MouseBoneMarrow',
    'MouseErythroid',
    'HumanBoneMarrow',
    'HumanDevelopingBrain',
]

# Root folders for the input data and for the generated results.
# Both paths end with a trailing slash.
data_dir = '/nfs/team283/aa16/data/fate_benchmarking/benchmarking_datasets/'
save_dir = '/nfs/team283/aa16/data/fate_benchmarking/benchmarking_results/'

In [None]:
def _update_score_table(csv_path, row_label, score_names, scores):
    """Insert or overwrite one row of a per-dataset score table stored as CSV.

    Parameters
    ----------
    csv_path : str
        Location of the table. If the file exists it is read with its first
        column as the index; otherwise a new table is created with
        ``score_names`` plus a trailing ``'Mean'`` column.
    row_label : str
        Index label of the row to write (the benchmarked method's name).
    score_names : sequence
        Column labels, one per entry of ``scores``.
    scores : sequence of float
        Scores to store; their mean is appended as the last value of the row.

    Notes
    -----
    Values are assigned by position, so an existing file must contain the same
    columns in the same order as ``score_names`` followed by ``'Mean'``.
    """
    if exists(csv_path):
        table = pd.read_csv(csv_path, index_col=0)
    else:
        table = pd.DataFrame(columns=list(score_names) + ['Mean'],
                             index=[row_label])
    table.loc[row_label, :] = list(scores) + [np.mean(scores)]
    table.to_csv(csv_path)


# Make sure the figure folder exists before anything is written into it.
umap_dir = os.path.join(save_dir, 'UMAPs')
os.makedirs(umap_dir, exist_ok=True)

for dataset in datasets:
    print(dataset)
    dataset_dir = os.path.join(data_dir, dataset)

    # Preprocess, train DeepVelo and project the velocities onto the embedding.
    adata = sc.read_h5ad(os.path.join(dataset_dir, dataset + '_anndata.h5ad'))
    scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=3000)
    scv.pp.moments(adata, n_pcs=30, n_neighbors=30)
    trainer = dv.train(adata, dv.Constants.default_configs)
    scv.pp.neighbors(adata)
    scv.tl.velocity_graph(adata, vkey='velocity')
    scv.tl.velocity_embedding(adata, vkey='velocity')

    # Save the velocity stream plot once per dataset. The figure is closed
    # explicitly so open figures do not pile up over the loop.
    fig, ax = plt.subplots(1, 1, figsize=(8, 6))
    scv.pl.velocity_embedding_stream(adata, basis='umap', save=False, vkey='velocity',
                                     show=False, ax=ax)
    fig.savefig(os.path.join(umap_dir, dataset + '_UMAP_' + method + '.svg'))
    plt.close(fig)

    # Calculate performance metrics.
    # NOTE(security): pickle.load can execute arbitrary code; only load
    # ground-truth files from a trusted location.
    with open(os.path.join(dataset_dir, dataset + '_groundTruth.pickle'), 'rb') as handle:
        ground_truth = pickle.load(handle)
    # Evaluate once; both score tables below are filled from this result.
    metrics = utv.evaluate(adata, ground_truth, 'clusters', 'velocity')

    # Cross-boundary direction correctness: one column per entry of the metric dict.
    cbdc = metrics['Cross-Boundary Direction Correctness (A->B)']
    transitions = list(cbdc.keys())
    _update_score_table(os.path.join(save_dir, dataset + '_CBDC_scores.csv'),
                        method, transitions,
                        [np.mean(cbdc[t]) for t in transitions])

    # In-cluster coherence: one column per distinct label found in ground_truth
    # (presumably a list of (source, target) cluster pairs; verify against the
    # pickled files).
    icc = metrics['In-cluster Coherence']
    clusters = list(np.unique(np.concatenate(ground_truth)))
    _update_score_table(os.path.join(save_dir, dataset + '_ICC_scores.csv'),
                        method, clusters,
                        [np.mean(icc[c]) for c in clusters])

Pancreas_with_cc
Filtered out 20801 genes that are detected 20 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 3000 highly variable genes.
Logarithmized X.
computing neighbors
    finished (0:00:11) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:00) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
building graph


INFO:train:Beginning training of DeepVelo_Base ...


velo data shape: torch.Size([3696, 3000])


  if not hasattr(tensorboard, "__version__") or LooseVersion(
  ) < LooseVersion("1.15"):
INFO:trainer:    epoch          : 1
INFO:trainer:    time:          : 10.626794576644897
INFO:trainer:    loss           : 98578.53125
INFO:trainer:    mse            : 2.556227922439575
INFO:trainer:    epoch          : 2
INFO:trainer:    time:          : 10.040715217590332
INFO:trainer:    loss           : 18010.15625
INFO:trainer:    mse            : 1.5266255140304565
INFO:trainer:    epoch          : 3
INFO:trainer:    time:          : 9.94972562789917
INFO:trainer:    loss           : 9833.1630859375
INFO:trainer:    mse            : 1.5814744234085083
INFO:trainer:    epoch          : 4
INFO:trainer:    time:          : 9.874797105789185
INFO:trainer:    loss           : 6979.2314453125
INFO:trainer:    mse            : 1.92239248752594
INFO:trainer:    epoch          : 5
INFO:trainer:    time:          : 10.059625625610352
INFO:trainer:    loss           : 5580.23046875
INFO:trainer:    ms

INFO:trainer:    time:          : 10.01677393913269
INFO:trainer:    loss           : 2230.870361328125
INFO:trainer:    mse            : 0.5351672768592834
INFO:trainer:    epoch          : 42
INFO:trainer:    time:          : 10.1201012134552
INFO:trainer:    loss           : 2224.52734375
INFO:trainer:    mse            : 0.491735577583313
INFO:trainer:    epoch          : 43
INFO:trainer:    time:          : 10.051969051361084
INFO:trainer:    loss           : 2219.3623046875
INFO:trainer:    mse            : 0.48965513706207275
INFO:trainer:    epoch          : 44
INFO:trainer:    time:          : 10.045467376708984
INFO:trainer:    loss           : 2212.611572265625
INFO:trainer:    mse            : 0.4926551580429077
INFO:trainer:    epoch          : 45
INFO:trainer:    time:          : 10.014889001846313
INFO:trainer:    loss           : 2202.308349609375
INFO:trainer:    mse            : 0.49902263283729553
INFO:trainer:    epoch          : 46
INFO:trainer:    time:          :

INFO:trainer:    mse            : 0.4108200669288635
INFO:trainer:    epoch          : 84
INFO:trainer:    time:          : 10.241234540939331
INFO:trainer:    loss           : 2073.707275390625
INFO:trainer:    mse            : 0.42630037665367126
INFO:trainer:    epoch          : 85
INFO:trainer:    time:          : 10.337786674499512
INFO:trainer:    loss           : 2075.701171875
INFO:trainer:    mse            : 0.4460844099521637
INFO:trainer:    epoch          : 86
INFO:trainer:    time:          : 10.282583236694336
INFO:trainer:    loss           : 2072.56103515625
INFO:trainer:    mse            : 0.39354532957077026
INFO:trainer:    epoch          : 87
INFO:trainer:    time:          : 10.182904243469238
INFO:trainer:    loss           : 2073.71142578125
INFO:trainer:    mse            : 0.4196650981903076
INFO:trainer:    epoch          : 88
INFO:trainer:    time:          : 10.309855699539185
INFO:trainer:    loss           : 2068.3837890625
INFO:trainer:    mse          

velo_mat shape: (3696, 3000)
--> added 'velocity' (adata.layers)
--> added 'velocity_unspliced' (adata.layers)
--> added 'cell_specific_beta' (adata.layers)
--> added 'cell_specific_gamma' (adata.layers)
computing neighbors
    finished (0:00:00) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing velocity graph (using 1/64 cores)


  0%|          | 0/3696 [00:00<?, ?cells/s]

    finished (0:00:21) --> added 
    'velocity_graph', sparse matrix with cosine correlations (adata.uns)
computing velocity embedding
    finished (0:00:00) --> added
    'velocity_umap', embedded velocity vectors (adata.obsm)
# Cross-Boundary Direction Correctness (A->B)
{('Ngn3 high EP', 'Pre-endocrine'): 0.7638681882592249, ('Pre-endocrine', 'Alpha'): 0.3553044939264237, ('Pre-endocrine', 'Beta'): 0.47253457128227305, ('Pre-endocrine', 'Delta'): -0.24480072462109212, ('Pre-endocrine', 'Epsilon'): -0.4735415508171434}
Total Mean: 0.17467299560593724
# In-cluster Coherence
{'Alpha': 0.87884545, 'Beta': 0.8917102, 'Delta': 0.9379041, 'Ductal': 0.953963, 'Epsilon': 0.82792366, 'Ngn3 high EP': 0.94498867, 'Ngn3 low EP': 0.9526061, 'Pre-endocrine': 0.92413455}
Total Mean: 0.9140094518661499
# Cross-Boundary Direction Correctness (A->B)
{('Ngn3 high EP', 'Pre-endocrine'): 0.7638681882592249, ('Pre-endocrine', 'Alpha'): 0.3553044939264237, ('Pre-endocrine', 'Beta'): 0.47253457128227305, (

INFO:train:Beginning training of DeepVelo_Base ...


velo data shape: torch.Size([2930, 3000])


INFO:trainer:    epoch          : 1
INFO:trainer:    time:          : 7.987591028213501
INFO:trainer:    loss           : 98339.65625
INFO:trainer:    mse            : 0.045380014926195145
INFO:trainer:    epoch          : 2
INFO:trainer:    time:          : 7.982825994491577
INFO:trainer:    loss           : 10450.41015625
INFO:trainer:    mse            : 0.0556332990527153
INFO:trainer:    epoch          : 3
INFO:trainer:    time:          : 7.980865478515625
INFO:trainer:    loss           : 5999.197265625
INFO:trainer:    mse            : 0.07936916500329971
INFO:trainer:    epoch          : 4
INFO:trainer:    time:          : 8.041043281555176
INFO:trainer:    loss           : 4370.6025390625
INFO:trainer:    mse            : 0.12508724629878998
INFO:trainer:    epoch          : 5
INFO:trainer:    time:          : 8.016708612442017
INFO:trainer:    loss           : 3566.6396484375
INFO:trainer:    mse            : 0.16055800020694733
INFO:trainer:    epoch          : 6
INFO:train

INFO:trainer:    loss           : 1454.6390380859375
INFO:trainer:    mse            : 0.12448208034038544
INFO:trainer:    epoch          : 44
INFO:trainer:    time:          : 8.058297634124756
INFO:trainer:    loss           : 1446.5899658203125
INFO:trainer:    mse            : 0.12235294282436371
INFO:trainer:    epoch          : 45
INFO:trainer:    time:          : 7.985539436340332
INFO:trainer:    loss           : 1444.890869140625
INFO:trainer:    mse            : 0.11898846179246902
INFO:trainer:    epoch          : 46
INFO:trainer:    time:          : 8.08238673210144
INFO:trainer:    loss           : 1442.5430908203125
INFO:trainer:    mse            : 0.11468503624200821
INFO:trainer:    epoch          : 47
INFO:trainer:    time:          : 8.062665939331055
INFO:trainer:    loss           : 1439.594482421875
INFO:trainer:    mse            : 0.1164957657456398
INFO:trainer:    epoch          : 48
INFO:trainer:    time:          : 8.165328025817871
INFO:trainer:    loss   

INFO:trainer:    mse            : 0.08559639751911163
INFO:trainer:    epoch          : 86
INFO:trainer:    time:          : 8.050222873687744
INFO:trainer:    loss           : 1349.73388671875
INFO:trainer:    mse            : 0.08243066072463989
INFO:trainer:    epoch          : 87
INFO:trainer:    time:          : 7.992918968200684
INFO:trainer:    loss           : 1353.366455078125
INFO:trainer:    mse            : 0.08503949642181396
INFO:trainer:    epoch          : 88
INFO:trainer:    time:          : 8.053787231445312
INFO:trainer:    loss           : 1347.7244873046875
INFO:trainer:    mse            : 0.08227013796567917
INFO:trainer:    epoch          : 89
INFO:trainer:    time:          : 8.031780004501343
INFO:trainer:    loss           : 1348.6094970703125
INFO:trainer:    mse            : 0.08543594926595688
INFO:trainer:    epoch          : 90
INFO:trainer:    time:          : 8.006088972091675
INFO:trainer:    loss           : 1346.470458984375
INFO:trainer:    mse    

velo_mat shape: (2930, 3000)
--> added 'velocity' (adata.layers)
--> added 'velocity_unspliced' (adata.layers)
--> added 'cell_specific_beta' (adata.layers)
--> added 'cell_specific_gamma' (adata.layers)
computing neighbors
    finished (0:00:00) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing velocity graph (using 1/64 cores)


  0%|          | 0/2930 [00:00<?, ?cells/s]

    finished (0:00:18) --> added 
    'velocity_graph', sparse matrix with cosine correlations (adata.uns)
computing velocity embedding
    finished (0:00:00) --> added
    'velocity_umap', embedded velocity vectors (adata.obsm)
# Cross-Boundary Direction Correctness (A->B)
{('nIPC', 'Neuroblast'): 0.2063583893474332, ('Neuroblast', 'Granule immature'): 0.5803122016341171, ('Granule immature', 'Granule mature'): 0.2928744669680778, ('Radial Glia-like', 'Astrocytes'): -0.7277663400474056, ('OPC', 'OL'): 0.11593580564319025}
Total Mean: 0.09354290470908258
# In-cluster Coherence
{'Astrocytes': 0.9158868, 'Cajal Retzius': 0.99141276, 'Cck-Tox': 0.9688492, 'Endothelial': 0.9218514, 'GABA': 0.95909286, 'Granule immature': 0.93300873, 'Granule mature': 0.8965042, 'Microglia': 0.9072363, 'Mossy': 0.9468922, 'Neuroblast': 0.9395334, 'OL': 0.98034024, 'OPC': 0.9475288, 'Radial Glia-like': 0.9166216, 'nIPC': 0.9162967}
Total Mean: 0.9386467337608337
# Cross-Boundary Direction Correctness (A->B)


INFO:train:Beginning training of DeepVelo_Base ...


velo data shape: torch.Size([2600, 1252])


INFO:trainer:    epoch          : 1
INFO:trainer:    time:          : 2.621732234954834
INFO:trainer:    loss           : 65928.84375
INFO:trainer:    mse            : 56.59944152832031
INFO:trainer:    epoch          : 2
INFO:trainer:    time:          : 2.6145009994506836
INFO:trainer:    loss           : 15385.1875
INFO:trainer:    mse            : 20.875686645507812
INFO:trainer:    epoch          : 3
INFO:trainer:    time:          : 2.553560972213745
INFO:trainer:    loss           : 8040.634765625
INFO:trainer:    mse            : 6.42738151550293
INFO:trainer:    epoch          : 4
INFO:trainer:    time:          : 2.5921831130981445
INFO:trainer:    loss           : 5812.0849609375
INFO:trainer:    mse            : 2.9780197143554688
INFO:trainer:    epoch          : 5
INFO:trainer:    time:          : 2.5486016273498535
INFO:trainer:    loss           : 4686.34033203125
INFO:trainer:    mse            : 3.7621498107910156
INFO:trainer:    epoch          : 6
INFO:trainer:    t

INFO:trainer:    loss           : 1214.034423828125
INFO:trainer:    mse            : 0.40187379717826843
INFO:trainer:    epoch          : 44
INFO:trainer:    time:          : 2.6114232540130615
INFO:trainer:    loss           : 1208.8997802734375
INFO:trainer:    mse            : 0.4257236421108246
INFO:trainer:    epoch          : 45
INFO:trainer:    time:          : 2.5877387523651123
INFO:trainer:    loss           : 1202.627197265625
INFO:trainer:    mse            : 0.38274314999580383
INFO:trainer:    epoch          : 46
INFO:trainer:    time:          : 2.54402494430542
INFO:trainer:    loss           : 1205.94775390625
INFO:trainer:    mse            : 0.415799617767334
INFO:trainer:    epoch          : 47
INFO:trainer:    time:          : 2.5795018672943115
INFO:trainer:    loss           : 1194.8587646484375
INFO:trainer:    mse            : 0.4276178181171417
INFO:trainer:    epoch          : 48
INFO:trainer:    time:          : 2.624359607696533
INFO:trainer:    loss     

INFO:trainer:    mse            : 0.3151862323284149
INFO:trainer:    epoch          : 86
INFO:trainer:    time:          : 2.6212210655212402
INFO:trainer:    loss           : 1098.372314453125
INFO:trainer:    mse            : 0.3378569781780243
INFO:trainer:    epoch          : 87
INFO:trainer:    time:          : 2.5782554149627686
INFO:trainer:    loss           : 1094.8271484375
INFO:trainer:    mse            : 0.3218957483768463
INFO:trainer:    epoch          : 88
INFO:trainer:    time:          : 2.5552890300750732
INFO:trainer:    loss           : 1094.0272216796875
INFO:trainer:    mse            : 0.3132927119731903
INFO:trainer:    epoch          : 89
INFO:trainer:    time:          : 2.6170148849487305
INFO:trainer:    loss           : 1094.7432861328125
INFO:trainer:    mse            : 0.3345695436000824
INFO:trainer:    epoch          : 90
INFO:trainer:    time:          : 2.5774502754211426
INFO:trainer:    loss           : 1093.2314453125
INFO:trainer:    mse       

velo_mat shape: (2600, 1252)
--> added 'velocity' (adata.layers)
--> added 'velocity_unspliced' (adata.layers)
--> added 'cell_specific_beta' (adata.layers)
--> added 'cell_specific_gamma' (adata.layers)
computing neighbors
    finished (0:00:00) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing velocity graph (using 1/64 cores)


  0%|          | 0/2600 [00:00<?, ?cells/s]

    finished (0:00:05) --> added 
    'velocity_graph', sparse matrix with cosine correlations (adata.uns)
computing velocity embedding
    finished (0:00:00) --> added
    'velocity_umap', embedded velocity vectors (adata.obsm)
# Cross-Boundary Direction Correctness (A->B)
{('dividing', 'progenitors'): -0.42908860070870725, ('progenitors', 'activating'): -0.5703333831148938}
Total Mean: -0.4997109919118005
# In-cluster Coherence
{'B cell lineage': 0.8856354, 'activating': 0.8885332, 'dividing': 0.79850686, 'macrophages': 0.90042406, 'progenitors': 0.83670336}
Total Mean: 0.8619605302810669
# Cross-Boundary Direction Correctness (A->B)
{('dividing', 'progenitors'): -0.42908860070870725, ('progenitors', 'activating'): -0.5703333831148938}
Total Mean: -0.4997109919118005
# In-cluster Coherence
{'B cell lineage': 0.8856354, 'activating': 0.8885332, 'dividing': 0.79850686, 'macrophages': 0.90042406, 'progenitors': 0.83670336}
Total Mean: 0.8619605302810669
MouseErythroid
Filtered out 47456

INFO:train:Beginning training of DeepVelo_Base ...


velo data shape: torch.Size([9815, 3000])


INFO:trainer:    epoch          : 1
INFO:trainer:    time:          : 26.259074449539185
INFO:trainer:    loss           : 163653.484375
INFO:trainer:    mse            : 1185.632080078125
INFO:trainer:    epoch          : 2
INFO:trainer:    time:          : 26.435172080993652
INFO:trainer:    loss           : 40205.65625
INFO:trainer:    mse            : 410.6335754394531
INFO:trainer:    epoch          : 3
INFO:trainer:    time:          : 26.729791164398193
INFO:trainer:    loss           : 41017.203125
INFO:trainer:    mse            : 590.7056884765625
INFO:trainer:    epoch          : 4
INFO:trainer:    time:          : 26.711156368255615
INFO:trainer:    loss           : 31551.3046875
INFO:trainer:    mse            : 468.87152099609375
INFO:trainer:    epoch          : 5
INFO:trainer:    time:          : 26.6386399269104
INFO:trainer:    loss           : 16632.234375
INFO:trainer:    mse            : 214.6481170654297
INFO:trainer:    epoch          : 6
INFO:trainer:    time:  

INFO:trainer:    loss           : 2396.504150390625
INFO:trainer:    mse            : 11.932602882385254
INFO:trainer:    epoch          : 45
INFO:trainer:    time:          : 26.627253770828247
INFO:trainer:    loss           : 2332.75390625
INFO:trainer:    mse            : 11.012228965759277
INFO:trainer:    epoch          : 46
INFO:trainer:    time:          : 26.66788601875305
INFO:trainer:    loss           : 2396.9052734375
INFO:trainer:    mse            : 12.52389907836914


In [None]:
    # NOTE(review): stray fragment. These lines repeat the UMAP stream-plot step
    # of the `for dataset in datasets` loop cell and keep a loop-body
    # indentation, but no enclosing statement is present in this cell. Run on
    # its own it would presumably fail with an IndentationError, and it relies on
    # `adata`, `dataset`, `method` and `save_dir` left over from earlier cells.
    # Confirm whether this cell should be deleted or dedented.
    fix, ax = plt.subplots(1, 1, figsize = (8, 6))
    scv.pl.velocity_embedding_stream(adata, basis='umap', save = False, vkey='velocity',
                                     show = False, ax = ax)
    plt.savefig(save_dir + 'UMAPs/' + dataset + '_UMAP_' + method + '.svg')