In [10]:
from typing import Dict, Iterable, Optional

import numpy as np
import torch
from torch.distributions import Normal, Poisson
from torch.distributions import kl_divergence as kld
from torch import tensor
from complementary_models import HALOVIR as HALOVI
from complementary_models import HALOVAER as HALOVAE
import scanpy as sc
import scvi
import pandas as pd
# torch.autograd.set_detect_anomaly(True)

In [11]:
### test whole data with RNA only 
adata_multi = sc.read_h5ad("PBMC/CD8_Tlymph.h5ad")
adata_multi.obs["batch_id"] = 1
adata_multi.var["modality"] =adata_multi.var["feature_types"]
adata_mvi = scvi.data.organize_multiome_anndatas(adata_multi)

In [12]:
HALOVI.setup_anndata(adata_mvi, batch_key="modality", time_key='latent_time')
model = HALOVI(
    adata_mvi,
    n_genes=(adata_mvi.var['modality']=='Gene Expression').sum(),
    n_regions=(adata_mvi.var['modality']=='Peaks').sum()
)

In [13]:
model.train(max_epochs=10)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 10/10: 100%|██████████| 10/10 [00:14<00:00,  1.48s/it, loss=5.9e+03, v_num=1] 


In [14]:
model.module.set_train_params(expr_train=False, acc_train=True)
model.train(max_epochs=200)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 200/200: 100%|██████████| 200/200 [02:45<00:00,  1.21it/s, loss=1.4e+04, v_num=1] 


In [15]:
model.module.set_train_params(expr_train=True, acc_train=False)
model.train(max_epochs=200)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 200/200: 100%|██████████| 200/200 [02:50<00:00,  1.17it/s, loss=5.21e+03, v_num=1]


In [16]:
model.module.set_train_params(expr_train=True, acc_train=True)
model.train(max_epochs=200)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 200/200: 100%|██████████| 200/200 [05:08<00:00,  1.54s/it, loss=2.78e+04, v_num=1]


In [17]:
model.module.set_train_params(expr_train=True, acc_train=True)
model.module.set_finetune_params(2)
model.train(max_epochs=100)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 100/100: 100%|██████████| 100/100 [02:05<00:00,  1.26s/it, loss=9.78e+04, v_num=1]


In [35]:
model.module.set_train_params(expr_train=True, acc_train=False)
model.module.set_finetune_params(1)
model.train(max_epochs=100)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 100/100: 100%|██████████| 100/100 [02:05<00:00,  1.26s/it, loss=5.55e+04, v_num=1]


In [22]:
model.module.set_train_params(expr_train=True, acc_train=True)
model.module.set_finetune_params(2)
model.train(max_epochs=100)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 100/100: 100%|██████████| 100/100 [02:26<00:00,  1.46s/it, loss=8.6e+04, v_num=1] 


In [40]:
model.module.set_train_params(expr_train=True, acc_train=False)
model.module.set_finetune_params(1)
model.train(max_epochs=100)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 100/100: 100%|██████████| 100/100 [02:04<00:00,  1.24s/it, loss=4.48e+04, v_num=1]


torch.cat(latent_expr).numpy(), torch.cat(latent_atac).numpy(), \
            torch.cat(latent_expr_dep).numpy(), torch.cat(latent_atac_dep).numpy(), \
                torch.cat(latent_expr_indep).numpy(), torch.cat(latent_atac_indep).numpy(), torch.cat(times).numpy()
    

In [18]:
latent_expr, latent_acc,latent_expr_dep, latent_atac_dep, latent_expr_indep, latent_atac_indep, times  = model.get_latent_representation()

In [19]:
## benchmark clustering 
from sklearn.metrics.cluster import adjusted_rand_score as ARI
from sklearn.metrics import normalized_mutual_info_score as NMI


latent_rep = np.concatenate((latent_expr, latent_acc), axis=1)
adata_mvi.obsm["latent_rep"] = latent_rep
sc.pp.neighbors(adata_mvi, use_rep="latent_rep")
sc.tl.leiden(adata_mvi, key_added="leiden_latent", resolution=0.4)

latent_rep_dep = np.concatenate((latent_expr_dep, latent_atac_dep), axis=1)
latent_rep_dep.shape
adata_mvi.obsm["latent_rep_dep"] = latent_rep_dep
sc.pp.neighbors(adata_mvi, use_rep="latent_rep_dep")
sc.tl.leiden(adata_mvi, key_added="leiden_latent_dep", resolution=0.4)

latent_rep_indep = np.concatenate((latent_expr_indep, latent_atac_indep), axis=1)
adata_mvi.obsm["latent_rep_indep"] = latent_rep_indep
sc.pp.neighbors(adata_mvi, use_rep="latent_rep_indep")
sc.tl.leiden(adata_mvi, key_added="leiden_latent_indep", resolution=0.4)




In [20]:
ari_whole = ARI(adata_mvi.obs['celltype'], adata_mvi.obs['leiden_latent'])
nmi_whole = NMI(adata_mvi.obs['celltype'], adata_mvi.obs['leiden_latent'])


nmi_dep = NMI(adata_mvi.obs['celltype'], adata_mvi.obs['leiden_latent_dep'])
ari_dep = ARI(adata_mvi.obs['celltype'], adata_mvi.obs['leiden_latent_dep'])


nmi_indep = NMI(adata_mvi.obs['celltype'], adata_mvi.obs['leiden_latent_indep'])
ari_indep = ARI(adata_mvi.obs['celltype'], adata_mvi.obs['leiden_latent_indep'])

print("ari_whole {}, nmi_whole: {}".format(ari_whole, nmi_whole))
print("ari_dep {}, nmi_dep: {}".format(ari_dep, nmi_dep))
print("ari_indep {}, nmi_indep: {}".format(ari_indep, nmi_indep))


ari_whole 0.21310027126634526, nmi_whole: 0.32710693910568406
ari_dep 0.18816797844561942, nmi_dep: 0.2593583939655677
ari_indep 0.15716708953291902, nmi_indep: 0.22508399691354886


In [42]:
from complementary_models import torch_infer_nonsta_dir


latent_atac_indep = torch.tensor(latent_atac_indep).to('cuda')
latent_expr_indep = torch.tensor(latent_expr_indep).to('cuda')
times = torch.tensor(times).to('cuda')

score3, _, _ = torch_infer_nonsta_dir(latent_atac_indep, latent_expr_indep, times)
score4, _, _ = torch_infer_nonsta_dir(latent_expr_indep, latent_atac_indep, times)
print("score3 {} and score4 {}".format(score3, score4))
score3 - score4

score3 0.04265802739506264 and score4 0.051204012411572956


tensor(-0.0085, device='cuda:0', dtype=torch.float64)

In [43]:
latent_atac_dep = torch.tensor(latent_atac_dep).to('cuda')
latent_expr_dep = torch.tensor(latent_expr_dep).to('cuda')
times = torch.tensor(times).to('cuda')

score1, _, _ = torch_infer_nonsta_dir(latent_atac_dep, latent_expr_dep, times)
score2, _, _ = torch_infer_nonsta_dir(latent_expr_dep, latent_atac_dep, times)
print("score1 {} and score2 {}".format(score1, score2))
score1 - score2

  times = torch.tensor(times).to('cuda')


score1 0.056266649235981396 and score2 0.05626094452238623


tensor(5.7047e-06, device='cuda:0', dtype=torch.float64)

In [3]:
#### MultiVI and GLUE test

HALOVI.setup_anndata(adata_mvi, batch_key="modality", time_key='latent_time')
model2 = HALOVI(
    adata_mvi,
    n_genes=(adata_mvi.var['modality']=='Gene Expression').sum(),
    n_regions=(adata_mvi.var['modality']=='Peaks').sum()
)

In [4]:
model2.train(max_epochs=100)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 100/100: 100%|██████████| 100/100 [02:25<00:00,  1.46s/it, loss=5.48e+03, v_num=1]


In [5]:
latent_expr, latent_acc,latent_expr_dep, latent_atac_dep, latent_expr_indep, latent_atac_indep, times  = model2.get_latent_representation()

In [48]:
z_multi_VI = 1/2 * (latent_expr+ latent_acc)
z_multi_VI.shape

(1934, 10)

In [50]:
from complementary_models import torch_infer_nonsta_dir

length=z_multi_VI.shape[1]
scores = []
z_multi_acc = z_multi_VI[: ,:5]
z_multi_expr = z_multi_VI[: ,5:]

z_multi_acc = torch.tensor(z_multi_acc).to('cuda')
z_multi_expr = torch.tensor(z_multi_expr).to('cuda')
times = torch.tensor(times).to('cuda')
score1, _, _ = torch_infer_nonsta_dir(z_multi_acc, z_multi_expr, times)
score2, _, _ = torch_infer_nonsta_dir(z_multi_expr, z_multi_acc, times)

# for i in range(0, length, 2):
#     z1 =  torch.tensor(z_multi_VI[:, i]).to('cuda')
#     z2 = torch.tensor(z_multi_VI[:, i+1]).to('cuda')
#     print(z1.shape, z2.shape)
#     score, _, _ = torch_infer_nonsta_dir(z1, z2, times)
#     scores.append(score)

print("score1 {} and score2 {}".format(score1, score2))
score1 - score2

score1 0.05626836621398739 and score2 0.05625706189779424


tensor(1.1304e-05, device='cuda:0', dtype=torch.float64)

In [1]:
glue_acc =  torch.tensor(latent_acc).to('cuda')
glue_exp = torch.tensor(latent_expr).to('cuda')
times = torch.tensor(times).to('cuda')

score1, _, _ = torch_infer_nonsta_dir(glue_acc, glue_exp, times)
score2, _, _ = torch_infer_nonsta_dir(glue_acc, glue_exp, times)

print("score1 {} and score2 {}".format(score1, score2))
score1 - score2

NameError: name 'torch' is not defined

In [52]:
print((adata_mvi.var["feature_types"]=="Gene Expression").sum())
print((adata_mvi.var["feature_types"]!="Gene Expression").sum())

36601
108377


In [53]:
gene_expr = adata_mvi.X[:, :36601]
gene_peak = adata_mvi.X[:, 108377:]
print(gene_expr.shape, gene_peak.shape)

(1934, 36601) (1934, 36601)


In [54]:
Gene_PCA = sc.pp.pca(gene_expr, n_comps=10)
Peak_SVD = sc.pp.pca(gene_expr, n_comps=10, svd_solver='arpack')

Epoch 1/100:   0%|          | 0/100 [14:38<?, ?it/s]
Epoch 1/100:   0%|          | 0/100 [11:06<?, ?it/s]


In [55]:
Peak_SVD =  torch.tensor(Peak_SVD).to('cuda')
Gene_PCA = torch.tensor(Gene_PCA).to('cuda')
times = torch.tensor(times).to('cuda')

score1, _, _ = torch_infer_nonsta_dir(Peak_SVD, Gene_PCA, times)
score2, _, _ = torch_infer_nonsta_dir(Gene_PCA, Peak_SVD, times)

print("score1 {} and score2 {}".format(score1, score2))
score1 - score2

  times = torch.tensor(times).to('cuda')


score1 0.056260825610453986 and score2 0.05626105133835672


tensor(-2.2573e-07, device='cuda:0', dtype=torch.float64)

In [6]:
#### test the clustering 
## test the GLUE


from sklearn.metrics.cluster import adjusted_rand_score as ARI
from sklearn.metrics import normalized_mutual_info_score as NMI


latent_rep = np.concatenate((latent_expr, latent_acc), axis=1)
adata_mvi.obsm["latent_rep"] = latent_rep
sc.pp.neighbors(adata_mvi, use_rep="latent_rep")
sc.tl.leiden(adata_mvi, key_added="leiden_latent", resolution=0.4)
ari_score = ARI(adata_mvi.obs['celltype'], adata_mvi.obs['leiden_latent'])
nmi_whole = NMI(adata_mvi.obs['celltype'], adata_mvi.obs['leiden_latent'])

print("ARI {}, NMI {}".format(ari_score, nmi_whole))


ARI 0.3562473312994934, NMI 0.4953843181608691


In [7]:
z_multi_VI = 1/2 * (latent_expr+ latent_acc)
adata_mvi.obsm["latent_rep"] = z_multi_VI
sc.pp.neighbors(adata_mvi, use_rep="latent_rep")
sc.tl.leiden(adata_mvi, key_added="leiden_latent", resolution=0.4)
ari_score = ARI(adata_mvi.obs['celltype'], adata_mvi.obs['leiden_latent'])
nmi_whole = NMI(adata_mvi.obs['celltype'], adata_mvi.obs['leiden_latent'])

print("ARI {}, NMI {}".format(ari_score, nmi_whole))

ARI 0.24111202586923244, NMI 0.28652621046312665


In [8]:
gene_expr = adata_mvi.X[:, :14583]
gene_peak = adata_mvi.X[:, 14583:]
Gene_PCA = sc.pp.pca(gene_expr, n_comps=10)

adata_mvi.obsm["latent_rep"] = Gene_PCA
sc.pp.neighbors(adata_mvi, use_rep="latent_rep")
sc.tl.leiden(adata_mvi, key_added="leiden_latent", resolution=0.4)
ari_score = ARI(adata_mvi.obs['celltype'], adata_mvi.obs['leiden_latent'])
nmi_whole = NMI(adata_mvi.obs['celltype'], adata_mvi.obs['leiden_latent'])
print("ARI {}, NMI {}".format(ari_score, nmi_whole))

ARI 0.1347572955189734, NMI 0.24654630257300728


In [9]:
Peak_SVD = sc.pp.pca(gene_peak, n_comps=10, svd_solver='arpack')
adata_mvi.obsm["latent_rep"] = Peak_SVD
sc.pp.neighbors(adata_mvi, use_rep="latent_rep")
sc.tl.leiden(adata_mvi, key_added="leiden_latent", resolution=0.4)
ari_score = ARI(adata_mvi.obs['celltype'], adata_mvi.obs['leiden_latent'])
nmi_whole = NMI(adata_mvi.obs['celltype'], adata_mvi.obs['leiden_latent'])
print("ARI {}, NMI {}".format(ari_score, nmi_whole))

ARI 0.17463192634071842, NMI 0.26909877018391515
