In [1]:
# --- Environment -----------------------------------------------------------
# TF_CPP_MIN_LOG_LEVEL has to be in the environment *before* JAX is imported
# and its backend is initialised; the "No GPU/TPU found ... Set
# TF_CPP_MIN_LOG_LEVEL=0 and rerun" message asks for exactly that. Setting it
# after `jax.devices()` (as this cell used to) has no effect on the current run.
import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "0"

# --- Imports (all of them, in one place) -----------------------------------
import anndata as ad
import jax
import numpy as np
import pandas as pd
import scanpy as sc
import scvi
from scipy.io import mmread
from scipy.sparse import csr_matrix

# --- Reproducibility -------------------------------------------------------
SEED = 420
scvi.settings.seed = SEED

# Last expression of the cell so the list of JAX devices is actually displayed.
jax.devices()

  self.seed = seed
  self.dl_pin_memory_gpu_training = (
Global seed set to 420
No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)


In [4]:
DATA_DIR = './Dataset37'


def read_label_column(path, column):
    """Read a header-less, tab-separated label file and return it as a named Series.

    Parameters
    ----------
    path : str
        Location of the TSV file (e.g. ``barcodes.tsv`` or ``features.tsv``).
    column : str
        Name assigned to the single column; it also becomes the name of the
        AnnData index built from the returned Series.

    Raises
    ------
    ValueError
        Raised by pandas when the file does not contain exactly one column.
    """
    labels = pd.read_csv(path, sep='\t', header=None, index_col=None)
    labels.columns = [column]
    return labels[column]


def load_modality(matrix_dir, features_dir=None):
    """Build an AnnData object from a ``matrix.mtx`` / ``barcodes.tsv`` / ``features.tsv`` triple.

    Parameters
    ----------
    matrix_dir : str
        Directory holding ``matrix.mtx`` and ``barcodes.tsv``.
    features_dir : str, optional
        Directory holding ``features.tsv``. Defaults to ``matrix_dir``.

    Returns
    -------
    anndata.AnnData
        The transposed matrix stored as CSR, indexed by barcodes (obs) and by
        de-duplicated feature names (var).
    """
    if features_dir is None:
        features_dir = matrix_dir
    barcodes = read_label_column(f'{matrix_dir}/barcodes.tsv', 'cell_ids')
    features = read_label_column(f'{features_dir}/features.tsv', 'gene_ids')
    # The .mtx file is transposed on load so that rows line up with barcodes.
    counts = csr_matrix(mmread(f'{matrix_dir}/matrix.mtx').T)
    adata = ad.AnnData(
        counts,
        obs=pd.DataFrame(index=barcodes),
        var=pd.DataFrame(index=features),
    )
    adata.var_names_make_unique()
    return adata


# Paired (Train) cells: both modalities measured.
adata_paired_rna = load_modality(f'{DATA_DIR}/Train/RNA')
adata_paired_atac = load_modality(f'{DATA_DIR}/Train/ATAC')

# Held-out (Test) cells. Feature names are deliberately read from the Train
# directories, exactly as before.
# NOTE(review): presumably Train and Test share one feature space -- confirm;
# if Test ships its own features.tsv, drop the `features_dir` argument.
adata_rna = load_modality(f'{DATA_DIR}/Test/RNA', features_dir=f'{DATA_DIR}/Train/RNA')
adata_atac = load_modality(f'{DATA_DIR}/Test/ATAC', features_dir=f'{DATA_DIR}/Train/ATAC')

# --- Configuration ---------------------------------------------------------
save_path = './Results/'
MIN_CELL_FRACTION = 0.001  # features seen in fewer than this fraction of cells are dropped
MAX_EPOCHS = 100
# NOTE(review): 'cuda:1' pins training to the second GPU and will fail on a
# single-GPU machine. The captured output also shows a warning raised at the
# device-argument parsing step, presumably a deprecation of `use_gpu` --
# confirm against the installed scvi-tools version before changing it.
TRAIN_DEVICE = 'cuda:1'

# Create the output directory up front so a missing folder fails fast instead
# of after training has finished.
os.makedirs(save_path, exist_ok=True)

# --- Assemble the MultiVI input --------------------------------------------
# Label every column of the paired matrix with its modality. The counts come
# from the two objects that are actually concatenated below, so the labels
# always match adata_paired's columns.
modality = (
    ['Gene Expression'] * adata_paired_rna.n_vars
    + ['Peaks'] * adata_paired_atac.n_vars
)
adata_paired = ad.concat([adata_paired_rna, adata_paired_atac], merge="same", axis=1)
adata_paired.var_names_make_unique()
adata_paired.var['modality'] = modality
adata_rna.var['modality'] = ['Gene Expression'] * adata_rna.n_vars

adata_mvi = scvi.data.organize_multiome_anndatas(adata_paired, rna_anndata=adata_rna)
# Reorder columns by modality label ('Gene Expression' sorts before 'Peaks').
adata_mvi = adata_mvi[:, adata_mvi.var["modality"].argsort()].copy()
sc.pp.filter_genes(adata_mvi, min_cells=int(adata_mvi.shape[0] * MIN_CELL_FRACTION))

# --- Train -----------------------------------------------------------------
scvi.model.MULTIVI.setup_anndata(adata_mvi, batch_key='modality')
mvi = scvi.model.MULTIVI(
    adata_mvi,
    n_genes=(adata_mvi.var['modality'] == 'Gene Expression').sum(),
    n_regions=(adata_mvi.var['modality'] == 'Peaks').sum(),
)
mvi.view_anndata_setup()
mvi.train(max_epochs=MAX_EPOCHS, use_gpu=TRAIN_DEVICE)

# --- Impute accessibility for the RNA-only cells and export ----------------
imputed_accessibility = mvi.get_accessibility_estimates()
# Rows after the first adata_paired.n_obs are taken to be the RNA-only cells.
# NOTE(review): assumes organize_multiome_anndatas stacks paired cells first -- confirm.
pred = imputed_accessibility[adata_paired.n_obs:]
# Drop everything after the last '_' to get back the original barcodes
# (presumably a modality suffix added when the datasets were combined).
obs_name = [name.rsplit('_', 1)[0] for name in list(pred.index)]
# Ground truth: measured test ATAC for the same cells and the same (filtered) peaks.
true_list = adata_atac[obs_name, list(imputed_accessibility)]
true = pd.DataFrame(true_list.X.toarray(), columns=true_list.var_names, index=true_list.obs_names)
pred.index = obs_name

# 'a' is the HDF5 *key* (not a file mode); it is passed by keyword to make that
# explicit and because newer pandas deprecates passing it positionally.
pred.to_hdf(save_path + 'MultiVI_pred.h5', key='a')
true.to_hdf(save_path + 'MultiVI_true.h5', key='a')


See the tutorial for concat at: https://anndata.readthedocs.io/en/latest/concatenation.html


  accelerator, lightning_devices, device = parse_device_args(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Epoch 100/100: 100%|██████████| 100/100 [01:57<00:00,  1.09s/it, v_num=1, train_loss_step=1.39e+4, train_loss_epoch=1.37e+4]

`Trainer.fit` stopped: `max_epochs=100` reached.


Epoch 100/100: 100%|██████████| 100/100 [01:57<00:00,  1.17s/it, v_num=1, train_loss_step=1.39e+4, train_loss_epoch=1.37e+4]
