In [1]:
import os
import torch
import pandas as pd
import scanpy as sc

In [2]:
import MVAADT

In [3]:
"""
Sets the device to GPU if available, otherwise defaults to CPU.
Also sets the environment variable 'R_HOME' to the specified path.

- `device`: A torch.device object set to 'cuda:1' if a GPU is available, otherwise 'cpu'.
- `os.environ['R_HOME']`: Sets the R_HOME environment variable to the specified path for R installation.
"""
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
os.environ['R_HOME'] = '/home/zxx/miniforge3/envs/MDI/lib/R'

In [5]:
# Read the two modalities (RNA and protein expression) from the dataset folder.
file_fold = '/home/zxx/MVAADT/data/Dataset3_PBMC/' #please replace 'file_fold' with the download path

# os.path.join keeps the paths valid whether or not `file_fold` ends with a separator,
# so a replacement path without a trailing '/' still resolves correctly.
adata_omics1 = sc.read_h5ad(os.path.join(file_fold, 'rna.h5ad'))
adata_omics2 = sc.read_h5ad(os.path.join(file_fold, 'protein_expression.h5ad'))

# Make variable names unique in both objects.
adata_omics1.var_names_make_unique()
adata_omics2.var_names_make_unique()

# Report the shape of each object as a quick sanity check.
print(adata_omics1.shape)
print(adata_omics2.shape)

(11137, 11296)
(11137, 210)


In [39]:
from MVAADT.preprocess import fix_seed

# Data type handed to graph construction and to the trainer in later cells.
# 'xxx' is a placeholder: replace it with the data type that matches your dataset.
data_type = 'xxx'

# Fix the random seed through MVAADT's helper before any further computation.
random_seed = 2050
fix_seed(random_seed)

In [40]:
from MVAADT.preprocess import clr_normalize_each_cell, pca

# NOTE: this cell modifies adata_omics1 / adata_omics2 in place; reload the data
# before running it a second time.

# --- Filtering ---------------------------------------------------------------
# RNA: keep genes detected in at least 10 cells, then cells with at least 80 genes.
sc.pp.filter_genes(adata_omics1, min_cells=10)
sc.pp.filter_cells(adata_omics1, min_genes=80)

# Protein: keep features detected in at least 50 cells, then restrict to the cells
# that remain in the RNA object so both modalities cover the same observations.
sc.pp.filter_genes(adata_omics2, min_cells=50)
surviving_cells = adata_omics1.obs_names
adata_omics2 = adata_omics2[surviving_cells].copy()

# --- RNA features ------------------------------------------------------------
# Highly variable genes are selected before normalisation
# (scanpy's "seurat_v3" flavor expects count data).
sc.pp.highly_variable_genes(adata_omics1, flavor="seurat_v3", n_top_genes=3000)
sc.pp.normalize_total(adata_omics1, target_sum=1e4)
sc.pp.log1p(adata_omics1)

hvg_mask = adata_omics1.var['highly_variable']
adata_omics1_high = adata_omics1[:, hvg_mask]
# The RNA component count is tied to the protein feature count (n_vars - 1).
n_comps_rna = adata_omics2.n_vars - 1
adata_omics1.obsm['feat'] = pca(adata_omics1_high, n_comps=n_comps_rna)

# --- Protein features --------------------------------------------------------
adata_omics2 = clr_normalize_each_cell(adata_omics2)
n_comps_protein = adata_omics2.n_vars - 1
adata_omics2.obsm['feat'] = pca(adata_omics2, n_comps=n_comps_protein)

In [41]:
from MVAADT.preprocess import construct_neighbor_graph_single_cell

# Build the neighbor-graph input from both preprocessed modalities;
# the result is what the trainer receives in the next cell.
data = construct_neighbor_graph_single_cell(
    adata_omics1,
    adata_omics2,
    datatype=data_type,
)

In [None]:
from MVAADT.MVAADT_SC import Train_MVAADT_SC

# Instantiate the trainer on the prepared graph data and the selected device.
model = Train_MVAADT_SC(
    data,
    datatype=data_type,
    device=device,
)

# Run training; `output` is indexed by embedding name in the following cell.
output = model.train()

In [43]:
# Work on a copy of the RNA object and attach each trained embedding to its
# `.obsm` slot under the same key it has in `output`.
adata = adata_omics1.copy()
for key in ('emb_latent_omics1', 'emb_latent_omics2', 'GAN_Align'):
    adata.obsm[key] = output[key]