In [None]:
import pandas as pd
import anndata as ad
from scipy.sparse import csr_matrix, coo_matrix, csc_matrix
from scipy.io import mmread, mmwrite
import os
import numpy as np

In [None]:
def read_RNA_ATAC(RNA_path, ATAC_path):
    """Load an RNA matrix, and optionally an ATAC matrix, as AnnData objects.

    Both paths are used as plain string prefixes: file names are appended
    with ``+``, so a directory path must already end with a separator.

    Parameters
    ----------
    RNA_path : str
        Prefix from which ``barcodes.tsv``, ``matrix.mtx`` and ``genes.tsv``
        are read.
    ATAC_path : str or None
        Prefix from which ``barcodes.tsv``, ``matrix.mtx`` and ``peaks.bed``
        are read. When ``None``, only the RNA data is loaded.

    Returns
    -------
    AnnData or tuple of AnnData
        ``adata_RNA`` alone when ``ATAC_path`` is ``None``; otherwise the
        pair ``(adata_RNA, adata_ATAC)``.
    """
    def _single_column(file_path, column, **read_kwargs):
        # Header-less table; assigning a one-element column list means the
        # file is expected to parse into exactly one column.
        table = pd.read_csv(file_path, header=None, index_col=None, **read_kwargs)
        table.columns = [column]
        return table

    def _transposed_csr(file_path):
        # The Matrix Market file is transposed before use so that its rows
        # line up with the barcodes (obs) and its columns with the features (var).
        return csr_matrix(mmread(file_path).T)

    # --- gene expression ---
    rna_cells = _single_column(RNA_path + 'barcodes.tsv', 'cell_ids', sep='\t')
    rna_matrix = _transposed_csr(RNA_path + 'matrix.mtx')
    rna_genes = _single_column(RNA_path + 'genes.tsv', 'gene_ids', sep='\t')
    adata_RNA = ad.AnnData(
        rna_matrix,
        obs=pd.DataFrame(index=rna_cells.cell_ids),
        var=pd.DataFrame(index=rna_genes.gene_ids),
    )
    # Only the RNA feature names are de-duplicated.
    adata_RNA.var_names_make_unique()

    if ATAC_path is not None:
        # --- peak information ---
        atac_cells = _single_column(ATAC_path + 'barcodes.tsv', 'cell_ids', sep='\t')
        atac_matrix = _transposed_csr(ATAC_path + 'matrix.mtx')
        # peaks.bed is read with read_csv's default separator (no sep given).
        atac_peaks = _single_column(ATAC_path + 'peaks.bed', 'peak_ids')
        adata_ATAC = ad.AnnData(
            atac_matrix,
            obs=pd.DataFrame(index=atac_cells.cell_ids),
            var=pd.DataFrame(index=atac_peaks.peak_ids),
        )
        return adata_RNA, adata_ATAC

    return adata_RNA

In [None]:
# adata = ad.read_h5ad("../data_raw/openproblems_bmmc_multiome_phase2_rna/openproblems_bmmc_multiome_phase2_rna.censor_dataset.output_train_mod1.h5ad")

In [None]:
# Dataset identifiers and the directories they are read from.
train_id, test_id = "Dataset33", "Dataset34"
path_train = "../data/" + train_id
path_test = "../data/" + test_id

# Load the training pair: mod1 is the RNA AnnData, mod2 the ATAC AnnData.
input_train_mod1, input_train_mod2 = read_RNA_ATAC(path_train + "/RNA/", path_train + "/ATAC/")

# Both modalities get the same metadata; only the feature-type label differs.
for _adata, _feature_type in ((input_train_mod1, 'GEX'), (input_train_mod2, 'ATAC')):
    _adata.var['feature_types'] = pd.Categorical([_feature_type] * _adata.n_vars)
    _adata.obs['batch'] = pd.Categorical(['batch1'] * _adata.n_obs)
    # A fresh dict per object, so the two AnnData objects never share one `uns`.
    _adata.uns = {'dataset_id': 'human_pbmc_3k', 'organism': 'human'}
    # Keep a copy of X under layers['counts'].
    _adata.layers['counts'] = _adata.X.copy()

In [None]:
# Load the test pair: mod1 is the RNA AnnData, mod2 the ATAC AnnData.
input_test_mod1, input_test_mod2 = read_RNA_ATAC(path_test + "/RNA/", path_test + "/ATAC/")

# RNA (mod1) metadata.
input_test_mod1.var['feature_types'] = pd.Categorical(len(input_test_mod1.var_names)*['GEX'])
input_test_mod1.obs['batch'] = pd.Categorical(len(input_test_mod1.obs)*['batch1'])
input_test_mod1.uns = {'dataset_id': 'human_pbmc_3k', 'organism': 'human'}
# Store the counts copy on the *test* object. This line previously re-assigned
# the training object's layer, leaving the test object without layers['counts'].
input_test_mod1.layers['counts'] = input_test_mod1.X.copy()

# ATAC (mod2) metadata.
input_test_mod2.var['feature_types'] = pd.Categorical(len(input_test_mod2.var_names)*['ATAC'])
input_test_mod2.obs['batch'] = pd.Categorical(len(input_test_mod2.obs)*['batch1'])
input_test_mod2.uns = {'dataset_id': 'human_pbmc_3k', 'organism': 'human'}
# Same fix as above, for the ATAC test object.
input_test_mod2.layers['counts'] = input_test_mod2.X.copy()

In [None]:
# Switch into the RunDANCE directory; the relative paths used by the following
# cells ("../data/...", "../babel.py", ...) are resolved from there.
# The target is relative, so running os.chdir a second time would resolve it
# against the already-switched directory. Skip the call when the current
# directory already ends with the target's tail, so re-running this cell is harmless.
_run_dir = "../RNA2ATAC/methods/RunDANCE"
_run_dir_tail = os.sep + os.path.normpath("RNA2ATAC/methods/RunDANCE")
if not os.getcwd().endswith(_run_dir_tail):
    os.chdir(_run_dir)

In [None]:
# Export the four AnnData objects as gzip-compressed .h5ad files named
# "<subtask>.censor_dataset.output_<split>_<mod>.h5ad" under ../data/<subtask>/.
_subtask = "openproblems_bmmc_multiome_phase2_rna"
_out_dir = "../data/" + _subtask

# Nothing earlier in the notebook creates this directory; make sure it exists
# so the export does not depend on a pre-populated checkout.
os.makedirs(_out_dir, exist_ok=True)

for _suffix, _adata in (
    ("train_mod1", input_train_mod1),
    ("train_mod2", input_train_mod2),
    ("test_mod1", input_test_mod1),
    ("test_mod2", input_test_mod2),
):
    _adata.write_h5ad(
        f"{_out_dir}/{_subtask}.censor_dataset.output_{_suffix}.h5ad",
        compression="gzip",
    )

In [None]:
# Run ../babel.py through the shell with --device cuda for this subtask.
# os.system's return value is the cell's last expression, so it is displayed.
_babel_cmd = 'python ../babel.py --subtask openproblems_bmmc_multiome_phase2_rna --device cuda'
os.system(_babel_cmd)

In [None]:
# Run ../cmae.py through the shell with --device cuda for this subtask.
# os.system's return value is the cell's last expression, so it is displayed.
_cmae_cmd = 'python ../cmae.py --subtask openproblems_bmmc_multiome_phase2_rna --device cuda'
os.system(_cmae_cmd)

In [None]:
# Run ../scmogcn.py through the shell with --device cuda for this subtask.
# os.system's return value is the cell's last expression, so it is displayed.
_scmogcn_cmd = 'python ../scmogcn.py --subtask openproblems_bmmc_multiome_phase2_rna --device cuda'
os.system(_scmogcn_cmd)