# In [14]:
import scanpy as sc
import pandas as pd
from scipy import sparse, io

# Location of the combined AnnData (.h5ad) file on disk.
base = "/media/chang/HDD-8/joseph/bmi206"
dataset = "NeurIPS"
h5ad_filename = "GSE194122_openproblems_neurips2021_multiome_BMMC_processed.h5ad"
h5ad_path = "/".join((base, dataset, h5ad_filename))
adata = sc.read_h5ad(h5ad_path)

# Boolean selectors over the variables (columns), one per value of the
# "feature_types" annotation. Presumably "GEX" marks gene-expression features
# and "ATAC" marks accessibility features -- confirm against the dataset docs.
feature_types = adata.var["feature_types"]
rna_mask = feature_types == "GEX"
atac_mask = feature_types == "ATAC"

# Independent copies restricted to each feature type, and their data matrices.
adata_rna, adata_atac = adata[:, rna_mask].copy(), adata[:, atac_mask].copy()
X_rna, X_atac = adata_rna.X, adata_atac.X

# Make sure both matrices are SciPy CSR matrices before they are exported.
# The converted result must be rebound to X_rna / X_atac themselves: those are
# the names passed to io.mmwrite below, and mmwrite emits the dense "array"
# format (every entry written out) when handed a dense ndarray, versus the
# compact "coordinate" format for sparse input.
if not sparse.isspmatrix_csr(X_rna):
    X_rna = sparse.csr_matrix(X_rna)
if not sparse.isspmatrix_csr(X_atac):
    X_atac = sparse.csr_matrix(X_atac)

# Everything is written next to the input file, under <base>/<dataset>/.
out_dir = f"{base}/{dataset}"

# Data matrices in Matrix Market format: expr.mtx, then atac.mtx.
for prefix, matrix in (("expr", X_rna), ("atac", X_atac)):
    io.mmwrite(f"{out_dir}/{prefix}.mtx", matrix)

# Tab-separated annotation tables for each subset:
#   <prefix>_features.tsv  <- .var (feature metadata)
#   <prefix>_cells.tsv     <- .obs (cell metadata)
for prefix, subset in (("expr", adata_rna), ("atac", adata_atac)):
    subset.var.to_csv(f"{out_dir}/{prefix}_features.tsv", sep="\t")
    subset.obs.to_csv(f"{out_dir}/{prefix}_cells.tsv", sep="\t")