In [None]:
# Scratch assignment.
# NOTE(review): `x` is not referenced by any other cell visible here — confirm
# nothing outside this view needs it before deleting the cell.
x = 1

In [None]:
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')
import numpy as np
import scipy.sparse as sp
import matplotlib.pyplot as plt
import scanpy as sc
import anndata as ad
from rpy2.robjects import r, default_converter
from rpy2.robjects.conversion import localconverter
from anndata2ri import converter as anndata2ri_converter
%load_ext rpy2.ipython

In [None]:
# --- NeurIPS multiome: load RNA + ATAC and hand stripped copies to R ---
# Each modality is reduced to a minimal AnnData holding only the matrix Seurat
# should see (raw counts for RNA, the binary layer for ATAC) together with the
# two obs columns used downstream: `cell_type` and `batch`.
rna = sc.read('./data/neurips-multiome/rna_hvg.h5ad')
adata_rna = ad.AnnData(
    X=rna.layers['counts'],
    obs=rna.obs[['cell_type', 'batch']].copy(),
)
adata_rna.var_names = rna.var_names

atac = sc.read('./data/neurips-multiome/atac_hvf.h5ad')
adata_atac = ad.AnnData(
    X=atac.layers['binary'],
    obs=atac.obs[['cell_type', 'batch']].copy(),
)
adata_atac.var_names = atac.var_names

# Convert through anndata2ri and bind the results as globals in the embedded
# R session under the names the Seurat cell expects.
to_r = default_converter + anndata2ri_converter
with localconverter(to_r):
    r.assign("adata_rna", adata_rna)
    r.assign("adata_peaks", adata_atac)

In [None]:
# Seurat WNN + supervised-PCA pipeline for the multiome data, executed in the
# embedded R session. Reads the R globals `adata_rna` and `adata_peaks` and
# leaves the cells-by-20 sPCA embedding in the R global `spca`.
#
# Steps:
#   1. RNA: split by batch, SCTransform each batch, SCT-based anchor
#      integration, PCA on the integrated assay.
#   2. ATAC: split by batch, TF-IDF + SVD (LSI) per batch, reciprocal-LSI
#      anchors, then IntegrateEmbeddings on the LSI of the full object.
#   3. Joint: weighted-nearest-neighbour graph over `pca` and
#      `integrated_lsi`, followed by supervised PCA on the `wsnn` graph.
#
# Signac is attached explicitly because RunTFIDF / FindTopFeatures / RunSVD are
# Signac functions; with only Seurat loaded they are not found in a fresh
# R session.
r('''
library(Seurat)
library(Signac)

rna = as.Seurat(adata_rna, counts='X', data=NULL)
rna <- RenameAssays(rna, originalexp="RNA")
rna.list <- SplitObject(rna, split.by = "batch")
rna.list <- lapply(X = rna.list, FUN = SCTransform, variable.features.n = 1000)
features <- SelectIntegrationFeatures(object.list = rna.list, nfeatures = 1000)
rna.list <- PrepSCTIntegration(object.list = rna.list, anchor.features = features)
anchors <- FindIntegrationAnchors(object.list = rna.list, normalization.method = "SCT", 
    anchor.features = features)
integrated <- IntegrateData(anchorset = anchors, normalization.method = "SCT")
integrated <- RunPCA(integrated)

peaks = as.Seurat(adata_peaks, counts='X', data=NULL)
peaks <- RenameAssays(peaks, originalexp='ATAC')
peaks.list <- SplitObject(peaks, split.by = "batch")
peaks.list <- lapply(X = peaks.list, FUN = function(x) {
    x <- RunTFIDF(x, verbose=FALSE)
    x <- FindTopFeatures(x, min.cutoff = 'q0', verbose=FALSE)
    x <- RunSVD(x, verbose=FALSE)
})
features <- SelectIntegrationFeatures(object.list = peaks.list)
anchors <- FindIntegrationAnchors(object.list = peaks.list, reduction = "rlsi", 
    dims = 2:30, anchor.features = features)
peaks <- RunTFIDF(peaks, verbose=FALSE)
peaks <- FindTopFeatures(peaks, min.cutoff = 'q0', verbose=FALSE)
peaks <- RunSVD(peaks, verbose=FALSE)
integrated_atac <- IntegrateEmbeddings(
  anchorset = anchors,
  reductions = peaks[["lsi"]],
  new.reduction.name = "integrated_lsi",
  dims.to.integrate = 1:30
)
integrated_atac <- RunUMAP(integrated_atac, dims=1:30, reduction = "integrated_lsi")
p2 <- DimPlot(integrated_atac, group.by = "batch")

integrated[["ATAC"]] <- peaks[["ATAC"]]
integrated[["integrated_lsi"]] <- integrated_atac[["integrated_lsi"]]
integrated <- FindMultiModalNeighbors(integrated, reduction.list = list("pca", "integrated_lsi"), 
                              dims.list = list(1:50, 2:30), modality.weight.name = "RNA.weight")
integrated <- RunSPCA(integrated, assay = 'integrated', graph = 'wsnn', npcs = 20)

spca <- Embeddings(object = integrated[["spca"]])
''')

In [None]:
# Pull the sPCA embedding out of R and store it on `rna` as obsm["latent"].
#
# The rows of `spca` follow Seurat's cell order. After SplitObject /
# IntegrateData that order is not guaranteed to equal `rna.obs_names`, so the
# rows are reordered by cell name instead of being attached positionally;
# a positional assignment would silently mislabel cells if the orders differ.
spca_values = np.array(r['spca'])
spca_cells = [str(name) for name in r('rownames(spca)')]
row_of_cell = {name: row for row, name in enumerate(spca_cells)}

missing_cells = [name for name in rna.obs_names if name not in row_of_cell]
if missing_cells:
    # Fail loudly rather than write an embedding that does not cover every cell.
    raise ValueError(
        f"{len(missing_cells)} cells of `rna` have no row in the Seurat sPCA "
        f"embedding (first missing: {missing_cells[0]!r})"
    )
cell_order = [row_of_cell[name] for name in rna.obs_names]

# `adata` is kept as a module-level name in case later cells rely on it.
adata = sc.AnnData(spca_values[cell_order])
adata.obs = adata_rna.obs
rna.obsm["latent"] = adata.X
rna.write_h5ad("./results/neurips-multiome-seurat.h5ad")

In [None]:
# --- NeurIPS CITE-seq: load RNA + protein (ADT) and hand stripped copies to R ---
# Note that `rna` and `adata_rna` are rebound here, replacing the multiome
# objects of the same name. Each modality is reduced to its raw `counts` layer
# plus the `cell_type` and `batch` obs columns.
rna = sc.read('./data/neurips-cite/rna_hvg.h5ad')
adata_rna = ad.AnnData(
    X=rna.layers['counts'],
    obs=rna.obs[['cell_type', 'batch']].copy(),
)
adata_rna.var_names = rna.var_names

adt = sc.read('./data/neurips-cite/protein.h5ad')
adata_adt = ad.AnnData(
    X=adt.layers['counts'],
    obs=adt.obs[['cell_type', 'batch']].copy(),
)
adata_adt.var_names = adt.var_names

# Convert through anndata2ri and bind the results as globals in the embedded
# R session under the names the Seurat cell expects.
to_r = default_converter + anndata2ri_converter
with localconverter(to_r):
    r.assign("adata_rna", adata_rna)
    r.assign("adata_adt", adata_adt)

In [None]:
# Seurat WNN + supervised-PCA pipeline for the CITE-seq data, executed in the
# embedded R session. Reads the R globals `adata_rna` and `adata_adt` and
# leaves the cells-by-20 sPCA embedding in the R global `spca`.
#
# Steps:
#   0. Raise future.globals.maxSize to 20 * 1024^3 bytes (20 GiB).
#   1. RNA: split by batch, SCTransform each batch, SCT-based anchor
#      integration, PCA on the integrated assay.
#   2. ADT: split by batch, CLR-normalize (margin = 2) with every protein
#      marked as a variable feature, scale + PCA per batch, reciprocal-PCA
#      anchors (dims 1:30), IntegrateData, then scale + PCA stored as `apca`.
#   3. Joint: weighted-nearest-neighbour graph over `pca` (dims 1:50) and
#      `apca` (dims 1:30), followed by supervised PCA on the `wsnn` graph.
#
# NOTE(review): the rows of `spca` follow Seurat's cell order — confirm it
# matches the Python-side `rna.obs_names` before using the matrix positionally.
r('''
library(future)
options(future.globals.maxSize = 20 * 1024^3)
library(Seurat)

rna <- as.Seurat(adata_rna, counts='X', data=NULL)
rna <- RenameAssays(rna, originalexp="RNA")
rna.list <- SplitObject(rna, split.by = "batch")
rna.list <- lapply(X = rna.list, FUN = SCTransform, variable.features.n = 1000)
features <- SelectIntegrationFeatures(object.list = rna.list, nfeatures = 1000)
rna.list <- PrepSCTIntegration(object.list = rna.list, anchor.features = features)
anchors <- FindIntegrationAnchors(object.list = rna.list, normalization.method = "SCT",anchor.features = features)

integrated <- IntegrateData(anchorset = anchors, normalization.method = "SCT")

integrated <- RunPCA(integrated)

cite <- as.Seurat(adata_adt, counts='X', data=NULL)
cite <- RenameAssays(cite, originalexp='ADT')
cite.list <- SplitObject(cite, split.by = "batch")
cite.list <- lapply(X = cite.list, FUN = function(x) {
    VariableFeatures(x) <- rownames(x[["ADT"]])
    x <- NormalizeData(x, normalization.method = 'CLR', margin = 2, verbose=FALSE)
})
features <- SelectIntegrationFeatures(object.list = cite.list)
cite.list <- lapply(X = cite.list, FUN = function(x) {
    x <- ScaleData(x, features = features, verbose=FALSE)
    x <- RunPCA(x, features = features, reduction.name = "pca", verbose=FALSE)
})
anchors <- FindIntegrationAnchors(object.list = cite.list, reduction = "rpca", 
    dims = 1:30, verbose=FALSE)
integrated_adt <- IntegrateData(anchorset = anchors, dims = 1:30)
integrated_adt <- ScaleData(integrated_adt, verbose=FALSE)
integrated_adt <- RunPCA(integrated_adt, reduction.name = "apca", verbose=FALSE)

integrated[["ADT"]] <- integrated_adt[["ADT"]]
integrated[["apca"]] <- integrated_adt[["apca"]]
integrated <- FindMultiModalNeighbors(
    integrated, 
    reduction.list = list("pca", "apca"), 
    dims.list = list(1:50, 1:30), 
    modality.weight.name = "RNA.weight"
)
integrated <- RunSPCA(integrated, assay = 'integrated', graph = 'wsnn', npcs = 20)
spca <- Embeddings(object = integrated[["spca"]])
''')

In [None]:
# Pull the sPCA embedding out of R and store it on `rna` as obsm["latent"].
#
# The rows of `spca` follow Seurat's cell order. After SplitObject /
# IntegrateData that order is not guaranteed to equal `rna.obs_names`, so the
# rows are reordered by cell name instead of being attached positionally;
# a positional assignment would silently mislabel cells if the orders differ.
spca_values = np.array(r['spca'])
spca_cells = [str(name) for name in r('rownames(spca)')]
row_of_cell = {name: row for row, name in enumerate(spca_cells)}

missing_cells = [name for name in rna.obs_names if name not in row_of_cell]
if missing_cells:
    # Fail loudly rather than write an embedding that does not cover every cell.
    raise ValueError(
        f"{len(missing_cells)} cells of `rna` have no row in the Seurat sPCA "
        f"embedding (first missing: {missing_cells[0]!r})"
    )
cell_order = [row_of_cell[name] for name in rna.obs_names]

# `adata` is kept as a module-level name in case later cells rely on it.
adata = sc.AnnData(spca_values[cell_order])
adata.obs = adata_rna.obs
rna.obsm["latent"] = adata.X
rna.write_h5ad("./results/neurips-cite-seurat.h5ad")