In [None]:
import glob
import logging
import os

import numpy as np
import pandas as pd
import scanpy as sc

import anndata2ri
from scipy.sparse import issparse
from CSCORE.CSCORE_IRLS import CSCORE_IRLS

import rpy2.rinterface_lib.callbacks as rcb
import rpy2.robjects as ro

In [None]:
# Restrict the rpy2 callback logger to messages at ERROR level or above.
rcb.logger.setLevel(logging.ERROR)
# Activate the Python<->R conversion layers: pandas first, then AnnData.
# NOTE(review): `ro.pandas2ri` is accessed as an attribute of `rpy2.robjects`, but no visible
# import loads that submodule explicitly — presumably another import pulls it in; confirm.
ro.pandas2ri.activate()
anndata2ri.activate()
# Load the rpy2 IPython extension (presumably the provider of the %%R cell magic — confirm).
%load_ext rpy2.ipython

In [None]:
%%R
# Attach the R packages for this notebook's R cells.
# Seurat is presumably the source of as.Seurat / RenameAssays / SCTransform called later — confirm.
library(Seurat)
# NOTE(review): presumably the backend behind SCTransform; no sctransform:: call is visible here.
library(sctransform)
# NOTE(review): no Hmisc function is called in the cells visible here — confirm it is still needed.
library(Hmisc)

In [None]:
# --- Sample selection -------------------------------------------------------
tissue_id = "spleen"
identify = "637C"

# QC cut-offs applied to the cells below.
MAX_GENES_PER_CELL = 4000  # keep cells with n_genes_by_counts below this value
MAX_PCT_MT = 20            # keep cells with pct_counts_mt below this value

# Show which per-sample files are available locally for this tissue.
for h5ad_path in sorted(glob.glob(f"./{tissue_id}/scrna_*.h5ad")):
    print(h5ad_path)

# NOTE(review): absolute, machine-specific path, and it differs from the relative folder
# globbed above — consider deriving both from one configurable data directory.
read_path = f"/ysm-gpfs/pi/zhao/tl688/GIANT/GIANT/src/analysis/{tissue_id}/scrna_{tissue_id}_{identify}.h5ad"

adata = sc.read_h5ad(read_path)

# --- QC metrics -------------------------------------------------------------
adata.var['mt'] = adata.var_names.str.startswith('MT-')  # annotate the group of mitochondrial genes as 'mt'
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

sc.pl.scatter(adata, x='total_counts', y='pct_counts_mt')
sc.pl.scatter(adata, x='total_counts', y='n_genes_by_counts')

# --- Filtering --------------------------------------------------------------
# Same filters as before: drop cells over either cut-off, then drop the genes flagged 'mt'.
keep_cells = (adata.obs['n_genes_by_counts'] < MAX_GENES_PER_CELL) & (adata.obs['pct_counts_mt'] < MAX_PCT_MT)
adata = adata[keep_cells.to_numpy(), :]
# Materialise the result with .copy(): slicing an AnnData returns a *view*, and on a view
# `adata.X` is rebuilt on each access, so an in-place `sort_indices()` would be thrown away
# and the later write to `adata.obs` would trigger an implicit copy with a warning.
adata = adata[:, (~adata.var['mt']).to_numpy()].copy()

# Sort the sparse index arrays in place before the matrix is handed to R.
if issparse(adata.X) and not adata.X.has_sorted_indices:
    adata.X.sort_indices()

# Expose the filtered object to R under the name `adata` (converted by the activated anndata2ri).
ro.globalenv["adata"] = adata

# Per-cell total counts as a flat 1-D vector. A sparse row-sum comes back as an (n, 1)
# matrix, so flatten it explicitly; a dense row-sum is already 1-D and is left unchanged.
adata.obs['n_counts'] = np.asarray(adata.X.sum(axis=1)).ravel()



In [None]:
%%R
# Build a Seurat object from the `adata` object placed in R's global environment by the
# Python side, passing "X" as the counts source and no separate data slot.
seurat_obj <- as.Seurat(adata, counts = "X", data = NULL)

# Rename the assay called "originalexp" to "RNA".
seurat_obj <- RenameAssays(seurat_obj, originalexp = "RNA")

# Run SCTransform (v2 flavor, glmGamPoi method), requesting 1000 variable features.
res <- SCTransform(
    object = seurat_obj,
    vst.flavor = "v2",
    variable.features.n = 1000,
    method = "glmGamPoi",
    verbose = FALSE
)

In [None]:
# Significance cut-off used to binarise the CS-CORE p-values.
P_VALUE_CUTOFF = 0.005

# Pull the SCT scale.data matrix and its row names (genes) back from the R object `res`.
gene_list = list(ro.r("rownames(res@assays$SCT@scale.data)"))
norm_x = ro.r("res@assays$SCT@scale.data")
# Rows are labelled by gene; columns keep the default integer labels.
exp_matrix = pd.DataFrame(norm_x, index=gene_list)

# Restrict the count data to the same genes, in the same order as `gene_list`.
adata_new = adata[:, gene_list]

# CSCORE_IRLS is given a dense array. np.array() on a scipy sparse matrix does NOT densify it
# (the result is a 0-d object array wrapping the matrix), so convert explicitly.
counts = adata_new.X
counts = counts.toarray() if issparse(counts) else np.asarray(counts)
seq_depth = adata_new.obs['n_counts'].to_numpy()

B_cell_result = CSCORE_IRLS(counts, seq_depth)
# Element 1 of the result is treated as the gene-by-gene p-value matrix
# (TODO confirm against the CS-CORE documentation).
p_value = B_cell_result[1]
# 0/1 indicator of gene pairs whose p-value falls below the cut-off.
cor_matrix = (p_value < P_VALUE_CUTOFF) * 1
print(cor_matrix)


In [None]:
# Create the output folder first: DataFrame.to_csv does not create missing parent directories.
out_dir = f"./{tissue_id}_atlas"
os.makedirs(out_dir, exist_ok=True)
out_prefix = f"{out_dir}/scrna_{tissue_id}_{identify}"

# Expression matrix pulled from R (rows labelled by gene).
exp_matrix.to_csv(out_prefix + "_rna_expression.csv")

# NOTE: despite the "_pvalue" file name, this writes the thresholded 0/1 gene-by-gene
# indicator held in `cor_matrix`, not the raw p-values.
cor_matrix = pd.DataFrame(cor_matrix, index=gene_list, columns=gene_list)
cor_matrix.to_csv(out_prefix + "_pvalue.csv")