In [None]:
%load_ext autoreload
%autoreload 2

import os
import h5py
import numpy as np
import pandas as pd
import scanpy as sc
import anndata
import csv
import gzip
import scipy.io

import scipy.sparse as sps

from os.path import join
from sklearn.decomposition import PCA, IncrementalPCA

# --- Runtime configuration -------------------------------------------------
# Expose only GPU index 0 to CUDA-aware libraries loaded by this kernel.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

# Seed NumPy's global random generator so NumPy-driven randomness is repeatable.
np.random.seed(seed=1234)

# Set scanpy's logging verbosity to level 3, then print its version header.
sc.settings.verbosity = 3
sc.logging.print_header()

In [None]:
# Root directory of the PBMC 10x dataset. The original hard-coded location is kept
# as the default; set the PBMC_10X_DATA_ROOT environment variable to run this
# notebook on another machine without editing the cell.
data_root = os.environ.get('PBMC_10X_DATA_ROOT', '/home/yanxh/data/pbmc_10x')


# adata_atac = sc.read_h5ad(join(data_root, 'ATAC/adata_atac.h5ad'))
adata_rna = sc.read_h5ad(join(data_root, 'RNA/adata_rna.h5ad'))
adata_atac_gam = sc.read_h5ad(join(data_root, 'ATAC_GAM/adata_atac_gam.h5ad'))


# Keep only the variables (var_names) present in both objects, so the two
# matrices share the same feature set, in the same order.
gene_share = adata_atac_gam.var_names.intersection(adata_rna.var_names)
# Fail fast with a clear message instead of passing empty matrices downstream.
assert len(gene_share) > 0, "adata_rna and adata_atac_gam share no variable names"
adata_atac_gam2 = adata_atac_gam[:, gene_share]
adata_rna2 = adata_rna[:, gene_share]


In [None]:
# Per-cell annotation tables (.obs) of the two feature-matched objects.
meta_rna, meta_atac = adata_rna2.obs, adata_atac_gam2.obs

# Stack them row-wise: RNA rows first, ATAC rows second.
meta = pd.concat(objs=[meta_rna, meta_atac], axis="index")

# Integration using Portal

In [None]:
import portal

# Specify the GPU device
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Create a folder for saving results.
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create
# race of a separate os.path.exists() test.
result_path = "./result"
os.makedirs(result_path, exist_ok=True)

In [None]:
## standard portal pipeline

# Training length and loss weights handed to the Portal model constructor.
portal_hparams = dict(
    training_steps=2000,
    lambdacos=10.,
    lambdaAE=10.,
    lambdaLA=10.,
    lambdaGAN=1.0,
)
model = portal.model.Model(**portal_hparams)

# Preprocessing and PCA; RNA is passed first and the ATAC object second.
model.preprocess(adata_rna2, adata_atac_gam2, hvg_num=4000, norm_pca=False)
print('preprocessed feature dim: ', len(model.hvg_total))

# Fit the model, then compute the integrated latent representation of the cells.
model.train()
model.eval()

# portal.utils.plot_UMAP(model.latent, meta, colors=["domain", "cell_type"], save=False, result_path=result_path)

In [None]:
from portal.knn_classifier import knn_classifier_top_k, faiss_knn, knn_classifier_prob_concerto
# Cell-type labels of each modality as plain NumPy arrays.
rna_lab = np.array(adata_rna2.obs.cell_type.values)
atac_lab = np.array(adata_atac_gam2.obs.cell_type.values)

# Split the joint latent matrix: the first len(rna_lab) rows are treated as RNA
# cells and the remaining rows as ATAC cells.
feat_A, feat_B = model.latent[:len(rna_lab)], model.latent[len(rna_lab):]
# feat_A, feat_B = normalize(feat_A, axis=1), normalize(feat_B, axis=1)

# knn_classifier
# NOTE(review): presumably predicts a label for every feat_B row from the labelled
# feat_A rows and returns (prediction, confidence) — confirm in portal.knn_classifier.
atac_pred, atac_prob = knn_classifier_prob_concerto(feat_A, feat_B, rna_lab, n_sample=None, knn=30, num_chunks=100)

# True for ATAC cells whose label also occurs among the RNA labels.
# np.isin replaces np.in1d, which is deprecated as of NumPy 2.0.
shr_mask = np.isin(atac_lab, np.unique(rna_lab))
# Fraction of correct predictions among ATAC cells with a shared label.
(np.ravel(atac_pred)[shr_mask] == atac_lab[shr_mask]).mean()

In [None]:
from portal.metrics import osr_evaluator


# Open-set score per ATAC cell, computed as 1 - atac_prob.
# NOTE(review): assumes atac_prob holds one confidence value per ATAC cell, aligned
# with shr_mask — confirm against knn_classifier_prob_concerto.
open_score = 1 - atac_prob

# "Known" cells: ATAC cells whose label also occurs in the RNA data.
# atac_pred is flattened before masking, exactly as in the accuracy computation of
# the previous cell, so predictions and ground truth are both 1-D and compare
# element-wise even if the classifier returns a column vector.
kn_data_pr = np.ravel(atac_pred)[shr_mask]
kn_data_gt = atac_lab[shr_mask]
kn_data_open_score = open_score[shr_mask]

# "Unknown" cells: ATAC cells whose label never occurs in the RNA data.
unk_data_open_score = open_score[np.logical_not(shr_mask)]

# Unpacked below as closed-set accuracy, open-set AUROC, open-set AUPR and OSCR.
closed_acc, os_auroc, os_aupr, oscr = osr_evaluator(kn_data_pr, kn_data_gt, kn_data_open_score, unk_data_open_score)
closed_acc, os_auroc, os_aupr, oscr