In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys
sys.path.append("../")
from concerto_function5_3 import *
from sklearn.metrics import f1_score, accuracy_score
import numpy as np
import scanpy as sc
import scipy.sparse as sps
import matplotlib.pyplot as plt
from metrics import osr_evaluator

from os.path import join

In [2]:
# Optional cell: pin the notebook to a single GPU on a multi-GPU machine.
# Skip it to keep TensorFlow's default device selection, or to run on the CPU.
import tensorflow as tf
# Expose only the CUDA device with index 1 to this process. The assignment comes
# before the first device query below.
# NOTE(review): if an earlier import has already initialised TensorFlow's GPU
# runtime, changing the variable here may have no effect — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = '1' 
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    # Enable memory growth for every visible GPU (see the TensorFlow
    # `set_memory_growth` documentation for the exact allocation behaviour).
    tf.config.experimental.set_memory_growth(gpu, True) 

In [3]:
# Experiment tag; the TFRecord and weight folders built in later cells embed it.
exp_id = 'MCAOS'

# obs column names used for the domain (batch) label and the cell-type label.
batch_key = 'domain'
type_key = 'cell_type'

# NOTE(review): both inputs are read from machine-specific absolute paths.
data_root = '/home/yanxh/data/MCA/scjoint/data_atlas'
rna_h5ad = '/home/yanxh/gitrepo/Portal-main/cache/adata_rna_facs.h5ad'
atac_h5ad = join(data_root, 'adata_atac_cache.h5ad')

adata_rna = sc.read_h5ad(rna_h5ad)
adata_atac = sc.read_h5ad(atac_h5ad)

# Show both AnnData summaries as the cell output.
(adata_rna, adata_atac)

(AnnData object with n_obs × n_vars = 19726 × 15519
     obs: 'cell_type', 'domain',
 AnnData object with n_obs × n_vars = 81173 × 15519
     obs: 'Unnamed: 0', 'cell_label', 'domain', 'cell_type')

In [4]:
# Stack the RNA and ATAC cells into a single AnnData object. In the recorded
# output the result has 19726 + 81173 = 100899 cells and keeps only the obs
# columns present in both inputs ('cell_type', 'domain').
# NOTE(review): no `keys` / `index_unique` argument is passed, so obs names are
# presumably carried over unchanged — confirm they do not collide across the
# two modalities.
adata_all = sc.concat([adata_rna, adata_atac])
adata_all

AnnData object with n_obs × n_vars = 100899 × 15519
    obs: 'cell_type', 'domain'

# Preprocess

In [5]:
# Run the project's RNA preprocessing helper on the combined object.
# Original author's summary of the helper: filter cells, normalize_total,
# optional HVG selection, no scaling. HVG selection is switched off here
# (is_hvg=False, n_top_features=None) and min_features is 0.
adata = preprocessing_rna(
    adata_all,
    min_features=0,
    n_top_features=None,
    is_hvg=False,
    batch_key=batch_key,
)

# Split by domain label: 'rna_facs' cells form the reference, 'atac' cells the query.
adata_ref = adata[adata.obs[batch_key] == 'rna_facs']
adata_query = adata[adata.obs[batch_key] == 'atac']

# shr_mask[i] is True when query cell i carries a cell-type label that also
# occurs in the reference; the remaining query cells are treated as "unknown"
# by the open-set evaluation cells further down.
# np.isin replaces np.in1d, which is deprecated since NumPy 2.0; for the 1-D
# label column used here both return the same boolean array.
shr_mask = np.isin(adata_query.obs[type_key], adata_ref.obs[type_key].unique())
# Ground-truth query labels as a plain NumPy array, aligned with shr_mask.
atac_lab = np.array(adata_query.obs[type_key].values)

# Root folder under which later cells place the TFRecord and weight directories.
save_path = './'
# Optional: persist the two splits next to the notebook.
# if not os.path.exists(save_path):
#     os.makedirs(save_path)
# adata_ref.write_h5ad(save_path + 'adata_ref.h5ad')
# adata_query.write_h5ad(save_path + 'adata_query.h5ad')

Processed dataset shape: (100899, 15519)


In [6]:
# Build the TFRecord inputs for the reference and the query split with the
# project helper. Both calls read the same batch and label columns; the
# helper's return value is kept as the corresponding *_tf_path variable.
tfrecord_columns = dict(batch_col_name=batch_key, label_col_name=type_key)
ref_tf_dir = save_path + f'tfrecord/{exp_id}/ref_tf/'
query_tf_dir = save_path + f'tfrecord/{exp_id}/query_tf/'

ref_tf_path = concerto_make_tfrecord_supervised(adata_ref, tf_path=ref_tf_dir, **tfrecord_columns)
query_tf_path = concerto_make_tfrecord_supervised(adata_query, tf_path=query_tf_dir, **tfrecord_columns)

counter: 10000 shape: (15519,), batch: 0
[1.e-05 1.e-05 1.e-05 ... 1.e-05 1.e-05 1.e-05]
[9.99999975e-06 9.99999975e-06 9.99999975e-06 ... 9.99999975e-06
 9.99999975e-06 9.99999975e-06]
batchs:  ['rna_facs']
counter: 10000 shape: (15519,), batch: 0
[1.e-05 1.e-05 1.e-05 ... 1.e-05 1.e-05 1.e-05]
[9.99999975e-06 9.99999975e-06 9.99999975e-06 ... 9.99999975e-06
 9.99999975e-06 9.99999975e-06]
batchs:  ['atac']
counter: 20000 shape: (15519,), batch: 0
[9.9999997e-06 9.9999997e-06 2.8683867e+00 ... 9.9999997e-06 9.9999997e-06
 9.9999997e-06]
[9.99999975e-06 9.99999975e-06 2.86838675e+00 ... 9.99999975e-06
 9.99999975e-06 9.99999975e-06]
batchs:  ['atac']
counter: 30000 shape: (15519,), batch: 0
[9.9999997e-06 9.9999997e-06 9.9999997e-06 ... 9.9999997e-06 1.3322642e+00
 1.2836080e+00]
[9.99999975e-06 9.99999975e-06 9.99999975e-06 ... 9.99999975e-06
 1.33226418e+00 1.28360796e+00]
batchs:  ['atac']
counter: 40000 shape: (15519,), batch: 0
[9.9999997e-06 9.9999997e-06 9.9999997e-06 ... 1.7124

In [7]:
# Train on the reference and query TFRecords and store the weights under weight_path.
# If you don't want to train the model, you can load already-trained classifier
# weights from weight_path and go straight to the test cell.
# NOTE(review): the previous comment said "leave spleen out", which does not match
# the rna_facs / atac split used in this notebook — presumably left over from
# another experiment.
# The three paths are rebuilt from save_path and exp_id so this cell does not
# depend on the TFRecord-creation cell having been run in the same session.
weight_path = save_path + f'weight/{exp_id}/'
ref_tf_path = save_path + f'tfrecord/{exp_id}/ref_tf/'
query_tf_path = save_path + f'tfrecord/{exp_id}/query_tf/'

train_params = {'batch_size': 128, 'epoch_pretrain': 1,'epoch_classifier': 10, 'lr': 1e-4,'drop_rate': 0.1}

# Keep the call as the last expression: its return value is the cell output.
concerto_train_inter_supervised_uda2(
    ref_tf_path,
    query_tf_path,
    weight_path,
    super_parameters=train_params,
)

./tfrecord/MCAOS/ref_tf/tf_0.tfrecord
Epoch 1, step 5, simclr loss: 10.4962.
Epoch 1, step 10, simclr loss: 10.1023.
Epoch 1, step 15, simclr loss: 9.6408.
Epoch 1, step 20, simclr loss: 9.1084.
Epoch 1, step 25, simclr loss: 8.5713.
Epoch 1, step 30, simclr loss: 8.0368.
Epoch 1, step 35, simclr loss: 7.5131.
Epoch 1, step 40, simclr loss: 6.9630.
Epoch 1, step 45, simclr loss: 6.4614.
Epoch 1, step 50, simclr loss: 5.9354.
Epoch 1, step 55, simclr loss: 5.4701.
Epoch 1, step 60, simclr loss: 4.9818.
Epoch 1, step 65, simclr loss: 4.5394.
Epoch 1, step 70, simclr loss: 4.1475.
Epoch 1, step 75, simclr loss: 3.7763.
Epoch 1, step 80, simclr loss: 3.4782.
Epoch 1, step 85, simclr loss: 3.1817.
Epoch 1, step 90, simclr loss: 2.9367.
Epoch 1, step 95, simclr loss: 2.7166.
Epoch 1, step 100, simclr loss: 2.5108.
Epoch 1, step 105, simclr loss: 2.3213.
Epoch 1, step 110, simclr loss: 2.1443.
Epoch 1, step 115, simclr loss: 2.0020.
Epoch 1, step 120, simclr loss: 1.8551.
Epoch 1, step 125, s

'./weight/MCAOS/'

In [6]:
# Test: extract features with the trained weights and score a kNN classifier
# on the query cells in the open-set setting.
# NOTE(review): the previous comment said "only spleen", which does not match the
# rna_facs / atac split used in this notebook — presumably left over from
# another experiment.
weight_path = save_path + f'weight/{exp_id}/'
ref_tf_path = save_path + f'tfrecord/{exp_id}/ref_tf/'
query_tf_path = save_path + f'tfrecord/{exp_id}/query_tf/'

# Single-element lists keep the cell ready for sweeping over epochs and k.
for epoch in [4]:
    test_params = {'batch_size': 64, 'epoch': epoch, 'lr': 1e-5,'drop_rate': 0.1}
    results = concerto_test_inter_supervised2(
        weight_path,
        ref_tf_path,
        query_tf_path,
        super_parameters=test_params,
    )

    for knn in [10]:
        # kNN classifier over the extracted source/target features.
        query_neighbor, query_prob = knn_classifier(
            results['source_feature'],
            results['target_feature'],
            adata_ref,
            adata_ref.obs_names,
            column_name=type_key,
            k=knn,
        )
        # 1 - query_prob is used as the open-set ("unknown") score.
        open_score = 1 - query_prob

        # Query cells whose label also occurs in the reference ("known").
        kn_data_pr, kn_data_gt = query_neighbor[shr_mask], atac_lab[shr_mask]
        kn_data_open_score = open_score[shr_mask]

        # Query cells whose label is absent from the reference ("unknown").
        unk_data_open_score = open_score[~shr_mask]

        closed_acc, os_auroc, os_aupr, oscr = osr_evaluator(
            kn_data_pr, kn_data_gt, kn_data_open_score, unk_data_open_score
        )
        print(epoch, knn, closed_acc, os_auroc, os_aupr, oscr)
    

81173 811
close_acc= 0.5676
AUROC= 0.6068
AUPR= 0.3360
OSCR= 0.4122
4 10 0.5675520733804701 0.6067927379907043 0.3359942725256021 0.41220144831757205


# Neural classifier

In [11]:
# Switch to the 'target_pred' / 'target_prob' entries of the last `results`
# returned in the test cell (presumably the neural classifier's outputs, per
# the section heading). This rebinds query_prob, which the kNN cell also set.
query_pred = results['target_pred']
query_prob = results['target_prob']

In [12]:
# 1 - query_prob is used as the open-set ("unknown") score.
open_score = 1 - query_prob

# "Known" query cells: their label also occurs in the reference.
kn_data_pr, kn_data_gt = query_pred[shr_mask], atac_lab[shr_mask]
kn_data_open_score = open_score[shr_mask]

# "Unknown" query cells: their label is absent from the reference.
unk_data_open_score = open_score[~shr_mask]

closed_acc, os_auroc, os_aupr, oscr = osr_evaluator(
    kn_data_pr, kn_data_gt, kn_data_open_score, unk_data_open_score
)
# Show the four metrics as the cell output.
(closed_acc, os_auroc, os_aupr, oscr)

close_acc= 0.4391
AUROC= 0.4491
AUPR= 0.2627
OSCR= 0.2636


(0.43913625071660617,
 0.449138084848685,
 0.2626566505471447,
 0.26359141169107553)