In [6]:
%load_ext autoreload
%autoreload 2

import os
import sys
sys.path.append("../")
from concerto_function5_3 import *
from sklearn.metrics import f1_score, accuracy_score
import numpy as np
import scanpy as sc
import seaborn as sns
import scipy.sparse as sps
import matplotlib.pyplot as plt
from metrics import osr_evaluator

from os.path import join

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Optional GPU setup: on a multi-GPU machine this restricts the notebook to one
# device. The cell can be skipped entirely to run directly on the CPU.
import tensorflow as tf

# Only the device with index 1 stays visible to CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = '1'

gpus = tf.config.experimental.list_physical_devices('GPU')
for device in gpus:
    # Turn on memory growth for each visible GPU.
    tf.config.experimental.set_memory_growth(device, True)

In [9]:
# Experiment identifier; later cells use it to build tfrecord and weight folders.
exp_id = 'CITE-ASAP'

# Root folder holding the cached datasets. Set the CONCERTO_DATA_ROOT
# environment variable to run on another machine; without it, the original
# hard-coded location is used, so existing setups keep working.
data_root = os.environ.get('CONCERTO_DATA_ROOT', '/home/yanxh/data')

# Reference (RNA) and query (ATAC) AnnData objects read from cached .h5ad files.
adata_ref_rna = sc.read_h5ad(join(data_root, 'CITE-ASAP/adata_rna_cache.h5ad'))
adata_query_atac = sc.read_h5ad(join(data_root, 'CITE-ASAP/adata_atac_cache.h5ad'))

# Names of the obs columns passed on as the batch and label columns.
batch_key = 'data_type'
type_key = 'cell_type'

# Display both objects as a quick sanity check of their shapes and obs columns.
adata_ref_rna, adata_query_atac

(AnnData object with n_obs × n_vars = 4644 × 17441
     obs: 'cell_type', 'data_type',
 AnnData object with n_obs × n_vars = 4502 × 17441
     obs: 'cell_type', 'data_type')

In [10]:
# ADT feature matrices, stored as SciPy sparse .npz files.
cite_adt_file = join(data_root, 'CITE-ASAP/citeseq_control_adt.npz')
asap_adt_file = join(data_root, 'CITE-ASAP/asapseq_control_adt.npz')

cite_adt = sps.load_npz(cite_adt_file)
asap_adt = sps.load_npz(asap_adt_file)

# Each matrix reuses the obs table of the RNA / ATAC object loaded earlier.
# NOTE(review): this assumes the rows are in the same cell order — confirm.
adata_ref_adt = sc.AnnData(cite_adt, obs=adata_ref_rna.obs)
adata_query_adt = sc.AnnData(asap_adt, obs=adata_query_atac.obs)

# Show the (cells, features) shape of both matrices.
cite_adt.shape, asap_adt.shape

((4644, 227), (4502, 227))

# Preprocess

In [11]:
# Run every modality through `preprocessing_rna` with identical settings.
# Author's note on that helper: filter cells, normalize_total, hvg (if enabled), no scale.
# The settings live in one dict so the four calls cannot drift apart.
prep_kwargs = dict(min_features=0, n_top_features=None, is_hvg=False, batch_key=batch_key)

adata_ref_rna = preprocessing_rna(adata_ref_rna, **prep_kwargs)
adata_query_atac = preprocessing_rna(adata_query_atac, **prep_kwargs)
adata_ref_adt = preprocessing_rna(adata_ref_adt, **prep_kwargs)
adata_query_adt = preprocessing_rna(adata_query_adt, **prep_kwargs)

# Concatenate the features column-wise (RNA|ADT for the reference, ATAC|ADT for
# the query) into a single CSR matrix per dataset.
adata_ref = sc.AnnData(sps.csr_matrix(sps.hstack([adata_ref_rna.X, adata_ref_adt.X])), obs=adata_ref_rna.obs)
adata_query = sc.AnnData(sps.csr_matrix(sps.hstack([adata_query_atac.X, adata_query_adt.X])), obs=adata_query_adt.obs)

# Boolean mask over query cells: True where the cell's label also occurs in the
# reference. `np.isin` replaces `np.in1d`, which is deprecated in NumPy 2.0;
# the result is the same for 1-D input.
shr_mask = np.isin(adata_query_atac.obs[type_key], adata_ref_rna.obs[type_key].unique())
# Query labels as a plain NumPy array, used as ground truth in the evaluation cells.
atac_lab = np.array(adata_query_atac.obs[type_key].values)

# Output locations, relative to the notebook's working directory.
save_path = './'
weight_path = save_path + f'weight/{exp_id}/'

Processed dataset shape: (4644, 17441)
Processed dataset shape: (4502, 17441)


In [14]:
# Write the reference and query AnnData objects as tfrecords under
# ./tfrecord/<exp_id>/, passing the batch and label column names.
ref_tf_dir = save_path + f'tfrecord/{exp_id}/ref_tf/'
query_tf_dir = save_path + f'tfrecord/{exp_id}/query_tf/'

ref_tf_path = concerto_make_tfrecord_supervised(
    adata_ref,
    tf_path=ref_tf_dir,
    batch_col_name=batch_key,
    label_col_name=type_key,
)
query_tf_path = concerto_make_tfrecord_supervised(
    adata_query,
    tf_path=query_tf_dir,
    batch_col_name=batch_key,
    label_col_name=type_key,
)

In [15]:
# Train the model; weights are written under ./weight/<exp_id>/.
# To skip training, load already-trained classifier weights and go straight
# to the test cell.
# `query_tf_path` is whatever the tfrecord-writing cell returned.
weight_path = save_path + f'weight/{exp_id}/'
ref_tf_path = save_path + f'tfrecord/{exp_id}/ref_tf/'

# Hyper-parameters handed to the training routine.
train_params = {
    'batch_size': 128,
    'epoch_pretrain': 1,
    'epoch_classifier': 10,
    'lr': 1e-4,
    'drop_rate': 0.1,
}

concerto_train_inter_supervised_uda2(ref_tf_path, query_tf_path, weight_path,
                                     super_parameters=train_params)

./tfrecord/CITE-ASAP/ref_tf/tf_0.tfrecord
Epoch 1, step 5, simclr loss: 10.5876.
Epoch 1, step 10, simclr loss: 10.1897.
Epoch 1, step 15, simclr loss: 9.7408.
Epoch 1, step 20, simclr loss: 9.2589.
Epoch 1, step 25, simclr loss: 8.7338.
Epoch 1, step 30, simclr loss: 8.1835.
Epoch 1, step 35, simclr loss: 7.6388.
./tfrecord/CITE-ASAP/ref_tf/tf_0.tfrecord
Epoch 0, step 5, train cls loss: 2.3179, train acc: 0.4703
Epoch 0, step 10, train cls loss: 2.2925, train acc: 0.5219
Epoch 0, step 15, train cls loss: 2.2380, train acc: 0.5688
Epoch 0, step 20, train cls loss: 2.0901, train acc: 0.5941
Epoch 0, step 25, train cls loss: 1.9114, train acc: 0.6137
Epoch 0, step 30, train cls loss: 1.7582, train acc: 0.6422
Epoch 0, step 35, train cls loss: 1.6244, train acc: 0.6732
./tfrecord/CITE-ASAP/ref_tf/tf_0.tfrecord
Epoch 1, step 5, train cls loss: 0.6583, train acc: 0.9078
Epoch 1, step 10, train cls loss: 0.6598, train acc: 0.9078
Epoch 1, step 15, train cls loss: 0.6404, train acc: 0.9203
Ep

'./weight/CITE-ASAP/'

In [18]:
# Evaluate the trained model on the query tfrecords, then score a
# nearest-neighbour classifier with the open-set metrics.
weight_path = save_path + f'weight/{exp_id}/'
ref_tf_path = save_path + f'tfrecord/{exp_id}/ref_tf/'
query_tf_path = save_path + f'tfrecord/{exp_id}/query_tf/'

for epoch in [10]:
    test_params = {'batch_size': 64, 'epoch': epoch, 'lr': 1e-5, 'drop_rate': 0.1}
    results = concerto_test_inter_supervised2(weight_path, ref_tf_path, query_tf_path,
                                              super_parameters=test_params)

    # NN classifier: presumably assigns each query cell a label from its
    # k=30 nearest reference features — confirm against `knn_classifier`.
    query_neighbor, query_prob = knn_classifier(
        results['source_feature'],
        results['target_feature'],
        adata_ref,
        adata_ref.obs_names,
        column_name=type_key,
        k=30,
    )

    # A high open-set score means low probability for the predicted label.
    open_score = 1 - query_prob
    unk_mask = ~shr_mask  # query cells whose label is absent from the reference

    # "Known" cells: label shared with the reference.
    kn_data_pr = query_neighbor[shr_mask]
    kn_data_gt = atac_lab[shr_mask]
    kn_data_open_score = open_score[shr_mask]

    # "Unknown" cells only contribute their open-set score.
    unk_data_open_score = open_score[unk_mask]

    closed_acc, os_auroc, os_aupr, oscr = osr_evaluator(
        kn_data_pr, kn_data_gt, kn_data_open_score, unk_data_open_score
    )
    print(closed_acc, os_auroc, os_aupr, oscr)

4502 45
close_acc= 0.8598
AUROC= 0.6565
AUPR= 0.1581
OSCR= 0.6031
0.8597546307433245 0.6565213904955148 0.15811356836680499 0.6030941349147412


# Neural classifier

In [11]:
# Take the query-side outputs stored in `results` under 'target_pred' and
# 'target_prob' (presumably the network's predicted labels and their
# probabilities — confirm against the test routine).
query_pred = results['target_pred']
query_prob = results['target_prob']

In [12]:
# Open-set evaluation of the neural classifier's predictions.
# A high open-set score means low probability for the predicted label.
open_score = 1 - query_prob
unk_mask = ~shr_mask  # query cells whose label is absent from the reference

# "Known" cells (label shared with the reference): prediction, ground truth, score.
kn_data_pr, kn_data_gt, kn_data_open_score = (
    query_pred[shr_mask],
    atac_lab[shr_mask],
    open_score[shr_mask],
)

# "Unknown" cells only contribute their open-set score.
unk_data_open_score = open_score[unk_mask]

closed_acc, os_auroc, os_aupr, oscr = osr_evaluator(
    kn_data_pr, kn_data_gt, kn_data_open_score, unk_data_open_score
)
closed_acc, os_auroc, os_aupr, oscr

close_acc= 0.7618
AUROC= 0.5805
AUPR= 0.1286
OSCR= 0.4701


(0.7618474861679095,
 0.5805412905767467,
 0.1286103684802537,
 0.47013663002513684)