In [None]:
%load_ext autoreload
%autoreload 2

import os
import sys
sys.path.append("../")
from concerto_function5_3 import *
from sklearn.metrics import f1_score, accuracy_score
import numpy as np
import scanpy as sc
import matplotlib.pyplot as plt
from metrics import osr_evaluator

from os.path import join

In [None]:
# Select an available GPU on a multi-GPU machine. Skip this cell to run on the default
# device (or directly on the CPU).
# CUDA_VISIBLE_DEVICES is assigned *before* TensorFlow is imported so the device mask is
# in place before any GPU initialisation can happen in this cell.
# NOTE(review): '1' is a zero-based device index, i.e. the second GPU; on a single-GPU
# machine this would hide the only GPU — adjust to your hardware.
os.environ["CUDA_VISIBLE_DEVICES"] = '1'

import tensorflow as tf

# Enable memory growth on every visible GPU so TensorFlow allocates memory on demand
# rather than reserving it all up front.
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [None]:
# `.obs` column names used by the rest of the notebook.
batch_key, type_key = 'domain', 'cell_type'

# Experiment tag (later embedded in the tfrecord / weight folder names) and data location.
exp_id = 'PBMCMultome'
data_root = '/home/yanxh/data/pbmc_10x'

# Read the three AnnData files stored under `data_root`.
adata_atac = sc.read_h5ad(join(data_root, 'ATAC/adata_atac.h5ad'))
adata_rna = sc.read_h5ad(join(data_root, 'RNA/adata_rna.h5ad'))
adata_atac_gam = sc.read_h5ad(join(data_root, 'ATAC_GAM/adata_atac_gam.h5ad'))

# Restrict the ATAC_GAM and RNA objects to the variable names they have in common.
gene_share = adata_atac_gam.var_names.intersection(adata_rna.var_names)
adata_atac_gam2, adata_rna2 = adata_atac_gam[:, gene_share], adata_rna[:, gene_share]

# Display the summaries of the RNA and ATAC objects as the cell output.
adata_rna, adata_atac

In [None]:
# Stack the RNA cells (`adata_rna2`) and the ATAC_GAM cells (`adata_atac_gam2`) into a
# single AnnData; both inputs were already restricted to the shared genes (`gene_share`).
# NOTE(review): if the two modalities share cell barcodes, obs_names will contain
# duplicates after concatenation — confirm this is acceptable downstream.
adata_all = sc.concat([adata_rna2, adata_atac_gam2])
# Display the combined object's summary as the cell output.
adata_all

# Preprocess

In [None]:
# Preprocess the concatenated data with the project helper `preprocessing_rna`.
# Original author's note on what the helper does: filter cells, normalize_total,
# HVG selection (if enabled), no scaling. Here HVG selection is switched off
# (is_hvg=False, n_top_features=None) and min_features=0.
adata = preprocessing_rna(adata_all,
                          min_features=0,
                          n_top_features=None,
                          is_hvg=False,
                          batch_key=batch_key)

# Split by the batch column: 'rna' cells form the reference, 'atac' cells the query.
adata_ref = adata[adata.obs[batch_key] == 'rna']
adata_query = adata[adata.obs[batch_key] == 'atac']

# Boolean mask over the query cells: True where the cell's type also occurs in the
# reference ("known" classes), False for types that appear only in the query.
# np.isin is the replacement for np.in1d, which is deprecated as of NumPy 2.0; for this
# 1-D input it returns the same boolean array.
shr_mask = np.isin(adata_query.obs[type_key], adata_ref.obs[type_key].unique())
# Ground-truth query labels as a plain NumPy array, aligned with `shr_mask`.
atac_lab = np.array(adata_query.obs[type_key].values)

# Root directory under which the tfrecords and model weights are placed by later cells.
save_path = './'
# Optional: persist the two splits for reuse.
# if not os.path.exists(save_path):
#     os.makedirs(save_path)
# adata_ref.write_h5ad(save_path + 'adata_ref.h5ad')
# adata_query.write_h5ad(save_path + 'adata_query.h5ad')

In [None]:
# Write the reference and query splits to tfrecord folders under `save_path`, using the
# batch column and the cell-type column as batch / label fields. The return values are
# kept as `ref_tf_path` / `query_tf_path` for the following cells.
ref_tf_path = concerto_make_tfrecord_supervised(
    adata_ref,
    tf_path=save_path + f'tfrecord/{exp_id}/ref_tf/',
    batch_col_name=batch_key,
    label_col_name=type_key,
)
query_tf_path = concerto_make_tfrecord_supervised(
    adata_query,
    tf_path=save_path + f'tfrecord/{exp_id}/query_tf/',
    batch_col_name=batch_key,
    label_col_name=type_key,
)

In [None]:
# Train on the reference (RNA) and query (ATAC) tfrecords. If you don't want to train the
# model, you can just load our trained classifier's weights and test it directly.
weight_path = save_path + f'weight/{exp_id}/'
# Both tfrecord paths are re-derived here (the same values the evaluation cell uses) so
# this cell can run on existing tfrecords without re-executing the tfrecord-creation cell.
ref_tf_path = save_path + f'tfrecord/{exp_id}/ref_tf/'
query_tf_path = save_path + f'tfrecord/{exp_id}/query_tf/'

concerto_train_inter_supervised_uda2(ref_tf_path, query_tf_path, weight_path,
                                     super_parameters={'batch_size': 128, 'epoch_pretrain': 1,'epoch_classifier': 10, 'lr': 1e-4,'drop_rate': 0.1})

In [None]:
# Evaluate on the query (ATAC) cells using the stored weights and tfrecords.
weight_path = save_path + f'weight/{exp_id}/'
ref_tf_path = save_path + f'tfrecord/{exp_id}/ref_tf/'
query_tf_path = save_path + f'tfrecord/{exp_id}/query_tf/'

# NOTE(review): only epoch 2 is evaluated — confirm a checkpoint for that epoch exists
# given the training settings used above.
for epoch in [2]:
    results = concerto_test_inter_supervised2(
        weight_path,
        ref_tf_path,
        query_tf_path,
        super_parameters={'batch_size': 64, 'epoch': epoch, 'lr': 1e-5, 'drop_rate': 0.1},
    )

    # NN classifier: label query cells from the reference via `knn_classifier` (k=30),
    # fed with the source/target features returned by the test run.
    query_neighbor, query_prob = knn_classifier(
        results['source_feature'],
        results['target_feature'],
        adata_ref,
        adata_ref.obs_names,
        column_name=type_key,
        k=30,
    )
    # Open-set score is one minus the kNN probability.
    open_score = 1 - query_prob

    # Query cells whose type also occurs in the reference ("known" classes).
    kn_data_pr, kn_data_gt = query_neighbor[shr_mask], atac_lab[shr_mask]
    kn_data_open_score = open_score[shr_mask]

    # Remaining query cells (types absent from the reference).
    unk_data_open_score = open_score[~shr_mask]

    closed_acc, os_auroc, os_aupr, oscr = osr_evaluator(
        kn_data_pr, kn_data_gt, kn_data_open_score, unk_data_open_score
    )
    print(closed_acc, os_auroc, os_aupr, oscr)

# Neural classifier

In [None]:
# Predictions and probabilities for the query cells, read from the `results` left by the
# last iteration of the evaluation loop.
query_pred = results['target_pred']
query_prob = results['target_prob']
# query_label = results['target_label'] is not used: the encoding of query_record (and the way it was encoded) is not meaningful

In [None]:
# Open-set score is one minus the classifier probability of each query cell.
open_score = 1 - query_prob

# Query cells whose type also occurs in the reference ("known" classes).
kn_data_pr, kn_data_gt = query_pred[shr_mask], atac_lab[shr_mask]
kn_data_open_score = open_score[shr_mask]

# Remaining query cells (types absent from the reference).
unk_data_open_score = open_score[~shr_mask]

closed_acc, os_auroc, os_aupr, oscr = osr_evaluator(
    kn_data_pr, kn_data_gt, kn_data_open_score, unk_data_open_score
)
# Display the four metrics as the cell output.
closed_acc, os_auroc, os_aupr, oscr