In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
from time import time
import random
import torch
import numpy
from scipy.stats import spearmanr
import scanpy as sc
import pandas as pd
from scTEL.scTEL_API import scTEL_API
from scTEL.scTEL_API import scTEL_API

In [None]:
def setup_seed(seed):
    """Seed every random number generator used by this notebook.

    The same value is applied to Python's ``random`` module, NumPy's legacy
    global generator, and PyTorch (CPU generator and all CUDA devices).

    Parameters
    ----------
    seed : int
        Seed value shared by all generators.
    """
    random.seed(seed)
    numpy.random.seed(seed)
    torch.manual_seed(seed)
    # One call covers every CUDA device, including the current one. PyTorch
    # documents it as silently ignored when CUDA is unavailable, so neither an
    # availability guard nor a repeated per-device call is needed.
    torch.cuda.manual_seed_all(seed)


setup_seed(2021)

# Data preprocessing

## Read reference dataset

In [None]:
# Load the reference (PBMC) protein and gene data from .h5ad files; both are
# later passed to scTEL_API as the training inputs.
adata_protein = sc.read_h5ad('./Data/pbmc/pbmc_protein.h5ad')  # author-noted shape: 161764,224
adata_gene = sc.read_h5ad('./Data/pbmc/pbmc_gene.h5ad')

## Read query dataset

In [None]:
# Gene-expression data of the query (MALT) dataset, read from the 10x HDF5 file.
adata_malt_gene = sc.read_10x_h5("./Data/malt_10k_protein_v3_filtered_feature_bc_matrix.h5")  # 8412,33538

# Full feature-barcode matrix (transposed after reading) plus its feature table.
adata_malt = sc.read("./Data/filtered_feature_bc_matrix/matrix.mtx").T  # 8412,33555
malt_features = pd.read_csv("./Data/filtered_feature_bc_matrix/features.tsv", sep="\t", header=None)  # 33555,3

# Annotate the matrix with the feature table: column 2 holds the feature type,
# column 0 is used both as the protein name and as the variable name.
adata_malt.var["feature_type"] = list(malt_features[2])
# NOTE(review): assumes the .mtx matrix and the .h5 file list the cells in the
# same order -- TODO confirm.
adata_malt.obs_names = adata_malt_gene.obs_names
adata_malt.var['protein_names'] = list(malt_features[0])
adata_malt.var_names = list(malt_features[0])

# Keep only the antibody-capture (protein) features.
adata_malt_protein = adata_malt[:,
                     adata_malt.var['feature_type'] == 'Antibody Capture']  # MALT: 8412 cells, 17 proteins
adata_malt_gene.var_names_make_unique()

# Independent copies used as the query gene / protein data in later cells.
adata_gene_test = adata_malt_gene.copy()
adata_protein_test = adata_malt_protein.copy()
# Every query cell gets sample id 1. The length is taken from the data rather
# than hard-coded (previously 8412), so a different input file still works.
adata_protein_test.obs['sample'] = [1] * adata_protein_test.n_obs

In [None]:
# Show the summary of the query protein data as this cell's output.
adata_protein_test

In [None]:
# Proteins present in both datasets, kept in the order of the reference data.
ref = set(adata_protein_test.var.index)
prots = [x for x in adata_protein.var.index if x in ref]  # 10

# Ratio of the mean value over the shared proteins: query data / reference data.
protein_mean_ratio = adata_protein_test[:, prots].X.mean() / adata_protein[:, prots].X.mean()

# Genes (20713) and proteins (10) shared by the PBMC training set and the MALT test set.
common_genes = np.intersect1d(adata_gene.var.index, adata_gene_test.var.index)
common_proteins = np.intersect1d(adata_protein.var.index, adata_protein_test.var.index)

# The ratio used to be a bare expression in the middle of the cell, so it was
# computed and then discarded; as the last expression it is actually shown.
protein_mean_ratio

# Train scTEL model

In [None]:
# Build the scTEL API object from the reference gene/protein data and the query
# gene data (presumably train genes, train proteins, test genes -- confirm
# against the scTEL_API signature).
scTEL = scTEL_API(
    [adata_gene],
    [adata_protein],
    adata_gene_test,
    train_batchkeys=['donor'],
    batch_size=32,
    min_cells=30,
    min_genes=200,
    h_size=512,
    h=4,
    drop_rate=0.15,
)

In [None]:
# Train the model, predict for the query data, and show the wall-clock seconds
# spent on both steps together as the cell output.
start = time()
scTEL.train(
    n_epochs=100,
    ES_max=30,
    decay_max=10,
    decay_step=0.1,
    lr=10 ** (-3),
    weights_dir="weights_dir/pbmc_to_malt_TEL1",
    load=True,
)
imputed_test = scTEL.predict()
time() - start

## Embedding and protein prediction

In [None]:
# Get the embedding from the model and save it to disk
# (presumably an AnnData object, given the .h5ad file name -- confirm).
embedding = scTEL.embed()
embedding.write("scTEL_maltembedding2.h5ad")

In [None]:
def corr2_coeff(A, B, pearson = True):
    """Correlate matching rows of two equally shaped 2-D arrays.

    Row ``i`` of ``A`` is correlated with row ``i`` of ``B``.

    Parameters
    ----------
    A, B : array-like of shape (n_rows, n_cols)
        Paired observations; anything ``np.asarray`` accepts (e.g. nested lists).
    pearson : bool, default True
        ``True`` computes Pearson correlation, ``False`` Spearman rank
        correlation via ``scipy.stats.spearmanr``.

    Returns
    -------
    numpy.ndarray of shape (n_rows,) for Pearson, or a list of length n_rows
    for Spearman. In the Pearson case a row with zero variance yields ``nan``.
    """
    A = np.asarray(A)
    B = np.asarray(B)

    if pearson:
        # Centre every row on its own mean.
        A_mA = A - A.mean(axis=1, keepdims=True)
        B_mB = B - B.mean(axis=1, keepdims=True)

        # Per-row sums of squares.
        ssA = (A_mA ** 2).sum(axis=1)
        ssB = (B_mB ** 2).sum(axis=1)

        # Only matching rows are needed, so take the row-wise product instead
        # of building the full n_rows x n_rows matrix and reading its diagonal.
        # Constant rows give 0/0 -> nan; the warning is silenced but the nan
        # is kept so callers can drop such rows.
        with np.errstate(divide="ignore", invalid="ignore"):
            return (A_mA * B_mB).sum(axis=1) / np.sqrt(ssA * ssB)

    return [spearmanr(A[i], B[i])[0] for i in range(A.shape[0])]

In [None]:
# Densify X and keep an independent copy of the untransformed values in
# layers["raw"], so later in-place transforms of X cannot touch it.
# The unguarded .toarray() is deliberate: it fails if this cell is re-run on
# already processed data instead of silently normalising twice.
adata_protein_test.X = adata_protein_test.X.toarray()  # adata_protein_test(8412, 17)
adata_protein_test.layers["raw"] = adata_protein_test.X.copy()

# Restrict to the cells present in the prediction output. The explicit copy
# turns the view into a real object before it is modified in place below.
adata_protein_test = adata_protein_test[imputed_test.obs.index].copy()  # imputed_test(8385, 224)

sc.pp.normalize_total(adata_protein_test)
sc.pp.log1p(adata_protein_test)
sc.pp.filter_genes(adata_protein_test, min_counts = 1)

# Proteins available both in the prediction and in the measured data.
common_proteins = np.intersect1d(imputed_test.var.index, adata_protein_test.var.index) # 10

adata_protein_test = adata_protein_test[:, common_proteins].copy()
# Attach the predicted values and every layer of the prediction to the measured data.
adata_protein_test.layers['imputed'] = imputed_test[:, common_proteins].X
adata_protein_test.layers.update(imputed_test[:, common_proteins].layers)  # 8385, 10

patients = np.unique(adata_protein_test.obs['sample'].values)  # 1

# Scale X separately within each sample and write the result back.
for patient in patients:
    indices = (adata_protein_test.obs['sample'] == patient).values
    sub_adata = adata_protein_test[indices].copy()

    sc.pp.scale(sub_adata)
    adata_protein_test[indices] = sub_adata.X

In [None]:
# Correlation between predicted and measured values, one row per protein
# (both matrices are transposed so that corr2_coeff pairs up proteins).
# Proteins whose correlation is NaN are dropped.
corrs = pd.DataFrame(
    corr2_coeff(adata_protein_test.layers["imputed"].T, adata_protein_test.X.T),
    index=adata_protein_test.var.index,
).dropna()

In [None]:
# Mean of the correlation column, shown as this cell's output.
corrs.mean()

In [None]:
# Save the per-protein correlations as CSV.
# NOTE(review): this path points into the parent directory ('../'), whereas the
# MSE results in this notebook are written under './mse_results/' -- confirm
# the intended location and that the target directory exists.
corrs.to_csv('../corrs_results/scTEL_malt.csv')

In [None]:
def sq(x, y):
    """Element-wise squared difference of ``x`` and ``y``."""
    return (x - y) ** 2


# Squared error between the 'imputed' layer and X, averaged over axis 0
# (one value per protein), printed together with its overall mean and saved as CSV.
sqs = pd.DataFrame(
    sq(adata_protein_test.layers["imputed"], adata_protein_test.X).mean(axis=0),
    index=adata_protein_test.var.index,
)
print(sqs)
print(sqs.mean())
sqs.to_csv('./mse_results/scTEL_malt.csv')

In [None]:
def _coverage_probability(adata, lower_key, upper_key):
    """Fraction of entries of ``adata.X`` lying strictly between two layers.

    Parameters
    ----------
    adata : object with ``.X`` and ``.layers``
        Data whose ``X`` is compared element-wise against the two layers.
    lower_key, upper_key : str
        Names of the layers holding the lower and upper interval bounds.

    Returns
    -------
    Mean of the boolean mask ``lower < X < upper`` over all entries.
    """
    above_lower = adata.X > adata.layers[lower_key]
    below_upper = adata.X < adata.layers[upper_key]
    return (above_lower & below_upper).mean()


# Share of values that fall inside the q25-q75 and q10-q90 layers.
coverage_50 = _coverage_probability(adata_protein_test, 'q25', 'q75')
print(f"Effective Coverage Probability for Nominal 50% PI: {coverage_50:.3f}")

coverage_80 = _coverage_probability(adata_protein_test, 'q10', 'q90')
print(f"Effective Coverage Probability for Nominal 80% PI: {coverage_80:.3f}")


In [None]:
# Write the processed query protein data to disk.
# NOTE(review): assumes the 'cover_feature/' directory already exists -- confirm.
adata_protein_test.write("cover_feature/scTEL_maltfeatures.h5ad")