In [None]:
%load_ext autoreload
%autoreload 2

import os
import random

import numpy
import numpy as np
import pandas as pd
import scanpy as sc
import torch
from scipy.stats import spearmanr

from scTEL.scTEL_API import scTEL_API

In [None]:
def setup_seed(seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    Parameters
    ----------
    seed : int
        Value passed unchanged to ``torch.manual_seed``,
        ``torch.cuda.manual_seed_all``, ``numpy.random.seed`` and
        ``random.seed``.
    """
    torch.manual_seed(seed)
    # Called once, unconditionally, exactly as the original first call was;
    # the second, guarded round of CUDA seeding was redundant.
    torch.cuda.manual_seed_all(seed)
    numpy.random.seed(seed)
    random.seed(seed)


# Seed all RNGs once, before any data is loaded or any model is built.
setup_seed(2021)

In [None]:
# Training pair: PBMC gene and protein measurements.
adata_gene = sc.read("./Data/pbmc/pbmc_gene.h5ad")
adata_protein = sc.read("./Data/pbmc/pbmc_protein.h5ad")

# Test set: H1N1 gene matrix, transposed after reading
# (presumably stored genes x cells on disk -- confirm).
adata_gene_test = sc.read("./Data/H1N1/gene_data.mtx").T

# Gene names and per-cell metadata come from side files.
h1n1_gene_names = pd.read_csv("./Data/H1N1/gene_names.txt", index_col=0).iloc[:, 0]
adata_gene_test.var.index = h1n1_gene_names

h1n1_meta = pd.read_csv("./Data/H1N1/meta_data.txt", sep=',', index_col=0)
adata_gene_test.obs = h1n1_meta

In [None]:
# Build the scTEL wrapper from the PBMC training pair and the H1N1 test genes.
# NOTE(review): 'donor' / 'sample' are presumably obs columns that identify
# batches in the training and test data -- confirm against scTEL_API.
scTEL = scTEL_API(
    [adata_gene],
    [adata_protein],
    adata_gene_test,
    train_batchkeys=['donor'],
    test_batchkey='sample',
    batch_size=32,
    h_size=512,
    min_cells=30,
    min_genes=200,
    h=4,
    drop_rate=0.15,
)

In [None]:
# NOTE(review): load=True presumably reuses weights already present in
# weights_dir instead of retraining -- confirm against scTEL_API.train.
scTEL.train(
    n_epochs=1000,
    ES_max=30,
    decay_max=10,
    decay_step=0.1,
    lr=0.001,
    weights_dir="weights_dir/pbmc_to_h1n1_scTEL1",
    load=True,
)

In [None]:
# Model predictions for the test set. Later cells read .obs, .var, .X and
# .layers from this object.
imputed_test = scTEL.predict()

In [None]:
# Retrieve the embedding from the model and save it to disk as .h5ad.
embedding = scTEL.embed()
embedding.write("sctel_pbmctoh1n1embedding.h5ad")

In [None]:
"""Get test data"""
# Measured H1N1 protein matrix, transposed after reading.
adata_protein_test = sc.read("./Data/H1N1/protein_data.mtx").T
# Drop the last 5 characters of every protein name
# (presumably a fixed suffix -- confirm against protein_names.txt).
adata_protein_test.var.index = [x[:len(x) - 5] for x in
                                pd.read_csv("./Data/H1N1/protein_names.txt", index_col=0).iloc[:, 0]]
adata_protein_test.obs = pd.read_csv("./Data/H1N1/meta_data.txt", sep=',', index_col=0)

adata_protein_test.X = adata_protein_test.X.toarray()
# Store an independent copy: without it the "raw" layer is the very same array
# object as X, and any in-place transform of X would silently change it too.
adata_protein_test.layers["raw"] = adata_protein_test.X.copy()

# Keep only the cells that have predictions. Materialise the subset so the
# in-place scanpy calls below operate on a real AnnData, not on a view.
adata_protein_test = adata_protein_test[imputed_test.obs.index].copy()

sc.pp.normalize_total(adata_protein_test)
sc.pp.log1p(adata_protein_test)

# Proteins present in both the predictions and the measurements
# (np.intersect1d returns them sorted and unique).
common_proteins = np.intersect1d(imputed_test.var.index, adata_protein_test.var.index)

# Slice the predictions once and reuse the result for X and for the layers.
imputed_common = imputed_test[:, common_proteins]

adata_protein_test = adata_protein_test[:, common_proteins]
adata_protein_test.layers['imputed'] = imputed_common.X
adata_protein_test.layers.update(imputed_common.layers)

patients = np.unique(adata_protein_test.obs['sample'].values)

# Standardise the measured protein values separately within each sample.
for patient in patients:
    # Vectorised boolean mask of the cells belonging to this sample.
    indices = (adata_protein_test.obs['sample'] == patient).to_numpy()
    # Work on an explicit copy so sc.pp.scale does not have to convert a view.
    sub_adata = adata_protein_test[indices].copy()

    sc.pp.scale(sub_adata)
    # Write the scaled values back into the rows of the full object.
    adata_protein_test[indices] = sub_adata.X

In [None]:
def corr2_coeff(A, B, pearson = True):
    """Correlate matching rows of two equally shaped 2-D arrays.

    Row ``i`` of ``A`` is correlated with row ``i`` of ``B``.

    Parameters
    ----------
    A, B : 2-D array-like with a ``shape`` attribute
        Both inputs must have the same shape.
    pearson : bool, default True
        If True, compute Pearson correlations with vectorised NumPy code.
        Otherwise compute Spearman correlations row by row with
        ``scipy.stats.spearmanr``.

    Returns
    -------
    numpy.ndarray or list
        One coefficient per row: an array for Pearson, a list for Spearman.
        A row with zero variance yields NaN in the Pearson branch.

    Raises
    ------
    ValueError
        If ``A`` and ``B`` do not have the same shape.
    """
    if A.shape != B.shape:
        raise ValueError(
            f"A and B must have the same shape, got {A.shape} and {B.shape}"
        )

    if pearson:
        # Centre every row on its own mean.
        A_mA = A - A.mean(axis=1, keepdims=True)
        B_mB = B - B.mean(axis=1, keepdims=True)

        # Only matching row pairs are needed, so compute them directly instead
        # of building the full n x n correlation matrix and reading its diagonal.
        cross = (A_mA * B_mB).sum(axis=1)
        ssA = (A_mA**2).sum(axis=1)
        ssB = (B_mB**2).sum(axis=1)

        return cross / np.sqrt(ssA * ssB)

    return [spearmanr(A[i], B[i])[0] for i in range(A.shape[0])]

In [None]:
"""Compute correlation across patients"""
# Per-protein correlation between imputed and measured values over all cells;
# proteins whose correlation is NaN are dropped.
corrs = pd.DataFrame(
    corr2_coeff(adata_protein_test.layers["imputed"].T, adata_protein_test.X.T),
    index=adata_protein_test.var.index,
).dropna()

In [None]:
# Despite the name, this is the per-protein *root* mean squared error
# (square root of the mean squared difference over cells).
MSEs = ((adata_protein_test.X - adata_protein_test.layers["imputed"])**2).mean(axis = 0)**(1/2)

all_proteins = adata_protein_test.var.index
rmse_by_protein = pd.Series(np.asarray(MSEs).ravel(), index=all_proteins)
mean_expr_by_protein = pd.Series(
    np.asarray(adata_protein_test.layers["raw"].mean(axis = 0)).ravel(),
    index=all_proteins,
)

# `corrs` has been through dropna(), so it can have fewer rows than there are
# proteins. Align the other columns on its index instead of concatenating
# arrays positionally, which fails as soon as a row was dropped.
protein_table = pd.DataFrame(
    {
        "Correlations": corrs.iloc[:, 0].to_numpy(),
        "RMSE": rmse_by_protein.loc[corrs.index].to_numpy(),
        "Mean Expression": mean_expr_by_protein.loc[corrs.index].to_numpy(),
    },
    index=corrs.index,
)

protein_table["Log-Mean Expression"] = np.log(protein_table["Mean Expression"])

In [None]:
def sq(x, y):
    """Return the element-wise squared difference ``(x - y) ** 2``."""
    return (x - y)**2

In [None]:
"""Compute correlations within patient"""
# One row per protein, one column per sample.
sample_ids = np.unique(adata_protein_test.obs["sample"])
corrs_table = np.zeros((adata_protein_test.shape[1], len(sample_ids)))
sq_table = corrs_table.copy()

for col, patient in enumerate(sample_ids):
    in_sample = adata_protein_test.obs["sample"] == patient
    truth = adata_protein_test[in_sample].X.copy()
    imputed = adata_protein_test.layers["imputed"][in_sample].copy()

    # Per-protein correlation and mean squared difference within this sample.
    corrs_table[:, col] = corr2_coeff(truth.T, imputed.T)
    sq_table[:, col] = sq(truth, imputed).mean(axis = 0)

# Undefined correlations (NaN) are counted as zero.
nan_mask = np.isnan(corrs_table)
corrs_table[nan_mask] = 0

In [None]:
# Label both tables: rows are proteins, columns are samples.
protein_names = adata_protein_test.var.index
sample_names = np.unique(adata_protein_test.obs["sample"])

corrs_table = pd.DataFrame(corrs_table, index=protein_names, columns=sample_names)
sq_table = pd.DataFrame(sq_table, index=protein_names, columns=sample_names)

In [None]:
# Column-wise mean of the correlation table (one value per column).
corrs_table.mean(axis = 0)

In [None]:
# Single summary number: the mean of the column-wise means.
corrs_table.mean().mean()

In [None]:
# Create the output directory first so a fresh checkout survives "Run All".
os.makedirs('corrs_results', exist_ok=True)
corrs_table.to_csv('corrs_results/sctel_pbmctoh1n1.csv')

In [None]:
# Column-wise mean of the squared-error table (one value per column).
sq_table.mean(axis = 0)

In [None]:
# Single summary number: the mean of the column-wise means.
sq_table.mean().mean()

In [None]:
# Create the output directory first so a fresh checkout survives "Run All".
os.makedirs('mse_results', exist_ok=True)
sq_table.to_csv('mse_results/sctel_pbmctoh1n1.csv')

In [None]:
# Fraction of entries of X lying strictly between the 'q25' and 'q75' layers.
below_q75 = adata_protein_test.X < adata_protein_test.layers['q75']
above_q25 = adata_protein_test.X > adata_protein_test.layers['q25']
print(f"Effective Coverage Probability for Nominal 50% PI: {(below_q75 & above_q25).mean():.3f}")

In [None]:
# Fraction of entries of X lying strictly between the 'q10' and 'q90' layers.
below_q90 = adata_protein_test.X < adata_protein_test.layers['q90']
above_q10 = adata_protein_test.X > adata_protein_test.layers['q10']

print(f"Effective Coverage Probability for Nominal 80% PI: {(below_q90 & above_q10).mean():.3f}")

In [None]:
# Save the protein AnnData, including the layers added above, to disk as .h5ad.
adata_protein_test.write("sctel_pbmctoh1n1features.h5ad")