In [1]:
import scib, scanpy, scprep
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

In [2]:
adata_beta = scanpy.read_h5ad('../data/beta_run_2.h5ad')
all_adatas = scanpy.read_h5ad('results/all_mapped_human_datasets.h5ad')

In [3]:
all_adatas.obsm['X_scmmgan_gene'] = pd.DataFrame(all_adatas.obsm['X_scmmgan_gene'],
                                                 index=all_adatas.obs_names,
                                                 columns=adata_beta.var['names'])

In [4]:
adata_post_all = scanpy.concat((all_adatas, adata_beta), label='batch')



In [5]:
adata_post_all.obsm['X_pc'] = np.vstack((adata_beta.obsm['X_pca'], all_adatas.obsm['X_scmmgan_pca']))
adata_post_all.obs['cell_type'] = 'beta cell' # cell type necessary for batch ASW (compares per cell type)

In [6]:
evaluation_summary = pd.DataFrame(columns=['pre_post_corr'])

In [7]:
evaluation_summary.loc['Batch ASW'] = scib.me.silhouette_batch(adata_post_all, batch_key='batch', label_key='cell_type', embed='X_pc')

mean silhouette per group:            silhouette_score
group                      
beta cell          0.971123


In [8]:
all_adatas.obsm['X_scmmgan_gene'].columns = adata_beta.var['human_name']

In [9]:
intersection = list(set(all_adatas.var_names).intersection(all_adatas.obsm['X_scmmgan_gene'].columns))
all_gene_space = all_adatas.obsm['X_scmmgan_gene'].iloc[:, ~all_adatas.obsm['X_scmmgan_gene'].columns.duplicated()]

In [10]:
ref_x = all_adatas[:, intersection].X.toarray()
post_x = all_gene_space[intersection].values

In [11]:
pre_post_corr = []
for i in tqdm(range(all_adatas.n_obs)):
    pre_post_corr.append(cosine_similarity(ref_x[i].reshape(1,-1), post_x[i].reshape(1,-1))[0][0])

100%|██████████| 10663/10663 [00:03<00:00, 3544.56it/s]


In [12]:
evaluation_summary.loc['Mean cell cosine similarity pre- and post-alignment'] = np.mean(pre_post_corr)

In [13]:
all_adatas.obs['pre_post_corr'] = pre_post_corr

In [14]:
evaluation_summary = pd.concat((evaluation_summary, all_adatas.obs[['disease', 'pre_post_corr']].groupby('disease').mean()))
evaluation_summary = pd.concat((evaluation_summary, all_adatas.obs[['sex', 'pre_post_corr']].groupby('sex').mean()))
evaluation_summary = pd.concat((evaluation_summary, all_adatas.obs[['AT_committment', 'pre_post_corr']].groupby('AT_committment').mean()))

In [16]:
evaluation_summary.to_csv('results/scMMGAN_stats.csv')