In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import scarches as sca
import seaborn as sb
from scarches.plotting.terms_scores import plot_abs_bfs_key

In [None]:
# Notebook-wide figure defaults.
# scanpy's parameters are set first; the matplotlib settings below are applied after them.
# (matplotlib, pyplot and seaborn are imported in the import cell at the top of the notebook.)
sc.set_figure_params(figsize=(6, 6))

# Font family and size used for all figure text.
font = {'family': 'Arial', 'size': 14}

matplotlib.rc('font', **font)
matplotlib.rc('ytick', labelsize=14)
matplotlib.rc('xtick', labelsize=14)
matplotlib.rcParams["figure.dpi"] = 200

### PBMC Kang

In [None]:
# Read the AnnData object stored in 'kang_pbmc_integrated.h5ad' (path relative to the working directory).
adata = sc.read('kang_pbmc_integrated.h5ad')

In [None]:
# Build a condition column in which cells without a condition label ('nan') count as 'control'.
# The labels are copied as plain Python objects first, so the new column does not inherit
# the dtype of the original 'condition' column.
adata.obs['condition_merged'] = adata.obs['condition'].tolist()

# Assign through .loc on the DataFrame itself. The chained form `obs[col][mask] = value`
# may write to a temporary copy and leave adata.obs unchanged.
missing_condition = adata.obs['condition_merged'].astype(str) == 'nan'
adata.obs.loc[missing_condition, 'condition_merged'] = 'control'

# Convert string columns (including the new one) to categoricals.
adata.strings_to_categoricals()

In [None]:
# Restore the saved EXPIMAP model from the given directory, passing `adata` as the data it is loaded with.
intr_cvae = sca.models.EXPIMAP.load('q_intr_cvae_nolog_alpha_kl_0_5_0_1_sd_2020', adata)

In [None]:
# Ask the model for the direction of each latent term, using the "sum" method.
# NOTE(review): the following cell reassigns `directions` from adata.uns['directions'];
# confirm whether the value returned here is actually needed.
directions = intr_cvae.latent_directions(method="sum")

In [None]:
# Take the per-term directions from the AnnData's unstructured annotations.
directions = adata.uns['directions']

In [None]:
# Multiply the stored latent representation by the directions of the terms selected by
# intr_cvae.nonzero_terms() (presumably one factor per latent column — TODO confirm shape).
# NOTE(review): this modifies adata.obsm['X_cvae'] in place, so running the cell a second
# time applies the factors again.
# Previous accessor, kept for reference: intr_cvae.model.decoder.nonzero_terms()
adata.obsm['X_cvae'] *= directions[intr_cvae.nonzero_terms()]



In [None]:
# Debugging aid: prints the repr of the bound `latent_enrich` method (it does not call it).
print(intr_cvae.latent_enrich)

In [None]:
# Bayes-factor enrichment test of every latent term: each 'condition_merged' group is
# compared against the 'control' group.
# `directions_key` is the NAME of the adata.uns entry that holds the directions (not the
# array itself), and the directions are only applied when use_directions=True.
intr_cvae.latent_enrich(
    'condition_merged',
    comparison="control",
    use_directions=True,
    directions_key='directions',
    adata=adata,
    n_sample=50000,
    exact=True,
)
# latent_enrich stores its result on the AnnData it was given (default key 'bf_scores')
# instead of returning it, so the scores are read back from there.
scores_cond = adata.uns['bf_scores']

In [None]:
# scores_cond = adata.uns['bf_scores']

In [None]:
# Keep only the cells labelled 'control'; .copy() turns the subset into an independent AnnData.
adata_ctrl = adata[adata.obs.condition_merged == 'control'].copy()

In [None]:
# Bayes-factor enrichment test per cell type, restricted to control cells.
# No `comparison` is given, so the method's default comparison is used.
# `directions_key` names the adata.uns entry with the directions; they are only applied
# when use_directions=True.
intr_cvae.latent_enrich(
    'cell_type_joint',
    use_directions=True,
    directions_key='directions',
    n_sample=50000,
    adata=adata_ctrl,
    exact=True,
)
# The result is stored on the AnnData passed in (adata_ctrl), under the default key 'bf_scores'.
scores_ct = adata_ctrl.uns['bf_scores']

In [None]:
# Debugging aid: shows what type `scores_cond` is.
print(type(scores_cond))


In [None]:
# The cell-type test was run with adata=adata_ctrl, so its scores are stored in
# adata_ctrl.uns. adata.uns['bf_scores'] belongs to the condition comparison that was run
# on `adata`, and would not be keyed by cell type.
scores_ct = adata_ctrl.uns['bf_scores']

In [None]:
# Debugging aid: lists the entries currently stored in adata.uns.
print(adata.uns.keys())


In [None]:
# Plot the ranking for a single key ('stimulated').
# Term labels are cut to their first 58 characters.
fig = sca.plotting.plot_abs_bfs(
    adata,
    terms=np.array([name[:58] for name in adata.uns['terms']]),
    keys='stimulated',
    yt_step=1,
    fontsize=8,
    scale_y=2,
)

In [None]:
# Write the figure to a PNG file; bbox_inches="tight" trims the surrounding whitespace.
fig.savefig('pbmc_kang_overall_ranks.png', bbox_inches = "tight")

In [None]:
# plot all keys
# Term labels come from adata.uns['terms'], cut to 45 characters.
# adata.uns['directions'] holds the numeric per-term multipliers, not names, so it cannot
# be sliced into labels.
fig = sca.plotting.plot_abs_bfs(
    adata,
    terms=np.array([term[:45] for term in adata.uns['terms']]),
    n_cols=6,
    scale_y=2.6,
    yt_step=0.6,
)

In [None]:
# Resize the figure to 40 x 24 inches.
fig.set_size_inches(40, 24)

In [None]:
# Display the figure as the cell output.
fig

In [None]:
# Write the figure to a PNG file; bbox_inches="tight" trims the surrounding whitespace.
fig.savefig('pbmc_kang_overall_ranks_all.png', bbox_inches = "tight")

In [None]:
# Take the 'terms' entry from the `uns` of a second file, 'kang_pbmc_merged.h5ad'.
# NOTE(review): the whole file is read only to obtain uns['terms'].
terms_idx = sc.read('kang_pbmc_merged.h5ad').uns['terms']

In [None]:
# Results table indexed by the terms; its columns are filled in by the cells below.
df = pd.DataFrame(index=terms_idx)

In [None]:
# Row sums of the model's mask, one value per row of `df`.
# Presumably the number of genes annotated to each term — TODO confirm the mask layout.
df['go_size'] = np.asarray(intr_cvae.mask_).sum(axis=1)

In [None]:
# Bayes factors ('bf') of the 'stimulated' group from the condition comparison.
df['stimulated'] = scores_cond['stimulated']['bf']

In [None]:
# Add one column of Bayes factors per key of `scores_ct`.
for ct, ct_scores in scores_ct.items():
    df[ct] = ct_scores['bf']

In [None]:
# Distinct cell types among the cells whose batch_join is 'Kang (query)'.
query_ct = adata.obs.cell_type_joint[adata.obs.batch_join == 'Kang (query)'].unique()

In [None]:
# Keep only the score entries whose key is one of the query cell types.
scores_ct_q = {name: scores_ct[name] for name in scores_ct if name in query_ct}

In [None]:
# Plot the Bayes-factor rankings for the query cell types only.
# The other plot_abs_bfs calls in this notebook pass an AnnData first and the labels via
# `terms=`. To match that form, the filtered scores are stored under their own adata.uns
# key and selected with `scores_key`.
query_scores_key = 'bf_scores_query_ct'
adata.uns[query_scores_key] = scores_ct_q
fig = sca.plotting.plot_abs_bfs(
    adata,
    scores_key=query_scores_key,
    # Term labels are cut to 45 characters.
    terms=np.array([term[:45] for term in adata.uns['full_terms']]),
    n_cols=4,
    scale_y=4.4,
    yt_step=0.6,
)

In [None]:
# Resize the figure to 20 x 12 inches.
fig.set_size_inches(20, 12)

In [None]:
# Display the figure as the cell output.
fig

In [None]:
# Look up the gene names associated with one term, 'APOPTOTIC_CLEAVAGE_OF_CELLULAR'.
intr_cvae.terms_genes()['APOPTOTIC_CLEAVAGE_OF_CELLULAR']

In [None]:
# 2 x 4 grid of axes; the loop in the next cell draws the i-th cell type on axs[i // 4, i % 4].
fig, axs = plt.subplots(2, 4)

In [None]:
# For every query cell type: test stimulated vs. control within that cell type, record the
# Bayes factors in `df`, and draw the ranking on its own panel of the 2 x 4 grid.
# Term labels are cut to 54 characters.
terms = np.array([term[:54] for term in adata.uns['full_terms']])
for i, cat in enumerate(adata.obs.cell_type_joint[adata.obs.batch_join == 'Kang (query)'].unique()):
    # Work on a copy: latent_enrich stores its result in the `uns` of the AnnData it is
    # given, which should not be a view of `adata`.
    adata_ct = adata[adata.obs.cell_type_joint == cat].copy()
    # Same keyword names as the other latent_enrich calls in this notebook
    # (`directions_key` / `n_sample`); the directions are applied via use_directions=True.
    intr_cvae.latent_enrich(
        'condition_merged',
        comparison="control",
        use_directions=True,
        directions_key='directions',
        adata=adata_ct,
        n_sample=50000,
        exact=True,
    )
    # Results are stored under the default key 'bf_scores' rather than returned.
    scores_c_ct = adata_ct.uns['bf_scores']
    df[f'{cat}_stimulated'] = scores_c_ct['stimulated']['bf']
    plot_abs_bfs_key(scores_c_ct, terms, 'stimulated', title=cat, ax=axs[i//4, i%4], yt_step=1, scale_y=2.45)

In [None]:
# Resize the figure to 21 x 13 inches.
fig.set_size_inches(21, 13)

In [None]:
# Display the figure as the cell output.
fig

In [None]:
# Write the figure to a PNG file; bbox_inches="tight" trims the surrounding whitespace.
fig.savefig('pbmc_kang_ct_cond.png', bbox_inches = "tight")

In [None]:
# Save the table of per-term Bayes factors as a CSV file in the working directory.
df.to_csv('expimap_bayes_factors.csv')