# Analyze results for Treg states

In [None]:
import numpy as np
import pandas as pd
import anndata
import os
import scipy
import h5py
from grelu.interpret.motifs import trim_pwm
from grelu.visualize import plot_attributions
from plotnine import *

from grelu.io.motifs import read_meme_file
from tangermeme.plot import plot_pwm

%matplotlib inline

## Paths

In [None]:
# Base directory; every other path in this cell is derived from it.
save_dir = "/gstore/data/resbioai/grelu/decima/20240823"

# Build the individual file/directory locations under the shared base.
matrix_file, h5_file, ckpt_dir = (
    os.path.join(save_dir, leaf)
    for leaf in ("data.h5ad", "data.h5", "lightning_logs")
)

## Load data

In [None]:
# Load the AnnData object and keep only the observations that belong to the
# 'skin_atlas' dataset and are labelled 'Treg cycling' or 'Treg'.
ad = anndata.read_h5ad(matrix_file)
keep = (ad.obs.dataset == 'skin_atlas') & ad.obs.cell_type.isin(['Treg cycling', 'Treg'])

# Subsetting an AnnData returns a view. New columns are written to `ad.var`
# further down, and writing to a view makes anndata copy implicitly and emit an
# ImplicitModificationWarning, so materialise the subset explicitly here.
ad = ad[keep].copy()

In [None]:
# Motif collection stored in a MEME-format file; entries are looked up by name
# when plotting further down.
meme_path = '/gstore/data/resbioai/grelu/decima/H12CORE_meme_format.meme'
motifs = read_meme_file(meme_path)

## Predict differential expression

In [None]:
# Boolean masks over observations for the two cell types being contrasted.
is_cycling = ad.obs.cell_type == 'Treg cycling'
is_resting = ad.obs.cell_type == 'Treg'

# For every variable (column), take the mean over observations of each group
# and store 'Treg cycling' minus 'Treg': once from the values in X and once
# from the 'preds' layer.
ad.var['diff_true'] = ad[is_cycling].X.mean(0) - ad[is_resting].X.mean(0)
ad.var['diff_pred'] = (
    ad[is_cycling].layers['preds'].mean(0) - ad[is_resting].layers['preds'].mean(0)
)

In [None]:
# Keep only the variables whose `dataset` column is 'test', then print the
# Pearson correlation between the two difference columns.
test_var = ad.var.loc[ad.var.dataset == 'test']
print(scipy.stats.pearsonr(test_var['diff_true'], test_var['diff_pred']))

In [None]:
# Density-coloured scatter of diff_true (x) against diff_pred (y), restricted
# to the variables whose `dataset` column is 'test'.
(
    ggplot(
        data=ad.var[ad.var.dataset == 'test'],
        mapping=aes(x='diff_true', y='diff_pred'),
    )
    + geom_pointdensity(size=.1)
    + theme_classic()
    + theme(figure_size=(2.6, 2.5))
    + xlab('Measured log FC')
    + ylab('Predicted logFC')
    + ggtitle('     Treg cycling vs. Treg')
    # Reference lines: the identity line y = x, plus dashed lines at x = 0 and y = 0.
    + geom_abline(slope=1, intercept=0)
    + geom_vline(xintercept=0, linetype='--')
    + geom_hline(yintercept=0, linetype='--')
)

## Plot logos

In [None]:
# Index of the entry under 'pos_patterns' in the modisco report to plot.
i = 2

modisco_h5 = 'Treg_cycling__vs__tregnoncycling/modisco_full/modisco_report.h5'

# Copy the contribution scores into memory inside a context manager so the
# HDF5 file handle is closed as soon as the data has been read.
with h5py.File(modisco_h5, 'r') as f:
    contrib_scores = np.array(f['pos_patterns'][f'pattern_{i}']['contrib_scores'])

# Trim the score matrix with trim_pwm, passing 0.1 as its second argument.
m = trim_pwm(contrib_scores, 0.1)

# Transpose and reverse both axes before plotting — presumably to show the
# reverse-complement orientation of the pattern; TODO confirm.
display(plot_attributions(np.flip(m.T, (0, 1)), figsize=(4, 1)))

In [None]:
# Look up the E2F4 entry in the loaded motif collection and draw its PWM.
e2f4_pwm = motifs['E2F4.H12CORE.0.P.B']
plot_pwm(e2f4_pwm)