# motif enrichment comparisons

## summary

- want to see which motifs are enriched when combining the SHAP-contribution and ISM approaches (union)
- want to compare what is enriched in one group relative to background

## analysis overview

### subclones

- 

In [None]:
%matplotlib inline

import os
import numpy as np
import pandas as pd
from scipy import stats
import datetime
import time
import sys
sys.path.append('./')
from reg_diffs.scripts import evalism_v22 as evalism 
from reg_diffs.scripts.analysis import motif_enrichment_cmp as mtfcmp
from reg_diffs.scripts import utils as regutils
import glob
import pickle

import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import chi2_contingency

import re
import pybedtools
import scipy

from sklearn.linear_model import LinearRegression
from adjustText import adjust_text
from scipy.stats import spearmanr

import scanpy as sc

## GBM 39

### cmp btw subclones

* clone A
* clone B

In [None]:
# Load the per-clone motif tables; later cells index `data` by keys such as
# 'GBM39_cloneA' and expect DataFrame values with a 'group_name' column.
# NOTE(review): presumably this unpickles the file -- only load trusted pickles.
cleaned_motif_file = './results/reg_diffs/ismshap_gbm_subclone_vierstrav1.pkl'
data = regutils.load_from_pickle(cleaned_motif_file)

In [None]:
# # specify groups

# groups = {'luminal': ['brca{}'.format(n) for n in ['10', '12', '15', '20', '22', ]],
#           'basal': ['brca{}'.format(n) for n in ['14', '16', '23', '24', '25', ]],
#           'her2': ['brca{}'.format(n) for n in ['11', '13', '17', '18', '19', '21', ]]}

# sample2grp = {}
# for k, v in groups.items():
#     for s in v:
#         sample2grp[s] = '{}.{}'.format(k, s.split('brca')[-1])
# sample2grp

# names

Need to modify motif names for simplicity

In [None]:
# Notebook-level flag; it is reassigned in a later cell and not read by any
# code in the visible part of this notebook.
load = True

In [None]:
# Collect every distinct motif group name that occurs in any clone table.
# The per-table uniques are concatenated first and de-duplicated once at the
# end (previously the de-duplication was re-run on every loop iteration).
names = []
for motif_table in data.values():
    names += motif_table['group_name'].unique().tolist()
names = list(np.unique(names))  # sorted, unique

In [None]:
# number of distinct raw motif group names
len(names)

In [None]:
# Map every raw motif group name to a shorter display key.
#
# A name is split on '/', the parts are sorted, and then:
#   * with more than two parts, the first (sorted) part is dropped and a
#     message is printed;
#   * a single part, or parts sharing their first 3 letters, collapse to a
#     prefix of the first part -- 4 letters when the part has a digit after
#     at least four characters or starts with CTCF/GATA (and is not a ZNF),
#     otherwise 3 letters;
#   * for exactly two parts where the first starts with a digit, only the
#     second part is kept;
#   * anything else keeps its sorted parts re-joined with '/'.
#
# Regex patterns are raw strings: '\d' in a plain string literal is an invalid
# escape sequence and triggers a warning on recent Python versions.
name_keys = {}
for name in names:
    parts = name.split('/')
    parts.sort()
    if len(parts) > 2:
        print('Problem? See:', parts, 'as:', '/'.join(parts[1:]))
        parts = parts[1:]
    if (len(parts) > 1 and parts[0][:3] == parts[1][:3]) or (len(parts) == 1):
        # shared 3-letter stem (or a solo name): keep only a prefix of the first part
        first = parts[0]
        keep_four = re.search(r'....\d', first) or first[:4] in ['CTCF', 'GATA']
        if keep_four and 'ZNF' not in first[:4]:
            parts = [first[:4]]
        else:
            parts = [first[:3]]
    elif len(parts) == 2 and re.search(r'^\d', parts[0]):
        parts = [parts[1]]
    name_keys[name] = '/'.join(parts)

# remap the solos to existing ones

In [None]:
# how many distinct simplified keys the raw names collapse to
len(set(name_keys.values()))

### transfer name keys

In [None]:
# Attach the simplified motif key to each clone table as 'modified_names'.
for clone_key in ('GBM39_cloneA', 'GBM39_cloneB', 'GBM45_cloneA', 'GBM45_cloneB'):
    data[clone_key]['modified_names'] = data[clone_key]['group_name'].map(name_keys)

# Per-tumour dictionaries keyed by clone; the values are the same DataFrame
# objects that live in `data`.
gbm39 = {clone: data['GBM39_' + clone] for clone in ('cloneA', 'cloneB')}
gbm45 = {clone: data['GBM45_' + clone] for clone in ('cloneA', 'cloneB')}

# GBM 39

In [None]:
# `export` gates the file-writing arguments/branches of the plotting cells below
# (it is set again before the first plotting cell); `load` is not read by
# any code in the visible part of this notebook.
export = False
load = False

In [None]:
# GBM39, clone A vs clone B: run the chi2 test on the simplified motif keys.

# column holding the motif label; reused by every later enrichment/plot cell
name_key = 'modified_names'

enrich_out = evalism.chi2ovr(
    data=gbm39, groups=None, name_key=name_key,
    B_key='end_cnt_after', notB_key='end_cnt_before',
    return_df=True, verbose=True,
)
# print('pct significant: {:.2f}'.format(100*(enrich_out['p_bonferonni'] < 0.5).sum()/enrich_out.shape[0]))
# if export:
#     enrich_out.to_csv('./tmp/reg_diffs/results/mtfcmp_cloneAvB_ism_shap_gbm39.csv')

# ## 3. visualize
# agg = mtfcmp.viz_compare_enrich(enrich_out, name_key=name_key, drop_duplicates_key=name_key, names2display=None)
# agg


In [None]:
# enable writing of plots / matrices in the plotting cells below
export = True

In [None]:
####################################################
# args
####################################################
suffix = 'clstrmp_mtfcmp_gbm39AvB'
grid = {'zscore': ['row', 'col', 'rowcol', None],
        'topn': [10, 20, None],
        'drop': ['ZNF', None]}
####################################################

# Plots go to a dated folder, matrices to a fixed results folder.
# NOTE: `dt` is the date stamp only until the first loop iteration, where it is
# rebound to the third value returned by viz_compare_enrich.
dt = datetime.datetime.now().strftime('%y%m%d')
pfp = os.path.join('./results/tmp/', dt)
os.makedirs(pfp, exist_ok=True)
mat_dir = './tmp/results/reg_diffs/'
if export:
    # the CSV folder was never created before, so writing could fail on a fresh checkout
    os.makedirs(mat_dir, exist_ok=True)

# Sweep every (zscore, topn, drop) combination of the grid.
count = 0
for zscore_type in grid['zscore']:
    for n in grid['topn']:
        for drop_motif in grid['drop']:
            print(count, '{}_{}_{}'.format(zscore_type, n, drop_motif))
            # shared file stem for the plot, the matrix and the pre-zscore matrix
            stem = '{}_z{}_top{}_drop{}'.format(suffix, zscore_type, n, drop_motif)
            agg, p, dt = mtfcmp.viz_compare_enrich(
                enrich_out,
                name_key=name_key,
                drop_duplicates_key='modified_names',
                names2display=None,
                topn=n,
                drop=drop_motif,
                zscore=zscore_type,
                save_plot=os.path.join(pfp, stem + '.pdf') if export else None,
                save_mat=(mat_dir + stem + '.csv') if export else None,
                return_plot_obj=True,
            )
            if export:
                prez_path = mat_dir + stem + '_preZ.csv'
                dt['df_prez'].to_csv(prez_path)
                print('  wrote {}'.format(prez_path))
            count += 1


In [None]:
# Print row and column labels of `dt` in the order given by the reordered
# indices of the row/column dendrograms of `p`. Both `dt` and `p` are whatever
# the most recently executed plotting loop left behind (its last iteration).
print(dt.index[p.dendrogram_row.reordered_ind])
print(dt.columns[p.dendrogram_col.reordered_ind])

# GBM 45

In [None]:
# GBM45, clone A vs clone B: same test as for GBM39 (overwrites `enrich_out`)
enrich_out = evalism.chi2ovr(
    data=gbm45, groups=None, name_key=name_key,
    B_key='end_cnt_after', notB_key='end_cnt_before',
    return_df=True, verbose=True,
)


In [None]:
# ####################################################
# # args
# ####################################################
# suffix = 'clstrmp_mtfcmp_gbm45AvB'
# grid ={'zscore': ['row', 'col', 'rowcol', None, ],
#        'topn': [10, 20, None],
#        'drop': ['ZNF', None]}
# ####################################################

# dt = datetime.datetime.now().strftime('%y%m%d')
# pfp = os.path.join('./results/tmp/', dt)
# if not os.path.exists(pfp):
#     os.makedirs(pfp)

# count = 0
# for zscore_type in grid['zscore']:
#     for n in grid['topn']:
#         for drop_motif in grid['drop']:
#             print(count, '{}_{}_{}'.format(zscore_type, n, drop_motif))
#             agg, p, dt = mtfcmp.viz_compare_enrich(
#                 enrich_out, 
#                 name_key=name_key, 
#                 drop_duplicates_key='modified_names', 
#                 names2display=None,
#                 topn=n,
#                 drop=drop_motif,
#                 zscore=zscore_type,
#                 save_plot=None if not export else os.path.join(pfp, '{}_z{}_top{}_drop{}.pdf'.format(suffix, zscore_type, n, drop_motif)),
#                 save_mat=None if not export else './tmp/results/reg_diffs/{}_z{}_top{}_drop{}.csv'.format(suffix, zscore_type, n, drop_motif),
#                 return_plot_obj=True,
#             )
#             if export:
#                 dt['df_prez'].to_csv('./tmp/results/reg_diffs/{}_z{}_top{}_drop{}_preZ.csv'.format(suffix, zscore_type, n, drop_motif))
#                 print('  wrote {}'.format('./tmp/results/reg_diffs/{}_z{}_top{}_drop{}_preZ.csv'.format(suffix, zscore_type, n, drop_motif)))
#             count+=1


# restrict to sub-seq

## chr 6



In [None]:
# Keep only the rows whose 'seqnames' is chr6, separately for each clone.
gbm_39_chr6 = {
    clone: gbm39[clone].loc[gbm39[clone]['seqnames'] == 'chr6']
    for clone in ('cloneA', 'cloneB')
}
gbm_45_chr6 = {
    clone: gbm45[clone].loc[gbm45[clone]['seqnames'] == 'chr6']
    for clone in ('cloneA', 'cloneB')
}

### gbm 39

In [None]:
# GBM39 restricted to chr6: clone A vs clone B test (overwrites `enrich_out`)
enrich_out = evalism.chi2ovr(
    data=gbm_39_chr6, groups=None, name_key=name_key,
    B_key='end_cnt_after', notB_key='end_cnt_before',
    return_df=True, verbose=True,
)

In [None]:
####################################################
# args
####################################################
suffix = 'clstrmp_mtfcmp_gbm39AvB_chr6'
grid = {'zscore': ['row', 'col', 'rowcol', None],
        'topn': [10, 20, None],
        'drop': ['ZNF', None]}
####################################################

# Plots go to a dated folder, matrices to a fixed results folder.
# NOTE: `dt` is the date stamp only until the first loop iteration, where it is
# rebound to the third value returned by viz_compare_enrich.
dt = datetime.datetime.now().strftime('%y%m%d')
pfp = os.path.join('./results/tmp/', dt)
os.makedirs(pfp, exist_ok=True)
mat_dir = './tmp/results/reg_diffs/'
if export:
    # the CSV folder was never created before, so writing could fail on a fresh checkout
    os.makedirs(mat_dir, exist_ok=True)

# Sweep every (zscore, topn, drop) combination of the grid.
count = 0
for zscore_type in grid['zscore']:
    for n in grid['topn']:
        for drop_motif in grid['drop']:
            print(count, '{}_{}_{}'.format(zscore_type, n, drop_motif))
            # shared file stem for the plot, the matrix and the pre-zscore matrix
            stem = '{}_z{}_top{}_drop{}'.format(suffix, zscore_type, n, drop_motif)
            agg, p, dt = mtfcmp.viz_compare_enrich(
                enrich_out,
                name_key=name_key,
                drop_duplicates_key='modified_names',
                names2display=None,
                topn=n,
                drop=drop_motif,
                zscore=zscore_type,
                save_plot=os.path.join(pfp, stem + '.pdf') if export else None,
                save_mat=(mat_dir + stem + '.csv') if export else None,
                return_plot_obj=True,
            )
            if export:
                prez_path = mat_dir + stem + '_preZ.csv'
                dt['df_prez'].to_csv(prez_path)
                print('  wrote {}'.format(prez_path))
            count += 1


### gbm 45

In [None]:
# GBM45 restricted to chr6: clone A vs clone B test (overwrites `enrich_out`)
enrich_out = evalism.chi2ovr(
    data=gbm_45_chr6, groups=None, name_key=name_key,
    B_key='end_cnt_after', notB_key='end_cnt_before',
    return_df=True, verbose=True,
)

In [None]:
####################################################
# args
####################################################
suffix = 'clstrmp_mtfcmp_gbm45AvB_chr6'
grid = {'zscore': ['row', 'col', 'rowcol', None],
        'topn': [10, 20, None],
        'drop': ['ZNF', None]}
####################################################

# Plots go to a dated folder, matrices to a fixed results folder.
# NOTE: `dt` is the date stamp only until the first loop iteration, where it is
# rebound to the third value returned by viz_compare_enrich.
dt = datetime.datetime.now().strftime('%y%m%d')
pfp = os.path.join('./results/tmp/', dt)
os.makedirs(pfp, exist_ok=True)
mat_dir = './tmp/results/reg_diffs/'
if export:
    # the CSV folder was never created before, so writing could fail on a fresh checkout
    os.makedirs(mat_dir, exist_ok=True)

# Sweep every (zscore, topn, drop) combination of the grid.
count = 0
for zscore_type in grid['zscore']:
    for n in grid['topn']:
        for drop_motif in grid['drop']:
            print(count, '{}_{}_{}'.format(zscore_type, n, drop_motif))
            # shared file stem for the plot, the matrix and the pre-zscore matrix
            stem = '{}_z{}_top{}_drop{}'.format(suffix, zscore_type, n, drop_motif)
            agg, p, dt = mtfcmp.viz_compare_enrich(
                enrich_out,
                name_key=name_key,
                drop_duplicates_key='modified_names',
                names2display=None,
                topn=n,
                drop=drop_motif,
                zscore=zscore_type,
                save_plot=os.path.join(pfp, stem + '.pdf') if export else None,
                save_mat=(mat_dir + stem + '.csv') if export else None,
                return_plot_obj=True,
            )
            if export:
                prez_path = mat_dir + stem + '_preZ.csv'
                dt['df_prez'].to_csv(prez_path)
                print('  wrote {}'.format(prez_path))
            count += 1


## exclude chr 6

In [None]:
# Complement of the chr6 subsets: every row whose 'seqnames' is not chr6.
gbm_39_nochr6 = {
    clone: gbm39[clone].loc[gbm39[clone]['seqnames'] != 'chr6']
    for clone in ('cloneA', 'cloneB')
}
gbm_45_nochr6 = {
    clone: gbm45[clone].loc[gbm45[clone]['seqnames'] != 'chr6']
    for clone in ('cloneA', 'cloneB')
}

In [None]:
# GBM45 without chr6: clone A vs clone B test (overwrites `enrich_out`)
enrich_out = evalism.chi2ovr(
    data=gbm_45_nochr6, groups=None, name_key=name_key,
    B_key='end_cnt_after', notB_key='end_cnt_before',
    return_df=True, verbose=True,
)

In [None]:
####################################################
# args
####################################################
suffix = 'clstrmp_mtfcmp_gbm45AvB_nochr6'
grid = {'zscore': ['rowcol', None],
        'topn': [None],
        'drop': [None]}
####################################################

# Plots go to a dated folder, matrices to a fixed results folder.
# NOTE: `dt` is the date stamp only until the first loop iteration, where it is
# rebound to the third value returned by viz_compare_enrich.
dt = datetime.datetime.now().strftime('%y%m%d')
pfp = os.path.join('./results/tmp/', dt)
os.makedirs(pfp, exist_ok=True)
mat_dir = './tmp/results/reg_diffs/'
if export:
    # the CSV folder was never created before, so writing could fail on a fresh checkout
    os.makedirs(mat_dir, exist_ok=True)

# Sweep every (zscore, topn, drop) combination of the grid.
count = 0
for zscore_type in grid['zscore']:
    for n in grid['topn']:
        for drop_motif in grid['drop']:
            print(count, '{}_{}_{}'.format(zscore_type, n, drop_motif))
            # shared file stem for the plot, the matrix and the pre-zscore matrix
            stem = '{}_z{}_top{}_drop{}'.format(suffix, zscore_type, n, drop_motif)
            agg, p, dt = mtfcmp.viz_compare_enrich(
                enrich_out,
                name_key=name_key,
                drop_duplicates_key='modified_names',
                names2display=None,
                topn=n,
                drop=drop_motif,
                zscore=zscore_type,
                save_plot=os.path.join(pfp, stem + '.pdf') if export else None,
                save_mat=(mat_dir + stem + '.csv') if export else None,
                return_plot_obj=True,
            )
            if export:
                prez_path = mat_dir + stem + '_preZ.csv'
                dt['df_prez'].to_csv(prez_path)
                print('  wrote {}'.format(prez_path))
            count += 1


In [None]:
# show the first value returned by the last viz_compare_enrich call
agg

# find genes in chr 6

In [None]:
# Load the TSS reference table and keep the chr6 entries.
tss_file = './data/ref_data/gencodev39_cage_ratio_to_sum_refined_tss_positions_transcripts_protein_coding_inclZeros_withTranscriptID.tsv'
tss = pd.read_csv(tss_file, sep='\t')
# .copy() so that adding a column below writes to an independent frame rather
# than to a slice of `tss` (avoids SettingWithCopyWarning)
tss_chr6 = tss.loc[tss['chrom'] == 'chr6'].copy()
# 'endB' = tss_pos + 1; it is used as the interval end column when this table
# is converted to a BedTool
tss_chr6['endB'] = tss_chr6['tss_pos'] + 1
# chr6 motif rows of GBM45 clone B
gbm45_cloneB_mtf_chr6 = data['GBM45_cloneB'].loc[data['GBM45_cloneB']['seqnames'] == 'chr6']

In [None]:
# Rows labelled 'POU' in each of the four non-chr6 clone tables.
# Previously these were four separate bare expressions, so only the last one
# was displayed; stacking them under a per-table key shows all four at once.
pou_tables = {
    'gbm39_cloneA': gbm_39_nochr6['cloneA'],
    'gbm39_cloneB': gbm_39_nochr6['cloneB'],
    'gbm45_cloneA': gbm_45_nochr6['cloneA'],
    'gbm45_cloneB': gbm_45_nochr6['cloneB'],
}
pd.concat({label: tbl.loc[tbl['modified_names'] == 'POU'] for label, tbl in pou_tables.items()})

In [None]:
import pybedtools

# BED-style tables need the three coordinate columns first; every other column
# is carried along behind them in its original order.
motif_coords = ['seqnames', 'start', 'end']
tss_coords = ['chrom', 'tss_pos', 'endB']
Acol = motif_coords + [col for col in gbm45_cloneB_mtf_chr6.columns if col not in motif_coords]
Bcol = tss_coords + [col for col in tss_chr6.columns if col not in tss_coords]

# A: chr6 motif rows of GBM45 clone B, B: chr6 TSS table
A = pybedtools.BedTool.from_dataframe(gbm45_cloneB_mtf_chr6[Acol])
B = pybedtools.BedTool.from_dataframe(tss_chr6[Bcol])

In [None]:
# For each interval in A, find the closest interval in B (ties resolved with
# t="first"); d=True adds the trailing column named 'distance_to_tss' here.
target_names = Acol + Bcol + ["distance_to_tss"]
closest_hits = A.sort().closest(B.sort(), d=True, t="first")
df = closest_hits.to_dataframe(names=target_names)  # .drop('peak_summit_1',axis=1)

In [None]:
# all closest-TSS rows whose simplified motif key is 'POU'
df.loc[df['modified_names']=='POU']

In [None]:
# POU rows: keep the smallest-distance row per gene, then only those at most
# 10 kb (distance_to_tss <= 10000) from the TSS.
pou_by_gene = df.loc[df['modified_names'] == 'POU', ['group_name', 'gene', 'distance_to_tss']]
pou_by_gene = pou_by_gene.sort_values(by='distance_to_tss').drop_duplicates(subset=['gene'])
pou_by_gene.loc[pou_by_gene['distance_to_tss'] <= 10000]

# genes not in chr6

In [None]:
# Same closest-TSS annotation as for chr6, but on everything except chr6.
tss_file = './data/ref_data/gencodev39_cage_ratio_to_sum_refined_tss_positions_transcripts_protein_coding_inclZeros_withTranscriptID.tsv'
tss = pd.read_csv(tss_file, sep='\t')
# .copy() so that adding a column below writes to an independent frame rather
# than to a slice of `tss` (avoids SettingWithCopyWarning)
tss_nochr6 = tss.loc[tss['chrom'] != 'chr6'].copy()
tss_nochr6['endB'] = tss_nochr6['tss_pos'] + 1  # interval end column for the BedTool
# non-chr6 motif rows of GBM45 clone B
gbm_sub = data['GBM45_cloneB'].loc[data['GBM45_cloneB']['seqnames'] != 'chr6']

# coordinate columns first, remaining columns behind them in original order
Acol = ['seqnames', 'start', 'end'] + [c for c in gbm_sub.columns if c not in ['seqnames', 'start', 'end']]
Bcol = ['chrom', 'tss_pos', 'endB'] + [c for c in tss_nochr6.columns if c not in ['chrom', 'tss_pos', 'endB']]
A = pybedtools.BedTool.from_dataframe(gbm_sub[Acol])
B = pybedtools.BedTool.from_dataframe(tss_nochr6[Bcol])

# closest TSS interval per motif interval; the extra trailing column is named
# 'distance_to_tss'
target_names = Acol + Bcol + ["distance_to_tss"]
df = A.sort().closest(B.sort(), d=True, t="first")
df = df.to_dataframe(names=target_names)  # .drop('peak_summit_1',axis=1)

In [None]:
# Hard-coded list of gene symbols; going by the variable name these are TFs on
# chr6 (TODO confirm source). Not referenced by any code in the visible part of
# this notebook.
tflist_chr6 = ['AGER', 'ARID1B', 'ATF6B', 'BACH2', 'BCLAF1', 'CASP8AP2', 'CDC5L', 'CDKN1A', 'CITED2', 'DAXX', 'DEK', 'DSP', 'E2F3', 'EDN1', 'ESR1', 'ETV7', 'EXOC2', 'FOXC1', 'FOXF2', 'FOXO3', 'FOXP4', 'FOXQ1', 'GCM1', 'GCM2', 'GTF2H4', 'HDAC2', 'HEY2', 'HIVEP1', 'HIVEP2', 'HLA-DQB1', 'HLA-DQB2', 'HLA-DRB1', 'HLA-DRB5', 'HMGA1', 'HMGN3', 'HSF2', 'HSPA1A', 'HSPA1B', 'ID4', 'IRF4', 'JARID2', 'L3MBTL3', 'LIN28B', 'MAP3K7', 'MAPK14', 'MDFI', 'MED23', 'MYB', 'NCOA7', 'NFKBIL1', 'NFYA', 'NR2E1', 'OLIG3', 'PBX2', 'PGBD1', 'PHF1', 'PIM1', 'PKHD1', 'PLAGL1', 'POLH', 'POU3F2', 'POU5F1', 'PPARD', 'PPP1R10', 'PRDM1', 'PRDM13', 'PRIM2', 'PRKN', 'PRR3', 'RFX6', 'RING1', 'RIPK1', 'RREB1', 'RUNX2', 'RXRB', 'SCML4', 'SGK1', 'SHPRH', 'SIM1', 'SNRPC', 'SOGA3', 'SOX4', 'SPDEF', 'SRF', 'TAB2', 'TAF11', 'TAF8', 'TBP', 'TBPL1', 'TBX18', 'TCF19', 'TCF21', 'TDP2', 'TEAD3', 'TFAP2A', 'TFAP2B', 'TFAP2D', 'TFEB', 'TNF', 'TNFAIP3', 'TRERF1', 'TRIM15', 'TRIM26', 'TRIM27', 'TRIM31', 'TRIM38', 'TRIM40', 'TULP4', 'UFL1', 'VEGFA', 'VGLL2', 'ZBED9', 'ZBTB12', 'ZBTB2', 'ZBTB22', 'ZBTB24', 'ZBTB9', 'ZFP57', 'ZKSCAN3', 'ZKSCAN4', 'ZKSCAN8', 'ZNF165', 'ZNF184', 'ZNF292', 'ZNF311', 'ZNF318', 'ZNF322', 'ZNF391', 'ZNF451', 'ZNF76', 'ZSCAN12', 'ZSCAN16', 'ZSCAN23', 'ZSCAN26', 'ZSCAN31', 'ZSCAN9']

In [None]:
# inspect the column names of the closest-TSS table
df.columns

In [None]:
# POU rows ranked by 'es' (descending), ties broken by smaller distance; keep
# the top-ranked row per gene, then only rows with distance_to_tss <= 10000.
pou_cols = ['group_name', 'gene', 'es', 'distance_to_tss']
dt = (
    df.loc[df['modified_names'] == 'POU', pou_cols]
    .sort_values(by=['es', 'distance_to_tss'], ascending=[False, True])
    .drop_duplicates(subset=['gene'])
)
dt = dt.loc[dt['distance_to_tss'] <= 10000]
dt.head(50)

In [None]:
# inspect the column names of the closest-TSS table
df.columns

In [None]:
# Export motif rows whose closest gene is NEUROD1 or ASCL1 and whose group
# name mentions SOX or POU.
# NOTE(review): the file name says "ebox_sox" but the filter is SOX/POU -- confirm.
os.makedirs('./tmp/results', exist_ok=True)  # make sure the target folder exists
is_target_gene = df['gene'].isin(['NEUROD1', 'ASCL1'])
# boolean Series instead of a plain Python list: `Series & list` is deprecated
# in pandas; na=False treats a missing group name as "no match"
is_sox_or_pou = df['group_name'].str.contains('SOX|POU', regex=True, na=False)
export_cols = ['group_name', 'gene', 'es', 'contribution', 'distance_to_tss', 'seqnames', 'start', 'end']
df.loc[is_target_gene & is_sox_or_pou, export_cols].to_csv('./tmp/results/gbm_subclones_notchr6_ascl1_neurod1_ebox_sox.csv')

In [None]:
# Read the exported table back for inspection (rebinds `dt`).
# NOTE(review): the file was written with its index, and no index_col is
# given here, so the saved index comes back as an extra unnamed column.
dt = pd.read_csv('./tmp/results/gbm_subclones_notchr6_ascl1_neurod1_ebox_sox.csv')
dt

In [None]:
# Differential peaks of GBM45 clone B, excluding chr6.
# Path fixed from '.data/...' to './data/...', matching the per-clone loader below.
file = './data/GBM45_CLONEB_DIFFERENTIALPEAKS.CSV'
dt = pd.read_csv(file, index_col=0)
dt = dt.loc[dt['seqnames'] != 'chr6']  # omit chr 6


# run enrichment only on this set

In [None]:
# Restrict each clone's motif table to motif rows that overlap one of that
# clone's (non-chr6) differential peaks.
# NOTE(review): `gbm_sub` holds the clone B motif rows and is intersected with
# the peaks of BOTH clones -- confirm this is intended for cloneA.
coord_cols = ['seqnames', 'start', 'end']
key_cols = ['seqnames', 'start', 'end', 'group_name']

gbm_sub2 = {}
for c in ['cloneA', 'cloneB']:
    file = './data/GBM45_{}_DIFFERENTIALPEAKS.CSV'.format(c.upper())
    dt = pd.read_csv(file, index_col=0)
    dt = dt.loc[dt['seqnames'] != 'chr6']  # omit chr 6

    # coordinate columns first; the comprehension variable no longer shadows `c`
    peak_extra = [col for col in dt.columns if col not in coord_cols]
    Acol = coord_cols + peak_extra
    Bcol = coord_cols + [col for col in gbm_sub.columns if col not in coord_cols]
    # peak coordinates get an '_x' suffix so they do not clash with the motif
    # coordinates in the combined intersect output
    Acol_prime = ['seqnames_x', 'start_x', 'end_x'] + peak_extra

    A = pybedtools.BedTool.from_dataframe(dt[Acol])
    B = pybedtools.BedTool.from_dataframe(gbm_sub[Bcol])
    C = A.intersect(B, wao=True)
    C = C.to_dataframe(names=Acol_prime + Bcol + ['overlap'])

    # The merge acts as a filter ("keep motif rows that overlap a peak"), so the
    # key set is de-duplicated first; otherwise a motif overlapping several
    # peaks (or listed more than once) would multiply rows and inflate counts.
    overlapping_keys = C.loc[C['overlap'] != 0, key_cols].drop_duplicates()
    gbm_sub2[c] = data['GBM45_{}'.format(c)].merge(overlapping_keys, on=key_cols, how='inner')

In [None]:
# Drop motifs that are present in only one of the two clones.
motifs_a = set(gbm_sub2['cloneA']['modified_names'].unique())
motifs_b = set(gbm_sub2['cloneB']['modified_names'].unique())
non_overlapping = list(motifs_b - motifs_a)   # only in clone B
non_overlapping += list(motifs_a - motifs_b)  # only in clone A
for k in list(gbm_sub2):
    v = gbm_sub2[k]
    keep = ~v['modified_names'].isin(non_overlapping)
    gbm_sub2[k] = v.loc[keep]
print('ignored motifs:', non_overlapping)

In [None]:
# GBM45 motifs inside differential peaks (no chr6): clone A vs clone B test
# (overwrites `enrich_out`)
enrich_out = evalism.chi2ovr(
    data=gbm_sub2, groups=None, name_key=name_key,
    B_key='end_cnt_after', notB_key='end_cnt_before',
    return_df=True, verbose=True,
)

In [None]:
####################################################
# args
####################################################
suffix = 'clstrmp_mtfcmp_gbm45_diffpk_nochr6'
grid = {'zscore': ['rowcol'],
        'topn': [None],
        'drop': [None]}
####################################################

# Plots go to a dated folder, matrices to a fixed results folder.
# NOTE: `dt` is the date stamp only until the first loop iteration, where it is
# rebound to the third value returned by viz_compare_enrich.
dt = datetime.datetime.now().strftime('%y%m%d')
pfp = os.path.join('./results/tmp/', dt)
os.makedirs(pfp, exist_ok=True)
mat_dir = './tmp/results/reg_diffs/'
if export:
    # the CSV folder was never created before, so writing could fail on a fresh checkout
    os.makedirs(mat_dir, exist_ok=True)

# Sweep every (zscore, topn, drop) combination of the grid.
count = 0
for zscore_type in grid['zscore']:
    for n in grid['topn']:
        for drop_motif in grid['drop']:
            print(count, '{}_{}_{}'.format(zscore_type, n, drop_motif))
            # shared file stem for the plot, the matrix and the pre-zscore matrix
            stem = '{}_z{}_top{}_drop{}'.format(suffix, zscore_type, n, drop_motif)
            agg, p, dt = mtfcmp.viz_compare_enrich(
                enrich_out,
                name_key=name_key,
                drop_duplicates_key='modified_names',
                names2display=None,
                topn=n,
                drop=drop_motif,
                zscore=zscore_type,
                save_plot=os.path.join(pfp, stem + '.pdf') if export else None,
                save_mat=(mat_dir + stem + '.csv') if export else None,
                return_plot_obj=True,
            )
            if export:
                prez_path = mat_dir + stem + '_preZ.csv'
                dt['df_prez'].to_csv(prez_path)
                print('  wrote {}'.format(prez_path))
            count += 1


In [None]:
# show the first value returned by the last viz_compare_enrich call
agg

In [None]:
# Print row and column labels of `dt` in the order given by the reordered
# indices of the row/column dendrograms of `p` (both left over from the last
# iteration of the plotting loop above).
print(dt.index[p.dendrogram_row.reordered_ind])
print(dt.columns[p.dendrogram_col.reordered_ind])