In [1]:
import pandas as pd

In [2]:
import h5py 
import numpy as np
def import_tfmodisco_motifs(tfm_results_path, trim=True, only_pos=True):
    """
    Collects the seqlets that support every TF-MoDISco pattern.

    Despite the name, no PFMs are built: the result maps the string
    `"metacluster_<x>.pattern_<y>"` to the list of seqlet descriptor strings
    stored under that pattern, where `x` is the position of the metacluster
    in the HDF5 file and `y` is the position of the pattern within it.
    Arguments:
        `tfm_results_path`: path to HDF5 containing TF-MoDISco results
        `trim`: accepted for backward compatibility; currently ignored
        `only_pos`: accepted for backward compatibility; currently ignored
    Returns the dictionary of seqlet lists. As a side effect, the number of
    seqlets in each metacluster is printed.
    """
    def _as_text(value):
        # Depending on the h5py version/dtype, strings come back as bytes or str.
        return value.decode("utf-8") if isinstance(value, bytes) else str(value)

    pfms = {}
    with h5py.File(tfm_results_path, "r") as f:
        metaclusters = f["metacluster_idx_to_submetacluster_results"]
        # NOTE(review): the index comes from the iteration order of the keys,
        # not from the number embedded in the key name - confirm they agree
        # when there are ten or more metaclusters.
        for metacluster_i, metacluster_key in enumerate(metaclusters.keys()):
            metacluster = metaclusters[metacluster_key]
            # `Dataset.value` no longer exists in h5py >= 3.0; len() reports
            # the size of the first axis without loading the data.
            print(len(metacluster["seqlets"]))
            if "patterns" not in metacluster["seqlets_to_patterns_result"]:
                continue
            patterns = metacluster["seqlets_to_patterns_result"]["patterns"]
            for pattern_i, pattern_name in enumerate(patterns["all_pattern_names"][:]):
                pattern = patterns[_as_text(pattern_name)]
                key = "metacluster_" + str(metacluster_i) + ".pattern_" + str(pattern_i)
                pfms.setdefault(key, []).extend(
                    _as_text(seqlet)
                    for seqlet in pattern["seqlets_and_alnmts"]["seqlets"]
                )

    return pfms

In [5]:
# Directory holding the TF-MoDISco outputs for this run (HDF5 results and the TOMTOM table).
main_dir="/oak/stanford/groups/akundaje/projects/chrombpnet_paper_new/DNASE_SE/GM12878/DNASE_SE_03.06.2022_simplebias/SIGNAL/modisco/"
# Directory (relative to the notebook) that the ChIP / merged-peak BED files are read from.
odir="uncorrected"
# Short tag used to build the scratch file names under temp/ in compute_cooccurrence_pvals.
name='simpleb'
tfm_results_path=main_dir+"modisco_results_allChroms_profile.hdf5"
# Maps "metacluster_x.pattern_y" to the list of seqlet descriptor strings of that pattern.
pfms = import_tfmodisco_motifs(tfm_results_path)



50000
2704


In [6]:
import pandas as pd
# TOMTOM matches for every pattern: one row per "metacluster_x.pattern_y",
# with the ranked matches in Match_1 ... Match_10 and their q-values.
tomtom = pd.read_csv(main_dir + "profile.tomtom.tsv", header=0, sep="\t")
tomtom

Unnamed: 0,Pattern,Num_Seqlets,Match_1,q-value,Match_2,q-value.1,Match_3,q-value.2,Match_4,q-value.3,...,Match_6,q-value.5,Match_7,q-value.6,Match_8,q-value.7,Match_9,q-value.8,Match_10,q-value.9
0,metacluster_0.pattern_0,4826,RUNX3_HUMAN.H11MO.0.A,0.0721051,RUNX1_HUMAN.H11MO.0.A,0.0721051,RUNX1_MOUSE.H11MO.0.A,0.0721051,RUNX2_MOUSE.H11MO.0.A,0.0721051,...,RUNX2_HUMAN.H11MO.0.A,0.0772546,RUNX1_MA0002.2,0.0988022,ZNF8_HUMAN.H11MO.0.C,0.212776,PEBB_HUMAN.H11MO.0.C,0.212776,PEBB_MOUSE.H11MO.0.C,0.212776
1,metacluster_0.pattern_1,4787,STAT2_HUMAN.H11MO.0.A,0.000513232,STAT2_MOUSE.H11MO.0.A,0.000513232,IRF1_MOUSE.H11MO.0.A,0.000513232,IRF1_HUMAN.H11MO.0.A,0.000555995,...,IRF2_HUMAN.H11MO.0.A,0.00117056,IRF2_MOUSE.H11MO.0.B,0.00117056,IRF1_MA0050.2,0.00117056,STAT1+STAT2_MA0517.1,0.00151708,IRF8_IRF_1,0.00151708
2,metacluster_0.pattern_2,4537,ETV4_MOUSE.H11MO.0.B,9.20246e-05,EHF_HUMAN.H11MO.0.B,9.20246e-05,EHF_MOUSE.H11MO.0.B,9.20246e-05,ERG_HUMAN.H11MO.0.A,0.000246835,...,ETS1_MOUSE.H11MO.0.A,0.000246835,ELF5_HUMAN.H11MO.0.A,0.000246835,ELF2_MOUSE.H11MO.0.C,0.000920274,ELF3_HUMAN.H11MO.0.A,0.00121552,ELF3_MOUSE.H11MO.0.B,0.00121552
3,metacluster_0.pattern_3,4091,CTCF_MA0139.1,6.09884e-12,CTCF_HUMAN.H11MO.0.A,2.03753e-09,CTCF_MOUSE.H11MO.0.A,5.02972e-08,CTCF_C2H2_1,2.94966e-06,...,CTCFL_MOUSE.H11MO.0.A,2.5136e-05,CTCFL_MA1102.1,0.00027618,ZIC2_MOUSE.H11MO.0.C,0.166444,RARA_nuclearreceptor_6,0.178821,ZIC3_HUMAN.H11MO.0.B,0.178821
4,metacluster_0.pattern_4,3178,JDP2_bZIP_1,0.000159645,BATF+JUN_MA0462.1,0.000159645,Jdp2.mouse_bZIP_1,0.000159645,JDP2_MA0655.1,0.00018878,...,NFE2_MA0841.1,0.000376599,NFE2_bZIP_1,0.000376599,FOSL2+JUND_MA1144.1,0.000376599,FOS+JUNB_MA1134.1,0.000384924,FOSB+JUNB_MA1135.1,0.000392399
5,metacluster_0.pattern_5,1941,IRF4_HUMAN.H11MO.0.A,1.08244e-09,IRF4_MOUSE.H11MO.0.A,1.08244e-09,IRF8_HUMAN.H11MO.0.B,1.55217e-07,IRF8_MOUSE.H11MO.0.A,1.55217e-07,...,BC11A_HUMAN.H11MO.0.A,2.0914e-07,SPIB_MOUSE.H11MO.0.A,2.77574e-07,SPI1_HUMAN.H11MO.0.A,5.53722e-06,SPI1_MOUSE.H11MO.0.A,0.0364,ETV5_HUMAN.H11MO.0.C,0.0464994
6,metacluster_0.pattern_6,1846,ATF3_HUMAN.H11MO.0.A,4.87355e-06,MITF_HUMAN.H11MO.0.A,0.00108212,TFE3_HUMAN.H11MO.0.B,0.00108212,TFE3_MOUSE.H11MO.0.A,0.00108212,...,TFEB_MA0692.1,0.00108212,TFEB_bHLH_1,0.00108212,ARNTL_bHLH_1,0.00114537,BHLHE41_MA0636.1,0.00137225,BHLHE41_bHLH_1,0.00137225
7,metacluster_0.pattern_7,1347,PRDM6_HUMAN.H11MO.0.C,0.00283112,ELF5_MOUSE.H11MO.0.A,0.195298,BC11A_HUMAN.H11MO.0.A,0.195298,ETS2_HUMAN.H11MO.0.B,0.195298,...,ZNF263_MA0528.1,0.195298,ZN341_HUMAN.H11MO.0.C,0.195298,STAT1_MOUSE.H11MO.0.A,0.21106,NKX21_HUMAN.H11MO.0.A,0.21106,SPIB_HUMAN.H11MO.0.A,0.232174
8,metacluster_0.pattern_8,1312,KLF12_HUMAN.H11MO.0.C,1.65125e-05,SP1_HUMAN.H11MO.0.A,0.000161468,KLF3_HUMAN.H11MO.0.B,0.000181662,KLF3_MOUSE.H11MO.0.A,0.000181662,...,SP3_HUMAN.H11MO.0.B,0.000456137,SP3_MOUSE.H11MO.0.B,0.000456137,SP4_HUMAN.H11MO.0.A,0.000701169,SP4_MOUSE.H11MO.0.B,0.000701169,SP2_HUMAN.H11MO.0.A,0.000701169
9,metacluster_0.pattern_9,1264,TF65_MOUSE.H11MO.0.A,5.78763e-08,NFKB1_HUMAN.H11MO.1.B,5.78763e-08,NFKB1_MOUSE.H11MO.0.A,5.78763e-08,NFKB2_HUMAN.H11MO.0.B,5.78763e-08,...,RELB_HUMAN.H11MO.0.C,1.419e-07,RELB_MOUSE.H11MO.0.C,1.419e-07,TF65_HUMAN.H11MO.0.A,2.69542e-06,REL_MA0101.1,2.62743e-05,RELA_MA0107.1,3.17912e-05


In [7]:
# Headerless, tab-separated region table; seqlet "example" ids are used as row
# labels into it, column 1 is the region start and column 9 is added to it
# (presumably the summit offset - confirm).
bed_path = "/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/DNASE_SE/GM12878/nautilus_runs/GM12878_03.06.2022_bias_128_4_1234_0.8_fold_0/chrombpnet_model/interpret/GM12878.interpreted_regions.bed"
bed = pd.read_csv(bed_path, header=None, sep="\t")
bed.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,chr12,27893474,27893851,Peak_24663,660,.,14.74297,66.04445,63.2572,235
1,chr4,116358620,116358915,Peak_18136,897,.,17.95841,89.77394,86.8351,149
2,chr2,181334501,181334800,Peak_144679,42,.,2.96768,4.26829,2.35837,134
3,chr2,216017404,216017796,Peak_36493,418,.,5.24476,41.89147,39.29951,188
4,chr1,207084731,207085224,Peak_96908,89,.,3.95387,8.97699,6.88077,380


In [8]:
# Seqlet start/end values are offsets into a window that begins this many bases
# upstream of (region start + column 9). Presumably a 500 bp window centred on
# the peak summit - TODO confirm against the interpretation settings.
SUMMIT_FLANK = 250

# One [chrom, start, end, label] row per seqlet, where the label is
# "<metacluster>_<pattern>_<best TOMTOM match>".
lists = []
for key in pfms:
    print(key)
    best_match = tomtom.loc[tomtom["Pattern"] == key, "Match_1"]
    if best_match.empty:
        print("no TOMTOM row for " + key + "; its seqlets are skipped")
        continue

    # "metacluster_0.pattern_12" -> "0_12_"
    key_parts = key.split("_")
    label_prefix = key_parts[1].replace(".pattern", "") + "_" + key_parts[-1] + "_"
    match_name = best_match.iloc[0]
    # Patterns without any TOMTOM match keep a NaN label, which the
    # co-occurrence step filters out.
    keyd = label_prefix + match_name if isinstance(match_name, str) else np.nan

    for seqlet in pfms[key]:
        # Seqlet descriptors look like "example:<row>,start:<s>,end:<e>,...".
        vals = seqlet.split(",")
        peak_id = int(vals[0].split(":")[1])
        ss = int(vals[1].split(":")[1])
        ee = int(vals[2].split(":")[1])
        window_start = bed.at[peak_id, 1] + bed.at[peak_id, 9] - SUMMIT_FLANK
        lists.append([bed.at[peak_id, 0], window_start + ss, window_start + ee, keyd])
            

metacluster_0.pattern_0
metacluster_0.pattern_1
metacluster_0.pattern_2
metacluster_0.pattern_3
metacluster_0.pattern_4
metacluster_0.pattern_5
metacluster_0.pattern_6
metacluster_0.pattern_7
metacluster_0.pattern_8
metacluster_0.pattern_9
metacluster_0.pattern_10
metacluster_0.pattern_11
metacluster_0.pattern_12
metacluster_0.pattern_13
metacluster_0.pattern_14
metacluster_0.pattern_15
metacluster_0.pattern_16
metacluster_0.pattern_17
metacluster_0.pattern_18
metacluster_0.pattern_19
metacluster_0.pattern_20
metacluster_0.pattern_21
metacluster_0.pattern_22
metacluster_0.pattern_23
metacluster_0.pattern_24
metacluster_0.pattern_25
metacluster_0.pattern_26
metacluster_0.pattern_27
metacluster_0.pattern_28
metacluster_0.pattern_29
metacluster_0.pattern_30
metacluster_0.pattern_31
metacluster_0.pattern_32
metacluster_0.pattern_33
metacluster_1.pattern_0
metacluster_1.pattern_1
metacluster_1.pattern_2
metacluster_1.pattern_3
metacluster_1.pattern_4
metacluster_1.pattern_5
metacluster_1.pa

In [9]:
# Turn the collected [chrom, start, end, label] rows into a table and
# sanity-check how many seqlets were placed.
data = pd.DataFrame(lists)
print(len(lists))
print(data.head())

41105
       0          1          2                          3
0   chr3  110314950  110315000  0_0_RUNX3_HUMAN.H11MO.0.A
1  chr20   47453881   47453931  0_0_RUNX3_HUMAN.H11MO.0.A
2  chr17   61751005   61751055  0_0_RUNX3_HUMAN.H11MO.0.A
3   chr5  100165738  100165788  0_0_RUNX3_HUMAN.H11MO.0.A
4   chr3  152737406  152737456  0_0_RUNX3_HUMAN.H11MO.0.A


In [10]:
# Second name for the same DataFrame object (no copy is made); the later cells use `modisco_hits`.
modisco_hits = data

In [11]:
# Preview: columns are chrom, start, end and the "<metacluster>_<pattern>_<match>" label.
modisco_hits.head()

Unnamed: 0,0,1,2,3
0,chr3,110314950,110315000,0_0_RUNX3_HUMAN.H11MO.0.A
1,chr20,47453881,47453931,0_0_RUNX3_HUMAN.H11MO.0.A
2,chr17,61751005,61751055,0_0_RUNX3_HUMAN.H11MO.0.A
3,chr5,100165738,100165788,0_0_RUNX3_HUMAN.H11MO.0.A
4,chr3,152737406,152737456,0_0_RUNX3_HUMAN.H11MO.0.A


In [12]:

# Intersect the TF ChIP-seq BED with the merged peaks, then read the result in


In [13]:

# Headerless, tab-separated table read from <odir>/chip_in_bed.bed; column 3 is
# used downstream as the TF label and columns 1-2 as the interval bounds.
chip_regions = pd.read_csv(odir+"/chip_in_bed.bed", sep="\t", header=None)


In [14]:
# Preview the first ChIP rows to check the column layout.
chip_regions.head()


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,chr1,804745,804765,EED,549,.,34.30701,-1.0,4.75076,242,chr1,804513,804997
1,chr1,804916,804936,BHLHE40,645,.,20.63287,-1.0,3.57074,165,chr1,804761,805091
2,chr1,804922,804942,RELB,617,.,47.90848,-1.0,3.80925,302,chr1,804630,805234
3,chr1,804926,804946,BHLHE40,1000,.,247.93201,-1.0,4.53008,126,chr1,804810,805073
4,chr1,804930,804950,RAD51,1000,.,105.56439,-1.0,3.62972,94,chr1,804846,805068


In [15]:
# Same preview as above, shown next to the ChIP table for a side-by-side look at both layouts.
modisco_hits.head()

Unnamed: 0,0,1,2,3
0,chr3,110314950,110315000,0_0_RUNX3_HUMAN.H11MO.0.A
1,chr20,47453881,47453931,0_0_RUNX3_HUMAN.H11MO.0.A
2,chr17,61751005,61751055,0_0_RUNX3_HUMAN.H11MO.0.A
3,chr5,100165738,100165788,0_0_RUNX3_HUMAN.H11MO.0.A
4,chr3,152737406,152737456,0_0_RUNX3_HUMAN.H11MO.0.A


In [16]:
#chip_intersect = pd.read_csv("/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/merged/GM12878/temp/in_chip_seq_peaks_full.bed", sep="\t", header=None)


In [17]:
#chip_intersect = chip_intersect.drop_duplicates()
#chip_intersect.head()


In [18]:
#motif_chip_count_matrix.shape
# Size of the universe for the enrichment test: the summed width (end - start)
# of every interval in <odir>/merged.bed.
merged_peaks_path = odir + "/merged.bed"
peaks = pd.read_csv(merged_peaks_path, sep="\t", header=None)
peak_widths = peaks[2] - peaks[1]
total_bases = np.sum(peak_widths)

total_bases

15126506

In [None]:
import scipy
import scipy.stats
import subprocess
import os
def _run_bedtools(args, out_path):
    """Runs `bedtools <args>` with stdout written to `out_path`; raises if bedtools fails."""
    with open(out_path, "w") as out:
        subprocess.run(["bedtools"] + list(args), stdout=out, check=True)


def _merge_intervals(intervals, out_prefix):
    """
    Writes `intervals` to `<out_prefix>.bed`, sorts and merges them with
    bedtools, and returns `(merged_path, covered_bases)` where
    `covered_bases` is the summed width of the merged intervals.
    """
    bed_path = out_prefix + ".bed"
    sorted_path = out_prefix + "_sorted.bed"
    merged_path = out_prefix + "_merged.bed"

    intervals.to_csv(bed_path, sep="\t", header=False, index=False)
    _run_bedtools(["sort", "-i", bed_path], sorted_path)
    _run_bedtools(["merge", "-i", sorted_path], merged_path)

    merged = pd.read_csv(merged_path, sep="\t", header=None)
    return merged_path, np.sum(merged[2] - merged[1])


def _overlap_bases(merged_a, merged_b, out_path):
    """Returns the number of bases shared by two merged BED files (0 if none)."""
    _run_bedtools(["intersect", "-a", merged_a, "-b", merged_b, "-wo"], out_path)
    if os.stat(out_path).st_size == 0:
        # bedtools writes nothing at all when there is no overlap.
        return 0
    overlaps = pd.read_csv(out_path, sep="\t", header=None)
    # With two 3-column inputs, `-wo` appends the overlap width as column 6.
    return np.sum(overlaps[6])


def compute_cooccurrence_pvals(modisco,chip,total_bases,tmp_prefix=None):
    """
    Tests, for every motif label and every ChIP-seq TF, whether the bases
    covered by the motif's seqlets overlap the TF's peaks more than expected
    by chance (one-sided Fisher's exact test on base-pair counts).
    Arguments:
        `modisco`: DataFrame of seqlet intervals; columns 0-2 are
            chrom/start/end and column 3 is a label of the form
            `<metacluster>_<pattern>_<match>`; non-string labels (NaN)
            are ignored
        `chip`: DataFrame of ChIP-seq intervals; columns 0-2 are
            chrom/start/end and column 3 is the TF name
        `total_bases`: number of bases in the universe
        `tmp_prefix`: path prefix for the scratch BED files; defaults to
            `"temp/temp" + name`, using the notebook-level `name`
    Returns `(pvals, odds, unique_motifs, uniq_tfs)`: two M x N arrays
    (p-values and odds ratios) for the M motif labels, ordered by
    metacluster and then pattern index, and the N TFs, followed by the row
    and column labels. Pairs whose contingency table is rejected by the
    test keep the neutral value 1 in both arrays.
    Requires `bedtools` on the PATH; raises `subprocess.CalledProcessError`
    if a bedtools call fails.
    """
    if tmp_prefix is None:
        tmp_prefix = "temp/temp" + name
    tmp_dir = os.path.dirname(tmp_prefix)
    if tmp_dir:
        os.makedirs(tmp_dir, exist_ok=True)

    unique_motifs = list(set(modisco[3]))
    uniq_tfs = list(set(chip[3]))
    print(unique_motifs)

    # Patterns without a database match carry a NaN label. An identity check
    # against `np.nan` misses NaN objects created elsewhere, so keep strings.
    unique_motifs = [m for m in unique_motifs if isinstance(m, str)]

    def _label_order(label):
        parts = label.split("_")
        return int(parts[0]), int(parts[1])

    # Order by metacluster, then by pattern index; every metacluster is kept.
    unique_motifs = sorted(unique_motifs, key=_label_order)

    num_motifs = len(unique_motifs)
    num_chip = len(uniq_tfs)
    pvals = np.ones((num_motifs, num_chip))
    odds = np.ones((num_motifs, num_chip))

    print(num_motifs,num_chip)

    # The merged peak set of a TF does not depend on the motif, so it is
    # built once (on first use) and reused for every motif.
    chip_cache = {}
    for i, motif in enumerate(unique_motifs):
        motif_merged, Xin = _merge_intervals(
            modisco[modisco[3] == motif], tmp_prefix + "_motif"
        )
        print(i)

        for j, tf in enumerate(uniq_tfs):
            if tf not in chip_cache:
                chip_cache[tf] = _merge_intervals(
                    chip[chip[3] == tf], "%s_chip%d" % (tmp_prefix, j)
                )
            chip_merged, Yin = chip_cache[tf]

            motif_chip = _overlap_bases(
                motif_merged, chip_merged, tmp_prefix + "_intersect.bed"
            )

            no_chip_only_motif = Xin - motif_chip
            no_motif_only_chip = Yin - motif_chip
            no_chip_no_motif = total_bases - no_chip_only_motif - no_motif_only_chip - motif_chip

            # Contingency table, in bases (universe = `total_bases`):
            #              no chip   |  has chip
            # no motif        A      |     B
            # -----------------------+-------------
            # has motif       C      |     D
            cont_table = np.array([
                [no_chip_no_motif, no_motif_only_chip],
                [no_chip_only_motif, motif_chip],
            ])
            try:
                odd, pval = scipy.stats.fisher_exact(
                    cont_table, alternative="greater"
                )
            except ValueError as err:
                # Typically a negative cell: the intervals cover more bases
                # than `total_bases` accounts for. Report it and leave the
                # neutral defaults in place instead of reusing the values
                # of the previous pair.
                print("Fisher's exact test failed:", err)
                print(cont_table)
                print(motif_chip)
                print(Xin)
                print(Yin)
                print(tf)
                print(motif)
                continue

            pvals[i, j] = pval
            odds[i,j] = odd

    return pvals, odds, unique_motifs,uniq_tfs 

# Test every motif label against every ChIP-seq TF; the function shells out to
# bedtools for each pair. Rows follow `unique_motifs`, columns follow `uniq_tfs`.
motif_chip_pval_matrix, odds_ratio,unique_motifs,uniq_tfs  = compute_cooccurrence_pvals(modisco_hits,chip_regions,total_bases)



['1_12_Pou2f2.mouse_POU_2', '0_24_MEF2B_HUMAN.H11MO.0.A', '1_8_ZNF384_MA1125.1', '1_9_NFKB2_HUMAN.H11MO.0.B', '1_5_NFE2_MA0841.1', '0_1_STAT2_HUMAN.H11MO.0.A', '1_6_IRF4_MOUSE.H11MO.0.A', '0_26_ZBTB33_MA0527.1', '0_18_ZN143_HUMAN.H11MO.0.A', '0_14_SOX9_HMG_3', '1_4_STAT1+STAT2_MA0517.1', '0_11_PO2F2_HUMAN.H11MO.0.A', '0_5_IRF4_HUMAN.H11MO.0.A', '0_19_ZNF384_MA1125.1', '1_0_NFATC1_MA0624.1', '1_1_ZNF384_MA1125.1', '0_8_KLF12_HUMAN.H11MO.0.C', '0_15_ZNF384_MA1125.1', '0_21_MEF2B_HUMAN.H11MO.0.A', '1_10_CTCF_MA0139.1', '0_3_CTCF_MA0139.1', '0_13_ZN770_HUMAN.H11MO.0.C', '0_30_Pou5f1+Sox2_MA0142.1', '0_33_MEF2D_MOUSE.H11MO.0.A', '0_23_SP5_MOUSE.H11MO.0.C', '0_17_COE1_MOUSE.H11MO.0.A', '0_31_FLI1_HUMAN.H11MO.0.A', '0_22_BARX1_MOUSE.H11MO.0.C', '1_11_MAZ_HUMAN.H11MO.0.A', '0_16_ZN770_HUMAN.H11MO.0.C', '0_25_RFX2_HUMAN.H11MO.0.A', '1_3_RUNX1_HUMAN.H11MO.0.A', '0_9_TF65_MOUSE.H11MO.0.A', '0_6_ATF3_HUMAN.H11MO.0.A', '0_20_RXRA_MOUSE.H11MO.0.A', '1_13_MEF2A_MOUSE.H11MO.0.A', '1_16_MLX_MA0663.1', 

In [None]:
import sklearn.cluster
import scipy.cluster.hierarchy
import scipy.stats

def cluster_matrix_indices(matrix, num_clusters, random_state=None):
    """
    Clusters matrix using k-means. Always clusters on the first
    axis. Returns the indices needed to optimally order the matrix
    by clusters.
    Arguments:
        `matrix`: 2D array whose rows are clustered
        `num_clusters`: requested number of clusters; capped at the number
            of rows
        `random_state`: forwarded to `KMeans`; pass an integer to make the
            returned ordering reproducible
    Returns a 1D integer array that is a permutation of the row indices.
    When fewer than two clusters would result, the rows are returned in
    their original order.
    """
    num_rows = len(matrix)
    num_clusters = min(num_clusters, num_rows)
    if num_clusters < 2:
        # Nothing to order, and hierarchical linkage needs at least two
        # cluster centers to work with.
        return np.arange(num_rows)

    # Perform k-means clustering
    kmeans = sklearn.cluster.KMeans(n_clusters=num_clusters, random_state=random_state)
    cluster_assignments = kmeans.fit_predict(matrix)

    # Perform hierarchical clustering on the cluster centers to determine optimal ordering
    kmeans_centers = kmeans.cluster_centers_
    cluster_order = scipy.cluster.hierarchy.leaves_list(
        scipy.cluster.hierarchy.optimal_leaf_ordering(
            scipy.cluster.hierarchy.linkage(kmeans_centers, method="centroid"), kmeans_centers
        )
    )

    # Order the rows so that the cluster assignments follow the optimal ordering
    cluster_inds = [
        np.where(cluster_assignments == cluster_id)[0]
        for cluster_id in cluster_order
    ]
    return np.concatenate(cluster_inds)

import matplotlib

# Embed TrueType (type 42) fonts so text stays editable in exported PDF/PS files.
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

import matplotlib.pyplot as plt
from matplotlib.colors import TwoSlopeNorm

# Create a figure with the right dimensions
fig, ax = plt.subplots(figsize=(20, 16))

# Row order that places motifs with similar p-value profiles next to each other.
inds = cluster_matrix_indices(motif_chip_pval_matrix,10)

# -log10(0) is infinite, so exact-zero p-values are clamped to the smallest
# non-zero p-value. The clamping is done on a copy because later cells reuse
# `motif_chip_pval_matrix` and should see the real p-values.
zero_mask = motif_chip_pval_matrix == 0
if zero_mask.all():
    # No non-zero entry to borrow from; fall back to the smallest positive float.
    min_val = np.finfo(float).tiny
else:
    min_val = np.min(motif_chip_pval_matrix[~zero_mask])
clamped_pvals = np.where(zero_mask, min_val, motif_chip_pval_matrix)
logpval_matrix = -np.log10(clamped_pvals)

hmap = ax.imshow(logpval_matrix[inds], cmap="Blues")

# Set axes on heatmap (rows are re-ordered by `inds`, columns keep their order)
ax.set_yticks(np.arange(len(unique_motifs)))
ax.set_yticklabels(np.array(unique_motifs)[inds])
ax.set_xticks(np.arange(len(uniq_tfs)))
ax.set_xticklabels(np.array(uniq_tfs), rotation=90)

fig.colorbar(hmap, orientation="horizontal", label="-log10(p-value)")

fig.tight_layout()

plt.show()

In [None]:
# Public import location of `multipletests`; `statsmodels.sandbox.stats.multicomp`
# only re-exports it.
from statsmodels.stats.multitest import multipletests

# Last expression of the cell so that the motif labels are actually displayed.
unique_motifs


In [None]:
from matplotlib.ticker import StrMethodFormatter

# One panel per motif for at most the first MAX_PANELS rows of the matrices.
MAX_PANELS = 20
num_panels = min(MAX_PANELS, len(unique_motifs))

# 12.5 inches of height per panel gives the 30 x 250 canvas for 20 panels.
fig, axs = plt.subplots(num_panels, figsize=(30, 12.5 * num_panels))
# `plt.subplots` returns a bare Axes (not an array) when there is one panel.
axs = np.atleast_1d(axs)

for i in range(num_panels):
    print(unique_motifs[i])
    # Benjamini-Hochberg correction across all TFs for this motif; element 0
    # of the result is the boolean "reject the null" mask.
    p_adjusted = multipletests(motif_chip_pval_matrix[i,:], method='fdr_bh')
    significant = np.asarray(p_adjusted[0])

    # Significant TFs, sorted by decreasing odds ratio.
    significant_odds = odds_ratio[i, significant]
    idxs = np.argsort(significant_odds)[::-1]
    tfs = np.array(uniq_tfs)[significant][idxs]
    counts = significant_odds[idxs]

    ax = axs[i]
    # The categorical bar plot creates one tick per TF; the labels are
    # rotated in the styling loop below.
    ax.bar(tfs, counts,color="#86bf91")

    #ax.set_ylabel('odds ratio')
    #ax.set_ylabel('TFs significantly co-occuring with motif')

    ax.set_title(unique_motifs[i])

    print(tfs[0:5])

for i,x in enumerate(axs):

    # Despine
    x.spines['right'].set_visible(False)
    x.spines['top'].set_visible(False)
    x.spines['left'].set_visible(False)

    # Switch off tick marks but keep the labels (booleans: the strings
    # "on"/"off" are both truthy and would leave every tick mark switched on)
    x.tick_params(axis="both", which="both", bottom=False, top=False, labelbottom=True, left=False, right=False, labelleft=True)

    # Draw horizontal axis lines
    vals = x.get_yticks()
    for tick in vals:
        x.axhline(y=tick, linestyle='dashed', alpha=0.4, color='#eeeeee', zorder=1)

    # Set x-axis label
    x.set_xlabel("Tfs intersecting with motif", weight='bold', size=15)

    # Set y-axis label
    if i == 1:
        x.set_ylabel("TFs", labelpad=50, weight='bold', size=12)

    # Format y-axis label
    x.yaxis.set_major_formatter(StrMethodFormatter('{x:,g}'))

    x.tick_params(axis='x', rotation=90)

# The layout has to be computed before the figure is rendered.
fig.tight_layout()
plt.show()

In [None]:
# Motif labels in the row order of the p-value and odds-ratio matrices.
unique_motifs