In [None]:
#%%appyter init
# Initialise the appyter magic module. init() is handed a zero-argument callable
# (the lambda's default argument is the built-in `globals`) that returns this
# notebook's globals() dictionary when called.
from appyter import magic
magic.init(lambda _=globals: _())

# Augmented Pathway Enrichment Analysis (APEA)
The APEA Appyter performs enrichment analysis against 4 major pathway databases given an input set of genes. It uses Fisher's exact test and reports the results as bar graphs and sorted tables. By augmenting the pathway databases with gene-gene similarity matrices, APEA improves pathway enrichment analysis while also providing additional genes that are likely members of the enriched pathways and are relevant to the biological process under investigation.

In [None]:
from maayanlab_bioinformatics.enrichment.crisp import enrich_crisp, fisher_overlap

%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import numpy as np
from IPython.display import display, FileLink, Markdown, HTML
from statsmodels.stats.multitest import multipletests
from collections import OrderedDict
import urllib

import base64
import csv
import itertools
import json
import requests
import time
import urllib
import networkx as nx

In [None]:
%%appyter hide_code
{# Section that groups the input-set fields (fields declared with section='Set_Section'). #}
{% do SectionField(
    name='Set_Section',
    title='Submit Your Set',
    subtitle='Upload a text file containing your set or copy and paste your set into the text box below (one item per row). You can also try the default set provided.',
    img='gene-library-3.png'
    
) %}
{# Section that groups the similarity selector (fields declared with section='Similarity_Section'). #}
{% do SectionField(
    name='Similarity_Section',
    title='Choose Your Similarity',
    subtitle='Choose a similarity for your augmented pathway enrichment analysis.',
    img='analysis.png'
    
) %}

In [None]:
%%appyter hide_code

{# Tabbed input for the gene set: paste text, upload a file, or use the bundled example.
   The example default is kept on ONE physical line: breaking the literal between the
   backslash and the "n" of a "\n" escape turns it into a line continuation and fuses
   the two neighbouring gene symbols into one. #}
{% set set_kind = TabField(
    name='set_kind',
    label='Set',
    default='Try Example Gene Set',
    description='Paste or upload your set',
    choices={
        'Paste': [ 
            TextField(
                name='set_input1',
                label='Set',
                default='',
                description='Paste your set (one item per row). Names in the set should match the names in the GMT file.',
                section = 'Set_Section'
            )
        ],
        
        'Upload': [
            FileField(
                name='set_filename',
                label='Set File',
                default='',
                description='Upload your set as a text file (one item per row). Names in the set should match the names in the GMT file.',
                section = 'Set_Section'
            ),
        ],
        
        'Try Example Gene Set': [
            TextField(
                name='set_input2',
                label='Set',
                default='TAAR9\nEBF2\nWDR78\nRRAGA\nSPATA18\nSPINT2\nMRGPRD\nCD9\nRBP1\nCYB5RL\nMXRA8\nPM20D1\nITIH5\nEPAS1\nAHCYL2\nPANK2\nPON2\nLRP5\nSLC5A3\nNSL1\nCLDN2\nLRP8\nAQP1\nCLDN1\nTMEM72\nGNG4\nNHLH2\nC10ORF107\nS100A13\nLY6G6C\nPOF1B\nWLS\nC2ORF82\nFZD4\nCOG7\nFZD6\nFOXF1\nFZD7\nERLIN2\nTYSND1\nACADSB\nOR51I2\nPARP12\nPPFIBP2\nATP4A\nALDH7A1\nTCN2\nSLCO5A1\nSFXN4\nPRR15\nMOXD1\nCAPSL\nCOL13A1\nC1ORF177\nWFDC2\nSLC6A2\nDNALI1\nTNS1\nLGALS2\nT\nBLOC1S1\nHMOX1\nPDK4\nLRAT\nMNX1\nSLC19A1\nHOXC9\nSCARF2\nAS3MT\nARGLU1\nACE\nANXA2\nCARD9\nPAX7\nSORCS1\nRAB33B\nPHOX2A\nKIF9\nCLDN16\nPTPRB\nID3\nITPKB\nNCR1\nGAS6\nCC2D1B\nATR\nMYCBP\nIGSF6\nTPH1\nWFIKKN2\nIGSF5\nACY3\nMAOA\nCAB39L\nCTSZ\nPRDM16\nCYP7A1\nLIMD1\nTMEM27\nSLC22A18\nKRT28\nTIMP3\nEMB\nRNF152\nPLEKHN1\nCLIC3\nSTRA6\nCTSC\nCGNL1\nPARP4\nTMEM176B\nELOVL7\nSORBS3\nGPR4\nF5\nGUCA2B\nSERPINB6\nHADHB\nFOXR1\nNBR1\nSHKBP1\nRLIM\nDHRS13\nHRSP12\nCD63\nCCL11\nF13A1\nFAM69C\nKCNA7\nHCCS\nGUCA1A\nADAMTSL2\nLMAN1\nING3\nEGFLAM\nSCML4\nOLFML1\nSOSTDC1\nCTNNA1\nC16ORF78\nFADS1\nCCDC157\nPDGFRB\nCA12\nCD164\nPRLR\nLRRC69\nUNC5CL\nMPEG1\nSLC31A1\nTECRL\nVCAM1\nATP11A\nUBXN10\nZNF558\nDYDC1\nCD69\nS100A8\nFIGF\nPHLDB2\nERVFRD-1\nCD82\nASB14\nGPR65\nVWCE\nTEKT1\nTEKT4\nMSX1\nSLC16A9\nZNF423\nCA14\nIGFBP2\nSLC30A7\nLRRC46\nPDIA2\nPPEF1\nEPHX1\nFANCM\nRBPMS\nTTC21A\nMR1\nDDX52\nLSM5\nKRT31\nMAVS\nTMEM237\nSMO\nC6ORF118\nPGPEP1L\nIL7R\nC21ORF62\nC11ORF97\nDOCK6\nAKNA\nISYNA1\nCD151\nCBFB\nPYROXD2\nSLC2A1\nGSTCD\nLGALS3BP\nHIGD1B\nAK7\nLTBP1\nARHGAP5\nRGS5\nSALL1\nCOBLL1\nFHAD1\nMAEL\nBTLA\nIGFBP7\nODF1\nACAA2\nKL\nTTC16\nEMX2\nTTC12\nGGH\nCCDC37\nCFLAR\nGPR98\nLAMB2\nBICC1\nBMP6\nCUL4B\nDNAJC3\nSP1\nDAP\nDNAJC1\nPIKFYVE\nDMRTA1\nALPL\nMTRF1L\nBCAR3\nKDM5D\nSHC4\nTTC25\nDBH\nDBI\nCHD1\nWNT6\nSPN\nTTC23L\nPLTP\nCYP26B1\nCASP6\nTMEM204\nTMEM207\nCCDC180\nCCDC34\nCA9\nOVGP1\nPLEKHG2\nCPT1A\nPLEKHG3\nMYO10\nRNASET2\nTBC1D9\nNAGA\nPCOLCE\nMUT\nFOXJ1\nSOD3\nATOX1\nKRT73\nSNTB1\nRP2\nRPIA\nCOL8A1\nALS2\nCOL8A2\nSMPDL3A\nPCOLCE2\nSLC25A13\nTAF3\nFOLR1\nITGB2\nHEMGN\nPRPS2\nSLC24A5\nFLT1\nALAS2\nLSP1\nSYCP2\nSEMA3B\nETFB\nPRELP\nZBTB40\nPBXIP1\nSLC4A5\nCLN8\nEFS\nTTR\nRBM3\nHECTD3\nNAGLU\nALDH2\nCTNNAL1\nPCBD1\nCYTH2',
                description='Paste your set (one item per row). Names in the set should match the names in the GMT file.',
                section = 'Set_Section'
            )
        ],
        
    },
    section = 'Set_Section',
) %}

{# Free-text label for the submitted set (fixes the "Input net name" typo in the help text). #}
{% set input_name = StringField(
    name='input_name', 
    label='Input Set Name', 
    default='M1 Genes', 
    description='Input set name', 
    section='Set_Section')
%}

In [None]:
%%appyter code_exec
{# Single-tab selector for the gene-gene similarity used for augmentation.
   The choice strings are used verbatim downstream to build data file names
   (similarity name + '.csv'), so their spelling must not be "corrected" here. #}
{% set similarity_kind = TabField(
    name='similarity_kind',
    label='Similarity',
    default='Select a Similarity',
    description='',
    choices={
        'Select a Similarity': [
            ChoiceField(
                name='similarity', 
                description='Select one similarity for enrichment analysis.', 
                label='Similarity', 
                default='ARCHS4_mRNA_Coexpression', 
                section = 'Similarity_Section',
                choices=[
                    'ARCHS4_mRNA_Coexpression',
                    'Enrichr_Gene_Coocurrence',
                    'GeneRIF_Coocurrence',
                    'Tagger_Cooccurence',
                    'Proteomics_Coexpression',
                ]
            )
        ],
        
        
    },
    section = 'Similarity_Section',
) %}

In [None]:
%%appyter code_exec
# Input Set and Parameters
# The template branch below emits exactly one of `set_input` (pasted / example
# text) or `set_filename` (uploaded file), depending on the selected tab.

{%- if set_kind.raw_value == 'Paste' or set_kind.raw_value == 'Try Example Gene Set'%}
set_input = {{ set_kind.value[0] }}
{%- else %}
set_filename = {{ set_kind.value[0] }}
{%- endif %}

# NOTE(review): `input_name` is rendered inside double quotes here while the set
# fields above are rendered bare -- confirm the rendered value is not quoted twice.
input_name = "{{ input_name }}"
# Hard-coded to the label of the only tab declared by the similarity TabField.
similarity_kind = "Select a Similarity"
similarity_name = "{{ similarity_kind.value[0] }}"
# Base URL that load_similarity() prefixes to pathway and similarity file names.
data_dir="https://appyters.maayanlab.cloud/storage/Augmented_Pathway_Enrichment_Analysis/"

In [None]:
# Table Parameters
# NOTE(review): significance_value is not referenced elsewhere in the visible
# notebook; the 0.05 thresholds in the plotting code are hard-coded.
significance_value = 0.05
display_topk = 20  # number of rows shown in each results table

# Bar Chart Parameters
figure_file_format = ['png', 'svg']  # every figure is saved once per listed extension
color = 'deepskyblue'  # bar colour passed to enrichr_figure for the combined charts
topk = 10  # number of terms drawn in each bar chart

# Tiebow Parameters
# display_num = 5

In [None]:
%%appyter code_exec

{%- if set_kind.raw_value == 'Paste' or set_kind.raw_value == 'Try Example Gene Set' %}
items = set_input.split('\n')
items = [x.strip() for x in items]
{%- else %}
open_set_file = open(set_filename,'r')
lines = open_set_file.readlines()
items = [x.strip() for x in lines]
open_set_file.close()
{%- endif %}

# remove duplicates in items
items = list(OrderedDict.fromkeys(items))

In [None]:
#loading and validating data
def load(similarity_name, items, separate=False):
    """Download a similarity library and check the input set against it.

    Parameters
    ----------
    similarity_name : str
        Library name forwarded to ``load_similarity``.
    items : iterable of str
        Input set; each entry is upper-cased before validation.
    separate : bool, optional
        Forwarded unchanged to ``load_similarity``.

    Returns
    -------
    tuple
        ``(similarity_data, pathways, items)`` where ``items`` is the
        upper-cased copy of the input.

    Raises
    ------
    Exception
        Propagated from ``validate_inputs`` when validation fails.
    """
    library_rows, pathway_names = load_similarity(similarity_name, separate)
    # to upper case
    upper_items = [entry.upper() for entry in items]
    validate_inputs(upper_items, library_rows)
    return library_rows, pathway_names, upper_items

def load_similarity(similarity_name, separate=False):
    """Download the pathway names and the gene rows of one similarity library.

    Both files are fetched from the module-level ``data_dir`` base URL.

    Parameters
    ----------
    similarity_name : str
        Library name; ``similarity_name + '.csv'`` is the rows file.
    separate : bool, optional
        When true, pathway names come from ``<prefix>_Pathways.txt`` where
        ``<prefix>`` is the text before the first underscore of
        ``similarity_name``; otherwise from ``All_Pathways.txt``.

    Returns
    -------
    tuple
        ``(similarity_data, pathways)``: ``similarity_data`` is a list of CSV
        rows (lists of strings) and ``pathways`` a list with one name per line
        of the pathway file.
    """
    # A bare `import urllib` does not guarantee the `request` submodule is loaded.
    import urllib.request

    similarity_filename = similarity_name + '.csv'
    if separate:
        pathway_filename = similarity_name.split('_')[0] + '_Pathways.txt'
    else:
        pathway_filename = 'All_Pathways.txt'

    # Strip only the line terminator: slicing off the last character unconditionally
    # would truncate a final line that has no trailing newline and would leave a
    # stray '\r' on CRLF files.
    with urllib.request.urlopen(data_dir + pathway_filename) as response:
        pathways = [line.decode('utf-8').rstrip('\r\n') for line in response]

    with urllib.request.urlopen(data_dir + similarity_filename) as response:
        lines = [line.decode('utf-8') for line in response.readlines()]
    similarity_data = list(csv.reader(lines))

    return similarity_data, pathways

def validate_inputs(items, similarity_data):
    """Reject an input set that is empty or shares nothing with the library.

    Parameters
    ----------
    items : sequence of str
        Input set.
    similarity_data : iterable of iterables of str
        Library rows; every entry of every row counts as a known name.

    Raises
    ------
    Exception
        If ``items`` is empty, or if no item occurs anywhere in
        ``similarity_data``.
    """
    if len(items) < 1:
        raise Exception('No items in the input set. Please check the background information.')
    known_entries = set(itertools.chain(*similarity_data))
    if set(items).isdisjoint(known_entries):
        raise Exception('No matches in the input set and similarity.')

In [None]:
#Enrichment analysis
def get_similarity_iter(similarity_data, pathways):
    """Lazily yield ``(pathway_name, gene_row)`` pairs.

    The n-th row of ``similarity_data`` is paired with ``pathways[n]``; an
    ``IndexError`` surfaces if ``pathways`` is shorter than ``similarity_data``.
    """
    position = 0
    for members in similarity_data:
        yield pathways[position], members
        position += 1

def get_enrichment_results(items, similarity_data, pathways):
    """Run ``enrich_crisp`` over the library and rank the hits by p-value.

    Returns a list of ``(name, result)`` tuples in ascending order of
    ``result.pvalue``.
    """
    library = get_similarity_iter(similarity_data, pathways)
    # NOTE(review): 20000 and True are passed positionally; presumably the
    # background size and a "keep overlap" flag -- confirm against enrich_crisp.
    hits = list(enrich_crisp(items, library, 20000, True))
    hits.sort(key=lambda hit: hit[1].pvalue)
    return hits


def get_pvalue(row, unzipped_results, all_results):
    """Look up the p-value recorded for ``row['Name']``.

    ``unzipped_results[0]`` holds the term names in the same order as
    ``all_results``. Returns 1 when the name is not among them.
    """
    wanted = row['Name']
    term_names = list(unzipped_results[0])
    try:
        position = term_names.index(wanted)
    except ValueError:
        # Name has no enrichment result.
        return 1
    return all_results[position][1].pvalue
    
def get_qvalue(p_vals):
    """Return the second element of ``multipletests(p_vals, method="fdr_bh")``.

    NOTE(review): presumably the Benjamini-Hochberg adjusted p-values, in the
    order of ``p_vals`` -- confirm against the statsmodels documentation.
    """
    _, adjusted, *_ = multipletests(p_vals, method="fdr_bh")
    return adjusted
    
def get_values(obj_list):
    """Split result objects into four parallel lists.

    Returns ``(pvals, odds_ratio, n_overlap, overlap)``, each a list holding
    the attribute of the same name (``pvalue`` for ``pvals``) of every object,
    in input order.
    """
    records = list(obj_list)
    pvals = [record.pvalue for record in records]
    odds_ratio = [record.odds_ratio for record in records]
    n_overlap = [record.n_overlap for record in records]
    overlap = [record.overlap for record in records]
    return pvals, odds_ratio, n_overlap, overlap
    
def enrichment_analysis(items, similarity_data, pathways):
    """Run the enrichment and tabulate the ranked results.

    Returns
    -------
    tuple
        ``([names], [pvals], df)``: the term names and p-values, each wrapped
        in a one-element list, plus a DataFrame with the columns ``Name``,
        ``p value``, ``odds_ratio``, ``n_overlap``, ``overlap``,
        ``-log(p value)`` and ``q value``.
    """
    ranked = get_enrichment_results(items, similarity_data, pathways)
    columns = list(zip(*ranked))
    names = columns[0]
    pvals, odds_ratio, n_overlap, overlap = get_values(columns[1])

    df = pd.DataFrame({
        "Name": names,
        "p value": pvals,
        "odds_ratio": odds_ratio,
        "n_overlap": n_overlap,
        "overlap": overlap,
    })
    df["-log(p value)"] = -np.log10(df["p value"])
    df["q value"] = get_qvalue(df["p value"].tolist())
    return [list(names)], [pvals], df

def create_download_link(df, title = "Download CSV file of this table", filename = "data.csv"):
    """Build an HTML anchor that downloads ``df`` as a CSV file.

    The CSV text (without the index) is embedded in the link as a base64
    ``data:`` URI.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to export.
    title : str, optional
        Link text; HTML-escaped before insertion.
    filename : str, optional
        Value of the anchor's ``download`` attribute; HTML-escaped.

    Returns
    -------
    IPython.display.HTML
        The anchor wrapped for rich display.
    """
    from html import escape  # function-scope import keeps the block self-contained

    # Named csv_text so the imported `csv` module is not shadowed.
    csv_text = df.to_csv(index = False)
    payload = base64.b64encode(csv_text.encode()).decode()
    link = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    link = link.format(
        payload=payload,
        # Escaping keeps quotes or angle brackets in either value from breaking the markup.
        title=escape(title),
        filename=escape(filename, quote=True),
    )
    return HTML(link)

In [None]:
def bold_augmented(df):
    """Display ``df`` as HTML with augmentation-derived overlap genes in bold.

    For every row, a gene of ``row['overlap']`` that is not among the pathway's
    row in the global ``similarity_data`` with its last 100 entries removed is
    rendered as ``<b>GENE*</b>``. The pathway row is located through the global
    ``pathways`` list.

    Parameters
    ----------
    df : pandas.DataFrame
        Results table with at least the ``Name`` and ``overlap`` columns.
    """
    h = df.to_html(index=False)
    for _, row in df.iterrows():
        pathway = pathways.index(row['Name'])
        # Membership set built once per row instead of scanning the list per gene.
        original_members = set(similarity_data[pathway][:-100])
        genes = list(row['overlap'])
        plain = ', '.join(genes)
        # Mark gene by gene. A substring str.replace() on the joined text would
        # also rewrite other symbols that merely contain the gene name
        # (e.g. gene "T" inside "ATR").
        marked = ', '.join(
            g if g in original_members else '<b>' + g + '*</b>'
            for g in genes
        )
        if marked != plain:
            # NOTE(review): still a textual substitution on the rendered table, so
            # it only takes effect where the cell text equals `plain` (pandas may
            # shorten long cells), and it rewrites any other occurrence of `plain`
            # -- confirm this is acceptable.
            h = h.replace(plain, marked)
    display(HTML(h))
                

In [None]:
# Bar Chart Functions
def enrichr_figure(all_terms, all_pvalues, all_qvalues, all_libraries, bar_color, topk=10): 
    """Draw, save and show a horizontal bar chart of the top enriched terms.

    Parameters
    ----------
    all_terms : list
        Its first element is the list of term names; only the first ``topk``
        are used.
    all_pvalues : list
        Its first element is the list of p-values aligned with the term names.
    all_qvalues : list
        Flat list of q-values aligned with the term names.
    all_libraries : list of str
        Its first element is used for the plot title (underscores shown as
        spaces) and as the stem of the output file names.
    bar_color
        Colour of bars whose p-value is below 0.05.
    topk : int, optional
        Number of terms to draw.

    Returns
    -------
    list of str
        File names written: one per extension in the global
        ``figure_file_format``.
    """
    # Keep only the first `topk` entries; each sequence is re-wrapped in a
    # one-element list so the code below can keep indexing with [i].
    all_terms = [all_terms[0][:topk]]
    all_pvalues = [all_pvalues[0][:topk]]
    all_qvalues = [all_qvalues[:topk]]
    # Bar colors
    if bar_color != 'lightgrey':
        bar_color_not_sig = 'lightgrey'
        edgecolor=None
        linewidth=0
    else:
        # Grey is already the significant colour: draw the others white with an outline.
        bar_color_not_sig = 'white'
        edgecolor='black'
        linewidth=1    

    plt.figure(figsize=(24, 12))
    
    i = 0
    # One colour per bar, chosen from the p-value (threshold hard-coded to 0.05).
    bar_colors = [bar_color if (x < 0.05) else bar_color_not_sig for x in all_pvalues[i]]
    # Despite its name, `fig` holds the object returned by sns.barplot.
    fig = sns.barplot(x=np.log10(all_pvalues[i])*-1, y=all_terms[i], alpha =.3, palette=bar_colors, edgecolor=edgecolor, linewidth=linewidth)
    # The y axis is hidden; term names are written onto the bars further down.
    fig.axes.get_yaxis().set_visible(False)
    fig.set_title(all_libraries[i].replace('_', ' '), fontsize=26)
    fig.set_xlabel('−log₁₀(p‐value)', fontsize=25)
    fig.tick_params(axis='x', which='major', labelsize=20)
    # When every bar is shorter than 1, place ticks every 0.1.
    if max(np.log10(all_pvalues[i])*-1)<1:
        fig.xaxis.set_ticks(np.arange(0, max(np.log10(all_pvalues[i])*-1), 0.1))
    for ii,annot in enumerate(all_terms[i]):
        # Label = term name followed by its q-value; the q-value is prefixed with '*' when < 0.05.
        if all_qvalues[i][ii] < 0.05:
            annot = '  *'.join([annot, str(str(np.format_float_scientific(all_qvalues[i][ii], precision=2)))]) 
        else:
            annot = '  '.join([annot, str(str(np.format_float_scientific(all_qvalues[i][ii], precision=2)))])

        # Start the label slightly to the right of the axis origin.
        title_start= max(fig.axes.get_xlim())/200
        fig.text(title_start, ii, annot, ha='left', wrap = True, fontsize = 26)

    fig.spines['right'].set_visible(False)
    fig.spines['top'].set_visible(False)
    # Save results 
    output_file_names = ['{0}_bar.{1}'.format(all_libraries[i], file_type) for file_type in figure_file_format]
    for f in output_file_names:
        plt.savefig(f, bbox_inches = 'tight')
    
    # Show plot 
    plt.show()
    
    
    return output_file_names

In [None]:
# Tiebow
def tiebow(df, augmented=True):
    """Draw, save and show the "tiebow" graph for the row of ``df`` labelled 0.

    Two square nodes (input set and pathway set) are joined through one bar
    node per overlapping gene. A smaller square drawn on the pathway node is
    sized by the fraction of the pathway covered by the overlap.

    Reads the globals ``pathways``, ``similarity_data``, ``items``,
    ``input_name`` and ``figure_file_format``.

    Parameters
    ----------
    df : pandas.DataFrame
        Results table with ``Name``, ``overlap``, ``p value`` and ``q value``
        columns; the row with index label 0 is drawn.
    augmented : bool, optional
        When true, genes outside the pathway row minus its last 100 entries
        are coloured as augmented members and a legend is added.

    Returns
    -------
    list of str
        File names written: one per extension in ``figure_file_format``.
    """
    i = 0
    p = df['Name'][i]
    overlap_genes = list(df['overlap'][i])
    n_genes = len(overlap_genes)
    pathway = pathways.index(p)
    # Path separators in a pathway name would make savefig() target a
    # sub-directory that does not exist, so neutralise them in the file stem.
    file_stem = df['Name'][i].replace('/', '_').replace('\\', '_')
    if len(p.split()) > 4:
        p = add_breaks3(p)

    # Node areas: the smaller of the two sets gets 5000, the other is scaled by the size ratio.
    n_items = len(items)
    n_members = len(similarity_data[pathway])
    if n_items > n_members:
        b = 5000
        a = 5000 * (n_items / n_members)
    else:
        a = 5000
        b = 5000 * (n_members / n_items)
    d = b * n_genes / n_members

    plt.figure(figsize=(12,12))
    G = nx.Graph()
    G.add_node('Input\n Set', size=a, pos=(-25,25), color='mediumorchid', s='s')
    G.add_node('Pathway\n Set', size=b, pos=(25,25), color='salmon', s='s')
    G.add_node('', size=d, pos=(25,25), color='mediumorchid', s='s')
    original_members = set(similarity_data[pathway][:-100])
    for j, g in enumerate(overlap_genes):
        c = 'lightskyblue'
        if augmented and g not in original_members:
            c = 'lightgreen'
        # Spread the genes over y in [0, 50]. A single gene is centred level with
        # the set nodes; the original formula divided by zero in that case.
        y = j * 50 / (n_genes - 1) if n_genes > 1 else 25
        G.add_node(g, size=15000, pos=(0, y), color=c, s='_')
        G.add_edge(g, 'Input\n Set')
        G.add_edge(g, 'Pathway\n Set')

    positions = nx.get_node_attributes(G, 'pos')
    nodeShapes = set(aShape[1]['s'] for aShape in G.nodes(data=True))
    # For each node class...
    for aShape in nodeShapes:
        # Gene bars ('_' marker) are drawn with a thick line, squares with a thin one.
        line_width = 50 if aShape == '_' else 1
        same_shape = [node for node in G.nodes(data=True) if node[1]['s'] == aShape]
        # ...draw the subset of nodes that share the marker at their stored positions.
        nx.draw_networkx_nodes(G, positions,
                               node_shape=aShape, alpha=1,
                               linewidths=line_width,
                               node_size=[node[1]['size'] for node in same_shape],
                               node_color=[node[1]['color'] for node in same_shape],
                               nodelist=[node[0] for node in same_shape])

    # Finally, draw the edges between the nodes
    nx.draw_networkx_edges(G, positions)
    nx.draw_networkx_labels(G, positions, font_size=20)

    # Legend
    if augmented:
        plt.text(-30,-4.5, 'P-value for augmented pathway: {:.2e}\nQ-value for augmented pathway: {:.3e}'.format(df['p value'][i],df['q value'][i]), fontsize=18)
        plt.scatter([],[], c='lightskyblue', label='Original Pathway Member')
        plt.scatter([],[], c='lightgreen', label='Augmented Pathway Member')
        legend = plt.legend(loc=4, scatterpoints=1, fontsize=15)
        # Matplotlib 3.7 renamed Legend.legendHandles to legend_handles; support both.
        handles = getattr(legend, 'legend_handles', None)
        if handles is None:
            handles = legend.legendHandles
        handles[0]._sizes = [100]
        handles[1]._sizes = [100]
    else:
        plt.text(-30,-4.5, 'P-value for unaugmented pathway: {:.2e}\nQ-value for unaugmented pathway: {:.3e}'.format(df['p value'][i],df['q value'][i]), fontsize=18)

    plt.text(-25,40, input_name+'\n\u2193', fontsize=20,
        horizontalalignment="center")
    plt.text(25,40, p+'\n\u2193', fontsize=20,
        horizontalalignment="center")

    plt.axis('off')
    axis = plt.gca()
    # Widen the x range so the large square nodes are not clipped.
    axis.set_xlim([1.3*x for x in axis.get_xlim()])
    plt.tight_layout()

    # Save results
    output_file_names = ['{0}_tiebow.{1}'.format(file_stem, file_type) for file_type in figure_file_format]
    for f in output_file_names:
        plt.savefig(f, bbox_inches = 'tight')

    plt.show()

    return output_file_names
    
    
#     display(tab)


def add_breaks3(string):
    """Re-wrap ``string`` so every line holds at most three words.

    Words are split on whitespace, joined by single spaces within a line, and
    the lines are joined with newlines.
    """
    words = string.split()
    rows = []
    start = 0
    while start < len(words):
        rows.append(' '.join(words[start:start + 3]))
        start += 3
    return '\n'.join(rows)

In [None]:
%%appyter code_exec 
# Reader-facing explanation of the three output types (bar charts, tables, tiebow graphs).
display(Markdown(f"The bar charts display the top {topk} enriched terms for the chosen pathway database similarity based on their p-values, along with their q-values. Colored bars correspond to terms with significant p-values (<0.05). An asterisk next to a q-value indicates the term also has a significant q-value (<0.05). The bar chart is downloadable as an image in the PNG and SVG formats. "))
display(Markdown(f"The tables display the top {display_topk} enrichment analysis matching pathways for the chosen database. The table contains the pathway name, p-value, odds ratio, the number of overlapping genes, the overlapping genes, and -log(p-value). The bolded genes with an asterisk in the overlap column are the additional genes added to the pathway via the augmentation. The table is sorted by p-values in ascending order. The full results are downloadable in CSV format."))
display(Markdown(f"The tiebow graphs show the overlapping genes between the input set and the top pathway result based on p-values. For the augmented results, the genes are colored by whether they were in the original pathway set or from augmentation. The set squares are sized proportionally, and the purple square within the pathway set node indicates the size of overlap relative to the pathway set. The tiebow graph is downloadable as an image in the PNG and SVG formats."))

In [None]:
%%appyter code_exec 
# pyplot.get_cmap is used because matplotlib.cm.get_cmap was deprecated in
# Matplotlib 3.7 and is no longer available in later releases.
plasma = plt.get_cmap('plasma')
display(Markdown(f"# Enrichment Analysis with Unaugmented Pathway Databases"))
display(Markdown(f"Enrichment analysis results are separated for each unaugmented database."))
db = ['KEGG', 'PANTHER', 'Reactome', 'WikiPathways']
for i,l in enumerate(db):
    # `file` is the library name: it selects '<db>_Unaugmented.csv' and '<db>_Pathways.txt'.
    file = l+'_Unaugmented'
    display(Markdown(f"### {l+' Unaugmented'}"))
    similarity_data, pathways, items = load(file, items, separate=True)
    results, pvals, results_df = enrichment_analysis(items, similarity_data, pathways)

    output_file_names = enrichr_figure(results, pvals, results_df["q value"].tolist(), [file], plasma(i*0.25), topk)
    display(Markdown(f"*Figure {2*i+1}. Bar chart of the top {topk} enriched terms from the {file.replace('_', ' ')} similarity based on p-values, along with their corresponding q-values. Colored bars correspond to terms with significant p-values (<0.05). An asterisk next to a q-value indicates the term also has a significant q-value (<0.05)*"))

    # Download Bar Chart
    # Inner loops use their own names so they clobber neither the database
    # index `i` nor the library name `file`.
    for k, saved_path in enumerate(output_file_names):
        display(FileLink(saved_path, result_html_prefix=str('Download ' + figure_file_format[k] + ': ')))

    if 'p value' in results_df.columns:
        sorted_df = results_df.sort_values(by = ['p value'])
        filtered_df = sorted_df.iloc[:display_topk]
        if len(filtered_df) != 0:
            display(HTML(filtered_df.to_html(index = False)))
            display(Markdown(f"*Table {i+1}. Enrichment analysis results from the {l.replace('_', ' ')} similarity.*"))
            display(create_download_link(sorted_df))

    output_file_names = tiebow(sorted_df, augmented=False)
    display(Markdown(f"*Figure {2*i+2}. Tiebow graph of the overlapping genes between the input set and the top pathway result based on p-values. The purple square within the pathway set indicates the size of overlap relative to the pathway set.*"))

    # Download Tiebow Graph
    for k, saved_path in enumerate(output_file_names):
        display(FileLink(saved_path, result_html_prefix=str('Download ' + figure_file_format[k] + ': ')))

In [None]:
%%appyter code_exec 
# pyplot.get_cmap is used because matplotlib.cm.get_cmap was deprecated in
# Matplotlib 3.7 and is no longer available in later releases.
plasma = plt.get_cmap('plasma')
display(Markdown(f"# Enrichment Analysis by Pathway Database Augmented with {similarity_name.replace('_', ' ')}"))
display(Markdown(f"Enrichment analysis results are separated for each augmented pathway database."))
# Data files are keyed by the text before the first underscore of the similarity name.
suf = similarity_name.split('_')[0]
db = ['KEGG', 'PANTHER', 'Reactome', 'WikiPathways']
for i,l in enumerate(db):
    file = l+'_Augmented_with_'+suf
    display(Markdown(f"### {l+' Augmented'}"))
    similarity_data, pathways, items = load(file, items, separate=True)
    results, pvals, results_df = enrichment_analysis(items, similarity_data, pathways)

    output_file_names = enrichr_figure(results, pvals, results_df["q value"].tolist(), [l+'_Augmented_with_'+similarity_name], plasma(i*0.25), topk)
    display(Markdown(f"*Figure {2*i+9}. Bar chart of the top {topk} enriched terms from the {l+' similarity augmented with '+similarity_name.replace('_', ' ')} based on p-values, along with their corresponding q-values. Colored bars correspond to terms with significant p-values (<0.05). An asterisk next to a q-value indicates the term also has a significant q-value (<0.05)*"))

    # Download Bar Chart
    # Inner loops use their own names so they clobber neither the database
    # index `i` nor the library name `file`.
    for k, saved_path in enumerate(output_file_names):
        display(FileLink(saved_path, result_html_prefix=str('Download ' + figure_file_format[k] + ': ')))

    if 'p value' in results_df.columns:
        sorted_df = results_df.sort_values(by = ['p value'])
        filtered_df = sorted_df.iloc[:display_topk]
        if len(filtered_df) != 0:
            bold_augmented(filtered_df)
            display(Markdown(f"*Table {i+5}. Enrichment analysis results from the augmented {l} similarity. Bolded terms with an asterisk are genes from augmenting the {l} pathways with {similarity_name.replace('_', ' ')}.*"))
            display(create_download_link(sorted_df))

    output_file_names = tiebow(sorted_df)
    display(Markdown(f"*Figure {2*i+10}. Tiebow graph of the overlapping genes between the input set and the top pathway result based on p-values. The purple square within the pathway set indicates the size of overlap relative to the pathway set.*"))

    # Download Tiebow Graph
    for k, saved_path in enumerate(output_file_names):
        display(FileLink(saved_path, result_html_prefix=str('Download ' + figure_file_format[k] + ': ')))

In [None]:
%%appyter code_exec
display(Markdown(f"# Combined Enrichment Analysis with All 4 Unaugmented Databases"))
display(Markdown(f"Enrichment analysis based on the unaugmented data."))

# `separate` keeps its default (False), so load() pairs 'Unaugmented.csv' with
# 'All_Pathways.txt'.
lib = 'Unaugmented'
similarity_data, pathways, items = load(lib, items)
results, pvals, results_df = enrichment_analysis(items, similarity_data, pathways)

display(Markdown(f"## Bar Chart"))
output_file_names = enrichr_figure(results, pvals, results_df["q value"].tolist(), ['Combined Unaugmented Databases'], color, topk)
display(Markdown(f"*Figure 17. Bar chart of the top {topk} enriched terms across all 4 pathway databases based on p-values, along with their corresponding q-values. Colored bars correspond to terms with significant p-values (<0.05). An asterisk next to a q-value indicates the term also has a significant q-value (<0.05)*"))     
# Download Bar Chart
for i, file in enumerate(output_file_names):
    display(FileLink(file, result_html_prefix=str('Download ' + figure_file_format[i] + ': ')))

display(Markdown(f"## Results Table"))
if 'p value' in results_df.columns:
    sorted_df = results_df.sort_values(by = ['p value'])
    # Only the first display_topk rows are shown; the download link carries the full table.
    filtered_df = sorted_df.iloc[:display_topk]
    if len(filtered_df) != 0:
        display(HTML(filtered_df.to_html(index = False)))
        display(Markdown(f"*Table 9. Enrichment analysis results of {lib.replace('_', ' ')}*"))        
        display(create_download_link(sorted_df))    
        
# NOTE(review): sorted_df is only assigned inside the `if` above; this call
# relies on that branch having run.
output_file_names = tiebow(sorted_df, augmented=False)
display(Markdown(f"*Figure 18. Tiebow graph of the overlapping genes between the input set and the top pathway result based on p-values. The purple square within the pathway set indicates the size of overlap relative to the pathway set.*"))     

# Download Tiebow Graph
for i, file in enumerate(output_file_names):
    display(FileLink(file, result_html_prefix=str('Download ' + figure_file_format[i] + ': ')))



In [None]:
%%appyter code_exec
# Section heading; underscores in the similarity name are shown as spaces.
display(Markdown(f"# Combined Enrichment Analysis with All 4 Pathway Databases Augmented with {similarity_name.replace('_', ' ')}"))

In [None]:
%%appyter markdown
## Bar Chart

In [None]:
%%appyter code_exec
# `separate` keeps its default (False), so load() pairs '<similarity_name>.csv'
# with 'All_Pathways.txt'. The globals assigned here are read by the next cells.
similarity_data, pathways, items = load(similarity_name, items)
results, pvals, results_df = enrichment_analysis(items, similarity_data, pathways)

output_file_names = enrichr_figure(results, pvals, results_df["q value"].tolist(), ['Combined Databases Augmented with '+similarity_name], color, topk)
display(Markdown(f"*Figure 19. Bar chart of the top {topk} enriched terms across all 4 pathway augmented databases based on p-values, along with their corresponding q-values. Colored bars correspond to terms with significant p-values (<0.05). An asterisk next to a q-value indicates the term also has a significant q-value (<0.05)*"))     

# Download Bar Chart
for i, file in enumerate(output_file_names):
    display(FileLink(file, result_html_prefix=str('Download ' + figure_file_format[i] + ': ')))

In [None]:
%%appyter code_exec    
# Uses results_df produced by the preceding bar-chart cell.
display(Markdown(f"## Results Table"))
if 'p value' in results_df.columns:
    sorted_df = results_df.sort_values(by = ['p value'])
    # Only the first display_topk rows are shown; the download link carries the full table.
    filtered_df = sorted_df.iloc[:display_topk]
    if len(filtered_df) != 0:
        bold_augmented(filtered_df)
        display(Markdown(f"*Table 10. Enrichment analysis results from the augmented combined similarity. Bolded terms with an asterisk are genes from augmenting the pathways with {similarity_name.replace('_', ' ')}.*")) 
        display(create_download_link(sorted_df))

In [None]:
%%appyter markdown
## Tiebow Graph

In [None]:
%%appyter code_exec   
# Uses sorted_df assigned by the preceding results-table cell.
output_file_names = tiebow(sorted_df)
display(Markdown(f"*Figure 20. Tiebow graph of the overlapping genes between the input set and the top pathway result based on p-values. The purple square within the pathway set indicates the size of overlap relative to the pathway set.*"))     

# Download Tiebow Graph
for i, file in enumerate(output_file_names):
    display(FileLink(file, result_html_prefix=str('Download ' + figure_file_format[i] + ': ')))

# Link to Enrichr

In [None]:
# Get complete enrichment analysis results from Enrichr

ENRICHR_URL = 'https://maayanlab.cloud/Enrichr/addList'
genes_str = '\n'.join(items)
description = 'Input gene list'
# Each value is a (filename, content) pair with no file name, which makes
# requests send the fields as multipart form data.
payload = {
    'list': (None, genes_str),
    'description': (None, description)
}

# A timeout keeps the notebook from hanging indefinitely if Enrichr does not answer.
response = requests.post(ENRICHR_URL, files=payload, timeout=60)
if not response.ok:
    raise Exception('Error analyzing gene list')

data = json.loads(response.text)
# .get() lets a reply without "shortId" reach the fallback message below
# instead of raising KeyError.
short_id = data.get("shortId")


if short_id:
    url = 'https://maayanlab.cloud/Enrichr/enrich?dataset=' + short_id
    display(HTML(f"<div style='font-size:1.25rem; padding:1rem 0;'><a href='{url}'>Access additional enrichment analysis results for the input set from the Enrichr website. </a></div>"))
else:
    display(HTML("<div style='font-size:1.5rem; padding:1rem 0;'><b>No Enrichr results available for the current query</b></div>"))
    # The closing </div> was missing from this message.
    display(HTML("<div style='font-size:1rem; padding:1rem 0;'>Please try again with a different input list and/or similarity.</div>"))