In [None]:
#%%appyter init
from appyter import magic
magic.init(lambda _=globals: _())

# AGSEA Appyter

The Augmented Gene Set Enrichment Analysis (AGSEA) Appyter performs and visualizes standard gene set enrichment analysis (GSEA) (Subramanian et al., 2005), as well as augmented GSEA. The augmentation adds novel genes that are relevant to the input signature but are potentially missing from the annotated prior-knowledge gene set.  

AGSEA outputs a table containing the enrichment results statistics for the top gene sets from the chosen gene set library selected by the user. The full results are available for download as a CSV file, and an interactive GSEA plot is also generated. The AGSEA Appyter also highlights genes that are co-expressed with each annotated gene set and are highly ranked within the input signature. These genes are typically understudied genes that should be further explored as relevant to the biological process under investigation. 

In [None]:
#imports
import numpy as np
import pandas as pd
import scipy
import math
from decimal import Decimal

#loading libraries
import requests
import urllib.request
from flask import url_for

#performing GSEA
import gseapy as gp
from gseapy.gsea import GSEA
from maayanlab_bioinformatics.dge import limma_voom
from maayanlab_bioinformatics.dge.characteristic_direction import _chdir

#creating visualizations
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from IPython.display import display, HTML, IFrame

In [None]:
%%appyter hide
{# Declares the four form sections ('gsea', 'data', 'library', 'result'). #}
{# Input fields declared in later cells attach to a section through section='<name>'. #}
{% do SectionField(
    name='gsea', 
    title='Augmented Gene Set Enrichment Analysis', 
    subtitle='Perform AGSEA by inputting the following files. Please make sure that the files are loaded properly before submitting, or else there will be an error. Follow the [link](https://software.broadinstitute.org/cancer/software/gsea/wiki/index.php/Data_formats) to learn about the proper data formats.',
    img='plot_icon.png'
) %}
{# Section holding the ranked-list / expression-data inputs. #}
{% do SectionField(
    name='data', 
    title='Data Submission', 
    subtitle='Option 1: Upload a preranked gene list (.rnk). Option 2: Upload an expression dataset with gene counts (.gct) and a phenotype labels file (.cls), and then choose a ranking method. Please make sure that the gene symbols used in the expression file match the types used in the library file for the appyter to work properly.',
    img='data_icon.png'
) %}
{# Section holding the gene set library choice. #}
{% do SectionField(
    name='library', 
    title='Library', 
    subtitle='Choose an Enrichr gene set library or upload your own (.gmt). ',
    img='library_icon.png'
) %}
{# Section holding the result-display options. #}
{% do SectionField(
    name='result', 
    title='Result Options', 
    subtitle='Choose how to get the top gene sets and whether you want co-expressed genes. NOTE: co-expressed genes will only be retrieved for certain Enrichr libraries and are not available for your own library files.',
    img='results_icon.png'
) %}

In [None]:
%%appyter hide
{# Data input: either a preranked list (Option 1) or an expression matrix plus #}
{# phenotype labels and a ranking method (Option 2). The order of the fields inside #}
{# each option matters: a later cell reads them positionally via data_tab.value[i]. #}
{% set data_tab = TabField(
    name='data_tab',
    label='Submit Your Data',
    default='Option 1',
    description='',
    choices={
        'Option 1': [FileField(
                name='ranked_filename',
                label='Preranked Gene List',
                default='logFC_ranked_GSE70466.rnk',
                examples={'logFC_ranked_GSE70466.rnk': 'https://appyters.maayanlab.cloud/storage/GSEA_Appyter/logFC_ranked_GSE70466.rnk'},
                description='RNK file containing preranked genelist.',
                required='true',
                section='data',
            )],
        'Option 2': [FileField(
                name='expression_filename',
                label='Expression Dataset',
                description='GCT file containing sample expression dataset.',
                default='GSE70466.gct',
                examples={
        'GSE70466.gct': 'https://appyters.maayanlab.cloud/storage/GSEA_Appyter/GSE70466.gct'},
                required='true',
                section='data'),
            FileField(
                name='phenotype_filename',
                label='Phenotype Labels',
                description='CLS file containing the phenotype labels of the samples.',
                default='GSE70466.cls',
                examples={
        'GSE70466.cls': 'https://appyters.maayanlab.cloud/storage/GSEA_Appyter/GSE70466.cls'},
                required='true',
                section='data'),
            ChoiceField(
                name='ranking_method',
                label='Method for Ranking Genes',
                choices={'Characteristic Direction': 'cd',  
                'Difference of Class Means': 'diff_of_classes',
                'Limma': 'limma',
                'Log2 Ratio of Class Means': 'log2_ratio_of_classes',
                'Ratio of Class Means (Fold Change)': 'ratio_of_classes',
                'Signal-to-Noise': 'signal_to_noise', 
                'T-test': 't_test'},
                default='Characteristic Direction',
                description='The method used to calculate a correlation or ranking.',
                required='true',
                section='data')]
            },
        section='data')
    %}

In [None]:
%%appyter hide
{# Library input: either an uploaded GMT file or one of the listed Enrichr libraries. #}
{# Each option holds a single field, which a later cell reads as library_tab.value[0]. #}
{% set library_tab = TabField(
    name='library_tab',
    label='Submit Your Library',
    default='Upload',
    description='',
    choices={'Upload':[FileField(
            name='geneset_filename',
            label='Library',
            description='GMT file containing sample gene set.',
            default='Data_Acquisition_Method.gmt',
            examples={'Data_Acquisition_Method.gmt': 'https://appyters.maayanlab.cloud/storage/GSEA_Appyter/Data_Acquisition_Method_Most_Popular_Genes.gmt'}
            )],
        'Select an Enrichr Library': [ChoiceField(
            name='enrichr_choice',
            label='Enrichr Library',
            default='MSigDB_Hallmark_2020',
            description='',
            choices=['ChEA_2016',
            'ARCHS4_TFs_Coexp',
            'COVID-19_Related_Gene_Sets', 
            'ENCODE_TF_ChIP-seq_2015', 
            'KEGG_2021_Human', 
            'WikiPathway_2021_Human', 
            'Reactome_2016', 
            'GO_Biological_Process_2021', 
            'GO_Cellular_Component_2021', 
            'GO_Molecular_Function_2021', 
            'MGI_Mammalian_Phenotype_Level_4_2021', 
            'MSigDB_Hallmark_2020',
            'Human_Phenotype_Ontology', 
            'GWAS_Catalog_2019', 
            'UK_Biobank_GWAS_v1', 
            'DepMap_WG_CRISPR_Screens_Broad_CellLines_2019',
            'DepMap_WG_CRISPR_Screens_Sanger_CellLines_2019', 
            'DisGeNET', 
            'CellMarker_Augmented_2021',  
            'HuBMAP_ASCT_plus_B_augmented_w_RNAseq_Coexpression', 
            'Allen_Brain_Atlas_10x_scRNA_2021', 
            'Pfam_InterPro_Domains'])]
        },
    section='library')
%}

In [None]:
%%appyter hide
{# How many gene sets are shown in the results table and plots (1 to 50). #}
{% set result_number = IntField(
    name='result_number',
    label='Number of Top Gene Sets',
    min=1,
    max=50,
    default=5,
    description='The number of gene sets that will be displayed in the enrichment results table and plots.',
    section='result')
%}
{# Statistic used to pick the top gene sets; the choice values are the column names #}
{# the results dataframe is sorted by in a later cell. #}
{% set result_criteria = ChoiceField(
    name='result_criteria',
    label='Criteria for Top Gene Sets',
    choices={'Enrichment Score': 'es', 'Normalized Enrichment Score': 'nes', 'P-Value': 'pval', 'False Discovery Rate': 'fdr'},
    default='P-Value',
    description='The top gene sets will be calculated with the chosen criteria.',
    section='result')
%}
{# Switch for the augmented (co-expressed genes) part of the notebook. #}
{% set augmentation = BoolField(
    name='augmentation', 
    label='Apply Augmented GSEA?', 
    default='false',
    description='Choose whether you want to get co-expressed genes.',
    section='result')
%}

In [None]:
%%appyter code_exec
# Copies the submitted form values into plain Python variables used by the cells below.
result_number = {{ result_number.value }}
result_criteria = '{{ result_criteria.value }}'
augmentation = {{augmentation.value}}

{%- if library_tab.raw_value == 'Upload'%}
# Uploaded library: the display name is the file name with underscores and '.gmt' removed.
# NOTE(review): the file value is emitted without surrounding quotes, so it presumably
# renders as an already-quoted string literal -- confirm against appyter's FileField.
library_tab = 'Upload'
library_filename = {{ library_tab.value[0] }}
library_name = library_filename.replace('_', ' ').replace('.gmt', '')

{%- else %}
# Enrichr library: the library name doubles as the local file name.
library_tab = 'Select an Enrichr Library'
library_filename = '{{ library_tab.value[0] }}'
library_name = '{{ library_tab.value[0] }}'
{%- endif %}

{%- if data_tab.raw_value == 'Option 1'%}
# Option 1 defines only ranked_filename.
data_tab = 'Option 1'
ranked_filename = {{ data_tab.value[0] }}

{%- else %}
# Option 2 defines the expression file, the phenotype file and the ranking method;
# values are read positionally, in the order the fields were declared in the tab.
data_tab = 'Option 2'
expression_filename = {{ data_tab.value[0] }}
phenotype_filename = {{ data_tab.value[1] }}
ranking_method = '{{ data_tab.value[2] }}'
{%- endif %}

## Loading Data

Loading the data may take a while... Please be patient. It may take up to five minutes for the data to load, for inputs similar in size to the default examples and input choices. Once the data is done loading, a message will be printed underneath this section. 

In [None]:
#checks if inputs are valid
def checkInputs():
    """Validate the submitted file names before any data is loaded.

    Reads the module-level ``data_tab``, ``library_tab`` and ``*_filename``
    variables. Only the variables belonging to the selected data option are
    touched, because the other option's variables are never defined.

    Raises:
        Exception: a required file name is empty or has the wrong extension.
    """
    if data_tab == 'Option 1':
        if '' in (library_filename, ranked_filename):
            raise Exception('Please submit necessary materials for Option 1 to continue.')
        if not ranked_filename.endswith('.rnk'):
            raise Exception('Please upload a RNK file (ends in .rnk).')
    else:
        if '' in (library_filename, expression_filename, phenotype_filename):
            raise Exception('Please submit necessary materials for Option 2 to continue.')
        if not expression_filename.endswith('.gct'):
            raise Exception('Please upload a GCT file (ends in .gct).')
        if not phenotype_filename.endswith('.cls'):
            raise Exception('Please upload a CLS file (ends in .cls).')
    # the extension is only enforced for uploads; Enrichr selections are plain library names
    uploaded_library = library_tab == 'Upload'
    if uploaded_library and not library_filename.endswith('.gmt'):
        raise Exception('Please upload a GMT file (ends in .gmt).')

#performs GSEA
def gsea():
    """Run gseapy's preranked GSEA on the module-level ``ranked`` table.

    The gene sets come from the module-level ``library_filename``; gene sets
    larger than 500 genes are excluded through ``max_size``.

    Returns:
        The object returned by ``gp.prerank``.
    """
    return gp.prerank(rnk=ranked, gene_sets=library_filename, max_size=500)

In [None]:
#loads a preranked list and converts it to a dataframe for Option 1
#creates a ranked list with chosen ranking method and converts to dataframe for Option 2
def loadRanked():
    """Build the ranked gene table that GSEA is run on.

    Reads the module-level ``data_tab`` and, depending on the option, either
    ``ranked_filename`` or ``expression_filename`` / ``phenotype_filename`` /
    ``ranking_method`` (plus ``library_filename``).

    Returns:
        tuple: ``(values, pos, neg)``. ``values`` is a DataFrame with the
        columns 'Gene' and 'Rank'. ``pos`` and ``neg`` are the two phenotype
        names unpacked from the CLS parser for Option 2, and ``None`` for
        Option 1.

    Raises:
        Exception: the Option 1 file has fewer than 5000 rows.
    """
    if data_tab == 'Option 1':
        # tab-separated file without a header row
        values = pd.read_csv(ranked_filename, header=None, sep="\t")
        if len(values.index) < 5000:
            raise Exception('Current ranked gene list has less than 5000 genes. We expect all human coding genes to be in the list, which is around 20000.')
        # sort by the second column, highest score first
        values.sort_values(by=values.columns[1], ascending=False, inplace=True)
        values.reset_index(drop=True, inplace=True)
        # assumes the file has exactly two columns -- the assignment fails otherwise
        values.columns = ['Gene', 'Rank']
        pos, neg = None, None
    elif data_tab == 'Option 2':
        # NOTE(review): the parser's first two return values are bound as (neg, pos)
        # here -- confirm this matches the return order of gseapy's gsea_cls_parser.
        neg, pos, classes = gp.parser.gsea_cls_parser(phenotype_filename)
        # first row of the GCT file is skipped; first column becomes the index
        exp = pd.read_csv(expression_filename, skiprows=1, header=None, index_col=0, 
            sep="\t")
        # the GSEA object is used here only for its load_data step
        obj = GSEA(data=exp, gene_sets=library_filename, classes=classes)
        exp_df, classesDict = obj.load_data(classes)
        if ranking_method == 'cd':
            data = exp_df.to_numpy()
            # sample codes passed to _chdir: 1 for neg, 2 for pos, 0 for anything else
            sampleclass = [1 if s == neg else 2 if s == pos else 0 for s in classes]
            genes = exp_df.index.tolist()
            chdir_res = _chdir(data=data, sampleclass=sampleclass,
                genes=genes)
            # entries are sorted on their first element (descending), then flipped into
            # a {second element: first element} mapping
            # presumably (score, gene) pairs -- verify against _chdir
            chdir_res = sorted(chdir_res, key=lambda x: x[0], reverse=True)
            chdir_res = {k:v for v,k in chdir_res}
            values = pd.Series(data=chdir_res)
        elif ranking_method == 'limma':
            # split the expression columns by phenotype using their position in `classes`
            neg_columns = [i for i in range(len(classes)) if classes[i] == neg]
            neg_df = exp_df.iloc[:,neg_columns]
            pos_columns = [i for i in range(len(classes)) if classes[i] == pos]
            pos_df = exp_df.iloc[:,pos_columns]
            limma_res = limma_voom.limma_voom_differential_expression(
                controls_mat=neg_df, cases_mat=pos_df, all_data_mat=exp_df)
            # genes are ranked by the 'logFC' column of the limma result, highest first
            values = pd.Series(data=limma_res['logFC'], index=limma_res.index)
            values.sort_values(axis=0, ascending=False, inplace=True)
        else:
            # every other method name is passed straight to gseapy's ranking_metric
            values = gp.algorithm.ranking_metric(df=exp_df, pos=pos, neg=neg, 
            method=ranking_method, classes=classesDict, ascending=False)
        # all Option 2 branches produce a Series indexed by gene; convert to two columns
        values = pd.DataFrame({'Gene':values.index, 'Rank':values.values})
    return values, pos, neg

In [None]:
#loads libraries
def downloadLibrary(name):
    """Download an Enrichr library into the working directory.

    The library text is saved to a file called ``name``. When an Enrichr
    library is selected and augmentation is switched on (module-level
    ``library_tab`` / ``augmentation``), the matching augmented GMT is saved as
    ``<name>_augmented``.

    Args:
        name: Enrichr library name; also used as the local file name.
    """
    enrichr_url = f'https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName={name}'
    urllib.request.urlretrieve(enrichr_url, filename=f'{name}')
    wants_augmented = library_tab != 'Upload' and augmentation == True
    if wants_augmented:
        augmented_url = f'https://appyters.maayanlab.cloud/storage/GSEA_Appyter/{name}_augmented.gmt'
        urllib.request.urlretrieve(augmented_url, filename=f'{name}_augmented')

def getLibraryData(library):
    """Parse a tab-separated GMT file into a ``{term: [GENE, ...]}`` dict.

    The first field of each line is the term, the second field is ignored and
    the remaining fields are the genes, which are upper-cased. Lines without
    any gene field are skipped; a repeated term overwrites the earlier entry.

    Args:
        library: path of the GMT file.

    Returns:
        dict: term name mapped to its list of upper-cased genes.
    """
    gene_sets = {}
    with open(library, 'r') as handle:
        for raw_line in handle:
            term, *rest = raw_line.strip().split("\t")
            members = rest[1:]  # rest[0] is the description field
            if members:
                gene_sets[term] = [gene.upper() for gene in members]
    return gene_sets

def loadLibrary(library):
    """Load the gene set library and, when requested, its augmented version.

    Enrichr selections are downloaded first. Uses the module-level
    ``library_tab``, ``augmentation`` and ``result_number``.

    Args:
        library: local GMT path, or the Enrichr library name.

    Returns:
        tuple: ``(library_data, augmented_data)``; ``augmented_data`` is
        ``None`` unless an Enrichr library is used with augmentation enabled.

    Raises:
        Exception: the library holds fewer gene sets than ``result_number``.
    """
    from_enrichr = library_tab == 'Select an Enrichr Library'
    if from_enrichr:
        downloadLibrary(library)
    library_data = getLibraryData(library)
    if result_number > len(library_data):
        raise Exception('There cannot be less gene sets in the library than the number of gene sets displayed for the results.')
    wants_augmented = library_tab != 'Upload' and augmentation == True
    augmented_data = getLibraryData(f'{library}_augmented') if wants_augmented else None
    return library_data, augmented_data

In [None]:
%%appyter_code_exec
#loading all needed data
print('Checking inputs...')
checkInputs()
print('Loading data for plots...')
ranked, pheno1, pheno2 = loadRanked()
library_data, augmented_data = loadLibrary(library_filename)
print('Performing GSEA...')
result = gsea().results
print('Data loaded successfully!')

## Enrichment Results Table

The following table displays the enrichment analysis result statistics of the top ranked most enriched annotated gene sets with the chosen criteria. The columns can be re-arranged by dragging.  

The table is available for download as a PNG file by clicking the camera icon on the upper right. The CSV file containing the full version of the results sorted by the input criteria is available for download by clicking the link below the table. 

In [None]:
#makes values appropriate for table viewing
def validateNumber(num):
    """Format one table value for display.

    The value is rounded to four decimals. Magnitudes of 0.01 and above are
    returned as the rounded number. Smaller ones are returned as a
    two-decimal scientific-notation string, except that a value rounding to
    zero is shown as '<0.001'.

    Args:
        num: numeric cell value.

    Returns:
        The rounded number, or a display string for small magnitudes.
    """
    rounded = round(num, 4)
    # written as "not <" so that NaN falls through unchanged, as it does not compare
    if not abs(rounded) < 0.01:
        return rounded
    scientific = '{:.2E}'.format(Decimal(rounded)) #scientific notation
    # Decimal zero is formatted as '0.00E+2' by the pattern above
    return '<0.001' if scientific == '0.00E+2' else scientific

def geneStringToList(string):
    """Split a ';'-separated gene string into a list of gene names."""
    genes = string.split(';')
    return genes

#create a downloadable csv file of all the results
def create_download_link(df, title = "Download CSV file of all results", filename = "GSEA_Enrichment_Results_data.csv"):  
    """Write ``df`` to a CSV file and return an HTML link pointing at it.

    Args:
        df: DataFrame to export; its index is written as well.
        title: text of the link.
        filename: path the CSV is written to, also used as the link target.

    Returns:
        IPython ``HTML`` object containing the link.
    """
    # to_csv returns None when given a path, so it is called for its side effect only
    df.to_csv(filename, index = True)
    link = f'<a href="{filename}" target="_blank">{title}</a>'
    # the wrapping <div> is now closed so it cannot swallow later output
    return HTML(f"<div style='font-size:1.25rem;'>{link}</div>")
In [None]:
#organizes gsea results into sorted dataframe (for download)
result_df = pd.DataFrame.from_dict(result, orient='index')
result_df.index.name = 'gene set'
# p-value and FDR sort ascending with each other as tie-breaker;
# any other criteria (es / nes) sorts descending on that single column
sort_options = {
    'pval': dict(by=['pval', 'fdr'], ascending=True),
    'fdr': dict(by=['fdr', 'pval'], ascending=True),
}
sort_kwargs = sort_options.get(result_criteria, dict(by=result_criteria, ascending=False))
sorted_result_df = result_df.sort_values(**sort_kwargs)
# the running-sum vector and hit positions stay in result_df for the plots only
sorted_result_df = sorted_result_df.drop(columns=['RES', 'hit_indices'])
for gene_column in ('genes', 'ledge_genes'):
    sorted_result_df[gene_column] = sorted_result_df[gene_column].map(geneStringToList)

#gets top gene sets' data (calculated statistics only) needed for the table display
top_result_df = (
    sorted_result_df
    .reset_index()
    .truncate(before=0, after=result_number-1)
    .set_index(keys='gene set')
    .drop(columns=['genes', 'ledge_genes'])
    .applymap(validateNumber)
)

#converts necessary dataframes to list
rankedGenes = ranked['Gene'].tolist()
rankedCorrelation = ranked['Rank'].tolist()
top = top_result_df.index.tolist()

In [None]:
#displays this section
# Table of the top gene sets: one header label per column of top_result_df,
# with the gene set names (the index) as the first, wider column.
fig1 = go.Figure(data=[go.Table(columnwidth=[200, 75, 75, 75, 75, 75, 75],
    header=dict(values=['Gene Set', 'ES', 'NES', 'P-value', 'FDR', 'Set Size', 'Genes Matched'],
    height=40), 
    cells=dict(values=[top_result_df.index, top_result_df.es, top_result_df.nes, 
    top_result_df.pval, top_result_df.fdr, top_result_df.geneset_size, 
    top_result_df.matched_size], height=30))])
fig1.update_layout(width=900, font_size=14)
fig1.show()
# the download link exports the full sorted results, not only the rows shown above
display(create_download_link(sorted_result_df))
# caption tags are closed in reverse order of opening (<i><b> ... </b></i>)
display(HTML(f"<i><b>Table 1: Enrichment Results Table for the top {len(top)} gene sets in the {library_name} library. The Enrichment Score (ES) is the random walk's farthest deviation from 0. NES is the Normalized Enrichment Score, the P-value is a measure of significance of the NES, and the FDR is the False Discovery Rate corrected p-value. Set Size is the number of genes in the set, and Genes Matched is how many genes in the gene set are also in the ranked list of genes.</b></i>"))

## Generating GSEA Plots

For the selected gene set library, the top gene sets according to the chosen criteria are plotted. You can choose which gene set to view from the dropdown menu. Note that the hit indices plot will only be present for individual gene sets. 

Various interactive options are available from the toolbar on the upper right. Lines can be toggled on and off by clicking the name in the legend. Hovering over the running sum line on the Enrichment Plot will display the gene, its ranking, and the running sum score at that point. Also be aware that zooming in and out of the GSEA plot will simultaneously zoom in or out of the Hit Indices plot. 

The GSEA plots are available for download as a PNG file by clicking the camera icon on the toolbar. 

In [None]:
#breaks strings that are too long into new lines
def lineBreak(string):
    """Insert '<br>' into a long label so it wraps in a plot legend.

    Words are accumulated, each followed by a space, until the running chunk
    is longer than 20 characters. The chunk is then closed with '<br>',
    unless the word just added is the last one. The result always ends with
    a trailing space.

    Args:
        string: label whose words are separated by single spaces.

    Returns:
        str: the label with '<br>' markers inserted.
    """
    tokens = string.split(" ")
    last_position = len(tokens) - 1
    pieces = []
    current = ""
    for position, token in enumerate(tokens):
        current = current + token + " "
        if position < last_position and len(current) > 20:
            pieces.append(current + "<br>")
            current = ""
    pieces.append(current)
    return "".join(pieces)

In [None]:
#initialize figure basics
fig2 = make_subplots(rows=2, cols=1, row_heights = [0.8, 0.2],
        shared_xaxes=True, vertical_spacing = 0.10,
        subplot_titles=("Enrichment Plot", "Hit Indices"))
#pair of arbitrary lines that maintain scale of plot
arbitrary = [
    dict(type="line", xref="x2", yref="y2", x0=0, y0=0,
        x1=len(rankedGenes), y1=0, line=dict(color="White", width=0.5)),
    dict(type="line", xref="x2", yref="y2", x0=0, y0=-1,
        x1=0, y1=1, line=dict(color="White", width=0.5)),
]
fig2.update_layout(shapes=arbitrary, height=600, width=750, font_size=16)
# first dropdown entry shows every trace and only the scale-keeping shapes
buttons = [dict(label='All Top Gene Sets', method="update",
            args=[{"visible": [True] * len(top)}, {"shapes": arbitrary}])]

xvals = list(range(len(rankedGenes)))
gene_labels = ['Gene: {}'.format(gene) for gene in rankedGenes]
hover_lines = ['%{text}', '<br>Ranking: %{x}', '<br>Running Sum: %{y}', '<br>P-value: %{meta[0]}']

#plots values for each top gene set
for index, geneSet in enumerate(top):
    #gets hit indices and running sum vector
    hits = result_df.at[geneSet, 'hit_indices']
    yvals = result_df.at[geneSet, 'RES']
    pval = top_result_df.at[geneSet, 'pval']
    #creates enrichment plot
    setName = lineBreak(geneSet)
    running_sum = go.Scatter(x=xvals, y=yvals, mode='lines', showlegend=True,
        name=setName, meta=[pval], text=gene_labels,
        hovertemplate=''.join(hover_lines))
    fig2.add_trace(running_sum, row=1, col=1)
    #creates hit indices: one vertical blue line per hit on the lower subplot
    lines = [
        dict(type="line", x0=x, y0=-1, x1=x, y1=1, xref = "x2", yref = "y2",
            line=dict(color="Blue", width=0.5))
        for x in hits
    ]
    #creates drop down menu option that shows only this gene set's trace
    visible = [position == index for position in range(len(top))]
    buttons.append(dict(label=f'{geneSet}', method="update",
        args=[{"visible": visible}, {"shapes": lines}]))

#make drop down menu
dropdown = dict(buttons=buttons, pad = {"r": 10, "t": 10}, showactive = True,
    x = 0, xanchor = "left", y = 1.2, yanchor = "top")
fig2.update_layout(updatemenus=[dropdown])
fig2.update_xaxes(title_text="Gene Rankings", row=2, col=1)
fig2.update_yaxes(title_text="Enrichment Score (ES)", row=1, col=1)
fig2.update_yaxes(visible=False, showticklabels=False, row=2, col=1)

fig2.show()

# phenotype names exist only when an expression dataset was submitted
has_phenotypes = pheno1 is not None and pheno2 is not None
if has_phenotypes:
    display(HTML(f'<b><i>Figure 1: GSEA plot for the top {len(top)} gene sets in the {library_name} library. Those with a positive Enrichment Score and more hits on the left are correlated with {pheno1}, while those with a negative Enrichment Score and more hits on the right are correlated with {pheno2}.</i></b>'))
else:
    display(HTML(f'<b><i>Figure 1: GSEA plot for the top {len(top)} gene sets in the {library_name} library. Those with a positive Enrichment Score and more hits on the left are correlated with genes at the top of the ranked list, while those with a negative Enrichment Score and more hits on the right are correlated with genes at the bottom.</i></b>'))

In [None]:
%%appyter markdown
{%- if library_tab.raw_value == 'Select an Enrichr Library' and augmentation.value == True %}
## Highlighting Co-expressed Genes

The augmented gene set library was retrieved using the GMT Augmentation Appyter (Clarke et al., 2021) and the ARCHS4 Co-expression matrix (Lachmann et al., 2018). The augmented gene sets are plotted against the original gene sets and the combination of the two sets below. The blue hit indices are hit indices from the original gene set and the red ones represent hits from the augmented gene set. 

The table below the plots displays each gene set and the list of the top ten co-expressed genes from the augmented set, which is derived from the plots.
{%- endif %}

In [None]:
%%appyter code_exec
{%- if library_tab.raw_value == 'Select an Enrichr Library' and augmentation.value == True %}
#retrieves leading edge genes
def ledgeGenes(results, ranked_genes=None):
    """Return up to ten leading-edge hits of a random walk.

    For a positive enrichment score the leading edge is every hit at or
    before the peak of the running sum, listed from the top of the ranking.
    Otherwise it is every hit at or after the peak, listed from the bottom of
    the ranking upwards.

    The scan stops at the end of the hit list. Previously it indexed past the
    list when every hit lay on the leading side of the peak (or the list was
    empty), which raised IndexError and, for non-positive scores, first
    re-added hits through negative-index wraparound.

    Args:
        results: ``(es, hit_indices, res)`` -- the enrichment score, the hit
            positions in ascending order, and the running-sum vector that
            contains ``es``.
        ranked_genes: gene names indexed by rank. Defaults to the module-level
            ``rankedGenes`` list.

    Returns:
        dict: ``{'genes': [...], 'ranks': [...]}`` with at most ten entries each.

    Raises:
        ValueError: ``es`` does not occur in ``res``.
    """
    es, hit_indices, res = results
    if ranked_genes is None:
        ranked_genes = rankedGenes
    running_sum = res.tolist() if hasattr(res, 'tolist') else list(res)
    peak = running_sum.index(es)
    positive = es > 0
    ordered_hits = list(hit_indices)
    if not positive:
        ordered_hits.reverse()
    hits = []
    for hit in ordered_hits:
        # stop at the first hit that lies beyond the peak of the walk
        beyond_peak = hit > peak if positive else hit < peak
        if beyond_peak:
            break
        hits.append(hit)
    hits = hits[:10]
    ledge = [ranked_genes[hit] for hit in hits]
    return {'genes': ledge, 'ranks': hits}
{%- endif %}

In [None]:
%%appyter code_exec
{%- if library_tab.raw_value == 'Select an Enrichr Library' and augmentation.value == True %}
# Figure 2: for every top gene set, three random walks (original set, augmented genes
# only, both combined) plus hit lines for the original (blue) and augmented (red) sets.
# fig4 collects one co-expressed-genes table per gene set; it is displayed by the next cell.
fig3 = make_subplots(rows=2, cols=1, row_heights = [0.8, 0.2],
        shared_xaxes=True, vertical_spacing = 0.10,
        subplot_titles=("Enrichment Plot", "Hit Indices"))
fig3.update_layout(shapes=arbitrary, height=600, width=750, font_size=16)
traces_per_set = 3
trace_count = len(top) * traces_per_set
plot_buttons = [dict(label='All', method="update",
            args=[{"visible": [True] * trace_count}, {"shapes": arbitrary}])]
fig4 = go.Figure()
table_buttons = list()
gene_labels = ['Gene: {}'.format(gene) for gene in rankedGenes]

def _hit_lines(hit_positions, color):
    """Vertical lines on the lower subplot, one per hit position."""
    return [dict(type='line', x0=x, y0=-1, x1=x, y1=1, xref = 'x2', yref = 'y2',
        line=dict(color=color, width=0.5)) for x in hit_positions]

# enumerate replaces the per-iteration top.index(geneSet) lookup
for index, geneSet in enumerate(top):
    #gets values to plot
    hits0, yvals0 = result_df.at[geneSet, 'hit_indices'], result_df.at[geneSet, 'RES']
    # elements 0, 2 and 3 of the enrichment_score output are used below as the
    # score, the hit positions and the running-sum vector
    augmented = gp.algorithm.enrichment_score(rankedGenes, rankedCorrelation, augmented_data[geneSet])
    hits1, yvals1 = augmented[2], augmented[3]
    both = gp.algorithm.enrichment_score(rankedGenes, rankedCorrelation, augmented_data[geneSet]+library_data[geneSet])
    yvals2 = both[3]
    #gets top co-expressed genes: augmented hits located on the combined walk
    results = both[0], augmented[2], both[3]
    coexp = ledgeGenes(results)
    coexp_df = pd.DataFrame.from_dict(data=coexp, orient='columns')
    # only the first table starts visible; otherwise all tables are drawn on top of
    # each other until a dropdown entry is chosen
    fig4.add_trace(go.Table(columnwidth=[200, 200],
        header=dict(values=['Genes', 'Rank'], height=40), 
        cells=dict(values=[coexp_df.genes, coexp_df.ranks], height=30),
        visible=(index == 0)))
    #plots the three random walks
    walks = ((' original', yvals0), (' augmented', yvals1), (' both', yvals2))
    for suffix, walk in walks:
        newName = lineBreak(geneSet + suffix)
        fig3.add_trace(go.Scatter(x=xvals, y=walk, mode='lines', showlegend=True,
            name=newName, text=gene_labels,
            hovertemplate=('%{text}' + 
            '<br>Ranking: %{x}' + 
            '<br>Running Sum: %{y}')),
            row=1, col=1)
    #creates hit indices
    lines = _hit_lines(hits0, "Blue") + _hit_lines(hits1, "Red")
    #creates drop down menu option
    first_trace = index * traces_per_set
    plot_visible = [first_trace <= i < first_trace + traces_per_set for i in range(trace_count)]
    plot_buttons.append(dict(label=f'{geneSet}', method="update",
        args=[{"visible": plot_visible}, {"shapes": lines}]))
    table_visible = [i == index for i in range(len(top))]
    table_buttons.append(dict(label=f'{geneSet}', method="update",
        args=[{"visible": table_visible}]))

fig3.update_layout(updatemenus=[
    dict(buttons=plot_buttons, pad = {"r": 10, "t": 10}, showactive = True,
    x = 0, xanchor = "left", y = 1.2, yanchor = "top")])
fig3.update_xaxes(title_text="Gene Rankings", row=2, col=1)
fig3.update_yaxes(title_text="Enrichment Score (ES)", row=1, col=1)
fig3.update_yaxes(visible=False, showticklabels=False, row=2, col=1)
fig3.show()
# caption tags are closed in reverse order of opening (<i><b> ... </b></i>)
display(HTML(f"<i><b>Figure 2: Random walk plots of augmented gene sets for the top {len(top)} gene sets in the {library_name} library. Each gene set has three plots: one with the original set, one with the augmented genes only, and one with them combined.</b></i>"))
{%- endif %}

In [None]:
%%appyter code_exec
{%- if library_tab.raw_value == 'Select an Enrichr Library' and augmentation.value == True %}
#displays top co-expressed genes in table
# fig4 and table_buttons were filled by the previous cell: one table trace and one
# dropdown entry per top gene set.
fig4.update_layout(updatemenus=[
    dict(buttons=table_buttons, pad = {"r": 10, "t": 10}, showactive = True,
    x = 0, xanchor = "left", y = 1.2, yanchor = "top")])
fig4.update_layout(width=400, font_size=14)
fig4.show()
# caption tags are closed in reverse order of opening (<i><b> ... </b></i>)
display(HTML(f"<i><b>Table 2: The Top Co-Expressed Genes Table for the top {len(top)} gene sets in the {library_name} library. The table displays the top ten co-expressed genes for each set, as well as their ranking within the input signature.</b></i>"))
{%- endif %}

## References
Blanco-Melo D, Nilsson-Payant BE, Liu WC, Uhl S et al. Imbalanced Host Response to SARS-CoV-2 Drives Development of COVID-19. Cell 2020 May 28;181(5):1036-1045.e9. PMID: 32416070. 

Clark, N.R., Hu, K.S., Feldmann, A.S. et al. The characteristic direction: a geometrical approach to identify differentially expressed genes. BMC Bioinformatics 15, 79 (2014). https://doi.org/10.1186/1471-2105-15-79 

Clarke, D. J. B., Jeon, M., Stein, D. J., Moiseyev, N., Kropiwnicki, E., Dai, C., Xie, Z., Wojciechowicz, M. L., Litz, S., Hom, J., Evangelista, J. E., Goldman, L., Zhang, S., Yoon, C., Ahamed, T., Bhuiyan, S., Cheng, M., Karam, J., Jagodnik, K. M., … Ma'ayan, A. (2021). Appyters: Turning Jupyter notebooks into data-driven web apps. Patterns, 2(3), 100213. https://doi.org/10.1016/j.patter.2021.100213. 

Daamen AR, Bachali P, Owen KA, Kingsmore KM et al. Comprehensive transcriptomic analysis of COVID-19 blood, lung, and airway. Sci Rep 2021 Mar 29;11(1):7052. PMID: 33782412. 

Kuleshov, M. V., Jones, M. R., Rouillard, A. D., Fernandez, N. F., Duan, Q., Wang, Z., Koplev, S., Jenkins, S. L., Jagodnik, K. M., Lachmann, A., McDermott, M. G., Monteiro, C. D., Gundersen, G. W., & Ma'ayan, A. (2016). Enrichr: A Comprehensive Gene set enrichment analysis web Server 2016 update. Nucleic Acids Research, 44(W1). https://doi.org/10.1093/nar/gkw377. 

Lachmann, A., Torre, D., Keenan, A.B. et al. Massive mining of publicly available RNA-seq data from human and mouse. Nat Commun 9, 1366 (2018). https://doi.org/10.1038/s41467-018-03751-6. 

Law, CW, Chen, Y, Shi, W, and Smyth, GK (2014). Voom: precision weights unlock linear model analysis tools for RNA-seq read counts. Genome Biology 15, R29. 

Subramanian, A., Tamayo, P., Mootha, V. K., Mukherjee, S., Ebert, B. L., Gillette, M. A., Paulovich, A., Pomeroy, S. L., Golub, T. R., Lander, E. S., & Mesirov, J. P. (2005). Gene set enrichment analysis: A knowledge-based approach for interpreting genome-wide expression profiles. Proceedings of the National Academy of Sciences, 102(43), 15545–15550. https://doi.org/10.1073/pnas.0506580102 