In [None]:
#%%appyter init
# Bootstrap appyter's notebook magics (the %%appyter cells used further down).
# magic.init receives a zero-argument callable that returns this notebook's
# globals(); presumably appyter uses it to evaluate rendered cells in this
# namespace -- TODO confirm against the appyter documentation.
from appyter import magic
magic.init(lambda _=globals: _())

In [None]:
# imports
import base64
import urllib.error
import urllib.parse
import urllib.request

import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from IPython.display import display, Markdown
from maayanlab_bioinformatics.enrichment import enrich_crisp

# bokeh
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import HoverTool, CustomJS, ColumnDataSource, Span
from bokeh.layouts import layout, column, gridplot
from bokeh.layouts import row as bkrow
from bokeh.palettes import all_palettes

# display graphics: configure Bokeh so that show() renders inside the notebook
output_notebook()

# Enrichr Volcano Plot Visualizer

In [None]:
%%appyter hide
{# Form sections. Each SectionField declares a named group ('GENE_SETS',
   'ENRICHR_LIB'); the input fields declared later in this cell attach to a
   group through their `section` argument. #}
{% do SectionField(
    name = 'GENE_SETS',
    title = 'Gene Set Upload',
    subtitle = 'Upload up and down gene sets.',
    img = 'enrichr.png'
)%}

{% do SectionField(
    name = 'ENRICHR_LIB',
    title = 'Enrichr Library Selection',
    subtitle = 'Select background library for enrichment analysis.',
    img = 'enrichr.png'
)%}

{# Up-regulated gene list input. A two-tab field: 'Paste' holds a TextField
   (pre-filled with an example list, one gene per line) and 'Upload' holds a
   FileField. Later cells branch on up_genes_kind.raw_value and read the
   selected tab's field through up_genes_kind.value[0]. #}
{% set up_genes_kind = TabField(
    name = 'up_genes_kind',
    label = 'Up Genes',
    default = 'Paste',
    description = 'Paste or upload your up-regulated gene list.',
    choices = {
        'Paste': [
            TextField(
                name = 'up_genes_input',
                label = 'Up Gene List',
                default = 'KIAA0907\nKDM5A\nCDC25A\nEGR1\nGADD45B\nRELB\nTERF2IP\nSMNDC1\nTICAM1\nNFKB2\nRGS2\nNCOA3\nICAM1\nTEX10\nCNOT4\nARID4B\nCLPX\nCHIC2\nCXCL2\nFBXO11\nMTF2\nCDK2\nDNTTIP2\nGADD45A\nGOLT1B\nPOLR2K\nNFKBIE\nGABPB1\nECD\nPHKG2\nRAD9A\nNET1\nKIAA0753\nEZH2\nNRAS\nATP6V0B\nCDK7\nCCNH\nSENP6\nTIPARP\nFOS\nARPP19\nTFAP2A\nKDM5B\nNPC1\nTP53BP2\nNUSAP1',
                description = 'Paste your list of up-regulated genes (one gene per row).',
                section = 'GENE_SETS'
            ),
        ],
        'Upload': [
            FileField(
                name = 'up_genes_filename',
                label = 'Up Gene List File',
                default = '',
                description = 'Upload your list of up-regulated genes as a text file (one gene per row).',
                section = 'GENE_SETS'
            ),
        ],
    },
    section = 'GENE_SETS'
) %}

{# Down-regulated gene list input. Mirrors the up-regulated field: 'Paste'
   holds a TextField (pre-filled with an example list, one gene per line) and
   'Upload' holds a FileField. Later cells branch on dn_genes_kind.raw_value
   and read the selected tab's field through dn_genes_kind.value[0]. #}
{% set dn_genes_kind = TabField(
    name = 'dn_genes_kind',
    label = 'Down Genes',
    default = 'Paste',
    description = 'Paste or upload your down-regulated gene list.',
    choices = {
        'Paste': [
            TextField(
                name = 'dn_genes_input',
                label = 'Down Gene List',
                default = 'SCCPDH\nKIF20A\nFZD7\nUSP22\nPIP4K2B\nCRYZ\nGNB5\nEIF4EBP1\nPHGDH\nRRAGA\nSLC25A46\nRPA1\nHADH\nDAG1\nRPIA\nP4HA2\nMACF1\nTMEM97\nMPZL1\nPSMG1\nPLK1\nSLC37A4\nGLRX\nCBR3\nPRSS23\nNUDCD3\nCDC20\nKIAA0528\nNIPSNAP1\nTRAM2\nSTUB1\nDERA\nMTHFD2\nBLVRA\nIARS2\nLIPA\nPGM1\nCNDP2\nBNIP3\nCTSL1\nCDC25B\nHSPA8\nEPRS\nPAX8\nSACM1L\nHOXA5\nTLE1\nPYGL\nTUBB6\nLOXL1',
                description = 'Paste your list of down-regulated genes (one gene per row).',
                section = 'GENE_SETS'
            ),
        ],
        'Upload': [
            FileField(
                name = 'dn_genes_filename',
                label = 'Down Gene List File',
                default = '',
                description = 'Upload your list of down-regulated genes as a text file (one gene per row).',
                section = 'GENE_SETS'
            ),
        ],
    },
    section = 'GENE_SETS'
) %}

{# Enrichr library selector. The chosen name is later interpolated into the
   download URL used by get_library(), so every entry must be exactly one
   library name: keep one quoted string per line, each followed by a comma
   (adjacent string literals without a comma are concatenated into a single
   bogus choice).
   NOTE(review): 'Tissue_Protein_Expression_from_ProteomicsDB.csv' is the only
   entry carrying a '.csv' suffix -- confirm it matches Enrichr's library name. #}
{% set enrichr_library = ChoiceField(
    name='enrichr_library', 
    description='Select one Enrichr library for which to create visualizations.', 
    label='Enrichr Library', 
    default='WikiPathways_2019_Human', 
    section = 'ENRICHR_LIB',
    choices=[
        'ARCHS4_Cell-lines',
        'ARCHS4_IDG_Coexp',
        'ARCHS4_Kinases_Coexp',
        'ARCHS4_TFs_Coexp',
        'ARCHS4_Tissues',
        'Achilles_fitness_decrease',
        'Achilles_fitness_increase',
        'Aging_Perturbations_from_GEO_down',
        'Aging_Perturbations_from_GEO_up',
        'Allen_Brain_Atlas_10x_scRNA_2021',
        'Allen_Brain_Atlas_down',
        'Allen_Brain_Atlas_up',
        'BioCarta_2013',
        'BioCarta_2015',
        'BioCarta_2016',
        'BioPlanet_2019',
        'BioPlex_2017',
        'CCLE_Proteomics_2020',
        'CORUM',
        'COVID-19_Related_Gene_Sets',
        'Cancer_Cell_Line_Encyclopedia',
        'ChEA_2013',
        'ChEA_2015',
        'ChEA_2016',
        'Chromosome_Location',
        'Chromosome_Location_hg19',
        'ClinVar_2019',
        'dbGaP',
        'DSigDB',
        'Data_Acquisition_Method_Most_Popular_Genes',
        'DepMap_WG_CRISPR_Screens_Broad_CellLines_2019',
        'DepMap_WG_CRISPR_Screens_Sanger_CellLines_2019',
        'DisGeNET',
        'Disease_Perturbations_from_GEO_down',
        'Disease_Perturbations_from_GEO_up',
        'Disease_Signatures_from_GEO_down_2014',
        'Disease_Signatures_from_GEO_up_2014',
        'DrugMatrix',
        'Drug_Perturbations_from_GEO_2014',
        'Drug_Perturbations_from_GEO_down',
        'Drug_Perturbations_from_GEO_up',
        'ENCODE_Histone_Modifications_2013',
        'ENCODE_Histone_Modifications_2015',
        'ENCODE_TF_ChIP-seq_2014',
        'ENCODE_TF_ChIP-seq_2015',
        'ENCODE_and_ChEA_Consensus_TFs_from_ChIP-X',
        'ESCAPE',
        'Elsevier_Pathway_Collection',
        'Enrichr_Libraries_Most_Popular_Genes',
        'Enrichr_Submissions_TF-Gene_Coocurrence',
        'Enrichr_Users_Contributed_Lists_2020',
        'Epigenomics_Roadmap_HM_ChIP-seq',
        'GO_Biological_Process_2013',
        'GO_Biological_Process_2015',
        'GO_Biological_Process_2017',
        'GO_Biological_Process_2017b',
        'GO_Biological_Process_2018',
        'GO_Cellular_Component_2013',
        'GO_Cellular_Component_2015',
        'GO_Cellular_Component_2017',
        'GO_Cellular_Component_2017b',
        'GO_Cellular_Component_2018',
        'GO_Molecular_Function_2013',
        'GO_Molecular_Function_2015',
        'GO_Molecular_Function_2017',
        'GO_Molecular_Function_2017b',
        'GO_Molecular_Function_2018',
        'GTEx_Tissue_Sample_Gene_Expression_Profiles_down',
        'GTEx_Tissue_Sample_Gene_Expression_Profiles_up',
        'GWAS_Catalog_2019',
        'GeneSigDB',
        'Gene_Perturbations_from_GEO_down',
        'Gene_Perturbations_from_GEO_up',
        'Genes_Associated_with_NIH_Grants',
        'Genome_Browser_PWMs',
        'HMDB_Metabolites',
        'HMS_LINCS_KinomeScan',
        'HomoloGene',
        'HumanCyc_2015',
        'HumanCyc_2016',
        'Human_Gene_Atlas',
        'Human_Phenotype_Ontology',
        'huMAP',
        'InterPro_Domains_2019',
        'Jensen_COMPARTMENTS',
        'Jensen_DISEASES',
        'Jensen_TISSUES',
        'KEA_2013',
        'KEA_2015',
        'KEGG_2013',
        'KEGG_2015',
        'KEGG_2016',
        'KEGG_2019_Human',
        'KEGG_2019_Mouse',
        'Kinase_Perturbations_from_GEO_down',
        'Kinase_Perturbations_from_GEO_up',
        'L1000_Kinase_and_GPCR_Perturbations_down',
        'L1000_Kinase_and_GPCR_Perturbations_up',
        'LINCS_L1000_Chem_Pert_down',
        'LINCS_L1000_Chem_Pert_up',
        'LINCS_L1000_Ligand_Perturbations_down',
        'LINCS_L1000_Ligand_Perturbations_up',
        'Ligand_Perturbations_from_GEO_down',
        'Ligand_Perturbations_from_GEO_up',
        'lncHUB_lncRNA_Co-Expression',
        'MCF7_Perturbations_from_GEO_down',
        'MCF7_Perturbations_from_GEO_up',
        'MGI_Mammalian_Phenotype_2013',
        'MGI_Mammalian_Phenotype_2017',
        'MGI_Mammalian_Phenotype_Level_3',
        'MGI_Mammalian_Phenotype_Level_4',
        'MGI_Mammalian_Phenotype_Level_4_2019',
        'MSigDB_Computational',
        'MSigDB_Hallmark_2020',
        'MSigDB_Oncogenic_Signatures',
        'Microbe_Perturbations_from_GEO_down',
        'Microbe_Perturbations_from_GEO_up',
        'miRTarBase_2017',
        'Mouse_Gene_Atlas',
        'NCI-60_Cancer_Cell_Lines',
        'NCI-Nature_2015',
        'NCI-Nature_2016',
        'NIH_Funded_PIs_2017_AutoRIF_ARCHS4_Predictions',
        'NIH_Funded_PIs_2017_GeneRIF_ARCHS4_Predictions',
        'NIH_Funded_PIs_2017_Human_AutoRIF',
        'NIH_Funded_PIs_2017_Human_GeneRIF',
        'NURSA_Human_Endogenous_Complexome',
        'OMIM_Disease',
        'OMIM_Expanded',
        'Old_CMAP_down',
        'Old_CMAP_up',
        'PPI_Hub_Proteins',
        'Panther_2015',
        'Panther_2016',
        'Pfam_Domains_2019',
        'Pfam_InterPro_Domains',
        'PheWeb_2019',
        'Phosphatase_Substrates_from_DEPOD',
        'ProteomicsDB_2020',
        'RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO',
        'Rare_Diseases_AutoRIF_ARCHS4_Predictions',
        'Rare_Diseases_AutoRIF_Gene_Lists',
        'Rare_Diseases_GeneRIF_ARCHS4_Predictions',
        'Rare_Diseases_GeneRIF_Gene_Lists',
        'Reactome_2013',
        'Reactome_2015',
        'Reactome_2016',
        'SILAC_Phosphoproteomics',
        'SubCell_BarCode',
        'SysMyo_Muscle_Gene_Sets',
        'TF-LOF_Expression_from_GEO',
        'TF_Perturbations_Followed_by_Expression',
        'TG_GATES_2020',
        'TRANSFAC_and_JASPAR_PWMs',
        'TRRUST_Transcription_Factors_2019',
        'Table_Mining_of_CRISPR_Studies',
        'TargetScan_microRNA',
        'TargetScan_microRNA_2017',
        'Tissue_Protein_Expression_from_Human_Proteome_Map',
        'Tissue_Protein_Expression_from_ProteomicsDB.csv',
        'Transcription_Factor_PPIs',
        'UK_Biobank_GWAS_v1',
        'Virus-Host_PPI_P-HIPSTer_2020',
        'VirusMINT',
        'Virus_Perturbations_from_GEO_down',
        'Virus_Perturbations_from_GEO_up',
        'WikiPathways_2013',
        'WikiPathways_2015',
        'WikiPathways_2016',
        'WikiPathways_2019_Human',
        'WikiPathways_2019_Mouse'
    ]
) %}

In [None]:
%%appyter code_exec

{# Emit the user's form selections as plain Python assignments. For each
   direction only the variable matching the selected tab is defined:
   *_genes_input for 'Paste', *_genes_filename for 'Upload'. #}
{%- if up_genes_kind.raw_value == 'Paste' %}
up_genes_input = {{ up_genes_kind.value[0] }}
{%- else %}
up_genes_filename = {{ up_genes_kind.value[0] }}
{%- endif %}

{%- if dn_genes_kind.raw_value == 'Paste' %}
dn_genes_input = {{ dn_genes_kind.value[0] }}
{%- else %}
dn_genes_filename = {{ dn_genes_kind.value[0] }}
{%- endif %}

{# NOTE(review): unlike the gene-list fields above, this value is wrapped in
   explicit quotes -- confirm the field does not already render with its own
   quotes, otherwise the generated line is not valid Python. #}
enrichr_library = '{{ enrichr_library }}'

In [None]:
%%appyter code_exec

{# Build up_genes / dn_genes (lists of gene symbols) from whichever input the
   user selected. Surrounding whitespace is stripped and blank lines are
   dropped so that empty strings never reach the enrichment step as "genes". #}
{%- if up_genes_kind.raw_value == 'Paste' %}
# pasted text: one gene per line, blank lines ignored
up_genes = [x.strip() for x in up_genes_input.splitlines() if x.strip()]
{%- else %}
# uploaded file: one gene per line, blank lines ignored; `with` closes the
# file even if reading fails
with open(up_genes_filename, 'r') as open_up_genes_file:
    up_genes = [x.strip() for x in open_up_genes_file if x.strip()]
{%- endif %}

{%- if dn_genes_kind.raw_value == 'Paste' %}
# pasted text: one gene per line, blank lines ignored
dn_genes = [x.strip() for x in dn_genes_input.splitlines() if x.strip()]
{%- else %}
# uploaded file: one gene per line, blank lines ignored; `with` closes the
# file even if reading fails
with open(dn_genes_filename, 'r') as open_dn_genes_file:
    dn_genes = [x.strip() for x in open_dn_genes_file if x.strip()]
{%- endif %}

In [None]:
# Parameters and Functions
# p-value cutoff intended to separate significant from non-significant terms
significance_value = 0.05

def get_library(lib_name):
    ''' Returns a dictionary mapping each term from the input library to
    its associated geneset.

    Parameters
    ----------
    lib_name : str
        Name of the Enrichr gene set library to download.

    Returns
    -------
    dict
        Maps each term (the text before the first double tab on a line) to
        the list of non-empty, tab-separated entries that follow it. An empty
        dict is returned when the library cannot be downloaded, so callers
        can detect failure with `lib == {}`.
    '''

    # quote the name so unexpected characters cannot corrupt the query string
    url = (
        'https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName='
        + urllib.parse.quote(lib_name)
    )

    lib_data = {}
    try:
        with urllib.request.urlopen(url) as f:
            for raw_line in f:
                # expected line layout: term, two tabs, then tab-separated genes
                term, sep, genes = raw_line.decode("utf-8").partition("\t\t")
                if not sep:
                    # blank or malformed line without the term/gene separator
                    continue
                # strip() removes the line ending; empty fields (e.g. from a
                # trailing tab) are discarded instead of becoming '' genes
                stripped = (gene.strip() for gene in genes.split("\t"))
                lib_data[term] = [gene for gene in stripped if gene]
    except urllib.error.URLError as err:
        # HTTPError is a URLError subclass, so bad status codes land here too;
        # report the reason and signal failure with an empty library
        print(f"Could not retrieve library '{lib_name}': {err}")
        return {}

    return lib_data


def enrich_genes(direction, genelist, libname):
    ''' Returns dataframe of enrichment results for input gene list and
    library.

    Each (term, result) pair produced by enrich_crisp becomes one row with the
    columns 'term', 'pvalue', 'odds_ratio' and 'direction'; a 'log_pval'
    column holding -log10(pvalue) is appended afterwards.

    `direction` is stored verbatim on every row. `libname` is forwarded to
    enrich_crisp unchanged (the notebook passes the dictionary built by
    get_library). The positional 21000 and True are presumably the background
    size and an overlap-preservation flag -- confirm against
    maayanlab_bioinformatics.
    '''

    column_names = ['term', 'pvalue', 'odds_ratio', 'direction']

    rows = []
    for term, res in enrich_crisp(genelist, libname, 21000, True):
        rows.append([term, res.pvalue, res.odds_ratio, direction])

    df = pd.DataFrame(rows, columns=column_names)
    df['log_pval'] = np.negative(np.log10(df['pvalue']))
    return df

**Note:** Some libraries may take longer to load than others due to their size.

In [None]:
# access Enrichr background library
print(f"Loading library data for {enrichr_library}...")
lib = get_library(enrichr_library)
# only report success when at least one term was actually retrieved
if lib:
    print('Library data loaded!')
else:
    print('No library data could be retrieved.')

The two volcano plots below provide visualizations of the enrichment results for the up-regulated and down-regulated gene lists when compared to the selected library. 

Each point represents a term/gene set from the selected library. The x-position gives the odds ratio, while the y-position gives the -log10(p-value). Significant terms (p-value < 0.05) are larger and colored either blue (up-regulated genes) or red (down-regulated genes); all other terms are drawn as small gray points. The darker the color, the more significant the term. 

Hover over a point to view the gene set it represents and its corresponding odds ratio and p-value. 

Use the toolbar to the right of each plot to pan, zoom, or download the plot as an SVG.

*Note: The two plots may have different x and y scales.*

In [None]:
def _point_styles(res_df, cmap_name, threshold):
    ''' Returns (colors, sizes): one hex color and one marker size per row of
    `res_df`.

    Rows whose 'pvalue' is below `threshold` get a size of 12 and a color
    taken from the matplotlib colormap `cmap_name`; all other rows are small
    (6) and gray.
    '''
    cmap = cm.get_cmap(cmap_name)
    # colors are scaled between the smallest observed p-value and a fixed 0.1;
    # with the reversed ('_r') colormaps used here, smaller p-values are darker
    cnorm = colors.Normalize(vmin = res_df['pvalue'].min(), vmax = 0.1)

    point_colors = []
    point_sizes = []
    for r in res_df.itertuples():
        if r.pvalue < threshold:
            point_colors.append(colors.to_hex(cmap(cnorm(r.pvalue))))
            point_sizes.append(12)
        else:
            point_colors.append('#808080')
            point_sizes.append(6)
    return point_colors, point_sizes


def _volcano_plot(res_df, point_colors, point_sizes, tooltips, title, glyph_name):
    ''' Returns (source, plot): the ColumnDataSource and the Bokeh figure of
    one volcano plot (odds ratio on x, -log10 p-value on y).

    `glyph_name` is assigned as the name of the circle renderer and `title`
    as the plot title; `tooltips` is passed straight to figure().
    '''
    source = ColumnDataSource(
        data=dict(
            x = res_df['odds_ratio'],
            y = res_df['log_pval'],
            gene_set = res_df['term'],
            pvalue = res_df['pvalue'],
            oddsratio = res_df['odds_ratio'],
            colors = point_colors,
            sizes = point_sizes
        )
    )

    plot = figure(
        plot_width = 700,
        plot_height = 700,
        tooltips=tooltips
    )
    plot.circle(
        'x', 'y', size = 'sizes',
        alpha = 0.7, line_alpha = 0,
        line_width = 0.01, source = source,
        fill_color = 'colors', name = glyph_name
    )

    # axis labels and title
    plot.xaxis.axis_label = "Odds Ratio"
    plot.yaxis.axis_label = "-log10(p-value)"
    plot.title.text = title
    plot.title.align = "center"
    plot.title.text_font_size = "16px"

    # SVG backend so the toolbar's save tool exports vector graphics
    plot.output_backend = "svg"
    return source, plot


if not lib:
    # handle failure to retrieve library
    display(Markdown('## Failed to access library, please try again later.'))

else:
    # get enrichment results
    up_res_df = enrich_genes('up', up_genes, lib)
    dn_res_df = enrich_genes('down', dn_genes, lib)

    # set point colors and sizes (blue scale for up, red scale for down),
    # using the notebook-wide significance threshold
    up_colors, up_sizes = _point_styles(up_res_df, 'Blues_r', significance_value)
    dn_colors, dn_sizes = _point_styles(dn_res_df, 'Reds_r', significance_value)

    # create hover tooltips
    tools = [
        ("Gene Set", "@gene_set"),
        ("P-Value", "@pvalue"),
        ("Odds Ratio", "@oddsratio"),
    ]

    # create data sources and plots
    up_source, up_plot = _volcano_plot(
        up_res_df, up_colors, up_sizes, tools,
        f"Enriched terms for up-regulated genes from {enrichr_library}",
        'up_res_df'
    )
    dn_source, dn_plot = _volcano_plot(
        dn_res_df, dn_colors, dn_sizes, tools,
        f"Enriched terms for down-regulated genes from {enrichr_library}",
        'dn_res_df'
    )

    # show plot
    grid = gridplot([[up_plot, dn_plot]], plot_width=250, plot_height=250, sizing_mode="scale_both", merge_tools=False)
    show(grid)