In [None]:
#%%appyter init
from appyter import magic
# Initialise the appyter magics, handing them a callable that returns this notebook's globals().
magic.init(lambda _=globals: _())

# Enrichr Manhattan Plot Figure

This appyter creates a figure visualizing enrichment analysis results from Enrichr (https://amp.pharm.mssm.edu/Enrichr/) in a Manhattan plot.

The resulting figure will contain a Manhattan plot of the p-values of all the gene sets in the selected Enrichr libraries.

In [None]:
import pandas as pd 
import math
import json
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.cm as cm
import numpy as np
import requests
import time
import bokeh.io
from operator import itemgetter
from IPython.display import display, FileLink, Markdown
from bokeh.plotting import ColumnDataSource, figure, output_notebook, show
from bokeh.models import Legend, LegendItem, Span
from bokeh.resources import INLINE

In [None]:
%%appyter hide_code

{% do SectionField(name='section0', title = 'Visualize Your Enrichr Results', subtitle = 'Create a manhattan plot summarizing your Enrichr analysis results. The resulting figure will contain a manhattan plot of the p-values of all the gene sets in the Enrichr libraries selected.', img = 'enrichr-icon.png')%}
{% do SectionField(name='section1', title = '1. Submit Your Gene List', subtitle = 'Upload a text file containing your gene list -OR- copy and paste your gene list into the text box below (One gene per row). You can also try it with the default gene list provided.', img = 'enrichr-icon.png')%}
{% do SectionField(name='section2', title = '2. Choose Enrichr Libraries', subtitle = 'Select the Enrichr libraries you would like in your figure.', img = 'enrichr-icon.png')%}
{% do SectionField(name='section3', title = '3. Other Options', subtitle = 'Choose color scheme of the plot, add a horizontal line to designate a significance level, and choose location of the legend and point labels. NOTE: for the static plot, legends will only appear if there is more than one Enrichr library selected.', img = 'enrichr-icon.png')%}
{% do SectionField(name='section4', title = '4. Output Options', subtitle = 'Choose a format and name to save your static figure.', img = 'enrichr-icon.png')%}


### Input options

In [None]:
%%appyter code_eval

gene_list_filename = {{ FileField(name='gene_list_filename', label='Gene List File', default='', description='Upload your gene list as a text file (One gene per row).',section = 'section1') }}

gene_list_input = {{ TextField(name='gene_list_input', label='Gene List', default='NSUN3\nPOLRMT\nNLRX1\nSFXN5\nZC3H12C\nSLC25A39\nARSG\nDEFB29\nNDUFB6\nZFAND1\nTMEM77\n5730403B10RIK\nRP23-195K8.6\nTLCD1\nPSMC6\nSLC30A6\nLOC100047292\nLRRC40\nORC5L\nMPP7\nUNC119B\nPRKACA\nTCN2\nPSMC3IP\nPCMTD2\nACAA1A\nLRRC1\n2810432D09RIK\nSEPHS2\nSAC3D1\nTMLHE\nLOC623451\nTSR2\nPLEKHA7\nGYS2\nARHGEF12\nHIBCH\nLYRM2\nZBTB44\nENTPD5\nRAB11FIP2\nLIPT1\nINTU\nANXA13\nKLF12\nSAT2\nGAL3ST2\nVAMP8\nFKBPL\nAQP11\nTRAP1\nPMPCB\nTM7SF3\nRBM39\nBRI3\nKDR\nZFP748\nNAP1L1\nDHRS1\nLRRC56\nWDR20A\nSTXBP2\nKLF1\nUFC1\nCCDC16\n9230114K14RIK\nRWDD3\n2610528K11RIK\nACO1\nCABLES1\nLOC100047214\nYARS2\nLYPLA1\nKALRN\nGYK\nZFP787\nZFP655\nRABEPK\nZFP650\n4732466D17RIK\nEXOSC4\nWDR42A\nGPHN\n2610528J11RIK\n1110003E01RIK\nMDH1\n1200014M14RIK\nAW209491\nMUT\n1700123L14RIK\n2610036D13RIK\nCOX15\nTMEM30A\nNSMCE4A\nTM2D2\nRHBDD3\nATXN2\nNFS1\n3110001I20RIK\nBC038156\nLOC100047782\n2410012H22RIK\nRILP\nA230062G08RIK\nPTTG1IP\nRAB1\nAFAP1L1\nLYRM5\n2310026E23RIK\nC330002I19RIK\nZFYVE20\nPOLI\nTOMM70A\nSLC7A6OS\nMAT2B\n4932438A13RIK\nLRRC8A\nSMO\nNUPL2\nTRPC2\nARSK\nD630023B12RIK\nMTFR1\n5730414N17RIK\nSCP2\nZRSR1\nNOL7\nC330018D20RIK\nIFT122\nLOC100046168\nD730039F16RIK\nSCYL1\n1700023B02RIK\n1700034H14RIK\nFBXO8\nPAIP1\nTMEM186\nATPAF1\nLOC100046254\nLOC100047604\nCOQ10A\nFN3K\nSIPA1L1\nSLC25A16\nSLC25A40\nRPS6KA5\nTRIM37\nLRRC61\nABHD3\nGBE1\nPARP16\nHSD3B2\nESM1\nDNAJC18\nDOLPP1\nLASS2\nWDR34\nRFESD\nCACNB4\n2310042D19RIK\nSRR\nBPNT1\n6530415H11RIK\nCLCC1\nTFB1M\n4632404H12RIK\nD4BWG0951E\nMED14\nADHFE1\nTHTPA\nCAT\nELL3\nAKR7A5\nMTMR14\nTIMM44\nSF1\nIPP\nIAH1\nTRIM23\nWDR89\nGSTZ1\nCRADD\n2510006D16RIK\nFBXL6\nLOC100044400\nZFP106\nCD55\n0610013E23RIK\nAFMID\nTMEM86A\nALDH6A1\nDALRD3\nSMYD4\nNME7\nFARS2\nTASP1\nCLDN10\nA930005H10RIK\nSLC9A6\nADK\nRBKS\n2210016F16RIK\nVWCE\n4732435N03RIK\nZFP11\nVLDLR\n9630013D21RIK\n4933407N01RIK\nFAHD1\nMIPOL1\n1810019D21RIK\n1810049H13RIK\nTFAM\nPAICS\n1110032A03RIK\nLOC
100044139\nDNAJC19\nBC016495\nA930041I02RIK\nRQCD1\nUSP34\nZCCHC3\nH2AFJ\nPHF7\n4921508D12RIK\nKMO\nPRPF18\nMCAT\nTXNDC4\n4921530L18RIK\nVPS13B\nSCRN3\nTOR1A\nAI316807\nACBD4\nFAH\nAPOOL\nCOL4A4\nLRRC19\nGNMT\nNR3C1\nSIP1\nASCC1\nFECH\nABHD14A\nARHGAP18\n2700046G09RIK\nYME1L1\nGK5\nGLO1\nSBK1\nCISD1\n2210011C24RIK\nNXT2\nNOTUM\nANKRD42\nUBE2E1\nNDUFV1\nSLC33A1\nCEP68\nRPS6KB1\nHYI\nALDH1A3\nMYNN\n3110048L19RIK\nRDH14\nPROZ\nGORASP1\nLOC674449\nZFP775\n5430437P03RIK\nNPY\nADH5\nSYBL1\n4930432O21RIK\nNAT9\nLOC100048387\nMETTL8\nENY2\n2410018G20RIK\nPGM2\nFGFR4\nMOBKL2B\nATAD3A\n4932432K03RIK\nDHTKD1\nUBOX5\nA530050D06RIK\nZDHHC5\nMGAT1\nNUDT6\nTPMT\nWBSCR18\nLOC100041586\nCDK5RAP1\n4833426J09RIK\nMYO6\nCPT1A\nGADD45GIP1\nTMBIM4\n2010309E21RIK\nASB9\n2610019F03RIK\n7530414M10RIK\nATP6V1B2\n2310068J16RIK\nDDT\nKLHDC4\nHPN\nLIFR\nOVOL1\nNUDT12\nCDAN1\nFBXO9\nFBXL3\nHOXA7\nALDH8A1\n3110057O12RIK\nABHD11\nPSMB1\nENSMUSG00000074286\nCHPT1\nOXSM\n2310009A05RIK\n1700001L05RIK\nZFP148\n39509\nMRPL9\nTMEM80\n9030420J04RIK\nNAGLU\nPLSCR2\nAGBL3\nPEX1\nCNO\nNEO1\nASF1A\nTNFSF5IP1\nPKIG\nAI931714\nD130020L05RIK\nCNTD1\nCLEC2H\nZKSCAN1\n1810044D09RIK\nMETTL7A\nSIAE\nFBXO3\nFZD5\nTMEM166\nTMED4\nGPR155\nRNF167\nSPTLC1\nRIOK2\nTGDS\nPMS1\nPITPNC1\nPCSK7\n4933403G14RIK\nEI24\nCREBL2\nTLN1\nMRPL35\n2700038C09RIK\nUBIE\nOSGEPL1\n2410166I05RIK\nWDR24\nAP4S1\nLRRC44\nB3BP\nITFG1\nDMXL1\nC1D\n', description='Paste your gene list (One gene per row).', section = 'section1') }}

transcription_libraries = {{ MultiCheckboxField(name='transcription_libraries', description='Select the Enrichr libraries you would like in your figure.', label='Transcription', default=[], section = 'section2',choices=[
    'ARCHS4_TFs_Coexp',
    'ChEA_2016',
    'ENCODE_and_ChEA_Consensus_TFs_from_ChIP-X',
    'ENCODE_Histone_Modifications_2015',
    'ENCODE_TF_ChIP-seq_2015',
    'Epigenomics_Roadmap_HM_ChIP-seq',
    'Enrichr_Submissions_TF-Gene_Coocurrence',
    'Genome_Browser_PWMs',
    'lncHUB_lncRNA_Co-Expression',
    'miRTarBase_2017',
    'TargetScan_microRNA_2017',
    'TF-LOF_Expression_from_GEO',
    'TF_Perturbations_Followed_by_Expression',
    'Transcription_Factor_PPIs',
    'TRANSFAC_and_JASPAR_PWMs',
    'TRRUST_Transcription_Factors_2019']) }}


pathways_libraries = {{ MultiCheckboxField(name='pathways_libraries', description='Select the Enrichr libraries you would like in your figure.', label='Pathways', default=[], section = 'section2',choices=[
    'ARCHS4_Kinases_Coexp',
    'BioCarta_2016',
    'BioPlanet_2019',
    'BioPlex_2017',
    'CORUM',
    'Elsevier_Pathway_Collection',
    'HMS_LINCS_KinomeScan',
    'HumanCyc_2016',
    'huMAP',
    'KEA_2015',
    'KEGG_2019_Human',
    'KEGG_2019_Mouse',
    'Kinase_Perturbations_from_GEO_down',
    'Kinase_Perturbations_from_GEO_up',
    'L1000_Kinase_and_GPCR_Perturbations_down',
    'L1000_Kinase_and_GPCR_Perturbations_up',
    'NCI-Nature_2016',
    'NURSA_Human_Endogenous_Complexome',
    'Panther_2016',
    'Phosphatase_Substrates_from_DEPOD',
    'PPI_Hub_Proteins',
    'Reactome_2016',
    'SILAC_Phosphoproteomics',
    'SubCell_BarCode',
    'Virus-Host_PPI_P-HIPSTer_2020',
    'WikiPathways_2019_Human',
    'WikiPathways_2019_Mouse']) }}    
    
  
ontologies_libraries = {{ MultiCheckboxField(name='ontologies_libraries', description='Select the Enrichr libraries you would like in your figure.', label='Ontologies', default=[], section = 'section2',choices=[
    'GO_Biological_Process_2018',
    'GO_Cellular_Component_2018',
    'GO_Molecular_Function_2018',
    'Human_Phenotype_Ontology',
    'Jensen_COMPARTMENTS',
    'Jensen_DISEASES',
    'Jensen_TISSUES',
    'MGI_Mammalian_Phenotype_Level_4_2019']) }} 

diseases_drugs_libraries = {{ MultiCheckboxField(name='diseases_drugs_libraries', description='Select the Enrichr libraries you would like in your figure.', label='Diseases/Drugs', default=[], section = 'section2',choices=[    
    'Achilles_fitness_decrease',
    'Achilles_fitness_increase',
    'ARCHS4_IDG_Coexp',
    'ClinVar_2019',
    'dbGaP',
    'DepMap_WG_CRISPR_Screens_Broad_CellLines_2019',
    'DepMap_WG_CRISPR_Screens_Sanger_CellLines_2019',
    'DisGeNET',
    'DrugMatrix',
    'DSigDB',
    'GeneSigDB',
    'GWAS_Catalog_2019',
    'LINCS_L1000_Chem_Pert_down',
    'LINCS_L1000_Chem_Pert_up',
    'LINCS_L1000_Ligand_Perturbations_down',
    'LINCS_L1000_Ligand_Perturbations_up',
    'MSigDB_Computational',
    'MSigDB_Oncogenic_Signatures',
    'Old_CMAP_down',
    'Old_CMAP_up',
    'OMIM_Disease',
    'OMIM_Expanded',
    'PheWeb_2019',
    'Rare_Diseases_AutoRIF_ARCHS4_Predictions',
    'Rare_Diseases_AutoRIF_Gene_Lists',
    'Rare_Diseases_GeneRIF_ARCHS4_Predictions',
    'Rare_Diseases_GeneRIF_Gene_Lists',
    'UK_Biobank_GWAS_v1',
    'Virus_Perturbations_from_GEO_down',
    'Virus_Perturbations_from_GEO_up',
    'VirusMINT']) }}
    
    
cell_types_libraries = {{ MultiCheckboxField(name='cell_types_libraries', description='Select the Enrichr libraries you would like in your figure.', label='Cell Types', default=[], section = 'section2',choices=[        
    'Allen_Brain_Atlas_down',
    'Allen_Brain_Atlas_up',
    'ARCHS4_Cell-lines',
    'ARCHS4_Tissues',
    'Cancer_Cell_Line_Encyclopedia',
    'CCLE_Proteomics_2020',
    'ESCAPE',
    'GTEx_Tissue_Sample_Gene_Expression_Profiles_down',
    'GTEx_Tissue_Sample_Gene_Expression_Profiles_up',
    'Human_Gene_Atlas',
    'Mouse_Gene_Atlas',
    'NCI-60_Cancer_Cell_Lines',
    'ProteomicsDB_2020',
    'Tissue_Protein_Expression_from_Human_Proteome_Map']) }}    
    
    
    
miscellaneous_libraries = {{ MultiCheckboxField(name='miscellaneous_libraries', description='Select the Enrichr libraries you would like in your figure.', label='Miscellaneous', default=[], section = 'section2',choices=[            
    'Chromosome_Location_hg19',
    'Data_Acquisition_Method_Most_Popular_Genes',
    'Enrichr_Libraries_Most_Popular_Genes',
    'Genes_Associated_with_NIH_Grants',
    'HMDB_Metabolites',
    'HomoloGene',
    'InterPro_Domains_2019',
    'NIH_Funded_PIs_2017_AutoRIF_ARCHS4_Predictions',
    'NIH_Funded_PIs_2017_GeneRIF_ARCHS4_Predictions',
    'NIH_Funded_PIs_2017_Human_AutoRIF',
    'NIH_Funded_PIs_2017_Human_GeneRIF',
    'Pfam_Domains_2019',
    'Pfam_InterPro_Domains',
    'Table_Mining_of_CRISPR_Studies']) }}    
    
    
legacy_libraries = {{ MultiCheckboxField(name='legacy_libraries', description='Select the Enrichr libraries you would like in your figure.', label='Legacy', default=[], section = 'section2',choices=[                
    'BioCarta_2013',
    'BioCarta_2015',
    'ChEA_2013',
    'ChEA_2015',
    'Chromosome_Location',
    'Disease_Signatures_from_GEO_down_2014',
    'Disease_Signatures_from_GEO_up_2014',
    'Drug_Perturbations_from_GEO_2014',
    'ENCODE_Histone_Modifications_2013',
    'ENCODE_TF_ChIP-seq_2014',
    'GO_Biological_Process_2013',
    'GO_Biological_Process_2015',
    'GO_Biological_Process_2017',
    'GO_Biological_Process_2017b',
    'GO_Cellular_Component_2013',
    'GO_Cellular_Component_2015',
    'GO_Cellular_Component_2017',
    'GO_Cellular_Component_2017b',
    'GO_Molecular_Function_2013',
    'GO_Molecular_Function_2015',
    'GO_Molecular_Function_2017',
    'GO_Molecular_Function_2017b',
    'HumanCyc_2015',
    'KEA_2013',
    'KEGG_2013',
    'KEGG_2015',
    'KEGG_2016',
    'MGI_Mammalian_Phenotype_2013',
    'MGI_Mammalian_Phenotype_2017',
    'MGI_Mammalian_Phenotype_Level_3',
    'MGI_Mammalian_Phenotype_Level_4',
    'NCI-Nature_2015',
    'Panther_2015',
    'Reactome_2013',
    'Reactome_2015',
    'TargetScan_microRNA',
    'Tissue_Protein_Expression_from_ProteomicsDB',
    'WikiPathways_2013',
    'WikiPathways_2015',
    'WikiPathways_2016']) }} 

crowd_libraries = {{ MultiCheckboxField(name='crowd_libraries', description='Select the Enrichr libraries you would like in your figure.', label='Crowd', default=[], section = 'section2',choices=[                
    'Aging_Perturbations_from_GEO_down',
    'Aging_Perturbations_from_GEO_up',
    'Disease_Perturbations_from_GEO_down',
    'Disease_Perturbations_from_GEO_up',
    'Drug_Perturbations_from_GEO_down',
    'Drug_Perturbations_from_GEO_up',
    'Gene_Perturbations_from_GEO_down',
    'Gene_Perturbations_from_GEO_up',
    'Ligand_Perturbations_from_GEO_down',
    'Ligand_Perturbations_from_GEO_up',
    'MCF7_Perturbations_from_GEO_down',
    'MCF7_Perturbations_from_GEO_up',
    'Microbe_Perturbations_from_GEO_down',
    'Microbe_Perturbations_from_GEO_up',
    'RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO',
    'SysMyo_Muscle_Gene_Sets']) }}


# Name of the color scheme; it is matched against the palette names in the
# "Assign Color Scheme" cell, so the choices here must stay in sync with that cell.
color_choice = '{{ ChoiceField(name='color_choice', label='Color Scheme', default='blue/purple/orange', description='Choose the color scheme you want for your plots.', section = 'section3', choices=[
    'blue/purple/orange',
    'orange',
    'red/orange',
    'blue/purple',
    'purple/pink',
    'green',
    'rainbow',
    'grayscale']) }}'

# Downstream code treats '' as "draw no line"; any other value is passed to float() and
# drawn/compared on the plot's y-axis, i.e. on the -log(p value) scale rather than as a raw p value.
significance_line = {{ StringField(name='significance_line', label='Significance Line Value', default='', description='Enter a value at which you want a horizontal line to appear (generally used to indicate that values above the line are significant).', section = 'section3') }}

# Placement of the library legend in the static plot ('below' or 'side').
legend_location = '{{ ChoiceField(name='legend_location', label='Legend Location', default='below', description='Choose if you want the plot legend to be to the side or below the figure (static plot).', choices= ['below', 'side'], section = 'section3') }}'

# Offset direction of the numeric labels attached to points above the significance line (static plot).
label_location = '{{ ChoiceField(name='label_location', label='Point Label Location', default='right', description='Choose where you want the labels of the significant points to be located (static plot).', choices= ['right', 'above', 'below'], section = 'section3') }}'

# File extensions to save the static figure as; one output file is written per selected format.
figure_file_format = {{ MultiCheckboxField(name='figure_file_format', label='File Format', choices = ['png', 'pdf', 'svg' ], default=['png'], description='Select the format(s) to save your figure (static plot).', section = 'section4') }}

# Base name (without extension) of the saved figure files.
output_file_name = {{ StringField(name='output_file_name', label='File Name', default='Enrichr_results', description='Enter a name/description to save your figure (static plot).', section = 'section4') }}

# One output path per selected format, built as "<output_file_name>.<format>".
final_output_file_names = ['.'.join((output_file_name, file_type)) for file_type in figure_file_format]

# Merge the selections from every library category into one sorted array.
selected_by_category = [
    transcription_libraries,
    pathways_libraries,
    ontologies_libraries,
    diseases_drugs_libraries,
    cell_types_libraries,
    miscellaneous_libraries,
    legacy_libraries,
    crowd_libraries,
]
enrichr_libraries = np.sort(sum(selected_by_category, []))


### Import gene list

In [None]:
# Import the gene list from the uploaded file or, failing that, from the text box.
# An uploaded file takes precedence over the text box when both are given.
if gene_list_filename != '':
    # the context manager closes the file even if reading raises
    with open(gene_list_filename, 'r') as open_gene_list_file:
        lines = open_gene_list_file.readlines()
else:
    lines = gene_list_input.split('\n')

# Strip surrounding whitespace and drop blank rows (e.g. the one produced by a
# trailing newline) so that no empty gene names are submitted to Enrichr.
genes = [x.strip() for x in lines if x.strip()]

### Get Enrichr Results

In [None]:
# Function to get Enrichr Results
# Takes a gene list and Enrichr libraries as input
def Enrichr_API(enrichr_gene_list, all_libraries):
    """Submit a gene list to Enrichr and fetch its enrichment results.

    The gene list is uploaded once; the enrichment endpoint is then queried
    for each requested library and the per-library tables are concatenated.

    Args:
        enrichr_gene_list: iterable of gene symbols (strings).
        all_libraries: iterable of Enrichr library names to query.

    Returns:
        A two-element list ``[results_df, short_id]``. ``results_df`` holds
        the rows returned by Enrichr (integer-labelled columns) plus a
        'library' column containing the library name with underscores
        removed. ``short_id`` is the 'shortId' of the uploaded list, as a
        string.

    Raises:
        ValueError: if ``all_libraries`` is empty.
        Exception: if either Enrichr request returns a non-OK status.
    """
    all_libraries = list(all_libraries)
    if not all_libraries:
        raise ValueError('At least one Enrichr library is required')

    # upload the gene list once; the returned id is valid for every library query
    ENRICHR_URL = 'http://amp.pharm.mssm.edu/Enrichr/addList'
    genes_str = '\n'.join(enrichr_gene_list)
    description = 'Example gene list'
    payload = {
        'list': (None, genes_str),
        'description': (None, description)
    }

    response = requests.post(ENRICHR_URL, files=payload)
    if not response.ok:
        raise Exception('Error analyzing gene list')

    data = response.json()
    user_list_id = data['userListId']
    short_id = data["shortId"]

    # brief pause between the upload and the first query, as in the original flow
    time.sleep(0.5)

    ENRICHR_URL = 'http://amp.pharm.mssm.edu/Enrichr/enrich'
    library_frames = []
    for library_name in all_libraries:
        # the Enrichr 'enrich' endpoint takes the library name as 'backgroundType'
        response = requests.get(
            ENRICHR_URL,
            params={'userListId': user_list_id, 'backgroundType': library_name}
        )
        if not response.ok:
            raise Exception('Error fetching enrichment results')

        data = response.json()

        results_df = pd.DataFrame(data[library_name])
        # adds library name to the data frame so the libraries can be distinguished
        results_df['library'] = library_name.replace('_', '')
        library_frames.append(results_df)

    # previously only the last library's table survived the loop
    results_df = pd.concat(library_frames, ignore_index=True)

    return([results_df, str(short_id)])


### Assign Color Scheme

In [None]:
# Palettes keyed by the scheme name offered in the 'Color Scheme' field.
color_palettes = {
    'orange': ['#FF5A00', '#FFA700', '#FF7400', '#FFDB00'],
    'red/orange': ['#FF0000',  '#FFCC00', '#FF6600', '#FF9800'],
    'blue/purple': ['#0000FF', '#A3A3FF', '#4949FF', '#7879FF'],
    'green': ['#2eb62c', '#abe098', '#57c84d', '#c5e8b7'],
    'rainbow': ['red', '#fef200', 'green', 'blue', 'purple'],
    'blue/purple/orange': ['#003f5c', '#7a5195', '#ef5675', '#ffa600'],
    'purple/pink': ['#9800b0', '#ef83bd', '#bc37b1', '#d95db5'],
    'grayscale': ['#000000', '#7a7a7a', '#3c3c3c', '#bdbdbd'],
}

# an unrecognised scheme name leaves the palette empty
colors = color_palettes.get(color_choice, [])

### Plot Enrichr Results 

In [None]:
# Functions that build, save and show the static plot

def _library_results(library_name):
    """Fetch one library's Enrichr results as a frame ordered alphabetically by gene set."""
    results_df = Enrichr_API(genes, [library_name])[0]
    if results_df.empty:
        # Enrichr returned no rows for this library; contribute no points
        return pd.DataFrame({
            "Gene Set": pd.Series(dtype=object),
            "-log(p value)": pd.Series(dtype=float),
            "Library": pd.Series(dtype=object),
        })

    # columns 1 and 2 of the Enrichr rows are used as the term name and its p-value
    neg_log_pvalues = np.log10(list(results_df[2])) * -1
    rows = sorted(zip(results_df[1], neg_log_pvalues, results_df['library']), key=itemgetter(0))
    return pd.DataFrame(rows, columns=["Gene Set", "-log(p value)", "Library"])


def _label_significant_points(ax, points, legend_anchor):
    """Draw the significance line, number the points above it and list them in a legend."""
    threshold = float(significance_line)
    ax.axhline(y = threshold, color = 'black', lw = 1)

    # offset (in points) of each numeric label relative to its data point
    label_offsets = {'right': (6, -3), 'below': (-3, -14), 'above': (-4, 5)}
    coords = label_offsets.get(label_location, label_offsets['right'])

    sig_point_handles = []
    point_label = 1
    for index, row in points.iterrows():
        if row["-log(p value)"] > threshold:
            ax.annotate(str(point_label), xy = (row["Gene Set"], row["-log(p value)"]), xytext = coords, textcoords='offset points')
            # recover the raw p-value from its -log10 for display
            actual_pvalue = "{:.5e}".format(10**(-1*row["-log(p value)"]))
            sig_point_handles += [mpatches.Patch(color = 'white', label = str(point_label) + ": " + row["Gene Set"] + ", " + str(actual_pvalue))]
            point_label += 1

    # create a legend for the significant points
    if sig_point_handles:
        leg = ax.legend(handles = sig_point_handles, handlelength=0, handletextpad=0, loc='center left', bbox_to_anchor=legend_anchor, title="Significant Points")
        # registering the legend as an artist keeps it when a second legend is added later
        ax.add_artist(leg)


def enrichr_figure():
    """Plot, save and show the static Enrichr figure for the selected libraries.

    A single library is drawn as a scatter plot in the first palette color with
    the library name as the x-axis label; several libraries are drawn as a
    Manhattan plot with one color per library (cycling through the palette) and
    a library legend placed according to ``legend_location``. When
    ``significance_line`` is not empty, a horizontal line is drawn at that
    value and every point above it is numbered and listed in a legend.

    The figure is saved once per entry of ``final_output_file_names``.

    Returns:
        (sorted_data, groups): ``sorted_data`` is a DataFrame with the columns
        "Gene Set", "-log(p value)" and "Library" (library name without
        underscores); ``groups`` is ``sorted_data`` grouped by "Library".

    Raises:
        ValueError: if no Enrichr library was selected.
    """
    if len(enrichr_libraries) == 0:
        raise ValueError('No Enrichr library selected: select at least one library to plot.')

    fig, ax = plt.subplots(figsize=(10,4))

    # One frame per library, each sorted alphabetically by gene set. The single-library
    # case now produces the same structure, so the return value (and the interactive
    # plot that consumes it) works for one library as well as for several.
    sorted_data = pd.concat([_library_results(library_name) for library_name in enrichr_libraries])

    # group data by library
    groups = sorted_data.groupby("Library")

    if len(enrichr_libraries) == 1:
        # make a simple scatterplot
        ax.scatter(sorted_data["Gene Set"].tolist(), sorted_data["-log(p value)"].tolist(), color = colors[0])

        # remove labels and tick marks on the x-axis
        ax.get_xaxis().set_ticks([])
        ax.set_xlabel(enrichr_libraries[0])

        # add significance line and label significant points (in alphabetical order)
        if significance_line != '':
            _label_significant_points(ax, sorted_data, (1, 0.5))

    else:
        # make a manhattan plot, cycling through the palette if there are more libraries than colors
        for color_index, (name, group) in enumerate(groups):
            ax.plot(group["Gene Set"], group["-log(p value)"], marker="o", linestyle="", label=name, color = colors[color_index % len(colors)])

        # remove labels and tick marks on the x-axis
        ax.get_xaxis().set_ticks([])

        # add significance line and label significant points
        if significance_line != '':
            # sort by p-value so the significant points are numbered in order of significance
            sorted_pvalue_data = sorted_data.sort_values(by = ["-log(p value)"], ascending = False)
            # with the library legend on the side, push this legend further right to avoid overlap
            legend_anchor = (1.5, 0.5) if legend_location == 'side' else (1, 0.5)
            _label_significant_points(ax, sorted_pvalue_data, legend_anchor)

        # adds a legend for the libraries in the location specified
        if legend_location == 'below':
            # shrink current axis's height by 10% on the bottom
            box = ax.get_position()
            ax.set_position([box.x0, box.y0 + box.height * 0.1,
                            box.width, box.height * 0.9])

            # put a legend below current axis
            ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05),
                    fancybox=True, shadow=True, ncol=5)

        elif legend_location == 'side':
            # shrink current axis by 20%
            box = ax.get_position()
            ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

            # put a legend to the right of the current axis
            ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

    ax.set_ylabel("-log(p value)")

    # save results
    for plot_name in final_output_file_names:
        fig.savefig(plot_name, bbox_inches = 'tight')

    plt.show()
    return sorted_data, groups

In [None]:
# Draw and save the static figure; sorted_data is reused by the interactive Bokeh plot below.
sorted_data, groups = enrichr_figure()

In [None]:
# Show one download link per saved figure, prefixed with its file format.
for file_type, saved_path in zip(figure_file_format, final_output_file_names):
    link_prefix = 'Download ' + file_type + ': '
    display(FileLink(saved_path, result_html_prefix=link_prefix))

### Having trouble with overlapping point labels?
Try moving the labels to a different location, plotting fewer libraries, or plotting only one library at a time.

## Interactive plot using Bokeh
You can hover over the data points in this plot to see their associated gene set and p-value. Also check out the toolbar on the right side of the plot, which allows you to pan, box zoom, reset the view, save the plot, and turn hover viewing on/off. This plot may open in a new window.

In [None]:
bokeh.io.output_notebook(INLINE)

# split data frame into smaller data frames by library
# (the 'Library' column holds the library names with underscores removed)
list_of_df = [
    sorted_data[sorted_data['Library'] == library_name.replace('_', '')]
    for library_name in enrichr_libraries
]

# Collect the gene set names for the categorical x-axis. A gene set name can occur in
# more than one library, and a categorical range must not contain duplicate factors,
# so repeated names are dropped while the first-seen order is kept.
list_of_xaxis_values = []
for df in list_of_df:
    list_of_xaxis_values += df["Gene Set"].values.tolist()
list_of_xaxis_values = list(dict.fromkeys(list_of_xaxis_values))

# define the output figure and the features we want
p = figure(x_range = list_of_xaxis_values, plot_height=300, plot_width=1000, tools='pan, box_zoom, hover, reset, save')

# loop over all libraries, cycling through the palette if there are more libraries than colors
r = []
for color_index, df in enumerate(list_of_df):
    library_color = colors[color_index % len(colors)]

    # calculate actual p value from -log(p value)
    actual_pvalues = [
        "{:.5e}".format(10**(-1*log_value))
        for log_value in df["-log(p value)"].values.tolist()
    ]

    # define ColumnDataSource with our data for this library
    source = ColumnDataSource(data=dict(
        x = df["Gene Set"].values.tolist(),
        y = df["-log(p value)"].values.tolist(),
        pvalue = actual_pvalues,
    ))

    # plot data from this library
    r += [p.circle(x = 'x', y = 'y', size=5, fill_color=library_color, line_color=library_color, line_width=1, source = source)]

# create custom legend for the libraries (renderers were created in the same order as enrichr_libraries)
legend_items = [
    LegendItem(label = library_name, renderers = [renderer])
    for library_name, renderer in zip(enrichr_libraries, r)
]

legend = Legend(items = legend_items, location = (0, 160))
p.add_layout(legend, 'right')

# add significance line
if significance_line != '':
    hline = Span(location = float(significance_line), dimension='width', line_color='black', line_width=1)
    # a Span is an annotation, so it is attached with add_layout rather than pushed into p.renderers
    p.add_layout(hline)

p.background_fill_color = 'white'
p.xaxis.major_tick_line_color = None
p.xaxis.major_label_text_font_size = '0pt'
p.y_range.start = 0
p.yaxis.axis_label = '-log(p value)'

p.hover.tooltips = [
    ("Gene Set", "@x"),
    ("p value", "@pvalue"),
]

show(p)