## Drug Set Consensus Enrichment
Author: Eryk Kropiwnicki | eryk.kropiwnicki@icahn.mssm.edu

In [None]:
#%%appyter init
# The lambda binds the built-in `globals` as a default argument and calls it
# when invoked, so `magic.init` receives a callable that returns this
# notebook's global namespace.
# NOTE(review): presumably this is what registers the %%appyter cell magics
# used by the cells below - confirm against the appyter documentation.
from appyter import magic
magic.init(lambda _=globals: _())

In [None]:
import csv
import json
import math
import sys
import time
import warnings

import matplotlib
import pandas as pd
import requests
import seaborn as sns
from IPython.display import display, IFrame, Markdown
from matplotlib import pyplot as plt
from tqdm import tqdm

In [None]:
%%appyter hide_code
{# Input form definition: declares the 'data' section and, inside it, the
   drug-set file field whose value is bound to `gmt_file`. The field's
   constraint only accepts file names ending in ".gmt". #}
{% do SectionField(
    name='data',
    title='Drugmonizome Consensus Terms',
    subtitle='This appyter returns consensus terms from Drugmonizome signature search results using a set of drug sets',
    img='drugmonizome_logo.ico'
) %}

{% set gmt_file = FileField(
    constraint='.*\.gmt$',
    name='gmt_files', 
    label='Drug Set', 
    default='example.gmt',
    examples={
        'example.gmt': 'https://appyters.maayanlab.cloud/storage/Drugmonizome_Consensus/example.gmt'
    },
    section='data'
) %}

### Set Global Variables and Import Input

In [None]:
%%appyter code_exec
# Drugmonizome API URLs
# entities_endpoint:   POST target used by get_entity_uuids to find entities by name
# signatures_endpoint: base URL; enrich appends a signature UUID to fetch its metadata
# enrichment_endpoint: POST target used by enrich (library name + entity UUIDs)
entities_endpoint = 'https://maayanlab.cloud/drugmonizome/metadata-api/entities/find'
signatures_endpoint = 'https://maayanlab.cloud/drugmonizome/metadata-api/signatures/'
enrichment_endpoint = 'https://maayanlab.cloud/drugmonizome/data-api/api/v1/enrich/overlap'

# Figure & Table Labels
# Running caption numbers; the cells below increment them after each
# table/figure caption they display.
table = 1
figure = 1

# Drug set libraries
# Iterated by the query, table and analysis cells below (one pass per library).
# NOTE(review): presumably the field renders to a Python list of the checked
# library names - confirm against the appyter field documentation.
libraries = {{MultiCheckboxField(
    name = 'datasets',
    label = 'Drug set libraries from Drugmonizome',
    choices = [
            'L1000FWD_GO_Biological_Processes_drugsetlibrary_up',
            'L1000FWD_GO_Biological_Processes_drugsetlibrary_down',
            'L1000FWD_GO_Cellular_Component_drugsetlibrary_up',
            'L1000FWD_GO_Cellular_Component_drugsetlibrary_down',
            'L1000FWD_GO_Molecular_Function_drugsetlibrary_up',
            'L1000FWD_GO_Molecular_Function_drugsetlibrary_down',
            'L1000FWD_KEGG_Pathways_drugsetlibrary_up',
            'L1000FWD_KEGG_Pathways_drugsetlibrary_down',
            'L1000FWD_signature_drugsetlibrary_up',
            'L1000FWD_signature_drugsetlibrary_down',
            'L1000FWD_predicted_side_effects',
            'Geneshot_associated_drugsetlibrary',
            'Geneshot_predicted_coexpression_drugsetlibrary',
            'Geneshot_predicted_autorif_drugsetlibrary',
            'Geneshot_predicted_generif_drugsetlibrary',
            'Geneshot_predicted_enrichr_drugsetlibrary',
            'Geneshot_predicted_tagger_drugsetlibrary',
            'KinomeScan_kinase_drugsetlibrary',
            'ATC_drugsetlibrary',
            'CREEDS_signature_drugsetlibrary_up',
            'CREEDS_signature_drugsetlibrary_down',
            'DrugRepurposingHub_moa_drugsetlibrary',
            'DrugRepurposingHub_target_drugsetlibrary',
            'Drugbank_smallmolecule_target_drugsetlibrary',
            'Drugbank_smallmolecule_carrier_drugsetlibrary',
            'Drugbank_smallmolecule_transporter_drugsetlibrary',
            'Drugbank_smallmolecule_enzyme_drugsetlibrary',
            'DrugCentral_target_drugsetlibrary',
            'PharmGKB_OFFSIDES_side_effects_drugsetlibrary',
            'PharmGKB_snp_drugsetlibrary',
            'SIDER_side_effects_drugsetlibrary',
            'SIDER_indications_drugsetlibrary',
            'STITCH_target_drugsetlibrary',
            'RDKIT_maccs_fingerprints_drugsetlibrary'
            ],
    default = ['L1000FWD_GO_Biological_Processes_drugsetlibrary_down',
               'L1000FWD_GO_Biological_Processes_drugsetlibrary_up'
              ],
    section = 'data'
)}}

# User-defined parameters
# Significance threshold: enrich keeps only results whose p-value is strictly
# below alpha.
alpha = {{FloatField(
    name='alpha',
    label='p-value cutoff',
    default=0.05,
    section='data'
)}}

# Number of top-ranked terms kept in the consensus tables and plots.
# NOTE(review): the form field is registered under the name 'min_count' even
# though it feeds top_results; renaming it would change the form's field name.
top_results = {{IntField(
    name = 'min_count',
    label = 'Top results', 
    description = 'Number of top results to keep',
    default = 20, 
    section = 'data'
)}}

In [None]:
# Plotting functions
# 50-step cubehelix palette running from light (light=1) to dark (dark=0).
cmap = sns.cubehelix_palette(50, hue=0.05, rot=0, light=1, dark=0)
def heatmap(df, filename, width=20, height=10):
    """Draw a clustered heatmap of ``df``, show it and save it to ``filename``.

    Rows and columns are reordered by seaborn's hierarchical clustering, but
    both dendrograms are hidden so only the reordered matrix and the colour
    bar are visible.

    Args:
        df: Table to plot. The notebook passes a (term x drug set) DataFrame
            of scores; it must contain at least one row and one column.
        filename: Path the figure is written to via ``ClusterGrid.savefig``.
        width: Figure width passed to ``figsize``.
        height: Figure height passed to ``figsize``.
    """
    cg = sns.clustermap(
        df,
        cmap=cmap,
        figsize=(width, height),
        cbar_pos=(0.02, 0.65, 0.05, 0.18),
        # Hierarchical clustering needs at least two observations; with a
        # single term or a single drug set the linkage step raises, so that
        # axis is left in its original order instead.
        row_cluster=df.shape[0] > 1,
        col_cluster=df.shape[1] > 1,
    )
    cg.ax_row_dendrogram.set_visible(False)
    cg.ax_col_dendrogram.set_visible(False)
    display(cg)
    plt.show()
    cg.savefig(filename)
    
def stackedBarPlot(df, filename, width = 15, height = 10):
    """Plot the top-scoring rows of ``df`` as a horizontal stacked bar chart.

    Rows are ranked by their mean across columns; the ``top_results``
    (notebook-level setting) highest-ranked rows are kept and drawn with the
    largest mean at the top of the chart. The figure is saved as SVG and then
    shown.

    Args:
        df: Table of scores, one row per term and one column per drug set.
            It is not modified.
        filename: Path the SVG figure is written to.
        width: Figure width passed to ``figsize``.
        height: Figure height passed to ``figsize``.
    """
    # Rank on a copy: writing the helper 'mean' column into the caller's
    # DataFrame would leak it into every later sum, heatmap and bar plot
    # computed from the same table (e.g. when the analysis cell is re-run).
    ranked = df.assign(mean=df.mean(axis=1))
    df_bar = ranked.sort_values(by = 'mean', ascending = False)[0:top_results]\
        .sort_values(by = 'mean')\
        .drop(['mean'], axis = 1)

    df_bar.plot.barh(stacked = True, figsize = (width,height), fontsize = 20)
    plt.legend(bbox_to_anchor=(1.25, 0.30), loc='lower right', prop={'size': 16})
    plt.xlabel('-log(p)',labelpad = 20, fontsize = 'xx-large')
    # Save before plt.show(): pyplot-level savefig writes the current figure.
    plt.savefig(filename, format = 'svg', bbox_inches='tight')
    plt.show()

In [None]:
# Drugmonizome enrichment functions
def get_entity_uuids(drug_list):
    """Look up entities by name and return an ``{entity id: name}`` mapping.

    Sends a single POST to ``entities_endpoint`` with a filter that matches
    every entity whose ``meta.Name`` is one of ``drug_list``.

    Args:
        drug_list: Drug names to look up.

    Returns:
        dict mapping each returned entity's ``id`` to its ``meta.Name``.
        Names without a matching entity are simply absent.
    """
    name_filter = {"meta.Name": {"inq": drug_list}}
    response = requests.post(
        entities_endpoint,
        json={"filter": {"where": name_filter}},
    )

    # Invert the records into an id -> name lookup table.
    return {record['id']: record['meta']['Name'] for record in response.json()}

def enrich(entity_lookup, library, alpha):
    """Run an overlap enrichment for one library and keep significant terms.

    Posts the entity UUIDs to ``enrichment_endpoint`` and, for every result
    whose p-value is strictly below ``alpha``, fetches the signature metadata
    from ``signatures_endpoint`` to resolve the term name.

    Args:
        entity_lookup: dict mapping entity UUID -> drug name, as returned by
            ``get_entity_uuids``. Its keys form the query set.
        library: Name of the drug set library to query.
        alpha: p-value cutoff (exclusive).

    Returns:
        list of dicts with keys ``'term'`` (name of the signature's first
        term), ``'p-value'`` and ``'overlap'`` (sorted, de-duplicated drug
        names; UUIDs missing from ``entity_lookup`` are kept as-is). If a
        response cannot be parsed, a warning is issued and the results
        collected up to that point are returned.
    """
    output = []
    payload = {'database': library ,'entities': list(entity_lookup.keys())}
    res = requests.post(enrichment_endpoint, json = payload)
    try:
        for item in res.json()['results']:
            if item['p-value'] < alpha:
                signature = requests.get(signatures_endpoint + item['uuid']).json()
                # Decode entity UUIDs back to names; sort so the overlap list
                # is reproducible rather than following arbitrary set order.
                overlap = {entity_lookup.get(x, x) for x in item['overlap']}
                output.append({'term': signature['meta']['Term'][0]['Name'],
                               'p-value': item['p-value'],
                               'overlap': sorted(overlap, key=str)
                                  })
    except ValueError as err:
        # json.JSONDecodeError is a ValueError subclass. Stay best-effort, but
        # tell the user the results for this library may be incomplete.
        warnings.warn(
            "Could not parse the Drugmonizome response for library %r; "
            "keeping the %d result(s) collected so far (%s)"
            % (library, len(output), err),
            RuntimeWarning,
        )

    return output

In [None]:
%%appyter code_exec
# Parse the uploaded GMT file: one drug set per tab-separated line. The first
# column is the set name, the second column is ignored and every remaining
# column is a drug name.
drug_sets = {}
with open({{gmt_file}}, 'r') as f:
    reader = csv.reader(f, delimiter = '\t')
    for row in reader:
        # csv.reader yields an empty list for a blank line (common at the end
        # of a file); indexing it would raise IndexError.
        if not row:
            continue
        # Normalise names (trim + lower-case) and de-duplicate them.
        drug_names = {str(drug).strip().lower() for drug in row[2:]}
        # Trailing tabs leave empty cells behind; they are not drug names.
        drug_names.discard('')
        drug_sets[row[0]] = {
            "drug_list": list(drug_names)
        }

### Query drug sets through Drugmonizome to retrieve enrichment results

In [None]:
# For every drug set: resolve its drug names to entity UUIDs once, then run
# the enrichment against each selected library and store the results under
# drug_sets[<set name>]["libraries"][<library>].
for description, values in tqdm(drug_sets.items()):
    drugs = values["drug_list"]
    entity_uuids = get_entity_uuids(drugs)

    # `values` is the same dict object as drug_sets[description].
    library_results = values["libraries"] = {}

    for library in libraries:
        results = enrich(entity_uuids, library, alpha)
        library_results[library] = results
        # Short pause between consecutive enrichment requests.
        time.sleep(0.2)

In [None]:
# Build one (term x drug set) table of -ln(p) scores per library, write it to
# "<library>_enrichment_table.tsv" and display a preview with a numbered caption.
enrichment_df = {}
num_sets = len(drug_sets)
# math.log(0) raises ValueError, so a p-value reported as exactly 0 is clamped
# to the smallest positive normalised float before taking the logarithm.
min_p = sys.float_info.min
for lib in libraries:
    term_df = pd.DataFrame(columns=drug_sets.keys())
    for k,v in drug_sets.items():
        sigs = v["libraries"][lib]
        for sig in sigs:
            term = sig['term']
            p = sig['p-value']
            # Assigning to a new row label adds the term to the table.
            term_df.at[term, k] = -math.log(max(p, min_p))
    # The frame starts out with object columns; cast to float explicitly
    # instead of relying on fillna's implicit object-to-float downcasting,
    # then score terms that were not significant for a drug set as 0.
    term_df = term_df.astype(float).fillna(0.0)

    term_df.to_csv("%s_enrichment_table.tsv"%lib, sep="\t")
    enrichment_df[lib] = term_df
    display(term_df.head(10))
    # "\\ln" keeps the rendered text identical while avoiding the invalid
    # "\l" escape sequence in a non-raw string.
    display(Markdown(" **Table %d** The table above shows the enrichment analysis results of %d drug sets \
        with the **%s** library in Drugmonizome. Each score is computed by getting the negative logarithm of the p-value \
        ($-\\ln{pval}$). [Download complete table](%s_enrichment_table.tsv)"%(table, num_sets, lib.replace("_"," "), lib)))
    table += 1

## Analysis
A heatmap and a stacked bar plot are created from each library's enrichment results.

In [None]:
# For each library: rank terms by their summed score across drug sets, save
# and preview the top `top_results` consensus terms, then draw a heatmap of
# those terms and a stacked bar plot of the library's table.
for lib in libraries:
    df = enrichment_df[lib]
    consensus = df.sum(1).sort_values(ascending=False)[0:top_results].to_frame(name="scores")
    consensus.to_csv("%s_consensus_table.tsv"%lib, sep="\t")
    display(consensus.head(10))
    display(Markdown("**Table %d** %s consensus terms. \
        [Download top %d terms](%s_consensus_table.tsv)"%(table, lib.replace("_"," "), top_results, lib)))
    table += 1

    consensus_df = df.loc[consensus.index]
    # Rows are terms and columns are drug sets, so "no terms" means no rows.
    # Checking the column count never triggered and let an empty table reach
    # the plotting code, which raises on empty input.
    if not consensus_df.empty:
        heatmap(consensus_df, "%s_consensus_heatmap.svg"%lib)
        display(Markdown("**Figure %d** Heatmap for the top %d consensus terms for **%s**. [Download figure](%s_consensus_heatmap.svg)"%(figure, top_results, lib.replace("_"," "), lib)))
        figure += 1
    else:
        print("No terms found")

    if not df.empty:
        stackedBarPlot(df, "%s_consensus_barplot.svg"%lib)
        display(Markdown("**Figure %d** Stacked bar plot for the top %d consensus terms for **%s**. [Download figure](%s_consensus_barplot.svg)"%(figure, top_results, lib.replace("_"," "), lib)))
        figure += 1
    else:
        print("No terms found")