## L1000FWD Drug to EnrichrID Mapping Using the Enrichr API
#### ALL DATABASES ACCESSED 03/2020
##### Author : Eryk Kropiwnicki | eryk.kropiwnicki@icahn.mssm.edu

### Setup: imports and shared export helpers
#### Enrichr API documentation: https://amp.pharm.mssm.edu/Enrichr/help#api

In [1]:
import json
import pandas as pd
import requests
import time
from collections import defaultdict
import csv
import numpy as np
import os

In [2]:
# Step into the shared scripts directory before importing `export_script`
# (presumably so the module can be found by its bare name — TODO confirm).
os.chdir('../../scripts')
# NOTE(review): wildcard import — the names it adds to the namespace are not
# visible from this notebook; check export_script for what is actually used.
from export_script import *
# Change directory again; this relative path is resolved from the scripts
# directory (presumably back to this notebook's own folder — TODO confirm).
os.chdir('../notebooks/L1000FWD')

### Querying Enrichr API to retrieve user IDs for each downregulated gene list 
#### Platform: https://amp.pharm.mssm.edu/Enrichr/help#api

In [3]:
def enrichr_id_retrieval(dictionary):
    '''
    Queries a genelist (value of input dictionary) through the Enrichr API and creates an output dictionary of your input dictionary key matched to the retrieved EnrichrID.
    A term whose request fails (non-OK HTTP status, unparseable body, or a response without 'userListId') is counted as failed and left out of the output instead of aborting the whole run.
    Parameters:
    dictionary (dict): Key (e.g. drug) matched to value (genelist, an iterable of gene strings)
    Returns:
    output_dict (dict): Key (e.g. drug) matched to value (EnrichrID); failed keys are omitted
    '''

    # Associating genelists with userListIDs in Enrichr #
    enrichr_url = 'http://amp.pharm.mssm.edu/Enrichr/addList'
    failed_list = []
    output_dict = {}

    for term, genelist in dictionary.items():
        # One gene per line in the submitted text
        genes_str = '\n'.join(genelist)

        # A (None, value) tuple under `files=` is sent as a plain multipart form field
        payload = {'list': (None, genes_str)}
        response = requests.post(enrichr_url, files=payload)

        user_id = None
        if response.ok:
            try:
                user_id = response.json()['userListId']
            except (ValueError, KeyError, TypeError):
                # Body was not JSON, or was JSON without a 'userListId' entry
                user_id = None

        if user_id is None:
            failed_list.append(term)
        else:
            output_dict[term] = user_id

        # Pause after every request, successful or not, to avoid hammering the server
        time.sleep(0.5)

    print(str(len(failed_list))+ " genelists failed to be matched with a EnrichrID!")
    return output_dict

In [4]:
# Creating dictionary of signature ids matched to gene signatures (downregulated).
# Each tab-separated row contributes one entry: column 0 is the key and
# columns 2 onward are the genes (column 1 is not used here).
d_down = {}
with open('input/L1000FWD_enrichr_query_down.txt', 'r') as handle:
    for row in csv.reader(handle, delimiter='\t'):
        d_down[row[0]] = [str(gene) for gene in row[2:]]

In [5]:
# Associating downregulated genelists with userListIDs in Enrichr #
# One POST per entry of d_down with a 0.5 s pause after each, so the runtime
# of this cell grows with the number of gene lists.
id_dict_down = enrichr_id_retrieval(d_down)

0 genelists failed to be matched with a EnrichrID!


In [6]:
# Exporting downregulated genelist : userListID associations #
# The keys of id_dict_down start out as the frame's index; reset_index moves
# them into a regular column so they are written out as 'pert_id'.
df_down = pd.DataFrame.from_dict(id_dict_down, orient='index').reset_index()
df_down.columns = ['pert_id', 'enrichr_id']
df_down.to_csv(
    'input/EnrichrIDs_downregulated.tsv',
    sep='\t',
    index=False,
)

### Querying Enrichr API to retrieve user IDs for each upregulated gene list 
#### Platform: https://amp.pharm.mssm.edu/Enrichr/help#api

In [7]:
# Creating dictionary of signature ids matched to gene signatures (upregulated).
# Each tab-separated row contributes one entry: column 0 is the key and
# columns 2 onward are the genes (column 1 is not used here).
d_up = {}
with open('input/L1000FWD_enrichr_query_up.txt', 'r') as handle:
    for row in csv.reader(handle, delimiter='\t'):
        d_up[row[0]] = [str(gene) for gene in row[2:]]

In [8]:
# Associating upregulated genelists with userListIDs in Enrichr #
# One POST per entry of d_up with a 0.5 s pause after each, so the runtime
# of this cell grows with the number of gene lists.
id_dict_up = enrichr_id_retrieval(d_up)

0 genelists failed to be matched with a EnrichrID!


In [9]:
# Exporting upregulated genelist : userListID associations #
# The keys of id_dict_up start out as the frame's index; reset_index moves
# them into a regular column so they are written out as 'pert_id'.
df_up = pd.DataFrame.from_dict(id_dict_up, orient='index').reset_index()
df_up.columns = ['pert_id', 'enrichr_id']
df_up.to_csv(
    'input/EnrichrIDs_upregulated.tsv',
    sep='\t',
    index=False,
)