In [1]:
import json
import requests
import pandas as pd
import time
import os

In [2]:
# Getting gene set files
directory = "gene_lists"
# Keep regular files only. Sub-directories (e.g. Jupyter's .ipynb_checkpoints)
# are not gene lists, and the file-reading loop skips them; if they stayed in
# gene_files, it would no longer line up position-by-position with gene_lists,
# and lookups such as gene_files[i] would label results with the wrong file.
gene_files = sorted(
    name
    for name in os.listdir(directory)
    if os.path.isfile(os.path.join(directory, name))
)
gene_files

['cd_200_down_genes.txt',
 'cd_200_up_genes.txt',
 'cd_up+down_genes.txt',
 'limma_200_down_genes.txt',
 'limma_200_up_genes.txt',
 'limma_up+down_genes.txt',
 'logfc_200_down_genes.txt',
 'logfc_200_up_genes.txt',
 'logfc_up+down_genes.txt']

In [3]:
# Read genes from files
# gene_lists gets one list of gene symbols per readable file, in gene_files order.
gene_lists = []

for filename in gene_files:
    gene_path = os.path.join(directory, filename)
    # checking if it is a file
    if os.path.isfile(gene_path):
        # `with` guarantees the handle is closed even if reading raises
        with open(gene_path, 'r') as open_gene_list_file:
            # One gene symbol per line; strip whitespace and skip blank lines so
            # empty strings are not submitted as genes.
            genes = [line.strip() for line in open_gene_list_file if line.strip()]
        gene_lists.append(genes)

gene_lists

[['Ankrd13a',
  'Ppp6r1',
  'Srebf2',
  'Pdap1',
  'Hdgf',
  'Rab5c',
  'Col5a1',
  'Fxr2',
  'Dnmt1',
  'Raly',
  'Cfl1',
  'Gatad2a',
  'Nisch',
  'Susd5',
  'Fkbp10',
  'Cdc37',
  'Atp5b',
  'Sf1',
  'Amotl2',
  'Pcbp1',
  'Gtf3c1',
  'Ampd2',
  'Col16a1',
  'H1f0',
  'Slc44a1',
  'Atxn2l',
  'Slc8a3',
  'Acly',
  'Ihh',
  'Cap1',
  'Sec24d',
  'Dennd5a',
  'Pgk1',
  'Ubb',
  'Sec24c',
  'Ddx23',
  'Fscn1',
  'Lmo7',
  'Mapk6',
  'Dag1',
  'LOC100360205',
  'Inppl1',
  'Ubqln1',
  'Prrc2a',
  'Pgam1',
  'Igdcc4',
  'Scd1',
  'Slc44a2',
  'Pxylp1',
  'Phospho1',
  'Itga11',
  'Srpk1',
  'Fbln1',
  'Hm13',
  'Tubb4b',
  'Pld3',
  'Hnrnph1',
  'Ppic',
  'Emilin1',
  'Fads2',
  'Pebp1',
  'Vcp',
  'Ywhag',
  'Sf3b2',
  'Raver1',
  'Gnai2',
  'Sema3d',
  'Arhgef1',
  'Alyref',
  'Cdkn1c',
  'Mfi2',
  'Vim',
  'Obsl1',
  'Gtf2i',
  'Mtss1l',
  'Ptbp1',
  'Gaa',
  'Prkcsh',
  'Upf1',
  'Irak1',
  'Golga2',
  'Hsp90b1',
  'Wls',
  'Bcat1',
  'Ski',
  'Ywhab',
  'Hnrnpm',
  'Hnrnpdl',
  'Bsg

In [4]:
# Function to get Enrichr Results, taken from Appyter https://appyters.maayanlab.cloud/Enrichr_compressed_bar_chart_figure/
def Enrichr_API(enrichr_gene_list, all_libraries):
    """Submit a gene list to Enrichr and collect enrichment results per library.

    The gene list is uploaded once (``addList``); the returned list id is then
    used to query every requested library (``enrich``).

    Parameters
    ----------
    enrichr_gene_list : iterable of str
        Gene symbols; they are joined with newlines for the upload.
    all_libraries : iterable of str
        Names of the Enrichr gene-set libraries to query (e.g. 'ChEA_2016').

    Returns
    -------
    list
        ``[all_ranks, all_terms, all_pvalues, all_adjusted_pvalues, short_id,
        library_success]``. The first four are lists holding one inner list per
        library whose response could be parsed, in the same order as
        ``library_success``. ``short_id`` is a string ('' when no library was
        requested, because nothing is uploaded in that case).

    Raises
    ------
    Exception
        If the upload or an enrichment request returns a non-OK HTTP status.
    """
    all_ranks = []
    all_terms = []
    all_pvalues = []
    all_adjusted_pvalues = []
    library_success = []
    short_id = ''

    # Materialise once so a generator argument is not exhausted by the check below.
    libraries = list(all_libraries)
    if not libraries:
        # Nothing to query, so do not upload the list either.
        return [all_ranks, all_terms, all_pvalues, all_adjusted_pvalues, str(short_id), library_success]

    # NOTE(review): this is the host used by the original Appyter code; the current
    # Enrichr API docs list https://maayanlab.cloud/Enrichr -- confirm this host
    # still answers POST requests before relying on it.
    add_list_url = 'http://amp.pharm.mssm.edu/Enrichr/addList'
    enrich_url = 'http://amp.pharm.mssm.edu/Enrichr/enrich'

    # Upload the gene list a single time; the list id is valid for every library.
    genes_str = '\n'.join(enrichr_gene_list)
    description = 'Example gene list'
    payload = {
        'list': (None, genes_str),
        'description': (None, description)
    }
    response = requests.post(add_list_url, files=payload)
    if not response.ok:
        raise Exception('Error analyzing gene list')

    upload_info = json.loads(response.text)
    user_list_id = upload_info['userListId']
    short_id = upload_info['shortId']

    for library_name in libraries:
        # Short pause before each enrichment query, as in the original code.
        time.sleep(0.5)
        # Let requests build (and URL-encode) the query string.
        response = requests.get(
            enrich_url,
            params={'userListId': user_list_id, 'backgroundType': library_name},
        )
        if not response.ok:
            raise Exception('Error fetching enrichment results')

        try:
            data = json.loads(response.text)
            results_df = pd.DataFrame(data[library_name])
            # Positional columns read here: 0 = rank, 1 = term name,
            # 2 = p-value, 6 = adjusted p-value.
            ranks = list(results_df[0])
            terms = list(results_df[1])
            pvalues = list(results_df[2])
            adjusted_pvalues = list(results_df[6])
        except Exception as exc:
            # Best effort: report the library and move on. Unlike a bare
            # `except:`, this does not swallow KeyboardInterrupt/SystemExit.
            print('Error for ' + library_name + ' library: ' + repr(exc))
            continue

        # Append only after every column was extracted, so the parallel lists
        # always stay the same length as library_success.
        all_ranks.append(ranks)
        all_terms.append(terms)
        all_pvalues.append(pvalues)
        all_adjusted_pvalues.append(adjusted_pvalues)
        library_success.append(library_name)

    return [all_ranks, all_terms, all_pvalues, all_adjusted_pvalues, str(short_id), library_success]

In [5]:
# Run Enrichr on the gene sets
# One Enrichr_API result per gene list, in gene_lists order; only the
# ChEA_2016 library is queried.
results = []

for current_gene_list in gene_lists:
    results.append(Enrichr_API(current_gene_list, ['ChEA_2016']))

In [6]:
# Display raw results: one entry per gene list, each being the list returned by
# Enrichr_API ([ranks, terms, p-values, adjusted p-values, short id, libraries]).
results

[[[[1,
    2,
    3,
    4,
    5,
    6,
    7,
    8,
    9,
    10,
    11,
    12,
    13,
    14,
    15,
    16,
    17,
    18,
    19,
    20,
    21,
    22,
    23,
    24,
    25,
    26,
    27,
    28,
    29,
    30,
    31,
    32,
    33,
    34,
    35,
    36,
    37,
    38,
    39,
    40,
    41,
    42,
    43,
    44,
    45,
    46,
    47,
    48,
    49,
    50,
    51,
    52,
    53,
    54,
    55,
    56,
    57,
    58,
    59,
    60,
    61,
    62,
    63,
    64,
    65,
    66,
    67,
    68,
    69,
    70,
    71,
    72,
    73,
    74,
    75,
    76,
    77,
    78,
    79,
    80,
    81,
    82,
    83,
    84,
    85,
    86,
    87,
    88,
    89,
    90,
    91,
    92,
    93,
    94,
    95,
    96,
    97,
    98,
    99,
    100,
    101,
    102,
    103,
    104,
    105,
    106,
    107,
    108,
    109,
    110,
    111,
    112,
    113,
    114,
    115,
    116,
    117,
    118,
    119,
    120,
    121,
    122,
    123,
 

In [7]:
# Get NR3C1 Rankings
#
# Each entry of `results` is the list returned by Enrichr_API; positions 0, 1 and 2
# hold the ranks, term names and p-values, with one inner list per library.
# A single library was queried, so index [0] selects it.

gene_set = []
names = []
ranks = []
p_val = []

for filename, result in zip(gene_files, results):
    if not result[1]:
        # The library could not be parsed for this gene list: nothing to scan.
        continue

    # Drop the file extension. str.strip(".txt") would strip any leading/trailing
    # '.', 't' or 'x' characters rather than the suffix, mangling some names.
    label = os.path.splitext(filename)[0]

    for term, rank, pvalue in zip(result[1][0], result[0][0], result[2][0]):
        if 'NR3C1' in term:
            names.append(term)
            ranks.append(rank)
            p_val.append(pvalue)
            gene_set.append(label)

print(gene_set, names, ranks, p_val)

['cd_200_down_genes', 'cd_200_down_genes', 'cd_200_up_genes', 'cd_200_up_genes', 'cd_up+down_genes', 'cd_up+down_genes', 'limma_200_down_genes', 'limma_200_down_genes', 'limma_200_up_genes', 'limma_200_up_genes', 'limma_up+down_genes', 'limma_up+down_genes', 'logfc_200_down_genes', 'logfc_200_down_genes', 'logfc_200_up_genes', 'logfc_200_up_genes', 'logfc_up+down_genes', 'logfc_up+down_genes'] ['NR3C1 23031785 ChIP-Seq PC12 Mouse', 'NR3C1 21868756 ChIP-Seq MCF10A Human', 'NR3C1 23031785 ChIP-Seq PC12 Mouse', 'NR3C1 21868756 ChIP-Seq MCF10A Human', 'NR3C1 23031785 ChIP-Seq PC12 Mouse', 'NR3C1 21868756 ChIP-Seq MCF10A Human', 'NR3C1 23031785 ChIP-Seq PC12 Mouse', 'NR3C1 21868756 ChIP-Seq MCF10A Human', 'NR3C1 23031785 ChIP-Seq PC12 Mouse', 'NR3C1 21868756 ChIP-Seq MCF10A Human', 'NR3C1 23031785 ChIP-Seq PC12 Mouse', 'NR3C1 21868756 ChIP-Seq MCF10A Human', 'NR3C1 23031785 ChIP-Seq PC12 Mouse', 'NR3C1 21868756 ChIP-Seq MCF10A Human', 'NR3C1 23031785 ChIP-Seq PC12 Mouse', 'NR3C1 21868756 Ch

In [8]:
# Display NR3C1 results for each file
# Combine the four parallel lists into one row per matching Enrichr term.

nr3c1_rows = list(zip(gene_set, names, ranks, p_val))
nr3c1_columns = ['Gene Set', 'Name', 'Rank', 'p-value']
df = pd.DataFrame(nr3c1_rows, columns=nr3c1_columns)
df

Unnamed: 0,Gene Set,Name,Rank,p-value
0,cd_200_down_genes,NR3C1 23031785 ChIP-Seq PC12 Mouse,348,0.436921
1,cd_200_down_genes,NR3C1 21868756 ChIP-Seq MCF10A Human,416,0.582927
2,cd_200_up_genes,NR3C1 23031785 ChIP-Seq PC12 Mouse,90,0.955186
3,cd_200_up_genes,NR3C1 21868756 ChIP-Seq MCF10A Human,117,0.972929
4,cd_up+down_genes,NR3C1 23031785 ChIP-Seq PC12 Mouse,202,0.823345
5,cd_up+down_genes,NR3C1 21868756 ChIP-Seq MCF10A Human,283,0.915026
6,limma_200_down_genes,NR3C1 23031785 ChIP-Seq PC12 Mouse,78,0.209472
7,limma_200_down_genes,NR3C1 21868756 ChIP-Seq MCF10A Human,262,0.701285
8,limma_200_up_genes,NR3C1 23031785 ChIP-Seq PC12 Mouse,14,0.209472
9,limma_200_up_genes,NR3C1 21868756 ChIP-Seq MCF10A Human,132,0.701285


In [9]:
# TODO: add a statistical test to compare the methods (cd, limma, logfc)