## L1000FWD GO Terms Drug-Set Libraries
#### Drug-Set Labels : GO Terms
#### ALL DATABASES ACCESSED 11/2019
##### Author : Eryk Kropiwnicki | eryk.kropiwnicki@icahn.mssm.edu

In [1]:
import json
import pandas as pd
import requests
import time
from collections import defaultdict
import csv
import os

In [2]:
# Step into the sibling scripts directory before importing the project helper
# modules -- presumably so they resolve from the current working directory.
os.chdir('../scripts')
# NOTE(review): wildcard imports hide where names come from. The helpers called
# later in this notebook (enrichr_library_generator, gmt_formatter,
# library_counts) presumably come from these two modules -- confirm, then
# import them by name.
from enrichr_querying_script import *
from export_script import *
# Move to the L1000FWD directory; the relative 'input/...' paths read by the
# next cells are resolved against it.
# NOTE(review): both chdir calls are relative to the current working directory,
# so the outcome of this cell depends on where the kernel is when it runs
# (e.g. it differs once the later chdir into '../data/L1000FWD' has executed).
os.chdir('../L1000FWD')

### Importing Drugs matched to EnrichrIDs for querying through Enrichr API
#### Input Files : EnrichrIDs_downregulated.tsv | EnrichrIDs_upregulated.tsv

In [3]:
# Load the tab-separated table of upregulated Enrichr IDs; it is expected to
# provide the 'drug_name' and 'enrichr_id' columns used below.
df_up = pd.read_csv('input/EnrichrIDs_upregulated.tsv', sep='\t')

# Build the {drug_name: enrichr_id} lookup from the one column that is needed
# instead of converting every column of the frame to a dict and discarding
# the rest.
id_dict_up = df_up.set_index('drug_name')['enrichr_id'].to_dict()

# Preview the table. Only a cell's final expression is rendered, so the preview
# has to come last to be displayed.
df_up.head()

In [4]:
# Load the tab-separated table of downregulated Enrichr IDs; it is expected to
# provide the 'drug_name' and 'enrichr_id' columns used below.
df_down = pd.read_csv('input/EnrichrIDs_downregulated.tsv', sep='\t')

# Build the {drug_name: enrichr_id} lookup from the one column that is needed
# instead of converting every column of the frame to a dict and discarding
# the rest.
id_dict_down = df_down.set_index('drug_name')['enrichr_id'].to_dict()

# Preview the table. Only a cell's final expression is rendered, so the preview
# has to come last to be displayed.
df_down.head()

#### For all drug-term associations I use a p-value cut-off of 0.01

### Biological Processes drug-set libraries

In [5]:
# Switch the working directory to '../data/L1000FWD', resolved relative to the
# directory left by the earlier chdir calls.
# NOTE(review): the export cells below pass bare filenames, so gmt_formatter
# presumably writes its files into this directory -- confirm.
os.chdir('../data/L1000FWD')

In [6]:
# Upregulated 
GO_BP_drugsetlibrary_up = enrichr_library_generator('GO_Biological_Process_2018', id_dict_up)

# Downregulated 
GO_BP_drugsetlibrary_down = enrichr_library_generator('GO_Biological_Process_2018', id_dict_down)

In [7]:
# Exporting
gmt_formatter(GO_BP_drugsetlibrary_up, 'L1000FWD_GO_Biological_Processes_drugsetlibrary_up.txt')
gmt_formatter(GO_BP_drugsetlibrary_down, 'L1000FWD_GO_Biological_Processes_drugsetlibrary_down.txt')

### Biological Processes Library Counts

In [8]:
# Report summary counts for the upregulated Biological Process library
# (unique drugs, unique terms, unique associations, average drugs per term).
library_counts(GO_BP_drugsetlibrary_up)

3793 unique drugs
1141 unique association terms
62372 unique associations
54.66432953549518 average drugs per term


In [9]:
# Report summary counts for the downregulated Biological Process library
# (unique drugs, unique terms, unique associations, average drugs per term).
library_counts(GO_BP_drugsetlibrary_down)

3631 unique drugs
988 unique association terms
47035 unique associations
47.60627530364373 average drugs per term


### Cellular Component drug-set libraries

In [9]:
# Upregulated 
GO_CC_drugsetlibrary_up = enrichr_library_generator('GO_Cellular_Component_2018', id_dict_up)

# Downregulated 
GO_CC_drugsetlibrary_down = enrichr_library_generator('GO_Cellular_Component_2018', id_dict_down)

In [10]:
# Exporting
gmt_formatter(GO_CC_drugsetlibrary_up, 'L1000FWD_GO_Cellular_Component_drugsetlibrary_up.txt')
gmt_formatter(GO_CC_drugsetlibrary_down, 'L1000FWD_GO_Cellular_Component_drugsetlibrary_down.txt')

### Cellular Component Library Counts

In [11]:
# Report summary counts for the upregulated Cellular Component library
# (unique drugs, unique terms, unique associations, average drugs per term).
library_counts(GO_CC_drugsetlibrary_up)

3030 unique drugs
145 unique association terms
13779 unique associations
95.02758620689656 average drugs per term


In [12]:
# Report summary counts for the downregulated Cellular Component library
# (unique drugs, unique terms, unique associations, average drugs per term).
library_counts(GO_CC_drugsetlibrary_down)

2955 unique drugs
153 unique association terms
14048 unique associations
91.81699346405229 average drugs per term


### Molecular Function drug-set libraries

In [12]:
# Upregulated 
GO_MF_drugsetlibrary_up = enrichr_library_generator('GO_Molecular_Function_2018', id_dict_up)

# Downregulated 
GO_MF_drugsetlibrary_down = enrichr_library_generator('GO_Molecular_Function_2018', id_dict_down)

In [13]:
# Exporting
gmt_formatter(GO_MF_drugsetlibrary_up, 'L1000FWD_GO_Molecular_Function_drugsetlibrary_up.txt')
gmt_formatter(GO_MF_drugsetlibrary_down, 'L1000FWD_GO_Molecular_Function_drugsetlibrary_down.txt')

### Molecular Function Library Counts

In [14]:
# Report summary counts for the upregulated Molecular Function library
# (unique drugs, unique terms, unique associations, average drugs per term).
library_counts(GO_MF_drugsetlibrary_up)

2180 unique drugs
171 unique association terms
9183 unique associations
53.70175438596491 average drugs per term


In [15]:
# Report summary counts for the downregulated Molecular Function library
# (unique drugs, unique terms, unique associations, average drugs per term).
library_counts(GO_MF_drugsetlibrary_down)

1904 unique drugs
144 unique association terms
6532 unique associations
45.361111111111114 average drugs per term
