## ATC_FDA_GENESHOT DRUG-SET LIBRARY
### Drug-set labels: Genes
#### ALL DATABASES ACCESSED 05/20/19
##### Author : Eryk Kropiwnicki | eryk.kropiwnicki@icahn.mssm.edu

In [1]:
import json
import pandas as pd
import xml.etree.ElementTree as et
import requests
import time
import pubchempy as pcp
from collections import defaultdict
import csv
import numpy as np

In [2]:
# Load the FDA-approved drug names, one per line of the input file #
with open('input/fda_drugs.txt') as drug_file:
    fda_drugs = []
    for raw_line in drug_file:
        # Only newline characters are removed; other whitespace is kept as-is.
        fda_drugs.append(raw_line.strip('\n'))

In [3]:
# Sanity check: number of entries read into fda_drugs
len(fda_drugs)

1834

### QUERYING FDA APPROVED DRUG NAMES THROUGH GENESHOT API
#### PLATFORM : https://amp.pharm.mssm.edu/geneshot

In [88]:
# Querying each drug through geneshot(AUTORIF FORMAT) #
# The responses are streamed to disk as one JSON array: "[" + item + "," + item ... + "]".
# The file is opened with 'w' (not 'a') so that re-running this cell replaces the
# previous array instead of appending a second one, which would make the file
# unparseable by json.load.
written = 0
with open('input/atc_fda_geneset_autorif.json', 'w') as outfile:
    outfile.write("[")
    for drug in fda_drugs:
        try:
            res = requests.get('https://amp.pharm.mssm.edu/geneshot/api/search/auto/' + drug)
            FDA_geneset_autorif = res.json()
        except ValueError:
            # No usable JSON for this drug: skip it rather than writing the
            # previous drug's response a second time (or failing with a
            # NameError when the very first request is the one that fails).
            print('Skipping {0}: no valid JSON response'.format(drug))
        else:
            # Separator goes *before* every item except the first, so the
            # array stays well-formed no matter which drugs were skipped.
            if written:
                outfile.write(",")
            json.dump(FDA_geneset_autorif, outfile, indent = 4)
            written += 1
        # Pause between requests, whether or not this one succeeded.
        time.sleep(1)
    # Always close the array, including when fda_drugs is empty.
    outfile.write("]")

### CREATING GENESET LIBRARY FROM DRUG-GENE ASSOCIATIONS COLLECTED FROM GENESHOT
#### INPUT FILE : atc_fda_geneset_autorif.json
#### OUTPUT FILE: atc_fda_genesetlibrary.txt

In [89]:
# Geneset Library #
# Build a dictionary keyed by each response's "search_term" whose value holds
# the gene names taken from that response's "gene_count" mapping #
with open('input/atc_fda_geneset_autorif.json') as response_file:
    data = json.load(response_file)

genelist = {}
for record in data:
    gene_names = list(record["gene_count"].keys())
    # Keep the first 51 keys, in the order json.load produced them.
    # The gene list is wrapped in a one-element list; the GMT step below
    # reads it back with index 0.
    genelist[record["search_term"]] = [gene_names[:51]]

# Converting dictionary into GMT format #
# One line per term: term, two tabs (second column left empty), then the
# tab-separated genes.
GSL_FDA = [
    '{0}\t\t{1}'.format(drug_name, '\t'.join(gene_sets[0]))
    for drug_name, gene_sets in genelist.items()
]

GSL_FDA_out = '\n'.join(GSL_FDA)

In [None]:
# Exporting GSL_FDA_out as TXT file #
# GSL_FDA_out is a single string, so it is written in one call. The `with`
# block closes the file, which guarantees the buffered text is flushed to disk.
with open('/Users/maayanlab/Documents/DrugSetEnrichment/Genesetlibraries/Drugbank/ATC_FDA/atc_fda_genesetlibrary.txt', 'w') as dataFile:
    dataFile.write(GSL_FDA_out)

### CREATING DRUG-SET LIBRARY FROM GENE-DRUG ASSOCIATIONS COLLECTED FROM GENESHOT
#### INPUT FILE : atc_fda_geneset_autorif.json
#### OUTPUT FILE: atc_fda_geneshot_drugsetlibrary.csv

In [90]:
# Drug-set Library #
# Transposing the 'genelist' dictionary so that genes are now the terms and drugs are set members #
# genelist maps each drug to a list of gene lists, so both levels are walked.
# Each gene name is used as-is as the key: no splitting is applied to it.
d = defaultdict(list)
for drug_name, gene_sets in genelist.items():
    for gene_set in gene_sets:
        for gene in gene_set:
            d[gene].append(drug_name)

# Plain dict so later lookups of unknown genes do not silently create entries.
drugsetlibrary = dict(d)

In [None]:
# Converting drugsetlibrary into dataframe and exporting #
# orient='index' gives one row per gene; rows with fewer drugs than the widest
# row are padded with missing values, which dropna() removes before joining.
wide_table = pd.DataFrame.from_dict(drugsetlibrary, orient='index')
df = wide_table.apply(
    lambda row: ','.join(row.dropna().astype(str)),
    axis = 1
)
df.to_csv('/Users/maayanlab/Documents/DrugSetEnrichment/Drugsetlibraries/Drugbank/ATC_FDA/atc_fda_geneshot_drugsetlibrary.csv')