## Drugbank Experimental Geneshot Drug-Set Library
### Drug-set labels: Genes
#### ALL DATABASES ACCESSED 05/01/19
##### Author : Eryk Kropiwnicki | eryk.kropiwnicki@icahn.mssm.edu

In [2]:
import json
import pandas as pd
import xml.etree.ElementTree as et
import requests
import time
import pubchempy as pcp
from collections import defaultdict
import csv

### QUERYING DRUGBANK EXPERIMENTAL DRUG TERMS THROUGH GENESHOT
#### PLATFORM : https://amp.pharm.mssm.edu/geneshot
#### OUTPUT FILE: drugbank_experimental_geneset_autorif.json

In [3]:
# Load the lower-cased drug names (second CSV column) from 'input/drug links-3.csv'.
# - newline='' is what the csv module expects of the file object, so that line
#   breaks inside quoted fields are parsed correctly.
# - The first row (presumably the column header) is skipped up front instead of
#   being appended and deleted afterwards; an empty file therefore yields an
#   empty list rather than an IndexError.
# - csv.reader yields [] for blank lines; those are skipped because they carry
#   no drug name.
experimental_drugs = []
with open('input/drug links-3.csv', newline='') as f:
    csvreader = csv.reader(f)
    next(csvreader, None)
    for row in csvreader:
        if not row:
            continue
        experimental_drugs.append(row[1].lower())

In [4]:
# Sanity check: display the number of drug names held in `experimental_drugs`.
len(experimental_drugs)

5764

In [7]:
# Querying each drug through geneshot(AUTORIF) #
# The JSON responses are streamed into a single JSON array on disk.
#
# - The file is opened in 'w' mode so that re-running this cell replaces the
#   previous output; appending would concatenate a second array and leave a
#   file that json.load cannot parse.
# - A drug whose request fails (network error, timeout, HTTP error status) or
#   whose response is not valid JSON is skipped and recorded in `failed_drugs`,
#   instead of re-writing the previous drug's result (or raising NameError when
#   the very first request fails).
# - The "," separator is written before every entry except the first, so the
#   array stays well-formed no matter how many drugs are skipped.
failed_drugs = []
i = 0  # number of responses written to the file so far
with open('input/drugbank_experimental_geneset_autorif.json', 'w') as outfile:
    outfile.write("[")
    for drug in experimental_drugs:
        try:
            res = requests.get(
                'https://amp.pharm.mssm.edu/geneshot/api/search/auto/' + drug,
                timeout=60,  # never hang the whole run on one stalled request
            )
            res.raise_for_status()
            experimental_geneset_autorif = res.json()
        except (requests.RequestException, ValueError):
            failed_drugs.append(drug)
        else:
            if i:
                outfile.write(",")
            json.dump(experimental_geneset_autorif, outfile, indent = 4)
            i += 1
        time.sleep(1)  # throttle to one request per second
    outfile.write("]")

if failed_drugs:
    print('{0} of {1} drugs could not be retrieved'.format(
        len(failed_drugs), len(experimental_drugs)))

### CREATING GENESET and DRUG-SET LIBRARIES FROM DRUG-GENE ASSOCIATIONS COLLECTED FROM GENESHOT
#### INPUT FILE : drugbank_experimental_geneset_autorif.json
#### OUTPUT FILES: drugbank_experimental_genesetlibrary.txt | drugbank_experimental_geneshot_drugsetlibrary.csv

In [8]:
# Build the gene-set library: one entry per Geneshot search term (drug), whose
# members are the first 51 keys of that record's "gene_count" mapping.
genelist = {}

with open('input/drugbank_experimental_geneset_autorif.json') as data_file:
    data = json.load(data_file)

for record in data:
    gene_symbols = list(record["gene_count"].keys())
    genelist[record["search_term"]] = gene_symbols[:51]

# Discard the terms that ended up without any gene.
genelist = {drug: members for drug, members in genelist.items() if members}

# Serialise in GMT layout: the term, an empty second column, then the
# tab-separated genes; one term per line.
GSL_experimental = [
    '{0}\t\t{1}'.format(drug, '\t'.join(members))
    for drug, members in genelist.items()
]

GSL_experimental_out = '\n'.join(GSL_experimental)

In [16]:
# Exporting Drugbank Experimental Drug Geneset library as TXT file #
# The context manager closes (and therefore flushes) the file as soon as the
# write finishes, and the text is written in one call instead of one character
# at a time.
# NOTE(review): the destination is a hard-coded absolute path on one machine.
with open('/Users/maayanlab/Documents/DrugSetEnrichment/GeneSetLibraries/Drugbank/Experimental/drugbank_experimental_genesetlibrary.txt', 'w') as dataFile:
    dataFile.write(GSL_experimental_out)

In [10]:
# Transposing the 'genelist' dictionary where genes are terms and experimental drugs are set members #
# Each value of 'genelist' is a list of genes, so every gene in that list
# becomes its own term and collects each drug whose list contains it.
# (The former `gene.split(',')` call discarded its result and had no effect,
# so it has been removed.)
d = defaultdict(list)
for k, v in genelist.items():
    for gene in v:
        d[gene].append(k)

# Plain dict, so that looking up an unknown gene does not create an empty entry.
drugsetlibrary = dict(d)

In [18]:
# Converting drugsetlibrary into dataframe and exporting #
def _join_drugs(row):
    """Comma-join the non-missing cells of one row into a single string."""
    return ','.join(row.dropna().astype(str))


# One row per key of `drugsetlibrary`, one column per list position; the
# missing cells of shorter lists are dropped again when each row is joined.
drug_table = pd.DataFrame.from_dict(drugsetlibrary, orient='index')
df = drug_table.apply(_join_drugs, axis = 1)
df.to_csv('/Users/maayanlab/Documents/DrugSetEnrichment/Drugsetlibraries/Drugbank/Experimental/drugbank_experimental_geneshot_drugsetlibrary.csv')