## idmapping UniProt

In [11]:
#https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/README
#https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/README
path = '../data/UniProt'

In [2]:
import os
import csv
import io
import gzip

In [3]:
def generate_idmapping(path):
    """Returns a generator of idmapping.dat.gz rows."""
    read_file = gzip.open(path, 'rb')
    text = io.TextIOWrapper(read_file)
    reader = csv.reader(text, delimiter='\t')
    for row in reader:
        yield row
    read_file.close()

In [4]:
mapping_generator = generate_idmapping(os.path.join(path, 'idmapping.dat.gz'))

In [5]:
extract = {'GeneID', 'HGNC'}
mappings = {target: set() for target in extract}

for accession, target, target_id in mapping_generator:
    if target not in extract:
        continue
    mappings[target].add((accession, target_id))

In [12]:
for target, mapset in mappings.items():
    write_file = gzip.open(os.path.join(path, '{}.tsv.gz'.format(target)), 'wb')
    wrapper = io.TextIOWrapper(write_file)
    writer = csv.writer(wrapper, delimiter='\t')
    writer.writerow(['uniprot', target])
    writer.writerows(sorted(mapset))
    write_file.close()

## Process EntrezGene

In [1]:
import os
import gzip
import re
import json

import pandas

In [4]:
# Read Entrez info dataset
path = '../data/EntrezGene'

with gzip.open(os.path.join(path, 'Homo_sapiens.gene_info.gz'), 'rt') as read_file:
    #matches = re.match(r'#Format: (.+) \(', next(read_file))
    #columns = matches.group(1).split(' ')
    #gene_df = pandas.read_table(read_file, names = columns, na_values=['-'])
    gene_df = pandas.read_table(read_file, na_values=['-'])
    gene_df = gene_df.rename(columns={'#tax_id':'tax_id'})

# Restrict to homo sapiens
gene_df = gene_df.query('tax_id == 9606')

len(gene_df)

  gene_df = pandas.read_table(read_file, na_values=['-'])


193382

In [5]:
# save a select columnset
columns = ['tax_id', 'GeneID', 'Symbol', 'chromosome', 'map_location', 'type_of_gene', 'description']
select_df = gene_df[columns]
select_df.to_csv(os.path.join(path, 'genes-human.tsv'), sep='\t', index=False)
select_df.head()

Unnamed: 0,tax_id,GeneID,Symbol,chromosome,map_location,type_of_gene,description
0,9606,1,A1BG,19,19q13.43,protein-coding,alpha-1-B glycoprotein
1,9606,2,A2M,12,12p13.31,protein-coding,alpha-2-macroglobulin
2,9606,3,A2MP1,12,12p13.31,pseudo,alpha-2-macroglobulin pseudogene 1
3,9606,9,NAT1,8,8p22,protein-coding,N-acetyltransferase 1
4,9606,10,NAT2,8,8p22,protein-coding,N-acetyltransferase 2


## Parse the DrugBank XML and extract TSVs

In [1]:
import os
import csv
import gzip
import collections
import re
import io
import json
import xml.etree.ElementTree as ET

import requests
import pandas

In [2]:
path = '../data/DrugBank'
xml_file = os.path.join(path, 'full database.xml') 
tree = ET.parse(xml_file)
root = tree.getroot()

In [8]:
ns = '{http://www.drugbank.ca}'
inchikey_template = "{ns}calculated-properties/{ns}property[{ns}kind='InChIKey']/{ns}value"
inchi_template = "{ns}calculated-properties/{ns}property[{ns}kind='InChI']/{ns}value"

rows = list()
for i, drug in enumerate(root):
    row = collections.OrderedDict()
    assert drug.tag == ns + 'drug'
    row['type'] = drug.get('type')
    row['drugbank_id'] = drug.findtext(ns + "drugbank-id[@primary='true']")
    row['name'] = drug.findtext(ns + "name")
    row['description'] = drug.findtext(ns + "description")
    row['groups'] = [group.text for group in
        drug.findall("{ns}groups/{ns}group".format(ns = ns))]
    row['atc_codes'] = [code.get('code') for code in
        drug.findall("{ns}atc-codes/{ns}atc-code".format(ns = ns))]
    row['categories'] = [x.findtext(ns + 'category') for x in
        drug.findall("{ns}categories/{ns}category".format(ns = ns))]
    row['inchi'] = drug.findtext(inchi_template.format(ns = ns))
    row['inchikey'] = drug.findtext(inchikey_template.format(ns = ns))
    
    # Add drug aliases
    aliases = {
        elem.text for elem in 
        drug.findall("{ns}international-brands/{ns}international-brand".format(ns = ns)) +
        drug.findall("{ns}synonyms/{ns}synonym[@language='English']".format(ns = ns)) +
        drug.findall("{ns}international-brands/{ns}international-brand".format(ns = ns)) +
        drug.findall("{ns}products/{ns}product/{ns}name".format(ns = ns))

    }
    aliases.add(row['name'])
    row['aliases'] = sorted(aliases)

    rows.append(row)

In [9]:
#alias_dict = {row['drugbank_id']: row['aliases'] for row in rows}
#with open(os.path.join(path, 'aliases.json'), 'w') as fp:
#    json.dump(alias_dict, fp, indent=2, sort_keys=True)

In [10]:
def collapse_list_values(row):
    for key, value in row.items():
        if isinstance(value, list):
            row[key] = '|'.join(value)
    return row

rows = list(map(collapse_list_values, rows))

In [11]:
columns = ['drugbank_id', 'name', 'type', 'groups', 'atc_codes', 'categories', 'inchikey', 'inchi', 'description']
drugbank_df = pandas.DataFrame.from_dict(rows)[columns]
drugbank_df.head()

Unnamed: 0,drugbank_id,name,type,groups,atc_codes,categories,inchikey,inchi,description
0,DB00001,Lepirudin,biotech,approved|withdrawn,B01AE02,"Amino Acids, Peptides, and Proteins|Anticoagul...",,,Lepirudin is a recombinant hirudin formed by 6...
1,DB00002,Cetuximab,biotech,approved,L01FE01,"Amino Acids, Peptides, and Proteins|Antibodies...",,,Cetuximab is a recombinant chimeric human/mous...
2,DB00003,Dornase alfa,biotech,approved,R05CB13,"Amino Acids, Peptides, and Proteins|Cough and ...",,,Dornase alfa is a biosynthetic form of human d...
3,DB00004,Denileukin diftitox,biotech,approved|investigational,L01XX29,"ADP Ribose Transferases|Amino Acids, Peptides,...",,,A recombinant DNA-derived cytotoxic protein co...
4,DB00005,Etanercept,biotech,approved|investigational,L04AB01,"Agents reducing cytokine levels|Amino Acids, P...",,,Dimeric fusion protein consisting of the extra...


In [12]:
drugbank_slim_df = drugbank_df[
    drugbank_df.groups.map(lambda x: 'approved' in x) &
    drugbank_df.inchi.map(lambda x: x is not None) &
    drugbank_df.type.map(lambda x: x == 'small molecule')
]
drugbank_slim_df.head()

Unnamed: 0,drugbank_id,name,type,groups,atc_codes,categories,inchikey,inchi,description
5,DB00006,Bivalirudin,small molecule,approved|investigational,B01AE06,"Amino Acids, Peptides, and Proteins|Anticoagul...",OIRCOABEOLEUMC-GEJPAHFPSA-N,InChI=1S/C98H138N24O33/c1-5-52(4)82(96(153)122...,Bivalirudin is a synthetic 20 residue peptide ...
6,DB00007,Leuprolide,small molecule,approved|investigational,L02AE51|L02AE02,Adrenal Cortex Hormones|Agents Causing Muscle ...,GFIJNRVAKGFPGQ-LIJARHBVSA-N,InChI=1S/C59H84N16O12/c1-6-63-57(86)48-14-10-2...,Leuprolide is a synthetic 9-residue peptide an...
13,DB00014,Goserelin,small molecule,approved,L02AE03,"Adrenal Cortex Hormones|Amino Acids, Peptides,...",BLCLNMBMMGCOAS-URPVMXJPSA-N,InChI=1S/C59H84N18O14/c1-31(2)22-40(49(82)68-3...,"Goserelin is a synthetic hormone. In men, it s..."
25,DB00027,Gramicidin D,small molecule,approved,R02AB30,"Amino Acids, Peptides, and Proteins|Anti-Bacte...",NDAYQJDHGXTBJL-MWWSRJDJSA-N,InChI=1S/C96H135N19O16/c1-50(2)36-71(105-79(11...,Gramcidin D is a heterogeneous mixture of thre...
33,DB00035,Desmopressin,small molecule,approved,H01BA02,"Agents that produce hypertension|Amino Acids, ...",NFLWUMRGJYTJIN-PNIOQBSNSA-N,InChI=1S/C46H64N14O12S2/c47-35(62)15-14-29-40(...,"Desmopressin (dDAVP), a synthetic analogue of ..."


In [None]:
# write drugbank tsv
drugbank_df.to_csv(os.path.join(path, 'drugbank.tsv'), sep='\t', index=False)

# write slim drugbank tsv
drugbank_slim_df.to_csv(os.path.join(path, 'drugbank-slim.tsv'), sep='\t', index=False)

In [4]:
protein_rows = list()
for i, drug in enumerate(root):
    drugbank_id = drug.findtext(ns + "drugbank-id[@primary='true']")
    for category in ['target', 'enzyme', 'carrier', 'transporter']:
        proteins = drug.findall('{ns}{cat}s/{ns}{cat}'.format(ns=ns, cat=category))
        for protein in proteins:
            row = {'drugbank_id': drugbank_id, 'category': category}
            row['organism'] = protein.findtext('{}organism'.format(ns))
            row['known_action'] = protein.findtext('{}known-action'.format(ns))
            actions = protein.findall('{ns}actions/{ns}action'.format(ns=ns))
            row['actions'] = '|'.join(action.text for action in actions)
            uniprot_ids = [polypep.text for polypep in protein.findall(
                "{ns}polypeptide/{ns}external-identifiers/{ns}external-identifier[{ns}resource='UniProtKB']/{ns}identifier".format(ns=ns))]            
            if len(uniprot_ids) != 1:
                continue
            row['uniprot_id'] = uniprot_ids[0]
            #ref_text = protein.findtext("{ns}references[@format='textile']".format(ns=ns))
            #pmids = re.findall(r'pubmed/([0-9]+)', ref_text)
            pmids = protein.findall("{ns}references/{ns}articles/{ns}article/{ns}pubmed-id".format(ns=ns)) #update to fit the latest version of drugbank xml
            row['pubmed_ids'] = '|'.join(str(pmid.text) for pmid in pmids)
            protein_rows.append(row)

protein_df = pandas.DataFrame.from_dict(protein_rows)

In [5]:
protein_df#[protein_df['pubmed_ids']=='']

Unnamed: 0,drugbank_id,category,organism,known_action,actions,uniprot_id,pubmed_ids
0,DB00001,target,Humans,yes,inhibitor,P00734,10505536|10912644|11055889|11467439|11807012
1,DB00002,target,Humans,yes,binder,P00533,11752352|15821783
2,DB00002,target,Humans,unknown,binder,O75015,16336752
3,DB00002,target,Humans,unknown,binder,P02745,32117299
4,DB00002,target,Humans,unknown,binder,P02746,32117299
...,...,...,...,...,...,...,...
28720,DB17992,target,Pediculus humanus subsp. corporis,unknown,antagonist,E0W492,33600484
28721,DB18236,target,Humans,yes,inhibitor,Q07912,28609657
28722,DB18704,target,Humans,yes,stimulator,Q99062,
28723,DB18716,target,Pseudomonas aeruginosa,yes,inhibitor,P37321,


In [14]:
# Read our uniprot to entrez_gene mapping
url = '../data/UniProt/GeneID.tsv.gz'
with gzip.open(url, 'rt') as read_file:
    uniprot_df = pandas.read_table(read_file)
    uniprot_df.rename(columns={'uniprot': 'uniprot_id', 'GeneID': 'entrez_gene_id'}, inplace=True)
uniprot_df.shape

# merge uniprot mapping with protein_df
entrez_df = protein_df.merge(uniprot_df, how='inner')

In [13]:
"""
#legacy
response = requests.get('http://git.dhimmel.com/uniprot/data/map/GeneID.tsv.gz', stream=True)
text = io.TextIOWrapper(gzip.GzipFile(fileobj=response.raw))
uniprot_df = pandas.read_table(text, engine='python')
uniprot_df.rename(columns={'uniprot': 'uniprot_id', 'GeneID': 'entrez_gene_id'}, inplace=True)
uniprot_df
# merge uniprot mapping with protein_df
entrez_df = protein_df.merge(uniprot_df, how='inner')
"""

Unnamed: 0,uniprot_id,entrez_gene_id
0,A0A010PZJ8,19039206
1,A0A010PZK3,19039211
2,A0A010PZK7,19039216
3,A0A010PZK9,19039221
4,A0A010PZL3,19039226
...,...,...
6660567,X5KBI2,23373303
6660568,X5KBJ0,23373148
6660569,X5KBL1,23373174
6660570,X5KBL5,23373179


In [16]:
columns = ['drugbank_id', 'category', 'uniprot_id', 'entrez_gene_id', 'organism',
           'known_action', 'actions', 'pubmed_ids']
entrez_df = entrez_df[columns]
entrez_df.shape

(27391, 8)

In [17]:
entrez_df.to_csv(os.path.join(path, 'proteins.tsv'), sep='\t', index=False)

## Durg Central - Convert drug targets

In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
import pandas

In [3]:
url = '../data/UniProt/GeneID.tsv.gz'
entrez_map_df = pandas.read_table(url, compression='gzip')
entrez_map_df.head(2)
entrez_map_df.shape

Unnamed: 0,uniprot,GeneID
0,A0A009GNX9,9382146
1,A0A009GQ85,60755424


(13529311, 2)

In [4]:
url = '../DrugBank/drugbank-slim.tsv'
drugbank_df = pandas.read_table(url)
drugbank_df = drugbank_df[['drugbank_id', 'name']]
drugbank_df = drugbank_df.rename(columns={'name': 'drugbank_name'})
drugbank_df.head(2)
drugbank_df.shape

Unnamed: 0,drugbank_id,drugbank_name
0,DB00006,Bivalirudin
1,DB00007,Leuprolide


(2778, 2)

In [6]:
url = '../data/DrugCentral/identifiers.tsv'
id_df = pandas.read_table(url, sep=',')
id_df = id_df.query("id_type == 'DRUGBANK_ID'")[['struct_id', 'identifier']]
id_df = id_df.rename(columns={'identifier': 'drugbank_id', 'struct_id': 'DRUG_ID'})
drugbank_df = id_df.merge(drugbank_df)
drugbank_df.head(2)
drugbank_df.shape

Unnamed: 0,DRUG_ID,drugbank_id,drugbank_name
0,5392,DB11791,Capmatinib
1,5393,DB15685,Selpercatinib


(2674, 3)

In [19]:
"""
path = 'https://raw.githubusercontent.com/olegursu/drugtarget/9a6d84bed8650c6c507a2d3d786814c774568610/drug_target.tsv'
#'https://github.com/legursu/drugtarget/raw/9a6d84bed8650c6c507a2d3d786814c774568610/drug_target.tsv'
check_target_df = pandas.read_table(path)
#check_target_df['SOURCE'].unique()
check_target_df['REFERENCE_check'] = check_target_df['REFERENCE'].str[:20]
check_target_df[check_target_df['SOURCE'].isin(['CHEMBL'])]['REFERENCE_check'].unique()
"""

array(['https://www.ebi.ac.u'], dtype=object)

In [9]:
path = '../data/DrugCentral/drug.target.interaction.tsv'
target_df = pandas.read_table(path)
target_df = target_df.rename(columns={'STRUCT_ID': 'DRUG_ID', 'ACCESSION': 'UNIPROT', 'TARGET_CLASS': 'TARGET_FAMILY', 'ACT_SOURCE':'SOURCE', 'ACT_SOURCE_URL': 'REFERENCE'})
target_df = drugbank_df.merge(target_df)
target_df = target_df[['drugbank_id', 'drugbank_name', 'TARGET_NAME', 'TARGET_FAMILY', 'UNIPROT', 'ACTION_TYPE', 'SOURCE', 'REFERENCE']]
target_df.head(2)
target_df.shape

# Split multi-protein targets into many rows
s = target_df.UNIPROT.str.split('|').apply(pandas.Series, 1).stack()
s.index = s.index.droplevel(-1)
s.name ='uniprot'
del target_df['UNIPROT']
target_df = target_df.join(s)
target_df.head(2)
target_df.shape

target_df = entrez_map_df.merge(target_df)
del target_df['uniprot']

target_df['action'] = target_df['ACTION_TYPE'].str.lower()
del target_df['ACTION_TYPE']

target_df['pubmed_id'] = target_df.REFERENCE.str.extract('pubmed.*/([0-9]+)')

target_df = target_df.drop_duplicates()
target_df.head(2)
target_df.shape

Unnamed: 0,drugbank_id,drugbank_name,TARGET_NAME,TARGET_FAMILY,UNIPROT,ACTION_TYPE,SOURCE,REFERENCE
0,DB11791,Capmatinib,Hepatocyte growth factor receptor,Kinase,P08581,INHIBITOR,SCIENTIFIC LITERATURE,https://pubmed.ncbi.nlm.nih.gov/21918175
1,DB15685,Selpercatinib,Aurora kinase B,Kinase,Q96GD4,INHIBITOR,DRUG LABEL,https://www.accessdata.fda.gov/drugsatfda_docs...


(16823, 8)

Unnamed: 0,drugbank_id,drugbank_name,TARGET_NAME,TARGET_FAMILY,ACTION_TYPE,SOURCE,REFERENCE,uniprot
0,DB11791,Capmatinib,Hepatocyte growth factor receptor,Kinase,INHIBITOR,SCIENTIFIC LITERATURE,https://pubmed.ncbi.nlm.nih.gov/21918175,P08581
1,DB15685,Selpercatinib,Aurora kinase B,Kinase,INHIBITOR,DRUG LABEL,https://www.accessdata.fda.gov/drugsatfda_docs...,Q96GD4


(19981, 8)

Unnamed: 0,GeneID,drugbank_id,drugbank_name,TARGET_NAME,TARGET_FAMILY,SOURCE,REFERENCE,action,pubmed_id
0,125206,DB09038,Empagliflozin,Sodium/glucose cotransporter 5,Transporter,SCIENTIFIC LITERATURE,https://pubmed.ncbi.nlm.nih.gov/21985634,,21985634.0
1,66738197,DB01051,Novobiocin,DNULL gyrase subunit B,Enzyme,CHEMBL,,,


(18579, 9)

In [10]:
target_df[target_df['drugbank_id'].isin(['DB11791','DB15685'])]

Unnamed: 0,GeneID,drugbank_id,drugbank_name,TARGET_NAME,TARGET_FAMILY,SOURCE,REFERENCE,action,pubmed_id
2809,5979,DB15685,Selpercatinib,Proto-oncogene tyrosine-protein kinase recepto...,Kinase,DRUG LABEL,https://www.accessdata.fda.gov/drugsatfda_docs...,inhibitor,
3397,4233,DB11791,Capmatinib,Hepatocyte growth factor receptor,Kinase,SCIENTIFIC LITERATURE,https://pubmed.ncbi.nlm.nih.gov/21918175,inhibitor,21918175.0
17261,9212,DB15685,Selpercatinib,Aurora kinase B,Kinase,DRUG LABEL,https://www.accessdata.fda.gov/drugsatfda_docs...,inhibitor,


In [11]:
target_df['SOURCE'].unique()

array(['SCIENTIFIC LITERATURE', 'CHEMBL', 'WOMBAT-PK', 'DRUG MATRIX',
       'DRUG LABEL', 'IUPHAR', 'PDSP', 'DRUGBANK', 'KEGG DRUG', 'UNKNOWN'],
      dtype=object)

In [12]:
#https://think-lab.github.io/d/186/
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10692006/
#DrugCentral contains 20,658 activity endpoints (drug-target pairs) for 2715 drugs across 3171 targets. Bioactivities are compiled from various sources: ChEMBLdb [24] (59.7%), WOMBAT-PK [25] (13.8%), DrugMatrix [26] (11.0%), IUPHAR/BPS Guide to Pharmacology [27] (6.1%), scientific literature (3.7%), PDSP [28] (3.6%), and drug labels (1.6%) (Table ​(Table3).3). 
target_source_map = {
    'CHEMBL': 'DrugCentral (ChEMBL)',
    'SCIENTIFIC LITERATURE': 'DrugCentral (literature)',
    'DRUG LABEL': 'DrugCentral (label)',
    'IUPHAR': 'DrugCentral (IUPHAR)',
    'KEGG DRUG': 'DrugCentral (KEGG DRUG)',
    'WOMBAT-PK': 'DrugCentral (WOMBAT-PK)',
    'DRUG MATRIX': 'DrugCentral (DrugMatrix)',
    'PDSP': 'DrugCentral (PDSP)',
    'DRUGBANK': 'DrugCentral (DrugBank)',
    'UNKNOWN': 'DrugCentral (unknown)',
}
target_df.SOURCE = target_df.SOURCE.map(target_source_map)
target_df.SOURCE.value_counts()
target_df.head(2)
target_df.shape

DrugCentral (ChEMBL)        11249
DrugCentral (WOMBAT-PK)      2876
DrugCentral (DrugMatrix)     2060
DrugCentral (IUPHAR)          944
DrugCentral (literature)      627
DrugCentral (PDSP)            589
DrugCentral (label)           135
DrugCentral (DrugBank)         67
DrugCentral (unknown)          23
DrugCentral (KEGG DRUG)         9
Name: SOURCE, dtype: int64

Unnamed: 0,GeneID,drugbank_id,drugbank_name,TARGET_NAME,TARGET_FAMILY,SOURCE,REFERENCE,action,pubmed_id
0,125206,DB09038,Empagliflozin,Sodium/glucose cotransporter 5,Transporter,DrugCentral (literature),https://pubmed.ncbi.nlm.nih.gov/21985634,,21985634.0
1,66738197,DB01051,Novobiocin,DNULL gyrase subunit B,Enzyme,DrugCentral (ChEMBL),,,


(18579, 9)

In [13]:
def condense_targets(df):
    """Condense drug-target relationships."""
    row = pandas.Series()
    row['pubmed_ids'] = '|'.join(sorted(df.pubmed_id.dropna().unique()))
    row['sources'] = '|'.join(sorted(df.SOURCE.dropna().unique()))
    row['actions'] = '|'.join(sorted(df.action.dropna().unique()))
    row['urls'] = '|'.join(sorted(url for url in df.REFERENCE.dropna().unique() if not 'pubmed' in url))
    return row
    
target_df = target_df.groupby(['GeneID', 'drugbank_id', 'drugbank_name']).apply(condense_targets).reset_index()

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)

  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()


In [14]:
target_df

Unnamed: 0,GeneID,drugbank_id,drugbank_name,pubmed_ids,sources,actions,urls
0,18,DB01080,Vigabatrin,,DrugCentral (ChEMBL),inhibitor,
1,19,DB01599,Probucol,,DrugCentral (ChEMBL),inhibitor,
2,25,DB00317,Gefitinib,,DrugCentral (ChEMBL),,
3,25,DB00398,Sorafenib,,DrugCentral (ChEMBL),,
4,25,DB00530,Erlotinib,,DrugCentral (ChEMBL),,
...,...,...,...,...,...,...,...
17704,106821730,DB11828,Neratinib,,DrugCentral (ChEMBL),,
17705,106821730,DB12500,Fedratinib,,DrugCentral (ChEMBL),,
17706,107364480,DB00431,Lindane,,DrugCentral (ChEMBL),negative allosteric modulator,
17707,107791137,DB00936,Salicylic acid,,DrugCentral (WOMBAT-PK),,


In [18]:
import os
path = '../data/DrugCentral' 
target_df.to_csv(os.path.join(path, 'targets.tsv'), sep='\t', index=False)

## Unichem Map - Drugbank to BindingDB

In [1]:
import os
import csv
import collections
import json
import gzip
import io

import requests

In [2]:
path = '../data/DrugBank'

with open(os.path.join(path, 'drugbank.tsv')) as read_file:
    reader = csv.DictReader(read_file, delimiter='\t')
    drugbank = list(reader)

drugbank_ids = [drug['drugbank_id'] for drug in drugbank]
assert len(drugbank_ids) == len(set(drugbank_ids))

In [3]:
# Compound has InChIKey
collections.Counter(bool(drug['inchikey']) for drug in drugbank)

Counter({False: 4653, True: 11928})

In [4]:
# Compound types
collections.Counter(drug['type'] for drug in drugbank)

Counter({'biotech': 3882, 'small molecule': 12699})

In [5]:
id_to_source = {
    0: None,
    1: 'chembl',
    2: 'drugbank',
    3: 'pdb',
    4: 'iuphar',
    5: 'pubchem_dotf',
    6: 'kegg_ligand',
    7: 'chebi',
    8: 'nih_ncc',
    9: 'zinc',
    10: 'emolecules',
    11: 'ibm',
    12: 'atlas',
    13: 'ibm_patents',
    14: 'fdasrs',
    15: 'surechembl',
    17: 'pharmgkb',
    18: 'hmdb',
    20: 'selleck',
    21: 'pubchem_tpharma',
    22: 'pubchem',
    23: 'mcule',
    24: 'nmrshiftdb2',
    25: 'lincs',
    26: 'actor',
    27: 'recon',
    28: 'molport',
    29: 'nikkaji',
    31: 'bindingdb',
}

source_to_id = {v: k for k, v in id_to_source.items()}

In [6]:
#'https://www.ebi.ac.uk/unichem/rest/cpd_search/DB00006/2/0/0/4/0/0/0/0/1'
def connectivity_query(search_url, target = None, B = 0, C = 0, D = 0, E = 0, F = 0, G = 0):
    """
    https://www.ebi.ac.uk/unichem/info/widesearchInfo
    """
    url = '{search_url}/{A}/{B}/{C}/{D}/{E}/{F}/{G}/{H}'.format(
        search_url = search_url,
        A = source_to_id[target], # Sources
        B = B, # Pattern
        C = C, # Component Mapping
        D = D, # Frequency Block
        E = E, # InChI Length Block
        F = F, # UniChem Labels
        G = G, # Assignment Status
        H = 1, # Data Structure
    )
    response = requests.get(url)
    try:
        response = response.json()
    except ValueError:
        print('cannot decode json:', url)
        return
    if 'error' in response:
        print('UniChem error:', response['error'])
        return
    for assignment in response.values():
        header = assignment.pop(0)
        for match in assignment:
            yield collections.OrderedDict(zip(header, match))

def key_search(inchikey, **kwargs):
    """Search by InChIKeys."""
    if inchikey.startswith('InChIKey='):
        prefix, inchikey = inchikey.split('=', 1)
    base_url = 'https://www.ebi.ac.uk/unichem/rest/key_search'
    search_url = '{base_url}/{StandardInChIKey}'.format(
        base_url = base_url,
        StandardInChIKey = inchikey)
    return connectivity_query(search_url, **kwargs)
    
def cpd_search(source, compound_id, **kwargs):
    """Search by source-specific identifiers."""
    base_url = 'https://www.ebi.ac.uk/unichem/rest/cpd_search'
    search_url = '{base_url}/{src_compound_id}/{src_id}'.format(
        base_url = base_url,
        src_compound_id = compound_id,
        src_id = source_to_id[source])
    return connectivity_query(search_url, **kwargs)

In [7]:
#https://chembl.gitbook.io/unichem/api/sources
#https://www.ebi.ac.uk/unichem/search/connectivity
#https://www.ebi.ac.uk/unichem/search/connectivity?type=sourceID&compound=DB00006&sourceID=2
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4158273/
def unichem_search(search_type, source, compound_id):
    url = "https://www.ebi.ac.uk/unichem/api/v1/connectivity"
    
    if search_type=='sourceID':
        payload = {
            "type": "sourceID",
            "compound": compound_id,
            "searchComponents": True,
            "sourceID": str(source_to_id[source])
        }
    elif search_type=='inchikey':
        payload = {
            "type": "inchikey",
            "compound": compound_id,
            "searchComponents": True
        }
    
    headers = {"Content-Type": "application/json"}

    response = requests.request("POST", url, json=payload, headers=headers)

    try:
        response = response.json() #json.dumps(response) 
    except ValueError:
        print('cannot decode json:', url)
        return
    if response['response'] == 'Not found':
        print('UniChem error:', search_type, response['response'])
        return
    
    return response['sources']

In [8]:
# mapping writer
mapping_path = os.path.join(path, 'mapping.tsv.gz')
mapping_file = gzip.open(mapping_path, 'wb')
mapping_buffer = io.TextIOWrapper(mapping_file, line_buffering = True)
mapping_fields = ['drugbank_id', 'drugbank_name', 'id', 'source_name', 'compoundId', 'baseIDURLAvailable', 'longName', 'shortName', 'typeOfSearch', 'url'
              ] #'C', 'Query_InChIKey', 'CpdId_InChIKey', 'Full_Query_InChI', 'Full_CpdId_InChI', 'Matching_Query_InChI', 'Matching_CpdId_InChI', 'b', 'i', 'm', 'p', 's', 't'
mapping_writer = csv.DictWriter(mapping_buffer, delimiter = '\t', fieldnames = mapping_fields, extrasaction = 'ignore')
mapping_writer.writeheader()

# mapping counts writer
count_path = os.path.join(path, 'mapping-counts.tsv')
count_file = open(count_path, 'w')
source_names = [id_to_source[i] for i in sorted(set(id_to_source) - {0})]
count_fields = ['drugbank_id', 'drugbank_name'] + source_names
count_writer = csv.DictWriter(count_file, delimiter = '\t', fieldnames = count_fields, restval = 0)
count_writer.writeheader()


for drug in drugbank:
    if drug['type'] != 'small molecule':
        continue
    if not drug['inchikey']:
        continue
    drugbank_id = drug['drugbank_id']
    drugbank_name = drug['name']
    print(drugbank_id, drugbank_name)
    query_matches = unichem_search('sourceID','drugbank', drugbank_id)
    if not query_matches:
        if drug['inchi'].startswith('InChI=1S'):
            query_matches = unichem_search('inchikey','drugbank', drug['inchikey'])
        else: 
            print('non-standard InChI: cannot query compound')
            continue
    #if query_matches is not None:
    #    continue
    query_matches = list(query_matches)
    #query_matches = list(cpd_search('drugbank', drugbank_id, C = 4))
    #    if drug['inchi'].startswith('InChI=1S'):
    #        query_matches = list(key_search(drug['inchikey'], C = 4))
    #    else: # non-standard InChI
    #        print('non-standard InChI: cannot query compound')
    #        continue
    
    for match in query_matches:
        if match['id']<=31:
            match['drugbank_id'] = drugbank_id
            match['drugbank_name'] = drugbank_name
            match['source_name'] = id_to_source[int(match['id'])]
            mapping_writer.writerow(match)
    
    source_to_matches = dict()
    for match in query_matches:
        if match['id']<=31:
            match['source_name'] = id_to_source[int(match['id'])]
            match_set = source_to_matches.setdefault(match['source_name'], set())
            match_set.add(match['compoundId'])
    count = {k: len(v) for k, v in source_to_matches.items()}
    count = collections.defaultdict(int, count)
    count['drugbank_id'] = drugbank_id
    count['drugbank_name'] = drugbank_name
    count_writer.writerow(count)

mapping_file.close()
count_file.close()

DB00006 Bivalirudin
DB00007 Leuprolide
DB00014 Goserelin
DB00027 Gramicidin D
DB00035 Desmopressin
DB00050 Cetrorelix
DB00067 Vasopressin
DB00080 Daptomycin
DB00091 Cyclosporine
DB00106 Abarelix
DB00114 Pyridoxal phosphate
DB00115 Cyanocobalamin
DB00116 Tetrahydrofolic acid
DB00117 Histidine
DB00118 Ademetionine
DB00119 Pyruvic acid
DB00120 Phenylalanine
DB00121 Biotin
DB00122 Choline
DB00123 Lysine
DB00125 Arginine
DB00126 Ascorbic acid
DB00127 Spermine
DB00128 Aspartic acid
DB00129 Ornithine
DB00130 L-Glutamine
DB00131 Adenosine phosphate
DB00132 alpha-Linolenic acid
DB00133 Serine
DB00134 Methionine
DB00135 Tyrosine
DB00136 Calcitriol
DB00137 Lutein
DB00138 Cystine
DB00139 Succinic acid
DB00140 Riboflavin
DB00141 N-Acetylglucosamine
DB00142 Glutamic acid
DB00143 Glutathione
DB00144 Phosphatidyl serine
DB00145 Glycine
DB00146 Calcifediol
DB00147 Pyridoxal
DB00148 Creatine
DB00149 Leucine
DB00150 Tryptophan
DB00151 Cysteine
DB00152 Thiamine
DB00153 Ergocalciferol
DB00154 Dihomo-gamma-

DB00519 Trandolapril
DB00520 Caspofungin
DB00521 Carteolol
DB00522 Bentiromide
DB00523 Alitretinoin
DB00524 Metolazone
DB00525 Tolnaftate
DB00526 Oxaliplatin
DB00527 Cinchocaine
DB00528 Lercanidipine
DB00529 Foscarnet
DB00530 Erlotinib
DB00531 Cyclophosphamide
DB00532 Mephenytoin
DB00533 Rofecoxib
DB00534 Chlormerodrin
DB00535 Cefdinir
DB00536 Guanidine
DB00537 Ciprofloxacin
DB00538 Gadoversetamide
DB00539 Toremifene
DB00540 Nortriptyline
DB00541 Vincristine
DB00542 Benazepril
DB00543 Amoxapine
DB00544 Fluorouracil
DB00545 Pyridostigmine
DB00546 Adinazolam
DB00547 Desoximetasone
DB00548 Azelaic acid
DB00549 Zafirlukast
DB00550 Propylthiouracil
DB00551 Acetohydroxamic acid
DB00552 Pentostatin
DB00553 Methoxsalen
DB00554 Piroxicam
DB00555 Lamotrigine
DB00556 Perflutren
DB00557 Hydroxyzine
DB00558 Zanamivir
DB00559 Bosentan
DB00560 Tigecycline
DB00561 Doxapram
DB00562 Benzthiazide
DB00563 Methotrexate
DB00564 Carbamazepine
DB00565 Cisatracurium
DB00566 Succimer
DB00567 Cephalexin
DB00568 

DB00931 Metacycline
DB00932 Tipranavir
DB00933 Mesoridazine
DB00934 Maprotiline
DB00935 Oxymetazoline
DB00936 Salicylic acid
DB00937 Diethylpropion
DB00938 Salmeterol
DB00939 Meclofenamic acid
DB00940 Methantheline
DB00941 Hexafluronium
DB00942 Cycrimine
DB00943 Zalcitabine
DB00944 Demecarium
DB00945 Acetylsalicylic acid
DB00946 Phenprocoumon
DB00947 Fulvestrant
DB00948 Mezlocillin
DB00949 Felbamate
DB00950 Fexofenadine
DB00951 Isoniazid
DB00952 Naratriptan
DB00953 Rizatriptan
DB00954 Dirithromycin
DB00955 Netilmicin
DB00956 Hydrocodone
DB00957 Norgestimate
DB00958 Carboplatin
DB00959 Methylprednisolone
DB00960 Pindolol
DB00961 Mepivacaine
DB00962 Zaleplon
DB00963 Bromfenac
DB00964 Apraclonidine
DB00966 Telmisartan
DB00967 Desloratadine
DB00968 Methyldopa
DB00969 Alosetron
DB00970 Dactinomycin
DB00971 Selenium Sulfide
DB00972 Azelastine
DB00973 Ezetimibe
DB00974 Edetic acid
DB00975 Dipyridamole
DB00976 Telithromycin
DB00977 Ethinylestradiol
DB00978 Lomefloxacin
DB00979 Cyclopentolate
D

DB01400 Neostigmine
DB01401 Choline magnesium trisalicylate
DB01403 Methotrimeprazine
DB01405 Temafloxacin
DB01406 Danazol
DB01407 Clenbuterol
DB01408 Bambuterol
DB01409 Tiotropium
DB01410 Ciclesonide
DB01411 Pranlukast
DB01412 Theobromine
DB01413 Cefepime
DB01414 Cefacetrile
DB01415 Ceftibuten
DB01416 Cefpodoxime
DB01418 Acenocoumarol
DB01419 Antrafenine
DB01420 Testosterone propionate
DB01421 Paromomycin
DB01422 Nitroxoline
DB01423 Stepronin
DB01424 Aminophenazone
DB01425 Alizapride
DB01426 Ajmaline
DB01427 Amrinone
DB01428 Oxybenzone
DB01429 Aprindine
DB01430 Almitrine
DB01431 Allylestrenol
DB01433 Methadyl acetate
DB01434 19-norandrostenedione
DB01435 Antipyrine
DB01436 Alfacalcidol
DB01437 Glutethimide
DB01438 Phenazopyridine
DB01439 3-Methylthiofentanyl
DB01440 gamma-Hydroxybutyric acid
DB01441 5-Methoxy-N,N-diisopropyltryptamine
DB01442 MMDA
DB01443 19-Nor-5-androstenedione
DB01444 Dimethylthiambutene
DB01445 Bufotenine
DB01446 Indopan
DB01447 4-Methylaminorex
DB01450 Dihydroeto

DB01718 Cetrimonium
DB01719 Thio-Maltopentaose
DB01720 (2Z)-2-(Benzoylamino)-3-[4-(2-bromophenoxy)phenyl]acrylic acid
DB01721 N-[2-hydroxy-1-indanyl]-5-[(2-tertiarybutylaminocarbonyl)-4(benzo[1,3]dioxol-5-ylmethyl)-piperazino]-4-hydroxy-2-(1-phenylethyl)-pentanamide
DB01723 {3-[3-(3,4-Dimethoxy-Phenyl)-1-(1-{1-[2-(3,4,5-Trimethoxy-Phenyl)-Butyryl]-Piperidin-2yl}-Vinyloxy)-Propyl]-Phenoxy}-Acetic Acid
DB01724 L-Threoninol
DB01725 2-{2-hydroxy-[1,1'-biphenyl]-3-yl}-1H-1,3-benzodiazole-5-carboximidamide
DB01726 2-Aminophenol
DB01727 Isocitric Acid
DB01728 1,2-dihexadecanoyl-sn-glycero-3-phosphoethanolamine
DB01729 1D-myo-inositol 1,3,4-trisphosphate
DB01731 (S)-wiskostatin
DB01732 (4R,5S,6S,7R)-1,3-dibenzyl-4,7-bis(phenoxymethyl)-5,6-dihydroxy-1,3 diazepan-2-one
DB01733 L-Phospholactate
DB01734 3-(Oxalyl-Amino)-Naphthalene-2-Carboxylic Acid
DB01735 3-Chloroalaninate
DB01736 [3-(Dodecanoylamino)Propyl](Hydroxy)Dimethylammonium
DB01737 Nalpha-(2-Naphthylsulfonylglycyl)-3-Amidino-D,L-Phenyla

DB01917 Putrescine
DB01918 [Methyltelluro]Acetate
DB01919 Pentanal
DB01920 1-O-[O-Nitrophenyl]-Beta-D-Galactopyranose
DB01921 Xylose-derived lactam oxime
DB01922 Maltosyl-Alpha (1,4)-D-Gluconhydroximo-1,5-Lactam
DB01923 L-Xylulose 5-Phosphate
DB01924 Benzhydroxamic Acid
DB01925 2'-Chloro-Biphenyl-2,3-Diol
DB01926 Carboxymycobactin S
DB01927 Duroquinone
DB01929 5-Chloryl-2,4,6-quinazolinetriamine
DB01930 2,4-Dihydroxy-3,3-Dimethyl-Butyrate
DB01931 5,7-Dichlorokynurenic acid
DB01932 5-Methylpyrrole
DB01933 7-Hydroxystaurosporine
DB01934 Arylomycin A2
DB01935 3-{[(1r)-1-Benzyl-2-Sulfanylethyl]Amino}-3-Oxopropanoic Acid
DB01936 alpha-D-arabinofuranose
DB01937 Guanosine-2'-monophosphate
DB01938 L-Histidine Beta Naphthylamide
DB01939 5-Amidino-Benzimidazole
DB01940 Balanol Analog 2
DB01941 LG-100268
DB01942 Formic acid
DB01944 (S)-blebbistatin
DB01945 4-Carbamoyl-1-Beta-D-Ribofuranosyl-Imidazolium-5-Olate-5'-Phosphate
DB01946 Bisindolylmaleimide VIII
DB01947 RU78262
DB01948 1-(2,6-Dichloroph

DB02123 Glycochenodeoxycholic Acid
DB02124 (2s,3s)-Trans-2,3-Dihydro-3-Hydroxyanthranilic Acid
DB02125 Adamantanone
DB02126 4-Carboxycinnamic Acid
DB02127 Diisopropyl methylphosphonate
DB02128 [1-(3-hydroxy-2-oxo-1-phenethyl-propylcarbamoyl)2-phenyl-ethyl]-carbamic acid pyridin-4-ylmethyl ester
DB02129 Dihydroorotic Acid
DB02130 Vanillic acid
DB02131 N-1-methylheptylformamide
DB02132 Zenarestat
DB02133 Chlorophyll A
DB02134 Xanthine
DB02135 1-deoxy-1-{2,6,8-trioxo-7-[4-(phosphonooxy)butyl]-1,2,3,6,7,8-hexahydro-9H-purin-9-yl}-D-arabinitol
DB02136 Cephalosporin analog
DB02137 Molybdenum cofactor
DB02138 Diethyl 4-Methylbenzylphosphonate
DB02139 (2e)-N-Allyl-4-{[3-(4-Bromophenyl)-5-Fluoro-1-Methyl-1h-Indazol-6-Yl]Oxy}-N-Methyl-2-Buten-1-Amine
DB02140 N1-(1-Dimethylcarbamoyl-2-Phenyl-Ethyl)-2-Oxo-N4-(2-Pyridin-2-Yl-Ethyl)-Succinamide
DB02141 S,S'-(1,4-Phenylene-Bis(1,2-Ethanediyl))Bis-Isothiourea
DB02142 Pyridoxamine-5'-Phosphate
DB02143 1-hydroxy-2-isopropylguanidine
DB02144 1,2-diacyl-s

DB02317 Alpha-D-Galactose-1-Phosphate
DB02318 2-deoxy-2-fluoro-alpha-D-mannosyl fluoride
DB02319 5,6-dihydroxy-NADP
DB02320 N-beta-D-glucopyranosylacetamide
DB02321 5-(3-Amino-4,4-Dihyroxy-Butylsulfanylmethyl)-Tetrahydro-Furan-2,3,4-Triol
DB02322 Heparin Disaccharide I-S
DB02323 EM-1745
DB02324 5-Iodo-2'-Deoxyuridine-5'-Monophosphate
DB02325 Isopropyl alcohol
DB02326 1-Hydroxyamine-2-Isobutylmalonic Acid
DB02327 Triethylene glycol
DB02328 2-[(3-Hydroxy-2-Methyl-5-Phosphonooxymethyl-Pyridin-4-Ylmethyl)-Imino]-5-Phosphono-Pent-3-Enoic Acid
DB02329 Carbenoxolone
DB02331 (2s)-2-[(5-Benzofuran-2-Yl-Thiophen-2-Ylmethyl)-(2,4-Dichloro-Benzoyl)-Amino]-3-Phenyl-Propionic Acid
DB02332 Flavin-N7 protonated-adenine dinucleotide
DB02333 Deoxyuridine-5'-Triphosphate
DB02334 (R)-2-Hydroxy-3-Sulfopropanoic Acid
DB02335 2-Aminothiazoline
DB02336 RU83876
DB02337 S-(D-Carboxybutyl)-L-Homocysteine
DB02338 NADPH
DB02339 Allyl-{6-[3-(4-Bromo-Phenyl)-Benzofuran-6-Yloxy]-Hexyl-}-Methyl-Amin
DB02340 N-Acetyl-S

DB02511 2-Hydroxy-5-({1-[(2-Naphthyloxy)Methyl]-3-Oxoprop-1-Enyl}Amino)Tyrosine
DB02512 1,6-Fructose Diphosphate (Linear Form)
DB02513 Thymol
DB02514 (2Z)-3-{[Oxido(oxo)phosphoranyl]oxy}-2-phenylacrylate
DB02515 sn-glycerol 3-phosphate
DB02516 (R)-carnitinyl-CoA betaine
DB02517 D-Glutamic Acid
DB02518 N-Acetylalanine
DB02519 Indirubin-5-sulphonate
DB02520 Ditiocarb
DB02521 Flaviolin
DB02522 Phosphonopyruvate
DB02523 2-aminooxyethyl-[[(2R,3S,4R,5R)-5-(6-aminopurin-9-yl)-3,4-dihydroxy-tetrahydrofuran-2-yl]methyl]-methyl-sulfonium
DB02524 2',3'-O-{4-[Hydroxy(oxido)-λ5-azanylidene]-2,6-dinitro-2,5-cyclohexadiene-1,1-diyl}adenosine 5'-(tetrahydrogen triphosphate)
DB02525 D-galactohydroximo-1,5-lactam
DB02526 CRA_10655
DB02527 Cyclic adenosine monophosphate
DB02528 Tetrazolyl Histidine
DB02529 (2R,4S,5R,6R)-5-Acetamido-4-amino-6-(diethylcarbamoyl)oxane-2-carboxylic acid
DB02530 gamma-Aminobutyric acid
DB02531 Isobutyric acid
DB02532 2,4,6-Triaminoquinazoline
DB02534 2-Allylphenol
DB02535 Ami

DB02703 Fusidic acid
DB02704 (2R,3R,4R,5R)-3,4-Dihydroxy-N,N'-bis[(1S,2R)-2-hydroxy-2,3-dihydro-1H-inden-1-yl]-2,5-bis(2-phenylethyl)hexanediamide
DB02705 6-[N-(1-Isopropyl-1,2,3,4-Tetrahydro-7-Isoquinolinyl)Carbamyl]-2-Naphthalenecarboxamidine
DB02706 Mercaptocarboxylate Inhibitor
DB02707 Pentyl Trihydrogen Diphosphate
DB02709 Resveratrol
DB02710 2,3,-Dihydroxybenzoylserine
DB02711 4-{2,6,8-Trioxo-9-[(2S,3R,4R)-2,3,4,5-Tetrahydroxypentyl]-1,2,3,6,8,9-Hexahydro-7h-Purin-7-Yl}Butyl Dihydrogen Phosphate
DB02712 Sorbinil
DB02713 Acetylamino-Acetic Acid
DB02714 3'-Uridinemonophosphate
DB02715 Compound 18
DB02716 7-methyl-guanosine-5'-triphosphate
DB02717 Cellotetraose
DB02719 C-(1-hydrogyl-beta-D-glucopyranosyl) formamide
DB02720 alpha-D-glucopyranosyl-2-carboxylic acid amide
DB02721 4-Iodopyrazole
DB02722 4-O-methyl-beta-D-glucuronic acid
DB02723 4-Oxo-2-Phenylmethanesulfonyl-Octahydro-Pyrrolo[1,2-a]Pyrazine-6-Carboxylic Acid [1-(N-Hydroxycarbamimidoyl)-Piperidin-4-Ylmethyl]-Amide
DB02724

DB02899 N-Carboxymethionine
DB02900 alpha-D-mannose 6-phosphate
DB02901 Stanolone
DB02902 3'-phospho-5'-adenylyl sulfate
DB02903 1,3-bis-([[3-(4-{3-[3-nitro-5-(galactopyranosyloxy)-benzoylamino]-propyl}-piperazin-1-yl)-propylamino-3,4-dioxo-cyclobutenyl]-amino-ethyl]-amino-carbonyloxy)-2-amino-propane
DB02904 Beta-3-Serine
DB02905 N7-(5'-Phospho-alpha-ribosyl)-2-hydroxypurine
DB02906 (2s,4s)-Alpha-Campholinic Acid
DB02907 2-Amino-Vinyl-Phosphate
DB02908 RU78783
DB02909 5-(2-Chlorophenyl)Furan-2-Carboxylic Acid
DB02910 Octanoyl-Coenzyme A
DB02911 2,4-Diamino-6-Phenyl-5,6,7,8,-Tetrahydropteridine
DB02912 Propanoyl-CoA
DB02914 Anhydrovitamin A
DB02915 4-(2,4-Dimethyl-1,3-thiazol-5-yl)-N-[4-(trifluoromethyl)phenyl]-2-pyrimidinamine
DB02916 [(2r,3s,4r,5r)-5-(6-Amino-9h-Purin-9-Yl)-3,4-Dihydroxytetrahydro-2-Furanyl]Methyl Sulfamate
DB02917 N-Hydroxy-4-(Methyl{[5-(2-Pyridinyl)-2-Thienyl]Sulfonyl}Amino)Benzamide
DB02918 Zardaverine
DB02919 2,4-Diamino-6-[N-(3',4',5'-Trimethoxybenzyl)-N-Methyla

DB03095 Tetramethylammonium
DB03096 2-Morpholinoethylamine
DB03097 PMP-hydroxyisoxazole, pyridoxamine-5-phosphate-hydroxyisoxazole
DB03098 [Methylseleno]Acetate
DB03099 5-Amino 6-Nitro Uracil
DB03100 6-Nitroindazole
DB03101 Ribose-1-Phosphate
DB03102 2-(Oxalyl-Amino)-4,7-Dihydro-5h-Thieno[2,3-C]Pyran-3-Carboxylic Acid
DB03103 Thymidine-5'- Diphosphate
DB03104 2-[4-[(Z)-2-Acetamido-3-oxo-3-[[(3S)-2-oxo-1-[(4-phenylphenyl)methyl]azepan-3-yl]amino]prop-1-enyl]-2-formylphenyl]acetic acid
DB03105 L-Pentahomoserine
DB03106 scyllo-inositol
DB03107 beta-Alanine
DB03108 4-phospho-D-erythronic acid
DB03109 2-acetylamino-2-deoxy-b-D-allopyranose
DB03110 2-Chlorophenol
DB03111 Glucosamine 1-Phosphate
DB03112 6-(2-Oxo-Hexahydro-Thieno[3,4-D]Imidazol-4-Yl)-Hexanoic Acid
DB03113 3-Fluoro-2-(Phosphonooxy)Propanoic Acid
DB03114 PAS219
DB03115 5-Bromo-N-[(2S)-2,3-dihydroxypropoxy]-3,4-difluoro-2-[(2-fluoro-4-iodophenyl)amino]benzamide
DB03116 5-(1-Carboxy-1-Phosphonooxy-Ethoxyl)-Shikimate-3-Phosphate
DB

DB03288 5-Chloro-1h-Indole-2-Carboxylic Acid{[Cyclopentyl-(2-Hydroxy-Ethyl)-Carbamoyl]-Methyl}-Amide
DB03289 Thiarsa Dihydroxy Cysteine
DB03290 L-naphthyl-1-acetamido boronic acid alanine
DB03291 4-Deoxy-4-Amino-Beta-D-Glucose
DB03292 3-Phosphono-D-alanine
DB03293 9-Methyl Uric Acid
DB03294 1-Methyl-3-Oxo-1,3-Dihydro-Benzo[C]Isothiazole-5-Sulfonic Acid Amide
DB03295 Glutathionylspermidine
DB03296 3'-beta-Sialyl-beta-lactose
DB03297 Benzylsulfonic acid
DB03298 Phenylphosphate
DB03299 N-Succinyl Phenylglycine
DB03300 Pterin Cytosine Dinucleotide
DB03301 2-Allyl-6-Methyl-Phenol
DB03302 4,5,6,7-Tetrachloro-3h-Isobenzofuran-1-One
DB03303 3-deoxy-D-arabino-hexonic acid
DB03304 7-Deaza-7-Aminomethyl-Guanine
DB03305 N(G)-Iminoethylornithine
DB03306 RU78300
DB03307 4-[(6-Amino-4-Pyrimidinyl)Amino]Benzenesulfonamide
DB03308 L-Leucyl-Hydroxylamine
DB03309 N-cyclohexyltaurine
DB03310 Glutathione disulfide
DB03311 3-(3,5-Dibromo-4-Hydroxy-Benzoyl)-2-Ethyl-Benzofuran-6-Sulfonic Acid [4-(Thiazol-2-Yl

DB03487 (S)-Aspartimide
DB03488 Uridine-5'-diphosphate-2-deoxy-2-fluoro-alpha-D-galactose
DB03489 2-Keto-3-Deoxygluconate
DB03490 3-Pyridin-4-Yl-2,4-Dihydro-Indeno[1,2-.C.]Pyrazole
DB03491 2'-Deoxyguanosine-5'-Diphosphate
DB03492 lambda-bis(2,2'-bipyridine)imidazole osmium (II)
DB03493 7-Methylguanosine
DB03495 4,6-Dideoxy-4-{[4,5,6-Trihydroxy-3-(Hydroxymethyl)Cyclohex-2-En-1-Yl]Amino}-Alpha-D-Lyxo-Hexopyranosyl-(1->4)-Alpha-D-Threo-Hexopyranosyl-(1->6)-Alpha-L-Threo-Hexopyranose
DB03496 Alvocidib
DB03497 L-serine O-sulfate
DB03498 Mercaptomethyl Phosphonate
DB03499 D-Malic acid
DB03500 Tricosanoic acid
DB03501 Galactose-uridine-5'-diphosphate
DB03502 (4s)-4-{[(2s)-2-Amino-3-Oxopropyl]Sulfanyl}-L-Homoserinate
DB03503 4-Acetyl-4-guanidino-6-methyl(propyl)carboxamide-4,5-dihydro-2H-pyran-2-carboxylic acid
DB03504 9-Butyl-8-(2-Chloro-3,4,5-Trimethoxy-Benzyl)-9h-Purin-6-Ylamine
DB03505 2,6-diaminoquinazolin-4-ol
DB03506 5H-pyrrolo[3,2-d]pyrimidin-4-amine
DB03507 6-[3-(4-Morpholinyl)Propyl]

DB03685 Uridine monophosphate
DB03686 S-(4-nitrobenzyl)glutathione
DB03687 4-(Cytidine 5'-diphospho)-2-C-methyl-D-erythritol
DB03688 Hydracrylic acid
DB03690 (Z,Z)-4-Hydroxy-N,N,N-Trimethyl-10-Oxo-7-[(1-Oxo-9-Octadecenyl)Oxy]-3,5,9-Trioxa-4-Phosphaheptacos-18-En-1-Aminium-4-Oxide
DB03691 WRR-112
DB03692 1-Hexadecanosulfonyl-O-L-Serine
DB03693 N-(2-Aminoethyl)-5-Chloroisoquinoline-8-Sulfonamide
DB03694 N-phenylthiourea
DB03695 Piritrexim
DB03696 Lanosterol
DB03697 4-Sulfonamide-[1-(4-Aminobutane)]Benzamide
DB03698 5-Mercaptoethanol-2-decenoyl-coenzyme A
DB03699 Succinyl-Coenzyme A
DB03700 D-Threonine
DB03701 Vanoxerine
DB03702 N-{4-[(Carboxymethyl)carbamoyl]benzoyl}-L-valyl-N-[(3S)-1,1,1-trifluoro-4-methyl-2-oxo-3-pentanyl]-L-prolinamide
DB03703 Cyclohexanol
DB03704 12-Hydroxydodecanoic Acid
DB03705 6-Methylamino-5-Nitroisocytosine
DB03706 1-Hydroxy-2-S-glutathionyl-3-para-nitrophenoxy-propane
DB03707 S-Ethyl-N-Phenyl-Isothiourea
DB03708 Adenosine 5'-phosphosulfate
DB03709 Bicine
DB0371

DB03884 Phenylpyruvic acid
DB03885 L-gamma-glutamyl-S-(naphthalen-1-ylmethyl)-L-cysteinylglycine
DB03886 Biopterin
DB03887 Alpha-Adenosine Monophosphate
DB03888 N-Allyl-6-{[3-(4-bromophenyl)-1-methyl-1H-indazol-6-yl]oxy}-N-methyl-1-hexanamine
DB03889 S-(N-hydroxy-N-bromophenylcarbamoyl)glutathione
DB03890 N-[2-(1-Formyl-2-Methyl-Propyl)-1-(4-Piperidin-1-Yl-but-2-Enoyl)-Pyrrolidin-3-Yl]-Methanesulfonamide
DB03891 Dibenzyl (carbonylbis{2,1-hydrazinediyl[(2S)-4-methyl-1-oxo-1,2-pentanediyl]})biscarbamate
DB03892 5-N-Allyl-arginine
DB03893 Thionicotinamide-Adenine-Dinucleotide
DB03894 N-Propargyl-1(S)-Aminoindan
DB03895 Malachite Green
DB03896 Triphosphoric acid
DB03897 Phloretic acid
DB03898 3-Chloro-4-Hydroxyphenylglycine
DB03899 9-Butyl-8-(4-Methoxybenzyl)-9h-Purin-6-Amine
DB03900 tert-butanol
DB03901 5-Oxoprolinal
DB03902 Oxalic Acid
DB03903 Tmr
DB03904 Urea
DB03905 Succinamide-CoA
DB03906 2-Phenylheme
DB03907 N-[(E)-3-[(2R,3S,4R,5R)-5-(6-Aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]prop-

DB04061 Alpha-Amino-2-Indanacetic Acid
DB04062 beta-D-fucose
DB04063 alpha-Methylleucine
DB04064 Nogalaviketone
DB04065 N-Cyclopentyl-N-Cyclobutylformamide
DB04066 p-Coumaric acid
DB04067 4-hydroxybenzyl coenzyme A
DB04068 2',3'-dideoxy-3'-fluoro-urididine-5'-diphosphate
DB04069 5,6-Dihydro-Benzo[H]Cinnolin-3-Ylamine
DB04070 6-Deoxyerythronolide B
DB04071 Cpad
DB04072 Alpha-Methylisocitric Acid
DB04073 N-{3-[4-(3-amino-propyl)-piperazin-1-yl]-propyl}-3-nitro-5-(galactopyranosyl)-beta-benzamide
DB04074 alpha-Ketoisovalerate
DB04075 N-Acetyl-L-Glutamate
DB04076 Hypoxanthine
DB04079 Heptane-1,2,3-Triol
DB04080 RU78191
DB04081 (4s-Trans)-4-(Methylamino)-5,6-Dihydro-6-Methyl-4h-Thieno(2,3-B)Thiopyran-2-Sulfonamide-7,7-Dioxide
DB04082 Decyloxy-Methanol
DB04083 N(6)-(pyridoxal phosphate)-L-lysine
DB04084 2-deoxy-2-fluoro-α-D-mannose
DB04085 Bis(N-maleimidomethyl)ether
DB04086 2',4'-Dinitrophenyl-2deoxy-2-Fluro-B-D-Cellobioside
DB04087 Open Form of 2'-Deoxy-Ribofuranose-5'-Phosphate
DB04088 2-

DB04246 CRA_23653
DB04248 beta-(1->4)-galactotriose
DB04249 Zinc Substituted Heme C
DB04250 Butyrylthiocholine
DB04252 N-Carbamoylaspartic acid
DB04253 Tretazicar
DB04254 8-Benzo[1,3]Dioxol-,5-Ylmethyl-9-Butyl-2-Fluoro-9h-Purin-6-Ylamine
DB04255 Inhibitor BEA388
DB04256 (S)-alpha-methyl-4-carboxyphenylglycine
DB04257 Palmitoleic Acid
DB04258 Seocalcitol
DB04259 7-Methyl-7,8-dihydroguanosine 5'-(dihydrogen phosphate)
DB04260 9-(5,5-Difluoro-5-Phosphonopentyl)Guanine
DB04261 Carbamic Acid
DB04262 3-(7-hydroxy-8-ribityllumazine-6-yl) propionic acid
DB04263 Geneticin
DB04264 (10R)-10-formyl-5,8,10-trideazafolic acid
DB04265 N-acetyl-beta-neuraminic acid
DB04266 5-(6-D-ribitylamino-2,4-dihydroxypyrimidin-5-yl)-1-pentyl-phosphonic acid
DB04267 Dipicolinic acid
DB04268 Methylumbelliferyl chitotriose
DB04269 Cyclotheonamide A
DB04270 (S)-3-(4-(2-Carbazol-9-Yl-Ethoxy)-Phenyl)-2-Ethoxy-Propionic Acid
DB04271 3,5-Dimethyl-1h-Pyrazole-4-Carboxylic Acid Ethyl Ester
DB04272 Citric acid
DB04273 8,9-D

DB04458 2,2-Dichloro-1-methanesulfinyl-3-methyl-cyclopropanecarboxylic acid [1-(4-bromo-phenyl)-ethyl]-amide
DB04459 3,4-Dichloroisocoumarin
DB04460 (C8-S)-Hydantocidin 5'-phosphate
DB04461 Coproporphyrinogen III
DB04462 Tetrabromo-2-Benzotriazole
DB04463 3-(4-Amino-1-Tert-Butyl-1h-Pyrazolo[3,4-D]Pyrimidin-3-Yl)Phenol
DB04464 N-Formylmethionine
DB04465 Lactose
DB04466 SR12813
DB04467 N-(5'-phosphopyridoxyl)-L-alanine
DB04468 Afimoxifene
DB04469 1-(4-Methoxyphenyl)-3,5-Dimethyl-1h-Pyrazole-4-Carboxylic Acid Ethyl Ester
DB04470 CRA_10656
DB04471 2-Phenyl-1-[4-(2-Piperidin-1-Yl-Ethoxy)-Phenyl]-1,2,3,4-Tetrahydro-Isoquinolin-6-Ol
DB04472 (R)-1-Para-Nitro-Phenyl-2-Azido-Ethanol
DB04473 alpha-L-fucose
DB04474 8-anilinonaphthalene-1-sulfonic acid
DB04476 Trencam-3,2-Hopo
DB04477 2'-Deoxy-2'-[(3,5-dimethoxybenzoyl)amino]-N-[(1R)-1,2,3,4-tetrahydro-1-naphthalenyl]adenosine
DB04478 Cp-166572, 2-Hydroxymethyl-4-(4-N,N-Dimethylaminosulfonyl-1-Piperazino)-Pyrimidine
DB04479 4-Nitro-Inden-1-One
DB04

DB04645 5-{3-[3-(2,4-DICHLORO-BENZOYL)-UREIDO]-2-METHYL-PHENOXY}-PENTANOIC ACID
DB04646 Dibromothymoquinone
DB04647 BOC-GAMMA-D-GLU-L-LYS(CBZ)-D-BOROALA
DB04648 S-propylamine-L-cysteine
DB04649 TETRAHEDRAL INTERMEDIATE OF BLASTICIDIN S
DB04650 5-[(3AS,4R,6AR)-2-OXOHEXAHYDRO-1H-THIENO[3,4-D]IMIDAZOL-4-YL]PENTANOIC ACID
DB04651 BIOTINOL-5-AMP
DB04652 Corticosterone
DB04653 N-[(benzyloxy)carbonyl]-L-leucyl-N-[(1S)-3-fluoro-1-(4-hydroxybenzyl)-2-oxopropyl]-L-leucinamide
DB04654 4-PIPERIDIN-4-YLBUTANAL
DB04655 Metoprine
DB04656 1,3,4-TRIHYDROXY-5-(3-PHENOXYPROPYL)-CYCLOHEXANE-1-CARBOXYLIC A CID
DB04657 Carboxin
DB04658 Calystegine B2
DB04659 (1S,2S,3R,4S,5S)-2,3,4-TRIHYDROXY-5-(HYDROXYMETHYL)CYCLOHEXYL (1E)-2-PHENYL-N-(SULFOOXY)ETHANIMIDOTHIOATE
DB04660 Choline alfoscerate
DB04661 cis-tetracosenoyl sulfatide
DB04662 OLOMOUCINE II
DB04663 2-KETO-6-PHOSPHATE-D-GLUCONIC ACID, ALPHA-FURANOSE FORM
DB04664 Cyclohexyl-pentyl-maltoside
DB04665 Coumarin
DB04666 CHROMOPHORE (LYS-TYR-GLY)
DB04667 CHRO

DB04802 D-Erythro-2,3-diaminobutyric acid
DB04803 Verdoheme
DB04804 (2S)-2,3-Diaminobutanoic acid
DB04805 Virginiamycin S1
DB04806 (5-BROMO-4-CHLORO-3-INDOLYL)-A-D-MANNOSE
DB04807 4-NITROPHENYL-(6-S-ALPHA-D-XYLOPYRANOSYL)-BETA-D-GLUCOPYRANOSIDE
DB04808 Neamine
DB04809 SALOPHEN-10-PROPIONATE IRON CHELATE
DB04810 Salophen-10-carboxylate iron chelate
DB04811 Salophen iron chelate
DB04812 Benoxaprofen
DB04813 Bithionol
DB04814 Bunamiodyl
DB04815 Clioquinol
DB04816 Dantron
DB04817 Metamizole
DB04818 Iproniazid
DB04819 Methapyrilene
DB04820 Nialamide
DB04821 Nomifensine
DB04822 Oxeladin
DB04823 Oxyphenisatin
DB04824 Phenolphthalein
DB04825 Prenylamine
DB04826 Thenalidine
DB04827 Urethane
DB04828 Zomepirac
DB04829 Lysergic acid diethylamide
DB04830 Buformin
DB04831 Tienilic acid
DB04832 Zimelidine
DB04833 Methaqualone
DB04834 Rapacuronium
DB04835 Maraviroc
DB04836 Amineptine
DB04837 Clofedanol
DB04838 Cyclandelate
DB04839 Cyproterone acetate
DB04840 Debrisoquine
DB04841 Flunarizine
DB04842 Fl

DB06082 PX-478
DB06083 Tapinarof
DB06087 Friulimicin B
DB06090 Bradanicline
DB06091 Evofosfamide
DB06106 AIT-034
DB06112 3,4-Methylenedioxy-N-isopropylamphetamine
DB06117 1alpha,24S-Dihydroxyvitamin D2
DB06119 Cenobamate
DB06124 L-aminocarnityl-succinyl-leucyl-argininal-diethylacetal
DB06127 Bisegliptin
DB06133 Dimethylcurcumin
DB06134 SNS-314
DB06137 Tirbanibulin
DB06140 SUVN-502
DB06143 Aminoimidazole carboxamide
DB06144 Sertindole
DB06145 Spiramycin
DB06147 Sulfathiazole
DB06148 Mianserin
DB06149 Teicoplanin
DB06150 Sulfadimethoxine
DB06151 Acetylcysteine
DB06152 Nylidrin
DB06153 Pizotifen
DB06154 Pentaerythritol tetranitrate
DB06155 Rimonabant
DB06156 Tesofensine
DB06157 Istaroxime
DB06159 Rubitecan
DB06160 Garenoxacin
DB06163 Plevitrexed
DB06166 Fosdevirine
DB06169 Indibulin
DB06174 Noscapine
DB06176 Romidepsin
DB06177 Glufosfamide
DB06178 Talotrexin
DB06179 Darinaparsin
DB06182 Talabostat
DB06185 Forodesine
DB06187 Valtorcitabine
DB06188 Ispinesib
DB06190 Solabegron
DB06191 Zosuq

DB06827 Viomycin
DB06828 5-[2-(1H-pyrrol-1-yl)ethoxy]-1H-indole
DB06829 4-BROMO-3-(CARBOXYMETHOXY)-5-[3-(CYCLOHEXYLAMINO)PHENYL]THIOPHENE-2-CARBOXYLIC ACID
DB06830 (1-HYDROXYHEPTANE-1,1-DIYL)BIS(PHOSPHONIC ACID)
DB06831 2-((9H-PURIN-6-YLTHIO)METHYL)-5-CHLORO-3-(2-METHOXYPHENYL)QUINAZOLIN-4(3H)-ONE
DB06832 Prinaberel
DB06833 1-CYCLOHEXYL-N-{[1-(4-METHYLPHENYL)-1H-INDOL-3-YL]METHYL}METHANAMINE
DB06834 N-(2-hydroxy-1,1-dimethylethyl)-1-methyl-3-(1H-pyrrolo[2,3-b]pyridin-2-yl)-1H-indole-5-carboxamide
DB06835 (2S)-2-[3-(AMINOMETHYL)PHENYL]-3-{(S)-HYDROXY[(1R)-2-METHYL-1-{[(2-PHENYLETHYL)SULFONYL]AMINO}PROPYL]PHOSPHORYL}PROPANOIC ACID
DB06836 N-(5-{4-Chloro-3-[(2-hydroxyethyl)sulfamoyl]phenyl}-4-methyl-1,3-thiazol-2-yl)acetamide
DB06837 (2R)-N~4~-hydroxy-2-(3-hydroxybenzyl)-N~1~-[(1S,2R)-2-hydroxy-2,3-dihydro-1H-inden-1-yl]butanediamide
DB06838 methyl L-phenylalaninate
DB06839 N-(ethoxycarbonyl)-L-leucine
DB06840 diethyl [(1R)-1,5-diaminopentyl]boronate
DB06841 [(2R)-1-[(2S)-2-[[(2S,3S)-1-Ch

DB06941 (Z)-2-[2-(4-methylpiperazin-1-yl)benzyl]diazenecarbothioamide
DB06942 N-(4-carbamimidoylbenzyl)-1-(3-phenylpropanoyl)-L-prolinamide
DB06943 (3S)-1-{[4-(but-2-yn-1-yloxy)phenyl]sulfonyl}pyrrolidine-3-thiol
DB06944 N-(3-cyclopropyl-1H-pyrazol-5-yl)-2-(2-naphthyl)acetamide
DB06945 N-hydroxy-4-({4-[4-(trifluoromethyl)phenoxy]phenyl}sulfonyl)tetrahydro-2H-pyran-4-carboxamide
DB06946 (2S,3S)-3-(4-fluorophenyl)-2,3-dihydroxypropanoic acid
DB06947 1-[(2R)-2-aminobutanoyl]-N-(4-carbamimidoylbenzyl)-L-prolinamide
DB06948 2-ANILINO-6-CYCLOHEXYLMETHOXYPURINE
DB06949 N'-[(1E)-(3,5-dibromo-2,4-dihydroxyphenyl)methylidene]naphthalene-2-carbohydrazide
DB06950 4-chloro-N'-[(1E)-(3,5-dibromo-2,4-dihydroxyphenyl)methylidene]benzohydrazide
DB06951 (3R)-3-ethyl-N-[(4-methylphenyl)sulfonyl]-L-aspartic acid
DB06952 (2S)-2-HYDROXY-2H-CHROMENE-2-CARBOXYLIC ACID
DB06953 2-CHLORO-5-(3-CHLORO-PHENYL)-6-[(4-CYANO-PHENYL)-(3-METHYL-3H-IMIDAZOL-4-YL)- METHOXYMETHYL]-NICOTINONITRILE
DB06954 2-(cycloheptylmeth

DB07041 N-[2-(2,4-diaminopyrido[2,3-d]pyrimidin-7-yl)-2-methylpropyl]-4-phenoxybenzamide
DB07042 7-amino-2-tert-butyl-4-{[2-(1H-imidazol-4-yl)ethyl]amino}pyrido[2,3-d]pyrimidine-6-carboxamide
DB07043 7-amino-2-tert-butyl-4-(4-pyrimidin-2-ylpiperazin-1-yl)pyrido[2,3-d]pyrimidine-6-carboxamide
DB07044 3-bromo-N'-[(1E)-(3,5-dibromo-2,4-dihydroxyphenyl)methylidene]benzohydrazide
DB07045 (2R,3R,4S,5R)-2-[6-amino-8-[(3,4-dichlorophenyl)methylamino]purin-9-yl]-5-(hydroxymethyl)oxolane-3,4-diol
DB07046 2-[(2-chloro-4-iodophenyl)amino]-N-{[(2R)-2,3-dihydroxypropyl]oxy}-3,4-difluorobenzamide
DB07047 2',4'-DICHLORO-4-HYDROXY-1,1'-BIPHENYL-3-CARBOXYLIC ACID
DB07048 N-[(2R)-5-(aminosulfonyl)-2,3-dihydro-1H-inden-2-yl]-2-propylpentanamide
DB07049 (2R)-1-[(4-tert-butylphenyl)sulfonyl]-2-methyl-4-(4-nitrophenyl)piperazine
DB07050 5-[(phenylsulfonyl)amino]-1,3,4-thiadiazole-2-sulfonamide
DB07051 3,5-DIMETHYL-1-PHENYL-1H-PYRAZOLE-4-CARBOXYLIC ACID ETHYL ESTER
DB07052 5'-S-ethyl-5'-thioadenosine
DB07053 

DB07146 2,3-DIPHENYL-N-(2-PIPERAZIN-1-YLETHYL)FURO[2,3-B]PYRIDIN-4-AMINE
DB07147 methyl (1R,2S)-2-(hydroxycarbamoyl)-1-{4-[(2-methylquinolin-4-yl)methoxy]benzyl}cyclopropanecarboxylate
DB07148 (6S)-1-chloro-3-[(4-fluorobenzyl)oxy]-6-(pyrrolidin-1-ylcarbonyl)pyrrolo[1,2-a]pyrazin-4(6H)-one
DB07149 (7S)-2-(2-aminopyrimidin-4-yl)-7-(2-fluoroethyl)-1,5,6,7-tetrahydro-4H-pyrrolo[3,2-c]pyridin-4-one
DB07150 4-(4-HYDROXYPHENYL)-1-NAPHTHALDEHYDE OXIME
DB07151 4-(4-hydroxy-3-methylphenyl)-6-phenylpyrimidin-2(5H)-one
DB07152 N-[4-(5-fluoro-6-methylpyridin-2-yl)-5-quinoxalin-6-yl-1H-imidazol-2-yl]acetamide
DB07153 6-methyl-5-[3-methyl-3-(3,4,5-trimethoxyphenyl)but-1-yn-1-yl]pyrimidine-2,4-diamine
DB07154 (3R)-4-[(3R)-3-AMINO-4-(2,4,5-TRIFLUOROPHENYL)BUTANOYL]-3-METHYL-1,4-DIAZEPAN-2-ONE
DB07155 (3S)-1-CYCLOHEXYL-5-OXO-N-PHENYLPYRROLIDINE-3-CARBOXAMIDE
DB07156 (4Z)-6-bromo-4-({[4-(pyrrolidin-1-ylmethyl)phenyl]amino}methylidene)isoquinoline-1,3(2H,4H)-dione
DB07157 (5R,6S,8S)-8-[3-(AMINOMETHYL)PHEN

DB07253 N'-(5-chloro-1,3-benzodioxol-4-yl)-N-(3-methylsulfonylphenyl)pyrimidine-2,4-diamine
DB07254 N-[3-[[4-[(5-CHLORO-1,3-BENZODIOXOL-4-YL)AMINO]PYRIMIDIN-2-YL]AMINO]PHENYL]METHANESULFONAMIDE
DB07255 N'-(5-CHLORO-1,3-BENZODIOXOL-4-YL)-N-(3-MORPHOLIN-4-YLPHENYL)PYRIMIDINE-2,4-DIAMINE
DB07256 3-({4-[(5-CHLORO-1,3-BENZODIOXOL-4-YL)AMINO]PYRIMIDIN-2-YL}AMINO)BENZAMIDE
DB07257 4-(2-chlorophenyl)-8-(2-hydroxyethyl)-6-methylpyrrolo[3,4-e]indole-1,3(2H,6H)-dione
DB07258 (R)-pyridin-4-yl[4-(2-pyrrolidin-1-ylethoxy)phenyl]methanol
DB07259 1-(4-thiophen-2-ylphenyl)methanamine
DB07260 N-benzyl-4-[(2R)-pyrrolidin-2-ylmethoxy]aniline
DB07261 THIENO[3,2-B]PYRIDINE-2-SULFONIC ACID [1-(1-AMINO-ISOQUINOLIN-7-YLMETHYL)-2-OXO-PYRROLDIN-3-YL]-AMIDE
DB07262 1-{[N-(1-Imino-guanidino-methyl)]sulfanylmethyl}-3-trifluoromethyl-benzene
DB07263 [{2-bromo-4-[(2R)-3-oxo-2,3-diphenylpropyl]phenyl}(difluoro)methyl]phosphonic acid
DB07264 (S)-N-(1-(3-CHLORO-4-FLUOROPHENYL)-2-HYDROXYETHYL)-4-(4-(3-CHLOROPHENYL)-1H-PY

DB07364 6-PHENYL[5H]PYRROLO[2,3-B]PYRAZINE
DB07365 1-Naphthyl-L-alanine
DB07366 2-[N'-(4-AMINO-BUTYL)-HYDRAZINOCARBONYL]-PYRROLIDINE-1-CARBOXYLIC ACID BENZYL ESTER
DB07367 (3Z,5S,6R,7S,8S,8aR)-3-(octylimino)hexahydro[1,3]oxazolo[3,4-a]pyridine-5,6,7,8-tetrol
DB07368 4-(METHYLSULFONYL)BENZENECARBOXIMIDAMIDE
DB07369 N-(3-chlorophenyl)-N-methyl-2-oxo-3-[(3,4,5-trimethyl-1H-pyrrol-2-yl)methyl]-2H-indole-5-sulfonamide
DB07370 (3Z,5S,6R,7S,8R,8aS)-3-(octylimino)hexahydro[1,3]thiazolo[3,4-a]pyridine-5,6,7,8-tetrol
DB07371 3-(10-methyl-9-anthryl)propanoic acid
DB07373 Boldione
DB07374 Anisomycin
DB07375 Etiocholanedione
DB07376 5-(DIMETHYLAMINO)-1-NAPHTHALENESULFONIC ACID(DANSYL ACID)
DB07377 N'-((2S,3R)-3-AMINO-2-HYDROXY-5-(ISOPROPYLSULFANYL)PENTANOYL)-N-3-CHLOROBENZOYL HYDRAZIDE
DB07378 4-AMINO-2-OCTYLOXY-6-HYDROXYMETHYL-TETRAHYDRO-PYRAN-3,5-DIOL
DB07379 (2S)-2-({6-[(3-Amino-5-chlorophenyl)amino]-9-isopropyl-9H-purin-2-yl}amino)-3-methyl-1-butanol
DB07380 1,1,1-TRIFLUORO-3-ACETAMIDO-4-PHENYL

DB07490 2-[1-(4-CHLORO-PHENYL)-ETHYL]-4,6-DINITRO-PHENOL
DB07491 5-amino-2,4,6-tribromobenzene-1,3-dicarboxylic acid
DB07492 Bromamphenicol
DB07493 5-Bromoindirubin
DB07494 (3-EXO)-3-(10,11-DIHYDRO-5H-DIBENZO[A,D][7]ANNULEN-5-YLOXY)-8,8-DIMETHYL-8-AZONIABICYCLO[3.2.1]OCTANE
DB07495 5-(5-CHLORO-2,4-DIHYDROXYPHENYL)-N-ETHYL-4-(4-METHOXYPHENYL)-1H-PYRAZOLE-3-CARBOXAMIDE
DB07496 1,3-diphenylurea
DB07497 5-(hexahydro-2-oxo-1H-thieno[3,4-D]imidazol-6-yl)pentanal
DB07498 4-[3-(3-NITROPHENYL)-1,2,4-OXADIAZOL-5-YL]BUTANOIC ACID
DB07499 N-(4-{[amino(imino)methyl]amino}butyl)-2,4'-bi-1,3-thiazole-4-carboxamide
DB07500 (2E)-1-[2-hydroxy-4-methoxy-5-(3-methylbut-2-en-1-yl)phenyl]-3-(4-hydroxyphenyl)prop-2-en-1-one
DB07501 (2S)-1-{4-[(4-Anilino-5-bromo-2-pyrimidinyl)amino]phenoxy}-3-(dimethylamino)-2-propanol
DB07502 4-bromo-6-(6-hydroxy-1,2-benzisoxazol-3-yl)benzene-1,3-diol
DB07503 (5E)-5-[(2,2-DIFLUORO-1,3-BENZODIOXOL-5-YL)METHYLENE]-1,3-THIAZOLIDINE-2,4-DIONE
DB07504 (2R)-1-{4-[(4-Anilino-5-brom

DB07596 (17beta)-17-(cyanomethyl)-2-methoxyestra-1(10),2,4-trien-3-yl sulfamate
DB07597 CIS-(1R,2S)-2-AMINO-1,2,3,4-TETRAHYDRONAPHTHALEN-1-OL
DB07598 2,3,6A,7,8,9-HEXAHYDRO-11H-[1,4]DIOXINO[2,3-G]PYRROLO[2,1-B][1,3]BENZOXAZIN-11-ONE
DB07599 [(2-AMINO-ALPHA-METHOXYIMINO-4-THIAZOLYLACETYL)AMINO]METHYLBORONIC ACID
DB07601 4-chloro-6-{5-[(2-morpholin-4-ylethyl)amino]-1,2-benzisoxazol-3-yl}benzene-1,3-diol
DB07602 S-{3-[(4-ANILINOQUINAZOLIN-6-YL)AMINO]-3-OXOPROPYL}-L-CYSTEINE
DB07603 N-hexanoyl-L-homocysteine
DB07604 (6AR,11AS,11BR)-10-ACETYL-9-HYDROXY-7,7-DIMETHYL-2,6,6A,7,11A,11B-HEXAHYDRO-11H-PYRROLO[1',2':2,3]ISOINDOLO[4,5,6-CD]INDOL-11-ONE
DB07605 2-({4-[(5-CHLORO-1H-INDOL-2-YL)SULFONYL]PIPERAZIN-1-YL}CARBONYL)THIENO[3,2-B]PYRIDINE 4-OXIDE
DB07606 6-(3,4-DIHYDROXYBENZYL)-3-ETHYL-1-(2,4,6-TRICHLOROPHENYL)-1H-PYRAZOLO[3,4-D]PYRIMIDIN-4(5H)-ONE
DB07607 4-[5-(3-IODO-PHENYL)-2-(4-METHANESULFINYL-PHENYL)-1H-IMIDAZOL-4-YL]-PYRIDINE
DB07608 N-(5-{[(2S)-4-amino-2-(3-chlorophenyl)butanoyl]amino}

DB07718 4-Hydroxyphenylpyruvic acid
DB07719 [(3R)-3-(Methylcarbamoyl)-2-{[(2-methyl-2-propanyl)oxy]carbonyl}-1,2,3,4-tetrahydro-7-isoquinolinyl]sulfamic acid
DB07720 Epibatidine
DB07721 DIETHYL 4-METHOXYPHENYL PHOSPHATE
DB07722 3-(4-NITRO-PHENOXY)-PROPAN-1-OL
DB07723 3-(5-methoxy-1H-indol-3-yl)propanoic acid
DB07724 Indeglitazar
DB07726 t-Butylhydroquinone
DB07728 2-[2-(2-FLUOROPHENYL)PYRIDIN-4-YL]-1,5,6,7-TETRAHYDRO-4H-PYRROLO[3,2-C]PYRIDIN-4-ONE
DB07729 3-fluoro-N-[3-(1H-tetrazol-5-yl)phenyl]benzamide
DB07730 5-(3-HYDROXYPHENYL)ISOTHIAZOL-3(2H)-ONE 1,1-DIOXIDE
DB07731 CAN-508
DB07732 2-[(2-NAPHTHYLSULFONYL)AMINO]ETHYL DIHYDROGEN PHOSPHATE
DB07733 1-METHYL-3-TRIFLUOROMETHYL-1H-THIENO[2,3-C]PYRAZOLE-5-CARBOXYLIC ACID (2-MERCAPTO-ETHYL)-AMIDE
DB07734 N-(1-benzylpiperidin-4-yl)-4-sulfanylbutanamide
DB07735 N-[1-(2,6-dimethoxybenzyl)piperidin-4-yl]-4-sulfanylbutanamide
DB07736 (2S)-4-(4-fluorobenzyl)-N-(2-sulfanylethyl)piperazine-2-carboxamide
DB07737 (2S)-4-(4-fluorobenzyl)-N-(3-sulfanyl

DB07835 N~3~-cyclopropyl-N~4~'-(cyclopropylmethyl)-6-methylbiphenyl-3,4'-dicarboxamide
DB07836 1-DECYL-3-TRIFLUORO ETHYL-SN-GLYCERO-2-PHOSPHOMETHANOL
DB07837 [4-(5-naphthalen-2-yl-1H-pyrrolo[2,3-b]pyridin-3-yl)phenyl]acetic acid
DB07838 (Z)-3-BENZYL-5-(2-HYDROXY-3-NITROBENZYLIDENE)-2-THIOXOTHIAZOLIDIN-4-ONE
DB07839 N~2~-1,3-BENZOXAZOL-2-YL-3-CYCLOHEXYL-N-{2-[(4-METHOXYPHENYL)AMINO]ETHYL}-L-ALANINAMIDE
DB07840 (E)-[4-(3,5-Difluorophenyl)-3H-pyrrolo[2,3-b]pyridin-3-ylidene](3-methoxyphenyl)methanol
DB07841 Geranylgeranyl diphosphate
DB07842 (2S)-2-(4-ethylphenoxy)-3-phenylpropanoic acid
DB07843 5-CHLORO-N-{(3S)-1-[(1S)-1-METHYL-2-MORPHOLIN-4-YL-2-OXOETHYL]-2-OXOPYRROLIDIN-3-YL}-1-BENZOTHIOPHENE-2-SULFONAMIDE
DB07844 6-CHLORO-N-{(3S)-1-[(1S)-1-METHYL-2-MORPHOLIN-4-YL-2-OXOETHYL]-2-OXOPYRROLIDIN-3-YL}-1-BENZOTHIOPHENE-2-SULFONAMIDE
DB07845 2-fluoro-6-{[2-({2-methoxy-4-[(methylsulfonyl)methyl]phenyl}amino)-7H-pyrrolo[2,3-d]pyrimidin-4-yl]amino}benzamide
DB07846 (3,4,8b-Trimethyl-3-oxido-2,3

DB07959 3-(1H-BENZIMIDAZOL-2-YL)-1H-INDAZOLE
DB07960 5-ACETAMIDO-5,6-DIHYDRO-4-HYDROXY-6-ISOBUTOXY-4H-PYRAN-2-CARBOXYLIC ACID
DB07961 1-(4-Cyano-phenyl)-3-[2-(2,6-dichloro-phenyl)-1-imino-ethyl]-thiourea
DB07962 METHYL N-[(2',4'-DIFLUORO-4-HYDROXY-5-IODOBIPHENYL-3-YL)CARBONYL]-BETA-ALANINATE
DB07963 N-[(2',4'-DIFLUORO-4-HYDROXY-5-IODOBIPHENYL-3-YL)CARBONYL]-BETA-ALANINE
DB07964 (3S)-4-{[4-(BUT-2-YNYLOXY)PHENYL]SULFONYL}-N-HYDROXY-2,2-DIMETHYLTHIOMORPHOLINE-3-CARBOXAMIDE
DB07965 6-(cyclohexylamino)-9-[2-(4-methylpiperazin-1-yl)-ethyl]-9H-purine-2-carbonitrile
DB07966 [4-({4-[(5-cyclopropyl-1H-pyrazol-3-yl)amino]quinazolin-2-yl}amino)phenyl]acetonitrile
DB07967 9-CYCLOPENTYL-6-[2-(3-IMIDAZOL-1-YL-PROPOXY)-PHENYLAMINO]-9H-PURINE-2-CARBONITRILE
DB07968 N-(2-CHLORO-4-FLUOROBENZOYL)-N'-(5-HYDROXY-2-METHOXYPHENYL)UREA
DB07969 3-[3-(4-methylpiperazin-1-yl)-7-(trifluoromethyl)quinoxalin-5-yl]phenol
DB07970 5-[(2-methyl-5-{[3-(trifluoromethyl)phenyl]carbamoyl}phenyl)amino]pyridine-3-carboxamide


DB08070 2-[4-(3-METHYL-1H-PYRAZOL-4-YL)PHENYL]ETHANAMINE
DB08071 (2S)-1-methyl-2-[(2S,4R)-2-methyl-4-phenylpentyl]piperidine
DB08072 4-(2-AMINOETHOXY)-3,5-DICHLORO-N-[3-(1-METHYLETHOXY)PHENYL]BENZAMIDE
DB08073 (2S)-1-(1H-INDOL-3-YL)-3-{[5-(3-METHYL-1H-INDAZOL-5-YL)PYRIDIN-3-YL]OXY}PROPAN-2-AMINE
DB08074 3-(3-Methyl-2-buten-1-yl)-3H-purin-6-amine
DB08075 4-(2-amino-1,3-thiazol-4-yl)pyrimidin-2-amine
DB08076 4-[1-(2,6-dichlorobenzyl)-2-methyl-1H-imidazol-4-yl]pyrimidin-2-amine
DB08077 2-[4-({[(3,5-DICHLOROPHENYL)AMINO]CARBONYL}AMINO)PHENOXY]-2-METHYLPROPANOIC ACID
DB08078 {4-[3-(4-acetyl-3-hydroxy-2-propylphenoxy)propoxy]phenoxy}acetic acid
DB08079 AMG-208
DB08080 Latrunculin B
DB08081 3-OXO-OCTANOIC ACID (2-OXO-TETRAHYDRO-FURAN-3-YL)-AMIDE
DB08082 N-(2-AMINOETHYL)-P-CHLOROBENZAMIDE
DB08083 2-(1,3-thiazol-4-yl)-1H-benzimidazole-5-sulfonamide
DB08084 IDD594
DB08085 1-(4-HEXYLPHENYL)PROP-2-EN-1-ONE
DB08086 N-[12-(1H-imidazol-1-yl)dodecanoyl]-L-leucine
DB08087 4-[(7R,7AS)-7-HYDROXY-1,3-DIOX

DB08187 N-Methylphenylalanyl-N-[(trans-4-aminocyclohexyl)methyl]-L-prolinamide
DB08188 Emivirine
DB08190 N-[2-(2-iodo-5-methoxy-1H-indol-3-yl)ethyl]acetamide
DB08191 4-(5-phenyl-1H-pyrrolo[2,3-b]pyridin-3-yl)benzoic acid
DB08192 2-(4-CARCOXY-5-ISOPROPYLTHIAZOLYL)BENZOPIPERIDINE
DB08193 2-(3-NITROPHENYL)ACETIC ACID
DB08194 4-methyl-7,8-dihydro-5H-thiopyrano[4,3-d]pyrimidin-2-amine
DB08195 (1R)-2-[(CYANOMETHYL)AMINO]-1-({[2-(DIFLUOROMETHOXY)BENZYL]SULFONYL}METHYL)-2-OXOETHYL MORPHOLINE-4-CARBOXYLATE
DB08196 2-((3',5'-DIMETHOXY-4'-HYDROXYPHENYL)AZO)BENZOIC ACID
DB08197 (5E,7S)-2-amino-7-(4-fluoro-2-pyridin-3-ylphenyl)-4-methyl-7,8-dihydroquinazolin-5(6H)-one oxime
DB08198 [(4R)-4-(3-HYDROXYPHENYL)-1,6-DIMETHYL-2-THIOXO-1,2,3,4-TETRAHYDROPYRIMIDIN-5-YL](PHENYL)METHANONE
DB08199 N-[(BENZYLOXY)CARBONYL]-L-CYSTEINYLGLYCINE
DB08200 (1R)-menthyl hexyl phosphonate group
DB08201 (1S)-menthyl hexyl phosphonate group
DB08202 4-({[(4-METHYLPIPERAZIN-1-YL)AMINO]CARBONOTHIOYL}AMINO)BENZENESULFONAMIDE


DB08306 3-[(3-Nitrophenyl)sulfamoyl]-2-thiophenecarboxylic acid
DB08307 2-{HYDROXY[2-NITRO-4-(TRIFLUOROMETHYL)PHENYL]METHYLENE}CYCLOHEXANE-1,3-DIONE
DB08308 SUCCINIC ACID MONO-(13-METHYL-3-OXO-2,3,6,7,8,9,10,11,12,13,14,15,16,17-TETRADECAHYDRO-1H-CYCLOPENTA[A]PHENANTHREN-17-YL) ESTER
DB08309 3-({2-[(4-{[6-(CYCLOHEXYLMETHOXY)-9H-PURIN-2-YL]AMINO}PHENYL)SULFONYL]ETHYL}AMINO)PROPAN-1-OL
DB08310 N-[(2R)-2-{[(2S)-2-(1,3-benzoxazol-2-yl)pyrrolidin-1-yl]carbonyl}hexyl]-N-hydroxyformamide
DB08312 6-CYCLOHEXYLMETHYLOXY-5-NITROSO-PYRIMIDINE-2,4-DIAMINE
DB08313 Nocodazole
DB08314 (2-AMINO-1,3-OXAZOL-5-YL)-(3-BROMOPHENYL)METHANONE
DB08315 2-AMINO-N,N-BIS(PHENYLMETHYL)-1,3-OXAZOLE-5-CARBOXAMIDE
DB08316 4-amino-7,7-dimethyl-7,8-dihydroquinazolin-5(6H)-one
DB08317 5-methyl-6-phenylquinazoline-2,4-diamine
DB08318 6-(2-phenoxyethoxy)-1,3,5-triazine-2,4-diamine
DB08319 2'-HYDROXY-1,1'-BIPHENYL-2-SULFINIC ACID
DB08320 DIETHYL (1R,2S,3R,4S)-5,6-BIS(4-HYDROXYPHENYL)-7-OXABICYCLO[2.2.1]HEPT-5-ENE-2,3-DICARB

DB08424 [5-AMINO-1-(4-FLUOROPHENYL)-1H-PYRAZOL-4-YL](3-{[(2R)-2,3-DIHYDROXYPROPYL]OXY}PHENYL)METHANONE
DB08426 THIENO[3,2-B]PYRIDINE-2-SULFONIC ACID [2-OXO-1-(1H-PYRROLO[2,3-C]PYRIDIN-2-YLMETHYL)-PYRROLIDIN-3-YL]-AMIDE
DB08427 Prephenic acid
DB08428 3(S)-AMINO-4-PHENYL-BUTAN-2(S)-OL
DB08429 N-({(2S)-1-[(3R)-3-amino-4-(3-chlorophenyl)butanoyl]pyrrolidin-2-yl}methyl)-3-(methylsulfonyl)benzamide
DB08430 PARA-NITROPHENYL 1-THIO-BETA-D-GLUCOPYRANOSIDE
DB08431 [(3R,4S)-4-HYDROXY-3-METHYL-2-OXOHEXYL]PHOSPHONIC ACID
DB08432 THYMIDINE-5'-THIOPHOSPHATE
DB08433 phenyl ethenesulfonate
DB08434 2-METHYLCARBAMOYL-3-(4-PHOSPHONOOXY-PHENYL)-CYCLOPROPANECARBOXYLIC ACID
DB08435 (5E,14E)-11-oxoprosta-5,9,12,14-tetraen-1-oic acid
DB08436 8-BENZO[1,3]DIOXOL-,5-YLMETHYL-9-BUTYL-9H-
DB08437 Puromycin
DB08438 (2E,4R,5S)-2,3,4,5-TETRAHYDROXY-6-(PALMITOYLOXY)HEX-2-ENOIC ACID
DB08439 Parecoxib
DB08440 N-1,10-phenanthrolin-5-ylacetamide
DB08441 6-BROMO-13-THIA-2,4,8,12,19-PENTAAZATRICYCLO[12.3.1.1~3,7~]NONADECA-1(

DB08541 [(3S)-9-hydroxy-1-methyl-10-oxo-4,10-dihydro-3H-benzo[g]isochromen-3-yl]acetic acid
DB08542 3,4-dihydroxy-9,10-secoandrosta-1(10),2,4-triene-9,17-dione
DB08543 1-[2-HYDROXY-3-(4-CYCLOHEXYL-PHENOXY)-PROPYL]-4-(2-PYRIDYL)-PIPERAZINE
DB08544 (S)-Fluoxetine
DB08545 (1S)-1-Phenylethyl (4-acetamidobenzyl)phosphonate
DB08546 4-[(3AS,4R,7R,8AS,8BR)-2-(1,3-BENZODIOXOL-5-YLMETHYL)-7-HYDROXY-1,3-DIOXODECAHYDROPYRROLO[3,4-A]PYRROLIZIN-4-YL]BENZENECARBOXIMIDAMIDE
DB08547 PROGESTERONE-11-ALPHA-OL-HEMISUCCINATE
DB08548 [(4S)-2,2-dimethyl-1,3-dioxolan-4-yl]methyl hydrogen hex-5-enylphosphonate
DB08549 (3R)-METHYLCARBAMOYL-7-SULFOAMINO-3,4-DIHYDRO-1H-ISOQUINOLINE-2-CARBOXYLIC ACID BENZYL ESTER
DB08550 7,8-Dichloro-1,2,3,4-tetrahydroisoquinoline
DB08551 3-{(R)-(Dihydroxyboryl)[(2-thienylacetyl)amino]methyl}benzoic acid
DB08552 (1R)-1-(2-thienylacetylamino)-1-phenylmethylboronic acid
DB08553 (1E)-5-(1-piperidin-4-yl-3-pyridin-4-yl-1H-pyrazol-4-yl)-2,3-dihydro-1H-inden-1-one oxime
DB08554 N-(3-car

DB08669 METHYL N-[(2S,3R)-3-AMINO-2-HYDROXY-3-(4-METHYLPHENYL)PROPANOYL]-D-ALANYL-D-LEUCINATE
DB08670 METHYL N-[(2S,3R)-3-AMINO-2-HYDROXY-3-(4-ISOPROPYLPHENYL)PROPANOYL]-D-ALANYL-D-LEUCINATE
DB08671 5-Imino-4-(2-trifluoromethyl-phenylazo)-5H-pyrazol-3-ylamine
DB08672 4-[(3R)-3-{[2-(4-FLUOROPHENYL)-2-OXOETHYL]AMINO}BUTYL]BENZAMIDE
DB08673 4-[(5-ISOPROPYL-1,3-THIAZOL-2-YL)AMINO]BENZENESULFONAMIDE
DB08674 (20S)-19,20,21,22-TETRAHYDRO-19-OXO-5H-18,20-ETHANO-12,14-ETHENO-6,10-METHENO-18H-BENZ[D]IMIDAZO[4,3-K][1,6,9,12]OXATRIAZA-CYCLOOCTADECOSINE-9-CARBONITRILE
DB08675 (5Z)-7-{(1R,4S,5R,6R)-6-[(1E)-1-Octen-1-yl]-2,3-diazabicyclo[2.2.1]hept-2-en-5-yl}-5-heptenoic acid
DB08676 (20S)-19,20,22,23-TETRAHYDRO-19-OXO-5H,21H-18,20-ETHANO-12,14-ETHENO-6,10-METHENOBENZ[D]IMIDAZO[4,3-L][1,6,9,13]OXATRIAZACYCLONOADECOSINE-9-CARBONITRILE
DB08677 N-(5-Isopropyl-thiazol-2-YL)-2-pyridin-3-YL-acetamide
DB08678 (4-ETHYLPHENYL)SULFAMIC ACID
DB08680 N-{3-[(E)-(tert-butoxyimino)methyl]-4-chlorophenyl}-2-methylfu

DB08782 4-(2-AMINOETHYL)BENZENESULFONAMIDE
DB08783 (4-{(2S)-2-[(tert-butoxycarbonyl)amino]-3-methoxy-3-oxopropyl}phenyl)methaneseleninic acid
DB08784 2-(4-CHLORO-PHENYLAMINO)-NICOTINIC ACID
DB08785 4-Methylcoumarin
DB08786 4-(2-methoxyethoxy)-6-methylpyrimidin-2-amine
DB08787 4-(2,4-dichlorophenyl)-5-phenyldiazenyl-pyrimidin-2-amine
DB08788 3,6-DIAMINO-5-CYANO-4-(4-ETHOXYPHENYL)THIENO[2,3-B]PYRIDINE-2-CARBOXAMIDE
DB08789 2-AMINO-4-(2,4-DICHLOROPHENYL)-N-ETHYLTHIENO[2,3-D]PYRIMIDINE-6-CARBOXAMIDE
DB08790 1-PHENYL-1H-PYRAZOLE-4-CARBOXYLIC ACID
DB08791 1-[(2-NITROPHENYL)SULFONYL]-1H-PYRROLO[3,2-B]PYRIDINE-6-CARBOXAMIDE
DB08792 Diloxanide
DB08794 Ethyl biscoumacetate
DB08795 Azidocillin
DB08796 Pipazethate
DB08797 Salicylamide
DB08798 Sulfamoxole
DB08799 Antazoline
DB08800 Chloropyramine
DB08801 Dimetindene
DB08802 Isothipendyl
DB08803 Tymazoline
DB08804 Nandrolone decanoate
DB08805 Metiamide
DB08806 Roxatidine acetate
DB08807 Bopindolol
DB08808 Bupranolol
DB08809 Dichloroacetic acid
DB088

ConnectionError: HTTPSConnectionPool(host='www.ebi.ac.uk', port=443): Max retries exceeded with url: /unichem/api/v1/connectivity (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x106b6ba90>: Failed to establish a new connection: [Errno 8] nodename nor servname provided, or not known'))

In [12]:
for drug in drugbank[8180:]:
    if drug['type'] != 'small molecule':
        continue
    if not drug['inchikey']:
        continue
    drugbank_id = drug['drugbank_id']
    drugbank_name = drug['name']
    print(drugbank_id, drugbank_name)
    query_matches = unichem_search('sourceID','drugbank', drugbank_id)
    if not query_matches:
        if drug['inchi'].startswith('InChI=1S'):
            query_matches = unichem_search('inchikey','drugbank', drug['inchikey'])
        else: 
            print('non-standard InChI: cannot query compound')
            continue
    #if query_matches is not None:
    #    continue
    query_matches = list(query_matches)
    #query_matches = list(cpd_search('drugbank', drugbank_id, C = 4))
    #    if drug['inchi'].startswith('InChI=1S'):
    #        query_matches = list(key_search(drug['inchikey'], C = 4))
    #    else: # non-standard InChI
    #        print('non-standard InChI: cannot query compound')
    #        continue
    
    for match in query_matches:
        if match['id']<=31:
            match['drugbank_id'] = drugbank_id
            match['drugbank_name'] = drugbank_name
            match['source_name'] = id_to_source[int(match['id'])]
            mapping_writer.writerow(match)
    
    source_to_matches = dict()
    for match in query_matches:
        if match['id']<=31:
            match['source_name'] = id_to_source[int(match['id'])]
            match_set = source_to_matches.setdefault(match['source_name'], set())
            match_set.add(match['compoundId'])
    count = {k: len(v) for k, v in source_to_matches.items()}
    count = collections.defaultdict(int, count)
    count['drugbank_id'] = drugbank_id
    count['drugbank_name'] = drugbank_name
    count_writer.writerow(count)

mapping_file.close()
count_file.close()

DB09164 Technetium Tc-99m disofenin
DB09165 Technetium Tc-99m pyrophosphate
DB09166 Etizolam
DB09167 Dosulepin
DB09168 4-Phenylfentanyl
DB09169 3-Allylfentanyl
DB09170 β-Hydroxythiofentanyl
DB09171 β-Methylfentanyl
DB09172 Brifentanil
DB09173 Butyrfentanyl
DB09174 Lofentanil
DB09175 Mirfentanil
DB09177 p-Fluorofentanyl
DB09178 Phenaridine
DB09179 R-30490
DB09180 Thienylfentanyl
DB09181 Trefentanil
DB09182 Benzylfentanyl
DB09183 Dasabuvir
DB09184 Edivoxetine
DB09185 Viloxazine
DB09186 Nisoxetine
DB09187 Lortalamine
DB09188 Amedalin
DB09189 Daledalin
DB09190 Talopram
DB09191 Talsupram
DB09192 Tandamine
DB09193 CP-39,332
DB09194 Etoperidone
DB09195 Lorpiprazole
DB09196 Lubazodone
DB09197 Mepiprazole
DB09198 Lobeglitazone
DB09199 Netoglitazone
DB09200 Rivoglitazone
DB09201 Ciglitazone
DB09202 Cirazoline
DB09203 Synephrine
DB09204 Arotinolol
DB09205 Moxisylyte
DB09206 Trimazosin
DB09207 AS-8112
DB09209 Pholcodine
DB09210 Piracetam
DB09211 Limaprost
DB09212 Loxoprofen
DB09213 Dexibuprofen
DB

DB11283 DL-dimyristoylphosphatidylcholine
DB11284 DL-dimyristoylphosphatidylglycerol
DB11285 Ethyl ferulate
DB11288 Borneol
DB11290 Aluminium tristearate
DB11291 2-ethylhexyl benzoate
DB11296 Prezatide
DB11297 Palmitoyl oligopeptide
DB11299 Vanillyl butyl ether
DB11304 Phenoxyethanol
DB11309 Sulfuric acid
DB11315 Methscopolamine
DB11318 1,2-Docosahexanoyl-sn-glycero-3-phosphoserine
DB11323 Glycol salicylate
DB11324 Nonivamide
DB11326 Boric acid
DB11327 Dipyrithione
DB11328 Tetradecyl hydrogen sulfate (ester)
DB11331 1-Palmitoyl-2-oleoyl-sn-glycero-3-(phospho-rac-(1-glycerol))
DB11332 Sinapultide
DB11335 Ascorbyl glucoside
DB11336 Kinetin
DB11337 Zeatin
DB11340 Ubiquinol
DB11342 Aluminum oxide
DB11343 Silanol
DB11344 (+)-menthol
DB11345 (S)-camphor
DB11346 Rubidium
DB11348 Calcium Phosphate
DB11349 Cetyl ethylhexanoate
DB11359 Guaiacol
DB11362 Selexipag
DB11363 Alectinib
DB11364 Pidotimod
DB11365 Sennosides
DB11366 Roquinimex
DB11367 Cefroxadine
DB11368 Chlorophetanol
DB11369 Afoxolaner

DB11837 Osilodrostat
DB11838 Revamilast
DB11839 PZ-128
DB11841 Entinostat
DB11843 PF-04958242
DB11844 Pritelivir
DB11845 Lucitanib
DB11846 Creatinine
DB11847 Cadazolid
DB11848 Mannose 6-phosphate
DB11851 Bafetinib
DB11852 Tegobuvir
DB11853 Relugolix
DB11855 Revefenacin
DB11858 Leukotriene D4
DB11859 Brexanolone
DB11860 Aramchol
DB11861 Litronesib
DB11863 Alvelestat
DB11864 Preladenant
DB11865 Brivanib alaninate
DB11867 JNJ-39393406
DB11868 Etiracetam
DB11869 Valspodar
DB11870 RG-4733
DB11871 PF-00610355
DB11872 ZD-6126
DB11873 Verinurad
DB11874 Crocin
DB11875 3,3'-diindolylmethane
DB11876 Alanyl glutamine
DB11877 Basmisanil
DB11878 Filibuvir
DB11879 Dusquetide
DB11880 Acridine Carboxamide
DB11881 AUY922
DB11882 BPC-157
DB11883 SB-705498
DB11885 Anlotinib
DB11886 Infigratinib
DB11888 Laninamivir octanoate
DB11889 Lanicemine
DB11890 Cilengitide
DB11891 Fimepinostat
DB11892 Prulifloxacin
DB11893 Avagacestat
DB11894 Efatutazone
DB11896 Gedatolisib
DB11898 2,4-thiazolidinedione
DB11899 MK-8

DB12343 Temocillin
DB12345 MBX-2982
DB12347 CG-400549
DB12348 5-amino-1,3,4-thiadiazole-2-thiol
DB12350 Rostafuroxin
DB12351 Siagoside
DB12352 Bizelesin
DB12353 Ulodesine
DB12354 Imrecoxib
DB12355 Netazepide
DB12357 BMS-863233
DB12358 Enbucrilate
DB12359 BIIB021
DB12360 PF-03654764
DB12361 Piclozotan
DB12362 Diaminopropanol tetraacetic acid
DB12364 Betrixaban
DB12365 Perzinfotel
DB12367 Selurampanel
DB12368 AZD-3839
DB12369 Sotrastaurin
DB12370 Ipamorelin
DB12371 Siponimod
DB12375 Oglemilast
DB12376 Ricolinostat
DB12377 Relebactam
DB12378 Apricoxib
DB12379 Indirubin
DB12380 GDC-0152
DB12381 Merestinib
DB12382 R-306465
DB12383 Cytochlor
DB12384 Silatecan
DB12385 10-hydroxycamptothecin
DB12387 OSI-027
DB12388 Evocalcet
DB12389 Zamicastat
DB12390 Seladelpar
DB12391 Sagopilone
DB12392 Resminostat
DB12393 Fanapanel
DB12394 Eleclazine
DB12395 Esreboxetine
DB12397 DSM-265
DB12398 Naproxen etemesil
DB12399 Polmacoxib
DB12400 Voxtalisib
DB12401 Bromperidol
DB12402 Pumosetrag
DB12403 Samarium
DB

DB12860 Actinium
DB12861 Rimeporide
DB12863 Sivelestat
DB12864 Melarsoprol
DB12865 Etelcalcetide
DB12866 Pradigastat
DB12867 Benperidol
DB12868 Intoplicine
DB12869 Eliprodil
DB12870 Buthionine sulfoximine
DB12873 Dianhydrogalactitol
DB12874 Quizartinib
DB12875 Mavatrep
DB12876 GS-9256
DB12877 Oxatomide
DB12878 AV-101
DB12879 Omigapil
DB12881 Indole-3-carbinol
DB12882 Ombrabulin
DB12883 Eltoprazine
DB12884 Lavoltidine
DB12885 Namodenoson
DB12886 GSK-1521498
DB12887 Tazemetostat
DB12888 Ezutromid
DB12889 Velneperit
DB12890 Dihydrexidine
DB12892 MGB-BP-3
DB12894 Raluridine
DB12895 Olitigaltin
DB12896 PSI-352938
DB12897 MK-7622
DB12899 TT-301
DB12900 Irdabisant
DB12901 Fiacitabine
DB12902 Trofosfamide
DB12903 DEBIO-1347
DB12904 ZSTK-474
DB12905 Samarium Sm-153
DB12906 LY-2334737
DB12907 Mannose
DB12910 Emicerfont
DB12911 Nicoboxil
DB12912 Nolatrexed
DB12914 Resmetirom
DB12916 Mitolactol
DB12919 T-62
DB12920 Pinometostat
DB12921 Chlorsulfaquinoxaline
DB12923 Gallopamil
DB12924 Ozenoxacin
DB

DB13335 Pinazepam
DB13336 Dimethylphthalate
DB13337 Pheneticillin
DB13338 Flurithromycin
DB13339 Etoglucid
DB13340 Suloctidil
DB13341 Fenozolone
DB13342 Cinepazet
DB13343 Tioxolone
DB13344 Mercuric amidochloride
DB13345 Dihydroergocristine
DB13346 Bufexamac
DB13347 Diphenadione
DB13348 Tiadenol
DB13349 Talastine
DB13350 Azanidazole
DB13351 Piperidolate
DB13352 Deanol
DB13353 Viminol
DB13354 Phenprobamate
DB13355 Visnadine
DB13356 Mesulfen
DB13357 Styramate
DB13358 Cibenzoline
DB13359 Magnesium aspartate
DB13360 Tolciclate
DB13361 Trimethyldiphenylpropylamine
DB13362 Pheneturide
DB13363 Oxaceprol
DB13364 Feprazone
DB13366 Hydrochloric acid
DB13367 Cloricromen
DB13368 Motretinide
DB13369 Benzilone
DB13370 Bromisoval
DB13371 Difenpiramide
DB13373 Acriflavine
DB13374 Vincamine
DB13376 Succinimide
DB13377 Vinbarbital
DB13378 Norfenefrine
DB13379 Chiniofon
DB13380 Difemerine
DB13381 Sodium feredetate
DB13382 Chlorproethazine
DB13383 Diiodohydroxypropane
DB13384 Melitracen
DB13386 Epimestrol


DB13763 Aurotioprol
DB13764 Monoxerutin
DB13765 Mercuric chloride
DB13766 Lidoflazine
DB13767 Vorozole
DB13768 Domiodol
DB13769 Emetonium iodide
DB13770 Vinylbital
DB13771 Ferric sodium citrate
DB13772 Rufloxacin
DB13773 Sulfamethoxypyridazine
DB13774 Myristalkonium
DB13775 Tertatolol
DB13776 Ibacitabine
DB13777 Prenalterol
DB13778 Cefazedone
DB13779 Guanoclor
DB13780 Aluminium clofibrate
DB13781 Xamoterol
DB13782 Imipramine oxide
DB13783 Acemetacin
DB13784 Dixyrazine
DB13785 Dropropizine
DB13786 Magnesium orotate
DB13787 Tilidine
DB13788 Chlorbenzoxamine
DB13789 Aluminium acetotartrate
DB13790 Fipexide
DB13791 Penfluridol
DB13792 Clopamide
DB13793 Vinburnine
DB13794 Dimethoxanate
DB13795 Brodimoprim
DB13796 Dibunate
DB13797 Iodocholesterol (131I)
DB13798 Demoxytocin
DB13799 Ethadione
DB13800 Calcium levulinate
DB13801 Muzolimine
DB13802 Epomediol
DB13803 Xipamide
DB13804 Benzylthiouracil
DB13805 Reposal
DB13806 Linopirdine
DB13807 Tisopurine
DB13808 Mebhydrolin
DB13810 Dimemorfan
DB13

DB14232 Deacetylbisacodyl
DB14471 2-Ethylhexyl 4-phenylbenzophenone-2'-carboxylate
DB14474 Sorbitan
DB14475 L-Lactic acid
DB14476 DL-alpha-Tocopherol
DB14477 DL-alpha tocopheryl acetate
DB14479 Acetylcysteine zinc
DB14480 Acetylcysteine amide
DB14481 Calcium phosphate dihydrate
DB14482 Sodium ascorbate
DB14483 Calcium ascorbate
DB14484 Magnesium ascorbate
DB14485 Zinc ascorbate
DB14486 Niacinamide ascorbate
DB14487 Zinc acetate
DB14488 Ferrous gluconate
DB14489 Ferrous succinate
DB14490 Ferrous ascorbate
DB14491 Ferrous fumarate
DB14492 Potassium triiodide
DB14493 Zinc glycinate
DB14494 Zinc carbonate
DB14495 Manganese citrate
DB14496 Sodium molybdate
DB14497 Calcium magnesium potassium carbonate chloride hydroxide
DB14498 Potassium acetate
DB14499 Potassium sulfate
DB14500 Potassium
DB14501 Ferrous glycine sulfate
DB14502 Sodium phosphate, dibasic
DB14505 Sodium borate
DB14506 Lithium hydroxide
DB14507 Lithium citrate
DB14508 Lithium succinate
DB14509 Lithium carbonate
DB14510 Ferrous

DB14986 Linrodostat
DB14987 Difamilast
DB14989 Umbralisib
DB14993 Pyrotinib
DB14995 NP-G2-044
DB14998 Olorinab
DB15003 PF-06700841
DB15006 Flufenoxuron
DB15008 GO-203-2C
DB15009 PF-04937319
DB15010 Edasalonexent
DB15011 Avacopan
DB15012 Farampator
DB15013 TAK-243
DB15016 Palladium Pd-103
DB15021 Leriglitazone
DB15023 BMS-791826
DB15024 Apararenone
DB15026 CXA-10
DB15027 Adriforant
DB15028 MK-1064
DB15029 AZD-8186
DB15031 Daridorexant
DB15033 Flortaucipir
DB15034 PRI-724
DB15035 Zanubrutinib
DB15036 Sitravatinib
DB15038 Litoxetine
DB15039 ITI-214
DB15040 TP-271
DB15041 Florilglutamic acid F-18
DB15046 LY-2881835
DB15047 BMS-919373
DB15048 Licogliflozin
DB15049 Sucralose
DB15050 Neopterin
DB15051 Ethaselen
DB15052 Ansofaxine
DB15054 TAS-117
DB15055 ABT-639
DB15056 Bifenthrin
DB15057 osgemcitabine palabenamide
DB15058 Flutemetamol
DB15059 Aprocitentan
DB15062 Inarigivir
DB15063 Inarigivir soproxil
DB15065 PF-06291874
DB15068 Agerafenib
DB15071 Omidenepag isopropyl
DB15073 Ribose
DB15075 4

DB15664 Vitamin K7
DB15665 SEP-363856
DB15666 Iodopropynyl butylcarbamate
DB15668 Silver oxide
DB15669 Fezolinetant
DB15670 Gusacitinib
DB15671 Besifovir
DB15672 Rilematovir
DB15673 Lenacapavir
DB15674 Sisunatovir
DB15675 Baloxavir
DB15678 Calcium undecylenate
DB15679 Aluminum subacetate
DB15684 2,3-dichloro-5,6-dicyanobenzoquinone
DB15685 Selpercatinib
DB15686 GS-441524
DB15687 Tridecactide
DB15688 Zavegepant
DB15690 Fluoroestradiol F-18
DB15694 Cedazuridine
DB15720 Zinc orotate
DB15759 Metronidazole benzoate
DB15760 Cobaltous sulfate
DB15761 Sulbactam pivoxil
DB15763 Iron polymaltose
DB15764 Benzyl nicotinate
DB15774 beta-Escin
DB15775 Algestone acetophenide
DB15777 Fluocortolone pivalate
DB15778 Hydrogen iodide
DB15779 Mebrofenin
DB15780 Mertiatide
DB15782 Cetyl palmitate
DB15783 Pargeverine
DB15784 Carmoterol
DB15785 Myrtecaine
DB15790 Mecloxamine
DB15791 MK-0668
DB15793 Zinc phenolsulfonate
DB15795 Adiphenine
DB15796 GC-376 free acid
DB15797 GC-373
DB15816 Calcium monoethylfumarat

DB16290 DAPTA
DB16291 Peretinoin
DB16292 Perfluorodecalin
DB16293 Pexiganan
DB16294 Pexmetinib
DB16295 HSK-3486
DB16296 IPI-549
DB16297 Florzolotau F-18
DB16299 Poseltinib
DB16300 Praliciguat
DB16301 Preimplantation factor
DB16302 BP-14979
DB16303 Gallium-68 PSMA
DB16304 TAK-653
DB16305 Xevinapant
DB16306 Racemetyrosine
DB16308 Revaprazan
DB16310 Rezafungin
DB16312 TNP-2092
DB16315 Rivenprost
DB16319 GSK-2981278
DB16320 Rovazolac
DB16321 S-777469
DB16323 Satoreotide tetraxetan
DB16324 Selgantolimod
DB16325 Selodenoson
DB16326 Sepiapterin
DB16330 Simurosertib
DB16331 Siremadlin
DB16332 Sodelglitazar
DB16333 Sonlicromanol
DB16335 Sulopenem etzadroxil
DB16338 Taniborbactam
DB16339 ABX-196
DB16340 Tetrafluoroborate
DB16343 Tropifexor
DB16344 Tulrampator
DB16345 Uracil C-13
DB16346 Veliflapon
DB16347 Velsecorat
DB16349 Vicagrel
DB16351 Volinanserin
DB16353 Razuprotafib
DB16354 Avasopasem manganese
DB16390 Mobocertinib
DB16397 Terevalefim
DB16400 Sibofimloc
DB16403 Isoquercitrin
DB16404 Carv

DB17235 Linperlisib
DB17237 PHI-101
DB17240 Avicin D
DB17247 Bisantrene
DB17255 Etoposide toniribate
DB17257 JS-K
DB17260 Minnelide free acid
DB17261 Flubrobenguane F18
DB17263 Caflanone
DB17264 TRX-E-002-1
DB17269 Cirtuvivint
DB17270 Ceclazepide
DB17273 Granaticin B
DB17277 CBP-501
DB17278 Lutetium Lu177 Edotreotide
DB17283 Chrysoeriol
DB17297 Enitociclib
DB17298 Disufenton
DB17299 p-Toluenesulfonamide
DB17300 Tyroserleutide
DB17304 Doranidazole
DB17306 Metavert
DB17308 Laurocapram
DB17309 Minnelide
DB17312 Dotamtate Pb-212
DB17342 Trimethyltetradecylammonium
DB17352 WT-1 A1
DB17379 (R)-Gossypol
DB17383 FN-1501
DB17384 Tinengotinib
DB17385 Lipotecan
DB17386 Xenon Xe-129
DB17390 Vinblastine N-oxide
DB17408 Borocaptate
DB17410 Vodudeutentan
DB17414 TK216
DB17418 Retinamidic acid
DB17419 S-3APG
DB17456 Dalpiciclib
DB17472 Pirtobrutinib
DB17490 NT-219
DB17493 Bioymifi
DB17499 GB1211
DB17503 PP-F11N lutetium Lu-177
DB17506 Etrumadenant
DB17507 COTI-2
DB17508 Hafnium oxide
DB17510 PXS-5505


DB18542 VB-312
DB18545 Tildacerfont
DB18564 YT-001
DB18573 1,​2-​Dioleoyl-​sn-​glycero-​3-​phospho-​L-​serine
DB18578 Brigimadlin
DB18588 Denatonium
DB18600 DM-1157
DB18603 APPD
DB18632 1-(3-fluoro-4-(7-(5-methyl-1H-imidazol-2-yl)-1-oxo-2,3-dihydro-1H-isoindo-1-4-yl)-phenyl)-3-(3-trifluoromethyl-phenyl)-urea
DB18633 Uttroside B
DB18634 Vasoactive intestinal peptide
DB18657 PARPI F-18
DB18665 Psoralen
DB18689 GDP-L-fucose
DB18706 Emraclidine
DB18707 AVL-3288
DB18708 Alogabat
DB18709 Ropsacitinib
DB18711 taletrectinib
DB18715 Tolebrutinib
UniChem error: sourceID Not found
DB18716 Enmetazobactam
UniChem error: sourceID Not found


In [13]:
# write source-specific mapping files
mapping_path = os.path.join(path, 'mapping.tsv.gz')
mapping_file = gzip.open(mapping_path, 'rb')
mapping_buffer = io.TextIOWrapper(mapping_file)
reader = csv.DictReader(mapping_buffer, delimiter='\t')
source_to_pairs = dict()
for row in reader:
    pair = row['drugbank_id'], row['compoundId']
    pairs = source_to_pairs.setdefault(row['source_name'], set())
    pairs.add(pair)
mapping_file.close()

del source_to_pairs['drugbank']
for source, pairs in source_to_pairs.items():
    bindingdb_path = os.path.join(path, 'mapping', '{}.tsv'.format(source))
    write_file = open(bindingdb_path, 'w')
    writer = csv.writer(write_file, delimiter='\t')
    writer.writerow(['drugbank_id', '{}_id'.format(source)])
    writer.writerows(sorted(pairs))
    write_file.close()

In [15]:
import pandas

drugs =['DB00289','DB00669','DB00862','DB00977','DB01248','DB08801']
mapping_tsv_df = pandas.read_table('../data/DrugBank/mapping.tsv.gz')
mapping_tsv_df[mapping_tsv_df['drugbank_id'].isin(drugs)]

Unnamed: 0,drugbank_id,drugbank_name,id,source_name,compoundId,baseIDURLAvailable,longName,shortName,typeOfSearch,url
19186,DB00289,Atomoxetine,1,chembl,CHEMBL641,True,ChEMBL,chembl,match,https://www.ebi.ac.uk/chembldb/compound/inspec...
19187,DB00289,Atomoxetine,1,chembl,CHEMBL299052,True,ChEMBL,chembl,match,https://www.ebi.ac.uk/chembldb/compound/inspec...
19188,DB00289,Atomoxetine,1,chembl,CHEMBL1442868,True,ChEMBL,chembl,match,https://www.ebi.ac.uk/chembldb/compound/inspec...
19189,DB00289,Atomoxetine,2,drugbank,DB00289,True,DrugBank,drugbank,match,http://www.drugbank.ca/drugs/DB00289
19190,DB00289,Atomoxetine,4,iuphar,7118,True,Guide to Pharmacology,gtopdb,match,http://www.guidetopharmacology.org/GRAC/Ligand...
...,...,...,...,...,...,...,...,...,...,...
286794,DB08801,Dimetindene,29,nikkaji,J8.041A,True,Nikkaji,nikkaji,match,http://jglobal.jst.go.jp/en/redirect?Nikkaji_N...
286795,DB08801,Dimetindene,29,nikkaji,J541.230G,True,Nikkaji,nikkaji,match,http://jglobal.jst.go.jp/en/redirect?Nikkaji_N...
286796,DB08801,Dimetindene,31,bindingdb,50297307,True,BindingDB,bindingdb,match,http://www.bindingdb.org/bind/chemsearch/marvi...
286797,DB08801,Dimetindene,31,bindingdb,81452,True,BindingDB,bindingdb,match,http://www.bindingdb.org/bind/chemsearch/marvi...


In [16]:
# check if they are in mapping tsv 8513+6 = 8519
bindingdb_id_df = pandas.read_table('../data/DrugBank/mapping/bindingdb.tsv')
bindingdb_id_df.shape
bindingdb_id_df[bindingdb_id_df['drugbank_id'].isin(drugs)]

Unnamed: 0,drugbank_id,bindingdb_id
290,DB00289,50022784
291,DB00289,50366567
822,DB00669,50005835
1080,DB00862,14776
1081,DB00862,50088373
1082,DB00862,50111900
1246,DB00977,158504
1247,DB00977,50187243
1640,DB01248,36351
6075,DB08801,50297307


In [17]:
bindingdb_id_df.shape

(9265, 2)

## Parse the BindingDB tsv export

This section peforms the following processing steps on the [BindingDB](http://www.bindingdb.org/bind/index.jsp) export:

+ processes affinities to floats
+ converts to entrez genes
+ simplifies observation into essential fields

See the corresponding [Thinklab discussion](http://doi.org/10.15363/thinklab.d53) for more information.

In [None]:
import os
import csv
import gzip
import pprint
import collections
import operator

import pandas
import requests
import zipfile
from zipfile import ZipFile

In [None]:
# uniprot to entrez gene mapping
url = '../data/UniProt/GeneID.tsv.gz'
uniprot_df = pandas.read_table(url, compression='gzip')

uniprot_to_entrez = dict()
for uniprot, entrez in zip(uniprot_df.uniprot, uniprot_df.GeneID):
    uniprot_to_entrez.setdefault(uniprot, set()).add(str(entrez))

In [None]:
# Read and process BindingDB tsv
path ='../data/BindingDB/BindingDB_All_202405.tsv'
df = pandas.read_table(path, delimiter='\t', on_bad_lines='skip')

In [None]:
#Field documentation: https://www.bindingdb.org/bind/chemsearch/marvin/BindingDB-TSV-Format.pdf

target_fields = [
    'BindingDB Target Chain  Sequence',
    'PDB ID(s) of Target Chain',
    'UniProt (SwissProt) Recommended Name of Target Chain',
    'UniProt (SwissProt) Entry Name of Target Chain',
    'UniProt (SwissProt) Primary ID of Target Chain',
    'UniProt (SwissProt) Secondary ID(s) of Target Chain',
    'UniProt (SwissProt) Alternative ID(s) of Target Chain',
    'UniProt (TrEMBL) Submitted Name of Target Chain',
    'UniProt (TrEMBL) Entry Name of Target Chain',
    'UniProt (TrEMBL) Primary ID of Target Chain',
    'UniProt (TrEMBL) Secondary ID(s) of Target Chain',
    'UniProt (TrEMBL) Alternative ID(s) of Target Chain',
]

chains_key = 'Number of Protein Chains in Target (>1 implies a multichain complex)'


chains_index = df.columns.get_loc(chains_key)
target0_index = chains_index + 1
ligand_fields = df.columns[:chains_index + 1]

verbose=True
max_rows=None

row_out = list()
for j, row in df.iterrows():
    if max_rows is not None and j == max_rows:
            break
    #row = [x if x else None for x in row] #do we need it?
    ligand_values = row[:chains_index + 1]
    # Ensure line has sufficient ligand fields
    if len(row) < chains_index + 1:
        if verbose:
            print('Line', j + 2, 'is deficient')
        continue
    rowdict = collections.OrderedDict(zip(ligand_fields, ligand_values))
    for key in [chains_key]:
        if key not in rowdict:
            print(j+2)
            print(row)
            print(rowdict)
        rowdict[key] = int(rowdict[key])
    chains = list()
    target_chain_cols = [col for col in df.columns if 'BindingDB Target Chain Sequence' in col]
    assert rowdict[chains_key] == (len(row[target_chain_cols])-len([i for i in row[target_chain_cols] if pandas.isnull(i) == True]))
    #assert rowdict[chains_key] == len(row[target0_index:]) / len(target_fields)
    for i in range(rowdict[chains_key]):
        i_0 = target0_index + i * len(target_fields)
        i_1 = target0_index + (i + 1) * len(target_fields)
        target_values = row[i_0:i_1]
        chain = collections.OrderedDict(zip(target_fields, target_values))
        chains.append(chain)
    rowdict['chains'] = chains
    row_out.append(rowdict)
row_out

In [None]:
bindingdb_generator = row_out #read_bindingdb(path, verbose=True)

bindings = list()
for i, row in enumerate(bindingdb_generator):
    #if i > 10000:
    #    break
    if len(row['chains']) != 1:
        continue
    chain, = row['chains']
    uniprots = chain['UniProt (SwissProt) Primary ID of Target Chain']
    if pandas.isnull(uniprots) == True:
        continue #skip to another iteration of for loop
    uniprots = uniprots.split(',')

    template = dict()
    template['bindingdb_id'] = row['BindingDB MonomerID']
    template['reaction_id'] = row['BindingDB Reactant_set_id']
    template['source'] = row['Curation/DataSource']
    template['organism'] = row['Target Source Organism According to Curator or DataSource']
    template['pubmed'] = row['PMID']
    template['doi'] = row['Article DOI']

    affinities = {'Ki': row['Ki (nM)'], 'Kd': row['Kd (nM)'], 'IC50': row['IC50 (nM)']}
    for measure, affinity in affinities.items():
        if affinity is None:
            continue
        for uniprot in uniprots:
            entrez_set = uniprot_to_entrez.get(uniprot)
            if not entrez_set:
                # uniprot_id not found in mapping
                continue
            for entrez in entrez_set:
                binding = template.copy()
                binding['measure'] = measure
                binding['affinity_nM'] = affinity
                binding['uniprot'] = uniprot
                binding['entrez_gene'] = entrez
                bindings.append(binding)

In [None]:
# Convert affinities to floats
lt, gt, eq, err = 0, 0, 0, 0
for binding in bindings:
    affinity = binding['affinity_nM']
    if str(affinity).startswith('<'):
        affinity = affinity.lstrip('<')
        affinity = float(affinity)
        if affinity >= 10.0:
            affinity -= 1.0
        lt += 1
    elif str(affinity).startswith('>'):
        affinity = affinity.lstrip('>')
        affinity = float(affinity)
        affinity += 1.0
        gt += 1
    else:
        try:
            affinity = float(affinity)
            eq += 1
        except ValueError:
            affinity = None
            err += 1
    binding['affinity_nM'] = affinity
print('< {}\n> {}\n= {}\nerrors {}'.format(lt, gt, eq, err))

In [None]:
fields = ['reaction_id', 'bindingdb_id', 'uniprot', 'entrez_gene',
          'measure', 'affinity_nM', 'source', 'organism', 'pubmed', 'doi']
with gzip.open('../data/BindingDB/binding.tsv.gz', 'wt') as write_file:
    writer = csv.DictWriter(write_file, delimiter='\t', fieldnames=fields)
    writer.writeheader()
    bindings.sort(key=operator.itemgetter(*fields))
    writer.writerows(bindings)

## Collapsing bindingDB compound-gene relationships
run collapse.Rmd

## Create a single dataset of Compound-Gene binding relationships from BindingDB and DrugBank

In [18]:
import sys
import itertools

import pandas

In [19]:
def split_and_clean_ids(id_str, sep='|'):
    """Return a set of unique non-empty IDs from a joined string."""
    if not id_str or pandas.isnull(id_str):
        return set()
    id_set = set(id_str.split(sep))
    id_set.discard('')
    return id_set

### Read datasets

In [20]:
# Read DrugCentral
url = '../data/DrugCentral/targets.tsv'
central_df = pandas.read_table(url, dtype={'pubmed_ids': str})
for column in 'pubmed_ids', 'sources', 'actions', 'urls':
    central_df[column] = central_df[column].map(split_and_clean_ids)
central_df = central_df.rename(columns={'GeneID': 'entrez_gene_id'})
central_df.head(2)

Unnamed: 0,entrez_gene_id,drugbank_id,drugbank_name,pubmed_ids,sources,actions,urls
0,18,DB01080,Vigabatrin,{},{DrugCentral (ChEMBL)},{inhibitor},{}
1,19,DB01599,Probucol,{},{DrugCentral (ChEMBL)},{inhibitor},{}


In [21]:
# Read BindingDB
url = '../data/BindingDB/bindings-drugbank-gene.tsv'
binding_df = pandas.read_table(url)
# Filter for micromolar binding affinity
binding_df = binding_df[binding_df.affinity_nM <= 1000]
# Parse compound fields
binding_df.sources = binding_df.sources.map(lambda x: split_and_clean_ids(x, ','))
binding_df.pubmeds = binding_df.pubmeds.map(lambda x: split_and_clean_ids(x, ','))
binding_df = binding_df.rename(columns={'entrez_gene': 'entrez_gene_id', 'pubmeds': 'pubmed_ids'})
binding_df.head(2)

Unnamed: 0,drugbank_id,entrez_gene_id,affinity_nM,n_pairs,sources,pubmed_ids,drugbank_name,drugbank_approved,gene_symbol
11,DB00091,2280,20.0,1,{ChEMBL},{9857082},Cyclosporine,1,FKBP1A
12,DB00091,2281,6.0,1,{ChEMBL},{7473543},Cyclosporine,1,FKBP1B


In [22]:
# Read DrubBank compound-gene interaction
url = '../data/DrugBank/proteins.tsv'
drugbank_protein_df = pandas.read_table(url)
drugbank_protein_df.pubmed_ids = drugbank_protein_df.pubmed_ids.map(
    lambda x: split_and_clean_ids(x, '|'))
drugbank_protein_df.actions = drugbank_protein_df.actions.map(
    lambda x: split_and_clean_ids(x, '|'))
drugbank_protein_df['sources'] = drugbank_protein_df.apply(
    lambda x: set(['DrugBank ({})'.format(x['category'])]), axis=1)
drugbank_protein_df.head(2)

Unnamed: 0,drugbank_id,category,uniprot_id,entrez_gene_id,organism,known_action,actions,pubmed_ids,sources
0,DB00001,target,P00734,2147,Humans,yes,{inhibitor},"{11467439, 10912644, 11055889, 11807012, 10505...",{DrugBank (target)}
1,DB00006,target,P00734,2147,Humans,yes,{inhibitor},"{11929334, 11060732, 11504570, 11923794, 11752...",{DrugBank (target)}


In [23]:
# DrugBank gene-protein interactions by category
drugbank_protein_df.category.value_counts()

target         17192
enzyme          5866
transporter     3430
carrier          903
Name: category, dtype: int64

### Combine BindingDB and DrugBank

In [24]:
# Create a combined dataset of BindingDB and DrugBank by appending all rows
long_df = pandas.concat([
    central_df[['drugbank_id', 'entrez_gene_id', 'sources', 'pubmed_ids', 'actions', 'urls']],
    drugbank_protein_df[['drugbank_id', 'entrez_gene_id', 'sources', 'pubmed_ids', 'actions']],
    binding_df[['drugbank_id', 'entrez_gene_id', 'affinity_nM', 'sources', 'pubmed_ids']],
])

for column in 'pubmed_ids', 'actions', 'urls':
    long_df[column] = long_df[column].map(lambda x: x if pandas.notnull(x) else set())
long_df.head(2)

Unnamed: 0,drugbank_id,entrez_gene_id,sources,pubmed_ids,actions,urls,affinity_nM
0,DB01080,18,{DrugCentral (ChEMBL)},{},{inhibitor},{},
1,DB01599,19,{DrugCentral (ChEMBL)},{},{inhibitor},{},


In [25]:
def get_license(sources):
    """Return the license of a binding relationship based on its sources."""
    sources = frozenset(sources)
    if 'ChEMBL' in sources:
        return 'CC BY-SA 3.0'
    if any(x.startswith('DrugBank') for x in sources):
        return 'CC BY-NC 4.0'
    if 'BindingDB' in sources:
        return 'CC BY 3.0'
    if any(x.startswith('DrugCentral') for x in sources):
        return 'CC BY 4.0'
    return None

def condense(df):
    """Combine gene-compound relationships"""
    row = pandas.Series()
    row['sources'] = set(itertools.chain.from_iterable(df.sources))
    row['pubmed_ids'] = set(itertools.chain.from_iterable(df.pubmed_ids))
    row['actions'] = set(itertools.chain.from_iterable(df.actions))
    row['affinity_nM'] = df.affinity_nM.mean(skipna=True)
    row['license'] = get_license(row['sources'])
    row['urls'] = set(itertools.chain.from_iterable(df.urls))
    return row

condensed_df = long_df.groupby(['drugbank_id', 'entrez_gene_id']).apply(condense).reset_index()

  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.Series()
  row = pandas.S

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [26]:
# Number of condensed bindings
len(condensed_df)

46573

In [27]:
# Convert compound fields into pipe-delimited strings
for column in 'sources', 'pubmed_ids', 'actions', 'urls':
    condensed_df[column] = condensed_df[column].map(lambda x: '|'.join(sorted(x)))

In [28]:
# Save condensed bindings
condensed_df.to_csv('../data/BindingDB/CbG-binding.tsv', sep='\t', index=False)

In [29]:
condensed_df.head(2)

Unnamed: 0,drugbank_id,entrez_gene_id,sources,pubmed_ids,actions,affinity_nM,license,urls
0,DB00001,2147,DrugBank (target),10505536|10912644|11055889|11467439|11807012,inhibitor,,CC BY-NC 4.0,
1,DB00002,712,DrugBank (target),32117299,binder,,CC BY-NC 4.0,


## Compound bindings

In [30]:
import pandas
url = '../data/DrugBank/drugbank-slim.tsv'
compound_df = pandas.read_table(url)
compound_df.head(2)

Unnamed: 0,drugbank_id,name,type,groups,atc_codes,categories,inchikey,inchi,description
0,DB00006,Bivalirudin,small molecule,approved|investigational,B01AE06,"Amino Acids, Peptides, and Proteins|Anticoagul...",OIRCOABEOLEUMC-GEJPAHFPSA-N,InChI=1S/C98H138N24O33/c1-5-52(4)82(96(153)122...,Bivalirudin is a synthetic 20 residue peptide ...
1,DB00007,Leuprolide,small molecule,approved|investigational,L02AE51|L02AE02,Adrenal Cortex Hormones|Agents Causing Muscle ...,GFIJNRVAKGFPGQ-LIJARHBVSA-N,InChI=1S/C59H84N16O12/c1-6-63-57(86)48-14-10-2...,Leuprolide is a synthetic 9-residue peptide an...


In [31]:
url = '../data/EntrezGene/genes-human.tsv'
gene_df = pandas.read_table(url)
gene_df = gene_df[gene_df.type_of_gene == 'protein-coding']
coding_genes = set(gene_df.GeneID)

In [35]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [36]:
binding_df = pandas.read_table('../data/BindingDB/CbG-binding.tsv')
binding_df.shape
binding_df = binding_df.merge(compound_df[['drugbank_id']])
binding_df = binding_df[binding_df.entrez_gene_id.isin(coding_genes)]
binding_df.shape
binding_df.head(2)

(46573, 8)

(25733, 8)

Unnamed: 0,drugbank_id,entrez_gene_id,sources,pubmed_ids,actions,affinity_nM,license,urls
0,DB00006,2147,DrugBank (target)|DrugCentral (IUPHAR),11060732|11504570|11752352|11833835|11923794|1...,inhibitor,,CC BY-NC 4.0,
1,DB00006,4353,DrugBank (enzyme),18701766,inhibitor,,CC BY-NC 4.0,
