In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell is executed, so edits to library source files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from openpharmacophore.databases.pubchem import PubChem
import pandas as pd



## Bioassay for Estrogen Receptor
We are going to look into bioassays for the Estrogen Receptor so that we can get information on the compounds that were found to be active or inactive against it.

In [3]:
# Target studied in this notebook, kept here for reference (presumably copied
# from a PubChem assay record — verify):
#   'Target': [{'GI': 118764400, 'Name': 'Estrogen receptor 1'}]
# The PubChem object created below is the one every later cell queries through.
pubchem = PubChem()

## Get Assays for a target
We can get the ids and names of the assays for a given target, provided that we have the target id.

In [23]:
# GI identifier of the target 'Estrogen receptor 1'.
estrogen_receptor_gi = 118764400

# Look up every assay registered for that target and preview the first eight.
bioassays = pubchem.get_target_assays(
    identifier=estrogen_receptor_gi,
    identifier_type="gi",
)
bioassays.head(8)

Unnamed: 0,id,name
0,629,HTS of Estrogen Receptor- alpha Coactivator Bi...
1,639,HTS of Estrogen Receptor- alpha Coactivator Bi...
2,694,HTS of LOPAC library for Estrogen Receptor-alp...
3,713,Estrogen Receptor-alpha Coactivator Binding In...
4,737,Estrogen Receptor-alpha Coactivator Binding Po...
5,1079,Estrogen Receptor-alpha Coactivator Binding In...
6,1788,Discovery of novel allosteric modulators of th...
7,1793,Identification of Novel Modulators of Cl- depe...


#### Assay Description
Once we have the assay id, we can get a description of it. The following function
returns a dictionary with the assay info. In this case we are interested in the bioassay with id 713.

In [24]:
# Fetch the description of assay 713 (listed in the target-assay table above)
# and display the returned dictionary.
# NOTE(review): summary=True presumably requests the condensed summary record
# rather than the full description — confirm against the PubChem class.
assay = pubchem.get_assay_description(assay_id=713, summary=True)
assay

{'AssaySummaries': {'AssaySummary': [{'AID': 713,
    'SourceName': 'Emory University Molecular Libraries Screening Center',
    'SourceID': 'Estrogen Receptor-alpha Coactivator Binding Inhibitors Dose Response Confirmation',
    'Name': 'Estrogen Receptor-alpha Coactivator Binding Inhibitors Dose Response Confirmation',
    'Description': ['NIH Molecular Libraries Screening Centers Network [MLSCN]',
     'Emory Chemical Biology Discovery Center in MLSCN',
     'Assay provider: John A. Katzenellenbogen, University of Illinois at Urbana-Champaign',
     'MLSCN Grant: 1 X01MH78953-01',
     '',
     'Title: HTS for Estrogen Receptor-alpha Coactivator Binding inhibitors',
     '',
     'Assay Overview',
     "      Estrogens, which are responsible for the growth of many breast cancers, act through the estrogen receptors, ER-alpha and ER-beta, which are ligand-modulated transcription factors and members of the nuclear receptor gene superfamily. ER-alpha and ER-beta are well validated prote


The following function will return a list containing the ids of all the compounds tested in the assay
for the estrogen receptors.

In [36]:
# Ids of the compounds tested in assay 713; only the first five are displayed
# to keep the output short.
cids = pubchem.get_assay_compounds_id(assay_id=713)
cids[0:5]

[2345113, 3243422, 657783, 2087492, 2296726]

##  Training Data from Assay
With the assay id, we can obtain a list of active and a list of inactive molecules for the target, which can be used later on to validate a pharmacophore model through retrospective screening.

In [39]:
# Get training data from assay 713, split into active and inactive compounds.
# As the next cell shows, actives[0] holds compound ids and actives[1] the
# corresponding SMILES strings; inactives presumably has the same layout —
# TODO confirm.
actives, inactives = pubchem.get_assay_training_data(713)

Fetching active compound smiles...


  0%|          | 0/221 [00:00<?, ?it/s]

Fetching inactive compound smiles...


  0%|          | 0/218 [00:00<?, ?it/s]

In [41]:
# Preview the first three active compounds: their ids, then their SMILES.
active_ids = actives[0]
active_smiles = actives[1]
print(active_ids[:3])
print(active_smiles[:3])

[5770444, 646737, 649863]
['CC1=CC=C(C=C1)C2=NN=C(O2)CSC3=NC4=C(C5=C(N4)C=CC(=C5)OC)N=N3', 'CC1=CC2=C(C(C(=C(O2)N)C#N)C3=CC4=C(C(=C3)OC)OCO4)C(=O)O1', 'CCC1=CC2=C(C=C1)NC(=O)C(=C2)CN(CC3=CC4=C(C=C3)OCO4)C(=O)C5=CC=CO5']


In [None]:
import pickle

# Bundle both compound sets into a single dictionary and save it to disk in
# pickle format.
# NOTE(review): the output file is named "chembl_data" although the compounds
# were fetched from PubChem — confirm the file name is intentional.
molecules = dict(actives=actives, inactives=inactives)

# Serialize first, then write the bytes in one go; the context manager closes
# the file even if the write fails.
pickled_mols = pickle.dumps(molecules)
with open("../../data/ligands/chembl_data", "wb") as out_file:
    out_file.write(pickled_mols)

## Get assay ids for a compound
We can also search for the assays in which a compound was tested.

In [12]:
# PubChem compound id (CID) of pleconaril.
cid = 1684
# Retrieve the assays in which this compound was tested and preview the
# first rows of the result.
assay_summary = pubchem.get_compound_assay_summary(cid)
assay_summary.head()

Unnamed: 0,AID,Panel Member ID,SID,CID,Bioactivity Outcome,Target GI,Target GeneID,Activity Value [uM],Activity Name,Assay Name,Bioassay Type,PubMed ID,RNAi
0,14911,,103200390,1684,Unspecified,,,,,Tested for pharmacokinetic parameter in fasted...,Other,7731021.0,
1,15655,,103200390,1684,Unspecified,,,,,Tested for pharmacokinetic parameter in fasted...,Other,7731021.0,
2,19547,,103200390,1684,Unspecified,,,,,Tested for pharmacokinetic parameter in fasted...,Other,7731021.0,
3,23851,,103200390,1684,Unspecified,,,,,t1/2 in monkey liver microsomes,Other,7731021.0,
4,84561,,103200390,1684,Active,,,0.136,EC50,In vitro anti-rhinoviral activity against huma...,Confirmatory,14521419.0,


## Similarity Search

In [6]:
# Fingerprint-based similarity search against PubChem.
# NOTE(review): the threshold is presumably a Tanimoto similarity expressed as
# a percentage (95 -> 0.95) — confirm against PubChem.similarity_search.
tanimoto = 95
smiles = "C1=NC2=C(N1)C(=O)N=C(N2)N"  # query structure

similar_compounds = pubchem.similarity_search(
    smiles,
    threshold=tanimoto,
    max_records=200,
)

# Show only the first five matching compound ids.
similar_compounds[:5]

[135398634, 9679, 135398638, 135436483, 70315]