# Fetch aromatase IC50 bioactivity data from ChEMBL and save it to a CSV file in `./data`

In [2]:
from pathlib import Path

import pandas as pd
from chembl_webresource_client.new_client import new_client

## Search for target protein

In [3]:
# Search for target protein
# Run a text search for 'aromatase' against the ChEMBL target resource and
# load the hits into a DataFrame so they can be inspected as a table.
target = new_client.target
target_query = target.search('aromatase')
targets = pd.DataFrame.from_dict(target_query)
# Display the search hits.
targets

Unnamed: 0,cross_references,organism,pref_name,score,species_group_flag,target_chembl_id,target_components,target_type,tax_id
0,"[{'xref_id': 'P11511', 'xref_name': None, 'xre...",Homo sapiens,Cytochrome P450 19A1,20.0,False,CHEMBL1978,"[{'accession': 'P11511', 'component_descriptio...",SINGLE PROTEIN,9606
1,"[{'xref_id': 'P22443', 'xref_name': None, 'xre...",Rattus norvegicus,Cytochrome P450 19A1,20.0,False,CHEMBL3859,"[{'accession': 'P22443', 'component_descriptio...",SINGLE PROTEIN,10116


In [4]:
# Use the ChEMBL ID of the search hit labelled 0 (the first row shown in the
# table above) as the target for the activity query that follows.
selected_target = targets["target_chembl_id"][0]
selected_target

'CHEMBL1978'

In [5]:
# Query the activity resource: restrict to the selected target, then to
# records whose standard_type is IC50.
activity = new_client.activity
res = (
    activity
    .filter(target_chembl_id=selected_target)
    .filter(standard_type="IC50")
)

In [6]:
# Build a DataFrame from the activity query result (one row per record).
df = pd.DataFrame.from_dict(res)
# Preview the first three rows only; the full table is large.
df.head(3)

Unnamed: 0,action_type,activity_comment,activity_id,activity_properties,assay_chembl_id,assay_description,assay_type,assay_variant_accession,assay_variant_mutation,bao_endpoint,...,target_organism,target_pref_name,target_tax_id,text_value,toid,type,units,uo_units,upper_value,value
0,,,82585,[],CHEMBL666794,Inhibition of Cytochrome P450 19A1,B,,,BAO_0000190,...,Homo sapiens,Cytochrome P450 19A1,9606,,,IC50,uM,UO_0000065,,7.1
1,,,94540,[],CHEMBL666794,Inhibition of Cytochrome P450 19A1,B,,,BAO_0000190,...,Homo sapiens,Cytochrome P450 19A1,9606,,,IC50,uM,UO_0000065,,50.0
2,,,112960,[],CHEMBL661700,In vitro inhibition of human Cytochrome P450 19A1,B,,,BAO_0000190,...,Homo sapiens,Cytochrome P450 19A1,9606,,,IC50,uM,UO_0000065,,0.238


In [7]:
# Sanity check: list the distinct standard_type values present in the result.
df["standard_type"].unique()

array(['IC50'], dtype=object)

In [8]:
# Keep only the records that report a standard_value; rows where it is
# missing cannot be classified later on.
has_standard_value = df["standard_value"].notna()
df2 = df.loc[has_standard_value]
df2

Unnamed: 0,action_type,activity_comment,activity_id,activity_properties,assay_chembl_id,assay_description,assay_type,assay_variant_accession,assay_variant_mutation,bao_endpoint,...,target_organism,target_pref_name,target_tax_id,text_value,toid,type,units,uo_units,upper_value,value
0,,,82585,[],CHEMBL666794,Inhibition of Cytochrome P450 19A1,B,,,BAO_0000190,...,Homo sapiens,Cytochrome P450 19A1,9606,,,IC50,uM,UO_0000065,,7.1
1,,,94540,[],CHEMBL666794,Inhibition of Cytochrome P450 19A1,B,,,BAO_0000190,...,Homo sapiens,Cytochrome P450 19A1,9606,,,IC50,uM,UO_0000065,,50.0
2,,,112960,[],CHEMBL661700,In vitro inhibition of human Cytochrome P450 19A1,B,,,BAO_0000190,...,Homo sapiens,Cytochrome P450 19A1,9606,,,IC50,uM,UO_0000065,,0.238
3,,,116766,[],CHEMBL661700,In vitro inhibition of human Cytochrome P450 19A1,B,,,BAO_0000190,...,Homo sapiens,Cytochrome P450 19A1,9606,,,IC50,uM,UO_0000065,,0.057
4,,,118017,[],CHEMBL661700,In vitro inhibition of human Cytochrome P450 19A1,B,,,BAO_0000190,...,Homo sapiens,Cytochrome P450 19A1,9606,,,IC50,uM,UO_0000065,,0.054
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3534,"{'action_type': 'INHIBITOR', 'description': 'N...",,25056466,"[{'comments': None, 'relation': '=', 'result_f...",CHEMBL5249084,Inhibition of human placental microsome aromat...,B,,,BAO_0000190,...,Homo sapiens,Cytochrome P450 19A1,9606,,,IC50,nM,UO_0000065,,237.8
3535,"{'action_type': 'INHIBITOR', 'description': 'N...",,25056467,"[{'comments': None, 'relation': '=', 'result_f...",CHEMBL5249084,Inhibition of human placental microsome aromat...,B,,,BAO_0000190,...,Homo sapiens,Cytochrome P450 19A1,9606,,,IC50,nM,UO_0000065,,1100.0
3536,"{'action_type': 'INHIBITOR', 'description': 'N...",,25056468,"[{'comments': None, 'relation': '=', 'result_f...",CHEMBL5249084,Inhibition of human placental microsome aromat...,B,,,BAO_0000190,...,Homo sapiens,Cytochrome P450 19A1,9606,,,IC50,nM,UO_0000065,,2531.0
3537,"{'action_type': 'INHIBITOR', 'description': 'N...",,25056469,"[{'comments': None, 'relation': '=', 'result_f...",CHEMBL5249084,Inhibition of human placental microsome aromat...,B,,,BAO_0000190,...,Homo sapiens,Cytochrome P450 19A1,9606,,,IC50,nM,UO_0000065,,252.4


In [9]:
# Label every record by its standard_value:
#   >= 10000 -> "inactive", <= 1000 -> "active", anything in between ->
#   "intermediate".
# NOTE(review): the thresholds presumably assume standard_value is in nM —
# confirm against the standard_units column.
bioactivity_class = [
    "inactive" if ic50 >= 10000
    else "active" if ic50 <= 1000
    else "intermediate"
    for ic50 in map(float, df2.standard_value)
]

In [10]:
# Collect the molecule ChEMBL IDs of the filtered records as a plain list.
mol_cid = list(df2["molecule_chembl_id"])

In [11]:
# Collect the canonical SMILES column of the filtered records as a plain list.
canonical_smiles = list(df2["canonical_smiles"])

In [12]:
# Collect the standard_value column of the filtered records as a plain list
# (values are kept as returned; no numeric conversion happens here).
standard_value = list(df2["standard_value"])

In [13]:
# Zip the four parallel lists into one row tuple per record and assemble the
# compact table that is exported at the end of the notebook.
output_columns = [
    'molecule_chembl_id',
    'canonical_smiles',
    'bioactivity_class',
    'standard_value',
]
data_tuples = list(
    zip(mol_cid, canonical_smiles, bioactivity_class, standard_value)
)
df3 = pd.DataFrame(data_tuples, columns=output_columns)

In [14]:
# Display the assembled table (ID, SMILES, activity class, standard_value).
df3

Unnamed: 0,molecule_chembl_id,canonical_smiles,bioactivity_class,standard_value
0,CHEMBL341591,CC12CCC(O)CC1=CCC1C2CCC2(C)C(CC3CN3)CCC12,intermediate,7100.0
1,CHEMBL2111947,C[C@]12CC[C@H]3[C@@H](CC=C4C[C@@H](O)CC[C@@]43...,inactive,50000.0
2,CHEMBL431859,CCn1c(C(c2ccc(F)cc2)n2ccnc2)c(C)c2cc(Br)ccc21,active,238.0
3,CHEMBL113637,CCn1cc(C(c2ccc(F)cc2)n2ccnc2)c2ccccc21,active,57.0
4,CHEMBL112021,Clc1ccccc1Cn1cc(Cn2ccnc2)c2ccccc21,active,54.0
...,...,...,...,...
3458,CHEMBL5278229,COc1ccc(C(=O)c2ccc(Cn3ccnc3)cc2)cc1,active,237.8
3459,CHEMBL5275747,O=C(c1ccc(O)cc1)c1ccc(Cn2ccnc2)cc1,intermediate,1100.0
3460,CHEMBL5273068,O=C(c1ccc(Cn2ccnc2)cc1)c1ccc(-c2ccccc2)cc1,intermediate,2531.0
3461,CHEMBL223176,O=C(c1ccccc1)c1ccc(Cn2ccnc2)cc1,active,252.4


In [15]:
# Save the assembled table to a CSV file (row index omitted).
# Create the output directory first: to_csv raises OSError when the parent
# directory is missing, which would break a run on a fresh checkout.
output_path = Path('./data/test_bioactivity_data_raw.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df3.to_csv(output_path, index=False)