In [10]:
import math
from pathlib import Path
from zipfile import ZipFile
from tempfile import TemporaryDirectory

import numpy as np
import pandas as pd
from rdkit.Chem import PandasTools
from chembl_webresource_client.new_client import new_client
from tqdm.auto import tqdm

In [11]:
# Notebook directory, taken from the last entry of IPython's `_dh`
# directory-history list; the data folder lives directly beneath it.
HERE = Path(_dh[-1])
DATA = HERE.joinpath("data")

In [12]:
# One ChEMBL web-resource handle per endpoint: targets, molecules (compounds)
# and activities (bioactivities).
targets_api, compounds_api, bioactivities_api = (
    new_client.target,
    new_client.molecule,
    new_client.activity,
)

In [13]:
# Inspect the type of the target resource object created above.
type(targets_api)

chembl_webresource_client.query_set.QuerySet

Get target data  
- Get the UniProt ID of the target of interest from the UniProt website (e.g., EGFR kinase: P00533); the targets used in this notebook are listed below  
- Use the UniProt ID to get target information

Aldose reductase (AKR1B1): `P15121`  
Sodium/glucose cotransporter 2 (SLC5A2, SGLT2): `P31639`

## AKR1B1

In [14]:
# UniProt accession of aldose reductase (AKR1B1), the target handled in this section.
uniprot_id = "P15121"

Fetch target data from ChEMBL

In [15]:
# Query ChEMBL for the targets that have a component with this UniProt
# accession, requesting only the four fields used further down.
# The result is a QuerySet (see the printed type), not a DataFrame yet.
targets = targets_api.get(target_components__accession=uniprot_id).only(
    "target_chembl_id", "organism", "pref_name", "target_type"
)
print(f'The type of the targets is "{type(targets)}"')

The type of the targets is "<class 'chembl_webresource_client.query_set.QuerySet'>"


Download target data from ChEMBL

In [16]:
# Materialize the target QuerySet as a DataFrame, one row per returned record.
# NOTE(review): this rebinds `targets` from the QuerySet to a DataFrame, so
# re-running this cell on its own feeds the DataFrame back into `from_records`.
# Consider a separate name for the DataFrame.
targets = pd.DataFrame.from_records(targets)
targets

Unnamed: 0,organism,pref_name,target_chembl_id,target_type
0,Homo sapiens,Aldose reductase,CHEMBL1900,SINGLE PROTEIN
1,Homo sapiens,Aldose reductase,CHEMBL1900,SINGLE PROTEIN
2,Homo sapiens,Baculoviral IAP repeat-containing protein 2/Al...,CHEMBL4802032,PROTEIN-PROTEIN INTERACTION


Select target (target ChEMBL ID)

In [17]:
# Select the target by type instead of by row position: the query result
# contains entries of several target types (e.g. a protein-protein
# interaction next to the single protein), and this cell should not depend
# on the order in which they happen to be returned.
single_protein_targets = targets[targets["target_type"] == "SINGLE PROTEIN"]
if single_protein_targets.empty:
    raise ValueError("No SINGLE PROTEIN entry found among the fetched targets")

# First single-protein entry; duplicates of the same ChEMBL ID are equivalent.
target = single_protein_targets.iloc[0]
target

organism                Homo sapiens
pref_name           Aldose reductase
target_chembl_id          CHEMBL1900
target_type           SINGLE PROTEIN
Name: 0, dtype: object

Save selected ChEMBL ID

In [18]:
# Keep the ChEMBL ID of the selected target for the bioactivity query.
chembl_id = target["target_chembl_id"]
print(f"The target ChEMBL ID is {chembl_id}")
# NBVAL_CHECK_OUTPUT

The target ChEMBL ID is CHEMBL1900


Fetch bioactivity data for the target from ChEMBL

In [19]:
# Request bioactivity records for the selected target, keeping only IC50
# measurements with an exact ("=") relation from assays of type "B"
# (presumably ChEMBL's binding-assay class — confirm).
# `.only(...)` names the fields requested for each record.
# NOTE(review): the first record displayed in the next cell has 13 keys and
# also carries `units` and `value`, which are not listed here — confirm that
# this is expected.
bioactivities = bioactivities_api.filter(
    target_chembl_id=chembl_id, type="IC50", relation="=", assay_type="B"
).only(
    "activity_id",
    "assay_chembl_id",
    "assay_description",
    "assay_type",
    "molecule_chembl_id",
    "type",
    "standard_units",
    "relation",
    "standard_value",
    "target_chembl_id",
    "target_organism",
)

# Report how many records matched and confirm the object is still a QuerySet.
print(f"Length and type of bioactivities object: {len(bioactivities)}, {type(bioactivities)}")

Length and type of bioactivities object: 981, <class 'chembl_webresource_client.query_set.QuerySet'>


In [20]:
# Look at a single record: indexing the QuerySet yields one bioactivity as a
# dict (see the printed type); the bare expression then displays it in full.
print(f"Length and type of first element: {len(bioactivities[0])}, {type(bioactivities[0])}")
bioactivities[0]

Length and type of first element: 13, <class 'dict'>


{'activity_id': 72831,
 'assay_chembl_id': 'CHEMBL764859',
 'assay_description': 'Inhibition of human placental aldose reductase (HPAR) activity with glyceraldehyde as substrate',
 'assay_type': 'B',
 'molecule_chembl_id': 'CHEMBL18854',
 'relation': '=',
 'standard_units': 'nM',
 'standard_value': '230.0',
 'target_chembl_id': 'CHEMBL1900',
 'target_organism': 'Homo sapiens',
 'type': 'IC50',
 'units': 'uM',
 'value': '0.23'}

Download bioactivity data from ChEMBL (QuerySet) in the form of a pandas DataFrame.

In [21]:
# Materialize the bioactivity QuerySet as a DataFrame, one row per record.
# `from_records` is used, as for the targets DataFrame earlier, because the
# QuerySet is a sequence of record dicts; `from_dict` is documented for
# dict input and only handled this by falling through to the constructor.
bioactivities_df = pd.DataFrame.from_records(bioactivities)
print(f"DataFrame shape: {bioactivities_df.shape}")
bioactivities_df.head()

DataFrame shape: (981, 13)


Unnamed: 0,activity_id,assay_chembl_id,assay_description,assay_type,molecule_chembl_id,relation,standard_units,standard_value,target_chembl_id,target_organism,type,units,value
0,72831,CHEMBL764859,Inhibition of human placental aldose reductase...,B,CHEMBL18854,=,nM,230.0,CHEMBL1900,Homo sapiens,IC50,uM,0.23
1,73884,CHEMBL764859,Inhibition of human placental aldose reductase...,B,CHEMBL19744,=,nM,130.0,CHEMBL1900,Homo sapiens,IC50,uM,0.13
2,73885,CHEMBL764859,Inhibition of human placental aldose reductase...,B,CHEMBL19711,=,nM,100.0,CHEMBL1900,Homo sapiens,IC50,uM,0.1
3,77187,CHEMBL764859,Inhibition of human placental aldose reductase...,B,CHEMBL19392,=,nM,5630.0,CHEMBL1900,Homo sapiens,IC50,uM,5.63
4,80619,CHEMBL764859,Inhibition of human placental aldose reductase...,B,CHEMBL19746,=,nM,7470.0,CHEMBL1900,Homo sapiens,IC50,uM,7.47
