In [None]:
pip install git+https://github.com/bp-kelley/descriptastorus
pip install DeepPurpose

In [18]:
# DeepPurpose modules used below: `utils` (data_process) and the DTI module,
# aliased as `models` (model_pretrained). `dataset` is imported but not used
# in the cells visible here.
from DeepPurpose import utils, dataset
from DeepPurpose import DTI as models
import warnings
# Globally ignore all warnings from this point on (presumably to keep the
# cell outputs readable); remove this line when debugging.
warnings.filterwarnings("ignore")

**BindingDB**
* A comprehensive and widely used database of small-molecule binding data to protein targets
* Contains experimentally measured binding affinities (such as IC50, Ki, Kd) for small molecules interacting with target proteins
* Covers a broad range of targets, including enzymes, receptors, and transporters
* Diverse, including info from various sources and experimental methods
* More focused on the binding affinities of small molecules to proteins, making it suitable for regression-based DTI prediction tasks

**DAVIS (kinase inhibitor selectivity dataset from Davis et al., 2011)**
* A benchmark dataset widely used for drug-target interaction prediction; the name comes from the first author of the source study and is not an acronym
* Contains measured binding affinities (dissociation constants, Kd) for drug-target pairs; binary labels (active/inactive) can be derived by thresholding the affinity
* Data comes from experimental binding assays that profiled 72 kinase inhibitors against 442 kinases in the original study
* Primarily focuses on the selectivity of kinase inhibitors across a large panel of kinases
* Suitable for regression-based DTI prediction, and for binary classification (whether a drug and target interact or not) once the affinities are thresholded

**KIBA (Kinase Inhibitor Bioactivity)**
* A specialized dataset focused on the interactions between kinase inhibitors and protein kinases
* Includes kinase inhibitor bioactivity measurements (binding affinities) and targets associated with specific inhibitors
* Designed to facilitate the prediction of kinase inhibitor bioactivity and kinase-specific interactions
* Particularly useful for predicting drug-target interactions involving protein kinases, which play essential roles in cell signaling and are targets for various diseases, including cancer

In [47]:
# Catalogue of the pretrained DeepPurpose DTI checkpoints tried in this notebook.
# Names appear to follow <drug encoding>_<target encoding>_<training set>[_IC50].
# Each value is the predicted score the notebook author recorded for that
# checkpoint (kept verbatim from the original per-model comments).
PRETRAINED_MODEL_SCORES = {
    # pretrained models on BindingDB
    'MPNN_CNN_BindingDB': 4.65383243560791,
    'CNN_CNN_BindingDB_IC50': 5.397450923919678,
    'Morgan_CNN_BindingDB_IC50': 5.273427486419678,
    'Morgan_AAC_BindingDB_IC50': 5.152268409729004,
    'MPNN_CNN_BindingDB_IC50': 6.513026237487793,
    'Daylight_AAC_BindingDB_IC50': 5.836477756500244,
    'CNN_CNN_BindingDB': 4.437524795532227,
    'Morgan_CNN_BindingDB': 6.2451276779174805,
    'Transformer_CNN_BindingDB': 4.726782321929932,
    'Daylight_AAC_BindingDB': 7.82790994644165,
    'Morgan_AAC_BindingDB': 5.4454569816589355,
    # pretrained models on DAVIS
    'MPNN_CNN_DAVIS': 5.951980113983154,
    'CNN_CNN_DAVIS': 5.034719467163086,
    'Morgan_CNN_DAVIS': 5.045121669769287,
    'Daylight_AAC_DAVIS': 5.31003999710083,
    'Morgan_AAC_DAVIS': 5.102268218994141,
    # pretrained models on KIBA
    # NOTE(review): the Morgan_CNN_KIBA value is identical to Morgan_AAC_DAVIS and
    # far from the other KIBA scores (~11) -- possibly a copy-paste slip; re-run to confirm.
    'Morgan_CNN_KIBA': 5.102268218994141,
    'MPNN_CNN_KIBA': 11.170574188232422,
    'Daylight_AAC_KIBA': 11.25594711303711,
    'Morgan_AAC_KIBA': 11.629722595214844,
}

# Checkpoint used for the prediction below; change this one name to try another.
# The drug/target encodings passed to utils.data_process must match the checkpoint.
MODEL_NAME = 'Morgan_AAC_KIBA'
model = models.model_pretrained(model=MODEL_NAME)


Downloading finished... Beginning to extract zip file...
pretrained model Successfully Downloaded...


Example: predict the interaction score of the drug ipidacrine against the Alzheimer's disease target amyloid beta.

**Note that the DTI models pretrained on BindingDB and DAVIS are trained on log-scale affinities, so their predicted scores are on a log scale.**

In [48]:
# Read the encodings from the loaded model instead of hard-coding them, so the
# featurisation always matches the checkpoint (e.g. 'Morgan'/'AAC' for
# Morgan_AAC_* checkpoints, 'MPNN'/'CNN' for MPNN_CNN_* checkpoints).
drug_encoding, target_encoding = model.drug_encoding, model.target_encoding

X_drug = ['NC1=C2CCCC2=NC2=C1CCCC2']                    # ipidacrine
X_target = ['DAEFRHESGYEVHHQKLVFFAEDVGSNKGAIIGLMVGGVV'] # Amyloid
# data_process requires a numerical y, but the value itself does not affect
# the predicted score, so a placeholder is passed.
y = [0]

# 'no_split' keeps the single drug-target pair in one frame (no train/test split).
X_pred = utils.data_process(
    X_drug, X_target, y,
    drug_encoding, target_encoding,
    split_method='no_split',
)
y_pred = model.predict(X_pred)
print(f'The predicted score is {y_pred}')

Drug Target Interaction Prediction Mode...
in total: 1 drug-target pairs
encoding drug...
unique drugs: 1
encoding protein...
unique target sequence: 1
-- Encoding AAC takes time. Time Reference: 24s for ~100 sequences in a CPU.				 Calculate your time by the unique target sequence #, instead of the entire dataset.
splitting dataset...
do not do train/test split on the data for already splitted data
predicting...
The predicted score is [11.629722595214844]
