# Predict drug–target interactions (DTIs) and binding regions (BRs) with a trained model

In [1]:
from HoTS.model.hots import *
from HoTS.utils.build_features import *

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Featurization settings for the two model inputs.
# Protein side: which protein representation the encoder should produce.
prot_vec = "Sequence"
# Compound side: fingerprint type plus its radius and bit-vector length.
drug_vec = "Morgan"
radius = 2
drug_len = 2048

# Encoders that turn raw protein sequences / SMILES strings into model inputs.
protein_encoder = ProteinEncoder(prot_vec)
compound_encoder = CompoundEncoder(drug_vec, n_bits=drug_len, radius=radius)

In [3]:
# Create the HoTS model object with its default constructor arguments;
# the trained configuration is loaded into it by a later `load_model` call.
dti_model = HoTS()

## Load trained model

To load a trained model, call `dti_model.load_model("output.config.json")` with the path to the model's JSON config file.

In [4]:
# Path to the JSON config of the trained model.
# NOTE(review): this is an absolute path on the author's machine — point it
# at your own trained model's config before running.
model_config_path = "/home/dlsrnsi/DTI/HoTS/Model_JCInfo/0_HoTS_br_9_grid_10.json"

# Load the trained model described by that config into `dti_model`.
dti_model.load_model(model_config_path)

{'protein_grid_size': 10, 'compound_grid_size': None, 'anchors': [9], 'hots_dimension': 128, 'hots_n_heads': 4, 'dropout': 0.1, 'drug_layers': [512, 128], 'protein_strides': [5, 10, 15, 20, 25, 30], 'filters': 128, 'fc_layers': [256, 64], 'hots_fc_layers': [256, 64], 'learning_rate': 0.0001, 'prot_vec': 'Sequence', 'drug_vec': 'Morgan', 'drug_len': 2048, 'activation': 'gelu', 'protein_layers': [128, 128, 128, 128], 'reg_loss_weight': 0.1, 'conf_loss_weight': 1, 'negative_loss_weight': 0.1, 'retina_loss_weight': 2, 'decay': 0.0001, 'hots_file': '/home/dlsrnsi/DTI/HoTS/Model_JCInfo/0_HoTS_br_9_grid_10.h5', 'dti_file': '/home/dlsrnsi/DTI/HoTS/Model_JCInfo/0_DTI_br_9_grid_10.h5', 'hots_validation_results': {}, 'dti_validation_results': {'MATADOR_DTI': [{'AUC': 0.6692041153579615, 'AUPR': 0.6280061376930326}, {'AUC': 0.6223146223146223, 'AUPR': 0.5732487653197109}, {'AUC': 0.6139719601258062, 'AUPR': 0.5634151615975538}, {'AUC': 0.6018124633509249, 'AUPR': 0.5534286734256454}, {'AUC': 0.731

In [5]:
# SMILES strings of the query drugs (must be a list, even for a single drug)

drugs = ["CC1=NC(NC2=NC=C(S2)C(=O)NC2=C(C)C=CC=C2Cl)=CC(=N1)N1CCN(CCO)CC1"]

# Amino-acid sequences of the query targets (must be a list, even for a single protein)

targets = ["MLEICLKLVGCKSKKGLSSSSSCYLEEALQRPVASDFEPQGLSEAARWNSKENLLAGPSENDPNLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWCEAQTKNGQGWVPSNYITPVNSLEKHSWYHGPVSRNAAEYLLSSGINGSFLVRESESSPGQRSISLRYEGRVYHYRINTASDGKLYVSSESRFNTLAELVHHHSTVADGLITTLHYPAPKRNKPTVYGVSPNYDKWEMERTDITMKHKLGGGQYGEVYEGVWKKYSLTVAVKTLKEDTMEVEEFLKEAAVMKEIKHPNLVQLLGVCTREPPFYIITEFMTYGNLLDYLRECNRQEVNAVVLLYMATQISSAMEYLEKKNFIHRDLAARNCLVGENHLVKVADFGLSRLMTGDTYTAHAGAKFPIKWTAPESLAYNKFSIKSDVWAFGVLLWEIATYGMSPYPGIDLSQVYELLEKDYRMERPEGCPEKVYELMRACWQWNPSDRPSFAEIHQAFETMFQESSISDEVEKELGKQGVRGAVSTLLQAPELPTKTRTSRRAAEHRDTTDVPEMPHSKGQGESDPLDHEPAVSPLLPRKERGPPEGGLNEDERLLPKDKKTNLFSALIKKKKKTAPTPPKRSSSFREMDGQPERRGAGEEEGRDISNGALAFTPLDTADPAKSPKPSNGAGVPNGALRESGGSGFRSPHLWKKSSTLTSSRLATGEEEGGGSSSKRFLRSCSASCVPHGAKDTEWRSVTLPRDLQSTGRQFDSSTFGGHKSEKPALPRKRAGENRSDQVTRGTVTPPPRLVKKNEEAADEVFKDIMESSPGSSPPNLTPKPLRRQVTVAPASGLPHKEEAGKGSALGTPAAAEPVTPTSKAGSGAPGGTSKGPAEESRVRRHKHSSESPGRDKGKLSRLKPAPPPPPAASAGKAGGKPSQSPSQEAAGEAVLGAKTKATSLVDAVNSDAAKPSQPGEGLKKPVLPATPKPQSAKPSGTPISPAPVPSTLPSASSALAGDQPSSTAFIPLISTRVSLRKTRQPPERIASGAITKGVVLDSTEALCLAISRNSEQMASHSAVLEAGKNLYTFCVSYVDSIQQMRNKFAFREAINKLENNLRELQICPATAGSGPAATQDFSKLLSSVKEISDIVQR"]

Encode the SMILES strings and protein sequences with the encoders defined above.

In [6]:
# Featurize every drug SMILES and every target sequence, keeping input order.
drugs_fp = list(map(compound_encoder.encode, drugs))
targets_encoded = list(map(protein_encoder.encode, targets))

## Prediction of DTIs

You can predict DTIs directly with the `DTI_prediction` method of the `dti_model` object.

In [7]:
# Predict interaction scores for the encoded drugs and targets.
# The returned array is shown as the cell output.
# NOTE(review): drugs and targets appear to be paired by position — confirm.
dti_model.DTI_prediction(drugs_fp, targets_encoded)

array([[0.9764183]], dtype=float32)

## Prediction of BRs

You can predict BRs with `HoTS_prediction`, but its raw output is hard to interpret.

In [8]:
# Predict binding regions together with the DTI score.
# Judging from the displayed output, the result is a pair: the DTI score array
# and, per target, a list of 3-tuples — presumably (start, end, score) for each
# predicted region; TODO confirm against the HoTS implementation.
dti_model.HoTS_prediction(drugs_fp, targets_encoded)

(array([[0.9764183]], dtype=float32),
 [[(367, 383, 0.7656854391098022),
   (290, 304, 0.7525229454040527),
   (308, 325, 0.7470288276672363),
   (359, 373, 0.7408533096313477),
   (245, 261, 0.7305890917778015),
   (297, 312, 0.7042661309242249),
   (376, 390, 0.7004820108413696),
   (240, 253, 0.6935895681381226),
   (314, 331, 0.681891918182373),
   (349, 364, 0.6479472517967224),
   (157, 172, 0.6307445168495178),
   (90, 105, 0.6145213842391968),
   (740, 753, 0.5977887511253357),
   (280, 294, 0.5959694981575012),
   (199, 212, 0.5927091240882874),
   (330, 343, 0.5844812989234924),
   (1059, 1073, 0.5714472532272339),
   (66, 82, 0.5708515048027039),
   (1117, 1130, 0.5557569265365601),
   (436, 449, 0.5552679896354675),
   (829, 841, 0.5524572730064392),
   (1069, 1081, 0.5516120195388794),
   (220, 233, 0.5440090298652649),
   (1097, 1108, 0.5407511591911316),
   (389, 402, 0.5382654070854187),
   (61, 74, 0.5367584228515625),
   (338, 351, 0.5285118222236633),
   (691, 703, 0

You can visualize the result with `HoTS_visualization`, but you need to provide a list of names, one for each visualization.

In [13]:
# Display labels for the visualization, passed below as `protein_names`.
# NOTE(review): the SMILES in `drugs` looks like dasatinib rather than
# imatinib — confirm and rename this label if so.
names = ["ABL1_imatinib"]

In [14]:
# Print the raw sequence of each target with the predicted regions and their
# scores aligned underneath, labelled with the entries of `names`.
# `th` is echoed in the output as "Prediction with 0.600000"; presumably it is
# the minimum score a region needs in order to be shown — TODO confirm.
dti_model.HoTS_visualization(drugs_fp, targets_encoded, targets, protein_names=names, th=0.6)

Prediction with 0.600000
ABL1_imatinib
DTI score :  [0.9764183]
  Sequence : MLEICLKLVGCKSKKGLSSSSSCYLEEALQRPVASDFEPQGLSEAARWNSKENLLAGPSENDPNLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWC
Prediction :                                                                                           LGYNHNGEWC
     Score :                                                                                           61%       
  Sequence : EAQTKNGQGWVPSNYITPVNSLEKHSWYHGPVSRNAAEYLLSSGINGSFLVRESESSPGQRSISLRYEGRVYHYRINTASDGKLYVSSESRFNTLAELVH
Prediction : EAQTK                                                    PGQRSISLRYEGRVY                            
     Score :                                                          63%                                        
  Sequence : HHSTVADGLITTLHYPAPKRNKPTVYGVSPNYDKWEMERTDITMKHKLGGGQYGEVYEGVWKKYSLTVAVKTLKEDTMEVEEFLKEAAVMKEIKHPNLVQ
Prediction :                                         DITMKHKLGGGQYGEVYEGVW                             KEIKHPNLVQ
     Score :            