#  Setup

Install and import the necessary packages.

In [None]:
# Install RDKit.
%%capture
!pip install rdkit-pypi
!pip install pubchempy

import tensorflow as tf
import pandas as pd
import pubchempy as pc
import numpy as np
import os
import re
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Descriptors
from rdkit.Chem import AllChem
from rdkit import DataStructs
from rdkit.Chem import PandasTools


# Functions

Run the code below to define the class and helper methods required for the prediction.

In [92]:
class DeepARV:
  '''
  Ensemble wrapper around the pre-trained DeepARV models.

  Given two drug names, the class looks both up on PubChem, converts each
  to a Morgan fingerprint, builds a Tanimoto similarity profile against a
  list of reference drugs, feeds the concatenated profiles to every model
  of the ensemble and aggregates the votes into a "traffic light" class.
  '''

  # Default locations of the trained models and of the reference list.
  # They can be overridden through the constructor.
  DEFAULT_MODEL_PREFIX = '/content/drive/MyDrive/liverpool_hiv/Trained_Models/DeepARV_v1/DeepARV_model'
  DEFAULT_REFERENCE_PATH = '/content/drive/MyDrive/liverpool_hiv/Cleaning_data/ref_list.npy'

  # Human-readable explanation for each predicted class index.
  CLINICAL_LABELS = {
      0: ('Green: No clinically significant '
          'interaction expected.'),
      1: ('Yellow: Potential interaction of weak '
          'clinical relevance for which additional action/monitoring '
          'or dosage adjustment is not required.'),
      2: ('Amber: Potential clinically relevant '
          'interaction that can be managed by clinical monitoring, '
          'alteration of drug dosage or timing of administration.'),
      3: ('Red: These drugs should not be '
          'co-administered as they may cause a deleterious effect '
          '(e.g., loss of efficacy or toxicity of the ARV drug '
          'or coadministered drug)'),
  }

  def __init__(self, n_models=5, model_prefix=DEFAULT_MODEL_PREFIX,
               reference_path=DEFAULT_REFERENCE_PATH):
    '''
    Load the ensemble models and the reference drug fingerprints.

    n_models       : number of models in the ensemble; model i is loaded
                     from model_prefix + str(i).
    model_prefix   : path prefix shared by all saved models.
    reference_path : path of the .npy file holding the reference drugs.
    '''
    self.n_models = n_models

    # Load all models, keyed by their index in the ensemble
    self.model_trained = {}
    print('Loading models')
    for i in tqdm(range(n_models)):
      self.model_trained[i] = tf.keras.models.load_model(model_prefix + str(i))

    # Load reference drugs
    print('\nLoading reference drugs')
    # NOTE(review): allow_pickle=True executes pickled objects on load;
    # only point reference_path at a file from a trusted source.
    self.reference_drugs = np.load(reference_path, allow_pickle=True)

  def drugname_to_fp(self, name):
    '''
    Input is a drug name, output is a numpy array with the indices of the
    bits that are set in the 1024-bit Morgan fingerprint (radius 2) of the
    first PubChem compound matching that name.

    Raises IndexError when PubChem returns no compound for the name.
    '''
    # Get drug SMILES from PubChem
    compounds = pc.get_compounds(name, 'name')
    if not compounds:
      # Same exception type as the former bare compounds[0] lookup, but
      # with a message that tells the user what went wrong.
      raise IndexError('No PubChem compound found for drug name: ' + repr(name))
    drug_smiles = compounds[0].isomeric_smiles

    # Convert SMILES to Mol
    drug_mol = Chem.MolFromSmiles(drug_smiles)

    # Molecule to Morgan fingerprint bits
    fp2 = AllChem.GetMorganFingerprintAsBitVect(drug_mol, 2, nBits=1024)
    return np.array(list(fp2.GetOnBits()))

  def create_feature_vector(self, drug_fp):
    '''
    Generate the feature vector for a drug (structural similarity profile)
    by calculating the Tanimoto score against every entry of the reference
    list. Returns a list with one score per reference drug.
    '''
    def tanimoto(fp1, fp2):
      # Tanimoto score between 2 drugs given as arrays of on-bit indices:
      # |intersection| / |union|, rounded to 4 decimals.
      n_union = len(np.union1d(fp1, fp2))
      if n_union == 0:
        # Both fingerprints are empty: nothing is shared, avoid dividing by 0
        return 0.0
      n_intersect = len(np.intersect1d(fp1, fp2))
      return round(n_intersect / n_union, 4)

    return [tanimoto(drug_fp, ref_fp) for ref_fp in self.reference_drugs]

  def ensemble_prediction(self, d1_fv, d2_fv):
    '''
    Run every loaded model on the concatenated feature vectors.

    Stores the raw model outputs in self.all_results (dict keyed by model
    index) and the per-model predicted classes in self.temp_results
    (one list per model).
    '''
    # Concat into single input row
    X = np.hstack((d1_fv, d2_fv)).reshape(1, -1)
    # dict containing the raw outputs of all models
    self.all_results = {}
    # list containing the class predictions of each model
    self.temp_results = []
    # Iterate over the models that were actually loaded rather than a
    # hard-coded 5, so that any n_models works.
    for i in range(len(self.model_trained)):
      self.all_results[i] = self.model_trained[i].predict(X)

      # Class prediction = index of the highest output for each input row
      class_pred = [np.argmax(row) for row in self.all_results[i]]
      self.temp_results.append(class_pred)
    return

  def aggregate_predictions(self):
    '''
    Selects the class with the highest number of votes; in case of a draw,
    the highest class index among the tied classes is selected.
    Sets self.final_prediction and self.clinical_pred.
    '''
    # np.unique returns the classes sorted in ascending order
    uniques, counts = np.unique(self.temp_results, return_counts=True)

    all_max_idx = np.where(counts == counts.max())[0]
    max_idx = all_max_idx.max()
    self.final_prediction = uniques[max_idx]

    # Always (re)assign clinical_pred so that a value from an earlier call
    # can never be reported for an unexpected class index.
    self.clinical_pred = self.CLINICAL_LABELS.get(
        int(self.final_prediction),
        'Unknown: unexpected class ' + str(self.final_prediction))
    return

  def get_confidence_score(self):
    '''
    Confidence (in %) of the final prediction: mean output for the final
    class over the models that voted for it, rounded to the nearest
    integer. Sets self.pred_confidence.
    '''
    # Explicit array conversion: one row per model, one column per input row
    votes = np.asarray(self.temp_results)
    agreeing = np.argwhere(votes == self.final_prediction)
    # idx[0] is the model index; [0] selects the single input row
    to_mean = [self.all_results[idx[0]][0][self.final_prediction]
               for idx in agreeing]
    self.pred_confidence = np.round(np.mean(to_mean) * 100)
    return

  def predict(self, drugname_1, drugname_2):
    ''' Models final prediction with confidence score and clinical explanation '''
    # Convert drugs to FP
    d1_fp = self.drugname_to_fp(drugname_1)
    d2_fp = self.drugname_to_fp(drugname_2)

    # Convert each drug to feature vector
    d1_fv = self.create_feature_vector(d1_fp)
    d2_fv = self.create_feature_vector(d2_fp)

    # Prediction from all models of the ensemble
    self.ensemble_prediction(d1_fv, d2_fv)

    # Aggregate all model predictions for final
    self.aggregate_predictions()
    # Output confidence
    self.get_confidence_score()
    print('\n------------------------------')
    print('         PREDICTIONS            ')
    print('Clinical prediction: ', self.clinical_pred)
    print('Confidence: ', self.pred_confidence, '%')
    print('------------------------------')


# Usage

Given a drug pair of interest, our **pre-trained DeepARV** predicts the drug-drug interaction (DDI) risk using a '**traffic light**' system:


*   Green – No clinically significant interaction expected
*   Yellow – Potential interaction of weak clinical relevance for which additional action/monitoring or dosage adjustment is not required.
*   Amber – Potential clinically relevant interaction that can be managed by clinical monitoring, alteration of drug dosage or timing of administration.
*   Red – These drugs should not be co-administered as they may cause a deleterious effect (e.g., loss of efficacy or toxicity of the ARV drug or coadministered drug).



Below is an example of a DDI risk prediction for an ARV drug and an antifungal comedication:

> *Note: the first drug name must be the antiretroviral (ARV) drug.*

In [93]:
# Build the ensemble (prints progress while loading the models and the
# reference drugs), then print the predicted DDI risk for one drug pair.
deeparv = DeepARV()
deeparv.predict('efavirenz','fluconazole')
# Note: the first drug name has to be the antiretroviral (ARV) drug.

Loading models


100%|██████████| 5/5 [00:03<00:00,  1.29it/s]



Loading reference drugs

------------------------------
         PREDICTIONS            
Clinical prediction:  Amber: Potential clinically relevant interaction that can be managed by clinical monitoring, alteration of drug dosage or timing of administration.
Confidence:  70.0 %
------------------------------
