# Load libraries and dependencies

In [1]:
from __future__ import division, print_function
from comet_ml import Experiment
import numpy as np
from numpy import inf, ndarray
import pandas as pd
import tensorflow as tf
import os
import random
import keras
import sklearn
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn import metrics
import re
from keras import optimizers
from keras import losses
from keras import regularizers
import keras.backend as K
from keras.models import model_from_json
from keras.models import load_model, Model
from tempfile import TemporaryFile
from keras import layers
from rdkit import Chem
from rdkit.Chem import Draw, Descriptors
from keras.callbacks import History, ReduceLROnPlateau
from keras.layers import Input, BatchNormalization, Activation
from keras.layers import CuDNNLSTM, Dense, Bidirectional, Dropout, Layer
from keras.initializers import glorot_normal
from keras.regularizers import l2
from functools import partial
from multiprocessing import cpu_count, Pool
from keras.utils.generic_utils import Progbar
from copy import deepcopy
from NGF.utils import filter_func_args, mol_shapes_to_dims
import NGF.utils
import NGF_layers.features
import NGF_layers.graph_layers
from NGF_layers.features import one_of_k_encoding, one_of_k_encoding_unk, atom_features, bond_features, num_atom_features, num_bond_features
from NGF_layers.features import padaxis, tensorise_smiles, concat_mol_tensors
from NGF_layers.graph_layers import temporal_padding, neighbour_lookup, NeuralGraphHidden, NeuralGraphOutput
from math import ceil
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from utility.gaussian import GaussianLayer
from utility.gaussian import custom_loss
from utility.evaluator import r_square, get_cindex, pearson_r, mse_sliced, model_evaluate
from utility.Generator import train_generator
from model import enc_mols, net

Using TensorFlow backend.


# Train data

In [2]:
# Read the table of unique SMILES strings (column 'x') and turn every molecule
# into its atom / bond / edge tensors.
# NOTE(review): the path is absolute and machine-specific; consider a
# configurable data directory.
smiles = pd.read_csv(
    '/home/biolab/Documents/Go distances/learning/data/mcf7q1/can_smiles_all.csv',
    index_col=0,
)

X_atoms, X_bonds, X_edges = tensorise_smiles(smiles['x'], max_atoms=60, max_degree=5)

In [3]:
# Keep only the SMILES strings as a plain Python list (this is what is handed
# to train_generator later on). The isinstance guard makes the cell safe to
# re-run: once `smiles` is already a list, indexing it with 'x' would raise.
smiles = list(smiles['x']) if isinstance(smiles, pd.DataFrame) else list(smiles)

In [4]:
# Training set: read the CSV (first column is the stored index) and replace
# that index with a fresh 0..n-1 range in a single expression.
df = pd.read_csv(
    '/home/biolab/Documents/Go distances/learning/data/mcf7q1/80dr_cold/train_30_7.csv',
    index_col=0,
).reset_index(drop=True)

# Model

In [5]:
# Hyper-parameter dictionary handed to enc_mols() and net() when each
# ensemble member is built.
p = dict(
    lr=1e-4,
    nfilters=32,
    size=8,
    conv_width=128,
    fp_length=256,
    size_drug_1=8,
    size_drug_2=4,
    size_protein_1=8,
    size_protein_2=16,
    size_protein_3=3,
    batch_size=128,
    dense_size=256,
    dense_size_2=512,
    dropout=0.25,
    l2reg=0.01,
)

# Load cold validation data

In [6]:
# Featurisation sizes used for the Keras Input shapes of the model.
# Note: num_atom_features / num_bond_features rebind the functions of the same
# name imported from NGF_layers.features; from here on they are plain ints.
max_atoms, max_degree = 60, 5
num_atom_features, num_bond_features = 62, 6

In [7]:
# Cold validation pairs: read the CSV (first column is the stored index), then
# renumber the rows 0..n-1.
df_cold = pd.read_csv(
    '/home/biolab/Documents/Go distances/learning/data/mcf7q1/80dr_cold/cold_val_new.csv',
    index_col=0,
)
df_cold = df_cold.reset_index(drop=True)

In [8]:
# Tensorise both molecules of every cold-validation pair. The sizes come from
# the max_degree / max_atoms constants (instead of repeated literals) so the
# tensors always match the Input shapes the model is built with.
smiles_cold = df_cold['rdkit.x']
smiles_cold2 = df_cold['rdkit.y']
X_atoms_cold_1, X_bonds_cold_1, X_edges_cold_1 = tensorise_smiles(
    smiles_cold, max_degree=max_degree, max_atoms=max_atoms)
X_atoms_cold_2, X_bonds_cold_2, X_edges_cold_2 = tensorise_smiles(
    smiles_cold2, max_degree=max_degree, max_atoms=max_atoms)

# Regression target: the 'value' column halved.
# NOTE(review): presumably this rescales the target to the range the network
# was trained on - confirm against how the training targets are produced.
Y_cold = df_cold['value'] / 2

# Fit ensembles

In [9]:
# Accumulators: one array of predicted means / sigmas per ensemble member.
cold_preds_mus, cold_preds_sigmas = [], []

In [10]:
# Train an ensemble of siamese networks with a Gaussian (mu, sigma) output head.
# For every member: build a fresh model, train it with the generator, save its
# weights, evaluate it on the cold validation set and store the predicted
# means and sigmas (both in memory and on disk).

# Settings shared by every ensemble member (hoisted out of the loop).
N_MODELS = 50
thresh = 0.20  # threshold to consider similars
bs = 128
NUM_EPOCHS = 10
# Total number of training samples
NUM_TRAIN = len(df)

# Cold validation inputs, reused for validation and for both predict calls.
cold_inputs = [X_atoms_cold_1, X_bonds_cold_1, X_edges_cold_1,
               X_atoms_cold_2, X_bonds_cold_2, X_edges_cold_2]

# Create the output folders up front so a finished training run cannot be
# lost to a missing directory when the results are written.
for out_dir in ('testing_train/models',
                'testing_train/performance',
                'testing_train/cold/mu',
                'testing_train/cold/sigma'):
    os.makedirs(out_dir, exist_ok=True)

for n in range(N_MODELS):

    # Start every member from an empty graph. Without this, all models
    # accumulate in one TensorFlow graph and memory use grows each iteration.
    K.clear_session()
    # Let TensorFlow allocate GPU memory on demand instead of reserving it all
    # at start-up; full pre-allocation is a common cause of the
    # "cuDNN failed to initialize" error.
    # NOTE(review): written for the TF 1.x + standalone Keras stack this
    # notebook runs on - confirm if the environment changes.
    gpu_config = tf.compat.v1.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    K.set_session(tf.compat.v1.Session(config=gpu_config))

    # Initialize model inputs (one atom/bond/edge triple per molecule of the pair)
    atoms0_1 = Input(name='atom_inputs_1', shape=(max_atoms, num_atom_features), dtype='float32')
    bonds_1 = Input(name='bond_inputs_1', shape=(max_atoms, max_degree, num_bond_features), dtype='float32')
    edges_1 = Input(name='edge_inputs_1', shape=(max_atoms, max_degree), dtype='int32')

    atoms0_2 = Input(name='atom_inputs_2', shape=(max_atoms, num_atom_features), dtype='float32')
    bonds_2 = Input(name='bond_inputs_2', shape=(max_atoms, max_degree, num_bond_features), dtype='float32')
    edges_2 = Input(name='edge_inputs_2', shape=(max_atoms, max_degree), dtype='int32')

    # Initialize encoder
    encoder_mols = enc_mols(p, 0.001, 128, 256, max_atoms, num_atom_features, max_degree, num_bond_features)

    # Use net defined in model.py
    adam = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8, decay=0.0, amsgrad=False)

    interaction_net = net(p, max_atoms, num_atom_features, max_degree, num_bond_features, encoder_mols,
                          atoms0_1, bonds_1, edges_1, atoms0_2, bonds_2, edges_2)

    # The Gaussian head yields (mu, sigma); the trained model outputs mu only,
    # while sigma enters training through the custom loss.
    mu, sigma = GaussianLayer(1, name='main_output')(interaction_net.output)
    siamese_net = Model(inputs=[atoms0_1, bonds_1, edges_1, atoms0_2, bonds_2, edges_2], outputs=mu)

    siamese_net.compile(optimizer=adam, loss=custom_loss(sigma),
                        metrics=['mse', get_cindex, r_square, pearson_r, mse_sliced])

    # Train with fit_generator
    rlr = ReduceLROnPlateau(monitor='loss', factor=0.5, patience=2, min_lr=0.00001, verbose=1, min_delta=1e-5)
    term = keras.callbacks.TerminateOnNaN()

    # Shuffle a copy of the training frame for this member; the global `df`
    # is left untouched so re-running the cell starts from the same data.
    train_df = df.sample(frac=1).reset_index(drop=True)

    trainGen = train_generator(bs, train_df, smiles, X_atoms, X_bonds, X_edges)
    siamese_net.fit_generator(trainGen,
                              steps_per_epoch=ceil(NUM_TRAIN / bs),
                              epochs=NUM_EPOCHS,
                              verbose=1,
                              validation_data=(cold_inputs, Y_cold),
                              shuffle=True,
                              callbacks=[term, rlr])

    # Save model
    siamese_net.save_weights('testing_train/models/siam_no_augment_%s.h5' % n)
    # Decouple model at the gaussian: expose the full output of 'main_output'
    Gauss = keras.Model(siamese_net.inputs, siamese_net.get_layer('main_output').output)
    # Evaluate and save evals
    y_pred = siamese_net.predict(cold_inputs, batch_size=2048)
    get = model_evaluate(y_pred, Y_cold, thresh, df_cold)
    get.to_csv('testing_train/performance/Model_No_%s.csv' % n)
    # Predict on cold: element 0 is stored as mu, element 1 as sigma
    cold_pred = Gauss.predict(cold_inputs, batch_size=2048)
    cold_mu, cold_sigma = cold_pred[0], cold_pred[1]
    # Append mus and sigmas and save at the same time
    cold_preds_mus.append(cold_mu)
    np.save('testing_train/cold/mu/cold_mu_No_%s.npy' % n, cold_mu)
    cold_preds_sigmas.append(cold_sigma)
    np.save('testing_train/cold/sigma/cold_sigma_No_%s.npy' % n, cold_sigma)

W0410 16:31:57.363885 140577775949632 deprecation_wrapper.py:119] From /home/biolab/miniconda3/envs/nikos/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0410 16:31:57.375677 140577775949632 deprecation_wrapper.py:119] From /home/biolab/miniconda3/envs/nikos/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0410 16:31:57.376442 140577775949632 deprecation_wrapper.py:119] From /home/biolab/miniconda3/envs/nikos/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4185: The name tf.truncated_normal is deprecated. Please use tf.random.truncated_normal instead.

W0410 16:31:57.490344 140577775949632 deprecation_wrapper.py:119] From /home/biolab/miniconda3/envs/nikos/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_wi

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
atom_inputs (InputLayer)        (None, 60, 62)       0                                            
__________________________________________________________________________________________________
bond_inputs (InputLayer)        (None, 60, 5, 6)     0                                            
__________________________________________________________________________________________________
edge_inputs (InputLayer)        (None, 60, 5)        0                                            
__________________________________________________________________________________________________
neural_graph_hidden_1 (NeuralGr (None, 60, 128)      44160       atom_inputs[0][0]                
                                                                 bond_inputs[0][0]                
          

W0410 16:31:58.640298 140577775949632 deprecation_wrapper.py:119] From /home/biolab/miniconda3/envs/nikos/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3976: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

W0410 16:31:58.966978 140577775949632 deprecation_wrapper.py:119] From /home/biolab/miniconda3/envs/nikos/lib/python3.7/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0410 16:31:58.970188 140577775949632 deprecation.py:323] From /home/biolab/Documents/Go distances/deepSIBA/main/utility/gaussian.py:39: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
W0410 16:31:58.992588 140577775949632 deprecation.py:323] From /home/biolab/Documents/Go distances/deepSIBA/main/utility/evaluator.py:53: add_dispatch_support.<locals>.wrapper (from t

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
atom_inputs_1 (InputLayer)      (None, 60, 62)       0                                            
__________________________________________________________________________________________________
bond_inputs_1 (InputLayer)      (None, 60, 5, 6)     0                                            
__________________________________________________________________________________________________
edge_inputs_1 (InputLayer)      (None, 60, 5)        0                                            
__________________________________________________________________________________________________
atom_inputs_2 (InputLayer)      (None, 60, 62)       0                                            
__________________________________________________________________________________________________
bond_input

UnknownError: 2 root error(s) found.
  (0) Unknown: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
	 [[{{node model_1/conv1d_1/convolution}}]]
	 [[metrics/mse_sliced/Mean_1/_1059]]
  (1) Unknown: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
	 [[{{node model_1/conv1d_1/convolution}}]]
0 successful operations.
0 derived errors ignored.