# Load libraries and dependencies

In [0]:
from __future__ import division, print_function
from comet_ml import Experiment
import numpy as np
from numpy import inf, ndarray
import pandas as pd
import tensorflow as tf
import os
import random
import keras
import sklearn
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn import metrics
import re
from keras import optimizers
from keras import losses
from keras import regularizers
import keras.backend as K
from keras.models import model_from_json
from keras.models import load_model, Model
from tempfile import TemporaryFile
from keras import layers
from rdkit import Chem
from rdkit.Chem import Draw, Descriptors
from keras.callbacks import History, ReduceLROnPlateau
from keras.layers import Input, BatchNormalization, Activation
from keras.layers import CuDNNLSTM, Dense, Bidirectional, Dropout, Layer
from keras.initializers import glorot_normal
from keras.regularizers import l2
from functools import partial
from multiprocessing import cpu_count, Pool
from keras.utils.generic_utils import Progbar
from copy import deepcopy
from NGF.utils import filter_func_args, mol_shapes_to_dims
import NGF.utils
import NGF_layers.features
import NGF_layers.graph_layers
from NGF_layers.features import one_of_k_encoding, one_of_k_encoding_unk, atom_features, bond_features, num_atom_features, num_bond_features
from NGF_layers.features import padaxis, tensorise_smiles, concat_mol_tensors
from NGF_layers.graph_layers import temporal_padding, neighbour_lookup, NeuralGraphHidden, NeuralGraphOutput
from math import ceil
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from utility.gaussian import GaussianLayer
from utility.gaussian import custom_loss
from utility.evaluator import r_square, get_cindex, pearson_r, mse_sliced, model_evaluate
from utility.Generator import train_generator
from model import enc_mols, net

# Train data

In [0]:
# Read the table of unique SMILES strings (column 'x') and convert every
# molecule into padded atom / bond / edge tensors in a single pass.
smiles = pd.read_csv(
    'data/a375q1/80dr_cold3/alla375q1smiles.csv',
    index_col=0,
)

X_atoms, X_bonds, X_edges = tensorise_smiles(
    smiles['x'],
    max_atoms=60,
    max_degree=5,
)

In [0]:
# From here on `smiles` is a plain Python list of the SMILES strings rather
# than the DataFrame read above (this cell cannot be re-run on its own).
smiles = smiles['x'].tolist()

# Model

In [0]:
# Hyper-parameter dictionary; it is handed to enc_mols() when each ensemble
# member is built.
# NOTE(review): the training loop creates Adam with lr=0.001 rather than
# reading p['lr'] -- confirm which learning rate is intended.
p = dict(
    lr=0.0001,
    nfilters=32,
    size=8,
    conv_width=128,
    fp_length=256,
    size_drug_1=8,
    size_drug_2=4,
    size_protein_1=8,
    size_protein_2=16,
    size_protein_3=3,
    batch_size=128,
    dense_size=256,
    dense_size_2=512,
    dropout=0.25,
    l2reg=0.01,
)

# Load Cold validation data

In [0]:
# Dimensions passed to net() when the siamese model is built.
# NOTE(review): `df` is not defined in any cell visible here -- it must have
# been loaded beforehand, otherwise this line raises NameError.
num_molecules = len(df)

# Padding limits; the same values are passed literally to tensorise_smiles().
max_atoms, max_degree = 60, 5

# NOTE(review): these two integers shadow the functions of the same name
# imported from NGF_layers.features -- confirm that is intentional.
num_atom_features, num_bond_features = 62, 6

In [0]:
# Cold validation pairs; the stored index is discarded in favour of a clean
# 0..N-1 range index.
df_cold = pd.read_csv('data/pc3q1/80dr_cold/cold_val_21_10.csv', index_col=0)
df_cold = df_cold.reset_index(drop=True)

In [0]:
# The two SMILES columns of every cold-validation pair.
smiles_cold = df_cold['rdkit.x']
smiles_cold2 = df_cold['rdkit.y']

# Tensorise each side of the pair with the same padding limits used for the
# training molecules.
X_atoms_cold_1, X_bonds_cold_1, X_edges_cold_1 = tensorise_smiles(
    smiles_cold, max_atoms=60, max_degree=5)
X_atoms_cold_2, X_bonds_cold_2, X_edges_cold_2 = tensorise_smiles(
    smiles_cold2, max_atoms=60, max_degree=5)

# Regression target: the 'value' column, halved.
Y_cold = df_cold['value'] / 2

# Fit ensembles

In [0]:
# Per-model predictions on the cold set: one entry is appended to each list
# for every trained ensemble member.
cold_preds_mus, cold_preds_sigmas = [], []

In [0]:
# Train an ensemble of 50 siamese models. Every member is built from scratch,
# trained with a generator over `df`, validated on the cold set, and then its
# weights, evaluation table and Gaussian outputs (mu / sigma) are saved.
#
# NOTE(review): `df` (the training pairs handed to train_generator) is not
# defined in any cell visible here; it must be loaded before this cell runs.
# NOTE(review): the output directory is named "pretrained_in_mcf7", but the
# load_weights call below is commented out, so every member starts from a
# fresh initialisation -- confirm this is intended.

# Root directory for everything this cell writes.
OUT_DIR = 'models/Nick_ensembles_no_augm_pc3_pretrained_in_mcf7'

# Cold-set inputs, in the order used for both validation and prediction.
cold_inputs = [X_atoms_cold_1, X_bonds_cold_1, X_edges_cold_1,
               X_atoms_cold_2, X_bonds_cold_2, X_edges_cold_2]

for n in range(50):
    # A new model is created on every iteration; release the previous
    # member's graph so memory does not keep growing across the 50 models.
    K.clear_session()

    # Initialize encoder
    encoder_mols = enc_mols(p, 0.001, 128, 256)

    # Use net defined in model.py
    siamese_net = net(max_atoms, num_atom_features, max_degree,
                      num_bond_features, encoder_mols)

    # The 'main_output' layer yields [mu, sigma] (the cold predictions below
    # are split the same way). The loss must be built from THIS model's sigma
    # tensor, so it is taken from the freshly built network here.
    gauss_outputs = siamese_net.get_layer('main_output').output
    sigma = gauss_outputs[1]

    adam = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999,
                                 epsilon=1e-8, decay=0.0, amsgrad=False)
    siamese_net.compile(optimizer=adam,
                        loss=custom_loss(sigma),
                        metrics=['mse', get_cindex, r_square, pearson_r, mse_sliced])

    # Optional warm start (disabled):
    # siamese_net.load_weights('models/Nick_ensembles_no_augm/models/siam_ens_10.h5')

    # Optional augmentation (disabled):
    # df_new = augment(df_rest, df, 1)
    # df_new = df_new.sample(frac=1).reset_index(drop=True)

    # Callbacks: halve the learning rate when the training loss plateaus and
    # stop immediately if the loss becomes NaN.
    rlr = ReduceLROnPlateau(monitor='loss', factor=0.5, patience=2,
                            min_lr=0.00001, verbose=1, min_delta=1e-5)
    term = keras.callbacks.TerminateOnNaN()
    bs = 128
    NUM_EPOCHS = 10

    # Reshuffle the training pairs so each member sees a different order.
    df = df.sample(frac=1).reset_index(drop=True)

    # Set total number of training samples
    NUM_TRAIN = len(df)
    trainGen = train_generator(bs, df, smiles, X_atoms, X_bonds, X_edges)
    siamese_net.fit_generator(trainGen,
                              # int() keeps this an integer on Python 2 too,
                              # where math.ceil returns a float.
                              steps_per_epoch=int(ceil(NUM_TRAIN / bs)),
                              epochs=NUM_EPOCHS,
                              verbose=1,
                              validation_data=(cold_inputs, Y_cold),
                              shuffle=True,
                              callbacks=[term, rlr])

    # Save model
    siamese_net.save_weights('%s/models/siam_no_augment_%s.h5' % (OUT_DIR, n))

    # Decouple model at the gaussian layer so both mu and sigma can be read.
    Gauss = keras.Model(siamese_net.inputs, gauss_outputs)

    # Evaluate on the cold set and save the evaluation table
    y_pred = siamese_net.predict(cold_inputs, batch_size=2048)
    get = model_evaluate(y_pred, Y_cold)
    get.to_csv('%s/performance/Model_No_%s.csv' % (OUT_DIR, n))

    # Predict mu and sigma on cold
    cold_pred = Gauss.predict(cold_inputs, batch_size=2048)

    # Append mus and sigmas and save them at the same time
    cold_preds_mus.append(cold_pred[0])
    np.save('%s/cold/mu/cold_mu_No_%s.npy' % (OUT_DIR, n), cold_pred[0])
    cold_preds_sigmas.append(cold_pred[1])
    np.save('%s/cold/sigma/cold_sigma_No_%s.npy' % (OUT_DIR, n), cold_pred[1])