In [1]:
import pandas as pd
from keras.callbacks import History, ReduceLROnPlateau,EarlyStopping,ModelCheckpoint
import os
import numpy as np
from data_analysis import calculate_metrics, load_weights_and_evaluate
from model_builders import GCN_siam_model

Using TensorFlow backend.


In [2]:
# Triplet training set for fold 0 of the p38 "split_aveb" split; the first CSV
# column becomes the row index.
triplets = pd.read_csv(
    "data/p38/split_aveb/fold_0/triplets_train_0.csv",
    index_col=0,
)

In [3]:
# Preview only the first rows. A bare `triplets` asks the notebook to render the
# frame up to pandas' display limit, which bloats the saved output without
# telling the reader anything more about the A / P / N / margin layout.
triplets.head(10)

Unnamed: 0,A,P,N,margin
1,CC1(c2nnc3ccc(-c4ocnc4-c4cc(F)c(F)cc4F)cn23)CC1,O=c1c(O)cc2c(-c3ccccc3Cl)cc(O)cc2n1-c1c(Cl)ccc...,CCCNc1nnc2ccc(-c3ocnc3-c3ccc(F)cc3)cn12,margin = 0.45
2,CC1(c2nnc3ccc(-c4ocnc4-c4cc(F)c(F)cc4F)cn23)CC1,CCNC(=O)c1cn2ncnc(Nc3cc(C(=O)Nc4ncco4)ccc3C)c2c1C,FC(F)c1nnc2ccc(-c3ocnc3-c3ccccc3)cn12,margin = 0.45
3,CC1(c2nnc3ccc(-c4ocnc4-c4cc(F)c(F)cc4F)cn23)CC1,O=C1NCc2nc(Sc3c(Cl)cccc3Cl)ccc2N1c1c(Cl)cccc1Cl,Nc1c(C(=O)c2ccccc2)cc[n+]([O-])c1-c1ccc(Cl)cc1,random
4,CC1(c2nnc3ccc(-c4ocnc4-c4cc(F)c(F)cc4F)cn23)CC1,CC(C)(C)c1cc(NC(=O)Nc2cccc(Cl)c2Cl)n(-c2cccc([...,COC(=O)c1sc(C(C)(C)C)cc1NC(=O)Nc1ccc(O)cc1,random
5,CC(C)(C)c1cc(NC(=O)Nc2ccccc2)n(-c2cccc([N+](=O...,COCCNC(=O)c1ccc(-c2c3nccc(-c4ccc(F)cc4F)c3cc[n...,CC(C)(C)c1cc(NC(=O)Nc2cccc(Cl)c2Cl)n(-c2cccc([...,margin = 0.35
6,CC(C)(C)c1cc(NC(=O)Nc2ccccc2)n(-c2cccc([N+](=O...,O=C(O)Cn1c(-c2ccncc2)c(-c2ccc(F)cc2)c2ncccc21,CC(C)(C)c1cc(NC(=O)Nc2ccccc2)n(-c2cccc(CNC(=O)...,margin = 0.35
7,CC(C)(C)c1cc(NC(=O)Nc2ccccc2)n(-c2cccc([N+](=O...,C[C@H](Nc1nccc(-c2c(-c3ccc(F)cc3)ncn2Cc2cccc(C...,CC(C)(C)c1cc(NC(=O)Nc2cccc(Cl)c2Cl)n(-c2ccccc2)n1,margin = 0.35
8,CC(C)(C)c1cc(NC(=O)Nc2ccccc2)n(-c2cccc([N+](=O...,CC(C)COc1ccccc1-c1cc(-c2ccc(Cl)cc2)[nH]c1-c1cc...,Cc1ccn(CC(=O)NCc2cccc(-n3nc(C(C)(C)C)cc3NC(=O)...,margin = 0.35
9,CC(C)(C)c1cc(NC(=O)Nc2ccccc2)n(-c2cccc([N+](=O...,OCC(O)CSc1nc(-c2ccc(F)cc2)c(-c2ccncc2)[nH]1,CC(C)(C)c1cc(NC(=O)Nc2ccccc2)n(-c2cccc(CNC(=O)...,margin = 0.35
10,CC(C)(C)c1cc(NC(=O)Nc2ccccc2)n(-c2cccc([N+](=O...,CSc1nc(-c2ccc(F)cc2)c(-c2ccnc(NC(C)=O)c2)n1CCC...,Cc1ccc(NC(=O)c2ccccc2)cc1-c1ccc(C(=O)NCC2CC2)cc1,random


In [4]:
# Both callbacks watch the training loss (no validation metric is monitored).

# Stop training once the loss has gone 8 epochs without improving.
es = EarlyStopping(
    monitor="loss",
    min_delta=0,
    patience=8,
)

# Halve the learning rate after 4 stagnant epochs, never going below 1e-7.
rlr = ReduceLROnPlateau(
    monitor="loss",
    factor=0.5,
    patience=4,
    min_lr=1e-7,
    verbose=1,
)

In [10]:
# Hyper-parameters handed to GCN_siam_model. How each key is interpreted is
# defined in model_builders, which is not visible from this notebook.
# NOTE(review): the training cell passes batch_size=128 to fit() explicitly,
# while "batch_size" here is 64 -- confirm which value is meant to apply.
model_params = {
    # --- graph encoder ---
    "num_layers": 3,
    "max_atoms": 70,
    "num_atom_features": 62,
    "num_atom_features_original": 62,
    "num_bond_features": 6,
    "max_degree": 5,
    "conv_width": [96, 104, 120],
    "fp_length": [160, 160, 160],
    "activ_enc": "selu",
    "activ_dec": "selu",
    "learning_rates": [0.001, 0.001, 0.001],
    "learning_rates_fp": [0.005, 0.005, 0.005],
    "losses_conv": {
        "neighbor_output": "mean_squared_error",
        "self_output": "mean_squared_error",
    },
    "lossWeights": {"neighbor_output": 1.0, "self_output": 1.0},
    "metrics": "mse",
    "loss_fp": "mean_squared_error",
    "enc_layer_names": ["enc_1", "enc_2", "enc_3"],
    # --- training / optimiser ---
    "callbacks": [es, rlr],
    "adam_decay": 0.0005329142291371636,
    "beta": 5,
    "p": 0.004465204118126482,
    # --- dense head ---
    "dense_size": [256, 192, 96],
    "dropout_rate": [0.354, 0.354],
    "lr": 0.0005,
    "batch_size": 64,
    "n_epochs": 35,
    "margin": 0.2,
}

gcn = GCN_siam_model(model_params)

In [6]:
# Featurise the anchor (A), positive (P) and negative (N) columns in that order;
# each call yields an (atoms, bonds, edges) triple.
anchor_inputs, pos_inputs, neg_inputs = (
    gcn.dataframe_to_gcn_input(triplets[column]) for column in ("A", "P", "N")
)

anchor_atoms, anchor_bonds, anchor_edges = anchor_inputs
pos_atoms, pos_bonds, pos_edges = pos_inputs
neg_atoms, neg_bonds, neg_edges = neg_inputs

In [11]:
# Assemble the network in three stages; each stage wraps the previous one.
# `gcn_model` is the object later used with .predict() to produce per-molecule
# embeddings, and `siamese` is the object that is trained on the
# anchor/positive/negative inputs.
# NOTE(review): presumably the three branches of `siamese` share the weights of
# `gcn_model` -- confirm in model_builders.
gcn_encoder = gcn.build_encoder()
gcn_model = gcn.build_model(gcn_encoder)
siamese = gcn.build_siam(gcn_model)

LAYER 0
LAYER 1
LAYER 2
y_pred.shape =  Tensor("merged_layer_1/concat:0", shape=(?, 288), dtype=float32)


In [12]:
# Model.summary() prints the layer table itself and returns None, so wrapping it
# in print() only appends a stray "None" line to the output.
siamese.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
atom_inputs_anchor (InputLayer) (None, 70, 62)       0                                            
__________________________________________________________________________________________________
bond_inputs_anchor (InputLayer) (None, 70, 5, 6)     0                                            
__________________________________________________________________________________________________
edge_inputs_anchor (InputLayer) (None, 70, 5)        0                                            
__________________________________________________________________________________________________
atom_inputs_pos (InputLayer)    (None, 70, 62)       0                                            
__________________________________________________________________________________________________
bond_input

In [None]:
# Callbacks for the siamese training run; both monitor the training loss.

# Fresh early-stopping instance: give up after 8 epochs without improvement.
es = EarlyStopping(
    monitor="loss",
    min_delta=0,
    patience=8,
)

# Halve the learning rate after only 2 stagnant epochs, floor at 1e-7.
rlr2 = ReduceLROnPlateau(
    monitor="loss",
    factor=0.5,
    patience=2,
    min_lr=1e-7,
    verbose=1,
)

In [13]:
# Keras' fit() requires a target array even though the targets here are only a
# placeholder. Use zeros rather than np.empty: np.empty returns uninitialised
# memory, which can contain NaN/inf and differs from run to run, so any
# arithmetic touching y_true (loss or metrics) could silently poison training.
# The width 288 must match the siamese output width reported when the model was
# built (shape=(?, 288)).
Y_dummy = np.zeros((anchor_atoms.shape[0], 288))

# Input order: anchor, positive, negative -- each as (atoms, bonds, edges).
siamese.fit([anchor_atoms, anchor_bonds, anchor_edges,
              pos_atoms, pos_bonds, pos_edges,
              neg_atoms, neg_bonds, neg_edges],Y_dummy,
                    batch_size=128,
                    epochs=35,
                    verbose=2,
                    shuffle=True,
                    validation_data=None, callbacks = [es,rlr2])

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/35
 - 31s - loss: 0.2047
Epoch 2/35
 - 21s - loss: 0.1999
Epoch 3/35
 - 21s - loss: 0.1998
Epoch 4/35
 - 21s - loss: 0.2002
Epoch 5/35
 - 20s - loss: 0.1998
Epoch 6/35
 - 20s - loss: 0.2001
Epoch 7/35
 - 20s - loss: 0.1990
Epoch 8/35
 - 21s - loss: 0.1811
Epoch 9/35
 - 21s - loss: 0.1008
Epoch 10/35
 - 21s - loss: 0.0624
Epoch 11/35
 - 21s - loss: 0.0518
Epoch 12/35
 - 21s - loss: 0.0457
Epoch 13/35
 - 20s - loss: 0.0715
Epoch 14/35
 - 20s - loss: 0.0513
Epoch 15/35
 - 20s - loss: 0.0224
Epoch 16/35
 - 20s - loss: 0.0325
Epoch 17/35
 - 20s - loss: 0.0226
Epoch 18/35
 - 20s - loss: 0.0284
Epoch 19/35
 - 20s - loss: 0.0264
Epoch 20/35
 - 20s - loss: 0.0166
Epoch 21/35
 - 20s - loss: 0.0228
Epoch 22/35
 - 20s - loss: 0.0237
Epoch 23/35
 - 20s - loss: 0.0135
Epoch 24/35
 - 20s - loss: 0.0180
Epoch 25/35
 - 20s - loss: 0.0149
Epoch 26/35
 - 20s - loss: 0.0163
Epoch 27/35
 - 20s - loss: 0.0186
Epoch 28/35
 - 20s - loss: 0.0145
Epoch 29/35
 - 20s - loss: 0.0096
Epoch 30/35
 - 20s - lo

<keras.callbacks.History at 0x13ed36c4550>

In [15]:
# Validation split for fold 0; the first CSV column becomes the row index.
val = pd.read_csv(
    "data/p38/split_aveb/fold_0/val_0.csv",
    index_col=0,
)

# Featurise the "rdkit" column (presumably SMILES strings -- confirm against
# the CSV) into the (atoms, bonds, edges) arrays the model consumes.
val_molecules = val["rdkit"]
val_atoms, val_bonds, val_edges = gcn.dataframe_to_gcn_input(val_molecules)

In [16]:
# Embed the validation molecules with the single-branch model (not the siamese
# wrapper), feeding atoms, bonds and edges in that order.
val_inputs = [val_atoms, val_bonds, val_edges]
embeddings = gcn_model.predict(val_inputs)

In [17]:
# Wrap the raw prediction array in a DataFrame (default integer row/column labels).
df_embeddings = pd.DataFrame(data=embeddings)

In [19]:
# Persist the fold-0 validation embeddings next to the input data.
df_embeddings.to_csv(
    path_or_buf="data/p38/split_aveb/fold_0/embeddings_0.csv",
)

In [21]:
# Training molecules for fold 0; the first CSV column becomes the row index.
train = pd.read_csv(
    "data/p38/split_aveb/fold_0/trainsmiles.csv",
    index_col=0,
)

# Featurise column "x" into the (atoms, bonds, edges) arrays the model consumes.
train_molecules = train["x"]
train_atoms, train_bonds, train_edges = gcn.dataframe_to_gcn_input(train_molecules)

In [22]:
# Embed the training molecules with the single-branch model and store the
# result alongside the validation embeddings.
train_inputs = [train_atoms, train_bonds, train_edges]
embeddings_train = gcn_model.predict(train_inputs)

df_embeddings_train = pd.DataFrame(data=embeddings_train)
df_embeddings_train.to_csv(
    path_or_buf="data/p38/split_aveb/fold_0/embeddings_train_0.csv",
)