In [1]:
import pandas as pd
from keras.callbacks import History, ReduceLROnPlateau,EarlyStopping,ModelCheckpoint
import os
import numpy as np
from data_analysis import calculate_metrics, load_weights_and_evaluate
from model_builders import GCN_siam_model

Using TensorFlow backend.


In [2]:
# Read the training triplets for fold 0 and shuffle the row order,
# replacing the index with a fresh 0..n-1 range afterwards.
triplets = (
    pd.read_csv(
        "data/p38/split_aveb/fold_0/triplets_random/triplets_train_random.csv",
        index_col=0,
    )
    .sample(frac=1)
    .reset_index(drop=True)
)

In [3]:
# Preview only the first rows instead of rendering the whole training frame.
triplets.head(10)

Unnamed: 0,A,P,N,margin
0,C[S+]([O-])c1cc(CSc2nc(-c3ccc(F)cc3)c(-c3ccncc...,Fc1ccc(-c2[nH]c(SCc3ccccc3)nc2-c2ccncc2)cc1,Cc1ccc(C(N)=O)cc1Nc1nc(N[C@H]2CCNC2)nc(N(C)CC(...,random
1,CSc1nc(-c2cccc(C(F)(F)F)c2)c(-c2ccnc(NCCCO)c2)...,Fc1ccc(-c2ncoc2-c2ccc3nnc(N4CCOCC4)n3c2)cc1,Cn1c(=O)n(C)c2cc(-c3[nH]c(-c4cccs4)nc3-c3cccc(...,random
2,Cc1cccc(-c2[nH]c(CNc3cccc(CN4CCOCC4)c3F)nc2-c2...,Cc1cccc(Sc2ccc3c(-c4c(Cl)cccc4Cl)c(=O)ncn3n2)c1,CCNC(=O)c1ccc(C)c(Nc2ncnn3cc(C(=O)N[C@@H](C)c4...,random
3,CC(=O)Nc1cc(-c2c(-c3ccc(F)cc3)nc(SCCCC(=O)N3CC...,Nc1ccc2c(c1)C(=O)c1ccc(Nc3ccc(F)cc3)cc1OC2,O=C(NC1CC1)c1ccc(Cl)c(Nc2nncc3c2cnn3-c2ccc(F)c...,random
4,Nc1c(C(=O)c2ccccc2)cnn1-c1cccc([N+](=O)[O-])c1,COc1ccc(C(=O)Nc2cc(NC(=O)c3cccc(N(C)C)c3)ccc2C...,Cc1ccc(C(=O)NC2CC2)cc1-c1ccc(C(=O)NCc2cccc(NS(...,random
5,Nc1nc2ccc(-c3[nH]c(-c4c(F)cccc4F)nc3-c3ccccc3)...,COCCCn1c(SCc2ccc([S+](C)[O-])cc2)nc(-c2ccc(F)c...,CSc1nc(-c2ccc(F)cc2)c(-c2ccnc(NC(C)=O)c2)n1C,random
6,CC(C)(C)c1cc(C(=O)N2CCNC(=O)CC2)c(NC(=O)Nc2ccc...,CC(C)(C)c1nc(-c2ccc(F)cc2F)c(-c2ccc3nc(N)n(S(=...,CCCc1c(C(=O)Nc2cc(S(=O)(=O)N(C)C)ccc2C)cnn1Cc1...,random
7,CC(C)(C)c1cc(NC(=O)Nc2ccc(Cl)c(COc3cccnc3)c2)[...,Cc1cccc(-c2[nH]c(CNc3cccc(C#N)c3CN(C)C)nc2-c2c...,Cc1ccc(Oc2nccc(-c3c(-c4ccc(F)cc4)ncn3C3CCNCC3)...,random
8,Cn1c(=O)n(C)c2cc(-c3[nH]c(-c4cccs4)nc3-c3cccc(...,Cc1nnc(-c2ccc(C)c(-c3ccc(C(=O)NCc4cccc(CN5CCOC...,O=C1NCc2nc(Sc3ccc(F)cc3)ccc2N1c1c(F)cccc1Cl,random
9,Cc1cccc(-c2nn(CC(=O)Nc3cccc(C#N)c3)cc2-c2ccc3n...,O=C1NCc2nc(NCc3ccc(F)cc3F)ccc2N1c1c(Cl)cccc1Cl,N#Cc1cc(NC(=O)C(=O)c2ccc(OCCN3CCOCC3)c3ccccc23...,random


In [4]:
# Callbacks handed to the model through model_params["callbacks"].
# Both watch the training loss.
es = EarlyStopping(
    monitor="loss",
    min_delta=0,
    patience=8,
)
rlr = ReduceLROnPlateau(
    monitor="loss",
    factor=0.5,
    patience=4,
    min_lr=1e-7,
    verbose=1,
)

In [5]:
# Hyperparameters consumed by GCN_siam_model. Key order is kept as-is.
model_params = {
    # Graph / input dimensions (these match the input shapes listed in the
    # model summary: (70, 62) atoms, (70, 5, 6) bonds, (70, 5) edges).
    "num_layers": 3,
    "max_atoms": 70,
    "num_atom_features": 62,
    "num_atom_features_original": 62,
    "num_bond_features": 6,
    "max_degree": 5,
    # Three-entry lists: presumably one value per layer (num_layers == 3);
    # confirm against model_builders.
    "conv_width": [96, 104, 120],
    "fp_length": [160, 160, 160],
    "activ_enc": "selu",
    "activ_dec": "selu",
    "learning_rates": [0.001, 0.001, 0.001],
    "learning_rates_fp": [0.005, 0.005, 0.005],
    # Losses, loss weights and metric names.
    "losses_conv": {
        "neighbor_output": "mean_squared_error",
        "self_output": "mean_squared_error",
    },
    "lossWeights": {"neighbor_output": 1.0, "self_output": 1.0},
    "metrics": "mse",
    "loss_fp": "mean_squared_error",
    "enc_layer_names": ["enc_1", "enc_2", "enc_3"],
    # Training-related settings.
    "callbacks": [es, rlr],
    "adam_decay": 0.0005329142291371636,
    "beta": 5,
    "p": 0.004465204118126482,
    "dense_size": [256, 256, 256],
    "dropout_rate": [0.354, 0.354],
    "lr": 0.0005,
    "batch_size": 64,
    "n_epochs": 35,
    "margin": 0.2,
}

gcn = GCN_siam_model(model_params)

In [6]:
# Featurise the anchor (A), positive (P) and negative (N) columns in that
# order; each call yields an (atoms, bonds, edges) triple.
(
    (anchor_atoms, anchor_bonds, anchor_edges),
    (pos_atoms, pos_bonds, pos_edges),
    (neg_atoms, neg_bonds, neg_edges),
) = [gcn.dataframe_to_gcn_input(triplets[column]) for column in ("A", "P", "N")]

In [11]:
# Build the three nested models in dependency order: the encoder feeds
# build_model, and the resulting model feeds build_siam.
gcn_encoder = gcn.build_encoder()
# gcn_model is the one later used with .predict() on a single
# [atoms, bonds, edges] input to produce embeddings.
gcn_model = gcn.build_model(gcn_encoder)
# siamese takes the anchor/positive/negative inputs; the recorded output of
# this cell reports a merged output tensor of shape (?, 768).
siamese = gcn.build_siam(gcn_model)

LAYER 0
LAYER 1
LAYER 2
y_pred.shape =  Tensor("merged_layer_1/concat:0", shape=(?, 768), dtype=float32)


In [8]:
# Model.summary() prints the layer table itself and returns None, so wrapping
# it in print() only appends a stray "None" line to the output.
siamese.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
atom_inputs_anchor (InputLayer) (None, 70, 62)       0                                            
__________________________________________________________________________________________________
bond_inputs_anchor (InputLayer) (None, 70, 5, 6)     0                                            
__________________________________________________________________________________________________
edge_inputs_anchor (InputLayer) (None, 70, 5)        0                                            
__________________________________________________________________________________________________
atom_inputs_pos (InputLayer)    (None, 70, 62)       0                                            
__________________________________________________________________________________________________
bond_input

In [9]:
# Fresh callback instances for the siamese fit below; the LR scheduler uses a
# shorter patience (2) than the one defined earlier in the notebook (4).
es = EarlyStopping(
    monitor="loss",
    min_delta=0,
    patience=8,
)
rlr2 = ReduceLROnPlateau(
    monitor="loss",
    factor=0.5,
    patience=2,
    min_lr=1e-7,
    verbose=1,
)

In [12]:
# Placeholder targets required by `fit`. The name suggests the loss does not
# read them (TODO: confirm in model_builders). The width 768 matches the merged
# output shape reported when the siamese model was built.
# np.zeros is used instead of np.empty: np.empty returns uninitialised memory,
# which can contain NaN/inf and differs between runs, so any arithmetic that
# touches the targets could silently corrupt the loss.
Y_dummy = np.zeros((anchor_atoms.shape[0], 768))

# Train on the triplets; inputs are ordered anchor, positive, negative, each as
# (atoms, bonds, edges). No validation data is supplied, so the callbacks
# monitor the training loss.
siamese.fit(
    [
        anchor_atoms, anchor_bonds, anchor_edges,
        pos_atoms, pos_bonds, pos_edges,
        neg_atoms, neg_bonds, neg_edges,
    ],
    Y_dummy,
    batch_size=256,
    epochs=10,
    verbose=2,
    shuffle=True,
    validation_data=None,
    callbacks=[es, rlr2],
)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/10
 - 56s - loss: 0.4998
Epoch 2/10
 - 47s - loss: 0.4996
Epoch 3/10
 - 47s - loss: 0.3649
Epoch 4/10
 - 47s - loss: 0.2203
Epoch 5/10
 - 47s - loss: 0.1720
Epoch 6/10
 - 47s - loss: 0.1472
Epoch 7/10
 - 47s - loss: 0.1299
Epoch 8/10
 - 47s - loss: 0.1143
Epoch 9/10
 - 47s - loss: 0.1060
Epoch 10/10
 - 47s - loss: 0.1083


<keras.callbacks.History at 0x1c5b4c8b470>

In [13]:
# Load the fold-0 validation set and featurise its "rdkit" column.
val = pd.read_csv(
    "data/p38/split_aveb/fold_0/val_0.csv",
    index_col=0,
)
val_atoms, val_bonds, val_edges = gcn.dataframe_to_gcn_input(
    val["rdkit"]
)

In [14]:
# Run the single-branch model (not the siamese wrapper) on the validation
# molecules to obtain their embeddings.
embeddings = gcn_model.predict(
    [val_atoms, val_bonds, val_edges]
)

In [15]:
# Wrap the validation embeddings in a DataFrame (default integer row and
# column labels) so they can be written out as CSV.
df_embeddings = pd.DataFrame(data=embeddings)

In [16]:
# DataFrame.to_csv does not create missing parent folders, so make sure the
# nested output directory exists before writing (no-op if it already does).
os.makedirs("data/p38/split_aveb/fold_0/embeddings/embeddings_0", exist_ok=True)
df_embeddings.to_csv("data/p38/split_aveb/fold_0/embeddings/embeddings_0/embeddings_val.csv")

In [17]:
# Load the fold-0 training molecules and featurise the "x" column.
train = pd.read_csv(
    "data/p38/split_aveb/fold_0/trainsmiles.csv",
    index_col=0,
)
train_atoms, train_bonds, train_edges = gcn.dataframe_to_gcn_input(
    train["x"]
)

In [18]:
# Embed the training molecules with the single-branch model and store them
# as CSV in the same folder used for the validation embeddings.
embeddings_train = gcn_model.predict([train_atoms, train_bonds, train_edges])
df_embeddings_train = pd.DataFrame(embeddings_train)
# DataFrame.to_csv does not create missing parent folders, so make sure the
# nested output directory exists before writing (no-op if it already does).
os.makedirs("data/p38/split_aveb/fold_0/embeddings/embeddings_0", exist_ok=True)
df_embeddings_train.to_csv("data/p38/split_aveb/fold_0/embeddings/embeddings_0/embeddings_train.csv")