In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
import numpy as np

In [2]:
df_train= pd.read_csv("../data/processed/train_w_desc.csv")
df_test= pd.read_csv("../data/processed/test_w_desc.csv")
df_valid= pd.read_csv("../data/processed/valid_w_desc.csv")
train_rdkfp= pd.read_csv("../data/processed/train_rdkfp.csv")
valid_rdkfp= pd.read_csv("../data/processed/valid_rdkfp.csv")
test_rdkfp= pd.read_csv("../data/processed/test_rdkfp.csv")

In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, Bidirectional, Dropout, concatenate
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras import Input, Model

In [None]:
# Reference material on https://pyimagesearch.com/2019/02/04/keras-multiple-inputs-and-mixed-data/

In [4]:
#Numerical data aka chemical properties
X_train= df_train[["tpsa","mol_w","qed","HBA","HBD","logP","MR"]]
y_train= df_train["Y"]
X_test= df_test[["tpsa","mol_w","qed","HBA","HBD","logP","MR"]]
y_test= df_test["Y"]
X_valid= df_valid[["tpsa","mol_w","qed","HBA","HBD","logP","MR"]]
y_valid= df_valid["Y"]

In [5]:
from sklearn.preprocessing import MinMaxScaler

In [6]:
scaler= MinMaxScaler()

In [7]:
scaler.fit(X_train)

MinMaxScaler()

In [8]:
X_train_scaled= scaler.transform(X_train)
X_test_scaled= scaler.transform(X_test)
X_valid_scaled= scaler.transform(X_valid)

In [9]:
X_train_scaled.shape, X_test_scaled.shape, X_valid_scaled.shape

((28789, 7), (8225, 7), (4113, 7))

In [10]:
metrics = [
    keras.metrics.FalseNegatives(name="fn"),
    keras.metrics.FalsePositives(name="fp"),
    keras.metrics.TruePositives(name="tp"),
    keras.metrics.Recall(name="recall"),
    keras.metrics.AUC(curve="ROC", name="roc-auc")
    ]

In [17]:
Dense??

[1;31mInit signature:[0m
[0mDense[0m[1;33m([0m[1;33m
[0m    [0munits[0m[1;33m,[0m[1;33m
[0m    [0mactivation[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0muse_bias[0m[1;33m=[0m[1;32mTrue[0m[1;33m,[0m[1;33m
[0m    [0mkernel_initializer[0m[1;33m=[0m[1;34m'glorot_uniform'[0m[1;33m,[0m[1;33m
[0m    [0mbias_initializer[0m[1;33m=[0m[1;34m'zeros'[0m[1;33m,[0m[1;33m
[0m    [0mkernel_regularizer[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mbias_regularizer[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mactivity_regularizer[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mkernel_constraint[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mbias_constraint[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [1;33m**[0m[0mkwargs[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mSource:[0m        
[1;33m@[0m[0mkeras_export[0m[1;33m([0m[1;34m"ke

In [32]:
descriptors_in= Input(shape=(X_train_scaled.shape[-1]), name="descriptors")
dense_a = Dense(8, activation= "relu")(descriptors_in)

fingerprints_in= Input(shape=2048, name= "fingerprints")
dense_b= Dense(32, activation= "sigmoid")(fingerprints_in)

merged= concatenate([dense_a, dense_b])

merged= Dense(64, activation= "relu")(merged)
merged= Dropout(0.3)(merged)
merged= Dense(16, activation= "relu")(merged)
merged= Dropout(0.3)(merged)

drug_class= Dense(1, activation= "sigmoid")(merged)

model= Model([descriptors_in, fingerprints_in], drug_class)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=metrics)

checkpoint_path = "checkpoints/weights_func.{epoch:02d}.h5" 
model_checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, save_weights_only=False, monitor='val_roc-auc', save_best_only=True)
callbacks = [EarlyStopping(patience=5), model_checkpoint_callback]

In [11]:
train_inputs = {"descriptors": X_train_scaled, "fingerprints": train_rdkfp} 
valid_inputs =  {"descriptors": X_valid_scaled, "fingerprints": valid_rdkfp}

In [33]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 descriptors (InputLayer)       [(None, 7)]          0           []                               
                                                                                                  
 fingerprints (InputLayer)      [(None, 2048)]       0           []                               
                                                                                                  
 dense_7 (Dense)                (None, 8)            64          ['descriptors[0][0]']            
                                                                                                  
 dense_8 (Dense)                (None, 32)           65568       ['fingerprints[0][0]']           
                                                                                            

In [34]:
history = model.fit(x=train_inputs, y=y_train, validation_data=(valid_inputs, y_valid), callbacks=callbacks, epochs=5, batch_size=16)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [35]:
history2= model.fit(x=train_inputs, y=y_train, validation_data=(valid_inputs, y_valid), callbacks=callbacks, epochs=20, batch_size=16)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20


In [36]:
#model 2 with Dense layers with more nodes
descriptors_in= Input(shape=(X_train_scaled.shape[-1]), name="descriptors")
dense_a = Dense(8, activation= "relu")(descriptors_in)

fingerprints_in= Input(shape=2048, name= "fingerprints")
dense_b= Dense(32, activation= "sigmoid")(fingerprints_in)

merged= concatenate([dense_a, dense_b])

merged= Dense(128, activation= "relu")(merged)
merged= Dropout(0.3)(merged)
merged= Dense(32, activation= "relu")(merged)
merged= Dropout(0.3)(merged)

drug_class= Dense(1, activation= "sigmoid")(merged)

model2= Model([descriptors_in, fingerprints_in], drug_class)
model2.compile(loss='binary_crossentropy', optimizer='adam', metrics=metrics)

checkpoint_path = "checkpoints/weights_func2.{epoch:02d}.h5" 
model_checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, save_weights_only=False, monitor='val_recall', save_best_only=True)
callbacks = [EarlyStopping(patience=5), model_checkpoint_callback]

In [37]:
history3= model2.fit(x=train_inputs, y=y_train, validation_data=(valid_inputs, y_valid), callbacks=callbacks, epochs=20, batch_size=16)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [38]:
history4= model2.fit(x=train_inputs, y=y_train, validation_data=(valid_inputs, y_valid), callbacks=callbacks, epochs=20, batch_size=16)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20


In [12]:
#model 3 with Dense layers with more nodes
descriptors_in= Input(shape=(X_train_scaled.shape[-1]), name="descriptors")
dense_a = Dense(64, activation= "relu")(descriptors_in)

fingerprints_in= Input(shape=2048, name= "fingerprints")
dense_b= Dense(128, activation= "sigmoid")(fingerprints_in)

merged= concatenate([dense_a, dense_b])

merged= Dense(512, activation= "relu")(merged)
merged= Dropout(0.3)(merged)
merged= Dense(256, activation= "relu")(merged)
merged= Dropout(0.3)(merged)
merged= Dense(64, activation= "relu")(merged)
merged= Dropout(0.3)(merged)

drug_class= Dense(1, activation= "sigmoid")(merged)

model3= Model([descriptors_in, fingerprints_in], drug_class)
model3.compile(loss='binary_crossentropy', optimizer='adam', metrics=metrics)

checkpoint_path = "checkpoints/weights_func2.{epoch:02d}.h5" 
model_checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, save_weights_only=False, monitor='val_recall', save_best_only=True)
callbacks = [EarlyStopping(patience=5), model_checkpoint_callback]

In [13]:
history5= model3.fit(x=train_inputs, y=y_train, validation_data=(valid_inputs, y_valid), callbacks=callbacks, epochs=20, batch_size=16)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [14]:
history6= model3.fit(x=train_inputs, y=y_train, validation_data=(valid_inputs, y_valid), callbacks=callbacks, epochs=20, batch_size=16)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
