In [2]:
from rdkit import Chem
import networkx as nx
from karateclub import Graph2Vec
import numpy as np
import pandas as pd
from rdkit.Chem import Draw
from IPython.display import Image
import networkx as nx
import matplotlib.pyplot as plt
from karateclub import Graph2Vec
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from imblearn.over_sampling import SMOTE
from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Activation
from tensorflow.keras import layers
import time

In [3]:
data = pd.read_csv('./HIV.csv')
def mol_to_nx(mol):
    G = nx.Graph()

    for atom in mol.GetAtoms():
        G.add_node(atom.GetIdx(),
                   atomic_num=atom.GetAtomicNum(),
                   is_aromatic=atom.GetIsAromatic(),
                   atom_symbol=atom.GetSymbol())
        
    for bond in mol.GetBonds():
        G.add_edge(bond.GetBeginAtomIdx(),
                   bond.GetEndAtomIdx(),
                   bond_type=bond.GetBondType())
        
    return G
print("Transform smiles to rdkit mol object")
data['mol'] = data['smiles'].apply(lambda x: Chem.MolFromSmiles(x))      
print("Create Networkx object from RDKit")
data['graph'] = data['mol'].apply(lambda x: mol_to_nx(x))
print("Create graph embedding")
model = Graph2Vec()
model.fit(data['graph'])
hiv_graph2vec = model.get_embedding()
hiv_graph2vec = pd.DataFrame(hiv_graph2vec)
X = hiv_graph2vec
y = data['HIV_active']
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)
print('Synthetic balanced dataset')
print(y.value_counts())

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

Transform smiles to rdkit mol object




Create Networkx object from RDKit
Create graph embedding
Synthetic balanced dataset
0    40426
1    40426
Name: HIV_active, dtype: int64


In [6]:

def build_model(hp):
    model = Sequential()
    #model.add(Dense(128, input_dim=128, activation='relu'))
    model.add(Dense(hp.Int('input_units',
                                min_value=128,
                                max_value=256,
                                step=32), input_dim=128))
    model.add(Activation('relu'))
    for i in range(hp.Int('n_layers', 1, 4)):  # adding variation of layers.
        model.add(Dense(hp.Int(f'dense_{i}_units',
                                min_value=32,
                                max_value=256,
                                step=32)))
        model.add(Activation('relu'))
    
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

In [7]:

LOG_DIR = f"{int(time.time())}"
from kerastuner.tuners import RandomSearch
from kerastuner.engine.hyperparameters import HyperParameters
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=1,  # how many model variations to test?
    executions_per_trial=1,  # how many trials per variation? (same model could perform differently)
    directory=LOG_DIR)

In [8]:
tuner.search_space_summary()

Search space summary
Default search space size: 3
input_units (Int)
{'default': None, 'conditions': [], 'min_value': 128, 'max_value': 256, 'step': 32, 'sampling': None}
n_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 4, 'step': 1, 'sampling': None}
dense_0_units (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 256, 'step': 32, 'sampling': None}


In [9]:
tuner.search(x=X_train,
             y=y_train,
             verbose=2, # just slapping this here bc jupyter notebook. The console out was getting messy.
             epochs=10,
             batch_size=64,
             #callbacks=[tensorboard],  # if you have callbacks like tensorboard, they go here.
             validation_data=(X_test, y_test))

Trial 1 Complete [00h 00m 33s]
val_accuracy: 0.9204749464988708

Best val_accuracy So Far: 0.9204749464988708
Total elapsed time: 00h 00m 33s
INFO:tensorflow:Oracle triggered exit


In [10]:
tuner.results_summary()

Results summary
Results in 1651062637\untitled_project
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x000001D637A3C610>
Trial summary
Hyperparameters:
input_units: 224
n_layers: 3
dense_0_units: 96
dense_1_units: 32
dense_2_units: 32
Score: 0.9204749464988708


In [11]:
tuner.get_best_hyperparameters()[0].values

{'input_units': 224,
 'n_layers': 3,
 'dense_0_units': 96,
 'dense_1_units': 32,
 'dense_2_units': 32}