In [None]:
import tensorflow as tf
import keras

In [None]:
import pysmiles
import numpy as np

In [None]:
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense

## Load some data

In [None]:
import pyrfume

In [None]:
# Fetch the two Leffingwell tables from the remote pyrfume archive:
# the molecule table first, then the behavior (label) table.
molecules, behavior = (
    pyrfume.load_data(csv_path, remote=True)
    for csv_path in ('leffingwell/molecules.csv', 'leffingwell/behavior.csv')
)

In [None]:
# Fixed tensor sizes shared by the model inputs (build_model) and the
# NetworkX -> tensor conversion (convertFromNetworkX).
# Leading dimension of the "vertices" input and both dimensions of "adj".
MAX_NODES = 100
# Leading dimension of the "edges" input and both dimensions of "edgeAdjacency".
MAX_EDGES = 500
# Trailing feature dimension of the "vertices", "edges" and "universal" inputs.
EMBEDDING_DIM = 120

In [None]:
from GraphLayers import *

In [None]:
def build_model(params):
    """Build the (uncompiled) Keras model for one hyperparameter configuration.

    The network takes six named inputs -- ``vertices``, ``edges``,
    ``universal``, ``adj``, ``connectedEdges`` and ``edgeAdjacency`` -- and
    threads them, as a list, through ``params["graph_layers"]`` rounds of
    ``params["pool_steps"]`` ``PoolStep`` layers followed by one
    ``GraphUpdate``. A last ``PoolStep`` with only ``p_vu`` and ``p_eu``
    enabled is applied, and element 2 of the resulting list is passed
    through a dense head that ends in a single sigmoid unit.

    Args:
        params: Dict providing ``graph_layers``, ``pool_steps``,
            ``step_{k}_pve`` .. ``step_{k}_peu`` for each pool step ``k``,
            ``embedding_dim``, ``update_function_depth`` and
            ``num_final_layers``.

    Returns:
        A ``Model`` mapping the six inputs to one sigmoid output.
    """
    # Suffixes of the per-step switches, in the positional order PoolStep
    # receives them.
    pool_flags = ("pve", "pee", "pue", "pvv", "pev", "puv", "pvu", "peu")

    graph_inputs = [
        Input(shape=(MAX_NODES, EMBEDDING_DIM), name="vertices"),
        Input(shape=(MAX_EDGES, EMBEDDING_DIM), name="edges"),
        Input(shape=(EMBEDDING_DIM,), name="universal"),
        Input(shape=(MAX_NODES, MAX_NODES), name="adj"),
        Input(shape=(MAX_NODES, MAX_EDGES), name="connectedEdges"),
        Input(shape=(MAX_EDGES, MAX_EDGES), name="edgeAdjacency"),
    ]

    # Work on a separate list so the model's input list is never aliased.
    state = list(graph_inputs)

    for _ in range(params["graph_layers"]):
        # The same per-step switches are reused in every graph layer.
        for step in range(params["pool_steps"]):
            switches = (params[f"step_{step}_{flag}"] for flag in pool_flags)
            state = PoolStep(*switches)(state)
        state = GraphUpdate(
            params["embedding_dim"],
            params["embedding_dim"],
            params["embedding_dim"],
            params["update_function_depth"],
        )(state)

    # Final pooling with only p_vu / p_eu switched on (presumably
    # vertex->universal and edge->universal; confirm in GraphLayers).
    state = PoolStep(
        p_ve=False, p_ee=False, p_ue=False,
        p_vv=False, p_ev=False, p_uv=False,
        p_vu=True, p_eu=True,
    )(state)

    # Element 2 sits in the same slot as the "universal" input.
    head = state[2]
    for _ in range(params["num_final_layers"]):
        head = Dense(4, activation="relu")(head)
    head = Dense(1, activation="sigmoid")(head)

    return Model(inputs=graph_inputs, outputs=head)

In [None]:
import random

In [None]:
def generate_parameter_set(rng=None):
    """Draw one random hyperparameter configuration for the model search.

    Args:
        rng: Optional source of randomness exposing ``randint`` and
            ``choice`` (for example a seeded ``random.Random``). When
            omitted, the module-level ``random`` generator is used, which
            is exactly what this function did before the argument existed.
            Passing a seeded generator makes a search run reproducible.

    Returns:
        dict: A JSON-serialisable mapping with the keys ``graph_layers``,
        ``pool_steps``, ``update_function_depth``, eight boolean
        ``step_{k}_<flag>`` switches for every pool step ``k``,
        ``embedding_dim``, ``num_final_layers``, ``optimizer``,
        ``learning_rate`` and ``batch_size``.
    """
    rng = random if rng is None else rng

    # Suffixes of the boolean switches drawn for every pool step.
    pool_flags = ("pve", "pee", "pue", "pvv", "pev", "puv", "pvu", "peu")

    # Learning rates are currently fixed per optimizer. A uniform search
    # (RMSProp 1e-4..1e-1, Adam 1e-5..1e-1, SGD 1e-3..1e-1) had been
    # sketched earlier but was disabled.
    fixed_learning_rates = {"RMSProp": 0.001, "Adam": 0.001, "SGD": 0.01}

    # The draw order below is kept stable so that a given seed always
    # yields the same configuration.
    params = {}

    params["graph_layers"] = rng.randint(1, 5)
    params["pool_steps"] = rng.randint(0, 5)  # 0 means no pooling between updates
    params["update_function_depth"] = rng.randint(1, 5)

    for k in range(params["pool_steps"]):
        for flag in pool_flags:
            params[f"step_{k}_{flag}"] = rng.choice([True, False])

    params["embedding_dim"] = rng.randint(10, 100)
    params["num_final_layers"] = rng.randint(1, 5)

    params["optimizer"] = rng.choice(["RMSProp", "Adam", "SGD"])
    params["learning_rate"] = fixed_learning_rates[params["optimizer"]]

    params["batch_size"] = rng.choice([4, 8, 16, 32])

    return params

# Convert all data

In [None]:
# One list per model input; entry i of each list belongs to molecule i.
allVert, allEdj, allUni = [], [], []
allAm, allConn, allEdgeAdj = [], [], []

# "aromatic" label of each molecule, in the same order as the lists above.
aromaticVals = []

# Destination lists in the order convertFromNetworkX returns its six parts.
tensor_sinks = (allVert, allEdj, allUni, allAm, allConn, allEdgeAdj)

# NOTE(review): molecules and behavior are paired purely by row position;
# confirm both tables are in the same row order.
for row in range(len(molecules)):
    smiles = molecules["IsomericSMILES"].iloc[row]
    label = behavior["aromatic"].iloc[row]

    # Parse the SMILES string into a graph with explicit hydrogen atoms.
    mol_graph = pysmiles.read_smiles(smiles, explicit_hydrogen=True)
    vert, edge, univ, adjacency, connected, edge_adjacency = convertFromNetworkX(
        mol_graph, MAX_NODES, MAX_EDGES, EMBEDDING_DIM
    )

    for sink, part in zip(tensor_sinks, (vert, edge, univ, adjacency, connected, edge_adjacency)):
        sink.append(part)
    aromaticVals.append(label)

In [None]:
# Stack the per-molecule lists into arrays, keyed by the names of the
# model's Input layers so the dict can be passed straight to fit().
data = {
    input_name: np.array(per_molecule)
    for input_name, per_molecule in (
        ("vertices", allVert),
        ("edges", allEdj),
        ("universal", allUni),
        ("adj", allAm),
        ("connectedEdges", allConn),
        ("edgeAdjacency", allEdgeAdj),
    )
}

In [None]:
import gc
import json
import os
import time
from datetime import datetime

# Directory that collects one JSON-lines log file per search run.
log_dir = "Experiment_Logs/OdourBenchmark/"
# Create it up front; otherwise the first append would fail only after a
# complete training run had already finished.
os.makedirs(log_dir, exist_ok=True)

# The format previously read "%Y%_%H": the stray "%" before "_" is not a
# standard strftime directive, so the hour field was not rendered reliably.
log_file = log_dir + datetime.now().strftime("Run_%m_%d_%Y_%H:%M:%S") + ".jsonl"

# Maps the optimizer names produced by generate_parameter_set to Keras
# classes. An unknown name now raises KeyError instead of silently reusing
# the optimizer of a previous iteration.
optimizer_classes = {
    "RMSProp": tf.keras.optimizers.RMSprop,
    "Adam": tf.keras.optimizers.Adam,
    "SGD": tf.keras.optimizers.SGD,
}

# The labels do not change between trials, so convert them once.
labels = np.array(aromaticVals)

# Endless random search: sample a configuration, train it, append one log
# line, repeat. Interrupt the kernel to stop.
while True:
    # Drop the global Keras state left behind by the previous trial so that
    # memory does not grow without bound while building models in a loop.
    tf.keras.backend.clear_session()
    gc.collect()

    params = generate_parameter_set()

    print(params)

    model = build_model(params)

    # AUC is better when larger; state the direction explicitly instead of
    # relying on mode="auto" to infer it from the metric name.
    stopper = tf.keras.callbacks.EarlyStopping(monitor="val_auc", mode="max", patience=10)

    optimizer = optimizer_classes[params["optimizer"]](params["learning_rate"])

    model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
        # Pass a list and name the metric explicitly: an auto-generated name
        # can become "auc_1", "auc_2", ... for later models, which would
        # break both the "val_auc" monitor and the history lookup below.
        metrics=[tf.keras.metrics.AUC(name="auc")],
    )

    start = time.time()
    # NOTE(review): validation_split=0.8 holds out 80% of the samples for
    # validation and trains on the remaining 20%; confirm this is intended
    # rather than 0.2.
    history = model.fit(
        data,
        labels,
        epochs=50000,  # effectively unbounded; early stopping ends the run
        batch_size=params["batch_size"],
        validation_split=0.8,
        callbacks=[stopper],
    )

    train_time = time.time() - start

    val_auc = history.history["val_auc"]
    best_auc = max(val_auc)
    # Zero-based index of the epoch that reached the best validation AUC.
    train_steps = val_auc.index(best_auc)

    log_line = {"params": params, "train_time": train_time, "train_steps": train_steps, "best_auc": best_auc}

    # Append after every trial so finished results survive an interruption.
    with open(log_file, "a") as log:
        log.write(json.dumps(log_line) + "\n")