In [2]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import deepchem as dc
from sklearn.metrics import accuracy_score
from utils import tox21

tf.set_random_seed(456)
np.random.seed(456)

In [15]:
def eval_tox21_hyperparams(n_hidden=50, n_layers=1, learning_rate=.001,
                           dropout_prob=0.5, n_epochs=45, batch_size=100,
                           weight_positives=True):
    d = 1024
    graph = tf.Graph()
    with graph.as_default():
        train_X, train_y, train_w, valid_X, valid_y, valid_w, test_X, test_y, test_w = tox21()
        
        with tf.name_scope("placeholders"):
            x = tf.placeholder(tf.float32, (None, d))
            y = tf.placeholder(tf.float32, (None,))
            w = tf.placeholder(tf.float32, (None,))
            keep_prob = tf.placeholder(tf.float32)
        
        for layer in range(n_layers):
            with tf.name_scope("layer-%d" % layer):
                W = tf.Variable(tf.random_normal((d, n_hidden)))
                b = tf.Variable(tf.random_normal((n_hidden,)))
                x_hidden = tf.nn.relu(tf.matmul(x, W) + b)
                x_hidden = tf.nn.dropout(x_hidden, keep_prob)
                
        with tf.name_scope("output"):
            W = tf.Variable(tf.random_normal((n_hidden, 1)))
            b = tf.Variable(tf.random_normal((1,)))
            y_logit = tf.matmul(x_hidden, W) + b
            
            y_one_prob = tf.sigmoid(y_logit)
            y_pred = tf.round(y_one_prob)
        
        with tf.name_scope("loss"):
            y_expand = tf.expand_dims(y, 1)
            entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_logit, labels=y_expand)
            
            if weight_positives:
                w_expand = tf.expand_dims(w, 1)
                entropy = w_expand * entropy
            
            l = tf.reduce_sum(entropy)
            
        with tf.name_scope("optim"):
            train_op = tf.train.AdamOptimizer(learning_rate).minimize(l)
        
        with tf.name_scope("summaries"):
            tf.summary.scalar("loss", l)
            merged = tf.summary.merge_all()
        
        hyperparam_str = "d-%d-hidden-%d-lr-%f-n_epochs-%d-batch_size-%d-weight_pos-%s" % (d, n_hidden, learning_rate, n_epochs, batch_size, str(weight_positives))
        train_writer = tf.summary.FileWriter('/tmp/fcnet-func-' + hyperparam_str, tf.get_default_graph())
        
        N = train_X.shape[0]
        
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            step = 0
            for epoch in range(n_epochs):
                pos = 0
                while pos < N:
                    batch_X = train_X[pos:pos+batch_size]
                    batch_y = train_y[pos:pos+batch_size]
                    batch_w = train_w[pos:pos+batch_size]
                    feed_dict = {x: batch_X, y: batch_y, w: batch_w, keep_prob: dropout_prob}
                    _, summary, loss = sess.run([train_op, merged, l], feed_dict=feed_dict)
                    train_writer.add_summary(summary, step)
                    
                    step += 1
                    pos += batch_size
#                 print("epoch %d, loss: %f" % (epoch, loss))
            valid_y_pred = sess.run(y_pred, feed_dict={x: valid_X, keep_prob: 1.0})
        weighted_score = accuracy_score(valid_y, valid_y_pred, sample_weight=valid_w)
        print("Valid Weighted Classification Accuracy: %f" % weighted_score)
    return weighted_score

In [10]:
eval_tox21_hyperparams()

Loading dataset from disk.
Loading dataset from disk.
Loading dataset from disk.
epoch 0, loss: 2282.731934
epoch 1, loss: 1385.962891
epoch 2, loss: 1038.810791
epoch 3, loss: 1908.433838
epoch 4, loss: 1469.491333
epoch 5, loss: 1610.777100
epoch 6, loss: 834.324951
epoch 7, loss: 1091.189819
epoch 8, loss: 325.707184
epoch 9, loss: 1064.686401
epoch 10, loss: 1773.819214
epoch 11, loss: 794.721863
epoch 12, loss: 948.814697
epoch 13, loss: 1453.650391
epoch 14, loss: 1228.517334
epoch 15, loss: 968.245972
epoch 16, loss: 1711.731445
epoch 17, loss: 467.960327
epoch 18, loss: 951.676758
epoch 19, loss: 194.095566
epoch 20, loss: 335.381775
epoch 21, loss: 986.657471
epoch 22, loss: 1066.617798
epoch 23, loss: 320.587158
epoch 24, loss: 242.460190
epoch 25, loss: 252.873444
epoch 26, loss: 764.278625
epoch 27, loss: 388.613831
epoch 28, loss: 754.788208
epoch 29, loss: 420.059998
epoch 30, loss: 588.575806
epoch 31, loss: 519.417725
epoch 32, loss: 39.453426
epoch 33, loss: 648.704651

0.64683275113462824

In [16]:
scores = {}
n_reps = 3
hidden_sizes = [30, 60]
epochs = [15, 30, 45]
dropouts = [.5]
num_layers = [1, 2]

In [17]:
for rep in range(n_reps):
    for n_epochs in epochs:
        for hidden_size in hidden_sizes:
            for dropout in dropouts:
                for n_layers in num_layers:
                    score = eval_tox21_hyperparams(n_hidden=hidden_size, n_epochs=n_epochs,
                                         dropout_prob=dropout, n_layers=n_layers)
                    if (hidden_size, n_epochs, dropout, n_layers) not in scores:
                        scores[(hidden_size, n_epochs, dropout, n_layers)] = []
                    scores[(hidden_size, n_epochs, dropout, n_layers)].append(score)
print("All Scores")
print(scores)

Loading dataset from disk.
Loading dataset from disk.
Loading dataset from disk.
Valid Weighted Classification Accuracy: 0.594250
Loading dataset from disk.
Loading dataset from disk.
Loading dataset from disk.
Valid Weighted Classification Accuracy: 0.577952
Loading dataset from disk.
Loading dataset from disk.
Loading dataset from disk.
Valid Weighted Classification Accuracy: 0.624987
Loading dataset from disk.
Loading dataset from disk.
Loading dataset from disk.
Valid Weighted Classification Accuracy: 0.633515
Loading dataset from disk.
Loading dataset from disk.
Loading dataset from disk.
Valid Weighted Classification Accuracy: 0.659444
Loading dataset from disk.
Loading dataset from disk.
Loading dataset from disk.
Valid Weighted Classification Accuracy: 0.625727
Loading dataset from disk.
Loading dataset from disk.
Loading dataset from disk.
Valid Weighted Classification Accuracy: 0.646092
Loading dataset from disk.
Loading dataset from disk.
Loading dataset from disk.
Valid Wei

In [19]:
avg_scores = {}
for params, param_scores in scores.items():
  avg_scores[params] = np.mean(np.array(param_scores))
print("Scores Averaged over %d repetitions" % n_reps)
print(avg_scores)

Scores Averaged over 3 repetitions
{(30, 15, 0.5, 1): 0.61401144897549809, (30, 15, 0.5, 2): 0.62240757745400555, (60, 15, 0.5, 1): 0.62696766779270374, (60, 15, 0.5, 2): 0.64129606941950357, (30, 30, 0.5, 1): 0.65030676317499292, (30, 30, 0.5, 2): 0.62473397475087722, (60, 30, 0.5, 1): 0.64955473986466783, (60, 30, 0.5, 2): 0.65227672859470742, (30, 45, 0.5, 1): 0.6574569784326747, (30, 45, 0.5, 2): 0.65314383304922674, (60, 45, 0.5, 1): 0.64892898614311134, (60, 45, 0.5, 2): 0.65275943006063342}
