In [1]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from tensorflow.contrib import learn
from skopt import gp_minimize

In [2]:
def batch_iter(X_train, y_train, num_epochs=5, batch_size=32, random_state=0):
    """
    Yield shuffled, fixed-size mini-batches for a number of epochs.

    Parameters
    ----------
    X_train : 2-D array, shape (n_samples, n_features)
        Input samples (indexed as ``X_train[indices, :]``).
    y_train : array whose first axis has length n_samples
        Targets aligned row-by-row with ``X_train``.
    num_epochs : int, default 5
        Number of passes over the data.
    batch_size : int, default 32
        Number of samples in every yielded batch.
    random_state : int, default 0
        Seed for the ``np.random.RandomState`` that drives the shuffling.

    Yields
    ------
    (X_batch, y_batch) : tuple of arrays
        Each with exactly ``batch_size`` rows. A trailing partial batch is
        dropped every epoch so that batches always match a fixed-size
        placeholder; if ``n_samples < batch_size`` nothing is yielded.
    """
    rng = np.random.RandomState(random_state)
    dataset_size = X_train.shape[0]
    indices = np.arange(dataset_size)

    # Number of *full* batches per epoch. Integer division keeps the last
    # batch when dataset_size is an exact multiple of batch_size (it used to
    # be dropped) and still discards a trailing partial batch.
    n_batches = dataset_size // batch_size

    for _ in range(num_epochs):
        # Fancy indexing returns copies, so the caller's arrays are never
        # modified; the local names are rebound to the reshuffled copies and
        # the next epoch permutes those again.
        rng.shuffle(indices)
        X_train = X_train[indices, :]
        y_train = y_train[indices]
        for batch_no in range(n_batches):
            start_ind = batch_no * batch_size
            stop_ind = start_ind + batch_size
            yield X_train[start_ind: stop_ind], y_train[start_ind: stop_ind]

In [3]:
class MLPModel(object):
    """
    Tensorflow 2-layer MLP that allows setting learning rate and regularization.

    The graph is built in the constructor. Weights are created with
    ``tf.get_variable``, so the enclosing ``tf.variable_scope`` decides their
    names, their initializer and whether existing variables are reused.

    Parameters
    ----------
    batch_size : int, default 32
        Fixed first dimension of the input and label placeholders.
    n_features : int, default 100
        Number of input features.
    n_classes : int, default 10
        Number of output classes (width of the one-hot label vectors).

    Attributes
    ----------
    X, y : placeholders for a batch of inputs and one-hot labels.
    learning_rate, reg : scalar placeholders for the Adam step size and the
        L2 penalty coefficient; they are fed at run time.
    loss_ : mean softmax cross-entropy plus ``reg`` times the L2 penalty.
    accuracy_ : fraction of rows whose arg-max prediction matches the label.
    optimizer_ : Adam training op minimizing ``loss_``.
    """
    def __init__(self, batch_size=32, n_features=100, n_classes=10):
        self.X = tf.placeholder(tf.float32, [batch_size, n_features])
        self.y = tf.placeholder(tf.float32, [batch_size, n_classes])

        # Hyperparameters
        self.learning_rate = tf.placeholder(tf.float32)
        self.reg = tf.placeholder(tf.float32)

        # Layer 1: 256 hidden units with ReLU activation.
        W1 = tf.get_variable(
            "Weights1", shape=[n_features, 256], dtype=tf.float32)
        b1 = tf.get_variable("Bias1", shape=[256], dtype=tf.float32)
        X_hidden1 = tf.nn.relu(tf.add(tf.matmul(self.X, W1), b1))

        # Layer 2: linear map to n_classes logits.
        W2 = tf.get_variable(
            "Weights2", shape=[256, n_classes], dtype=tf.float32)
        b2 = tf.get_variable("Bias2", shape=[n_classes], dtype=tf.float32)
        logits = tf.add(tf.matmul(X_hidden1, W2), b2)

        # L2 penalty over every trainable parameter. The output bias b2 is
        # included here; previously W2 was added twice and b2 was left out.
        l2_loss = tf.nn.l2_loss(W1)
        l2_loss += tf.nn.l2_loss(b1)
        l2_loss += tf.nn.l2_loss(W2)
        l2_loss += tf.nn.l2_loss(b2)

        # Keyword arguments make the logits/labels order explicit.
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=self.y)
        self.loss_ = tf.reduce_mean(cross_entropy) + self.reg * l2_loss

        predictions = tf.argmax(logits, 1)
        true = tf.argmax(self.y, 1)
        self.accuracy_ = tf.reduce_mean(
            tf.cast(tf.equal(predictions, true), tf.float32))

        self.optimizer_ = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(self.loss_)

In [4]:
def optimize_2layer_mlp(params):
    """
    Objective for hyperparameter search: train an MLPModel, score it on held-out data.

    Reads the notebook-level arrays ``X_train``, ``y_train``, ``X_test`` and
    ``y_test``.

    Parameters
    ----------
    params : sequence of three floats
        ``(init_scale, learning_rate, reg)``: half-width of the uniform weight
        initializer, the Adam learning rate and the L2 penalty coefficient.

    Returns
    -------
    float
        Negative accuracy on ``(X_test, y_test)``, so that minimizing this
        function maximizes accuracy.
    """
    init_scale, learning_rate, reg = params
    n_features = X_train.shape[1]

    # Build everything in a fresh graph so that each call starts from newly
    # created weights and does not keep growing (or re-initializing) the
    # global default graph. The session is closed on exit, even on error.
    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-init_scale, init_scale)
        with tf.variable_scope(
                "MLPModel", reuse=None, initializer=initializer):
            mlpmodel = MLPModel(n_features=n_features)

        session.run(tf.initialize_all_variables())
        for X_batch, y_batch in batch_iter(X_train, y_train):
            feed_dict = {
                mlpmodel.X: X_batch,
                mlpmodel.y: y_batch,
                mlpmodel.learning_rate: learning_rate,
                mlpmodel.reg: reg}
            session.run(mlpmodel.optimizer_, feed_dict)

        # Second model sharing the trained weights, sized to take the whole
        # evaluation set in a single batch. Its constructor also defines an
        # optimizer op, which is never run.
        with tf.variable_scope("MLPModel", reuse=True):
            mlpmodelval = MLPModel(
                batch_size=X_test.shape[0], n_features=n_features)

        # In practice, one should use a validation set independent of the
        # test set.
        # Fetch the accuracy of the evaluation model (not the training model)
        # and feed only the evaluation data; accuracy does not depend on the
        # learning-rate or regularization placeholders.
        val_feed_dict = {
            mlpmodelval.X: X_test,
            mlpmodelval.y: y_test}
        accuracy = session.run(mlpmodelval.accuracy_, val_feed_dict)

    return -float(accuracy)

In [None]:
# Load the MNIST train / test splits.
digits = learn.datasets.mnist.load_mnist()
X_train = digits.train.images
y_train = digits.train.labels
X_test = digits.test.images
y_test = digits.test.labels

# Binarize the labels with an encoder fitted on the classes that occur in
# the training labels.
lb = LabelBinarizer()
lb.fit(np.unique(y_train))
y_train, y_test = lb.transform(y_train), lb.transform(y_test)

Extracting MNIST-data/train-images-idx3-ubyte.gz
Extracting MNIST-data/train-labels-idx1-ubyte.gz
Extracting MNIST-data/t10k-images-idx3-ubyte.gz
Extracting MNIST-data/t10k-labels-idx1-ubyte.gz


In [None]:
# Search space, listed in the order optimize_2layer_mlp unpacks its params.
bounds = [
    (0.0, 0.5),                   # init_scale
    (1e-6, 1e-1, "log-uniform"),  # learning_rate
    (1e-3, 1, "log-uniform"),     # reg
]
res = gp_minimize(
    optimize_2layer_mlp,
    bounds,
    n_calls=20,
    verbose=True,
)

Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.