In [None]:
import argparse
import gzip
import numpy as np
import os
import tensorflow as tf

from six.moves.urllib.request import urlretrieve

# Polyaxon
from polyaxon_client.tracking import Experiment

In [None]:
# Lookup tables that map the string hyperparameter values used in this
# notebook to the TensorFlow callables / classes they select.
ACTIVATIONS = dict(
    relu=tf.nn.relu,
    sigmoid=tf.sigmoid,
    tanh=tf.tanh,
)

OPTIMIZERS = dict(
    gradient_descent=tf.train.GradientDescentOptimizer,
    rmsprop=tf.train.RMSPropOptimizer,
    adam=tf.train.AdamOptimizer,
)

# Base URL the four gzipped MNIST archives below are downloaded from.
MNIST_HOST = 'http://yann.lecun.com/exdb/mnist/'
TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
TEST_LABELS = 't10k-labels-idx1-ubyte.gz'

# Images are treated as IMAGE_WIDTH x IMAGE_WIDTH squares; labels are one-hot
# encoded over OUTPUT_CLASSES columns.
IMAGE_WIDTH = 28
OUTPUT_CLASSES = 10

In [None]:
def load_onehot_data(filename):
    """Read a gzipped label file and return its labels one-hot encoded.

    The decompressed content is interpreted as raw unsigned bytes. The first
    8 bytes are skipped and every remaining byte is used as a class index.

    Args:
        filename: path of the gzip-compressed label file.

    Returns:
        A float32 array of shape (number_of_labels, OUTPUT_CLASSES) holding a
        single 1 per row, in the column given by that row's label.
    """
    with gzip.open(filename, 'rb') as label_file:
        raw_bytes = np.frombuffer(label_file.read(), dtype=np.uint8)
    class_ids = raw_bytes[8:]
    # Row k of the identity matrix is the one-hot vector for class k, so
    # indexing it with the label array encodes every label at once.
    return np.eye(OUTPUT_CLASSES, dtype=np.float32)[class_ids]


def load_image_data(filename):
    """Read a gzipped image file and return flattened, rescaled images.

    The decompressed content is interpreted as raw unsigned bytes. The first
    16 bytes are skipped and the remainder is split into rows of
    IMAGE_WIDTH ** 2 values.

    Args:
        filename: path of the gzip-compressed image file.

    Returns:
        A float32 array of shape (number_of_images, IMAGE_WIDTH ** 2) whose
        values are the original bytes divided by 255.
    """
    with gzip.open(filename, 'rb') as image_file:
        raw_bytes = np.frombuffer(image_file.read(), dtype=np.uint8)
    pixel_rows = raw_bytes[16:].reshape((-1, IMAGE_WIDTH ** 2))
    return pixel_rows.astype(np.float32) / 255


def load_mnist_data(path='/tmp/mnist'):
    """Download the MNIST archives if they are missing and load them.

    Each of the four archives is fetched from MNIST_HOST only when no file of
    that name already exists in `path`.

    Args:
        path: directory used to cache the downloaded archives; it is created
            when it does not exist.

    Returns:
        ((train_images, train_labels), (test_images, test_labels)), with the
        images produced by load_image_data and the labels by load_onehot_data.

    Raises:
        Whatever urlretrieve raises when a download fails; the partially
        written file is removed before the error propagates.
    """
    if not os.path.isdir(path):
        os.makedirs(path)
    for data_file in (TRAIN_IMAGES, TRAIN_LABELS, TEST_IMAGES, TEST_LABELS):
        destination = os.path.join(path, data_file)
        if os.path.isfile(destination):
            continue
        # Download next to the final location and rename only once the
        # transfer finished. Writing straight to `destination` would leave a
        # truncated archive behind after an interrupted download, and the
        # isfile() check above would then treat it as complete on the next run.
        partial = destination + '.part'
        try:
            urlretrieve("{}{}".format(MNIST_HOST, data_file), partial)
            os.rename(partial, destination)
        finally:
            # After a successful rename there is nothing left to clean up.
            if os.path.isfile(partial):
                os.remove(partial)
    return (
        (load_image_data(os.path.join(path, TRAIN_IMAGES)),
         load_onehot_data(os.path.join(path, TRAIN_LABELS))),
        (load_image_data(os.path.join(path, TEST_IMAGES)),
         load_onehot_data(os.path.join(path, TEST_LABELS))),
    )


def weight_variable(shape):
    """Create a variable of the given shape initialised from a truncated normal (stddev 0.1)."""
    initial_value = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial_value)


def bias_variable(shape):
    """Create a variable of the given shape with every element initialised to 0.1."""
    initial_value = tf.constant(0.1, shape=shape)
    return tf.Variable(initial_value)


def conv_layer(x, filter_size, out_features, activation, pool_size):
    """Apply a convolution, an activation and max pooling to `x`.

    Args:
        x: input tensor; its fourth dimension is used as the number of input
            channels of the convolution kernel.
        filter_size: side length of the square convolution kernel.
        out_features: number of output channels of the convolution.
        activation: key into ACTIVATIONS selecting the non-linearity.
        pool_size: used as both window size and stride of the max pooling.

    Returns:
        The pooled tensor.
    """
    in_features = x.get_shape().as_list()[3]
    kernel = weight_variable([filter_size, filter_size, in_features, out_features])
    bias = bias_variable([out_features])

    # Stride-1 convolution with 'SAME' padding, followed by the activation.
    pre_activation = tf.nn.conv2d(x, kernel, strides=[1, 1, 1, 1], padding='SAME') + bias
    activated = ACTIVATIONS[activation](pre_activation)

    pool_window = [1, pool_size, pool_size, 1]
    return tf.nn.max_pool(activated, ksize=pool_window, strides=pool_window, padding='SAME')


def fully_connected_layer(x, out_size):
    """Return `x @ W + b` with freshly created weights and biases.

    Args:
        x: input tensor; its second dimension is used as the input width.
        out_size: number of output units.

    Returns:
        The affine output tensor; no activation is applied here.
    """
    in_size = x.get_shape().as_list()[1]
    weights = weight_variable([in_size, out_size])
    bias = bias_variable([out_size])
    return tf.matmul(x, weights) + bias


def create_model(conv1_size,
                 conv1_out,
                 conv1_activation,
                 pool1_size,
                 conv2_size,
                 conv2_out,
                 conv2_activation,
                 pool2_size,
                 fc1_activation,
                 fc1_size,
                 optimizer,
                 log_learning_rate):
    """Build the graph: two conv/pool layers, one hidden dense layer, one output layer.

    Args:
        conv1_size, conv1_out, conv1_activation, pool1_size: kernel size,
            output channels, ACTIVATIONS key and pooling size of the first
            convolutional layer.
        conv2_size, conv2_out, conv2_activation, pool2_size: the same
            settings for the second convolutional layer.
        fc1_activation: ACTIVATIONS key applied to the hidden dense layer.
        fc1_size: number of units of the hidden dense layer.
        optimizer: OPTIMIZERS key selecting the optimizer class.
        log_learning_rate: base-10 exponent of the learning rate.

    Returns:
        A tuple (x, y, y_conv, keep_prob, train_step, accuracy): the image and
        label placeholders, the output logits, the placeholder passed to
        tf.nn.dropout, the training op and the accuracy tensor.
    """
    # Inputs: flattened images, one-hot labels, and the dropout keep probability.
    x = tf.placeholder(tf.float32, shape=[None, IMAGE_WIDTH ** 2])
    y = tf.placeholder(tf.float32, shape=[None, OUTPUT_CLASSES])
    keep_prob = tf.placeholder(tf.float32)
    images = tf.reshape(x, [-1, IMAGE_WIDTH, IMAGE_WIDTH, 1])

    # Convolutional stack.
    first_conv = conv_layer(images, conv1_size, conv1_out, conv1_activation, pool1_size)
    second_conv = conv_layer(first_conv, conv2_size, conv2_out, conv2_activation, pool2_size)

    # Collapse every dimension except the batch one before the dense layers.
    height, width, features = second_conv.get_shape().as_list()[1:]
    flattened = tf.reshape(second_conv, [-1, height * width * features])

    # Hidden dense layer with dropout, then the output logits.
    hidden = ACTIVATIONS[fc1_activation](fully_connected_layer(flattened, fc1_size))
    hidden_dropped = tf.nn.dropout(hidden, keep_prob)
    y_conv = fully_connected_layer(hidden_dropped, OUTPUT_CLASSES)

    # Loss and training op.
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_conv)
    cross_entropy = tf.reduce_mean(per_example_loss)
    optimizer_class = OPTIMIZERS[optimizer]
    train_step = optimizer_class(10 ** log_learning_rate).minimize(cross_entropy)

    # Accuracy: fraction of rows whose arg-max logit matches the arg-max label.
    is_correct = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

    return x, y, y_conv, keep_prob, train_step, accuracy


def train_model(model, x_train, y_train, batch_size, dropout, epochs):
    """Run the model's training op over shuffled mini-batches.

    Args:
        model: the 6-tuple returned by create_model.
        x_train: training inputs, indexable with an integer index array.
        y_train: training targets, indexed with the same index array.
        batch_size: number of examples per step; the last batch of an epoch
            may be smaller.
        dropout: value fed to the model's keep_prob placeholder.
            NOTE(review): it is passed through unchanged, so it acts as the
            keep probability rather than the drop rate - confirm intent.
        epochs: number of full passes over the training data.
    """
    images_in, labels_in, _, keep_prob, train_step, _ = model
    example_count = len(x_train)
    for _epoch in range(epochs):
        # A new random ordering is drawn for every epoch.
        order = np.arange(example_count)
        np.random.shuffle(order)
        for offset in range(0, example_count, batch_size):
            # Slicing past the end is clamped, which yields the short last batch.
            chosen = order[offset:offset + batch_size]
            feed = {
                images_in: x_train[chosen],
                labels_in: y_train[chosen],
                keep_prob: dropout,
            }
            train_step.run(feed_dict=feed)


def evaluate_model(model, x_test, y_test):
    """Evaluate the model's accuracy tensor on the given data in a single pass.

    Args:
        model: the 6-tuple returned by create_model.
        x_test: inputs fed to the image placeholder.
        y_test: targets fed to the label placeholder.

    Returns:
        The value produced by evaluating the accuracy tensor, with the
        keep_prob placeholder fed 1.0.
    """
    images_in, labels_in, _, keep_prob, _, accuracy = model
    feed = {images_in: x_test, labels_in: y_test, keep_prob: 1.0}
    return accuracy.eval(feed_dict=feed)

In [None]:
# First convolutional layer.
conv1_size_param = 5
conv1_out_param = 32
conv1_activation_param = 'relu'
pool1_size_param = 2

# Second convolutional layer.
conv2_size_param = 5
conv2_out_param = 64
conv2_activation_param = 'relu'
pool2_size_param = 2

# Hidden fully connected layer.
fc1_activation_param = 'sigmoid'
fc1_size_param = 1024

# Optimisation; the learning rate used is 10 ** log_learning_rate_param.
optimizer_param = 'adam'
log_learning_rate_param = -3

# Training loop. NOTE(review): train_model feeds dropout_param straight into
# the keep_prob placeholder, so 0.2 keeps 20% of the units - confirm intent.
batch_size_param = 100
dropout_param = 0.2
epochs_param = 1

In [None]:
# Polyaxon: create the experiment and record every hyperparameter of this run.
experiment = Experiment('mnist')
experiment.create(framework='tensorflow', tags=['examples'])
experiment.log_params(
    conv1_size=conv1_size_param,
    conv1_out=conv1_out_param,
    conv1_activation=conv1_activation_param,
    pool1_size=pool1_size_param,
    conv2_size=conv2_size_param,
    conv2_out=conv2_out_param,
    conv2_activation=conv2_activation_param,
    pool2_size=pool2_size_param,
    fc1_activation=fc1_activation_param,
    fc1_size=fc1_size_param,
    optimizer=optimizer_param,
    log_learning_rate=log_learning_rate_param,
    batch_size=batch_size_param,
    dropout=dropout_param,
    epochs=epochs_param)

# Downloads the archives on first use, then loads them from the local cache.
(x_train, y_train), (x_test, y_test) = load_mnist_data()

# Polyaxon: record references to the arrays used for training and evaluation.
experiment.log_data_ref(data=x_train, data_name='x_train')
experiment.log_data_ref(data=y_train, data_name='y_train')
experiment.log_data_ref(data=x_test, data_name='x_test')
experiment.log_data_ref(data=y_test, data_name='y_test')

# Build and run the model inside its own graph. Building it in the
# process-wide default graph would make every re-run of this cell add another
# full copy of the ops and variables to that graph.
with tf.Graph().as_default(), tf.Session() as sess:
    model = create_model(
        conv1_size=conv1_size_param,
        conv1_out=conv1_out_param,
        conv1_activation=conv1_activation_param,
        pool1_size=pool1_size_param,
        conv2_size=conv2_size_param,
        conv2_out=conv2_out_param,
        conv2_activation=conv2_activation_param,
        pool2_size=pool2_size_param,
        fc1_activation=fc1_activation_param,
        fc1_size=fc1_size_param,
        optimizer=optimizer_param,
        log_learning_rate=log_learning_rate_param)
    # Variables must be initialised before the first training step.
    sess.run(tf.global_variables_initializer())
    train_model(model,
                x_train,
                y_train,
                batch_size=batch_size_param,
                dropout=dropout_param,
                epochs=epochs_param)
    accuracy = evaluate_model(model, x_test, y_test)

    # Polyaxon: report the accuracy measured on the test split.
    experiment.log_metrics(accuracy=accuracy)