In [35]:
# coding: utf-8
import time
import tensorflow as tf
from datetime import datetime
from keras.utils.np_utils import to_categorical
from tensorflow.examples.tutorials.mnist import input_data

In [36]:
mnist = input_data.read_data_sets("../MNIST_data/")

Extracting ../MNIST_data/train-images-idx3-ubyte.gz
Extracting ../MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../MNIST_data/t10k-labels-idx1-ubyte.gz


In [37]:
X_test = mnist.train.images
y_test = to_categorical(mnist.train.labels, num_classes=10)

In [38]:
def split_folds(num_folds=3):
    train_length = len(mnist.train.labels)
    fold_size = int(train_length / num_folds)
    X_train_folds = []
    y_train_folds = []
    for i in range(num_folds):
        start = i * fold_size
        end = (i + 1) * fold_size if i + 1 < num_folds else train_length
        images = mnist.train.images[start : end]
        labels = mnist.train.labels[start : end]
        labels = to_categorical(labels, num_classes=10)
        X_train_folds.append(images)
        y_train_folds.append(labels)

    return X_train_folds, y_train_folds

In [39]:
def init_input():
    with tf.name_scope('input'):
        X = tf.placeholder('float', [None, 784], name='input_x')
        y = tf.placeholder('float', [None, 10], name='label_y')
    return X, y

In [40]:
def add_layer(input_dim, output_dim, inputs, name, activation_function=None):
    name_W = 'W_' + name
    name_b = 'b_' + name
    
    with tf.name_scope(name):
        with tf.name_scope('weight'):
            W = tf.get_variable(shape=[input_dim, output_dim], initializer=tf.contrib.keras.initializers.he_normal(), name=name_W)
            tf.summary.histogram(name + '/weight', W)
        with tf.name_scope('bias'):
            b = tf.Variable(tf.constant(0.1, shape=[1, output_dim]), name=name_b)
            tf.summary.histogram(name + '/bias', b)
        with tf.name_scope('Wx_plus_b'):
            Wx_plus_b = tf.add(tf.matmul(inputs, W), b)
        if activation_function is None:
            outputs = Wx_plus_b
        else:
            outputs = activation_function(Wx_plus_b)
        tf.summary.histogram(name + '/output', outputs)
        return outputs

In [41]:
def build_network(X, n, activation):
    h1 = add_layer(input_dim=784, output_dim=n, inputs=X, name='hidden_layer_1', activation_function=activation)
    h2 = add_layer(input_dim=n, output_dim=n, inputs=h1, name='hidden_layer_2', activation_function=activation)
    h3 = add_layer(input_dim=n, output_dim=n, inputs=h2, name='hidden_layer_3', activation_function=activation)
    h4 = add_layer(input_dim=n, output_dim=n, inputs=h3, name='hidden_layer_4', activation_function=activation)
    h5 = add_layer(input_dim=n, output_dim=n, inputs=h4, name='hidden_layer_5', activation_function=activation)
    y_hat = add_layer(input_dim=n, output_dim=10, inputs=h5, name='output_layer', activation_function=tf.nn.softmax)
    return y_hat

In [42]:
def init_evaluation(y, y_hat, learning_rate):
    with tf.name_scope('cross_entropy'):
        loss_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=y_hat))
    tf.summary.scalar('cross_entropy', loss_function)

    with tf.name_scope('train'):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss_function)

    with tf.name_scope('accuracy'):
        with tf.name_scope('correction_prediction'):
            correct_prediction_count = tf.equal(tf.argmax(y, 1), tf.argmax(y_hat, 1))
        with tf.name_scope('accuracy'):
            acc = tf.reduce_mean(tf.cast(correct_prediction_count, 'float'))
    tf.summary.scalar('accuracy', acc)
    
    return loss_function, optimizer, acc

In [43]:
def DNN(epoch, n_neurons, learning_rate, activation, batch_size, early_stopping, restore_model_name=None):
    X_train_folds, y_train_folds = split_folds()
    
    tf.reset_default_graph()
    
    training_id = '[n_neurons_%d, learning_rate_%f, activation_%s, batch_size_%d]' % (n_neurons, learning_rate, activation, batch_size)
    training_id += "-" + datetime.utcnow().strftime("%Y%m%d%H%M%S")
    logdir = "tf_logs/{}/".format(training_id)
    
    X, y = init_input()
    y_hat = build_network(X, n_neurons, activation)
    loss_function, optimizer, acc = init_evaluation(y, y_hat, learning_rate)
    
    batch = int(mnist.train.num_examples / batch_size)

    saver = tf.train.Saver()
    start_epoch = 0
    iteration = 0
    
    print('[n_neurons=%d, learning_rate=%f, activation=%s, batch_size=%d]' % (n_neurons, learning_rate, activation, batch_size))
    
    with tf.Session() as sess:
        if restore_model_name:
            saver.restore(sess, "regular_train/" + restore_model_name)
            start_epoch = int(restore_model_name.split('.')[0].split('_')[-1])
        else:
            sess.run(tf.global_variables_initializer())
            
        merged = tf.summary.merge_all()    
        writer = tf.summary.FileWriter(logdir, sess.graph)
        
        for i in range(len(X_train_folds)):
            best_vali_acc = 0.0
            n = 0
            # Always takes the first fold as validation fold, than move the fold to the bottom after each iteration
            print('FOLD %d' % (i + 1))
            vali_X = X_train_folds.pop(0)
            vali_y = y_train_folds.pop(0)
            vali_loss_list = []
            vali_acc_list = []
            for i in range(start_epoch, epoch):
                for X_train_fold, y_train_fold in zip(X_train_folds, y_train_folds): 
                    for j in range(batch):
                        batch_x = X_train_fold[j * batch_size : (j + 1) * batch_size]
                        batch_y = y_train_fold[j * batch_size : (j + 1) * batch_size]
                        sess.run(optimizer, feed_dict={X: batch_x, y: batch_y})

                iteration += 1
                result = sess.run(merged, feed_dict={X: vali_X, y: vali_y})
                writer.add_summary(result, iteration)

                vali_loss = sess.run(loss_function, feed_dict={X: vali_X, y: vali_y})
                vali_acc = sess.run(acc, feed_dict={X: vali_X, y: vali_y})

                vali_loss_list.append(vali_loss)
                vali_acc_list.append(vali_acc)

                file_name = 'regular_training_epoch_%d.ckpt' % (i+1)
                save_path = saver.save(sess, "regular_train/%s/%s" % (training_id, file_name))
                best_vali_acc = max(vali_acc_list)
                best_vali_loss = min(vali_loss_list)

                print("Epoch: %2d, Validation loss: %9.4f, Best loss:%9.4f, Accuracy: %.4f, Best Accuracy: %.4f" % (i+1, vali_loss, best_vali_loss, vali_acc, best_vali_acc))    

                n = n + 1 if vali_acc <= best_vali_acc else 0
                if n > early_stopping:
                    print('Early Stopping at epoch %d' % i)
                    break
                    
            X_train_folds.append(vali_X)
            y_train_folds.append(vali_y)
        
        file_name = 'final_model'
        save_path = saver.save(sess, "regular_train/%s/%s" %(training_id, file_name))
        print("Model saved in path: %s" % save_path)
        
        test_acc = sess.run(acc, feed_dict={X: X_test, y: y_test})
        print("Final test accuracy: %.4f" % test_acc)
        
    return test_acc

In [44]:
neurons_list = [10, 30, 50, 70, 90, 100, 120, 140, 160]
batch_size_list = [10, 50, 100, 500]
learning_rate_list = [0.01, 0.02, 0.05, 0.1]
activation_function_list = [tf.nn.relu, tf.nn.elu, tf.nn.leaky_relu, tf.nn.tanh]
result = {}

In [23]:
for n in neurons_list:
    for bs in batch_size_list:
        for lr in learning_rate_list:
            for af in activation_function_list:
                test_acc = DNN(epoch=1000, n_neurons=n, learning_rate=lr, activation=af, batch_size=bs, early_stopping=10)
                training_id = '[n_neurons=%d, learning_rate=%f, activation=%s, batch_size=%d]' % (n, lr, af, bs)
                result[training_id] = test_acc

[n_neurons=10, learning_rate=0.010000, activation=<function relu at 0x117f706a8>, batch_size=10]
FOLD 1
Epoch:  1, Validation loss:    2.3657, Best loss:   2.3657, Accuracy: 0.0955, Best Accuracy: 0.0955
Epoch:  2, Validation loss:    2.3657, Best loss:   2.3657, Accuracy: 0.0954, Best Accuracy: 0.0955
Epoch:  3, Validation loss:    2.3600, Best loss:   2.3600, Accuracy: 0.1011, Best Accuracy: 0.1011
Epoch:  4, Validation loss:    2.3600, Best loss:   2.3600, Accuracy: 0.1011, Best Accuracy: 0.1011
Epoch:  5, Validation loss:    2.3600, Best loss:   2.3600, Accuracy: 0.1011, Best Accuracy: 0.1011
Epoch:  6, Validation loss:    2.3600, Best loss:   2.3600, Accuracy: 0.1011, Best Accuracy: 0.1011
Epoch:  7, Validation loss:    2.3600, Best loss:   2.3600, Accuracy: 0.1011, Best Accuracy: 0.1011
Epoch:  8, Validation loss:    2.3600, Best loss:   2.3600, Accuracy: 0.1011, Best Accuracy: 0.1011
Epoch:  9, Validation loss:    2.3600, Best loss:   2.3600, Accuracy: 0.1011, Best Accuracy: 0.1

KeyboardInterrupt: 

In [24]:
result

{'[n_neurons=10, learning_rate=0.010000, activation=<function relu at 0x117f706a8>, batch_size=10]': 0.09916364,
 '[n_neurons=10, learning_rate=0.020000, activation=<function relu at 0x117f706a8>, batch_size=10]': 0.10390909}