## 2 Layer Modern MLP w/AlignMNIST

75 epochs of RMSProp

see:  https://github.com/terryum/TensorFlow_Exercises/blob/master/3b_MLP_MNIST_Modern_160517.ipynb

In [None]:
import tensorflow as tf
import numpy as np

In [None]:
# Execute augmentmnist.py inside this notebook's namespace.
# NOTE(review): presumably this is what defines the AlignMNIST class that is
# instantiated in a later cell -- confirm against augmentmnist.py.
%run augmentmnist.py

In [None]:
# Training hyperparameters used by the training loop and the optimizer below.
num_epochs = 75  # number of iterations of the outer (per-epoch) training loop
batch_size = 125  # number of examples in each minibatch fed to train_op

learning_rate = 0.001  # wrapped in a tf.constant and handed to optimize_loss

In [None]:
# Network Parameters
n_hidden_1 = 1000 # 1st layer num features
n_hidden_2 = 500 # 2nd layer num features
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)

# Sub-directory name under ./logs used for the TensorBoard summaries.
logfile = "2-layer-modern-mlp-alignmnist"

# Passed through multilayer_perceptron to tf.nn.dropout as its second
# argument, which in this TensorFlow API is the probability of *keeping* a unit.
dropout_prob = 0.5
# NOTE(review): sigma_init is not referenced anywhere else in the visible cells.
sigma_init = 0.1

In [None]:
# Graph inputs fed during training: a batch of flattened images and their
# one-hot labels. The leading dimension is left open (None) for any batch size.
x = tf.placeholder(tf.float32, shape=[None, n_input])
y = tf.placeholder(tf.float32, shape=[None, n_classes])

# A second pair of inputs with the same layout, fed with the test set when
# the test-accuracy op is evaluated.
xt = tf.placeholder(tf.float32, shape=[None, n_input])
yt = tf.placeholder(tf.float32, shape=[None, n_classes])

In [None]:
def multilayer_perceptron(_X, _weights, _biases, _dropout_prob):
    """Build a two-hidden-layer ReLU MLP and return its output logits.

    Args:
        _X: input tensor; it is matrix-multiplied with ``_weights['h1']``.
        _weights: dict with keys ``'h1'``, ``'h2'`` and ``'out'`` holding the
            weight matrices of the two hidden layers and the output layer.
        _biases: dict with keys ``'b1'``, ``'b2'`` and ``'out'`` holding the
            matching bias vectors.
        _dropout_prob: forwarded to ``tf.nn.dropout`` as its second argument
            (the *keep* probability in this TensorFlow API) for both hidden
            layers. Pass ``1.0`` to build a dropout-free evaluation network.

    Returns:
        The output-layer logits tensor (no softmax applied).
    """
    layer_1 = tf.nn.dropout(
        tf.nn.relu(tf.add(tf.matmul(_X, _weights['h1']), _biases['b1'])),
        _dropout_prob)
    layer_2 = tf.nn.dropout(
        tf.nn.relu(tf.add(tf.matmul(layer_1, _weights['h2']), _biases['b2'])),
        _dropout_prob)
    # Use the parameters that were passed in rather than the module-level
    # `weights` / `biases` globals, so the function works with any parameter set.
    # Dropout is deliberately not applied here: dropping logits would randomly
    # zero and rescale class scores that feed straight into the loss.
    return tf.add(tf.matmul(layer_2, _weights['out']), _biases['out'])

#### Xavier Init 

X. Glorot and Y. Bengio, "Understanding the difficulty of training deep feedforward neural networks", 2010.

In [None]:
def xavier_init(n_inputs, n_outputs, uniform=True):
    """Return a Xavier/Glorot-style initializer for a weight matrix.

    Args:
        n_inputs: number of input units of the layer (fan-in).
        n_outputs: number of output units of the layer (fan-out).
        uniform: when true, sample uniformly from
            ``[-sqrt(6 / (n_inputs + n_outputs)), +sqrt(6 / (n_inputs + n_outputs))]``;
            otherwise use a truncated normal with
            ``stddev = sqrt(3 / (n_inputs + n_outputs))``.

    Returns:
        A TensorFlow initializer suitable for ``tf.get_variable``.
    """
    fan_total = n_inputs + n_outputs

    if not uniform:
        # NOTE(review): 3.0 (rather than the textbook 2.0) presumably
        # compensates for the truncation of the normal distribution -- confirm.
        sigma = tf.sqrt(3.0 / fan_total)
        return tf.truncated_normal_initializer(stddev=sigma)

    bound = tf.sqrt(6.0 / fan_total)
    return tf.random_uniform_initializer(-bound, bound)

In [None]:
# Weight matrices, one per layer, each drawn from the Xavier initializer
# sized for that layer's fan-in / fan-out.
h1 = tf.get_variable(
    "h1",
    shape=[n_input, n_hidden_1],
    initializer=xavier_init(n_input, n_hidden_1),
)
h2 = tf.get_variable(
    "h2",
    shape=[n_hidden_1, n_hidden_2],
    initializer=xavier_init(n_hidden_1, n_hidden_2),
)
hout = tf.get_variable(
    "out",
    shape=[n_hidden_2, n_classes],
    initializer=xavier_init(n_hidden_2, n_classes),
)

# Bias vectors, one per layer, drawn from tf.random_normal with its defaults.
b1 = tf.Variable(tf.random_normal(shape=[n_hidden_1]))
b2 = tf.Variable(tf.random_normal(shape=[n_hidden_2]))
bout = tf.Variable(tf.random_normal(shape=[n_classes]))


In [None]:
# Parameter lookup tables keyed by the names multilayer_perceptron expects:
# 'h1' / 'h2' / 'out' for weights and 'b1' / 'b2' / 'out' for biases.
weights = dict(h1=h1, h2=h2, out=hout)
biases = dict(b1=b1, b2=b2, out=bout)

In [None]:
# Training network: dropout is active, keeping units with probability
# `dropout_prob`.
mlp = multilayer_perceptron(x, weights, biases, dropout_prob)
# Evaluation network: shares the same weights and biases, but uses a keep
# probability of 1.0 so dropout is disabled and test accuracy is deterministic.
mlp_test = multilayer_perceptron(xt, weights, biases, 1.0)

In [None]:
# Loss: softmax cross-entropy between the training logits and the one-hot
# labels, averaged over the batch.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(mlp, y)
)
# Non-trainable scalar step counter handed to optimize_loss, starting at 0.
gs = tf.get_variable(
    "global_step",
    shape=[],
    initializer=tf.constant_initializer(0),
    trainable=False,
)
# Learning rate as a constant tensor.
lr = tf.constant(learning_rate)

In [None]:
# Training step: minimise `cost` with RMSProp, with `gs` as the global step.
train_op = tf.contrib.layers.optimize_loss(
    cost,
    global_step=gs,
    learning_rate=lr,
    optimizer="RMSProp",
)

In [None]:
# Training accuracy is measured on a dropout-free copy of the network (keep
# probability 1.0) that shares `weights` and `biases` with the training graph.
# Reading it from the dropout-enabled `mlp` would report noisy, pessimistic
# numbers that are not comparable with the test accuracy.
pred = tf.equal(
    tf.argmax(multilayer_perceptron(x, weights, biases, 1.0), 1),
    tf.argmax(y, 1))  # True where the predicted class matches the label
train_acc_op = tf.reduce_mean(tf.cast(pred, "float"))  # Cast boolean to float to average
tf.scalar_summary("training accuracy", train_acc_op)

In [None]:
# Per-example correctness on the test inputs: True where the arg-max of the
# test logits equals the arg-max of the one-hot test label.
test_pred = tf.equal(
    tf.argmax(mlp_test, 1),
    tf.argmax(yt, 1),
)
# Mean of the 0/1 correctness values is the accuracy.
test_acc_op = tf.reduce_mean(tf.cast(test_pred, tf.float32))
tf.scalar_summary("test 0 accuracy", test_acc_op)

In [None]:
# Remove any previous TensorBoard logs for this run name so summaries from
# old runs do not mix with the new ones, then list what is left under logs/.
!rm -rf ./logs/{logfile}
!ls logs

### Original MNIST Data

In [None]:
from tensorflow.examples.tutorials.mnist import input_data

# Load the standard MNIST splits from the current directory with one-hot labels.
mnist = input_data.read_data_sets(".", one_hot=True)

# Original (un-augmented) training images and labels.
trX_0 = mnist.train.images
trY_0 = mnist.train.labels

# Original test images and labels; these are fed to the test-accuracy op.
teX_0 = mnist.test.images
teY_0 = mnist.test.labels

In [None]:
# Data source for training; its next_epoch() is called once per epoch in the
# training loop to obtain that epoch's images and labels.
# NOTE(review): AlignMNIST is not defined in the visible cells -- presumably
# it comes from augmentmnist.py; confirm.
alignmnist = AlignMNIST()

In [None]:
# Per-epoch accuracy histories, filled in by the training loop and plotted below.
test_accuracies = []
train_accuracies = []
with tf.Session() as sess:
    # Create a log writer. View with: tensorboard --logdir=./logs/<logfile>
    writer = tf.train.SummaryWriter("./logs/{0}".format(logfile), sess.graph)  # TF 0.8 API
    merged = tf.merge_all_summaries()

    tf.initialize_all_variables().run()

    for epoch in range(num_epochs):
        # NOTE(review): next_epoch() presumably returns this epoch's
        # (images, labels) arrays -- confirm against augmentmnist.py.
        trX, trY = alignmnist.next_epoch()

        # The end indices must run up to and including len(trX); stopping the
        # range at len(trX) silently dropped the last batch of every epoch.
        batch_starts = range(0, len(trX), batch_size)
        batch_ends = range(batch_size, len(trX) + 1, batch_size)
        for start, end in zip(batch_starts, batch_ends):
            sess.run(train_op, feed_dict={x: trX[start:end], y: trY[start:end]})

        # Evaluate once per epoch: this epoch's training data and the
        # original MNIST test set.
        summary, trn_acc, tst_acc = sess.run(
            [merged, train_acc_op, test_acc_op],
            feed_dict={x: trX, y: trY, xt: teX_0, yt: teY_0})
        writer.add_summary(summary, epoch)

        train_accuracies.append(trn_acc)
        test_accuracies.append(tst_acc)

        writer.flush()

    # Release the event file once training is finished.
    writer.close()
        

In [None]:
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline  

In [None]:
# Plot the per-epoch training and test accuracy curves on the same axes.
plt.plot(train_accuracies)
plt.plot(test_accuracies)

In [None]:
# Render the current figure.
plt.show()

In [None]:
# Best test accuracy reached over all epochs (a fraction, as produced by test_acc_op).
np.max(test_accuracies)

The best test error (in percent) is still only:

In [None]:
# Convert the best test accuracy into an error rate in percent.
100.0*(1.0-np.max(test_accuracies))