## 5 Layer MLP w/InfiMNIST 10k

10,000 epochs of RMSProp

In [1]:
import tensorflow as tf
import numpy as np
import input_data

In [2]:
# Execute infimnist.py inside the notebook's namespace.
# NOTE(review): later cells call InfiMNIST(), which no visible import provides —
# presumably it is defined by this script; confirm.
%run infimnist.py

In [3]:
# Training schedule: number of passes of the outer training loop and the
# number of examples fed to each optimizer step.
num_epochs, batch_size = 10000, 125

# Step size handed to the optimizer.
learning_rate = 1e-3

In [5]:
# Network architecture: input width, four hidden layers, output width.
n_input = 784      # MNIST data input (img shape: 28*28)
n_hidden_1 = 2000  # 1st hidden layer num features
n_hidden_2 = 1500  # 2nd hidden layer num features
n_hidden_3 = 1000  # 3rd hidden layer num features
n_hidden_4 = 500   # 4th hidden layer num features
n_classes = 10     # MNIST total classes (0-9 digits)

# One initialisation scale per layer width: 1 / sqrt(width).
std_0, std_h1, std_h2, std_h3, std_h4 = (
    1.0 / np.sqrt(width)
    for width in (n_input, n_hidden_1, n_hidden_2, n_hidden_3, n_hidden_4)
)

# Name of this run's sub-directory under ./logs.
logfile = "5-layer-mlp-infimnist"

In [6]:
# Graph inputs.
# x / y are fed to the network instance that is trained and scored for
# training accuracy.
x = tf.placeholder("float", shape=[None, n_input])
y = tf.placeholder("float", shape=[None, n_classes])

# xt / yt are fed to the second network instance, which is only scored for
# test accuracy.
xt = tf.placeholder("float", shape=[None, n_input])
yt = tf.placeholder("float", shape=[None, n_classes])

In [7]:
def multilayer_perceptron(_X, _weights, _biases):
    """Build a ReLU MLP with four hidden layers and a linear output layer.

    Args:
        _X: input tensor; it is matrix-multiplied by ``_weights['h1']``.
        _weights: mapping holding the weight matrices under the keys
            'h1', 'h2', 'h3', 'h4' and 'out'.
        _biases: mapping holding the bias tensors under the keys
            'b1', 'b2', 'b3', 'b4' and 'out'.

    Returns:
        The output of the final affine layer; no activation is applied to it.
    """
    activation = _X
    for w_key, b_key in (('h1', 'b1'), ('h2', 'b2'), ('h3', 'b3'), ('h4', 'b4')):
        activation = tf.nn.relu(
            tf.add(tf.matmul(activation, _weights[w_key]), _biases[b_key])
        )

    # Use the parameters that were passed in. The previous version read the
    # notebook-level `weights` / `biases` globals here, which silently ignored
    # the caller's output-layer parameters.
    return tf.matmul(activation, _weights['out']) + _biases['out']

In [8]:
# Trainable parameters, keyed by layer name.
# Each entry is (key, shape, stddev); every variable is initialised from a
# normal distribution with that stddev. Creation order is weights h1..out,
# then biases b1..out.
_weight_specs = [
    ('h1', [n_input, n_hidden_1], std_0),
    ('h2', [n_hidden_1, n_hidden_2], std_h1),
    ('h3', [n_hidden_2, n_hidden_3], std_h2),
    ('h4', [n_hidden_3, n_hidden_4], std_h3),
    ('out', [n_hidden_4, n_classes], std_h4),
]
weights = dict(
    (key, tf.Variable(tf.random_normal(shape, stddev=std)))
    for key, shape, std in _weight_specs
)

_bias_specs = [
    ('b1', [n_hidden_1], 0.1),
    ('b2', [n_hidden_2], 0.01),
    ('b3', [n_hidden_3], 0.01),
    ('b4', [n_hidden_4], 0.01),
    ('out', [n_classes], 0.001),
]
biases = dict(
    (key, tf.Variable(tf.random_normal(shape, stddev=std)))
    for key, shape, std in _bias_specs
)

In [9]:
# Two network instances over the same `weights` / `biases` variables:
# one reading the training placeholder x, one reading the test placeholder xt.
mlp = multilayer_perceptron(_X=x, _weights=weights, _biases=biases)
mlp_test = multilayer_perceptron(_X=xt, _weights=weights, _biases=biases)

In [10]:
# Loss: softmax cross-entropy between the network output and the labels fed
# to y, averaged with reduce_mean.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=mlp, labels=y)
)

# Non-trainable scalar step counter, initialised to 0 and handed to the optimizer.
gs = tf.get_variable(
    "global_step",
    shape=[],
    trainable=False,
    initializer=tf.constant_initializer(0),
)

# Learning rate wrapped as a graph constant.
lr = tf.constant(learning_rate)

In [11]:
# Training op: minimise `cost` with RMSProp, using `gs` as the global step.
train_op = tf.contrib.layers.optimize_loss(
    loss=cost,
    global_step=gs,
    learning_rate=lr,
    optimizer="RMSProp",
)

In [12]:
# Training accuracy: fraction of rows whose arg-max network output equals the
# arg-max of the label fed to y.
pred = tf.equal(
    tf.argmax(mlp, 1),  # index of the largest network output per row
    tf.argmax(y, 1),    # index of the largest label entry per row
)
train_acc_op = tf.reduce_mean(tf.cast(pred, "float"))  # booleans -> floats -> mean
tf.scalar_summary("training accuracy", train_acc_op)

<tf.Tensor 'ScalarSummary_2:0' shape=() dtype=string>

In [13]:
# Test accuracy: same computation as the training accuracy, but on the
# network instance that reads the xt / yt placeholders.
test_pred = tf.equal(
    tf.argmax(mlp_test, 1),  # index of the largest network output per row
    tf.argmax(yt, 1),        # index of the largest label entry per row
)
test_acc_op = tf.reduce_mean(tf.cast(test_pred, "float"))  # booleans -> floats -> mean
tf.scalar_summary("test 0 accuracy", test_acc_op)

<tf.Tensor 'ScalarSummary_3:0' shape=() dtype=string>

In [14]:
# Create the InfiMNIST helper; its next_epoch() is called before every
# input_data.read_data_sets(".") call in the cells below.
# NOTE(review): InfiMNIST is not imported in the visible cells — presumably it
# is defined by `%run infimnist.py`; confirm.
# NOTE(review): the variable is spelled `infiminst` (not `infimnist`) and later
# cells rely on that exact spelling.
infiminst = InfiMNIST()

In [15]:
# Delete this run's previous log directory, then list what is left in logs/.
# NOTE(review): `rm -rf` with an interpolated variable — an empty `logfile`
# would remove the whole ./logs directory.
!rm -rf ./logs/{logfile}
!ls logs

[34m2-layer-mlp-infimnist[m[m


### Original MNIST Data

In [16]:
# Advance InfiMNIST once, then read the data set from the current directory.
# NOTE(review): presumably next_epoch() (re)writes the MNIST files that
# read_data_sets(".") loads — confirm in infimnist.py.
infiminst.next_epoch()
mnist = input_data.read_data_sets(".", one_hot=True)

# Keep this first snapshot under *_0 names; the training loop scores test
# accuracy against teX_0 / teY_0.
trX_0 = mnist.train.images
trY_0 = mnist.train.labels
teX_0 = mnist.test.images
teY_0 = mnist.test.labels

  chunk = self.extrabuf[offset: offset + size]
  data = data.reshape(num_images, rows, cols, 1)


In [17]:
# Per-epoch accuracies, appended to at the end of every epoch.
test_accuracies = []
train_accuracies = []
with tf.Session() as sess:
    # create a log writer. run 'tensorboard --logdir=./logs/{logfile}'
    writer = tf.train.SummaryWriter("./logs/{0}".format(logfile), sess.graph) # for 0.8
    try:
        merged = tf.merge_all_summaries()

        tf.initialize_all_variables().run()

        for epoch in range(num_epochs):
            # Advance InfiMNIST and re-read the data set for this epoch.
            infiminst.next_epoch()
            mnist = input_data.read_data_sets(".", one_hot=True)

            trX, trY = mnist.train.images, mnist.train.labels
            # teX / teY are not used below: test accuracy is measured against
            # the fixed teX_0 / teY_0 snapshot loaded before the loop.
            teX, teY = mnist.test.images, mnist.test.labels

            # Walk the whole training set in batch_size steps. The previous
            # zip(range(0, n, bs), range(bs, n, bs)) form never produced the
            # final batch, so the tail of the data was skipped every epoch.
            # Slicing clamps at len(trX), so a short last batch is fine.
            for start in range(0, len(trX), batch_size):
                end = start + batch_size
                sess.run(train_op, feed_dict={x: trX[start:end], y: trY[start:end]})

            # Score the full training set of this epoch and the fixed test set
            # in one run, together with the merged summaries.
            summary, trn_acc, tst_acc = sess.run(
                [merged, train_acc_op, test_acc_op],
                feed_dict={x: trX, y: trY, xt: teX_0, yt: teY_0},
            )
            writer.add_summary(summary, epoch)

            print(epoch, trn_acc, tst_acc)
            train_accuracies.append(trn_acc)
            test_accuracies.append(tst_acc)

            writer.flush()
    finally:
        # Close the event file even when the run is interrupted
        # (e.g. KeyboardInterrupt during a long training session).
        writer.close()
        

(0, 0.91018182, 0.91289997)
(1, 0.96670908, 0.9677)
(2, 0.96321815, 0.96429998)
(3, 0.95221817, 0.94630003)
(4, 0.9806, 0.97530001)
(5, 0.97541821, 0.96850002)
(6, 0.96899998, 0.9648)
(7, 0.97820002, 0.9756)
(8, 0.97714543, 0.9734)
(9, 0.98418182, 0.98150003)
(10, 0.98505455, 0.98259997)
(11, 0.98305452, 0.97759998)
(12, 0.98147273, 0.97670001)
(13, 0.97950912, 0.97689998)
(14, 0.97658181, 0.97390002)
(15, 0.98889089, 0.98509997)
(16, 0.98450911, 0.98089999)
(17, 0.9743818, 0.96509999)
(18, 0.98580003, 0.98119998)
(19, 0.9732182, 0.96460003)
(20, 0.98647273, 0.98360002)
(21, 0.99145454, 0.98610002)
(22, 0.99094546, 0.98400003)
(23, 0.98680001, 0.98220003)
(24, 0.9867273, 0.97890002)
(25, 0.98294544, 0.97780001)
(26, 0.98823637, 0.98189998)
(27, 0.99163634, 0.98659998)
(28, 0.99072725, 0.98519999)
(29, 0.99187273, 0.98549998)
(30, 0.99003637, 0.98280001)
(31, 0.98640001, 0.98000002)
(32, 0.98321819, 0.97960001)
(33, 0.99089092, 0.98439997)
(34, 0.99216366, 0.98379999)
(35, 0.98870909, 0

KeyboardInterrupt: 

In [None]:
# Histogram of the per-epoch test accuracies collected by the training loop.
# NOTE(review): `plt` is not imported in any visible cell — presumably
# matplotlib.pyplot leaks in via `%run infimnist.py`; confirm or import it
# explicitly in the imports cell.
plt.hist(test_accuracies)