In [1]:
import tensorflow as tf
import numpy as np

In [2]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

In [3]:
print(x_train.shape)

(60000, 28, 28)


In [4]:
x_val = x_train[50000: 60000]
x_train = x_train[: 50000]

In [5]:
y_val = y_train[50000: 60000]
y_train = y_train[0:50000]

In [6]:
print(x_train[0][8])
print(y_train[0:9])


[  0   0   0   0   0   0   0  18 219 253 253 253 253 253 198 182 247 241
   0   0   0   0   0   0   0   0   0   0]
[5 0 4 1 9 2 1 3 1]


In [7]:
x_train = x_train.reshape(50000, 784)
x_val = x_val.reshape(10000, 784)
x_test = x_test.reshape(10000, 784)

print(x_train.shape)
print(x_test.shape)

(50000, 784)
(10000, 784)


In [8]:
x_train = x_train.astype('float32')
x_val = x_val.astype('float32')
x_test = x_test.astype('float32')

gray_scale = 255
x_train /= gray_scale
x_val /= gray_scale
x_test /= gray_scale

In [9]:
num_classes = 10
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_val = tf.keras.utils.to_categorical(y_val, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

In [10]:
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])

In [11]:
keep_prob = tf.placeholder(tf.float32)

In [12]:
def mlp(x):
    w1 = tf.Variable(tf.random_uniform([784, 256]))
    b1 = tf.Variable(tf.zeros([256]))
    h1 = tf.nn.relu(tf.matmul(x, w1) + b1)
    
    w2 = tf.Variable(tf.random_uniform([256, 128]))
    b2 = tf.Variable(tf.zeros([128]))
    h2 = tf.nn.relu(tf.matmul(h1, w2) + b2)
    
    h2_drop = tf.nn.dropout(h2, keep_prob)
    
    w3 = tf.Variable(tf.random_uniform([128, 10]))
    b3 = tf.Variable(tf.zeros([10]))
    logits = tf.matmul(h2_drop, w3) + b3
    
    return logits

In [13]:
logits = mlp(x)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [14]:
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits= logits, labels= y))

In [15]:
train_op = tf.train.AdamOptimizer(learning_rate= 0.01).minimize(loss_op)

In [16]:
init = tf.global_variables_initializer()

In [17]:
saver = tf.train.Saver()
epoch_cnt = 300
batch_size = 1000
iteration = len(x_train) // batch_size

earlystop_threshold = 5
earlystop_cnt = 0

In [18]:
# Start training
with tf.Session() as sess:
    # Run the initializer
    sess.run(init)
    prev_train_acc = 0.0
    max_val_acc = 0.0
    
    for epoch in range(epoch_cnt):
        avg_loss = 0.
        start = 0; end = batch_size
        
        for i in range(iteration):
            _, loss = sess.run([train_op, loss_op], 
                               feed_dict={x: x_train[start: end], 
                                          y: y_train[start: end], 
                                          keep_prob: 0.9})
            start += batch_size; end += batch_size
            # Compute train average loss
            avg_loss += loss / iteration
            
        # Validate model
        preds = tf.nn.softmax(logits)  # Apply softmax to logits
        correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(y, 1))
        # Calculate accuracy
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        # train accuracy
        cur_train_acc = accuracy.eval({x: x_train, y: y_train,keep_prob: 1.0})
        # validation accuarcy
        cur_val_acc = accuracy.eval({x: x_val, y: y_val, keep_prob: 1.0})
        # validation loss
        cur_val_loss = loss_op.eval({x: x_val, y: y_val,keep_prob: 1.0})
        
        print("epoch: "+str(epoch)+
              ", train acc: " + str(cur_train_acc) +
              ", val acc: " + str(cur_val_acc) )
              #', train loss: '+str(avg_loss)+
              #', val loss: '+str(cur_val_loss))
        
        if cur_val_acc < max_val_acc:
            if cur_train_acc > prev_train_acc or cur_train_acc > 0.99:
                if earlystop_cnt == earlystop_threshold:
                    print("early stopped on "+str(epoch))
                    break
                else:
                    print("overfitting warning: "+str(earlystop_cnt))
                    earlystop_cnt += 1
            else:
                earlystop_cnt = 0
        else:
            earlystop_cnt = 0
            max_val_acc = cur_val_acc
            # Save the variables to file.
            save_path = saver.save(sess, "model/model.ckpt")
        prev_train_acc = cur_train_acc

epoch: 0, train acc: 0.1708, val acc: 0.1738
epoch: 1, train acc: 0.49194, val acc: 0.5047
epoch: 2, train acc: 0.5828, val acc: 0.6021
epoch: 3, train acc: 0.63694, val acc: 0.6571
epoch: 4, train acc: 0.67022, val acc: 0.6881
epoch: 5, train acc: 0.69594, val acc: 0.7114
epoch: 6, train acc: 0.71912, val acc: 0.7371
epoch: 7, train acc: 0.7414, val acc: 0.7564
epoch: 8, train acc: 0.76134, val acc: 0.7754
epoch: 9, train acc: 0.77972, val acc: 0.791
epoch: 10, train acc: 0.79532, val acc: 0.8074
epoch: 11, train acc: 0.81086, val acc: 0.8229
epoch: 12, train acc: 0.82484, val acc: 0.8335
epoch: 13, train acc: 0.83546, val acc: 0.8428
epoch: 14, train acc: 0.84606, val acc: 0.8514
epoch: 15, train acc: 0.85404, val acc: 0.8577
epoch: 16, train acc: 0.86364, val acc: 0.8654
epoch: 17, train acc: 0.87146, val acc: 0.8728
epoch: 18, train acc: 0.8789, val acc: 0.8794
epoch: 19, train acc: 0.88468, val acc: 0.8843
epoch: 20, train acc: 0.891, val acc: 0.8895
epoch: 21, train acc: 0.89588,

epoch: 152, train acc: 0.98412, val acc: 0.9539
epoch: 153, train acc: 0.9845, val acc: 0.956
epoch: 154, train acc: 0.9859, val acc: 0.955
epoch: 155, train acc: 0.98804, val acc: 0.9542
epoch: 156, train acc: 0.9896, val acc: 0.9571
epoch: 157, train acc: 0.98918, val acc: 0.9575
epoch: 158, train acc: 0.99068, val acc: 0.9576
epoch: 159, train acc: 0.98814, val acc: 0.9556
epoch: 160, train acc: 0.98784, val acc: 0.9556
epoch: 161, train acc: 0.98932, val acc: 0.9555
epoch: 162, train acc: 0.99026, val acc: 0.9573
epoch: 163, train acc: 0.9899, val acc: 0.9574
epoch: 164, train acc: 0.98946, val acc: 0.9584
epoch: 165, train acc: 0.9909, val acc: 0.9591
epoch: 166, train acc: 0.99062, val acc: 0.9596
epoch: 167, train acc: 0.99228, val acc: 0.9598
epoch: 168, train acc: 0.9922, val acc: 0.962
epoch: 169, train acc: 0.99124, val acc: 0.9601
epoch: 170, train acc: 0.99204, val acc: 0.9622
epoch: 171, train acc: 0.99188, val acc: 0.9598
epoch: 172, train acc: 0.99226, val acc: 0.9606
e

In [19]:
with tf.Session() as sess:
    saver.restore(sess, 'model/model.ckpt')
    correct_predicion = tf.equal(tf.argmax(preds, 1), tf.argmax(y, 1))
    
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
    print('[Test Accuracy] : ', accuracy.eval({x: x_test, y: y_test, keep_prob: 1.0}))

Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from model/model.ckpt
[Test Accuracy] :  0.958
