In [5]:
import tensorflow as tf
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
from tensorflow.contrib.layers import fully_connected
from tensorflow.contrib.layers import dropout
import numpy as np
from datetime import datetime
from time import time

In [2]:
mnist = fetch_mldata("MNIST original")
train_size = 60000
X_train, X_test, y_train, y_test = train_test_split(mnist.data/256-0.5, mnist.target.astype(int), train_size=train_size,shuffle=True)
m,n = X_train.shape
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
print(X_train[:5])



(60000, 784) (10000, 784) (60000,) (10000,)
[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]


In [19]:
def fetch_batch(X,y,batch_size):
    random_indice = np.random.permutation(X.shape[0])[:batch_size]
    return [X[random_indice], y[random_indice]]
def get_learning_rate(lr_start, epoch):
    return lr_start/10**int(epoch/120.0)

In [20]:
# construction phase
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "logs"
logdir = "{}/dropout-{}".format(root_logdir, now)
root_savedir = "checkpoints"
savedir = "{}/dropout-{}".format(root_savedir, now)

tf.reset_default_graph()
n_neurons = 50
lr_start = 0.2

X = tf.placeholder(tf.float32, shape=[None, 784])
y = tf.placeholder(tf.int32, shape=[None]) # int32?
is_training = tf.placeholder(tf.bool, shape=(), name="is_training")
keep_prob = 0.75

with tf.variable_scope("forward"):
    X_drop = dropout(X, keep_prob, is_training=is_training)
    dense_1 = fully_connected(X_drop, 400, scope="dense_1",activation_fn=tf.nn.elu)
    dense_1_drop = dropout(dense_1, keep_prob, is_training=is_training)
    dense_2 = fully_connected(dense_1_drop, 200, scope="dense_2", activation_fn=tf.nn.elu)
    dense_2_drop = dropout(dense_2, keep_prob, is_training=is_training)
    dense_3 = fully_connected(dense_2, 100, scope="dense_3")
    dense_3_drop = dropout(dense_3, keep_prob, is_training=is_training)
    dense_4 = fully_connected(dense_3_drop, 50, scope="dense_4")
    dense_4_drop = dropout(dense_4, keep_prob, is_training=is_training)
    output = fully_connected(dense_4_drop, 10, activation_fn=None, scope="output")
with tf.variable_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output, labels=y,name="xentropy")
    loss = tf.reduce_mean(xentropy,name="loss")
with tf.variable_scope("annealing"):
    lr = tf.placeholder(tf.float32, shape=(), name="learning_rate")
with tf.variable_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(lr)
    training_op = optimizer.minimize(loss)
with tf.variable_scope("eval"):
    pred = tf.argmax(output,axis=1,name="prediction")
with tf.variable_scope("save"):
    saver = tf.train.Saver()
    train_loss_summary = tf.summary.scalar("test_loss", loss)
    test_loss_summary = tf.summary.scalar("train_loss", loss)
    filewriter = tf.summary.FileWriter(logdir,tf.get_default_graph())


In [21]:
# execution phase

with tf.Session() as sess:
    start_running = time()
    sess.run(tf.global_variables_initializer())
    n_epochs = 240
    batch_size = 1000
    n_batches = int(np.ceil(m/batch_size))
    for epoch in range(n_epochs):
        lr_ = get_learning_rate(lr_start,epoch)
        for batch in range(n_batches):
            X_batch, y_batch = fetch_batch(X_train, y_train, batch_size)
            sess.run(training_op, feed_dict={X:X_batch,y:y_batch,lr:lr_,is_training:True})
        if epoch%10==0:
            test_loss_summary_, loss_test, pred_test = sess.run([test_loss_summary,loss, pred], feed_dict={X:X_test,y:y_test,is_training:False})
            train_loss_summary_, loss_train, pred_train = sess.run([train_loss_summary,loss,pred], feed_dict={X:X_train,y:y_train,is_training:False})
            filewriter.add_summary(test_loss_summary_,epoch)
            filewriter.add_summary(train_loss_summary_,epoch)
            print("Epoch:",epoch)
            print("test loss : %.4f" % loss_test , "test accuracy : %.4f" % np.mean(pred_test==y_test))
            print("train loss: %.4f" % loss_train, "train accuracy: %.4f" % np.mean(pred_train==y_train))
            print("elapsed time: %.0f" % (time()-start_running), "lr: %.5f" % lr_)
        if epoch%50==0:
            saver.save(sess,savedir+"/model.ckpt")
    saver.save(sess,savedir+"/model_final.ckpt")
    train_accuracy = np.mean(sess.run(pred, feed_dict={X:X_train, y:y_train,is_training:False})==y_train)
    test_accuracy = np.mean(sess.run(pred, feed_dict={X:X_test, y:y_test,is_training:False})==y_test)
    print("Test accuracy :", test_accuracy)
    print("Train accuracy:", train_accuracy)
    

Epoch: 0
test loss : 0.3834 test accuracy : 0.8867
train loss: 0.3909 train accuracy: 0.8866
elapsed time: 10 lr: 0.20000
Epoch: 10
test loss : 0.1398 test accuracy : 0.9574
train loss: 0.1364 train accuracy: 0.9576
elapsed time: 83 lr: 0.20000
Epoch: 20
test loss : 0.1076 test accuracy : 0.9683
train loss: 0.0977 train accuracy: 0.9697
elapsed time: 155 lr: 0.20000
Epoch: 30
test loss : 0.0922 test accuracy : 0.9737
train loss: 0.0740 train accuracy: 0.9766
elapsed time: 229 lr: 0.20000
Epoch: 40
test loss : 0.0792 test accuracy : 0.9777
train loss: 0.0602 train accuracy: 0.9808
elapsed time: 301 lr: 0.20000
Epoch: 50
test loss : 0.0752 test accuracy : 0.9780
train loss: 0.0522 train accuracy: 0.9836
elapsed time: 373 lr: 0.20000
Epoch: 60
test loss : 0.0695 test accuracy : 0.9807
train loss: 0.0428 train accuracy: 0.9862
elapsed time: 445 lr: 0.20000
Epoch: 70
test loss : 0.0650 test accuracy : 0.9818
train loss: 0.0369 train accuracy: 0.9886
elapsed time: 517 lr: 0.20000
Epoch: 80
t