In [42]:
import tensorflow as tf
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
from tensorflow.contrib.layers import fully_connected
import numpy as np
from datetime import datetime
from time import time

In [2]:
# Load MNIST and split it into a 60,000-sample training set and a held-out test set.
# NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and removed in 0.22;
# on newer scikit-learn this call has to be replaced (e.g. by fetch_openml) -- confirm
# the target environment before upgrading.
mnist = fetch_mldata("MNIST original")
train_size = 60000
# The split is shuffled, so pin the seed: without it every kernel restart produces a
# different train/test partition, and a checkpoint restored from an earlier run would be
# "tested" on samples it may have been trained on.
X_train, X_test, y_train, y_test = train_test_split(
    mnist.data,
    mnist.target.astype(int),
    train_size=train_size,
    shuffle=True,
    random_state=42,
)
# Rescale pixel intensities to small floats (assumes raw values are bytes in 0..255).
X_train, X_test = X_train/256, X_test/256
# m = number of training samples, n = number of features per sample.
m,n = X_train.shape
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
print(X_train[:5])



(60000, 784) (10000, 784) (60000,) (10000,)
[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]


In [32]:
def fetch_batch(X, y, batch_size):
    """Draw a random mini-batch of aligned rows from ``X`` and ``y``.

    A full random permutation of the row indices of ``X`` is generated with
    NumPy's global random state and its first ``batch_size`` entries are used,
    so rows are sampled without replacement within a single batch. If
    ``batch_size`` exceeds the number of rows, every row is returned (in
    shuffled order).

    Args:
        X: array indexable by an integer index array along its first axis.
        y: array indexed with the same row indices as ``X``.
        batch_size: maximum number of rows to return.

    Returns:
        A two-element list ``[X_batch, y_batch]``.
    """
    n_samples = X.shape[0]
    shuffled = np.random.permutation(n_samples)
    picked = shuffled[:batch_size]
    X_batch = X[picked]
    y_batch = y[picked]
    return [X_batch, y_batch]
def get_learning_rate(lr_start, epoch):
    """Return the learning rate to use for a given epoch.

    The schedule has three phases:
      * epochs below 100: the constant ``lr_start``;
      * from epoch 100 on: ``lr_start`` scaled by ``1 - 0.01 * (epoch - 100)``,
        i.e. a linear decay that starts at ``lr_start`` at epoch 100;
      * the result is never allowed to drop below ``lr_start / 10``.

    Args:
        lr_start: initial learning rate.
        epoch: zero-based epoch index.

    Returns:
        ``lr_start`` unchanged during the constant phase, otherwise the value
        produced by ``np.maximum`` for the decayed rate and its floor.
    """
    hold_epochs = 100
    if epoch >= hold_epochs:
        decayed = (1-0.01*(epoch-hold_epochs))*lr_start
        floor = lr_start/10
        return np.maximum(decayed, floor)
    return lr_start

In [40]:
# construction phase
# Builds the TF1 graph: a 784 -> 300 -> 100 -> 10 fully connected classifier, its
# cross-entropy loss, an SGD training op with a fed learning rate, and the
# checkpoint/summary objects used by the execution cell.

# A UTC timestamp gives every run its own TensorBoard log and checkpoint directory.
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "logs"
logdir = "{}/run-{}".format(root_logdir, now)
root_savedir = "checkpoints"
savedir = "{}/run-{}".format(root_savedir, now)

# Start from an empty graph so re-running this cell does not pile up duplicate ops.
tf.reset_default_graph()
n_neurons = 50  # only referenced by the commented-out dense_3 layer below
lr_start = 0.1

X = tf.placeholder(tf.float32, shape=[None, 784])
# Integer class ids: the sparse cross-entropy op takes int32/int64 labels, not one-hot rows.
y = tf.placeholder(tf.int32, shape=[None])
# Fed by the execution cell; no layer built in this cell consumes it (nor bn_params).
is_training = tf.placeholder(tf.bool, shape=(), name="is_training")
bn_params = {'is_training': is_training,'decay': 0.99,'updates_collections': None}

with tf.variable_scope("forward"):
    dense_1 = fully_connected(X, 300, scope="dense_1",activation_fn=tf.nn.elu)
    dense_2 = fully_connected(dense_1, 100, scope="dense_2", activation_fn=tf.nn.elu)
    # dense_3 = fully_connected(dense_2, n_neurons, scope="dense_3")
    # No activation on the last layer: the loss below expects raw logits.
    output = fully_connected(dense_2, 10, activation_fn=None, scope="output")
with tf.variable_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output, labels=y,name="xentropy")
    loss = tf.reduce_mean(xentropy,name="loss")
with tf.variable_scope("annealing"):
    # The learning rate is a placeholder so the Python-side schedule can change it per epoch.
    lr = tf.placeholder(tf.float32, shape=(), name="learning_rate")
with tf.variable_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(lr)
    training_op = optimizer.minimize(loss)
with tf.variable_scope("eval"):
    pred = tf.argmax(output,axis=1,name="prediction")
with tf.variable_scope("save"):
    saver = tf.train.Saver()
    # Both summaries wrap the same loss tensor; which data set they describe depends on
    # what is fed when they are evaluated, so each tag must match its variable name.
    # (The tags were previously swapped, which logged the test loss as "train_loss".)
    train_loss_summary = tf.summary.scalar("train_loss", loss)
    test_loss_summary = tf.summary.scalar("test_loss", loss)
    filewriter = tf.summary.FileWriter(logdir,tf.get_default_graph())


In [43]:
# execution phase

# Train the network with mini-batch SGD, periodically logging the test loss/accuracy
# and writing checkpoints, then save a final checkpoint and report accuracies.
with tf.Session() as sess:
    start_running = time()
    sess.run(tf.global_variables_initializer())
    n_epochs = 350
    batch_size = 1000
    n_batches = int(np.ceil(m/batch_size))
    try:
        for epoch in range(n_epochs):
            # The learning rate is fixed within an epoch and fed to the optimizer.
            lr_ = get_learning_rate(lr_start,epoch)
            for batch in range(n_batches):
                # Batches are drawn independently at random, so an "epoch" here is
                # n_batches random batches rather than one exact pass over the data.
                X_batch, y_batch = fetch_batch(X_train, y_train, batch_size)
                sess.run(training_op, feed_dict={X:X_batch,y:y_batch,lr:lr_,is_training:True})
            if epoch%20==0:
                start_time = time()
                test_loss_summary_, loss_, pred_ = sess.run([test_loss_summary,loss, pred], feed_dict={X:X_test,y:y_test,is_training:False})
                #train_loss_summary_ = sess.run(train_loss_summary, feed_dict={X:X_train,y:y_train})
                filewriter.add_summary(test_loss_summary_,epoch)
                #filewriter.add_summary(train_loss_summary_,epoch)
                print("Epoch",epoch,"loss:",loss_, "accuracy:",np.mean(pred_==y_test))
                print("lr:",lr_)
                print("Time to inspect and log:", time()-start_time)
                print("Time elapsed:",time()-start_running)

            if epoch%100==0:
                start_time = time()
                saver.save(sess,savedir+"/model.ckpt")
                print("Time to save checkpoints:",time()-start_time)
    finally:
        # Push queued summaries to disk even when training is interrupted from the
        # notebook; flush (not close) keeps the writer usable if this cell is re-run.
        filewriter.flush()
    saver.save(sess,savedir+"/model_final.ckpt")
    train_accuracy = np.mean(sess.run(pred, feed_dict={X:X_train, y:y_train,is_training:False})==y_train)
    test_accuracy = np.mean(sess.run(pred, feed_dict={X:X_test, y:y_test,is_training:False})==y_test)
    print("Test accuracy :", test_accuracy)
    print("Train accuracy:", train_accuracy)
    

Epoch 0 loss: 0.442828 accuracy: 0.8823
lr: 0.1
Time to inspect and log: 0.19151830673217773
Time to save checkpoints: 0.1523749828338623
Epoch 20 loss: 0.187308 accuracy: 0.9472
lr: 0.1
Time to inspect and log: 0.19510483741760254
Epoch 40 loss: 0.130007 accuracy: 0.9613
lr: 0.1
Time to inspect and log: 0.2531859874725342
Epoch 60 loss: 0.102674 accuracy: 0.9683
lr: 0.1
Time to inspect and log: 0.1849982738494873
Epoch 80 loss: 0.0885186 accuracy: 0.9732
lr: 0.1
Time to inspect and log: 0.192521333694458
Epoch 100 loss: 0.0797625 accuracy: 0.9747
lr: 0.01
Time to inspect and log: 0.18650293350219727
Time to save checkpoints: 0.152740478515625
Epoch 120 loss: 0.078173 accuracy: 0.9755
lr: 0.01
Time to inspect and log: 0.2000410556793213
Epoch 140 loss: 0.0779948 accuracy: 0.9754
lr: 0.01
Time to inspect and log: 0.18951153755187988


KeyboardInterrupt: 

In [16]:
# evaluation
# Restore a previously saved checkpoint into the current graph and report its accuracy
# on the in-memory train/test arrays.
# This cell no longer recomputes now/logdir/savedir: it never used them, and overwriting
# them made a later re-run of the training cell save checkpoints under a timestamp that
# no longer matched the run's log directory.
# NOTE(review): the path points at one specific earlier run, and the reported numbers are
# only meaningful if X_train/X_test are the same split that run was trained on.
checkpoint_path = "checkpoints/run-20180416001636/model_final.ckpt"
with tf.Session() as sess:
    # restore() loads every saved variable, so no initializer needs to run first.
    saver.restore(sess, checkpoint_path)
    # is_training is fed as False to stay consistent with the evaluation calls in the
    # training cell.
    train_accuracy = np.mean(sess.run(pred, feed_dict={X:X_train, y:y_train, is_training:False})==y_train)
    test_accuracy = np.mean(sess.run(pred, feed_dict={X:X_test, y:y_test, is_training:False})==y_test)
    print("Test accuracy :", test_accuracy)
    print("Train accuracy:", train_accuracy)
    

INFO:tensorflow:Restoring parameters from checkpoints/run-20180416001636/model_final.ckpt
Test accuracy : 0.9457
Train accuracy: 0.946983333333
