In [4]:
import tensorflow as tf
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
from tensorflow.contrib.layers import fully_connected
import numpy as np
from datetime import datetime
from time import time

In [2]:
# Load MNIST and split it into a training set and a held-out test set.
mnist = fetch_mldata("MNIST original")
train_size = 60000
# Fixed seed so the shuffled split (and therefore every reported metric) is
# reproducible after Restart Kernel -> Run All.
split_seed = 42
X_train, X_test, y_train, y_test = train_test_split(
    mnist.data,
    mnist.target.astype(int),  # labels are cast to int so they can be fed as class indices
    train_size=train_size,
    shuffle=True,
    random_state=split_seed,
)
# Scale features by 1/256 (assumes raw pixel intensities are 0..255 -- TODO confirm).
X_train, X_test = X_train/256, X_test/256
# m = number of training samples, n = number of features per sample.
m,n = X_train.shape
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
print(X_train[:5])



(60000, 784) (10000, 784) (60000,) (10000,)
[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]


In [11]:
def fetch_batch(X,y,batch_size):
    """Draw one random mini-batch of aligned rows from X and y.

    A fresh permutation of all row indices is generated on every call and its
    first ``batch_size`` entries are used, so rows are unique within a batch
    but batches drawn by successive calls are independent of each other.
    If ``batch_size`` exceeds the number of rows, every row is returned once.

    Args:
        X: array indexed along its first axis (assumed to be a numpy array).
        y: array with one entry per row of X (assumed to be a numpy array).
        batch_size: maximum number of rows to draw.

    Returns:
        A two-element list ``[X_batch, y_batch]``.
    """
    n_samples = X.shape[0]
    shuffled = np.random.permutation(n_samples)
    picked = shuffled[:batch_size]
    X_batch = X[picked]
    y_batch = y[picked]
    return [X_batch, y_batch]
def get_learning_rate(lr_start, epoch, decay_factor=10, decay_every=150.0):
    """Step-decay learning-rate schedule.

    The rate starts at ``lr_start`` and is divided by ``decay_factor`` once
    for every full ``decay_every`` epochs that have elapsed. With the default
    arguments this is the original schedule: divide by 10 every 150 epochs.

    Args:
        lr_start: learning rate used during the first ``decay_every`` epochs.
        epoch: zero-based index of the current epoch.
        decay_factor: divisor applied at each decay step.
        decay_every: number of epochs between decay steps (must be non-zero).

    Returns:
        The learning rate to use for ``epoch``.
    """
    # int() truncates, so the rate stays constant within each decay_every-epoch window.
    n_decays = int(epoch / float(decay_every))
    return lr_start / decay_factor ** n_decays

In [29]:
# construction phase
# Builds the TF1 graph: two ELU hidden layers (300 and 100 units) with L1 weight
# regularization, a 10-way logits layer, a cross-entropy + regularization loss,
# a plain gradient-descent training op, and the saver / summary ops.

# Timestamp the run so each execution writes to its own log and checkpoint directory.
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "logs"
# NOTE(review): the "L2-" prefix does not match the L1 regularizer used below --
# confirm which one is intended before renaming either.
logdir = "{}/L2-{}".format(root_logdir, now)
root_savedir = "checkpoints"
savedir = "{}/L2-{}".format(root_savedir, now)

# Start from an empty graph so re-running this cell does not accumulate duplicate ops.
tf.reset_default_graph()
n_neurons = 50  # only referenced by the commented-out dense_3 layer below
lr_start = 0.1

X = tf.placeholder(tf.float32, shape=[None, 784])
# Class indices; sparse_softmax_cross_entropy_with_logits accepts int32 or int64 labels.
y = tf.placeholder(tf.int32, shape=[None])
# is_training is fed by the execution cell but no op built in this cell consumes it;
# bn_params is likewise not passed to any layer here.
is_training = tf.placeholder(tf.bool, shape=(), name="is_training")
bn_params = {'is_training': is_training,'decay': 0.99,'updates_collections': None}

with tf.variable_scope("forward"):
    regularizer = tf.contrib.layers.l1_regularizer(scale=0.0001)
    dense_1 = fully_connected(X, 300, scope="dense_1",activation_fn=tf.nn.elu, weights_regularizer=regularizer)
    dense_2 = fully_connected(dense_1, 100, scope="dense_2", activation_fn=tf.nn.elu, weights_regularizer=regularizer)
    # dense_3 = fully_connected(dense_2, n_neurons, scope="dense_3")
    # Raw logits: softmax is applied inside the loss op, so no activation here.
    output = fully_connected(dense_2, 10, activation_fn=None, scope="output")
with tf.variable_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output, labels=y,name="xentropy")
    # Regularization terms registered by the layers that were given weights_regularizer.
    reg_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    # Total loss = summed regularization penalties + mean cross-entropy over the batch.
    loss = tf.add(tf.reduce_sum(reg_loss), tf.reduce_mean(xentropy), name="loss")
with tf.variable_scope("annealing"):
    # The learning rate is fed per step so the schedule can be computed in Python.
    lr = tf.placeholder(tf.float32, shape=(), name="learning_rate")
with tf.variable_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(lr)
    training_op = optimizer.minimize(loss)
with tf.variable_scope("eval"):
    pred = tf.argmax(output,axis=1,name="prediction")
with tf.variable_scope("save"):
    saver = tf.train.Saver()
    # Both summaries wrap the same loss tensor; which dataset they describe is
    # decided by the feed_dict at run time, so each tag must match its variable name
    # (they were previously swapped, mislabelling the curves in TensorBoard).
    train_loss_summary = tf.summary.scalar("train_loss", loss)
    test_loss_summary = tf.summary.scalar("test_loss", loss)
    filewriter = tf.summary.FileWriter(logdir,tf.get_default_graph())


In [30]:
# execution phase
# Trains the graph with randomly drawn mini-batches, evaluates on the full train
# and test sets every 10 epochs (writing loss summaries), checkpoints every 50
# epochs, and reports the final accuracies.

with tf.Session() as sess:
    start_running = time()
    sess.run(tf.global_variables_initializer())
    n_epochs = 200
    batch_size = 1000
    # Number of mini-batch steps per epoch (batches are sampled independently by
    # fetch_batch, so an "epoch" here is a step count, not a full pass over the data).
    n_batches = int(np.ceil(m/batch_size))
    for epoch in range(n_epochs):
        # The learning rate is recomputed once per epoch from the step-decay schedule.
        lr_ = get_learning_rate(lr_start,epoch)
        for batch in range(n_batches):
            X_batch, y_batch = fetch_batch(X_train, y_train, batch_size)
            sess.run(training_op, feed_dict={X:X_batch,y:y_batch,lr:lr_,is_training:True})
        if epoch%10==0:
            # lr is not fed here: only loss/prediction ops run, not the training op.
            test_loss_summary_, loss_test, pred_test = sess.run([test_loss_summary,loss, pred], feed_dict={X:X_test,y:y_test,is_training:False})
            train_loss_summary_, loss_train, pred_train = sess.run([train_loss_summary,loss,pred], feed_dict={X:X_train,y:y_train,is_training:False})
            filewriter.add_summary(test_loss_summary_,epoch)
            filewriter.add_summary(train_loss_summary_,epoch)
            # Push pending events to disk now instead of waiting for the writer's
            # periodic flush, so the latest points are not left in its buffer.
            filewriter.flush()
            print("Epoch:",epoch)
            print("test loss : %.4f" % loss_test , "test accuracy : %.4f" % np.mean(pred_test==y_test))
            print("train loss: %.4f" % loss_train, "train accuracy: %.4f" % np.mean(pred_train==y_train))
            print("elapsed time: %.0f" % (time()-start_running), "lr: %.5f" % lr_)
        if epoch%50==0:
            # Periodic checkpoint; the same path is overwritten each time.
            saver.save(sess,savedir+"/model.ckpt")
    saver.save(sess,savedir+"/model_final.ckpt")
    # Final accuracies are computed inside the session, on the trained weights.
    train_accuracy = np.mean(sess.run(pred, feed_dict={X:X_train, y:y_train,is_training:False})==y_train)
    test_accuracy = np.mean(sess.run(pred, feed_dict={X:X_test, y:y_test,is_training:False})==y_test)
    print("Test accuracy :", test_accuracy)
    print("Train accuracy:", train_accuracy)
    

Epoch: 0
test loss : 1.4916 test accuracy : 0.8800
train loss: 1.5065 train accuracy: 0.8774
elapsed time: 5 lr: 0.10000
Epoch: 10
test loss : 1.1646 test accuracy : 0.9274
train loss: 1.1617 train accuracy: 0.9278
elapsed time: 44 lr: 0.10000
Epoch: 20
test loss : 0.9878 test accuracy : 0.9414
train loss: 0.9768 train accuracy: 0.9462
elapsed time: 84 lr: 0.10000
Epoch: 30
test loss : 0.8440 test accuracy : 0.9506
train loss: 0.8300 train accuracy: 0.9555
elapsed time: 125 lr: 0.10000
Epoch: 40
test loss : 0.7280 test accuracy : 0.9539
train loss: 0.7105 train accuracy: 0.9608
elapsed time: 166 lr: 0.10000
Epoch: 50
test loss : 0.6289 test accuracy : 0.9575
train loss: 0.6094 train accuracy: 0.9651
elapsed time: 205 lr: 0.10000
Epoch: 60
test loss : 0.5461 test accuracy : 0.9582
train loss: 0.5244 train accuracy: 0.9673
elapsed time: 247 lr: 0.10000
Epoch: 70
test loss : 0.4747 test accuracy : 0.9621
train loss: 0.4522 train accuracy: 0.9696
elapsed time: 287 lr: 0.10000
Epoch: 80
tes