In [14]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import time
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [15]:
# Load MNIST from the local "MNIST_data/" directory with one-hot labels,
# then pull out the train / test image and label arrays.
mnist_data = input_data.read_data_sets("MNIST_data/", one_hot=True)
train_X = mnist_data.train.images
train_Y = mnist_data.train.labels
test_X = mnist_data.test.images
test_Y = mnist_data.test.labels

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [16]:
# define the hyper params

# --- optimisation ---
lr = 0.0001
epochs = 30
batch_size = 64
# Number of full batches per epoch; a trailing partial batch is not counted.
num_iters_per_epoch = train_X.shape[0] // batch_size
# NOTE: despite its name, this value is the probability of KEEPING a unit:
# it is what gets fed to the `keep_prob` placeholder during training.
dropout_rate = 0.75

# --- input geometry ---
# Images arrive flattened; this assumes they are square (side = sqrt(#pixels)).
height = width = int(np.sqrt(train_X.shape[1]))
channel = 1
num_classes = train_Y.shape[1]

# --- network shape ---
fil_h = 5
fil_w = 5
# The first conv filter's input depth must match the image channel count,
# so derive it from `channel` instead of repeating the literal.
inp_dep = channel
hid1_dep = 64
hid2_dep = hid1_dep * 2
# Two SAME-padded stride-2 poolings shrink each spatial dim to ceil(dim / 4).
# Ceiling division keeps this correct for sizes not divisible by 4
# (for 28x28 MNIST it gives 7x7, same as floor division).
flatten_units = ((height + 3) // 4) * ((width + 3) // 4) * hid2_dep
dense_units = 1024

In [17]:
# define placeholders and variables

# Graph inputs: flattened images, one-hot labels, and the dropout keep probability.
X = tf.placeholder(tf.float32, shape=[None, height * width])
Y = tf.placeholder(tf.float32, shape=[None, num_classes])
keep_prob = tf.placeholder(tf.float32)

# Trainable weights, created in the order conv1, conv2, dense1, dense2.
# The conv filters are drawn with stddev=0.01; the dense matrices rely on
# tf.random_normal's default stddev.
weights = {}
weights["conv_w1"] = tf.Variable(
    tf.random_normal([fil_h, fil_w, inp_dep, hid1_dep], stddev=0.01))
weights["conv_w2"] = tf.Variable(
    tf.random_normal([fil_h, fil_w, hid1_dep, hid2_dep], stddev=0.01))
weights["dense_w1"] = tf.Variable(tf.random_normal([flatten_units, dense_units]))
weights["dense_w2"] = tf.Variable(tf.random_normal([dense_units, num_classes]))

# One zero-initialised bias vector per layer, sized to that layer's output depth.
biases = {}
for bias_name, n_out in (("conv_b1", hid1_dep),
                         ("conv_b2", hid2_dep),
                         ("dense_b1", dense_units),
                         ("dense_b2", num_classes)):
    biases[bias_name] = tf.Variable(tf.zeros([n_out]))
del bias_name, n_out  # keep loop helpers out of the notebook namespace

In [18]:
# functions needed to make the CNN network
def conv2d(x, w, b, strides=1):
    """SAME-padded 2-D convolution followed by bias addition and ReLU.

    Args:
        x: input tensor passed to ``tf.nn.conv2d``.
        w: filter tensor for the convolution.
        b: bias tensor added with ``tf.nn.bias_add``.
        strides: step used for the two middle entries of the stride list
            (the outer two entries are fixed at 1).

    Returns:
        The ReLU-activated tensor.
    """
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, w, strides=stride_spec, padding="SAME")
    return tf.nn.relu(tf.nn.bias_add(convolved, b))

def maxpooling2d(x, strides=2):
    """SAME-padded max pooling whose window size equals its stride.

    Args:
        x: input tensor passed to ``tf.nn.max_pool``.
        strides: value used for both the pooling window and the step in the
            two middle dimensions.

    Returns:
        The pooled tensor.
    """
    # The same [1, s, s, 1] spec serves as both ksize and strides.
    window = [1, strides, strides, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding="SAME")

def mnist_conv_net(x, weights, biases, dropout_rate):
    """Build the CNN graph: two conv+pool stages, a dense layer, and an output layer.

    Reads the module-level ``height``, ``width``, ``channel`` and
    ``flatten_units`` settings.

    Args:
        x: batch of flattened images; reshaped here to
            ``[-1, height, width, channel]``.
        weights: dict with keys ``conv_w1``, ``conv_w2``, ``dense_w1``, ``dense_w2``.
        biases: dict with keys ``conv_b1``, ``conv_b2``, ``dense_b1``, ``dense_b2``.
        dropout_rate: forwarded as the second positional argument of
            ``tf.nn.dropout``. In the TF 1.x API this notebook uses, that slot
            is the keep probability, so pass the fraction of units to KEEP.

    Returns:
        Un-normalised class scores (no softmax is applied here).
    """
    images = tf.reshape(x, shape=[-1, height, width, channel])

    # Stage 1: conv + 2x2 pooling.
    conv1 = conv2d(images, weights["conv_w1"], biases["conv_b1"])
    pool1 = maxpooling2d(conv1, 2)

    # Stage 2: conv + 2x2 pooling.
    conv2 = conv2d(pool1, weights["conv_w2"], biases["conv_b2"])
    pool2 = maxpooling2d(conv2, 2)

    # Dense head: flatten -> affine -> ReLU -> dropout -> affine.
    flat = tf.reshape(pool2, [-1, flatten_units])
    hidden = tf.nn.relu(tf.add(tf.matmul(flat, weights["dense_w1"]), biases["dense_b1"]))
    hidden = tf.nn.dropout(hidden, dropout_rate)
    return tf.add(tf.matmul(hidden, weights["dense_w2"]), biases["dense_b2"])

In [19]:
# Per-epoch history of the training and test metrics.
loss_log = []
test_loss_log = []
acc_log = []
test_acc_log = []

# Build the graph: class scores, mean softmax cross-entropy, Adam step, accuracy.
pred = mnist_conv_net(X, weights, biases, keep_prob)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=pred))
opt = tf.train.AdamOptimizer(learning_rate=lr)
train_op = opt.minimize(loss)
acc = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1)), tf.float32))

# to measure total training time
start_time = time.time()
init = tf.global_variables_initializer()
with tf.Session() as sess:
    init.run()
    for ep in range(epochs):
        # shuffle dataset for every epoch
        rand_indexes = np.random.permutation(train_X.shape[0])
        # Running means of the per-batch training metrics for this epoch.
        tmp_acc = 0.0
        tmp_loss = 0.0

        for it in range(num_iters_per_epoch):
            # Slice the permutation first and gather only this batch's rows.
            # Indexing the full arrays with `rand_indexes` and slicing afterwards
            # would copy the entire training set twice per iteration for the
            # exact same batch.
            batch_idx = rand_indexes[it*batch_size:(it+1)*batch_size]
            _, it_acc, it_loss = sess.run(
                [train_op, acc, loss],
                feed_dict={X: train_X[batch_idx],
                           Y: train_Y[batch_idx],
                           keep_prob: dropout_rate})

            tmp_acc += it_acc / num_iters_per_epoch
            tmp_loss += it_loss / num_iters_per_epoch

        # Evaluate on the whole test set in one pass, with dropout disabled.
        tmp_test_acc, tmp_test_loss = sess.run([acc, loss], feed_dict={X: test_X, Y: test_Y, keep_prob: 1.0})

        acc_log.append(tmp_acc)
        loss_log.append(tmp_loss)
        test_acc_log.append(tmp_test_acc)
        test_loss_log.append(tmp_test_loss)

        print("epoch:{} acc:{:.3f} loss:{:.3f} test_acc:{:.3f} test_loss:{:.3f}"\
              .format(ep+1, acc_log[-1], loss_log[-1], test_acc_log[-1], test_loss_log[-1]))

end_time = time.time()
print("Total Processing Time:", end_time - start_time)

# reset the default graph
# NOTE: the placeholders and variables created in earlier cells belong to the
# graph discarded here; re-run the graph-definition cells before building the
# model again.
tf.reset_default_graph()

epoch:1 acc:0.887 loss:0.547 test_acc:0.972 test_loss:0.085
epoch:2 acc:0.962 loss:0.123 test_acc:0.981 test_loss:0.059
epoch:3 acc:0.974 loss:0.085 test_acc:0.986 test_loss:0.046
epoch:4 acc:0.979 loss:0.068 test_acc:0.986 test_loss:0.039
epoch:5 acc:0.983 loss:0.053 test_acc:0.988 test_loss:0.036
epoch:6 acc:0.985 loss:0.048 test_acc:0.989 test_loss:0.038
epoch:7 acc:0.988 loss:0.038 test_acc:0.989 test_loss:0.033
epoch:8 acc:0.989 loss:0.035 test_acc:0.990 test_loss:0.036
epoch:9 acc:0.990 loss:0.030 test_acc:0.989 test_loss:0.036
epoch:10 acc:0.992 loss:0.024 test_acc:0.991 test_loss:0.029
epoch:11 acc:0.992 loss:0.025 test_acc:0.990 test_loss:0.033
epoch:12 acc:0.993 loss:0.020 test_acc:0.989 test_loss:0.041
epoch:13 acc:0.994 loss:0.019 test_acc:0.991 test_loss:0.032
epoch:14 acc:0.995 loss:0.016 test_acc:0.990 test_loss:0.040
epoch:15 acc:0.995 loss:0.017 test_acc:0.991 test_loss:0.037
epoch:16 acc:0.995 loss:0.014 test_acc:0.991 test_loss:0.036
epoch:17 acc:0.996 loss:0.013 tes