In [30]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import time
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [31]:
# get the dataset
mnist_data = input_data.read_data_sets("MNIST_data/", one_hot=True)
train_X, train_Y, test_X, test_Y = mnist_data.train.images, \
mnist_data.train.labels, mnist_data.test.images, mnist_data.test.labels

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [46]:
# define the hyper params
lr = 0.01
epochs = 30
batch_size = 64
num_iters_per_epoch = train_X.shape[0] // batch_size
height = width = int(np.sqrt(train_X.shape[1]))
num_classes = train_Y.shape[1]
dropout_rate = 0.5
fil_h = 5
fil_w = 5
inp_dep = 1
hid1_dep = 64
hid2_dep = hid1_dep * 2
flatten_units = (height//4)*(width//4)*hid2_dep
dense_units = 1024

In [47]:
# define placeholders and variables
X = tf.placeholder(tf.float32, [None, height*width])
Y = tf.placeholder(tf.float32, [None, num_classes])
keep_prob = tf.placeholder(tf.float32)

weights = {
    "conv_w1": tf.Variable(tf.random_normal([fil_h, fil_w, inp_dep, hid1_dep])),
    "conv_w2": tf.Variable(tf.random_normal([fil_h, fil_w, hid1_dep, hid2_dep])),
    "dense_w1": tf.Variable(tf.random_normal([flatten_units, dense_units])),
    "dense_w2": tf.Variable(tf.random_normal([dense_units, num_classes]))
}

biases = {
    "conv_b1": tf.Variable(tf.ones([hid1_dep])),
    "conv_b2": tf.Variable(tf.ones([hid2_dep])),
    "dense_b1": tf.Variable(tf.ones([dense_units])),
    "dense_b2": tf.Variable(tf.ones([num_classes]))
}

In [50]:
# functions needed to make the CNN network
def conv2d(x, w, b, strides=1):
    x = tf.nn.conv2d(x, w, strides=[1, strides, strides, 1], padding="SAME")
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def maxpooling2d(x, strides=2):
    return tf.nn.max_pool(x, ksize=[1, strides, strides, 1], strides=[1, strides, strides, 1], padding="SAME")

def mnist_conv_net(x, weights, biases, dropout_rate):
    x = tf.reshape(x, shape=[-1, 28, 28, 1])
    x = conv2d(x, weights["conv_w1"], biases["conv_b1"])
    x = maxpooling2d(x, 2)
    x = conv2d(x, weights["conv_w2"], biases["conv_b2"])
    x = maxpooling2d(x, 2)
    x = tf.reshape(x, [-1, flatten_units])
    x = tf.add(tf.matmul(x, weights["dense_w1"]), biases["dense_b1"])
    x = tf.nn.relu(x)
    x = tf.nn.dropout(x, dropout_rate)
    out = tf.add(tf.matmul(x, weights["dense_w2"]), biases["dense_b2"])
    return out

In [53]:
loss_log = []
test_loss_log = []
acc_log = []
test_acc_log = []

pred = mnist_conv_net(X, weights, biases, keep_prob)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=Y))
train_op = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss)
acc = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1)), tf.float32))

# to measure total training time
start_time = time.time()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for ep in range(epochs):
        # shuffle dataset for every epoch
        rand_indexes = np.random.permutation(train_X.shape[0])
        tmp_loss = 0.0
        tmp_test_loss = 0.0
        tmp_acc = 0.0
        tmp_test_acc = 0.0
        
        for it in range(num_iters_per_epoch):
            sess.run(train_op, feed_dict={X: train_X[rand_indexes][it*batch_size:(it+1)*batch_size],\
                                          Y: train_Y[rand_indexes][it*batch_size:(it+1)*batch_size],\
                                          keep_prob: dropout_rate})
            tmp_loss += sess.run(loss, feed_dict={X: train_X[rand_indexes][it*batch_size:(it+1)*batch_size],\
                                                 Y: train_Y[rand_indexes][it*batch_size:(it+1)*batch_size],\
                                                 keep_prob: dropout_rate}) / batch_size
            tmp_test_loss += sess.run(loss, feed_dict={X: test_X[it*batch_size:(it+1)*batch_size],\
                                                       Y: test_Y[it*batch_size:(it+1)*batch_size],\
                                                       keep_prob: 1.0}) / batch_size
            tmp_acc += sess.run(acc, feed_dict={X: train_X[rand_indexes][it*batch_size:(it+1)*batch_size],\
                                                Y: train_Y[rand_indexes][it*batch_size:(it+1)*batch_size],\
                                                keep_prob: dropout_rate})
            tmp_test_acc += sess.run(acc, feed_dict={X: test_X[it*batch_size:(it+1)*batch_size],\
                                                     Y: test_Y[it*batch_size:(it+1)*batch_size],\
                                                     keep_prob: 1.0})
        
        loss_log.append(tmp_loss / num_iters_per_epoch)
        test_loss_log.append(tmp_test_loss / num_iters_per_epoch)
        acc_log.append(tmp_acc / num_iters_per_epoch)
        test_acc_log.append(tmp_test_acc / num_iters_per_epoch)
        
        print("epoch:{} acc:{:.3f} loss:{:.3f} test_acc:{:.3f} test_loss:{:.3f}"\
              .format(ep+1, tmp_acc, tmp_loss, tmp_test_acc, tmp_test_loss))
        
end_time = time.time()
print("Total Processing Time:", end_time - start_time)

epoch:1 acc:295.062 loss:29720.294 test_acc:nan test_loss:nan


KeyboardInterrupt: 

In [45]:
X.shape

TensorShape([Dimension(None), Dimension(784)])