In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [2]:
print('total train dataset', mnist.train.images.shape[0])
print('total test dataset', mnist.test.images.shape[0])
print('dimension of picture', mnist.train.images.shape[1])
print('total unique classes', np.unique(np.argmax(mnist.train.labels,axis=1)))

total train dataset 55000
total test dataset 10000
dimension of picture 784
total unique classes [0 1 2 3 4 5 6 7 8 9]


In [0]:
class RNN:
  def __init__(self, input_size, output_size, num_layer, size_layer, learning_rate):
    self.X = tf.placeholder(tf.float32, (None, None,input_size))
    self.Y = tf.placeholder(tf.float32, (None, output_size))
    def rnn_cell():
      return tf.nn.rnn_cell.BasicRNNCell(size_layer)
    self.rnn_cells = tf.nn.rnn_cell.MultiRNNCell([rnn_cell() for _ in range(num_layer)])
    outputs, states = tf.nn.dynamic_rnn(self.rnn_cells, self.X, dtype=tf.float32)
    w1 = tf.Variable(tf.random_normal([size_layer, size_layer]))
    w2 = tf.Variable(tf.random_normal([size_layer, output_size]))
    feed = tf.nn.relu(tf.matmul(outputs[:,-1], w1))
    self.logits = tf.matmul(feed, w2)
    self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.Y, logits=self.logits))
    self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)

In [0]:
SIZE = 64
EPOCH = 10
BATCH_SIZE = 128
LEARNING_RATE = 0.001
NUM_LAYER = 2
INPUT_SIZE = int(np.sqrt(mnist.train.images.shape[1]))

In [0]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = RNN(INPUT_SIZE, np.unique(np.argmax(mnist.train.labels,axis=1)).shape[0], SIZE, NUM_LAYER, LEARNING_RATE)
sess.run(tf.global_variables_initializer())

In [0]:
LOSS, ACCURACY, ACCURACY_TEST = [], [], []
for i in range(EPOCH):
  total_loss, total_acc = 0, 0
  for k in range(0, (mnist.train.images.shape[0] // BATCH_SIZE) * BATCH_SIZE, BATCH_SIZE):
    batch_x = mnist.train.images[k:k+BATCH_SIZE, :].reshape((-1, INPUT_SIZE, INPUT_SIZE))
    batch_x = batch_x / 255.0
    batch_y = mnist.train.labels[k:k+BATCH_SIZE, :]
    logits, loss, _ = sess.run([model.logits, model.cost, model.optimizer], feed_dict={model.X:batch_x, model.Y:batch_y})
    acc = np.mean(np.argmax(logits,axis=1)==np.argmax(batch_y,axis=1))
    total_loss += loss
    total_acc += acc
  total_loss /= (mnist.train.images.shape[0] // BATCH_SIZE)
  total_acc /= (mnist.train.images.shape[0] // BATCH_SIZE)
  LOSS.append(total_loss)
  ACCURACY.append(total_acc)
  total_acc = 0
  for k in range(0, (mnist.test.images.shape[0] // BATCH_SIZE) * BATCH_SIZE, BATCH_SIZE):
    batch_x = mnist.test.images[k:k+BATCH_SIZE, :].reshape((-1, INPUT_SIZE, INPUT_SIZE))
    batch_x = batch_x / 255.0
    batch_y = mnist.test.labels[k:k+BATCH_SIZE, :]
    logits = sess.run(model.logits, feed_dict={model.X:batch_x})
    acc = np.mean(np.argmax(logits,axis=1)==np.argmax(batch_y,axis=1))
    total_acc += acc
  total_acc /= (mnist.test.images.shape[0] // BATCH_SIZE)
  ACCURACY_TEST.append(total_acc)
  print('epoch %d, loss %f, training accuracy %f, testing accuracy %f'%(i+1, LOSS[-1], ACCURACY[-1], ACCURACY_TEST[-1]))

epoch 1, loss 2.306706, training accuracy 0.098867, testing accuracy 0.098057
epoch 2, loss 2.302585, training accuracy 0.098995, testing accuracy 0.098057
epoch 3, loss 2.302585, training accuracy 0.098995, testing accuracy 0.098057


In [0]:
plt.figure(figsize=(10,5))
plt.plot(ACCURACY, label='training accuracy')
plt.plot(ACCURACY_TEST, label='testing accuracy')
plt.legend()
plt.show()