This experiment uses TensorFlow eager execution mode to build an MNIST softmax regression model for handwritten digit recognition.

First, here is a simple example:

In [27]:
import tensorflow as tf
import tensorflow.contrib.eager as tfe
import numpy as np
tfe.enable_eager_execution()

# TensorFlow side: a (3, 1) column times a (1, 3) row yields a (3, 3) matrix.
a = tf.constant([[12, 32, 4]])
b = tf.constant([[1], [2], [3]])
c = tf.matmul(b, a)
for tensor in (a, b, c):
    print(tensor)

# NumPy side: the operands are multiplied in the opposite order,
# (1, 3) x (3, 1), so the product is a single (1, 1) entry.
m = np.array([[12, 32, 4]])
n = np.array([[1], [2], [3]])
p = np.matmul(m, n)
for array in (m, n, p):
    print(array, array.shape)




tf.Tensor([[12 32  4]], shape=(1, 3), dtype=int32)
tf.Tensor(
[[1]
 [2]
 [3]], shape=(3, 1), dtype=int32)
tf.Tensor(
[[12 32  4]
 [24 64  8]
 [36 96 12]], shape=(3, 3), dtype=int32)
[[12 32  4]] (1, 3)
[[1]
 [2]
 [3]] (3, 1)
[[88]] (1, 1)


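Next, we list four functions for calculating gradients.
Let f(x) = O(x, y, z, ..., m), where x is the explicit argument of f and y, z, ..., m are variables that f uses implicitly.
tfe.gradients_function()   # computes the gradient with respect to x

tfe.value_and_gradients_function() # computes the value of f(x) and the gradient with respect to x

tfe.implicit_gradients()  # computes the gradients with respect to y, z, ..., m

tfe.implicit_value_and_gradients() # computes the value of f(x) and the gradients with respect to y, z, ..., m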

In [35]:
import tensorflow as tf
import tensorflow.contrib.eager as tfe
import numpy as np
tfe.enable_eager_execution()


# `x` is the variable that loss() below captures implicitly.
x = tfe.Variable(initial_value = 2.0, name = 'x')
# NOTE(review): this variable `y` is never read in this cell; inside loss() the
# name `y` refers to the function parameter, which shadows it.
y = tfe.Variable(initial_value = 3.0, name = 'y')
print(x.numpy())
# prints 2.0
print("#######")
def loss(y):
    """Return (y - x^2)^2 for the explicit argument `y` and the captured variable `x`."""
    return (y - x ** 2) ** 2
print(loss(7).numpy())
# (7 - 2^2)^2 = 3^2 = 9
print("#######")
# implicit_gradients: gradient with respect to the captured variable x.
grad = tfe.implicit_gradients(loss)
print(grad(7.))
print("#######")
# d/dx: 2 * (y - x ** 2) * (-2 * x) = 2 * (7 - 2 ** 2) * (-2 * 2) = 2 * 3 * (-4) = -24
# Sample output, a list of (gradient, variable) pairs:
# [(<tf.Tensor: id=834, shape=(), dtype=float32, numpy=-24.0>, <tf.Variable 'x:0' shape=() dtype=float32, numpy=2.0>)]

# gradients_function: gradient with respect to the explicit argument y.
grad = tfe.gradients_function(loss)
print(grad(7.0))
# d/dy: 2 * (y - x^2) = 2 * (7 - 2^2) = 6
# Sample output:
# [<tf.Tensor: id=2139, shape=(), dtype=float32, numpy=6.0>]

# value_and_gradients_function: loss value plus the gradient with respect to y.
grad = tfe.value_and_gradients_function(loss)
print(grad(7.0))
# value = 9, d/dy = 6
# Sample output:
# (<tf.Tensor: id=2186, shape=(), dtype=float32, numpy=9.0>, [<tf.Tensor: id=2198, shape=(), dtype=float32, numpy=6.0>])

# implicit_value_and_gradients: loss value plus the gradient with respect to x.
grad = tfe.implicit_value_and_gradients(loss)
print(grad(7.))
print("#######")  
# value = 9, d/dx = -24
# Sample output:
# (<tf.Tensor: id=1220, shape=(), dtype=float32, numpy=9.0>, 
# [(<tf.Tensor: id=1257, shape=(), dtype=float32, numpy=-24.0>, <tf.Variable 'x:0' shape=() dtype=float32, numpy=2.0>)])

# Second example: loss() now captures two variables, x and z.
x = tfe.Variable(initial_value = 2.0, name = 'x')
# NOTE(review): as in the first example, this variable `y` is never read; the
# `y` inside loss() is the function parameter.
y = tfe.Variable(initial_value = 3.0, name = 'y')
z = tfe.Variable(initial_value = 3.0, name = 'z')
print(x)
# prints the variable itself (value 2.0) rather than only its numpy value
print("#######")

def loss(y):
    """Return (y - x^2 + z^2)^2 for the argument `y` and the captured variables `x`, `z`."""
    return (y - x ** 2 + z ** 2) ** 2
print(loss(7).numpy())
# (7 - 2^2 + 3^2)^2 = 12^2 = 144
print("#######")
# implicit_gradients: one (gradient, variable) pair per captured variable.
grad = tfe.implicit_gradients(loss)
print(grad(7.))
print("#######")
# d/dx: 2 * (y - x^2 + z^2) * (-2 * x) = 2 * 12 * (-4) = -96
# d/dz: 2 * (y - x^2 + z^2) * (2 * z)  = 2 * 12 * 6    = 144
# Sample output:
# [(<tf.Tensor: id=1381, shape=(), dtype=float32, numpy=-96.0>, <tf.Variable 'x:0' shape=() dtype=float32, numpy=2.0>), 
# (<tf.Tensor: id=1356, shape=(), dtype=float32, numpy=144.0>, <tf.Variable 'z:0' shape=() dtype=float32, numpy=3.0>)]


# implicit_value_and_gradients: the loss value (144) followed by the same pairs.
grad = tfe.implicit_value_and_gradients(loss)
print(grad(7.))
print("#######")
# Sample output:
# (<tf.Tensor: id=1411, shape=(), dtype=float32, numpy=144.0>,
# [(<tf.Tensor: id=1472, shape=(), dtype=float32, numpy=-96.0>, <tf.Variable 'x:0' shape=() dtype=float32, numpy=2.0>), 
# (<tf.Tensor: id=1447, shape=(), dtype=float32, numpy=144.0>, <tf.Variable 'z:0' shape=() dtype=float32, numpy=3.0>)])


2.0
#######
9.0
#######
[(<tf.Tensor: id=2103, shape=(), dtype=float32, numpy=-24.0>, <tf.Variable 'x:0' shape=() dtype=float32, numpy=2.0>)]
#######
[<tf.Tensor: id=2139, shape=(), dtype=float32, numpy=6.0>]
(<tf.Tensor: id=2186, shape=(), dtype=float32, numpy=9.0>, [<tf.Tensor: id=2198, shape=(), dtype=float32, numpy=6.0>])
(<tf.Tensor: id=2245, shape=(), dtype=float32, numpy=9.0>, [(<tf.Tensor: id=2282, shape=(), dtype=float32, numpy=-24.0>, <tf.Variable 'x:0' shape=() dtype=float32, numpy=2.0>)])
#######
<tf.Variable 'x:0' shape=() dtype=float32, numpy=2.0>
#######
144.0
#######
[(<tf.Tensor: id=2406, shape=(), dtype=float32, numpy=-96.0>, <tf.Variable 'x:0' shape=() dtype=float32, numpy=2.0>), (<tf.Tensor: id=2381, shape=(), dtype=float32, numpy=144.0>, <tf.Variable 'z:0' shape=() dtype=float32, numpy=3.0>)]
#######
(<tf.Tensor: id=2436, shape=(), dtype=float32, numpy=144.0>, [(<tf.Tensor: id=2497, shape=(), dtype=float32, numpy=-96.0>, <tf.Variable 'x:0' shape=() dtype=float32, n

Next, I will show how to use Datasets and Iterators (see https://www.tensorflow.org/api_docs/python/tf/data).

In [21]:
import tensorflow as tf
import tensorflow.contrib.eager as tfe
import numpy as np
tfe.enable_eager_execution()

###### from_tensor_slices ######
# from_tensor_slices splits its input along the first dimension, so each
# dataset element is a single slice (no batching has been applied yet).
dataset1 = tf.data.Dataset.from_tensor_slices(tf.range(10, 15))
# Emits 10, 11, 12, 13, 14 (one scalar element at a time)

dataset2 = tf.data.Dataset.from_tensor_slices((tf.range(30, 45, 3), np.arange(60, 70, 2)))
# Emits (30, 60), (33, 62), (36, 64), (39, 66), (42, 68): the two inputs are
# sliced in lockstep and paired one-to-one, one tuple per element.

for i in tfe.Iterator(dataset1):
    print(i)

for i in tfe.Iterator(dataset2):
    print(i)

###### from_tensors ######
# from_tensors does not slice: the whole input becomes a single element.
dataset3 = tf.data.Dataset.from_tensors(tf.range(10, 15))
# Emits [10, 11, 12, 13, 14]
# (the entire tensor is one element)

dataset4 = tf.data.Dataset.from_tensors((tf.range(30, 45, 3), np.arange(60, 70, 2)))
# Emits ([30, 33, 36, 39, 42], [60, 62, 64, 66, 68])
# (the entire tuple is one element)
for i in tfe.Iterator(dataset3):
    print(i)

for i in tfe.Iterator(dataset4):
    print(i)


###### batch, repeat , shuffle ######
# Ordering #1: batch -> repeat -> shuffle
dataset1 = tf.data.Dataset.from_tensor_slices(tf.range(10))
# Dataset: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

dataset1 = dataset1.batch(4)
# Dataset: [0, 1, 2, 3], [4, 5, 6, 7], [8, 9]
# (the last batch holds only the 2 leftover elements)
for i in tfe.Iterator(dataset1):
    print(i)
dataset1 = dataset1.repeat(2)
# Dataset: [0, 1, 2, 3], [4, 5, 6, 7], [8, 9], [0, 1, 2, 3], [4, 5, 6, 7], [8, 9]
# Note the 2-element batch in the middle: repeat() runs after batch(), so the
# short final batch of the first pass is kept as-is.

dataset1 = dataset1.shuffle(4)
# The elements are already batches, so shuffling reorders whole batches
# (using a shuffle buffer of 4 elements).
# One possible result: [0, 1, 2, 3], [4, 5, 6, 7], [8, 9], [8, 9], [0, 1, 2, 3], [4, 5, 6, 7]


# Ordering #2: shuffle -> repeat -> batch
dataset2 = tf.data.Dataset.from_tensor_slices(tf.range(10))
# Dataset: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

dataset2 = dataset2.shuffle(4)
# Individual elements are shuffled (buffer of 4).
# One possible result: [3, 1, 0, 4, 5, 8, 6, 9, 7, 2]

dataset2 = dataset2.repeat(2)
# Illustration: [3, 1, 0, 4, 5, 8, 6, 9, 7, 2, 3, 1, 0, 4, 5, 8, 6, 9, 7, 2]
# NOTE(review): the second pass is shown with the same order for simplicity;
# shuffle() may reshuffle on each pass -- confirm against the tf.data docs.

dataset2 = dataset2.batch(4)

# Batches are cut from the repeated stream, so a batch can span the boundary
# between the two passes:
# Illustration: [3, 1, 0, 4], [5, 8, 6, 9], [7, 2, 3, 1], [0, 4, 5, 8], [6, 9, 7, 2]

tf.Tensor(10, shape=(), dtype=int32)
tf.Tensor(11, shape=(), dtype=int32)
tf.Tensor(12, shape=(), dtype=int32)
tf.Tensor(13, shape=(), dtype=int32)
tf.Tensor(14, shape=(), dtype=int32)
(<tf.Tensor: id=545, shape=(), dtype=int32, numpy=30>, <tf.Tensor: id=546, shape=(), dtype=int32, numpy=60>)
(<tf.Tensor: id=549, shape=(), dtype=int32, numpy=33>, <tf.Tensor: id=550, shape=(), dtype=int32, numpy=62>)
(<tf.Tensor: id=553, shape=(), dtype=int32, numpy=36>, <tf.Tensor: id=554, shape=(), dtype=int32, numpy=64>)
(<tf.Tensor: id=557, shape=(), dtype=int32, numpy=39>, <tf.Tensor: id=558, shape=(), dtype=int32, numpy=66>)
(<tf.Tensor: id=561, shape=(), dtype=int32, numpy=42>, <tf.Tensor: id=562, shape=(), dtype=int32, numpy=68>)
tf.Tensor([10 11 12 13 14], shape=(5,), dtype=int32)
(<tf.Tensor: id=586, shape=(5,), dtype=int32, numpy=array([30, 33, 36, 39, 42])>, <tf.Tensor: id=587, shape=(5,), dtype=int32, numpy=array([60, 62, 64, 66, 68])>)
tf.Tensor([0 1 2 3], shape=(4,), dtype=int32)
tf.Tenso

Now let us use eager mode to build the MNIST softmax regression model to recognize handwritten digits.

In [46]:
import tensorflow as tf
import tensorflow.contrib.eager as tfe
tfe.enable_eager_execution()
from tensorflow.examples.tutorials.mnist import input_data

class MNIST:
    """Single-layer softmax regression on MNIST, trained with eager execution.

    Attributes set by ``__init__``:
      mnist:    data sets returned by ``input_data.read_data_sets`` (one-hot labels).
      W, b:     weight matrix of shape (784, 10) and bias vector of shape (10,).
      train_ds: shuffled training ``tf.data.Dataset`` delivered in batches of 100.
    """

    def __init__(self):
        """Load the MNIST data, create the model variables and the training dataset."""
        self.mnist = input_data.read_data_sets("data/MNIST_data/", one_hot=True)
        # tf.get_variable is used here in place of tf.Variable.
        self.W = tf.get_variable(name="W", shape=(784, 10))
        self.b = tf.get_variable(name="b", shape=(10,))

        # Labels are cast to float32 so they can be multiplied with the model's
        # log-probabilities (presumably they are loaded with a different dtype
        # -- TODO confirm).
        self.train_ds = (
            tf.data.Dataset.from_tensor_slices(
                (self.mnist.train.images, self.mnist.train.labels))
            .map(lambda x, y: (x, tf.cast(y, tf.float32)))
            .shuffle(buffer_size=1000)
            .batch(100)
        )

    def _logits(self, image_batch):
        """Return the unnormalized class scores ``image_batch @ W + b``."""
        return tf.matmul(image_batch, self.W) + self.b

    def softmax_model(self, image_batch):
        """Return the predicted class probabilities for ``image_batch``."""
        return tf.nn.softmax(self._logits(image_batch))

    def cross_entropy(self, image_batch, label_batch):
        """Return the mean cross-entropy between the predictions and ``label_batch``.

        The log-probabilities come from ``tf.nn.log_softmax`` applied to the
        logits rather than ``tf.log(tf.nn.softmax(...))``: if a softmax output
        underflows to 0, ``log(0) = -inf`` and ``0 * -inf`` turns the loss
        into NaN.
        """
        log_probs = tf.nn.log_softmax(self._logits(image_batch))
        per_example = -tf.reduce_sum(label_batch * log_probs, 1)
        return tf.reduce_mean(per_example)

    def cal_gradient(self, image_batch, label_batch):
        """Return ``(loss, grads_and_vars)`` for one batch.

        The logic, by analogy:
          1) define a function, e.g. y = x^2 (here: ``cross_entropy``);
          2) derive its gradient function, e.g. y' = 2x;
          3) evaluate that gradient function at a concrete input, e.g. x = 4.
        That is why ``cross_entropy`` itself is handed to
        ``tfe.implicit_value_and_gradients`` instead of the already-evaluated
        ``cross_entropy(image_batch, label_batch)``.
        """
        grad = tfe.implicit_value_and_gradients(self.cross_entropy)
        return grad(image_batch, label_batch)

    def train(self):
        """Run one pass over ``train_ds`` with gradient descent, printing the loss per step."""
        # Created once and reused for every step (learning rate 0.5).
        optimizer = tf.train.GradientDescentOptimizer(0.5)
        for step, (image_batch, label_batch) in enumerate(tfe.Iterator(self.train_ds)):
            loss, grads_and_vars = self.cal_gradient(image_batch, label_batch)
            # Why not call minimize(self.cross_entropy(image_batch, label_batch))
            # directly? Evaluated eagerly, that argument is already a concrete
            # loss value, not a function that gradients can be derived from, so
            # the gradients are computed explicitly and applied here.
            optimizer.apply_gradients(grads_and_vars)
            print("step: {} loss: {}".format(step, loss.numpy()))

    def predict(self):
        """Print the classification accuracy on the MNIST test set."""
        y = self.softmax_model(self.mnist.test.images)
        # Boolean per example: does the most probable class match the label?
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(self.mnist.test.labels, 1))
        # Cast True/False to 1.0/0.0 so that the mean is the accuracy.
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        print("test accuracy = {}".format(accuracy.numpy()))
            
            
            
            
if __name__ == "__main__":
    # End-to-end run: build the model, train for one pass over the training
    # data, then report the accuracy on the test set.
    mnist_model = MNIST()
    mnist_model.train()
    mnist_model.predict()




Extracting data/MNIST_data/train-images-idx3-ubyte.gz
Extracting data/MNIST_data/train-labels-idx1-ubyte.gz
Extracting data/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting data/MNIST_data/t10k-labels-idx1-ubyte.gz
step: 0 loss: 2.518420934677124
step: 1 loss: 1.9672133922576904
step: 2 loss: 1.9115958213806152
step: 3 loss: 1.6100528240203857
step: 4 loss: 1.3711836338043213
step: 5 loss: 1.555497169494629
step: 6 loss: 1.5340849161148071
step: 7 loss: 0.9834867715835571
step: 8 loss: 0.8099104166030884
step: 9 loss: 0.910269021987915
step: 10 loss: 0.7270058989524841
step: 11 loss: 0.9075319170951843
step: 12 loss: 0.9778170585632324
step: 13 loss: 0.9014764428138733
step: 14 loss: 0.7988381385803223
step: 15 loss: 0.6955233216285706
step: 16 loss: 0.8558200001716614
step: 17 loss: 0.6183838844299316
step: 18 loss: 0.7889552116394043
step: 19 loss: 0.6219127178192139
step: 20 loss: 0.5840185880661011
step: 21 loss: 0.7579265832901001
step: 22 loss: 0.6852337121963501
step: 23 loss: 0.

step: 237 loss: 0.450685054063797
step: 238 loss: 0.3476310074329376
step: 239 loss: 0.3358263075351715
step: 240 loss: 0.3243204355239868
step: 241 loss: 0.4013921320438385
step: 242 loss: 0.39357316493988037
step: 243 loss: 0.41609418392181396
step: 244 loss: 0.34057196974754333
step: 245 loss: 0.454276442527771
step: 246 loss: 0.4364652633666992
step: 247 loss: 0.46663933992385864
step: 248 loss: 0.310959130525589
step: 249 loss: 0.460178017616272
step: 250 loss: 0.2628069519996643
step: 251 loss: 0.32114288210868835
step: 252 loss: 0.4539031684398651
step: 253 loss: 0.5069394707679749
step: 254 loss: 0.3205595314502716
step: 255 loss: 0.5110148191452026
step: 256 loss: 0.5103697180747986
step: 257 loss: 0.612093448638916
step: 258 loss: 0.4614759087562561
step: 259 loss: 0.3232191503047943
step: 260 loss: 0.34048226475715637
step: 261 loss: 0.414230078458786
step: 262 loss: 0.3251868188381195
step: 263 loss: 0.3347572386264801
step: 264 loss: 0.41020721197128296
step: 265 loss: 0.5

step: 476 loss: 0.4645126760005951
step: 477 loss: 0.3733900487422943
step: 478 loss: 0.3258801996707916
step: 479 loss: 0.4040234386920929
step: 480 loss: 0.4823179543018341
step: 481 loss: 0.32798314094543457
step: 482 loss: 0.25747188925743103
step: 483 loss: 0.31094101071357727
step: 484 loss: 0.2920394837856293
step: 485 loss: 0.2934306263923645
step: 486 loss: 0.3596334159374237
step: 487 loss: 0.20532439649105072
step: 488 loss: 0.34153884649276733
step: 489 loss: 0.3228786885738373
step: 490 loss: 0.4614839255809784
step: 491 loss: 0.32868796586990356
step: 492 loss: 0.4221988618373871
step: 493 loss: 0.22081485390663147
step: 494 loss: 0.3258853852748871
step: 495 loss: 0.2938960790634155
step: 496 loss: 0.26152297854423523
step: 497 loss: 0.3245876431465149
step: 498 loss: 0.3915487229824066
step: 499 loss: 0.22904826700687408
step: 500 loss: 0.20780354738235474
step: 501 loss: 0.14822646975517273
step: 502 loss: 0.2812418043613434
step: 503 loss: 0.3145609200000763
step: 504