In [1]:
import matplotlib.pyplot as plt
import tensorflow as tf
import math
import numpy as np

# load MNIST data
mnist = tf.keras.datasets.mnist
(x_train, y_train),(x_test, y_test) = mnist.load_data()

# normalize data
x_train, x_test = x_train / 255.0, x_test / 255.0

In [2]:
#
# Build a simple NN with hidden layers
#
def build_layer(x, units, activation=None):
    input_features = int(x.shape[1])
    stddev = 2 / np.sqrt(input_features)
    w = tf.Variable(tf.truncated_normal([input_features, units], stddev=stddev))
    b = tf.Variable(tf.zeros([1, units]))
    output = tf.add(tf.matmul(x, w), b)
    return activation(output) if activation else output
    
def build_model(input_features, output_features, hidden_units):
    
    # inputs
    input_shape = [None]
    input_shape.extend(input_features)      # [None, 28, 28]
    x = tf.placeholder(tf.float32, input_shape, name="input")
    print('input', x)
    
    # flatten
    y = tf.layers.Flatten()(x)
    print('flatten input', y)
    
    # hidden layers
    for units in hidden_units:
        y = build_layer(y, units, tf.nn.relu)
        print('hidden layer', y)
        
    # output layer
    y = build_layer(y, output_features)
    print('output', y)
    
    # targets
    labels = tf.placeholder(tf.int32, [None], name="labels")
    print('labels', labels)
    
    return x, y, labels

# shapes
input_shape=x_train.shape[1:]
output_features=len(set(y_train))

# build our model
X, logits, y = build_model(input_shape, output_features, [512, 10])

('input', <tf.Tensor 'input:0' shape=(?, 28, 28) dtype=float32>)
('flatten input', <tf.Tensor 'flatten/Reshape:0' shape=(?, 784) dtype=float32>)
('hidden layer', <tf.Tensor 'Relu:0' shape=(?, 512) dtype=float32>)
('hidden layer', <tf.Tensor 'Relu_1:0' shape=(?, 10) dtype=float32>)
('output', <tf.Tensor 'Add_2:0' shape=(?, 10) dtype=float32>)
('labels', <tf.Tensor 'labels:0' shape=(?,) dtype=int32>)


In [3]:
# cost function
# tf.nn.sparse_softmax_cross_entropy_with_logits takes labels as numbers, logits as one-hot vector
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits))

# optimizer as gradient descent
optimizer = tf.train.GradientDescentOptimizer(0.1).minimize(cost)
    
# accuracy metric
acc, acc_op = tf.metrics.accuracy(labels=y, predictions=tf.argmax(logits,1))

# run training session
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    
    batch_size=32
    batch_count=x_train.shape[0] // batch_size

    for epoch in range(5):
        for i in range(batch_count):
            x_batch = x_train[i*batch_size: (i+1)*batch_size]
            y_batch = y_train[i*batch_size: (i+1)*batch_size]
            sess.run(optimizer, feed_dict={X:x_batch, y:y_batch})
        
        print('Epoch {}: Loss: {}, train accuracy: {}, test accuracy: {}\r'.format(epoch, 
                                                     sess.run(cost, feed_dict={X:x_train, y:y_train}), 
                                                     sess.run(acc_op, feed_dict={X:x_train, y:y_train}),
                                                     sess.run(acc_op, feed_dict={X:x_test, y:y_test})))

Epoch 0: Loss: 0.131886079907, train accuracy: 0.960500001907, test accuracy: 0.959742844105
Epoch 1: Loss: 0.0860697701573, train accuracy: 0.965730786324, test accuracy: 0.9656214118
Epoch 2: Loss: 0.0555194951594, train accuracy: 0.970570027828, test accuracy: 0.970561921597
Epoch 3: Loss: 0.040143750608, train accuracy: 0.974300026894, test accuracy: 0.974207162857
Epoch 4: Loss: 0.033744353801, train accuracy: 0.97678822279, test accuracy: 0.976671457291


In [4]:
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(512, activation=tf.nn.relu),
  tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(x_train, y_train, epochs=5)
model.evaluate(x_test, y_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[0.06355307361135493, 0.9815]