In [6]:
import tensorflow as tf
import numpy as np

In [17]:
# to make this notebook's output stable across runs
def reset_graph(seed=1):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

## Model Construction Phase

1. Major net parameters

In [62]:
reset_graph()
n_inputs = 28 * 28 #MNIST
n_hidden1 = 300
n_hidden2 = 100
n_output = 10

2. Define input data format

In [63]:
# define placeholder node in the computation graph
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
y = tf.placeholder(tf.int64, shape=(None), name="y") # y will be just 1D tensor, int

3. (optional) Define a function that generats a fully-connected layer

In [64]:
# define a non-input neural layer function
def neuron_layer(X, n_neurons, name, activation=None):
    """
    ---INPUT---
    X: input computation node of size (n_instances, n_inputs). n_input should be the 
       n_neurons of previous layer
    n_neurons: int, count of neuron in current layer. This number will also be the
       n_input of next layer
    name: name of the layer to be saved in name_scope
    activation: activation function
    ---OUTPUT---
    Z: resulted computation node output of shape (n_instances, n_neurons)
    """
    # define a name scope such that these computation nodes come under the same name
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])
        # widely accepted little tweak that just works (for now)
        stddev = 2 / np.sqrt(n_inputs + n_neurons)
        # truncated random weights ensures that no large weights are created
        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)
        W = tf.Variable(init, name='kernel')
        b = tf.Variable(tf.zeros([n_neurons]), name='bias')
        Z = tf.matmul(X, W) + b
        if activation is not None:
            return activation(Z)
        else:
            return Z

Above function is already provided by `tf.layers.dense`. In production, there is no need to re-invent the wheel for this

4. Construct hidden layers using FC (fully connected) layers

In [None]:
# construct graph using customized function
with tf.name_scope('dnn'):
    hidden1 = neuron_layer(X, n_hidden1, name='hidden1', activation=tf.nn.relu)
    hidden2 = neuron_layer(hidden1, n_hidden2, name='hidden2', activation=tf.nn.relu)
    # the "logits" is pre-softmax node
    logits = neuron_layer(hidden2, n_output, name='outputs')

In [65]:
# alternatively using tf.layers.dense
with tf.name_scope('dnn'):
    hidden1 = tf.layers.dense(X, n_hidden1, name='hidden1', activation=tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name='hidden2', activation=tf.nn.relu)
    # the "logits" is pre-softmax node
    logits = neuron_layer(hidden2, n_output, name='outputs')

5. Define loss function for GD

In [66]:
with tf.name_scope('loss'):
    # this function takes labels from y, one hot it, and then use the logits node to 
    # calculate loss
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    # average xentropy as the "loss" of the model current state
    loss = tf.reduce_mean(xentropy, name='loss')

6. Define the optimizer that does gradient back propagation based on loss function

In [67]:
learning_rate = 0.01

with tf.name_scope('train'):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

7. Define the evaluation nodes that calculates based on pure accuracy

In [68]:
with tf.name_scope('eval'):
    # tells if the logits perdictions are in top 1
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

(Optional) Define logger

In [69]:
from datetime import datetime
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = 'mlp_logs'
logdir = "{}/run-{}/".format(root_logdir, now)

In [70]:
with tf.name_scope('log'):
    acc_summary = tf.summary.scalar('Acc', accuracy)
    file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

8. Define init and saver

In [71]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

## Model Execution Phase

In [72]:
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("/tmp/data/")

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [73]:
n_epochs = 40
batch_size = 50

In [52]:
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: mnist.validation.images,
                                           y: mnist.validation.labels})
        print("Epoch {0}: Train Accuracy - {1}, Val Accuracy - {2}".format(
            epoch + 1, acc_train, acc_val))
    save_path = saver.save(sess, './my_model_final.ckpt')

Epoch 1: Train Accuracy - 0.9599999785423279, Val Accuracy - 0.9064000248908997
Epoch 2: Train Accuracy - 0.9800000190734863, Val Accuracy - 0.9287999868392944
Epoch 3: Train Accuracy - 0.8799999952316284, Val Accuracy - 0.9369999766349792
Epoch 4: Train Accuracy - 0.9399999976158142, Val Accuracy - 0.9434000253677368
Epoch 5: Train Accuracy - 0.9599999785423279, Val Accuracy - 0.9485999941825867
Epoch 6: Train Accuracy - 0.9800000190734863, Val Accuracy - 0.9557999968528748
Epoch 7: Train Accuracy - 0.9200000166893005, Val Accuracy - 0.9592000246047974
Epoch 8: Train Accuracy - 0.9599999785423279, Val Accuracy - 0.9624000191688538
Epoch 9: Train Accuracy - 0.8999999761581421, Val Accuracy - 0.9629999995231628
Epoch 10: Train Accuracy - 0.9200000166893005, Val Accuracy - 0.9643999934196472
Epoch 11: Train Accuracy - 0.9399999976158142, Val Accuracy - 0.967199981212616
Epoch 12: Train Accuracy - 0.9800000190734863, Val Accuracy - 0.9685999751091003
Epoch 13: Train Accuracy - 0.980000019

(Optional) Run the below codes to instead save log information

In [74]:
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size + 1):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            if iteration % 10 == 0:
                summary_str = acc_summary.eval(feed_dict={X:X_batch, y:y_batch})
                # step is total number of minibatches from beginning
                step = epoch * (mnist.train.num_examples // batch_size + 1) + iteration
                # call the file_writer to add the above information
                file_writer.add_summary(summary_str, step)
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: mnist.validation.images,
                                           y: mnist.validation.labels})
        print("Epoch {0}: Train Accuracy - {1}, Val Accuracy - {2}".format(
            epoch + 1, acc_train, acc_val))
    save_path = saver.save(sess, './my_model_final.ckpt')
    file_writer.close()

Epoch 1: Train Accuracy - 0.9599999785423279, Val Accuracy - 0.9097999930381775
Epoch 2: Train Accuracy - 0.8999999761581421, Val Accuracy - 0.9279999732971191
Epoch 3: Train Accuracy - 0.9599999785423279, Val Accuracy - 0.9373999834060669
Epoch 4: Train Accuracy - 0.9399999976158142, Val Accuracy - 0.9437999725341797
Epoch 5: Train Accuracy - 0.9800000190734863, Val Accuracy - 0.9480000138282776
Epoch 6: Train Accuracy - 0.9599999785423279, Val Accuracy - 0.9539999961853027
Epoch 7: Train Accuracy - 0.9399999976158142, Val Accuracy - 0.9589999914169312
Epoch 8: Train Accuracy - 0.9800000190734863, Val Accuracy - 0.9624000191688538
Epoch 9: Train Accuracy - 1.0, Val Accuracy - 0.9624000191688538
Epoch 10: Train Accuracy - 1.0, Val Accuracy - 0.9656000137329102
Epoch 11: Train Accuracy - 0.9800000190734863, Val Accuracy - 0.967199981212616
Epoch 12: Train Accuracy - 0.9599999785423279, Val Accuracy - 0.9684000015258789
Epoch 13: Train Accuracy - 0.9800000190734863, Val Accuracy - 0.9700

## Apply the Stored Model on New Data

In [None]:
with tf.Session() as sess:
    saver.restore( sess, "./ my_model_final.ckpt")
    X_new_scaled = [...] # some new images (scaled from 0 to 1)
    Z = logits.eval(feed_dict ={ X: X_new_scaled})
    y_pred = np.argmax(Z, axis = 1)