In [1]:
# Dependencies for this notebook: TensorFlow builds and runs the model, the
# MNIST tutorial helper loads the dataset, and NumPy handles the loss arrays.
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import warnings
# Install a blanket 'ignore' filter so Python warnings do not clutter the
# cell outputs. NOTE(review): this also hides deprecation notices.
warnings.filterwarnings('ignore')
# Show the TensorFlow version in use (the recorded output below is '0.12.0').
tf.__version__

'0.12.0'

In [2]:
# Load the MNIST dataset from the local data directory; labels are requested
# as one-hot vectors.
mnist_dir = './data/MNIST_data'
mnist = input_data.read_data_sets(mnist_dir, one_hot=True)

Extracting ./data/MNIST_data/train-images-idx3-ubyte.gz
Extracting ./data/MNIST_data/train-labels-idx1-ubyte.gz
Extracting ./data/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ./data/MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
# Training hyper-parameters
learning_rate = 0.01  # step size handed to the optimizer
epochs = 250  # number of training epochs
batch_size = 100  # number of examples per training step

# Graph inputs. Each MNIST image is flattened to 28*28=784 values and each
# label is a one-hot vector over the 10 digit classes (0-9). The leading
# dimension is None so the number of examples per feed is not fixed.
x = tf.placeholder(dtype=tf.float32, shape=[None, 784], name='inputs')
y = tf.placeholder(dtype=tf.float32, shape=[None, 10], name='labels')

In [4]:
# define maxout
def max_out(inputs, num_units, axis=None):
    """Apply a maxout reduction along one axis of ``inputs``.

    The chosen axis (size ``C``) is split into ``num_units`` consecutive groups
    of ``C // num_units`` entries, and each group is replaced by its maximum.

    Args:
        inputs: tensor whose static shape is available via ``get_shape()``.
        num_units: number of output features along ``axis``; must divide the
            size of that axis.
        axis: axis to pool over. ``None`` (the default) means the last axis.

    Returns:
        A tensor shaped like ``inputs`` except that ``axis`` has size
        ``num_units``.

    Raises:
        IndexError: if ``axis`` is out of range for the rank of ``inputs``.
        ValueError: if the size of ``axis`` is statically unknown or is not a
            multiple of ``num_units``.
    """
    shape = inputs.get_shape().as_list()
    rank = len(shape)
    if axis is None:  # Assume that channel is the last dimension
        axis = -1
    if not -rank <= axis < rank:
        raise IndexError('axis {} is out of range for an input of rank {}'.format(axis, rank))
    if axis < 0:
        # Work with a non-negative index so the pool dimension can be
        # addressed as ``axis + 1`` below.
        axis += rank
    num_channels = shape[axis]
    if num_channels is None:
        raise ValueError('the size of axis {} must be statically known'.format(axis))
    if num_channels % num_units:
        raise ValueError('number of features({}) is not '
                         'a multiple of num_units({})'.format(num_channels, num_units))
    if shape[0] is None:
        # Unknown batch size: let reshape infer it.
        shape[0] = -1
    # Split the pooled axis in place into (num_units, pool_size). Appending
    # the pool dimension at the end of the shape is only equivalent when
    # ``axis`` is the last axis; for any other axis it would group the wrong
    # elements together.
    shape[axis:axis + 1] = [num_units, num_channels // num_units]
    outputs = tf.reduce_max(tf.reshape(inputs, shape), axis + 1, keep_dims=False)
    return outputs

In [5]:
# define weight variable initializer
def create_weight_variable(name, shape):
    """Create a named variable whose initial value comes from the Xavier initializer.

    Args:
        name: name given to the created variable.
        shape: shape of the variable, passed to the initializer.

    Returns:
        The ``tf.Variable`` holding the initialized weights.
    """
    xavier_init = tf.contrib.layers.xavier_initializer_conv2d()
    initial_value = xavier_init(shape=shape)
    weights = tf.Variable(initial_value, name=name)
    return weights

In [6]:
# define bias variable initializer
def create_bias_variable(name, shape):
    """Create a named float32 variable initialized to zeros.

    Args:
        name: name given to the created variable.
        shape: shape of the variable, passed to the initializer.

    Returns:
        The ``tf.Variable`` holding the zero-initialized bias.
    """
    zeros_init = tf.constant_initializer(value=0.0, dtype=tf.float32)
    initial_value = zeros_init(shape=shape)
    bias = tf.Variable(initial_value, name=name)
    return bias

In [7]:
def make_predictions(inputs=None, hidden_units=100, maxout_units=50, num_classes=10):
    """Build the maxout classifier and return its softmax output.

    The network is: linear layer -> maxout -> linear layer -> softmax.
    Called with no arguments it builds the same graph as before: the global
    placeholder ``x`` (784 features) feeds 100 hidden units, pooled by maxout
    down to 50 units, followed by a 10-way softmax.

    Args:
        inputs: 2-D input tensor. Defaults to the module-level placeholder ``x``.
        hidden_units: width of the first linear layer; must be a multiple of
            ``maxout_units`` (``max_out`` raises ``ValueError`` otherwise).
        maxout_units: number of features left after the maxout reduction.
        num_classes: number of output classes.

    Returns:
        Tensor of per-class probabilities with ``num_classes`` columns.
    """
    if inputs is None:
        inputs = x
    # Take the feature count from the input's static shape so the first
    # weight matrix always matches the input (784 for the default ``x``).
    input_dim = inputs.get_shape().as_list()[-1]
    W1 = create_weight_variable('Weights1', [input_dim, hidden_units])
    b1 = create_bias_variable('Bias1', [hidden_units])
    # The second layer consumes the maxout output, so its input width is
    # maxout_units, not hidden_units.
    W2 = create_weight_variable('Weights2', [maxout_units, num_classes])
    b2 = create_bias_variable('Bias2', [num_classes])
    t = max_out(tf.matmul(inputs, W1) + b1, maxout_units)
    return tf.nn.softmax(tf.matmul(t, W2) + b2)

In [8]:
# Construct the model, wrapping each group of ops in its own name scope
with tf.name_scope('predict'):
    pred = make_predictions()

with tf.name_scope('loss'):
    # Cross-entropy between the one-hot labels and the predicted
    # probabilities, summed over classes and averaged over the batch.
    # The probabilities are clipped away from 0 before the log: a softmax
    # output that underflows to exactly 0 would otherwise yield log(0) = -inf
    # and turn the loss and its gradients into NaN.
    clipped_pred = tf.clip_by_value(pred, 1e-10, 1.0)
    cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(clipped_pred), reduction_indices=1))

with tf.name_scope('sgd'):
    # `optimizer` is the training op: running it applies one gradient-descent
    # step on `cost`.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

with tf.name_scope('accuracy'):
    # Fraction of examples whose highest-probability class matches the label.
    accuracy = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(accuracy, tf.float32))

In [9]:
# Per-epoch loss histories, filled in by the training loop below.
train_loss_list = []
test_loss_list = []
# Number of mini-batches per epoch. It does not change between epochs, so it
# is computed once up front; floor division keeps it an integer.
total_batches = mnist.train.num_examples // batch_size
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())  # Initializing the variables
    # Training cycle
    for epoch in range(epochs):
        train_avg_loss = 0.0
        # loop over all batches
        for i in range(total_batches):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Run one optimization step and fetch the loss on this batch.
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_xs, y: batch_ys})
            train_avg_loss += c / total_batches  # Compute average loss
        train_loss_list.append(train_avg_loss)
        # Loss on the full test set after this epoch (no training op is run).
        test_avg_loss = sess.run(cost, feed_dict={x: mnist.test.images, y: mnist.test.labels})
        test_loss_list.append(test_avg_loss)
        print('Epoch: %04d' % (epoch + 1), 'training cost=', '{:.9f}'.format(train_avg_loss), 'test cost=',
              '{:.9f}'.format(test_avg_loss))

Epoch: 0001 training cost= 0.992127061 test cost= 0.546172738
Epoch: 0002 training cost= 0.486424812 test cost= 0.403940916
Epoch: 0003 training cost= 0.394900460 test cost= 0.347531468
Epoch: 0004 training cost= 0.349487334 test cost= 0.313572079
Epoch: 0005 training cost= 0.319800078 test cost= 0.290561527
Epoch: 0006 training cost= 0.298104208 test cost= 0.272943735
Epoch: 0007 training cost= 0.280992032 test cost= 0.259365231
Epoch: 0008 training cost= 0.267008742 test cost= 0.247563377
Epoch: 0009 training cost= 0.255171297 test cost= 0.237542138
Epoch: 0010 training cost= 0.244690942 test cost= 0.228677735
Epoch: 0011 training cost= 0.235410459 test cost= 0.221095294
Epoch: 0012 training cost= 0.227307682 test cost= 0.214637548
Epoch: 0013 training cost= 0.219859619 test cost= 0.208010152
Epoch: 0014 training cost= 0.213029524 test cost= 0.202875242
Epoch: 0015 training cost= 0.206710415 test cost= 0.198648781
Epoch: 0016 training cost= 0.200902558 test cost= 0.193033233
Epoch: 0

Epoch: 0134 training cost= 0.052662160 test cost= 0.090703920
Epoch: 0135 training cost= 0.052373560 test cost= 0.091183268
Epoch: 0136 training cost= 0.052051427 test cost= 0.090390757
Epoch: 0137 training cost= 0.051670812 test cost= 0.090215638
Epoch: 0138 training cost= 0.051403070 test cost= 0.090726137
Epoch: 0139 training cost= 0.051052178 test cost= 0.090391323
Epoch: 0140 training cost= 0.050706308 test cost= 0.090661392
Epoch: 0141 training cost= 0.050348488 test cost= 0.090103909
Epoch: 0142 training cost= 0.050023676 test cost= 0.090388015
Epoch: 0143 training cost= 0.049764921 test cost= 0.090039387
Epoch: 0144 training cost= 0.049453851 test cost= 0.089703523
Epoch: 0145 training cost= 0.049101816 test cost= 0.089106478
Epoch: 0146 training cost= 0.048743579 test cost= 0.089719467
Epoch: 0147 training cost= 0.048563860 test cost= 0.090022340
Epoch: 0148 training cost= 0.048229816 test cost= 0.089120418
Epoch: 0149 training cost= 0.047934272 test cost= 0.089682803
Epoch: 0

In [None]:
import matplotlib.pyplot as plt

In [None]:
train_loss_list = np.array(train_loss_list)
test_loss_list = np.array(test_loss_list)
x = np.arange(0, 250, 1)

In [None]:
plt.figure(figsize=(14, 7))  # set image size
plt.plot(x, train_loss_list, c='r', ls='dotted')
plt.plot(x, test_loss_list, c='g')
plt.xlim(0, 260)
plt.ylim(0, 0.8)
plt.legend(['training loss', 'test loss'])
plt.show()