In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

In [2]:
INPUT_NODE = 784
OUTPUT_NODE = 10
IMAGE_SIZE = 28
NUM_CHANNELS = 1
NUM_LABELS = 10

CONV1_DEEP = 32
CONV1_SIZE = 5

CONV2_DEEP = 64
CONV2_SIZE = 5

FC_SIZE1 = 128
FC_SIZE2 = 64

KEEP_PROB = 0.9

BATCH_SIZE = 100
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99

REGULARIZATION_RATE = 1e-4
MOVING_AVERAGE_DECAY = 0.99
TRAIN_STEP = 1000
MODEL_PATH = 'model/le_net.ckpt'
IS_TRAIN = True

In [3]:
def inference(input_tensor, train, regularizer=None):
    with tf.variable_scope('layer1-conv'):
        w1 = tf.get_variable(
            'weight', 
            [CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1)
        )
        b1 = tf.get_variable(
            'bias',
            [CONV1_DEEP],
            initializer=tf.constant_initializer(0.0)
        )
        
        conv1 = tf.nn.conv2d(
            input_tensor,
            w1,
            strides=[1, 1, 1, 1],
            padding='SAME'
        )
        
        a1 = tf.nn.relu(tf.nn.bias_add(conv1, b1))
    
    with tf.variable_scope('layer1-pool'):
        pool1 = tf.nn.max_pool(
            a1,
            ksize=[1, 2, 2, 1],
            strides=[1, 1, 1, 1],
            padding='SAME'
        )
    
    with tf.variable_scope('layer2-conv'):
        w2 = tf.get_variable(
            'weight',
            [CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1)
        )
        
        b2 = tf.get_variable(
            'bias',
            [CONV2_DEEP],
            initializer=tf.constant_initializer(0.0)
        )
        
        conv2 = tf.nn.conv2d(
            a1,
            w2,
            strides=[1, 1, 1, 1],
            padding='SAME'
        )
        
        a2 = tf.nn.relu(tf.nn.bias_add(conv2, b2))
    
    with tf.variable_scope('layer2-pool'):
        pool2 = tf.nn.max_pool(
            a2,
            ksize=[1, 2, 2, 1],
            strides=[1,2, 2, 1],
            padding='SAME'
        )
        
    pool_shape = pool2.get_shape().as_list()
    nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
    reshaped = tf.reshape(pool2, [pool_shape[0], nodes])
    
    with tf.variable_scope('layer3-fc'):
        w3 = tf.get_variable(
            'weight',
            [nodes, FC_SIZE1],
            initializer=tf.truncated_normal_initializer(stddev=0.1)
        )
        
        b3 = tf.get_variable(
            'bias',
            [FC_SIZE1],
            initializer=tf.constant_initializer(0.0)
        )
        
        a3 = tf.nn.relu(tf.matmul(reshaped, w3) + b3)
        
        if regularizer != None:
            tf.add_to_collection('losses', regularizer(w3))
        
        if train:
            tf.nn.dropout(a3, KEEP_PROB)
            
    with tf.variable_scope('layer4-fc'):
        w4 = tf.get_variable(
            'weight',
            [FC_SIZE1, FC_SIZE2],
            initializer=tf.truncated_normal_initializer(stddev=0.1)
        )
        
        b4 = tf.get_variable(
            'bias',
            [FC_SIZE2],
            initializer=tf.constant_initializer(0.0)
        )
        
        if regularizer != None:
            tf.add_to_collection('losses', regularizer(w4))
        
        logit = tf.matmul(a3, w4) + b4
        
    return logit


In [4]:
def mnist_train(mnist):
    x = tf.placeholder(tf.float32, [BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS], name='x-input')
    y = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')
    
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    
    y_ = inference(x, IS_TRAIN, regularizer)
    
    global_step = tf.Variable(0, trainable=False)
    
    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    ema_op = ema.apply(tf.trainable_variables())
    
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.argmax(y, 1), logits=y_)
    
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples // BATCH_SIZE,
        LEARNING_RATE_DECAY
    )
    
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    
    # control_dependencies的作用要求在上下文定义的变量必须在依赖执行完才能执行
    # 这里定义一个不执行的 伪操作，来一次性执行两个操作
    with tf.control_dependencies([train_step, ema_op]):
        train_op = tf.no_op(name='train')
        
    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        
        for i in range(TRAIN_STEP):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            reshaped_xs = np.reshape(xs, [BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS])
#             print(reshaped_xs)
            _, loss_value, step = sess.run(
                [train_op, loss, global_step],
                feed_dict={x:reshaped_xs, y:ys}
            )
#             
            if (i+1) % 100 == 0:
                print('After %d training steps, loss on training batch is %g' %(i+1, loss_value))
                saver.save(sess, MODEL_PATH, global_step=global_step)
                
    

In [5]:
def main(argv=None):
    
    data_path = r'D:\Workspace\python\Jupyter\trytf\01_MNIST\data'
    mnist = input_data.read_data_sets(data_path, one_hot=True)
    mnist_train(mnist)

In [6]:
if __name__ == '__main__':
    tf.app.run()

Extracting D:\Workspace\python\Jupyter\trytf\01_MNIST\data\train-images-idx3-ubyte.gz
Extracting D:\Workspace\python\Jupyter\trytf\01_MNIST\data\train-labels-idx1-ubyte.gz
Extracting D:\Workspace\python\Jupyter\trytf\01_MNIST\data\t10k-images-idx3-ubyte.gz
Extracting D:\Workspace\python\Jupyter\trytf\01_MNIST\data\t10k-labels-idx1-ubyte.gz
After 100 training steps, loss on training batch is 8658.2
After 200 training steps, loss on training batch is 8521.12
After 300 training steps, loss on training batch is 8386.52
After 400 training steps, loss on training batch is 8254.23
After 500 training steps, loss on training batch is 8124.37
After 600 training steps, loss on training batch is 7996.7
After 700 training steps, loss on training batch is 7871.29
After 800 training steps, loss on training batch is 7748.04
After 900 training steps, loss on training batch is 7626.99
After 1000 training steps, loss on training batch is 7508.04


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
