### mnist dataset是手写数字的训练数据，项目的本质上是解释如何将图片像素数据转换成可以在机器学习算法中使用的数据

In [7]:
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("./MNIST_data/", one_hot=True)

Extracting ./MNIST_data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting ./MNIST_data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting ./MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [8]:
batch_size = 100
xs, ys = mnist.train.next_batch(batch_size)
print("X shape:",xs.shape)
print("Y shape:",ys.shape)

X shape: (100, 784)
Y shape: (100, 10)


### 深度学习一方面需要激活函数实现非线性化，
### 另外一方面需要使用一个或者多个隐藏层使得神经网络的结构更深，以解决复杂问题

> 但是随着神经网络的结构变得复杂，需要带指数衰减的学习率设置，以保证梯度下降容易收敛，使用正则化来防止过拟合，使用滑动平均
> 模型来使得最终的模型更加健壮
> 滑动平均𝑡 时刻变量 𝑣 的滑动平均值大致等于过去 1/(1−𝛽) 个时刻 𝜃 值的平均；当 𝛽 越大时，滑动平均得到的值越和 𝜃 的历史值相关。如果 𝛽=0.9，则大致等于过去 10 个 𝜃 值的平均；如果 𝛽=0.99，则大致等于过去 100 个 𝜃 值的平均。

In [14]:
import tensorflow as tf

# MNIST相关的常数
INPUT_NODE = 784
OUTPUT_NODE = 10

# 配置神经网络的参数
LAYER1_NODE = 500
BATCH_SIZE = 100

LEARNING_RATE_BASE = 0.8 #基础学习率
LEARNING_RATE_DECAY = 0.99
REGULARIZATION_RATE = 0.001
TRAINING_STEPS = 3000
MOVING_AVERAGE_DECAY = 0.99

# 辅助函数，给定输入和所有参数，计算前向传播结果
# 定义了一个使用RELU激活函数的三层全连接神经网络，通过加入隐藏层实现多层网络结构
# 通过Relu激活函数实现去线性化，在函数中也支持传入用于计算参数平均值的类
# 方便测试时使用滑动平均模型

def inference(input_tensor, avg_class, weights1, biases1,
              weights2, biases2):
    if avg_class == None:
        layer1 = tf.nn.relu(tf.matmul(input_tensor,weights1) + biases1)
        return tf.matmul(layer1,weights2) + biases2
    else:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weights1)) + avgclass.average(biases1))
        return tf.matmul(layer1, avg_class.average(weights2)) + avgclass.average(biases2)

def train(mnist):
    x = tf.placeholder(tf.float32, [None,INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None,OUTPUT_NODE], name='y-input')
    weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE,LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1,shape=[LAYER1_NODE]))
    
    weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE,OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1,shape=[OUTPUT_NODE]))
    
    y = inference(x, None, weights1,biases1,weights2,biases2)
    global_step = tf.Variable(0, trainable=False)
    
    # 对所有能训练的变量采取滑动平均操作
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())
    
    average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)
    
    # 交叉熵定义损失函数，这里的交叉熵使用sparse_softmax_cross_entropy_with_logits函数来计算交叉熵
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_,1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    
    # 计算L2正则化损失函数
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularization = regularizer(weights1) + regularizer(weights2)
    loss = cross_entropy_mean + regularization
    
    
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, 
                                               global_step, 
                                               mnist.train.num_examples/BATCH_SIZE,
                                               LEARNING_RATE_DECAY)
    
    train_step=tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    train_op = tf.group(train_step, variables_averages_op)

    correct_prediction = tf.equal(tf.argmax(average_y,1),tf.argmax(y_,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    with tf.Session() as sess:
        tf.global_variables_initializer()
        validation_feed = {x:mnist.validation.images, y:mnist.validation.labels}
        test_feed = {x:mnist.test.images, y:mnist.test.labels}
        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validation_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("validation_accuracy is %g"%(validate_acc))
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x:xs,y_:ys})
        test_acc = sess.run(accuracy,feed_dict=test_feed)
        print("model is %g"%(test_acc))

def main(argv=None):
    mnist = input_data.read_data_sets("./MNIST_data/", one_hot=True)
    train(mnist)

In [15]:
if __name__ == '__main__':
    tf.app.run()

Extracting ./MNIST_data/train-images-idx3-ubyte.gz
Extracting ./MNIST_data/train-labels-idx1-ubyte.gz
Extracting ./MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ./MNIST_data/t10k-labels-idx1-ubyte.gz


NameError: name 'avgclass' is not defined

In [16]:
import tensorflow as tf
from tensorflow import feature_column
from tensorflow.python.feature_column.feature_column import _LazyBuilder

def test_numeric():
    price = {'price': [[1.], [2.], [3.], [4.]]}  # 4行样本
    builder = _LazyBuilder(price)

    def transform_fn(x):
        return x + 2

    price_column = feature_column.numeric_column('price', normalizer_fn=transform_fn)
    price_transformed_tensor = price_column._get_dense_tensor(builder)
    with tf.Session() as session:
        print(session.run([price_transformed_tensor]))

    # 使用input_layer
    price_transformed_tensor = feature_column.input_layer(price, [price_column])
    with tf.Session() as session:
        print('use input_layer' + '_' * 40)
        print(session.run([price_transformed_tensor]))

test_numeric()

[array([[3.],
       [4.],
       [5.],
       [6.]], dtype=float32)]
use input_layer________________________________________
[array([[3.],
       [4.],
       [5.],
       [6.]], dtype=float32)]


In [26]:
def test_categorical_column_with_hash_bucket():
#     color_data = {'color': [[2,3], [5,4], [-1,1], [0,1]]}  # 4行样本
    color_data = {'color': [[2], [5], [-1], [0]]}  # 4行样本
    builder = _LazyBuilder(color_data)
    color_column = feature_column.categorical_column_with_hash_bucket('color', 5, dtype=tf.int32)
    color_column_tensor = color_column._get_sparse_tensors(builder)
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print(session.run([color_column_tensor.id_tensor]))

    # 将稀疏的转换成dense，也就是one-hot形式，只是multi-hot
    color_column_identy = feature_column.indicator_column(color_column)
    color_dense_tensor = feature_column.input_layer(color_data, [color_column_identy])

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print('use input_layer' + '_' * 40)
        print(session.run([color_dense_tensor]))

test_categorical_column_with_hash_bucket()

[SparseTensorValue(indices=array([[0, 0],
       [1, 0],
       [3, 0]]), values=array([4, 1, 0]), dense_shape=array([4, 1]))]
use input_layer________________________________________
[array([[0., 0., 0., 0., 1.],
       [0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.]], dtype=float32)]
