In [3]:
# 滑动平均模型
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# 一个训练batch中的训练数据个数。数字越小时，训练过程越接近随机梯度下降；数字越大时，训练越接近梯度下降
BATCH_SIZE=100

# 输入层的节点数，对于MNIST数据集，这个就等于图片的像素
INPUT_NODE=784  

# 隐藏层节点数，这里使用只有一个隐藏层的网络结构作为样例，这个隐藏层有500个节点
LAYER1_NODE=500

#输出层的节点数，这个等于类别的数目，因为在MNIST数据集中需要区分的是0-9这10个数字，所以这里输出层的节点数为10
OUTPUT_NODE=10

# 基础的学习率
LEARNING_RATE_BASE=0.8

# 训练轮数
TRAINING_STEPS=30

# 滑动平均衰减率
MOVING_VERAGE_DECAY=0.99

# 一个辅助函数，给定神经网络的输入和所有参数，计算神经网络的向前传播结果，这里定义了一个使用RELU激活函数的三层全连接神经网络
# 通过加入隐藏层实现了多层网络结构，通过RELU函数实现了去线性化，在这个函数中也支持传入用于计算参数平均值的类，
# 这样方便在测试时使用滑动平均模型

def inference(input_tensor,avgclass,weights1,biases1,weights2,biases2):
#     当没有提供滑动平均参类时，直接使用参数当前的取值
    if avgclass == None:
#         记录隐藏层的前向传播结果，这里使用了RELU激活函数
        layer1=tf.nn.relu(tf.matmul(input_tensor,weights1)+biases1)
#         计算输出层的前向传播结果，因为在计算损失函数时会一并计算softmax函数
#         所以这里不需要加入激活函数。而且不加入softmax不会影响预测结果。
#         因为与测试使用的是不同类别对应节点输出值的相对大小，有没有softmax
#         层对最后分类结果的计算没有影响。于是在计算整个神经网络的前向传播时可以不加入最后的softmax层
        return tf.matmul(layer1,weights2)+biases2

    else:
#         首先使用avg_class.average函数来计算得出变量的平均滑动值
#         然后在计算相应的神经网络前向传播结果
        layer1=tf.nn.relu(tf.matmul(input_tensor,avgclass.average(weights1))+avgclass.average(biases1))
        return tf.matmul(layer1,avgclass.average(weights2))+avgclass.average(biases2)
# 训练模型的过程
def train(mnist):
    x = tf.placeholder(tf.float32,[None,INPUT_NODE],name='x_input')
    y_ = tf.placeholder(tf.float32,[None,OUTPUT_NODE],name='y_input')
    
#     生成隐藏层的参数
    weight1=tf.Variable(tf.truncated_normal([INPUT_NODE,LAYER1_NODE],stddev=1.0))
    biases1=tf.Variable(tf.constant(0.1,shape=[LAYER1_NODE]))
    
#     生成输出层的参数
    weight2=tf.Variable(tf.truncated_normal([LAYER1_NODE,OUTPUT_NODE],stddev=0.1))
    biases2=tf.Variable(tf.constant(0.1,shape=[OUTPUT_NODE]))
    
#     计算在当前参数下神经网络前向传播的结果，这里给出的用于计算滑动平均的类为None
#     所以函数不会使用参数的滑动平均值
    y=inference(x,None,weight1,biases1,weight2,biases2)
    
#     定义存储训练论述的变量，这个变量不需要计算滑动平均值，所以指定这个变量为不可恩训练的变量（trainable=False）
#     在使用tensorflow训练神经网络时，一般会将代表训练论述的变量指定为不可训练的参数
    global_step=tf.Variable(0,trainable=False)
    
#     给定滑动平均衰减率和训练轮数的变量，初始化滑动平均类。给定训练轮数的变量可以加快训练早期变量的更新速度
    variable_average=tf.train.ExponentialMovingAverage(MOVING_VERAGE_DECAY,global_step)
    
#     在所有代表神经网络参数的变量上使用滑动平均，其他辅助变量如global_step就不需要了。tf.trainable_variables
#     返回的就是图上集合 GraphKeys.TRAINABLE_VARIABLES中的元素。这个集合的元素就是所有没有指定trainable=False的参数
    variable_average_op=variable_average.apply(tf.trainable_variables())
    
#     计算使用了滑动平均之后的前向传播效果，滑动平均不会改变变量本身的取值，而是维护一个影子变量来记录其滑动平均
#     取值，所以当需要去哦使用这个滑动平均值时，需要明确调用average函数
    average_y = inference(x,variable_average,weight1,biases1,weight2,biases2)
    
#     计算交叉熵作为刻画预测值与真实值之间差距的损失函数，这里使用了sparse_softmax_entropy_with_logits函数来
#     计算交叉熵。当分类问题只有一个正确答案时，可以使用这个函数来加速交叉熵的计算，MNIST问题的图片中
#     只包含了0-9中的一个数字，所以可以使用这个函数来计算交叉熵的损失。这个函数的第一个参数是神经网咯不包括softax层
#     的前向传播的结果,第二个参数是训练数据的证据答案.因为标准答案是一个长度为10的一维数组,而该函数需要提供的是一个
#     正确答案的数组,所以需要使用tf.argmax函数来达到整个却答案对应的类别编号
    cross_entropy=tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y,labels=tf.argmax(y_,1))       
    train_step=tf.train.GradientDescentOptimizer(LEARNING_RATE_BASE).minimize(cross_entropy,global_step=global_step)
    correction_prediction=tf.equal(tf.argmax(average_y,1),tf.argmax(y_,1))

#这个运算首先将一个布尔型的数值转换为实数型，然后计算平均值。这个平均值就是模型在这一组数据上的正确率
    accuracy=tf.reduce_mean(tf.cast(correction_prediction,tf.float32))
    # 初始化会话并开始训练过程
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        #准备验证数据。一般在神经网络的训练过程中会通过验证数据来大致判断停止的
            #条件和评判训练的效果
        validate_feed={x:mnist.validation.images,y_:mnist.validation.labels}
        #准备测试数据。在真实的应用中，这部分数据在训练时是不可见的，这个数据只是作为
            #模型优劣的最后评判标准
        test_feed = {x:mnist.test.images,y_:mnist.test.labels}

        #迭代地训练神经网络
        for i in range(TRAINING_STEPS):
            #每1000轮输出一次在验证数据集上的测试结果
            if i%1000==0:
                #将模型保存到这个文件下
#                 save_path = saver.save(sess, 'model/mnist_model.ckpt')

                #计算滑动平均模型在验证数据上的结果。因为MNIST数据集比较小，所以一次
                #可以处理所有的验证数据。为了计算方便，本样例程序没有将验证数据划分为更小的batch
                #当神经网络模型比较复杂或者验证数据比较大时，太大的batch
                #会导致计算时间过长甚至发生内存溢出的错误
                validate_acc = sess.run(accuracy,feed_dict=validate_feed)
                test_acc = sess.run(accuracy, feed_dict=test_feed)
                print("After %d training step(s), validation accuracy using average model is %g,\
                test accuracy using average model is %g" %(i,validate_acc,test_acc))
                #产生这一轮使用的一个batch的训练数据，并运行训练过程
            xs,ys=mnist.train.next_batch(BATCH_SIZE)
#             sess.run(train_op,feed_dict={x:xs,y_:ys})
        #在训练结束之后，在测试数据上检测神经网络模型的最终正确率
        test_acc=sess.run(accuracy,feed_dict=test_feed)
        print("After %d training step(s), test accuracy using \
        average model is %g "%(TRAINING_STEPS,test_acc))

#主程序入口
def main(argv=None):
    #声明处理MNIST数据集的类，这个类在初始化时会自动下载数据
    mnist=input_data.read_data_sets("MNIST_data/",one_hot=True)
    train(mnist)     
 
main()

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
After 0 training step(s), validation accuracy using average model is 0.1304,                test accuracy using average model is 0.1319
After 30 training step(s), test accuracy using         average model is 0.1319 


In [4]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

In [31]:
mnist=input_data.read_data_sets('MNIST_data/',one_hot=True)

x=tf.placeholder(tf.float32,[None,784])
y_=tf.placeholder(tf.float32,[None,10])

x_image=tf.reshape(x,[-1,28,28,1])
sess=tf.InteractiveSession()
def getWeight(shape):
    initial=tf.truncated_normal(shape,stddev=1.0,mean=1.0)
    return tf.Variable(initial)

def getBias(shape):
    initial=tf.constant(0.1,shape=shape)
    return initial
    
def conv2d(x,w):
     return tf.nn.conv2d(x,w,strides=[1,1,1,1],padding='SAME')

def max_pool(x):
    return tf.nn.max_pool(x,ksize=[1,2,2,1],strides=[1,1,1,1],padding='SAME')

# 卷积层
def inference(input_tensor,avg_class,weight,bias):
    if avg_class == None:
        return conv2d(input_tensor,weight)+bias
    else:
        return conv2d(input,avg_class.average(weight))+avg_class.average(bias)

#     专门为全卷积
def fcn_inference(input_tensor,avg_class,weight,bias):
    if avg_class:
        return tf.matmul(input_tensor,avg_class.average(weight))+avg_class.average(bias)
    else:
        return tf.matmul(input_tensor,weight)+bias


def train(variable_average=None):
    if variable_average:
            # 第一层
        weight_1=getWeight([5,5,1,32])
        bias_1=getBias([32])
        conv_1=inference(x_image,variable_average,weight_1,bias_1)
        pool_1=max_pool(conv_1)

        # 第二层
        weight_2=getWeight([3,3,32,64])
        bias_2=getBias([64])
        conv_2=inference(pool_1,variable_average,weight_2,bias_2)
        pool_2=max_pool(conv_2)

        # 第三层
        weight_3=getWeight([7*7*64,10])
        bias_3=getBias([10])
        pool_2=tf.reshape(pool_2,[-1,7*7*64])
        conv_3=fcn_inference(pool_2,variable_average,weight_3,bias_3)
    else:
        # 第一层
        weight_1=getWeight([5,5,1,32])
        bias_1=getBias([32])
        conv_1=inference(x_image,None,weight_1,bias_1)
        pool_1=max_pool(conv_1)

        # 第二层
        weight_2=getWeight([7,7,32,64])
        bias_2=getBias([64])
        conv_2=inference(pool_1,None,weight_2,bias_2)
        pool_2=max_pool(conv_2)

        # 第三层
        weight_3=getWeight([7*7*64,10])
        bias_3=getBias([10])
        pool_2=tf.reshape(pool_2,[-1,7*7*64])
        conv_3=fcn_inference(pool_2,None,weight_3,bias_3)
    return conv_3

conv_3=train()
#     定义存储训练论述的变量，这个变量不需要计算滑动平均值，所以指定这个变量为不可恩训练的变量（trainable=False）
#     在使用tensorflow训练神经网络时，一般会将代表训练论述的变量指定为不可训练的参数
global_step=tf.Variable(0,trainable=False)

#     给定滑动平均衰减率和训练轮数的变量，初始化滑动平均类。给定训练轮数的变量可以加快训练早期变量的更新速度
variable_average=tf.train.ExponentialMovingAverage(MOVING_VERAGE_DECAY,global_step)

#     在所有代表神经网络参数的变量上使用滑动平均，其他辅助变量如global_step就不需要了。tf.trainable_variables
#     返回的就是图上集合 GraphKeys.TRAINABLE_VARIABLES中的元素。这个集合的元素就是所有没有指定trainable=False的参数
variable_average_op=variable_average.apply(tf.trainable_variables())

#     计算使用了滑动平均之后的前向传播效果，滑动平均不会改变变量本身的取值，而是维护一个影子变量来记录其滑动平均
#     取值，所以当需要去哦使用这个滑动平均值时，需要明确调用average函数
average_y = train(variable_average)

cross_entropy=tf.nn.sparse_softmax_cross_entropy_with_logits(logits=conv_3,labels=y_)
train_step=tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy,global_step=global_step)
# correction_prediction=tf.equal(tf.argmax(conv_3,1),tf.argmax(y_,1))
correction_prediction=tf.equal(tf.argmax(average_y,1),tf.argmax(y_,1))
accuracy=tf.reduce_mean(tf.cast(correction_prediction,tf.float32))



tf.global_variables_initializer().run()

steps=5
for i in range(steps):
    batch=mnist.train.next_batch(batch_size)
    if i % 100 == 0:
        train_accuracy=accuracy.eval(feed_dict={x:batch[0],y_:batch[1]})
        print("step %d,train accuracy %g" %(i,train_accuracy))
        train_step.run(feed_dict={x:batch[0],y_:batch[1]})
        
print("test accuracy %g" %accuracy.eval(feed_dict={x:mnist.test.images,y_:mnist.test.labels}))


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
------------------


TypeError: Failed to convert object of type <class 'method'> to Tensor. Contents: <bound method Kernel.raw_input of <ipykernel.ipkernel.IPythonKernel object at 0x000000E039CD2DA0>>. Consider casting elements to a supported type.

In [1]:
import tensorflow as tf  
import math  
from tensorflow.examples.tutorials.mnist import input_data
tf.set_random_seed(0.0)  
  
# Download images and labels into mnist.test (10K images+labels)   
# and mnist.train (60K images+labels)  
mnist=input_data.read_data_sets('MNIST_data/',one_hot=True)
  
# Ylogits      - input data that need tobe batch normalised. For convolutional  
#                layer, it's a 4-D tensor. For fully connected layer, it's a 2-D tensor  
# is_test      - flag, is_test = False for train  
#                      is_test = True  for test  
# offset       - beta  
#                gamma(scaling) is not useful for relu   
def batchnormForRelu(Ylogits, is_test, Iteration, offset, convolutional=False):  
    # adding the iteration prevents from averaging across non-existing iterations  
    exp_moving_avg = tf.train.ExponentialMovingAverage(0.999, Iteration)   
    bnepsilon = 1e-5  
    if convolutional:  
        mean, variance = tf.nn.moments(Ylogits, [0, 1, 2])  
    else:  
        mean, variance = tf.nn.moments(Ylogits, [0])  
    update_moving_averages = exp_moving_avg.apply([mean, variance])  
    m = tf.cond(is_test, lambda: exp_moving_avg.average(mean), lambda: mean)  
    v = tf.cond(is_test, lambda: exp_moving_avg.average(variance), lambda: variance)  
    Ybn = tf.nn.batch_normalization(Ylogits, m, v, offset, None, bnepsilon)  
    return Ybn, update_moving_averages  
  
def compatible_convolutional_noise_shape(Y):  
    noiseshape = tf.shape(Y)  
    noiseshape = noiseshape * tf.constant([1,0,0,1]) + tf.constant([0,1,1,0])  
    return noiseshape  
  
# input X: 28x28 grayscale images  
X  = tf.placeholder(tf.float32, [None,784]) 
X_image=tf.reshape(X,[-1,28,28,1])
# correct answers will go here  
Y_ = tf.placeholder(tf.float32, [None, 10])  
# variable learning rate  
lr = tf.placeholder(tf.float32)  
# test flag for batch norm  
tst = tf.placeholder(tf.bool)  
Iter = tf.placeholder(tf.int32)  
# dropout probability  
pkeep = tf.placeholder(tf.float32)  
pkeep_conv = tf.placeholder(tf.float32)  
  
# three convolutional layers with their channel counts, and a  
# fully connected layer (tha last layer has 10 softmax neurons)  
K = 24  # 1st convolutional layer output depth  
L = 48  # 2nd convolutional layer output depth  
M = 64  # 3rd convolutional layer  
N = 200 # 4th fully connected layer  
  
W1 = tf.Variable(tf.truncated_normal([6, 6, 1, K], stddev=0.1))  # 6x6 patch, 1 input channel, K output channels  
B1 = tf.Variable(tf.constant(0.1, tf.float32, [K]))  
W2 = tf.Variable(tf.truncated_normal([5, 5, K, L], stddev=0.1))  
B2 = tf.Variable(tf.constant(0.1, tf.float32, [L]))  
W3 = tf.Variable(tf.truncated_normal([4, 4, L, M], stddev=0.1))  
B3 = tf.Variable(tf.constant(0.1, tf.float32, [M]))  
  
W4 = tf.Variable(tf.truncated_normal([7 * 7 * M, N], stddev=0.1))  
B4 = tf.Variable(tf.constant(0.1, tf.float32, [N]))  
W5 = tf.Variable(tf.truncated_normal([N, 10], stddev=0.1))  
B5 = tf.Variable(tf.constant(0.1, tf.float32, [10]))  
  
# The model  
# batch norm scaling is not useful with relus  
# batch norm offsets are used instead of biases  
Y1l = tf.nn.conv2d(X_image, W1, strides=[1, 1, 1, 1], padding='SAME')  
Y1bn, update_ema1 = batchnormForRelu(Y1l, tst, Iter, B1, convolutional=True)  
Y1r = tf.nn.relu(Y1bn)  
Y1 = tf.nn.dropout(Y1r, pkeep_conv, compatible_convolutional_noise_shape(Y1r))  
stride = 2  # output is 14x14  
Y2l = tf.nn.conv2d(Y1, W2, strides=[1, stride, stride, 1], padding='SAME')  
Y2bn, update_ema2 = batchnormForRelu(Y2l, tst, Iter, B2, convolutional=True)  
Y2r = tf.nn.relu(Y2bn)  
Y2 = tf.nn.dropout(Y2r, pkeep_conv, compatible_convolutional_noise_shape(Y2r))  
stride = 2  # output is 7x7  
Y3l = tf.nn.conv2d(Y2, W3, strides=[1, stride, stride, 1], padding='SAME')  
Y3bn, update_ema3 = batchnormForRelu(Y3l, tst, Iter, B3, convolutional=True)  
Y3r = tf.nn.relu(Y3bn)  
Y3 = tf.nn.dropout(Y3r, pkeep_conv, compatible_convolutional_noise_shape(Y3r))  
# reshape  
YY = tf.reshape(Y3, shape=[-1, 7 * 7 * M])  
Y4l = tf.matmul(YY, W4)  
Y4bn, update_ema4 = batchnormForRelu(Y4l, tst, Iter, B4)  
Y4r = tf.nn.relu(Y4bn)  
Y4 = tf.nn.dropout(Y4r, pkeep)  
Ylogits = tf.matmul(Y4, W5) + B5  
Y = tf.nn.softmax(Ylogits)  
  
update_ema = tf.group(update_ema1, update_ema2, update_ema3, update_ema4)  
  
# cross-entropy loss function (= -sum(Y_i * log(Yi)) ), normalised for batches of 100  images  
# TensorFlow provides the softmax_cross_entropy_with_logits function to avoid numerical stability  
# problems with log(0) which is NaN  
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_)  
cross_entropy = tf.reduce_mean(cross_entropy)*100  
  
# accuracy of the trained model, between 0 (worst) and 1 (best)  
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))  
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))  
  
# training step, the learning rate is a placeholder  
train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)  
  
# init  
init = tf.global_variables_initializer()  
sess = tf.Session()  
sess.run(init)  
  
def training_step(i, update_test_data, update_train_data):  
  
    # training on batches of 100 images with 100 labels  
    batch_X, batch_Y = mnist.train.next_batch(100)  
  
    # learning rate decay  
    max_learning_rate = 0.02  
    min_learning_rate = 0.0001  
    decay_speed = 1600  
    learning_rate = min_learning_rate + (max_learning_rate - min_learning_rate) * math.exp(-i/decay_speed)  
  
    # compute training values for visualisation  
    if update_train_data:  
        a, c = sess.run([accuracy, cross_entropy],
                        {X: batch_X, Y_: batch_Y, tst: False, pkeep: 1.0, pkeep_conv: 1.0})  
        print(str(i) + ": accuracy:" + str(a) + " loss: " + str(c) + " (lr:" + str(learning_rate) + ")")  
    # compute test values for visualisation  
    if update_test_data:  
        a, c = sess.run([accuracy, cross_entropy],
                        {X: mnist.test.images, Y_: mnist.test.labels, tst: True, pkeep: 1.0, pkeep_conv: 1.0})  
        print(str(i) + ": ********* epoch " + str(i*100//mnist.train.images.shape[0]+1) + " ********* test accuracy:" + str(a) + " test loss: " + str(c))  
    # the backpropagation training step  
    sess.run(train_step, {X: batch_X, Y_: batch_Y, lr: learning_rate, tst: False, pkeep: 0.75, pkeep_conv: 1.0})  
    sess.run(update_ema, {X: batch_X, Y_: batch_Y, tst: False, Iter: i, pkeep: 1.0, pkeep_conv: 1.0})  
  
if __name__ == "__main__":  
    for i in range(0, 10):  
       training_step(i, True, True)  

  from ._conv import register_converters as _register_converters


Instructions for updating:
Use the retry module or similar alternatives.
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.

0: accuracy:0.05 loss: 302.39838 (lr:0.