In [2]:
import numpy as np
import tensorflow as tf
import sys
from tensorflow.examples.tutorials.mnist import input_data

In [18]:
def weight_variable(shape):
    """Create a weight variable of the given shape.

    The initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.1.

    Args:
        shape: Shape of the variable to create.

    Returns:
        A `tf.Variable` holding the initial values.
    """
    initial_values = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial_values)

def bias_variable(shape):
    """Create a bias variable of the given shape, with every entry set to 0.1.

    Args:
        shape: Shape of the variable to create.

    Returns:
        A `tf.Variable` holding the constant initial values.
    """
    initial_values = tf.constant(0.1, shape=shape)
    return tf.Variable(initial_values)

def conv2D(x, W):
    """Apply a stride-1, SAME-padded 2-D convolution of `x` with kernel `W`.

    Args:
        x: Input tensor in the default "NHWC" layout, i.e. stored as
            [batch, height, width, channels].
        W: Convolution kernel tensor.

    Returns:
        The result of `tf.nn.conv2d`.
    """
    # strides: the first and fourth entries must be 1; a spatial stride of 2
    # would be written as [1, 2, 2, 1].
    unit_strides = [1, 1, 1, 1]
    # padding: depending on the stride, the kernel may reach a point where the
    # part of the image not yet scanned is smaller than the kernel. For
    # example, with a 5x5 image, a 2x2 kernel and stride 2, one element is left
    # over after two steps. 'SAME' pads with zeros at the end so the kernel can
    # still scan that part; 'VALID' would discard the leftover positions.
    padding_mode = 'SAME'
    return tf.nn.conv2d(x, W, strides=unit_strides, padding=padding_mode)

def max_pool_2x2(x):
    """Apply max pooling with a 2x2 window and a stride of 2, using SAME padding.

    Args:
        x: Input tensor to pool.

    Returns:
        The result of `tf.nn.max_pool`.
    """
    # The same [1, 2, 2, 1] layout is used for both the window and the stride.
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

# Hand-written next_batch helper.
def next_batch(train_data, train_target, batch_size):
    """Draw a random mini-batch of aligned samples and labels.

    Within one call the samples are drawn without replacement. Separate calls
    are independent of each other, so a sample may show up again in a later
    batch.

    Args:
        train_data: Indexable sequence of samples.
        train_target: Indexable sequence of labels, aligned with `train_data`.
            Its length defines how many samples can be drawn from.
        batch_size: Requested number of samples. A value larger than the
            dataset is clamped to the dataset size instead of raising
            `IndexError`; a negative value yields an empty batch.

    Returns:
        A tuple `(batch_data, batch_target)` of two equally long lists, where
        `batch_target[k]` is the label of `batch_data[k]`.
    """
    total = len(train_target)
    # Never ask for more indices than exist (and never for a negative count).
    count = max(0, min(batch_size, total))
    # Shuffle an index array once and keep the first `count` entries; this
    # avoids building and shuffling a Python list of every index by hand.
    chosen = np.random.permutation(total)[:count].tolist()
    batch_data = [train_data[k] for k in chosen]
    batch_target = [train_target[k] for k in chosen]
    return batch_data, batch_target

In [8]:
# Load the data and set up the graph inputs and training parameters.
mnist = input_data.read_data_sets("./MNIST_DATA", one_hot=True)
train_data, train_target = mnist.train.images, mnist.train.labels  # 55,000 training samples
test_data, test_target = mnist.test.images, mnist.test.labels  # 10,000 test samples
# Graph inputs: the batch dimension is left open (None).
x = tf.placeholder(tf.float32, shape=[None, 784])
y = tf.placeholder(tf.float32, shape=[None, 10])
# Probability that a hidden unit stays active under dropout.
keep_prob = tf.placeholder(tf.float32)
# Number of iterations of the training loop and the size of each mini-batch.
epochs_num = 5000
batch_size = 100

Extracting ./MNIST_DATA/train-images-idx3-ubyte.gz
Extracting ./MNIST_DATA/train-labels-idx1-ubyte.gz
Extracting ./MNIST_DATA/t10k-images-idx3-ubyte.gz
Extracting ./MNIST_DATA/t10k-labels-idx1-ubyte.gz


In [19]:
# First convolutional layer.
# 32 kernels of size 5x5 over a single input channel. Each kernel computes a
# linear combination of a 5x5 patch at every position while sliding over the
# 28x28 image, producing a new 28x28 image, so the layer emits 32 images
# (channels). Intuitively this is like applying 32 different filters to one
# picture, extracting 32 kinds of features for the later layers.
W_conv1 = weight_variable([5, 5, 1, 32])
# 32 kernels mean 32 linear combinations, hence 32 intercepts. For each
# convolution the bias is added once, after all input channels have been
# combined.
b_conv1 = bias_variable([32])
# The input image is 28x28 pixels with 1 (grayscale) channel. The first
# dimension is the batch index and may have any size, so it is given as -1.
# The remaining dimensions are the two spatial ones followed by the channel,
# matching the NHWC layout expected by the convolution.
x_image = tf.reshape(x, [-1, 28, 28, 1])
# Linear part (convolution plus bias) followed by ReLU. Padding is SAME, so the
# feature maps are still 28x28.
conv1_pre_activation = conv2D(x_image, W_conv1) + b_conv1
h_conv1 = tf.nn.relu(conv1_pre_activation)
# Max pooling compresses the feature maps to 14x14.
h_pool1 = max_pool_2x2(h_conv1)

In [21]:
# Second convolutional layer: 64 kernels of size 5x5 over the 32 channels
# produced by the first layer.
W_conv2 = weight_variable([5, 5, 32, 64])
# Each of the 64 kernels combines all 32 input channels and then adds a single
# bias, giving 64 output channels.
b_conv2 = bias_variable([64])
conv2_pre_activation = conv2D(h_pool1, W_conv2) + b_conv2
h_conv2 = tf.nn.relu(conv2_pre_activation)
# Pooling reduces the feature maps to 7x7.
h_pool2 = max_pool_2x2(h_conv2)

In [22]:
# First fully connected layer of the CNN.
# The hidden layer has 1024 neurons; each one is connected to every value of
# the flattened feature tensor, i.e. 7x7 pixels times 64 channels.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
# One bias per neuron.
b_fc1 = bias_variable([1024])
# Flatten each example into a single row of 7*7*64 columns.
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
# The fully connected layer uses ReLU as well.
fc1_pre_activation = tf.matmul(h_pool2_flat, W_fc1) + b_fc1
h_fc1 = tf.nn.relu(fc1_pre_activation)
# Dropout: every unit is kept with probability keep_prob, to reduce overfitting.
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

In [23]:
# Second fully connected layer of the CNN: maps the 1024 hidden units to the
# 10 classes.
W_fc2 = weight_variable([1024, 10])
# 10 output neurons, hence 10 intercepts.
b_fc2 = bias_variable([10])
# The final linear combination is passed through softmax, the usual activation
# for multi-class classification. With outputs a_0, a_1, ..., a_9 of the 10
# neurons:
#     S_i = e^{a_i} / (e^{a_0} + e^{a_1} + ... + e^{a_9})
fc2_pre_activation = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
y_conv = tf.nn.softmax(fc2_pre_activation)

In [31]:
# Create an interactive session; the .eval() and .run() calls below rely on it.
sess = tf.InteractiveSession()
# Cross-entropy loss, summed over the batch. The softmax output is clipped away
# from 0 before taking the log: an exact 0 would give 0 * log(0) = NaN, which
# would then propagate into every weight through the gradients.
cross_entropy = -tf.reduce_sum(y * tf.log(tf.clip_by_value(y_conv, 1e-10, 1.0)))
# Minimise the cross entropy with Adam.
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# A prediction is correct when the most probable class matches the label.
correct_prediction = tf.equal(tf.argmax(y_conv, axis=1), tf.argmax(y, axis=1))
# The flags are all 1 or 0, so their mean is the accuracy.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# Initialise all variables.
sess.run(tf.global_variables_initializer())

# Run epochs_num training iterations, one random mini-batch per iteration.
for i in range(epochs_num):
    batch_data, batch_target = next_batch(train_data, train_target, batch_size)
    # Report the accuracy on the current mini-batch every 100 iterations.
    if i % 100 == 0:
        # No dropout while measuring accuracy.
        train_accuracy = accuracy.eval(feed_dict={x: batch_data, y: batch_target, keep_prob: 1.0})
        print("step %d, train accuracy %.3f" % (i, train_accuracy))
    # Drop half of the hidden units while training.
    train_step.run(feed_dict={x: batch_data, y: batch_target, keep_prob: 0.5})

# After training has finished.
print("Training Finished")
# Measure accuracy on the held-out test set rather than on the last training
# mini-batch. The test set is fed in chunks of batch_size so that it does not
# have to pass through the network in a single run; every chunk's accuracy is
# weighted by the chunk's size.
test_total = len(test_target)
test_correct = 0.0
for start in range(0, test_total, batch_size):
    stop = start + batch_size
    chunk_data = test_data[start:stop]
    chunk_target = test_target[start:stop]
    chunk_accuracy = accuracy.eval(feed_dict={x: chunk_data, y: chunk_target, keep_prob: 1.0})
    test_correct += chunk_accuracy * len(chunk_target)
print("Test accuracy %.3f" % (test_correct / test_total))

step 0, train accuracy 0.120
step 100, train accuracy 0.890
step 200, train accuracy 0.940
step 300, train accuracy 0.930
step 400, train accuracy 0.950
step 500, train accuracy 0.940
step 600, train accuracy 0.970
step 700, train accuracy 0.960
step 800, train accuracy 0.950
step 900, train accuracy 0.970
step 1000, train accuracy 0.980
step 1100, train accuracy 1.000
step 1200, train accuracy 1.000
step 1300, train accuracy 0.980
step 1400, train accuracy 0.970
step 1500, train accuracy 0.990
step 1600, train accuracy 0.990
step 1700, train accuracy 0.990
step 1800, train accuracy 1.000
step 1900, train accuracy 1.000
step 2000, train accuracy 0.970
step 2100, train accuracy 1.000
step 2200, train accuracy 0.990
step 2300, train accuracy 0.980
step 2400, train accuracy 0.980
step 2500, train accuracy 1.000
step 2600, train accuracy 0.980
step 2700, train accuracy 0.980
step 2800, train accuracy 1.000
step 2900, train accuracy 0.980
step 3000, train accuracy 0.980
step 3100, train acc

In [27]:
# Interactive API lookup left over from development; nothing below depends on it.
# Alternative lookup, currently disabled: help(accuracy.eval)
help(tf.Session.run)

Help on function run in module tensorflow.python.client.session:

run(self, fetches, feed_dict=None, options=None, run_metadata=None)
    Runs operations and evaluates tensors in `fetches`.
    
    This method runs one "step" of TensorFlow computation, by
    running the necessary graph fragment to execute every `Operation`
    and evaluate every `Tensor` in `fetches`, substituting the values in
    `feed_dict` for the corresponding input values.
    
    The `fetches` argument may be a single graph element, or an arbitrarily
    nested list, tuple, namedtuple, dict, or OrderedDict containing graph
    elements at its leaves.  A graph element can be one of the following types:
    
    * An @{tf.Operation}.
      The corresponding fetched value will be `None`.
    * A @{tf.Tensor}.
      The corresponding fetched value will be a numpy ndarray containing the
      value of that tensor.
    * A @{tf.SparseTensor}.
      The corresponding fetched value will be a
      @{tf.SparseTensorVa

In [3]:
# Interactive API lookup of the functional conv2d layer interface; nothing else depends on it.
help(tf.layers.conv2d)

Help on function conv2d in module tensorflow.python.layers.convolutional:

conv2d(inputs, filters, kernel_size, strides=(1, 1), padding='valid', data_format='channels_last', dilation_rate=(1, 1), activation=None, use_bias=True, kernel_initializer=None, bias_initializer=<tensorflow.python.ops.init_ops.Zeros object at 0x7f02df6c2588>, kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, trainable=True, name=None, reuse=None)
    Functional interface for the 2D convolution layer.
    
    This layer creates a convolution kernel that is convolved
    (actually cross-correlated) with the layer input to produce a tensor of
    outputs. If `use_bias` is True (and a `bias_initializer` is provided),
    a bias vector is created and added to the outputs. Finally, if
    `activation` is not `None`, it is applied to the outputs as well.
    
    Arguments:
      inputs: Tensor input.
      filters: Integer, the dimensionality of t