# TensorFlow 实现简单的卷积网络

## 读入数据

In [78]:
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
# Read the MNIST data sets from the 'MNIST_data' directory with one-hot
# labels (the label placeholder defined later is 10 wide).
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
# Interactive session used by the rest of the notebook. The training cell
# calls `.eval()` / `.run()` without passing a session, which relies on this
# session being the default one.
sess = tf.InteractiveSession()

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


## 便利函数

### 权重和偏置的初始化函数

In [79]:
def weight_variable(shape):
    """Return a ``tf.Variable`` of the given shape for use as a weight.

    The initial values come from ``tf.truncated_normal`` with a standard
    deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a ``tf.Variable`` of the given shape with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

### 卷积层和池化层创建函数

In [80]:
def conv2d(x, W):
    """Convolve input `x` with filter `W` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool `x` with a [1, 2, 2, 1] window, the same strides and 'SAME' padding."""
    # Window and strides are identical, so pooling regions do not overlap.
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

* `tf.nn.conv2d`：TensorFlow 中的 2 维卷积函数；
  - `x`：输入
  - `W`：卷积的参数，比如 `[5,5,1,32]`（前两个数字代表卷积核尺寸，第三个代表 channel 数，第四个代表卷积核的数量）
* `strides`：卷积模板移动的步长，`[1, 1, 1, 1]` 代表各维度上的步长均为 1
* `padding`：边界的处理方式
  - `SAME`：代表给边界加上 Padding，在步长为 1 时让卷积的输出和输入保持同样的尺寸
* `tf.nn.max_pool`：最大池化函数（$2\times 2\rightarrow 1\times 1$）

## placeholder

In [81]:
# Flattened input images: one row of 784 values per example.
x = tf.placeholder(dtype=tf.float32, shape=[None, 784])
# Target labels: one row of 10 values per example.
y_ = tf.placeholder(dtype=tf.float32, shape=[None, 10])
# Reshape each 1x784 row into a 28x28 single-channel image, because the
# convolution layers need the spatial structure; -1 keeps the batch size.
x_image = tf.reshape(x, shape=[-1, 28, 28, 1])

## 定义网络结构

两层卷积层，一层全连接层，最后接 Dropout 层和输出层。

### 第一层卷积层

卷积核尺寸 $5\times 5$，1个颜色通道，32个不同的卷积核。

In [82]:
# 5x5 kernels, 1 input channel, 32 output feature maps.
W_conv1 = weight_variable([5, 5, 1, 32])
# One bias value per output feature map.
b_conv1 = bias_variable([32])
# Convolve the image, add the bias, then apply ReLU.
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
# First 2x2 max pooling (see max_pool_2x2).
h_pool1 = max_pool_2x2(h_conv1)

### 第二层卷积层

64个卷积核（即提取64种特征）。

由于经历了两次步长为 $2 \times 2$ 的最大池化，边长变为 1/4 ，即 $28\times 28\rightarrow 7\times 7$。

输出的 tensor 尺寸变为 $7\times 7\times 64$。

In [83]:
# 5x5 kernels over the 32 feature maps from the first layer, producing 64.
W_conv2 = weight_variable([5, 5, 32, 64])
# One bias value per output feature map.
b_conv2 = bias_variable([64])
# Convolve the pooled first-layer output, add the bias, then apply ReLU.
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
# Second 2x2 max pooling; the fully connected layer below treats the result
# as 7 x 7 x 64 values per example.
h_pool2 = max_pool_2x2(h_conv2)

### 全连接层

隐含节点 1024 ，并使用 ReLU 激活函数。

In [84]:
# Weights mapping the 7*7*64 flattened pooled features to 1024 hidden units.
W_fc1 = weight_variable([7 * 7 * 64 , 1024])
b_fc1 = bias_variable([1024])
# Flatten each example's feature maps into one row; -1 leaves the batch
# dimension to be inferred.
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
# Affine transform followed by ReLU.
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

### Dropout 层

训练时随机丢弃一部分节点的数据来减轻过拟合，每个节点被保留的概率由 `keep_prob` 控制。

In [85]:
# Fed at run time: the training cell passes 0.5 for training steps and 1.0
# when evaluating accuracy.
keep_prob = tf.placeholder(tf.float32)
# Dropout applied to the fully connected activations, controlled by keep_prob.
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

### 输出层

最后将 Dropout 层的输出连接一个全连接输出层，得到 10 个类别的 logits（`y_conv`）；Softmax 在后面的损失函数 `softmax_cross_entropy_with_logits` 中计算，从而得到最后的概率输出。

In [86]:
# Final linear layer: 1024 hidden units -> 10 class scores.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
# y_conv is the raw matmul-plus-bias output (logits); no softmax is applied
# here. The loss receives it through its `logits=` argument.
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

## 训练

### 损失函数

使用 cross entropy

In [87]:
# Manual cross-entropy formula, kept for reference only: it takes tf.log of
# y_conv directly, so it would need y_conv to hold probabilities rather than
# the raw matmul output used in this notebook.
#cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
# Average of the softmax cross-entropy between the labels y_ and the logits
# y_conv.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
# Adam optimizer with learning rate 1e-4, minimising the loss above.
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

### 准确率

In [88]:
# An example counts as correct when the arg-max of y_conv along dimension 1
# equals the arg-max of the one-hot label y_ along the same dimension.
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
# Cast the booleans to float32 and average them to get the fraction correct.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


### 训练

- 训练时 `keep_prob` 为 0.5（评估准确率时为 1.0）
- mini-batch 大小为 50
- 共进行 20000 次训练迭代
- 参与训练的样本数量总共 100 万
- 每 100 次训练评估一次准确率

In [None]:
# Initialise all variables before the first training step.
sess.run(tf.global_variables_initializer())

for i in range(20000):
    batch = mnist.train.next_batch(50)
    if i % 100 == 0:
        # Report accuracy on the mini-batch that is about to be trained on,
        # with dropout disabled (keep_prob 1.0).
        train_accuracy = sess.run(
            accuracy,
            feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0},
        )
        print("step %d, training accuracy %g" % (i, train_accuracy))
    # One optimisation step with dropout active (keep_prob 0.5).
    sess.run(train_step, feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

# Final evaluation on the whole test set, again with dropout disabled.
print("test accuracy %g" % sess.run(
    accuracy,
    feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0},
))

step 0, training accuracy 0.04
step 100, training accuracy 0.78
step 200, training accuracy 0.9
step 300, training accuracy 0.88
step 400, training accuracy 0.96
step 500, training accuracy 0.94
step 600, training accuracy 1
step 700, training accuracy 0.96
step 800, training accuracy 0.88
step 900, training accuracy 0.98
step 1000, training accuracy 0.94
step 1100, training accuracy 0.88
step 1200, training accuracy 0.98
step 1300, training accuracy 0.98
step 1400, training accuracy 1
step 1500, training accuracy 0.98
step 1600, training accuracy 0.96
step 1700, training accuracy 0.98
step 1800, training accuracy 1
step 1900, training accuracy 1
step 2000, training accuracy 0.98
step 2100, training accuracy 0.94
step 2200, training accuracy 1
step 2300, training accuracy 1
step 2400, training accuracy 0.98
step 2500, training accuracy 0.98
step 2600, training accuracy 0.98
step 2700, training accuracy 1
step 2800, training accuracy 0.96
step 2900, training accuracy 0.98
step 3000, tra