In [1]:
import os
import sys

# Add the parent of the current working directory to the module search path
# (presumably so the project-local `dataset` package can be imported -- confirm
# against the repository layout).
# `sys` is imported explicitly instead of being reached through `os.sys`, which
# is an undocumented implementation detail of the `os` module.
_project_root = os.path.dirname(os.path.abspath('.'))
if _project_root not in sys.path:    # keep the cell idempotent when re-run
    sys.path.append(_project_root)

TensorFlow中提供了批归一化与随机失活的API：
- ```tf.layers.dropout```
- ```tf.layers.batch_normalization```

注意这两个操作在网络中的位置，BN层是位于conv操作与act操作之间，即```conv -> BN -> act```；而dropout操作则可以放在act操作之后，即```conv -> BN -> act -> dropout```。

一般来说，dropout只会应用在全连接层上。

# 数据准备

In [2]:
from dataset.dataset import load_cifar10

# Load CIFAR-10 with a batch size of 64 and unpack the two returned objects.
# NOTE(review): judging by the names the loader returns (train, test) -- confirm
# against dataset/dataset.py.
train_data, test_data = load_cifar10(batch_size=64)



(50000, 3072) (50000,)




(10000, 3072) (10000,)


# 网络设计与搭建
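这里使用的是之前实现的```mini_CNN```。注意在使用```tf.layers.batch_normalization```时有一个坑，跟TF中BN层的实现有关：BN层在训练时使用当前batch计算得到的mean与var，而预测时使用的mean、var则是通过移动平均的方法累积得到的。更新移动平均的操作被放在```tf.GraphKeys.UPDATE_OPS```集合中，默认不会随```train_op```一起执行，因此需要用```tf.control_dependencies```把它们设为```train_op```的依赖，否则预测时用到的mean、var不会被更新。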

In [3]:
import tensorflow as tf

# --- Input / output sizes ---
unit_I = train_data.n_features    # number of input units, equal to the number of features
unit_O = 10    # number of output units, i.e. number of classes

# --- Convolution layers ---
filters = 32    # number of convolution kernels per layer
conv_size = (3, 3)    # convolution kernel size

# --- Pooling layers ---
pool_size = (2, 2)    # pooling window size
strides = (2, 2)    # stride of the pooling window

# --- Fully connected layer ---
fc_size = 128    # number of units in the dense layer before the output layer

  from ._conv import register_converters as _register_converters


In [4]:
# Graph inputs: flattened samples, integer class labels and the train/eval switch.
X = tf.placeholder(tf.float32, [None, unit_I])
Y = tf.placeholder(tf.int64, [None])
is_training = tf.placeholder(tf.bool)    # fed to the BN layers' `training` argument

# Each row of X is reshaped to (3, 32, 32) and then transposed to (32, 32, 3),
# i.e. the size-3 axis is moved last.
# NOTE(review): presumably channels-first pixels -> NHWC; confirm with the loader.
X_img = tf.transpose(tf.reshape(X, [-1, 3, 32, 32]), perm=[0, 2, 3, 1])


def _conv_bn_relu_pool(inputs, scope):
    """Build one `conv -> BN -> ReLU -> max-pool` stage under name scope `scope`.

    Reads the notebook-level `filters`, `conv_size`, `pool_size`, `strides`
    and `is_training`.

    Args:
        inputs: tensor fed to the convolution.
        scope: name passed to `tf.name_scope` for this stage.

    Returns:
        A tuple `(activated, pooled)`: the ReLU output and the max-pooled output.
    """
    with tf.name_scope(scope):
        # No activation inside the convolution: BN sits between conv and ReLU.
        pre_activation = tf.layers.conv2d(
            inputs,
            filters=filters,
            kernel_size=conv_size,
            padding='same',
            activation=None)
        normalized = tf.layers.batch_normalization(
            pre_activation, training=is_training)
        activated = tf.nn.relu(normalized)
        pooled = tf.layers.max_pooling2d(
            activated, pool_size=pool_size, strides=strides)
    return activated, pooled


with tf.name_scope('CNN'):
    # Three identical stages chained one after another.
    conv1, pooling1 = _conv_bn_relu_pool(X_img, 'conv1')
    conv2, pooling2 = _conv_bn_relu_pool(pooling1, 'conv2')
    conv3, pooling3 = _conv_bn_relu_pool(pooling2, 'conv3')

    with tf.name_scope('fc'):
        flattened = tf.layers.flatten(pooling3)
        fc = tf.layers.dense(flattened, fc_size, activation=tf.nn.relu)
#         The network is not deep, so a large dropout rate is not needed.
#         fc = tf.layers.dropout(fc, rate=0.1, training=is_training)

    logits = tf.layers.dense(fc, unit_O, activation=None)

with tf.name_scope('Eval'):
    loss = tf.losses.sparse_softmax_cross_entropy(labels=Y, logits=logits)
    predict = tf.argmax(logits, 1)
    is_correct = tf.equal(predict, Y)
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

with tf.name_scope('train_op'):
    lr = 1e-3
    # Batch normalization uses the current batch's mean/variance while training
    # and moving averages of them at inference time. The ops that update those
    # moving averages are collected in tf.GraphKeys.UPDATE_OPS and are not run
    # automatically, so the training op is made to depend on them explicitly.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.AdamOptimizer(lr)
        train_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()

# Let TensorFlow grow its GPU memory allocation on demand.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

Instructions for updating:
Use keras.layers.conv2d instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.batch_normalization instead.
Instructions for updating:
Use keras.layers.max_pooling2d instead.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use tf.cast instead.


# 训练网络

In [5]:
import numpy as np

# Train for a fixed number of epochs, periodically printing the metrics of the
# current training batch and the accuracy over the test set.
with tf.Session(config=config) as sess:
    sess.run(init)
    epochs = 20
    log_every = 1000     # print training-batch metrics every N batches
    eval_every = 5000    # evaluate on the test set every N batches

    batch_cnt = 0    # number of training batches processed so far
    for epoch in range(epochs):
        for batch_data, batch_labels in train_data.next_batch():
            batch_cnt += 1
            # is_training=True switches the BN layers to training mode.
            loss_val, acc_val, _ = sess.run(
                [loss, accuracy, train_op], feed_dict={
                    X: batch_data,
                    Y: batch_labels,
                    is_training: True})

            # batch_cnt already counts the batch just processed, so it is
            # tested directly; the former `(batch_cnt+1) % N` fired one batch
            # early (after batches 999, 1999, ...).
            if batch_cnt % log_every == 0:
                print('epoch: {}, batch_loss: {}, batch_acc: {}'.format(
                    epoch, loss_val, acc_val))

            if batch_cnt % eval_every == 0:
                all_test_acc_val = list()
                for test_batch_data, test_batch_labels in test_data.next_batch():
                    # is_training=False switches the BN layers to inference mode.
                    test_acc_val = sess.run(accuracy, feed_dict={
                        X: test_batch_data,
                        Y: test_batch_labels,
                        is_training: False
                    })
                    all_test_acc_val.append(test_acc_val)
                # Unweighted mean of the per-batch accuracies; this equals the
                # overall accuracy only if every test batch has the same size.
                test_acc = np.mean(all_test_acc_val)
                print('epoch: {}, test_acc: {}'.format(epoch, test_acc))

epoch: 1, batch_loss: 1.0254285335540771, batch_acc: 0.640625
epoch: 2, batch_loss: 0.9187150001525879, batch_acc: 0.640625
epoch: 3, batch_loss: 0.927048921585083, batch_acc: 0.703125
epoch: 5, batch_loss: 0.8170256614685059, batch_acc: 0.703125
epoch: 6, batch_loss: 0.6145782470703125, batch_acc: 0.8125
epoch: 6, test_acc: 0.6488381624221802
epoch: 7, batch_loss: 0.4981405436992645, batch_acc: 0.828125
epoch: 8, batch_loss: 0.5179604291915894, batch_acc: 0.796875
epoch: 10, batch_loss: 0.4443773329257965, batch_acc: 0.828125
epoch: 11, batch_loss: 0.5571237206459045, batch_acc: 0.796875
epoch: 12, batch_loss: 0.5267115235328674, batch_acc: 0.84375
epoch: 12, test_acc: 0.7114382982254028
epoch: 14, batch_loss: 0.2802017331123352, batch_acc: 0.9375
epoch: 15, batch_loss: 0.44359830021858215, batch_acc: 0.828125
epoch: 16, batch_loss: 0.35668569803237915, batch_acc: 0.875
epoch: 17, batch_loss: 0.5140799283981323, batch_acc: 0.828125
epoch: 19, batch_loss: 0.329128623008728, batch_acc: 

经测试发现，该网络单纯地使用BN比混合使用BN与dropout效果更好，所以在代码中注释掉了dropout层。