In [1]:
import os
os.sys.path.append(os.path.dirname(os.path.abspath('..')))

# 数据准备

In [2]:
from dataset.dataset import load_cifar10

train_data, test_data = load_cifar10(batch_size=64)



(50000, 3072) (50000,)




(10000, 3072) (10000,)


# 网络结构设计
原论文中ResNet34中不包含bottleneck层，并且每经过一个残差块数据流的尺寸减半、深度加倍。另外，由于这里使用的是CIFAR10数据集，尺寸为$32{\times}32$，无法承受多次缩小，这里就省略了最开始的卷积层$(7{\times}7{\times}64)$与最大池化层$(3{\times}3{\times}64)$，使用一个$(3{\times}3{\times}32)$、步长1的卷积核代替。即这里实现的ResNet是从conv3才开始减少尺寸的。

首先定义一个构建残差块的函数：

In [3]:
import tensorflow as tf

unit_I = train_data.n_features    # 输入单元数，等于特征数

# 三种卷积核
conv_size1 = (1, 1)
conv_size3 = (3, 3)
conv_size5 = (5, 5)
init_filters = 32    # 初始卷积核数量

# 两种步长
strides_1 = (1, 1)
strides_2 = (2, 2)

# 池化核尺寸
pool_size = (2, 2)

n_res_blocks = [3, 4, 6, 3]    # 各部分残差块的数量
the_last_layer = None    # 用于缓存

fc_size = 128    # 全连接层单元数

unit_O = 10    # 输出单元数，类别数

  from ._conv import register_converters as _register_converters


In [4]:
def res_block(inputs, out_channels, conv_id=2, block_id=1):
    '''
    inputs: 输入
    out_channels: 输出数据的通道数
    '''
    in_channels = inputs.get_shape().as_list()[-1]    # 最后一维为深度
    if out_channels == in_channels*2:
        deepen = True
        strides = strides_2    # 加深的同时尺寸减半，通过步长来实现
    else:
        deepen = False
        strides = strides_1

    with tf.variable_scope('conv{}-{}'.format(conv_id, block_id)):
        # 如果需要降采样(减半尺寸)，论文中指明只在残差块的第一层中使用步长为2的卷积核实现
        conv1 = tf.layers.conv2d(inputs, filters=out_channels,
                                 kernel_size=conv_size3, strides=strides, padding='same',
                                 activation=tf.nn.relu)
        # 后续步长为1
        conv2 = tf.layers.conv2d(conv1, filters=out_channels,
                                 kernel_size=conv_size3, strides=strides_1,  padding='same',
                                 activation=tf.nn.relu)

    # 如果做了加深操作，那么相加前需要对inputs做同样的变换：减半加深
    if deepen:
        # 尺寸减半
        input_trans = tf.layers.average_pooling2d(inputs, pool_size=pool_size,
                                                  strides=strides_2)
        # 深度加倍，只对最后一维(深度)填充
        input_trans = tf.pad(input_trans,
                             [[0, 0], [0, 0], [0, 0], [in_channels//2, in_channels//2]])
    else:
        input_trans = inputs

    return input_trans+conv2

# 搭建网络

In [5]:
X = tf.placeholder(tf.float32, [None, unit_I])  # 数据的样本数不指定，只指定特征数
Y = tf.placeholder(tf.int64, [None])    # 目标值为列向量，int64为了兼容
X_img = tf.transpose(tf.reshape(X, [-1, 3, 32, 32]),
                     perm=[0, 2, 3, 1])    # 转为图片格式送入模型，(n_samples,width,height,depth)

with tf.name_scope('ResNet'):
    # 第一部分，使用3*3、步长1的卷积代替最开始的卷积+池化
    conv1 = tf.layers.conv2d(X_img, init_filters,
                             kernel_size=conv_size3, strides=strides_1, padding='same',
                             activation=tf.nn.relu, name='conv1-1')
    the_last_layer = conv1

    # 残差的4个部分：conv2,conv3,conv4,conv5
    for conv_id in range(4):
        # 子编号，如conv2-1,conv3-2等
        for block_id in range(n_res_blocks[conv_id]):
            cur_conv = res_block(the_last_layer,
                                 # 深度(卷积核数)以2的倍数增加
                                 init_filters * (2**(conv_id)),
                                 conv_id+2, block_id+1)
            the_last_layer = cur_conv

    pooling = tf.layers.average_pooling2d(the_last_layer, pool_size=conv_size3,
                                          strides=strides_1, name='pooling')

    fc = tf.layers.dense(tf.layers.flatten(pooling),
                         fc_size, activation=tf.nn.relu, name='FC')

    logits = tf.layers.dense(fc, unit_O, activation=None)

# 评估图
with tf.name_scope('Eval'):
    # 计算一维向量与onehot向量之间的损失
    loss = tf.losses.sparse_softmax_cross_entropy(labels=Y, logits=logits)
    predict = tf.argmax(logits, 1)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predict, Y), tf.float32))

# 优化图
with tf.name_scope('train_op'):
    lr = 1e-3
    train_op = tf.train.AdamOptimizer(lr).minimize(loss)

init = tf.global_variables_initializer()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True    # 按需使用显存

Instructions for updating:
Use keras.layers.conv2d instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.average_pooling2d instead.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use tf.cast instead.


# 训练网络

In [6]:
import numpy as np

with tf.Session(config=config) as sess:
    sess.run(init)
    epochs = 20

    batch_cnt = 0
    for epoch in range(epochs):
        for batch_data, batch_labels in train_data.next_batch():
            batch_cnt += 1
            loss_val, acc_val, _ = sess.run([loss, accuracy, train_op],
                                            feed_dict={X: batch_data,
                                                       Y: batch_labels})

            # 每1000batch输出一次信息
            if (batch_cnt+1) % 1000 == 0:
                print('epoch: {}, batch_loss: {}, batch_acc: {}'.format(
                    epoch, loss_val, acc_val))

            # 每5000batch做一次验证
            if (batch_cnt+1) % 5000 == 0:
                all_test_acc_val = list()
                for test_batch_data, test_batch_labels in test_data.next_batch():
                    test_acc_val = sess.run([accuracy],
                                            feed_dict={X: test_batch_data,
                                                       Y: test_batch_labels})
                    all_test_acc_val.append(test_acc_val)
                test_acc = np.mean(all_test_acc_val)
                print('epoch: {}, test_acc: {}'.format(epoch, test_acc))

epoch: 1, batch_loss: 1.512810468673706, batch_acc: 0.46875
epoch: 2, batch_loss: 1.3371647596359253, batch_acc: 0.5
epoch: 3, batch_loss: 0.710586428642273, batch_acc: 0.765625
epoch: 5, batch_loss: 0.6311267018318176, batch_acc: 0.765625
epoch: 6, batch_loss: 0.4470682740211487, batch_acc: 0.875
epoch: 6, test_acc: 0.7403846383094788
epoch: 7, batch_loss: 0.7567460536956787, batch_acc: 0.765625
epoch: 8, batch_loss: 0.27723222970962524, batch_acc: 0.9375
epoch: 10, batch_loss: 0.2840682864189148, batch_acc: 0.890625
epoch: 11, batch_loss: 0.26853126287460327, batch_acc: 0.90625
epoch: 12, batch_loss: 0.26754921674728394, batch_acc: 0.921875
epoch: 12, test_acc: 0.7673277258872986
epoch: 14, batch_loss: 0.18979890644550323, batch_acc: 0.953125
epoch: 15, batch_loss: 0.17225077748298645, batch_acc: 0.96875
epoch: 16, batch_loss: 0.15391401946544647, batch_acc: 0.921875
epoch: 17, batch_loss: 0.09193442761898041, batch_acc: 0.96875
epoch: 19, batch_loss: 0.07834817469120026, batch_acc: 

同样不完整的ResNet要略优于VGGNet。