In [1]:
import os
os.sys.path.append(os.path.dirname(os.path.abspath('..')))

# 数据准备

In [2]:
from dataset.dataset import load_cifar10

train_data, test_data = load_cifar10(batch_size=64)



(50000, 3072) (50000,)




(10000, 3072) (10000,)


# 网络结构设计
GoogLeNet结构设计主要在于其独特的Interception结构：并行的三个卷积与最大池化操作。同样由于这里使用CIFAR10数据集的原因，这里实现的GoogLeNet是一个缩水版。首先将Interception之前的所有层替换成单层的conv+maxpooling。并且这里只设置4个Interception结构。

In [3]:
import tensorflow as tf

unit_I = train_data.n_features    # 输入单元数，等于特征数

# 三种卷积核
conv_size1 = (1, 1)
conv_size3 = (3, 3)
conv_size5 = (5, 5)

filters1 = 32
filters_2 = [16, 64, 8]    # inception模块的通道列表
filters_3 = [32, 128, 16]

# 两种步长
strides_1 = (1, 1)
strides_2 = (2, 2)

fc_size = 128    # 全连接层单元数

unit_O = 10

  from ._conv import register_converters as _register_converters


定义创建Interception结构的函数：

In [4]:
def inc_block(inputs, channels, name=None):
    '''
    channels: 列表，包含三个卷积操作的输出深度
    '''
    in_channels = inputs.get_shape().as_list()[-1]    # 最后一维为深度

    with tf.variable_scope(name):
        conv1 = tf.layers.conv2d(inputs, filters=channels[0],
                                 kernel_size=conv_size1, strides=strides_1,
                                 padding='same', activation=tf.nn.relu)

        conv2_1 = tf.layers.conv2d(inputs, filters=in_channels//2,    # 瓶颈层将参数减半
                                   kernel_size=conv_size1, strides=strides_1,
                                   padding='same', activation=tf.nn.relu)
        conv2_2 = tf.layers.conv2d(conv2_1, filters=channels[1],
                                   kernel_size=conv_size3, strides=strides_1,
                                   padding='same', activation=tf.nn.relu)

        conv3_1 = tf.layers.conv2d(inputs, filters=in_channels//2,    # 瓶颈层将参数减半
                                   kernel_size=conv_size1, strides=strides_1,
                                   padding='same', activation=tf.nn.relu)
        conv3_2 = tf.layers.conv2d(conv3_1, filters=channels[2],
                                   kernel_size=conv_size5, strides=strides_1,
                                   padding='same', activation=tf.nn.relu)

        pool4_1 = tf.layers.max_pooling2d(inputs, pool_size=conv_size3,
                                          strides=strides_1, padding='same')
        conv4_2 = tf.layers.conv2d(pool4_1, filters=in_channels//2,
                                   kernel_size=conv_size1, strides=strides_1,
                                   padding='same', activation=tf.nn.relu)

    return tf.concat([conv1, conv2_2, conv3_2, conv4_2], axis=3)

# 搭建网络

In [5]:
X = tf.placeholder(tf.float32, [None, unit_I])  # 数据的样本数不指定，只指定特征数
Y = tf.placeholder(tf.int64, [None])    # 目标值为列向量，int64为了兼容
X_img = tf.transpose(tf.reshape(X, [-1, 3, 32, 32]),
                     perm=[0, 2, 3, 1])    # 转为图片格式送入模型，(n_samples,width,height,depth)

# 网络结构图
with tf.name_scope('GoogLeNet'):
    conv1 = tf.layers.conv2d(X_img, filters=32,
                             kernel_size=conv_size5, padding='same',
                             activation=tf.nn.relu, name='conv1')
    pooling1 = tf.layers.max_pooling2d(conv1, pool_size=conv_size3,
                                       strides=strides_2, name='pooling1')

    inc_mod_2a = inc_block(pooling1, filters_2, 'inception_2a')
    inc_mod_2b = inc_block(inc_mod_2a, filters_2, 'inception_2b')

    pooling2 = tf.layers.max_pooling2d(inc_mod_2b, pool_size=conv_size3,
                                       strides=strides_2, name='pooling2')

    inc_mod_3a = inc_block(pooling2, filters_3, 'inception_3a')
    inc_mod_3b = inc_block(inc_mod_3a, filters_3, 'inception_3b')

    pooling3 = tf.layers.average_pooling2d(inc_mod_3b, pool_size=conv_size3,
                                           strides=strides_1, name='pooling3')

    fc = tf.layers.dense(tf.layers.flatten(pooling3), fc_size,
                         activation=tf.nn.relu, name='FC')

    logits = tf.layers.dense(fc, unit_O, activation=None)

# 评估图
with tf.name_scope('Eval'):
    # 计算一维向量与onehot向量之间的损失
    loss = tf.losses.sparse_softmax_cross_entropy(labels=Y, logits=logits)
    predict = tf.argmax(logits, 1)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predict, Y), tf.float32))

# 优化图
with tf.name_scope('train_op'):
    lr = 1e-3
    train_op = tf.train.AdamOptimizer(lr).minimize(loss)

init = tf.global_variables_initializer()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True    # 按需使用显存

Instructions for updating:
Use keras.layers.conv2d instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.max_pooling2d instead.
Instructions for updating:
Use keras.layers.average_pooling2d instead.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use tf.cast instead.


# 训练网络

In [6]:
import numpy as np

with tf.Session(config=config) as sess:
    sess.run(init)
    epochs = 20

    batch_cnt = 0
    for epoch in range(epochs):
        for batch_data, batch_labels in train_data.next_batch():
            batch_cnt += 1
            loss_val, acc_val, _ = sess.run([loss, accuracy, train_op],
                                            feed_dict={X: batch_data,
                                                       Y: batch_labels})

            # 每1000batch输出一次信息
            if (batch_cnt+1) % 1000 == 0:
                print('epoch: {}, batch_loss: {}, batch_acc: {}'.format(
                    epoch, loss_val, acc_val))

            # 每5000batch做一次验证
            if (batch_cnt+1) % 5000 == 0:
                all_test_acc_val = list()
                for test_batch_data, test_batch_labels in test_data.next_batch():
                    test_acc_val = sess.run([accuracy],
                                            feed_dict={X: test_batch_data,
                                                       Y: test_batch_labels})
                    all_test_acc_val.append(test_acc_val)
                test_acc = np.mean(all_test_acc_val)
                print('epoch: {}, test_acc: {}'.format(epoch, test_acc))

epoch: 1, batch_loss: 1.1947743892669678, batch_acc: 0.578125
epoch: 2, batch_loss: 0.9500958919525146, batch_acc: 0.6875
epoch: 3, batch_loss: 0.7968510389328003, batch_acc: 0.703125
epoch: 5, batch_loss: 0.5155200362205505, batch_acc: 0.875
epoch: 6, batch_loss: 0.7560532093048096, batch_acc: 0.71875
epoch: 6, test_acc: 0.7427884340286255
epoch: 7, batch_loss: 0.4780675172805786, batch_acc: 0.8125
epoch: 8, batch_loss: 0.5322756767272949, batch_acc: 0.765625
epoch: 10, batch_loss: 0.29794013500213623, batch_acc: 0.921875
epoch: 11, batch_loss: 0.3662758469581604, batch_acc: 0.875
epoch: 12, batch_loss: 0.34177935123443604, batch_acc: 0.890625
epoch: 12, test_acc: 0.7739382982254028
epoch: 14, batch_loss: 0.1486511379480362, batch_acc: 0.953125
epoch: 15, batch_loss: 0.14802637696266174, batch_acc: 0.953125
epoch: 16, batch_loss: 0.2157987654209137, batch_acc: 0.953125
epoch: 17, batch_loss: 0.44383519887924194, batch_acc: 0.875
epoch: 19, batch_loss: 0.06459574401378632, batch_acc: 0

这里我们只构建了一个具有四个inception model的浅层GoogLeNet，可以看到其效果已经与之前的ResNet34相当。