In [1]:
import tensorflow as tf
import numpy as np
import os

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters


# 数据准备

In [2]:
from sklearn.preprocessing import StandardScaler


def unpickle(file):
    '''
    CIFAR-10数据读取函数
    '''
    import pickle
    with open(file, 'rb') as fd:
        data = pickle.load(fd, encoding='bytes')
    return data[b'data'], np.array(data[b'labels'])


class CifarData:
    def __init__(self, paths, batch_size=32, normalize=False, shuffle=False):
        '''
        paths: 文件路径
        '''
        self._data = list()
        self._target = list()
        self._n_samples = 0
        self.n_features = 0

        self._idx = 0    # mini-batch的游标
        self._batch_size = batch_size

        self._load(paths)

        if shuffle:
            self._shuffle_data()
        if normalize:
            self._normalize_data()

        print(self._data.shape, self._target.shape)

    def _load(self, paths):
        '''
        载入数据
        '''
        for path in paths:
            data, labels = unpickle(path)
            self._data.append(data)
            self._target.append(labels)

        # 将所有批次的数据拼接起来
        self._data, self._target = np.vstack(
            self._data), np.hstack(self._target)

        self._n_samples, self.n_features = self._data.shape[0], self._data.shape[1]

    def _shuffle_data(self):
        '''
        打乱数据
        '''
        idxs = np.random.permutation(self._n_samples)
        self._data = self._data[idxs]
        self._target = self._target[idxs]

    def _normalize_data(self):
        scaler = StandardScaler()
        self._data = scaler.fit_transform(self._data)

    def next_batch(self):
        '''
        生成mini-batch
        '''
        while self._idx < self._n_samples:
            yield self._data[self._idx: (self._idx+self._batch_size)], self._target[self._idx: (self._idx+self._batch_size)]
            self._idx += self._batch_size

        self._idx = 0
        self._shuffle_data()

  return f(*args, **kwds)
  return f(*args, **kwds)


In [3]:
CIFAR_DIR = "../dataset/cifar-10-batches-py/"
train_filenames = [os.path.join(
    CIFAR_DIR, 'data_batch_{}'.format(i)) for i in range(1, 6)]
test_filenames = [os.path.join(CIFAR_DIR, 'test_batch')]

batch_size = 32
train_data = CifarData(train_filenames, batch_size=batch_size,
                       normalize=True, shuffle=True)
test_data = CifarData(test_filenames, batch_size=batch_size,
                      normalize=True, shuffle=False)



(50000, 3072) (50000,)




(10000, 3072) (10000,)




# 网络结构设计
GoogLeNet结构设计主要在于其独特的Interception结构：并行的三个卷积与最大池化操作。同样由于这里使用CIFAR10数据集的原因，这里实现的GoogLeNet是一个缩水版。首先将Interception之前的所有层替换成单层的conv+maxpooling。并且这里只设置4个Interception结构。

In [4]:
unit_I = train_data.n_features    # 输入单元数，等于特征数

# 三种卷积核
conv_size1 = (1, 1)
conv_size3 = (3, 3)
conv_size5 = (5, 5)

filters1 = 32
filters_2 = [16, 64, 8]    # inception结构的通道列表
filters_3 = [32, 128, 16]

# 两种步长
strides_1 = (1, 1)
strides_2 = (2, 2)

fc_size = 128    # 全连接层单元数

unit_O = 10

定义创建Interception结构的函数：

In [5]:
def inc_block(inputs, channels, mod_name=None):
    '''
    channels: 列表，包含三个卷积操作的输出深度
    '''
    in_channels = inputs.get_shape().as_list()[-1]    # 最后一维为深度

    with tf.variable_scope(mod_name):
        conv1 = tf.layers.conv2d(inputs, filters=channels[0],
                                 kernel_size=conv_size1, strides=strides_1,
                                 padding='same', activation=tf.nn.relu)

        conv2_1 = tf.layers.conv2d(inputs, filters=in_channels//2,    # 瓶颈层将参数减半
                                   kernel_size=conv_size1, strides=strides_1,
                                   padding='same', activation=tf.nn.relu)
        conv2_2 = tf.layers.conv2d(conv2_1, filters=channels[1],
                                   kernel_size=conv_size3, strides=strides_1,
                                   padding='same', activation=tf.nn.relu)

        conv3_1 = tf.layers.conv2d(inputs, filters=in_channels//2,    # 瓶颈层将参数减半
                                   kernel_size=conv_size1, strides=strides_1,
                                   padding='same', activation=tf.nn.relu)
        conv3_2 = tf.layers.conv2d(conv3_1, filters=channels[2],
                                   kernel_size=conv_size5, strides=strides_1,
                                   padding='same', activation=tf.nn.relu)

        pool4_1 = tf.layers.max_pooling2d(inputs, pool_size=conv_size3, 
                                          strides=strides_1,padding='same')
        conv4_2 = tf.layers.conv2d(pool4_1, filters=in_channels//2,
                                   kernel_size=conv_size1, strides=strides_1,
                                   padding='same', activation=tf.nn.relu)

    return tf.concat([conv1, conv2_2, conv3_2, conv4_2], axis=3)

# 搭建网络

In [6]:
X = tf.placeholder(tf.float32, [None, unit_I])  # 数据的样本数不指定，只指定特征数
Y = tf.placeholder(tf.int64, [None])    # 目标值为列向量，int64为了兼容
X_img = tf.transpose(tf.reshape(X, [-1, 3, 32, 32]),
                     perm=[0, 2, 3, 1])    # 转为图片格式送入模型，(n_samples,width,height,depth)

# 网络结构图
with tf.name_scope('GoogLeNet'):
    conv1 = tf.layers.conv2d(X_img, filters=32,
                             kernel_size=conv_size5, padding='same',
                             activation=tf.nn.relu, name='conv1')
    pooling1 = tf.layers.max_pooling2d(conv1, pool_size=conv_size3,
                                       strides=strides_2, name='pooling1')

    inc_mod_2a = inc_block(pooling1, filters_2, 'inception_2a')
    inc_mod_2b = inc_block(inc_mod_2a, filters_2, 'inception_2b')

    pooling2 = tf.layers.max_pooling2d(inc_mod_2b, pool_size=conv_size3,
                                       strides=strides_2, name='pooling2')

    inc_mod_3a = inc_block(pooling2, filters_3, 'inception_3a')
    inc_mod_3b = inc_block(inc_mod_3a, filters_3, 'inception_3b')

    pooling3 = tf.layers.average_pooling2d(inc_mod_3b, pool_size=conv_size3,
                                           strides=strides_2, name='pooling3')

    fc = tf.layers.dense(tf.layers.flatten(pooling3), fc_size,
                         activation=tf.nn.relu, name='FC')

    logits = tf.layers.dense(fc, unit_O, activation=None)

# 评估图
with tf.name_scope('Eval'):
    # 计算一维向量与onehot向量之间的损失
    loss = tf.losses.sparse_softmax_cross_entropy(labels=Y, logits=logits)
    predict = tf.argmax(logits, 1)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predict, Y), tf.float32))

# 优化图
with tf.name_scope('train_op'):
    lr = 1e-3
    train_op = tf.train.AdamOptimizer(lr).minimize(loss)

init = tf.global_variables_initializer()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True    # 按需使用显存

Instructions for updating:
Use keras.layers.conv2d instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.max_pooling2d instead.
Instructions for updating:
Use keras.layers.average_pooling2d instead.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use tf.cast instead.


# 训练网络

In [7]:
with tf.Session(config=config) as sess:
    sess.run(init)
    epochs = 20

    batch_cnt = 0
    for epoch in range(epochs):
        for batch_data, batch_labels in train_data.next_batch():
            batch_cnt += 1
            loss_val, acc_val, _ = sess.run(
                [loss, accuracy, train_op],
                feed_dict={
                    X: batch_data,
                    Y: batch_labels})

            # 每1000batch输出一次信息
            if (batch_cnt+1) % 1000 == 0:
                print('epoch: {}, batch_loss: {}, batch_acc: {}'.format(
                    epoch, loss_val, acc_val))

            # 每5000batch做一次验证
            if (batch_cnt+1) % 5000 == 0:
                all_test_acc_val = list()
                for test_batch_data, test_batch_labels in test_data.next_batch():
                    test_acc_val = sess.run(
                        [accuracy],
                        feed_dict={
                            X: test_batch_data,
                            Y: test_batch_labels
                        })
                    all_test_acc_val.append(test_acc_val)
                test_acc = np.mean(all_test_acc_val)
                print('epoch: {}, test_acc: {}'.format(epoch, test_acc))

epoch: 0, batch_loss: 1.3469971418380737, batch_acc: 0.59375
epoch: 1, batch_loss: 1.1970510482788086, batch_acc: 0.625
epoch: 1, batch_loss: 0.7365506887435913, batch_acc: 0.8125
epoch: 2, batch_loss: 0.5225967764854431, batch_acc: 0.84375
epoch: 3, batch_loss: 0.8309845924377441, batch_acc: 0.75
epoch: 3, test_acc: 0.7290335297584534
epoch: 3, batch_loss: 0.9054447412490845, batch_acc: 0.6875
epoch: 4, batch_loss: 0.4975298047065735, batch_acc: 0.8125
epoch: 5, batch_loss: 0.24219228327274323, batch_acc: 0.9375
epoch: 5, batch_loss: 0.6137686371803284, batch_acc: 0.8125
epoch: 6, batch_loss: 0.47228461503982544, batch_acc: 0.78125
epoch: 6, test_acc: 0.7696685194969177
epoch: 7, batch_loss: 0.49589961767196655, batch_acc: 0.8125
epoch: 7, batch_loss: 0.25727078318595886, batch_acc: 0.84375
epoch: 8, batch_loss: 0.13629378378391266, batch_acc: 1.0
epoch: 8, batch_loss: 0.43135687708854675, batch_acc: 0.90625
epoch: 9, batch_loss: 0.44383591413497925, batch_acc: 0.78125
epoch: 9, test_

这里我们只构建了一个具有四个inception model的浅层GoogLeNet，可以看到其效果已经与之前的ResNet34相当。