In [1]:
import os
os.sys.path.append(os.path.dirname(os.path.abspath('..')))

# 数据准备

In [2]:
from dataset.dataset import load_cifar10

train_data,test_data=load_cifar10(batch_size=64)



(50000, 3072) (50000,)




(10000, 3072) (10000,)


# 网络结构设计
mobilenet中DSC+PC的结构是隔层使用的，即conv+DSC+PC+conv这样的结构。这里实现的mini-mobilenet与Googlenet类似，只使用两次级联结构。

In [3]:
import tensorflow as tf

unit_I = train_data.n_features    # 输入单元数，等于特征数

# 三种卷积核
conv_size1 = (1, 1)
conv_size3 = (3, 3)

filters_1 = 32
filters_2 = 64
filters_3 = 128

# 两种步长
strides_1 = (1, 1)
strides_2 = (2, 2)

fc_size = 128    # 全连接层单元数

unit_O = 10

  from ._conv import register_converters as _register_converters


定义创建```Conv dw```结构的函数：

In [4]:
def conv_dw(inputs, filters, strides, name=None):
    '''
    filters: 改变通道数，PC层使用的参数
    strides: 改变尺寸，DSC层使用的参数
    '''
    in_channels = inputs.get_shape().as_list()[-1]    # 最后一维为深度
    input_split = tf.split(inputs, in_channels, axis=3)    # 按深度进行分割
    DSC_outputs = list()

    with tf.name_scope(name):
        for i in range(in_channels):    # 分别对每个深度做卷积
            cur_conv = tf.layers.conv2d(input_split[i], filters=1,
                                        kernel_size=conv_size3, strides=strides,
                                        padding='same', activation=tf.nn.relu)
            DSC_outputs.append(cur_conv)
        DSC_outputs = tf.concat(DSC_outputs, axis=3)

        PC_layer = tf.layers.conv2d(DSC_outputs, filters=filters,
                                    kernel_size=conv_size3, strides=strides_1,
                                    padding='same', activation=tf.nn.relu)
        
    return PC_layer

# 搭建网络

In [5]:
X = tf.placeholder(tf.float32, [None, unit_I])  # 数据的样本数不指定，只指定特征数
Y = tf.placeholder(tf.int64, [None])    # 目标值为列向量，int64为了兼容
X_img = tf.transpose(tf.reshape(X, [-1, 3, 32, 32]),
                     perm=[0, 2, 3, 1])    # 转为图片格式送入模型，(n_samples,width,height,depth)

# 网络结构图
with tf.name_scope('MobileNet'):
    conv1 = tf.layers.conv2d(X_img, filters=filters_1,
                             kernel_size=conv_size3, strides=strides_2,
                             padding='same', activation=tf.nn.relu, name='conv1')
    conv_dw_1 = conv_dw(conv1, filters=filters_1,
                        strides=strides_1, name='conv_dw1')

    conv2 = tf.layers.conv2d(X_img, filters=filters_2,
                             kernel_size=conv_size1, strides=strides_1,
                             padding='same', activation=tf.nn.relu, name='conv2')
    conv_dw_2 = conv_dw(conv1, filters=filters_2,
                        strides=strides_2, name='conv_dw2')

    conv3 = tf.layers.conv2d(X_img, filters=filters_3,
                             kernel_size=conv_size3, strides=strides_1,
                             padding='same', activation=tf.nn.relu, name='conv3')
    conv_dw_3 = conv_dw(conv1, filters=filters_3,
                        strides=strides_1, name='conv_dw3')
    
    pooling=tf.layers.average_pooling2d(conv_dw_3, pool_size=conv_size3,
                                           strides=strides_1, name='pooling')

    fc = tf.layers.dense(tf.layers.flatten(pooling), fc_size,
                         activation=tf.nn.relu, name='FC')

    logits = tf.layers.dense(fc, unit_O, activation=None)

# 评估图
with tf.name_scope('Eval'):
    # 计算一维向量与onehot向量之间的损失
    loss = tf.losses.sparse_softmax_cross_entropy(labels=Y, logits=logits)
    predict = tf.argmax(logits, 1)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predict, Y), tf.float32))

# 优化图
with tf.name_scope('train_op'):
    lr = 1e-3
    train_op = tf.train.AdamOptimizer(lr).minimize(loss)

init = tf.global_variables_initializer()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True    # 按需使用显存

Instructions for updating:
Use keras.layers.conv2d instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.average_pooling2d instead.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use tf.cast instead.


# 训练网络

In [6]:
import numpy as np

with tf.Session(config=config) as sess:
    sess.run(init)
    epochs = 20

    batch_cnt = 0
    for epoch in range(epochs):
        for batch_data, batch_labels in train_data.next_batch():
            batch_cnt += 1
            loss_val, acc_val, _ = sess.run([loss, accuracy, train_op],
                feed_dict={X: batch_data,
                    Y: batch_labels})

            # 每1000batch输出一次信息
            if (batch_cnt+1) % 1000 == 0:
                print('epoch: {}, batch_loss: {}, batch_acc: {}'.format(
                    epoch, loss_val, acc_val))

            # 每5000batch做一次验证
            if (batch_cnt+1) % 5000 == 0:
                all_test_acc_val = list()
                for test_batch_data, test_batch_labels in test_data.next_batch():
                    test_acc_val = sess.run([accuracy],
                        feed_dict={X: test_batch_data,
                            Y: test_batch_labels})
                    all_test_acc_val.append(test_acc_val)
                test_acc = np.mean(all_test_acc_val)
                print('epoch: {}, test_acc: {}'.format(epoch, test_acc))

epoch: 1, batch_loss: 0.8887923955917358, batch_acc: 0.65625
epoch: 2, batch_loss: 0.8980585336685181, batch_acc: 0.65625
epoch: 3, batch_loss: 0.6783936023712158, batch_acc: 0.75
epoch: 5, batch_loss: 0.5539352297782898, batch_acc: 0.859375
epoch: 6, batch_loss: 0.46830227971076965, batch_acc: 0.796875
epoch: 6, test_acc: 0.733473539352417
epoch: 7, batch_loss: 0.5143811702728271, batch_acc: 0.84375
epoch: 8, batch_loss: 0.18421509861946106, batch_acc: 0.953125
epoch: 10, batch_loss: 0.28145772218704224, batch_acc: 0.90625
epoch: 11, batch_loss: 0.12987932562828064, batch_acc: 0.953125
epoch: 12, batch_loss: 0.11782132089138031, batch_acc: 0.953125
epoch: 12, test_acc: 0.7272636294364929
epoch: 14, batch_loss: 0.04714268073439598, batch_acc: 1.0
epoch: 15, batch_loss: 0.10158750414848328, batch_acc: 0.96875
epoch: 16, batch_loss: 0.17938190698623657, batch_acc: 0.921875
epoch: 17, batch_loss: 0.11327043920755386, batch_acc: 0.9375
epoch: 19, batch_loss: 0.1213018149137497, batch_acc: 