In [1]:
import tensorflow as tf
import numpy as np
import math
import os
os.sys.path.append(os.path.dirname(os.path.abspath('..')))

  from ._conv import register_converters as _register_converters


In [2]:
def get_default_params():
    return tf.contrib.training.HParams(
        emb_size=64,
        t_size=50,
        conv_size=3,    # 卷积核尺寸
        filters=64,    # 卷积核数量
        fc_size=32,
        dropout_rate=0.5,
        batch_size=64,
        grad_thresh=1.0,    # 梯度阈值
        lr=0.001,
        cnt_thresh=10,    # 词的频率阈值
    )


params = get_default_params()


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.



## 数据封装

In [3]:
from dataset.dataset import load_news

train_data, test_data = load_news(batch_size=params.batch_size,
                                  cnt_thresh=params.cnt_thresh, t_size=params.t_size)

(50000, 50) (50000,)
(10000, 50) (10000,)


## 网络搭建

In [4]:
vocal_size = train_data.voc_size

unit_O = 10    # 输出单元数，类别数

In [5]:
X = tf.placeholder(tf.int32, [None, params.t_size])
Y = tf.placeholder(tf.int64, [None])
is_training = tf.placeholder(tf.bool)    # 训练标识位

# 自动计数
global_step = tf.Variable(tf.zeros([], tf.int32),
                          name='global_step', trainable=False)

with tf.variable_scope('emb', initializer=tf.random_uniform_initializer(-1.0, 1.0)):
    emb_lookup = tf.get_variable('embedding', [vocal_size, params.emb_size],
                                 dtype=tf.float32)
    # (batch_size,t_size,emb_size)
    emb = tf.nn.embedding_lookup(emb_lookup, X)

# 对embedding使用CNN网络
xavier_scale = 1 / math.sqrt(params.emb_size + params.filters) / 3
initializer = tf.random_uniform_initializer(-xavier_scale, xavier_scale)
with tf.variable_scope('CNN', initializer=initializer):
    conv_layer = tf.layers.conv1d(emb, filters=params.filters,
                                  kernel_size=params.conv_size,
                                  activation=tf.nn.relu)    # [None, t_size-conv_size+1, filters]
    conv_outputs = tf.reduce_mean(conv_layer, axis=1)    # [None, filters]

with tf.name_scope('FC'):
    fc = tf.layers.dense(conv_outputs, params.fc_size, activation=tf.nn.relu)
    fc = tf.layers.dropout(fc, rate=params.dropout_rate, training=is_training)

logits = tf.layers.dense(fc, unit_O, activation=None)    # 输出层，无激活

with tf.name_scope('Eval'):
    loss = tf.losses.sparse_softmax_cross_entropy(labels=Y, logits=logits)
    predict = tf.argmax(logits, 1)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predict, Y), tf.float32))

with tf.name_scope('train_op'):
    lr = 1e-3
    t_vars = tf.trainable_variables()    # 可训练变量
    # 应用梯度截断
    print(tf.gradients(loss, t_vars))
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, t_vars),
                                      params.grad_thresh)
    optimizer = tf.train.AdamOptimizer(lr)
    train_op = optimizer.apply_gradients(zip(grads, t_vars),
                                         global_step=global_step)

init = tf.global_variables_initializer()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True    # 按需使用显存

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.conv1d instead.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use keras.layers.dropout instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.
[<tensorflow.python.framework.ops.IndexedSlices object at 0x7fcfe59d9c88>, <tf.Tensor 'train_op/gradients/CNN/conv1d/conv1d/ExpandDims_1_grad/Reshape:0' shape=(3, 64, 64) dtype=float32>, <tf.Tensor 'train_op/gradients/CNN/conv1d/BiasAdd_grad/BiasAddGrad:0' shape=(64,) dtype=float32>, <tf.Tensor 'train_op/gradients/FC/dense/MatMul_grad/MatMul_1:0' shape=(64, 32) dtype=float32>, <tf.Tensor 'train_op/gradients/FC/dense/BiasAdd_grad/BiasAddGrad:0' shape=(32,) dtype=float32>, <tf.Tensor 'train_op/gradients/dense/MatMul_grad/MatMul_1:0' 

## 训练网络

In [6]:
import numpy as np

with tf.Session(config=config) as sess:
    sess.run(init)
    epochs = 20

    batch_cnt = 0
    for epoch in range(epochs):
        for batch_data, batch_labels in train_data.next_batch():
            batch_cnt += 1
            loss_val, acc_val, _ = sess.run([loss, accuracy, train_op],
                                            feed_dict={X: batch_data,
                                                       Y: batch_labels,
                                                       is_training: True})

            # 每1000batch输出一次信息
            if (batch_cnt+1) % 1000 == 0:
                print('epoch: {}, batch_loss: {}, batch_acc: {}'.format(
                    epoch, loss_val, acc_val))

            # 每5000batch做一次验证
            if (batch_cnt+1) % 5000 == 0:
                all_test_acc_val = list()
                for test_batch_data, test_batch_labels in test_data.next_batch():
                    test_acc_val = sess.run(accuracy,
                                            feed_dict={X: test_batch_data,
                                                       Y: test_batch_labels,
                                                       is_training: False})
                    all_test_acc_val.append(test_acc_val)
                test_acc = np.mean(all_test_acc_val)
                print('epoch: {}, test_acc: {}'.format(epoch, test_acc))

epoch: 1, batch_loss: 0.4775638282299042, batch_acc: 0.875
epoch: 2, batch_loss: 0.3023480474948883, batch_acc: 0.90625
epoch: 3, batch_loss: 0.14371176064014435, batch_acc: 0.953125
epoch: 5, batch_loss: 0.16871078312397003, batch_acc: 0.921875
epoch: 6, batch_loss: 0.11492493748664856, batch_acc: 0.953125
epoch: 6, test_acc: 0.91796875
epoch: 7, batch_loss: 0.051839783787727356, batch_acc: 0.96875
epoch: 8, batch_loss: 0.12428519874811172, batch_acc: 0.9375
epoch: 10, batch_loss: 0.03708276525139809, batch_acc: 0.984375
epoch: 11, batch_loss: 0.03559653460979462, batch_acc: 0.984375
epoch: 12, batch_loss: 0.002867139643058181, batch_acc: 1.0
epoch: 12, test_acc: 0.9169671535491943
epoch: 14, batch_loss: 0.018249914050102234, batch_acc: 1.0
epoch: 15, batch_loss: 0.07922951132059097, batch_acc: 0.96875
epoch: 16, batch_loss: 0.005837270990014076, batch_acc: 1.0
epoch: 17, batch_loss: 0.017340626567602158, batch_acc: 1.0
epoch: 19, batch_loss: 0.049290094524621964, batch_acc: 0.96875
e