In [1]:
import json
import numpy as np
from tqdm import tqdm
from transformers import AutoTokenizer

In [2]:
# Load the tokenizer from a local model directory.
# NOTE(review): this is an absolute, machine-specific Windows path; the notebook will not run
# elsewhere until it is changed — consider making it configurable.
tokenizer = AutoTokenizer.from_pretrained("E:\\hugging_face\\bert-base-chinese")

In [3]:
# Predicate (relation) names. A predicate's position in this list is the index of its slot
# in the multi-hot label vectors built by the data-loading cells, so the order must not change
# once labels or model weights have been produced.
p_entitys = ['丈夫', '上映时间', '主持人', '主演', '主角', '作曲', '作者', '作词', '出品公司', '出生地', '出生日期',
             '创始人', '制片人', '号', '嘉宾', '国籍', '妻子', '字', '导演', '所属专辑', '改编自', '朝代', '歌手',
             '母亲', '毕业院校', '民族', '父亲', '祖籍', '编剧', '董事长', '身高', '连载网站']

In [4]:
# Fixed length of every token-id sequence: longer texts are truncated to it and shorter
# ones are right-padded up to it by the data-loading cells.
max_length = 300

In [5]:
# Build the training split: a (samples, max_length) matrix of token ids and a
# (samples, len(p_entitys)) matrix of multi-hot relation labels.
with open('../data/train_data.json', 'r', encoding='utf-8') as f:
    # Parse the whole file first so the handle is released before the long tokenisation loop.
    data = json.load(f)

token_list = []
p_entity_label_list = []
for item in tqdm(data):
    # One slot per predicate in p_entitys; a slot is set when the sample contains that relation.
    label = [0.] * len(p_entitys)
    for spo in item['new_spo_list']:
        # list.index raises ValueError if the predicate is not listed in p_entitys.
        label[p_entitys.index(spo['p']['entity'])] = 1.

    # Truncate to max_length (special tokens included), then right-pad with id 0
    # (presumably the tokenizer's padding id — confirm against tokenizer.pad_token_id).
    token = tokenizer.encode(item['text'], add_special_tokens=True, max_length=max_length, truncation=True)
    token = token + [0] * (max_length - len(token))

    token_list.append(token)
    p_entity_label_list.append(label)

token_list = np.array(token_list)
p_entity_label_list = np.array(p_entity_label_list)
print(token_list.shape, p_entity_label_list.shape)

100%|██████████| 113613/113613 [00:20<00:00, 5460.08it/s]


(113613, 300) (113613, 32)


In [6]:
# Build the validation split: a (samples, max_length) matrix of token ids and a
# (samples, len(p_entitys)) matrix of multi-hot relation labels.
with open('../data/valid_data.json', 'r', encoding='utf-8') as f:
    # Parse the whole file first so the handle is released before the tokenisation loop.
    data = json.load(f)

val_token_list = []
val_p_entity_label_list = []
for item in tqdm(data):
    # One slot per predicate in p_entitys; a slot is set when the sample contains that relation.
    label = [0.] * len(p_entitys)
    for spo in item['new_spo_list']:
        # list.index raises ValueError if the predicate is not listed in p_entitys.
        label[p_entitys.index(spo['p']['entity'])] = 1.

    # Truncate to max_length (special tokens included), then right-pad with id 0
    # (presumably the tokenizer's padding id — confirm against tokenizer.pad_token_id).
    token = tokenizer.encode(item['text'], add_special_tokens=True, max_length=max_length, truncation=True)
    token = token + [0] * (max_length - len(token))

    val_token_list.append(token)
    val_p_entity_label_list.append(label)

val_token_list = np.array(val_token_list)
val_p_entity_label_list = np.array(val_p_entity_label_list)

100%|██████████| 17781/17781 [00:03<00:00, 5658.70it/s]


In [7]:
# Number of training samples (rows of p_entity_label_list).
train_length = len(p_entity_label_list)


def generator(batch_size=12, tokens=None, labels=None):
    """Yield shuffled (tokens, labels) mini-batches forever.

    A fresh random permutation of the sample indices is drawn at the start of
    every pass over the data, so token rows and label rows always stay paired
    and each pass sees a different order. The source arrays are never modified
    and the global NumPy RNG is not re-seeded.

    Args:
        batch_size: Number of samples per yielded batch; must be >= 1.
        tokens: Array-like of per-sample token ids. Defaults to the
            module-level ``token_list``.
        labels: Array-like of per-sample label vectors. Defaults to the
            module-level ``p_entity_label_list``.

    Yields:
        Tuples ``(token_batch, label_batch)``, each with ``batch_size`` rows.
        Trailing samples that do not fill a whole batch are left out of the
        current pass.

    Raises:
        ValueError: If ``batch_size`` < 1, if ``tokens`` and ``labels`` differ
            in length, or if there are fewer samples than ``batch_size``
            (which would otherwise loop forever without yielding anything).
    """
    if tokens is None:
        tokens = token_list
    if labels is None:
        labels = p_entity_label_list
    tokens = np.asarray(tokens)
    labels = np.asarray(labels)

    if batch_size < 1:
        raise ValueError("batch_size must be >= 1, got %r" % (batch_size,))
    sample_count = len(labels)
    if len(tokens) != sample_count:
        raise ValueError("tokens and labels differ in length: %d vs %d" % (len(tokens), sample_count))
    batch_num = sample_count // batch_size
    if batch_num == 0:
        raise ValueError("need at least batch_size=%d samples, got %d" % (batch_size, sample_count))

    while True:
        # Index permutation instead of in-place shuffling: inputs stay untouched
        # and the pairing between a token row and its label row cannot drift.
        order = np.random.permutation(sample_count)
        for i in range(batch_num):
            batch_idx = order[batch_size * i:batch_size * (i + 1)]
            yield tokens[batch_idx], labels[batch_idx]

In [8]:
import tensorflow as tf


class IDCNN(tf.keras.layers.Layer):
    """Iterated dilated convolution encoder over a sequence of feature vectors.

    The input, shaped (batch, sequence, channels), is given a dummy height axis
    and projected by a 1 x kernel_size convolution. The same stack of dilated
    separable convolutions is then applied ``n_layers`` times (weights are
    shared between iterations). The output of every iteration is concatenated
    along the channel axis, passed through dropout, and projected by a ReLU
    dense layer to 128 features per position.

    Args:
        d_model: Stored on the instance but not used by the computation; kept
            for backward compatibility.
        filter_num: Number of filters of every convolution.
        kernel_size: Width of every convolution kernel along the sequence axis.
        n_layers: Number of times the dilated stack is applied; must be >= 1.
        dilation_rates: Dilation rate of each convolution in the stack.
        **kwargs: Forwarded to ``tf.keras.layers.Layer`` (e.g. ``name``).

    Raises:
        ValueError: If ``n_layers`` < 1.
    """

    def __init__(self, d_model=312, filter_num=128, kernel_size=3, n_layers=4, dilation_rates=(1, 3, 5),
                 **kwargs):
        # Initialise the base layer first so Keras attribute tracking is ready
        # before any attribute is assigned.
        super(IDCNN, self).__init__(**kwargs)
        if n_layers < 1:
            raise ValueError("n_layers must be >= 1, got %r" % (n_layers,))
        self.d_model = d_model
        self.filter_num = filter_num
        self.kernel_size = kernel_size
        # The kernel width used to be fixed at 3 regardless of kernel_size; it now
        # follows the argument (the default is still 3).
        self.filter_width = kernel_size
        self.n_layers = n_layers
        # Immutable copy: no state is shared with the caller's sequence or the default.
        self.dilation_rates = tuple(dilation_rates)

    def build(self, input_shape):
        """Create the sub-layers; ``input_shape`` is (batch, sequence, channels)."""
        self.sequence_length = input_shape[1]
        kernel = [1, self.filter_width]
        self.input_embedding_conv2d = tf.keras.layers.Conv2D(filters=self.filter_num, kernel_size=kernel,
                                                             padding="same")
        self.dilation_convs = []
        self.layer_norms = []
        for rate in self.dilation_rates:
            self.dilation_convs.append(
                tf.keras.layers.SeparableConv2D(filters=self.filter_num, kernel_size=kernel,
                                                padding="same", dilation_rate=rate))
            # NOTE(review): these normalisation layers are created but never applied in
            # call(); kept as-is so the layer's set of weights does not change.
            self.layer_norms.append(tf.keras.layers.LayerNormalization())
        self.last_dense = tf.keras.layers.Dense(units=128, activation=tf.nn.relu)
        # Created once here instead of instantiating a new layer on every call().
        self.dropout = tf.keras.layers.Dropout(0.17)
        super(IDCNN, self).build(input_shape)

    def call(self, inputs, training=None):
        """Encode ``inputs`` (batch, sequence, channels) into (batch, sequence, 128)."""
        # Conv2D expects 4-D input: add a height axis of size 1.
        embedding = tf.expand_dims(inputs, axis=1)
        embedding = self.input_embedding_conv2d(embedding)

        final_out_from_layers = []
        for _ in range(self.n_layers):
            # The same convolutions are reused in every iteration.
            for conv in self.dilation_convs:
                embedding = conv(embedding)
            final_out_from_layers.append(embedding)

        # Channel width after concatenation is n_layers * filter_num.
        final_out = tf.concat(final_out_from_layers, axis=3)
        final_out = tf.squeeze(final_out, axis=1)
        final_out = self.dropout(final_out, training=training)
        return self.last_dense(final_out)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created from config."""
        config = super(IDCNN, self).get_config()
        config.update({
            "d_model": self.d_model,
            "filter_num": self.filter_num,
            "kernel_size": self.kernel_size,
            "n_layers": self.n_layers,
            "dilation_rates": list(self.dilation_rates),
        })
        return config

In [9]:
import os

# Settings for this training run.
vocab_size = 21128  # rows of the embedding table; presumably the tokenizer's vocabulary size — TODO confirm
weights_path = '../saver/model.h5'
batch_size = 256


def multilabel_accuracy(y_true, y_pred):
    """Per-sample fraction of relation slots predicted correctly from raw logits.

    The model outputs logits (no output activation), so a logit above 0 is
    counted as a positive prediction.
    """
    y_true = tf.cast(y_true, tf.float32)
    predicted = tf.cast(y_pred > 0.0, tf.float32)
    return tf.reduce_mean(tf.cast(tf.equal(y_true, predicted), tf.float32), axis=-1)


strategy = tf.distribute.MirroredStrategy(devices=["/gpu:0", "/gpu:1"],
                                          cross_device_ops=tf.distribute.ReductionToOneDevice())
with strategy.scope():
    # Token ids -> embedding -> IDCNN encoder -> flatten -> one logit per predicate.
    # Input length and output width follow max_length and p_entitys so they cannot
    # drift from the arrays built by the data-loading cells.
    input_token = tf.keras.Input(shape=(max_length,), dtype='int32', name='input_token')
    embedding = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=256)(input_token)
    embedding = IDCNN()(embedding)
    embedding = tf.keras.layers.BatchNormalization()(embedding)
    embedding = tf.keras.layers.Flatten()(embedding)
    embedding = tf.keras.layers.Dropout(0.217)(embedding)
    output = tf.keras.layers.Dense(len(p_entitys))(embedding)
    model = tf.keras.Model(inputs=input_token, outputs=output)

    # Resume from the previous checkpoint when one exists.
    if os.path.exists(weights_path):
        model.load_weights(weights_path)

    # The targets are multi-hot, so accuracy is measured per label slot. The 'accuracy'
    # string lets Keras pick a variant from the tensor shapes, which for this output
    # would be an argmax-based comparison that is not meaningful for multi-label targets.
    model.compile(optimizer=tf.keras.optimizers.Adam(2.17e-5), loss=tf.nn.sigmoid_cross_entropy_with_logits,
                  metrics=[multilabel_accuracy])

    # 20 rounds of 5 epochs each; weights are written after every round.
    for i in range(20):
        model.fit(generator(batch_size), steps_per_epoch=train_length // batch_size, epochs=5,
                  validation_data=(val_token_list, val_p_entity_label_list))
        model.save_weights(weights_path)
    

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensor

KeyboardInterrupt: 