In [55]:
import tensorflow as tf
from tensorflow import keras
from keras_bert import get_base_dict, get_model, compile_model, gen_batch_inputs
from keras_bert import load_trained_model_from_checkpoint,Tokenizer
import numpy as np
import pandas as pd
import jieba
#from keras.models import Sequential
from tensorflow.python.keras.utils.data_utils import Sequence
from keras.layers import *
from pandas import Series
from keras import backend as K
from sklearn.model_selection import StratifiedKFold
import codecs

In [56]:
# Load the training set; later cells read its 'text' and 'label' columns.
train = pd.read_csv('train.csv')

In [57]:
# Build the vocabulary and word2id; for now only the first 1000 sentences are used.
train = train.iloc[:1000, :]
texts = train['text']

# dict.fromkeys de-duplicates while keeping first-seen order, so the ids handed
# out below are the same as with a list-membership loop, without the quadratic
# `token not in list` scan.
vocabulary = list(dict.fromkeys(
    token for text in texts for token in jieba.cut(text)
))

# Start from the reserved ids returned by get_base_dict() and append every new
# word with the next free id. setdefault leaves tokens that are already present
# (e.g. reserved ones) untouched.
word2id = get_base_dict()
for token in vocabulary:
    word2id.setdefault(token, len(word2id))

In [58]:
# Combine character-level and word-level vectors
# Fixed length of every encoded sentence produced by word2int, including the
# leading id 2 and trailing id 3 that word2int adds around the text.
maxlen = 512
def remake(x, num):
    """Repeat each element of ``x`` according to the matching count in ``num``.

    ``x[i]`` is emitted ``num[i]`` times, in order. Iteration is driven by
    ``num``, so elements of ``x`` beyond ``len(num)`` are ignored.
    """
    expanded = []
    for position, repeats in enumerate(num):
        expanded.extend([x[position]] * repeats)
    return expanded
def word2int(texts):
    """Encode sentences as fixed-length lists of word ids aligned to characters.

    Each sentence is segmented with jieba; every word is mapped to its id in
    ``word2id`` and that id is repeated once per character of the word (via
    ``remake``), so the id sequence has one entry per character. The sequence
    is then wrapped as ``[2] + ids + [3]`` (2 is CLS, 3 is SEP) and
    truncated / zero-padded to exactly ``maxlen`` entries.

    Args:
        texts: iterable of strings.

    Returns:
        A list with one ``maxlen``-long list of ints per input text.
    """
    pad_id, unk_id, cls_id, sep_id = 0, 1, 2, 3
    body_len = maxlen - 2  # room left once CLS and SEP are accounted for

    x = []
    for text in texts:
        words = jieba.lcut(text)
        # word2id holds every vocabulary word, so a dict lookup replaces the
        # linear `word in vocabulary` list scan; unseen words map to unk_id.
        word_ids = [word2id.get(word, unk_id) for word in words]
        char_aligned = remake(word_ids, [len(word) for word in words])[:body_len]
        padding = [pad_id] * (body_len - len(char_aligned))
        x.append([cls_id] + char_aligned + [sep_id] + padding)
    return x

In [59]:
# Encode every sentence as a list of exactly `maxlen` ids (see word2int).
train_data_X = word2int(texts)
# Target column; used for stratification and as the batch labels during training.
train_data_Y = train['label']

In [46]:
# All-zero initial weight table (one 200-d row per id) passed to the word
# Embedding layer when the model is built.
# NOTE(review): word2id starts from get_base_dict() and appends the vocabulary
# after it, so the largest id is len(word2id) - 1; presumably that exceeds
# len(vocabulary) + 1 — confirm the table has a row for every id.
embedding_index = np.zeros((len(vocabulary) + 2, 200))
# Config and checkpoint files handed to load_trained_model_from_checkpoint.
config_path = 'chinese_L-12_H-768_A-12/bert_config.json'
checkpoint_path = 'chinese_L-12_H-768_A-12/bert_model.ckpt'
# NOTE(review): dict_path is not referenced by any other visible cell.
dict_path = 'chinese_L-12_H-768_A-12/vocab.txt'

In [47]:
class data_generator:
    """Endless, reshuffling mini-batch generator for a two-input Keras model.

    Args:
        feature: list of encoded sentences (each a list of ids).
        label: list of targets, aligned with ``feature``.
        batch_size: number of samples per batch; the last batch of an epoch
            may be smaller.
        max_len: sequences are cut to this many ids. ``None`` (default) falls
            back to the notebook-level ``maxlen`` when iteration starts.

    ``len(gen)`` is the number of batches per epoch. Iterating yields
    ``([token_ids, segment_ids], labels)`` forever, where ``segment_ids`` is an
    all-zero array shaped like ``token_ids`` and ``labels`` has shape
    ``(batch, 1)``.
    """

    def __init__(self, feature, label, batch_size=4, max_len=None):
        # feature and label are both lists
        self.feature = feature
        self.label = label
        self.batch_size = batch_size
        self.max_len = max_len
        # Ceiling division: a trailing partial batch counts as one step.
        self.steps = (len(self.label) + self.batch_size - 1) // self.batch_size

    def __len__(self):
        return self.steps

    def __iter__(self):
        n_samples = len(self.label)
        if n_samples == 0:
            # Nothing to yield; without this guard the loop below would spin forever.
            return
        limit = maxlen if self.max_len is None else self.max_len
        while True:
            # Draw a fresh order every epoch. (Shuffling a temporary copy of the
            # index range, as done previously, left the order unchanged.)
            order = np.random.permutation(n_samples)
            for start in range(0, n_samples, self.batch_size):
                batch = order[start:start + self.batch_size]
                X1 = np.array([self.feature[i][:limit] for i in batch])
                # Single-segment input: segment ids are zeros matching the tokens.
                X2 = np.zeros(X1.shape)
                Y = np.array([[self.label[i]] for i in batch])
                yield ([X1, X2], Y)

In [49]:
class MaskedGlobalMaxPool1D(keras.layers.Layer):
    """Max-pool over the second-to-last axis, pushing masked positions down first."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.supports_masking = True

    def compute_mask(self, inputs, mask=None):
        # The pooled axis is gone after call(), so no mask is forwarded.
        return None

    def compute_output_shape(self, input_shape):
        # Drop the pooled (second-to-last) axis and keep the last one.
        leading, last = input_shape[:-2], input_shape[-1]
        return leading + (last,)

    def call(self, inputs, mask=None):
        if mask is None:
            return K.max(inputs, axis=-2)
        # Subtract 1e6 wherever the mask is 0 so those positions cannot win the max.
        penalty = (1.0 - K.cast(mask, K.floatx())) * 1e6
        return K.max(inputs - K.expand_dims(penalty, axis=-1), axis=-2)
    
class MaskedGlobalAveragePooling1D(keras.layers.Layer):
    """Average over axis 1, counting only the positions the mask marks as valid.

    Without a mask this is a plain mean over axis 1. With a mask, masked
    positions contribute nothing to the sum and are not counted in the
    denominator.
    """

    def __init__(self, **kwargs):
        super(MaskedGlobalAveragePooling1D, self).__init__(**kwargs)
        self.supports_masking = True

    def compute_mask(self, inputs, mask=None):
        # The pooled axis is removed by call(), so no mask is forwarded.
        return None

    def compute_output_shape(self, input_shape):
        # Drop the pooled (second-to-last) axis and keep the last one.
        return input_shape[:-2] + (input_shape[-1],)

    def call(self, x, mask=None):
        if mask is None:
            return K.mean(x, axis=1)
        # Add a trailing axis so the mask broadcasts over the feature axis;
        # unlike K.repeat this does not need a statically known feature size.
        weights = K.expand_dims(K.cast(mask, K.floatx()), axis=-1)
        total = K.sum(x * weights, axis=1)
        count = K.sum(weights, axis=1)
        # A fully masked row has count 0; clamping the denominator makes it
        # yield 0 instead of NaN. Rows with at least one valid position are
        # unaffected because their count is >= 1.
        return total / K.maximum(count, K.epsilon())

In [50]:
class MaskedConv1D(keras.layers.Conv1D):
    """Conv1D subclass whose compute_mask returns the incoming mask unchanged."""

    def compute_mask(self, input, mask=None):
        # Hand the received mask on as-is.
        return mask

In [63]:
import os

# Restrict this process to the first GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# "Failed to get convolution algorithm ... cuDNN failed to initialize" is
# commonly caused by TensorFlow reserving the whole GPU up front; letting the
# allocation grow on demand is the usual remedy. Memory growth can only be set
# before the GPU is first used, hence the RuntimeError guard for re-run cells.
for gpu in tf.config.list_physical_devices('GPU'):
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        print('Could not enable GPU memory growth:', err)

# BERT + CNN classifier
x1_in = Input(shape=(None,))  # ids produced by word2int
x2_in = Input(shape=(None,))  # all-zero segment ids produced by data_generator
# NOTE(review): x1_in carries ids from word2id (the jieba word vocabulary) and
# is fed to BERT as well as to the word embedding, while dict_path / Tokenizer
# are never used — presumably BERT should receive ids from its own vocab;
# confirm before trusting results.

bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path)

# Word embedding. The table needs a row for every id word2int can emit, i.e.
# len(word2id) rows (ids are assigned consecutively). The initial weights are
# copied from embedding_index and zero-extended if that table is shorter, so
# the largest ids no longer index past the end of the table.
vocab_size = max(len(word2id), embedding_index.shape[0])
word_dim = embedding_index.shape[1]
word_weights = np.zeros((vocab_size, word_dim))
word_weights[:embedding_index.shape[0]] = embedding_index
# First argument: vocabulary size; second: embedding (word vector) dimension.
embedding1 = Embedding(vocab_size, word_dim, weights=[word_weights], mask_zero=True)
x1 = embedding1(x1_in)

# BERT appears to require two inputs (token ids and segment ids).
embed_layer = bert_model([x1_in, x2_in])
embed_layer = Concatenate()([embed_layer, x1])

# Convolution over the sequence, then masked max and average pooling are summed.
x = MaskedConv1D(filters=256, kernel_size=3, padding='same', activation='relu')(embed_layer)
pool = MaskedGlobalMaxPool1D()(x)
ave = MaskedGlobalAveragePooling1D()(x)
x = Add()([pool, ave])
x = Dropout(0.1)(x)
x = Dense(32, activation='relu')(x)
p = Dense(1, activation='sigmoid')(x)  # single sigmoid unit: binary classification

model = keras.Model([x1_in, x2_in], p)
model.compile(
    loss='binary_crossentropy',
    optimizer=keras.optimizers.Adam(1e-3),
    metrics=['accuracy']
)

In [64]:

# 10-fold stratified cross-validation split (only the first fold is trained, see `break`).
NFOLDS = 10
kfold = StratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=2)

# Convert once instead of re-building the arrays for every fold.
features = np.array(train_data_X)
labels = np.array(train_data_Y)

# The index arrays get their own names: calling the loop variable `train`
# would overwrite the `train` DataFrame that earlier cells read from.
for train_idx, val_idx in kfold.split(features, labels):
    train_D = data_generator(features[train_idx].tolist(), labels[train_idx].tolist())
    dev_D = data_generator(features[val_idx].tolist(), labels[val_idx].tolist())
    # Model.fit accepts Python generators directly; fit_generator is deprecated.
    model.fit(
        iter(train_D),
        steps_per_epoch=len(train_D),
        epochs=3,
        validation_data=iter(dev_D),
        validation_steps=len(dev_D),
    )
    # The same `model` object would be reused by every fold, so later folds
    # would start from weights that have already seen their validation data.
    # Stop after the first fold until the model is rebuilt per fold.
    break

Epoch 1/3


UnknownError: 2 root error(s) found.
  (0) Unknown:  Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
	 [[node model_11/masked_conv1d_3/conv1d (defined at <ipython-input-64-5626c5b6f24c>:17) ]]
	 [[gradient_tape/model_11/embedding_3/embedding_lookup/Reshape/_82]]
  (1) Unknown:  Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
	 [[node model_11/masked_conv1d_3/conv1d (defined at <ipython-input-64-5626c5b6f24c>:17) ]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_148100]

Function call stack:
train_function -> train_function
