From 2267b374661b08a6c7bdf5524377d704159f7dbd Mon Sep 17 00:00:00 2001
From: BrikerMan
Date: Fri, 22 Feb 2019 17:59:45 +0800
Subject: [PATCH 01/20] update setup.py

---
 setup.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 696c7cf1..29185a20 100644
--- a/setup.py
+++ b/setup.py
@@ -12,12 +12,17 @@
 """
 import pathlib
-from version import __version__
+
 from setuptools import find_packages, setup
 
+from version import __version__
+
 # Package meta-data.
 NAME = 'kashgari'
-DESCRIPTION = 'simple and powerful state-of-the-art NLP framework with pre-trained word2vec and bert embedding.'
+DESCRIPTION = 'Simple and powerful NLP framework, ' \
+              'build your state-of-the-art model in 5 minutes for ' \
+              'named entity recognition (NER), part-of-speech ' \
+              'tagging (PoS) and text classification tasks.'
 URL = 'https://github.com/BrikerMan/Kashgari'
 EMAIL = 'eliyar917@gmail.com'
 AUTHOR = 'BrikerMan'

From 8b82119424c4cf893ed9d28686632cd3c8b6cc42 Mon Sep 17 00:00:00 2001
From: alexwwang
Date: Sat, 23 Feb 2019 12:19:29 +0800
Subject: [PATCH 02/20] modify and add some models

---
 kashgari/tasks/classification/__init__.py    |   9 +-
 kashgari/tasks/classification/deep_models.py | 592 ++++++++++++++++++
 .../tasks/classification/shadow_models.py    | 117 ++++
 3 files changed, 715 insertions(+), 3 deletions(-)
 create mode 100644 kashgari/tasks/classification/deep_models.py
 create mode 100644 kashgari/tasks/classification/shadow_models.py

diff --git a/kashgari/tasks/classification/__init__.py b/kashgari/tasks/classification/__init__.py
index 8b171209..c0184385 100644
--- a/kashgari/tasks/classification/__init__.py
+++ b/kashgari/tasks/classification/__init__.py
@@ -11,6 +11,9 @@
 """
 from .base_model import ClassificationModel
-from .blstm_model import BLSTMModel
-from .cnn_lstm_model import CNNLSTMModel
-from .cnn_model import CNNModel
+#from .blstm_model import BLSTMModel
+#from .cnn_lstm_model import CNNLSTMModel
+#from .cnn_model import CNNModel
+from .shadow_models import BLSTMModel, CNNLSTMModel, CNNModel
+from .deep_models import AVCNNModel, KMaxCNNModel, RCNNModel, AVRNNModel
+from .deep_models import DropoutBGRUModel, DropoutAVRNNModel

diff --git a/kashgari/tasks/classification/deep_models.py b/kashgari/tasks/classification/deep_models.py
new file mode 100644
index 00000000..57e251d3
--- /dev/null
+++ b/kashgari/tasks/classification/deep_models.py
@@ -0,0 +1,592 @@
+# encoding: utf-8
+"""
+@author: Alex
+@contact: ialexwwang@gmail.com
+
+@version: 0.1
+@license: Apache Licence
+@file: deep_models.py
+@time: 2019-02-21 17:54
+
+@Reference: https://github.com/zake7749/DeepToxic/blob/master/sotoxic/models/keras/model_zoo.py
+"""
+from __future__ import absolute_import, division
+import logging
+
+import tensorflow as tf
+from keras.layers import Dense, Input, Embedding, Lambda, Activation, Reshape, Flatten
+from keras.layers import Dropout, SpatialDropout1D
+from keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D
+from keras.layers import Bidirectional, GRU, Conv1D
+from keras.layers import add, concatenate
+from keras.models import Model
+from keras.layers.normalization import BatchNormalization
+from keras import optimizers
+from keras import initializers
+from keras.engine import InputSpec, Layer
+from keras import backend as K
+
+from kashgari.tasks.classification.base_model import ClassificationModel
+
+
+class AttentionWeightedAverage(Layer):
+    '''
+    Computes a weighted average of the different channels across timesteps.
+    Uses 1 parameter pr.
channel to compute the attention value for a single timestep. + ''' + + def __init__(self, return_attention=False, **kwargs): + self.init = initializers.get('uniform') + self.supports_masking = True + self.return_attention = return_attention + super(AttentionWeightedAverage, self).__init__(**kwargs) + + + def build(self, input_shape): + self.input_spec = [InputSpec(ndim=3)] + assert len(input_shape) == 3 + + self.W = self.add_weight(shape=(input_shape[2], 1), + name='{}_w'.format(self.name), + initializer=self.init) + self.trainable_weights = [self.W] + super(AttentionWeightedAverage, self).build(input_shape) + + + def call(self, x, mask=None): + # computes a probability distribution over the timesteps + # uses 'max trick' for numerical stability + # reshape is done to avoid issue with Tensorflow + # and 1-dimensional weights + logits = K.dot(x, self.W) + x_shape = K.shape(x) + logits = K.reshape(logits, (x_shape[0], x_shape[1])) + ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True)) + + # masked timesteps have zero weight + if mask is not None: + mask = K.cast(mask, K.floatx()) + ai = ai * mask + att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon()) + weighted_input = x * K.expand_dims(att_weights) + result = K.sum(weighted_input, axis=1) + if self.return_attention: + return [result, att_weights] + return result + + + def get_output_shape_for(self, input_shape): + return self.compute_output_shape(input_shape) + + + def compute_output_shape(self, input_shape): + output_len = input_shape[2] + if self.return_attention: + return [(input_shape[0], output_len), (input_shape[0], input_shape[1])] + return (input_shape[0], output_len) + + + def compute_mask(self, input, input_mask=None): + if isinstance(input_mask, list): + return [None] * len(input_mask) + else: + return None + + +class KMaxPooling(Layer): + ''' + K-max pooling layer that extracts the k-highest activation from a sequence (2nd dimension). + TensorFlow backend. 
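+
+    Note: `tf.nn.top_k` returns the k values for each channel sorted in
+    descending order, not in their original sequence positions, so for an
+    input of shape (batch, timesteps, channels) this layer yields a tensor
+    of shape (batch, channels * k).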
+ ''' + + def __init__(self, k=1, **kwargs): + super().__init__(**kwargs) + self.input_spec = InputSpec(ndim=3) + self.k = k + + + def compute_output_shape(self, input_shape): + return (input_shape[0], (input_shape[2] * self.k)) + + + def call(self, inputs): + # swap last two dimensions since top_k will be applied along the last dimension + shifted_input = tf.transpose(inputs, [0, 2,1]) + + # extract top_k, returns two tensors [values, indices] + top_k = tf.nn.top_k(shifted_input, k=self.k, sorted=True, name=None)[0] + + # return flattened output + return Flatten()(top_k) + + +class AVCNNModel(ClassificationModel): + __architect_name__ = 'AVCNNModel' + __base_hyper_parameters__ = { + 'conv_0': { + 'filters': 300, + 'kernel_size':1, + 'kernel_initializer': 'normal', + 'padding': 'valid', + 'activation': 'relu' + }, + 'conv_1': { + 'filters': 300, + 'kernel_size':2, + 'kernel_initializer': 'normal', + 'padding': 'valid', + 'activation': 'relu' + }, + 'conv_2': { + 'filters': 300, + 'kernel_size':3, + 'kernel_initializer': 'normal', + 'padding': 'valid', + 'activation': 'relu' + }, + 'conv_3': { + 'filters': 300, + 'kernel_size':4, + 'kernel_initializer': 'normal', + 'padding': 'valid', + 'activation': 'relu' + }, + # --- + 'attn_0': {}, + 'avg_0': {}, + 'maxpool_0': {}, + # --- + 'maxpool_1': {}, + 'attn_1': {}, + 'avg_1': {}, + # --- + 'maxpool_2': {}, + 'attn_2': {}, + 'avg_2': {}, + # --- + 'maxpool_3': {}, + 'attn_3': {}, + 'avg_3': {}, + # --- + 'v0_col':{ + #'mode': 'concat', + 'axis': 1 + }, + 'v1_col':{ + #'mode': 'concat', + 'axis': 1 + }, + 'v2_col':{ + #'mode': 'concat', + 'axis': 1 + }, + 'merged_tensor':{ + #'mode': 'concat', + 'axis': 1 + }, + 'dropout':{ + 'rate': 0.7 + }, + 'dense':{ + 'units': 144, + 'activation': 'relu' + }, + 'activation_layer':{ + 'activation': 'softmax' + }, + 'adam_optimizer':{ + 'lr': 1e-3, + 'decay': 1e-7 + } + } + + def build_model(self): + base_model = self.embedding.model + conv_0 = Conv1D(**self.hyper_parameters['conv_0'])(base_model.output) + conv_1 = Conv1D(**self.hyper_parameters['conv_1'])(base_model.output) + conv_2 = Conv1D(**self.hyper_parameters['conv_2'])(base_model.output) + conv_3 = Conv1D(**self.hyper_parameters['conv_3'])(base_model.output) + + maxpool_0 = GlobalMaxPooling1D()(conv_0) + attn_0 = AttentionWeightedAverage()(conv_0) + avg_0 = GlobalAveragePooling1D()(conv_0) + + maxpool_1 = GlobalMaxPooling1D()(conv_1) + attn_1 = AttentionWeightedAverage()(conv_1) + avg_1 = GlobalAveragePooling1D()(conv_1) + + maxpool_2 = GlobalMaxPooling1D()(conv_2) + attn_2 = AttentionWeightedAverage()(conv_2) + avg_2 = GlobalAveragePooling1D()(conv_2) + + maxpool_3 = GlobalMaxPooling1D()(conv_3) + attn_3 = AttentionWeightedAverage()(conv_3) + avg_3 = GlobalAveragePooling1D()(conv_3) + + v0_col = concatenate([maxpool_0, maxpool_1, maxpool_2, maxpool_3], + **self.hyper_parameters['v0_col']) + v1_col = concatenate([attn_0, attn_1, attn_2, attn_3], + **self.hyper_parameters['v1_col']) + v2_col = concatenate([avg_1, avg_2, avg_0, avg_3], + **self.hyper_parameters['v2_col']) + merged_tensor = concatenate([v0_col, v1_col, v2_col], + **self.hyper_parameters['merged_tensor']) + output = Dropout(**self.hyper_parameters['dropout'])(merged_tensor) + output = Dense(**self.hyper_parameters['dense'])(output) + output = Dense(len(self.label2idx), + **self.hyper_parameters['activation_layer'])(output) + + model = Model(base_model.inputs, output) + adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) + 
model.compile(loss='categorical_crossentropy', + optimizer=adam_optimizer, + metrics=['accuracy']) + self.model = model + self.model.summary() + + +class KMaxCNNModel(ClassificationModel): + __architect_name__ = 'KMaxCNNModel' + __base_hyper_parameters__ = { + 'conv_0': { + 'filters': 180, + 'kernel_size': 1, + 'kernel_initializer': 'normal', + 'padding': 'valid', + 'activation': 'relu' + }, + 'conv_1': { + 'filters': 180, + 'kernel_size': 2, + 'kernel_initializer': 'normal', + 'padding': 'valid', + 'activation': 'relu' + }, + 'conv_2': { + 'filters': 180, + 'kernel_size': 3, + 'kernel_initializer': 'normal', + 'padding': 'valid', + 'activation': 'relu' + }, + 'conv_3': { + 'filters': 180, + 'kernel_size': 4, + 'kernel_initializer': 'normal', + 'padding': 'valid', + 'activation': 'relu' + }, + 'maxpool_0': { + 'k': 3 + }, + 'maxpool_1': { + 'k': 3 + }, + 'maxpool_2': { + 'k': 3 + }, + 'maxpool_3': { + 'k': 3 + }, + 'merged_tensor': { + #'mode': 'concat', + 'axis': 1 + }, + 'dropout':{ + 'rate': 0.6 + }, + 'dense':{ + 'units': 144, + 'activation': 'relu' + }, + 'activation_layer':{ + 'activation': 'softmax' + }, + 'adam_optimizer':{ + 'lr': 1e-3, + 'decay': 1e-7 + } + } + + def build_model(self): + base_model = self.embedding.model + conv_0 = Conv1D(**self.hyper_parameters['conv_0'])(base_model.output) + conv_1 = Conv1D(**self.hyper_parameters['conv_1'])(base_model.output) + conv_2 = Conv1D(**self.hyper_parameters['conv_2'])(base_model.output) + conv_3 = Conv1D(**self.hyper_parameters['conv_3'])(base_model.output) + + maxpool_0 = KMaxPooling(**self.hyper_parameters['maxpool_0'])(conv_0) + maxpool_1 = KMaxPooling(**self.hyper_parameters['maxpool_1'])(conv_1) + maxpool_2 = KMaxPooling(**self.hyper_parameters['maxpool_2'])(conv_2) + maxpool_3 = KMaxPooling(**self.hyper_parameters['maxpool_3'])(conv_3) + + merged_tensor = concatenate([maxpool_0, maxpool_1, maxpool_2, maxpool_3], + **self.hyper_parameters['merged_tensor']) + output = Dropout(**self.hyper_parameters['dropout'])(merged_tensor) + output = Dense(**self.hyper_parameters['dense'])(output) + output = Dense(len(self.label2idx), + **self.hyper_parameters['activation_layer'])(output) + + model = Model(base_model.inputs, output) + adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) + model.compile(loss='categorical_crossentropy', + optimizer=adam_optimizer, + metrics=['accuracy']) + self.model = model + self.model.summary() + + +class RCNNModel(ClassificationModel): + __architect_name__ = 'RCNNModel' + __base_hyper_parameters__ = { + 'rnn_0': { + 'units': 64, + 'return_sequences': True + }, + 'conv_0': { + 'filters': 128, + 'kernel_size': 2, + 'kernel_initializer': 'normal', + 'padding': 'valid', + 'activation': 'relu', + 'strides': 1 + }, + 'maxpool': {}, + 'attn': {}, + 'average': {}, + 'concat': { + 'axis': 1 + }, + 'dropout':{ + 'rate': 0.5 + }, + 'dense':{ + 'units': 120, + 'activation': 'relu' + }, + 'activation_layer':{ + 'activation': 'softmax' + }, + 'adam_optimizer':{ + 'lr': 1e-3, + 'clipvalue': 5, + 'decay': 1e-5 + } + } + + def build_model(self): + base_model = self.embedding.model + rnn_0 = Bidirectional(GRU(**self.hyper_parameters['rnn_0']))(base_model.output) + conv_0 = Conv1D(**self.hyper_parameters['conv_0'])(rnn_0) + maxpool = GlobalMaxPooling1D()(conv_0) + attn = AttentionWeightedAverage()(conv_0) + average = GlobalAveragePooling1D()(conv_0) + + concatenated = concatenated([maxpool, attn, average], + **self.hyper_parameters['concat']) + output = 
Dropout(**self.hyper_parameters['dropout'])(concatenated) + output = Dense(**self.hyper_parameters['dense'])(output) + output = Dense(len(self.label2idx), + **self.hyper_parameters['activation_layer'])(output) + + model = Model(base_model.inputs, output) + adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) + model.compile(loss='categorical_crossentropy', + optimizer=adam_optimizer, + metrics=['accuracy']) + self.model = model + self.model.summary() + + +class AVRNNModel(ClassificationModel): + __architect_name__ = 'AVRNNModel' + __base_hyper_parameters__ = { + 'rnn_0': { + 'units': 60, + 'return_sequences': True + }, + 'rnn_1': { + 'units': 60, + 'return_sequences': True + }, + 'concat_rnn': { + 'axis': 2 + }, + 'last': {}, + 'maxpool': {}, + 'attn': {}, + 'average': {}, + 'all_views':{ + 'axis': 1 + }, + 'dropout':{ + 'rate': 0.5 + }, + 'dense':{ + 'units': 144, + 'activation': 'relu' + }, + 'activation_layer':{ + 'activation': 'softmax' + }, + 'adam_optimizer':{ + 'lr': 1e-3, + 'clipvalue': 5, + 'decay': 1e-6 + } + } + + def build_model(self): + base_model = self.embedding.model + rnn_0 = Bidirectional(GRU(**self.hyper_parameters['rnn_0']))(base_model.output) + rnn_1 = Bidirectional(GRU(**self.hyper_parameters['rnn_1']))(rnn_0) + concat_rnn = concatenate([rnn_0, rnn_1], + **self.hyper_parameters['concat_rnn']) + + last = Lambda(lambda t: t[:, -1], name='last')(concat_rnn) + maxpool = GlobalMaxPooling1D()(concat_rnn) + attn = AttentionWeightedAverage()(concat_rnn) + average = GlobalAveragePooling1D()(concat_rnn) + + all_views = concatenate([last, maxpool, attn, average], + **self.hyper_parameters['all_views']) + output = Dropout(**self.hyper_parameters['dropout'])(all_views) + output = Dense(**self.hyper_parameters['dense'])(output) + output = Dense(len(self.label2idx), + **self.hyper_parameters['activation_layer'])(output) + + model = Model(base_model.inputs, output) + adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) + model.compile(loss='categorical_crossentropy', + optimizer=adam_optimizer, + metrics=['accuracy']) + self.model = model + self.model.summary() + + +class DropoutBGRUModel(ClassificationModel): + __architect_name__ = 'DropoutBGRUModel' + __base_hyper_parameters__ = { + 'rnn_0': { + 'units': 64, + 'return_sequences': True + }, + 'dropout_rnn':{ + 'rate': 0.35 + }, + 'rnn_1': { + 'units': 64, + 'return_sequences': True + }, + 'last': {}, + 'maxpool': {}, + 'average': {}, + 'all_views': { + 'axis': 1 + }, + 'dropout':{ + 'rate': 0.5 + }, + 'dense':{ + 'units': 72, + 'activation': 'relu' + }, + 'activation_layer':{ + 'activation': 'softmax' + } + } + + def build_model(self): + base_model = self.embedding.model + rnn_0 = Bidirectional(GRU(**self.hyper_parameters['rnn_0']))(base_model.output) + dropout_rnn = Dropout(**self.hyper_parameters['dropout_rnn'])(rnn_0) + rnn_1 = Bidirectional(GRU(**self.hyper_parameters['rnn_1']))(dropout_rnn) + last = Lambda(lambda t: t[:, -1], name='last')(rnn_1) + maxpool = GlobalMaxPooling1D()(rnn_1) + #attn = AttentionWeightedAverage()(rnn_1) + average = GlobalAveragePooling1D()(rnn_1) + + all_views = concatenate([last, maxpool, average], + **self.hyper_parameters['all_views']) + output = Dropout(**self.hyper_parameters['dropout'])(all_views) + output = Dense(**self.hyper_parameters['dense'])(output) + output = Dense(len(self.label2idx), + **self.hyper_parameters['activation_layer'])(output) + + model = Model(base_model.inputs, output) + #adam_optimizer = 
optimizers.Adam(**self.hyper_parameters['adam_optimizer']) + model.compile(loss='categorical_crossentropy', + optimizer='adam', + metrics=['accuracy']) + self.model = model + self.model.summary() + + +class DropoutAVRNNModel(ClassificationModel): + __architect_name__ = 'DropoutAVRNNModel' + __base_hyper_parameters__ = { + 'rnn_0': { + 'units': 56, + 'return_sequences': True + }, + 'rnn_dropout': { + 'rate': 0.3 + }, + 'rnn_1': { + 'units': 56, + 'return_sequences': True + }, + 'last': {}, + 'maxpool': {}, + 'attn': {}, + 'average': {}, + 'all_views':{ + 'axis': 1 + }, + 'dropout_0':{ + 'rate': 0.5 + }, + 'dense':{ + 'units': 128, + 'activation': 'relu' + }, + 'dropout_1':{ + 'rate': 0.25 + }, + 'activation_layer':{ + 'activation': 'softmax' + }, + 'adam_optimizer':{ + 'lr': 1e-3, + 'clipvalue': 5, + 'decay': 1e-7 + } + } + + def build_model(self): + base_model = self.embedding.model + rnn_0 = Bidirectional(GRU(**self.hyper_parameters['rnn_0']))(base_model.output) + rnn_dropout = SpatialDropout1D(**self.hyper_parameters['rnn_dropout'])(rnn_0) + rnn_1 = Bidirectional(GRU(**self.hyper_parameters['rnn_dropout']))(rnn_dropout) + + last = Lambda(lambda t: t[:, -1], name='last')(rnn_1) + maxpool = GlobalMaxPooling1D()(rnn_1) + attn = AttentionWeightedAverage()(rnn_1) + average = GlobalAveragePooling1D()(rnn_1) + + all_views = concatenate([last, maxpool, attn, average], + **self.hyper_parameters) + output = Dropout(**self.hyper_parameters['dropout_0'])(all_views) + output = Dense(**self.hyper_parameters['dense'])(output) + output = Dropout(**self.hyper_parameters['dropout_1'])(output) + output = Dense(len(self.label2idx), + **self.hyper_parameters['activation_layer'])(output) + + model = Model(base_model.inputs, output) + adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) + model.compile(loss='categorical_crossentropy', + optimizer=adam_optimizer, + metrics=['accuracy']) + self.model = model + self.model.summary() + diff --git a/kashgari/tasks/classification/shadow_models.py b/kashgari/tasks/classification/shadow_models.py new file mode 100644 index 00000000..68e6ee83 --- /dev/null +++ b/kashgari/tasks/classification/shadow_models.py @@ -0,0 +1,117 @@ +# encoding: utf-8 +""" +@author: Alex +@contact: ialexwwang@gmail.com + +@version: 0.1 +@license: Apache Licence +@file: shadow_models.py +@time: 2019-02-20 16:40 +""" +import logging +from keras.layers import Dense, Bidirectional +from keras.layers import Conv1D, MaxPooling1D, GlobalMaxPooling1D +from keras.layers.recurrent import LSTM +from keras.models import Model + +from kashgari.tasks.classification.base_model import ClassificationModel + + +class BLSTMModel(ClassificationModel): + __architect_name__ = 'BLSTMModel' + __base_hyper_parameters__ = { + 'lstm_layer': { + 'units': 256, + 'return_sequences': False + }, + 'activation_layer': { + 'activation': 'softmax' + } + } + + def build_model(self): + base_model = self.embedding.model + bilstm_layer = Bidirectional(LSTM(**self.hyper_parameters['lstm_layer']) + )(base_model.output) + dense_layer = Dense(len(self.label2idx), + **self.hyper_parameters['activation_layer'])(bilstm_layer) + output_layers = [dense_layer] + + model = Model(base_model.inputs, output_layers) + model.compile(loss='categorical_crossentropy', + optimizer='adam', + metrics=['accuracy']) + self.model = model + self.model.summary() + + +class CNNLSTMModel(ClassificationModel): + __architect_name__ = 'CNNLSTMModel' + __base_hyper_parameters__ = { + 'conv_layer': { + 'filters': 32, + 'kernel_size': 3, + 
'padding': 'same', + 'activation': 'relu' + }, + 'max_pool_layer': { + 'pool_size': 2 + }, + 'lstm_layer': { + 'units': 100 + }, + 'activation_layer': { + 'activation': 'softmax' + } + } + + def build_model(self): + base_model = self.embedding.model + conv_layer = Conv1D(**self.hyper_parameters['conv_layer'])(base_model.output) + max_pool_layer = MaxPooling1D(**self.hyper_parameters['max_pool_layer'])(conv_layer) + lstm_layer = LSTM(**self.hyper_parameters['lstm_layer'])(max_pool_layer) + dense_layer = Dense(len(self.label2idx), + **self.hyper_parameters['activation_layer'])(lstm_layer) + output_layers = [dense_layer] + + model = Model(base_model.inputs, output_layers) + model.compile(loss='categorical_crossentropy', + optimizer='adam', + metrics=['accuracy']) + self.model = model + self.model.summary() + + +class CNNModel(ClassificationModel): + __architect_name__ = 'CNNModel' + __base_hyper_parameters__ = { + 'conv1d_layer': { + 'filters': 128, + 'kernel_size': 5, + 'activation': 'relu' + }, + 'max_pool_layer': {}, + 'dense_1_layer': { + 'units': 64, + 'activation': 'relu' + }, + 'activation_layer': { + 'activation': 'softmax' + } + } + + def build_model(self): + base_model = self.embedding.model + conv1d_layer = Conv1D(**self.hyper_parameters['conv1d_layer'])(base_model.output) + max_pool_layer = GlobalMaxPooling1D(**self.hyper_parameters['max_pool_layer'])(conv1d_layer) + dense_1_layer = Dense(**self.hyper_parameters['dense_1_layer'])(max_pool_layer) + dense_2_layer = Dense(len(self.label2idx), + **self.hyper_parameters['activation_layer'])(dense_1_layer) + + model = Model(base_model.inputs, dense_2_layer) + model.compile(loss='categorical_crossentropy', + optimizer='adam', + metrics=['accuracy']) + self.model = model + self.model.summary() + From 841df10b56d3f30fdba905621c689119429fe439 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Sat, 23 Feb 2019 23:26:30 +0800 Subject: [PATCH 03/20] refactor classifiers models, add testcases for new models --- kashgari/__init__.py | 2 + kashgari/layers.py | 119 ++++ kashgari/macros.py | 8 + kashgari/tasks/base/base_model.py | 4 +- kashgari/tasks/classification/__init__.py | 9 +- kashgari/tasks/classification/blstm_model.py | 56 -- .../tasks/classification/cnn_lstm_model.py | 67 -- kashgari/tasks/classification/cnn_model.py | 62 -- kashgari/tasks/classification/deep_models.py | 592 ------------------ .../tasks/classification/shadow_models.py | 117 ---- tests/test_classifier_models.py | 118 ++-- 11 files changed, 206 insertions(+), 948 deletions(-) create mode 100644 kashgari/layers.py delete mode 100644 kashgari/tasks/classification/blstm_model.py delete mode 100644 kashgari/tasks/classification/cnn_lstm_model.py delete mode 100644 kashgari/tasks/classification/cnn_model.py delete mode 100644 kashgari/tasks/classification/deep_models.py delete mode 100644 kashgari/tasks/classification/shadow_models.py diff --git a/kashgari/__init__.py b/kashgari/__init__.py index 17333ee9..47ddbef8 100644 --- a/kashgari/__init__.py +++ b/kashgari/__init__.py @@ -17,6 +17,8 @@ from kashgari.tasks import classification from kashgari.tasks import seq_labeling +from kashgari.macros import config + if __name__ == "__main__": print("Hello world") diff --git a/kashgari/layers.py b/kashgari/layers.py new file mode 100644 index 00000000..67c79bee --- /dev/null +++ b/kashgari/layers.py @@ -0,0 +1,119 @@ +# encoding: utf-8 +""" +@author: BrikerMan +@contact: eliyar917@gmail.com +@blog: https://eliyar.biz + +@version: 1.0 +@license: Apache Licence +@file: layers +@time: 
2019-02-23
+
+"""
+from __future__ import absolute_import, division
+import logging
+
+import tensorflow as tf
+from keras.layers import Flatten
+from keras.layers import GRU, LSTM
+from keras.layers import CuDNNGRU, CuDNNLSTM
+from keras import initializers
+from keras.engine import InputSpec, Layer
+from keras import backend as K
+
+from kashgari.macros import config
+
+# This switch is evaluated at import time, so `config.use_CuDNN_cell` must be
+# set before `kashgari.layers` is first imported for it to take effect.
+if config.use_CuDNN_cell:
+    GRULayer = CuDNNGRU
+    LSTMLayer = CuDNNLSTM
+else:
+    GRULayer = GRU
+    LSTMLayer = LSTM
+
+
+class AttentionWeightedAverage(Layer):
+    '''
+    Computes a weighted average of the different channels across timesteps.
+    Uses 1 parameter per channel to compute the attention value for a single timestep.
+    '''
+
+    def __init__(self, return_attention=False, **kwargs):
+        self.init = initializers.get('uniform')
+        self.supports_masking = True
+        self.return_attention = return_attention
+        super(AttentionWeightedAverage, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        self.input_spec = [InputSpec(ndim=3)]
+        assert len(input_shape) == 3
+
+        self.W = self.add_weight(shape=(input_shape[2], 1),
+                                 name='{}_w'.format(self.name),
+                                 initializer=self.init)
+        self.trainable_weights = [self.W]
+        super(AttentionWeightedAverage, self).build(input_shape)
+
+    def call(self, x, mask=None):
+        # computes a probability distribution over the timesteps
+        # uses 'max trick' for numerical stability
+        # reshape is done to avoid issue with Tensorflow
+        # and 1-dimensional weights
+        logits = K.dot(x, self.W)
+        x_shape = K.shape(x)
+        logits = K.reshape(logits, (x_shape[0], x_shape[1]))
+        ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))
+
+        # masked timesteps have zero weight
+        if mask is not None:
+            mask = K.cast(mask, K.floatx())
+            ai = ai * mask
+        att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
+        weighted_input = x * K.expand_dims(att_weights)
+        result = K.sum(weighted_input, axis=1)
+        if self.return_attention:
+            return [result, att_weights]
+        return result
+
+    def get_output_shape_for(self, input_shape):
+        return self.compute_output_shape(input_shape)
+
+    def compute_output_shape(self, input_shape):
+        output_len = input_shape[2]
+        if self.return_attention:
+            return [(input_shape[0], output_len), (input_shape[0], input_shape[1])]
+        return (input_shape[0], output_len)
+
+    def compute_mask(self, input, input_mask=None):
+        if isinstance(input_mask, list):
+            return [None] * len(input_mask)
+        else:
+            return None
+
+
+class KMaxPooling(Layer):
+    '''
+    K-max pooling layer that extracts the k highest activations from a sequence (2nd dimension).
+    TensorFlow backend.
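+
+    Note: `tf.nn.top_k` returns the k values for each channel sorted in
+    descending order, not in their original sequence positions, so for an
+    input of shape (batch, timesteps, channels) this layer yields a tensor
+    of shape (batch, channels * k).
+
+    Rough usage sketch (shapes assumed for illustration only):
+
+        inputs = Input(shape=(100, 64))    # (timesteps, channels)
+        pooled = KMaxPooling(k=3)(inputs)  # -> (batch, 64 * 3)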
+ ''' + + def __init__(self, k=1, **kwargs): + super().__init__(**kwargs) + self.input_spec = InputSpec(ndim=3) + self.k = k + + def compute_output_shape(self, input_shape): + return (input_shape[0], (input_shape[2] * self.k)) + + def call(self, inputs): + # swap last two dimensions since top_k will be applied along the last dimension + shifted_input = tf.transpose(inputs, [0, 2,1]) + + # extract top_k, returns two tensors [values, indices] + top_k = tf.nn.top_k(shifted_input, k=self.k, sorted=True, name=None)[0] + + # return flattened output + return Flatten()(top_k) + + +if __name__ == '__main__': + print("hello, world") diff --git a/kashgari/macros.py b/kashgari/macros.py index 16f1014e..45f7938a 100644 --- a/kashgari/macros.py +++ b/kashgari/macros.py @@ -36,6 +36,14 @@ pathlib.Path(PROCESSED_CORPUS_PATH).mkdir(parents=True, exist_ok=True) +class _Config(object): + def __init__(self): + self.use_CuDNN_cell = False + + +config = _Config() + + class CustomEmbedding(object): def __init__(self, embedding_size=100): self.embedding_size = embedding_size diff --git a/kashgari/tasks/base/base_model.py b/kashgari/tasks/base/base_model.py index 4abb30a0..af949a0f 100644 --- a/kashgari/tasks/base/base_model.py +++ b/kashgari/tasks/base/base_model.py @@ -23,6 +23,7 @@ from kashgari.embeddings import CustomEmbedding, BaseEmbedding from kashgari.utils.crf import CRF, crf_loss, crf_accuracy from keras_bert.bert import get_custom_objects as get_bert_custom_objects +from kashgari.layers import AttentionWeightedAverage, KMaxPooling class BaseModel(object): @@ -94,7 +95,8 @@ def create_custom_objects(model_info): if embedding and embedding['embedding_type'] == 'bert': custom_objects['NonMaskingLayer'] = helper.NonMaskingLayer custom_objects.update(get_bert_custom_objects()) - + custom_objects['AttentionWeightedAverage'] = AttentionWeightedAverage + custom_objects['KMaxPooling'] = KMaxPooling return custom_objects @classmethod diff --git a/kashgari/tasks/classification/__init__.py b/kashgari/tasks/classification/__init__.py index c0184385..62c53768 100644 --- a/kashgari/tasks/classification/__init__.py +++ b/kashgari/tasks/classification/__init__.py @@ -11,9 +11,6 @@ """ from .base_model import ClassificationModel -#from .blstm_model import BLSTMModel -#from .cnn_lstm_model import CNNLSTMModel -#from .cnn_model import CNNModel -from .shadow_models import BLSTMModel, CNNLSTMModel, CNNModel -from .deep_models import AVCNNModel, KMaxCNNModel, RCNNModel, AVRNNModel -from .deep_models import DropoutBGRUModel, DropoutAVRNNModel +from .models import BLSTMModel, CNNLSTMModel, CNNModel +from .models import AVCNNModel, KMaxCNNModel, RCNNModel, AVRNNModel +from .models import DropoutBGRUModel, DropoutAVRNNModel diff --git a/kashgari/tasks/classification/blstm_model.py b/kashgari/tasks/classification/blstm_model.py deleted file mode 100644 index d2d20328..00000000 --- a/kashgari/tasks/classification/blstm_model.py +++ /dev/null @@ -1,56 +0,0 @@ -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: blstm_model.py -@time: 2019-01-21 17:37 - -""" -import logging -from keras.layers import Dense, Bidirectional -from keras.layers.recurrent import LSTM -from keras.models import Model - -from kashgari.tasks.classification.base_model import ClassificationModel - - -class BLSTMModel(ClassificationModel): - __architect_name__ = 'BLSTMModel' - __base_hyper_parameters__ = { - 'lstm_layer': { - 'units': 256, - 'return_sequences': 
False - } - } - - def build_model(self): - base_model = self.embedding.model - blstm_layer = Bidirectional(LSTM(**self.hyper_parameters['lstm_layer']))(base_model.output) - dense_layer = Dense(len(self.label2idx), activation='sigmoid')(blstm_layer) - output_layers = [dense_layer] - - model = Model(base_model.inputs, output_layers) - model.compile(loss='categorical_crossentropy', - optimizer='adam', - metrics=['accuracy']) - self.model = model - self.model.summary() - - -if __name__ == "__main__": - from kashgari.utils.logger import init_logger - from kashgari.corpus import TencentDingdangSLUCorpus - - init_logger() - - x_data, y_data = TencentDingdangSLUCorpus.get_classification_data() - classifier = BLSTMModel() - classifier.fit(x_data, y_data, epochs=1) - classifier.save('./classifier_saved2') - - model = ClassificationModel.load_model('./classifier_saved2') - logging.info(model.predict('我要听音乐')) diff --git a/kashgari/tasks/classification/cnn_lstm_model.py b/kashgari/tasks/classification/cnn_lstm_model.py deleted file mode 100644 index a674c9ed..00000000 --- a/kashgari/tasks/classification/cnn_lstm_model.py +++ /dev/null @@ -1,67 +0,0 @@ -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: cnn_lstm_model.py -@time: 2019-01-19 11:52 - -""" -import logging - -from keras.layers import Dense, Conv1D, MaxPooling1D -from keras.layers.recurrent import LSTM -from keras.models import Model - -from kashgari.tasks.classification.base_model import ClassificationModel - - -class CNNLSTMModel(ClassificationModel): - __architect_name__ = 'CNNLSTMModel' - __base_hyper_parameters__ = { - 'conv_layer': { - 'filters': 32, - 'kernel_size': 3, - 'padding': 'same', - 'activation': 'relu' - }, - 'max_pool_layer': { - 'pool_size': 2 - }, - 'lstm_layer': { - 'units': 100 - } - } - - def build_model(self): - base_model = self.embedding.model - conv_layer = Conv1D(**self.hyper_parameters['conv_layer'])(base_model.output) - max_pool_layer = MaxPooling1D(**self.hyper_parameters['max_pool_layer'])(conv_layer) - lstm_layer = LSTM(**self.hyper_parameters['lstm_layer'])(max_pool_layer) - dense_layer = Dense(len(self.label2idx), activation='sigmoid')(lstm_layer) - output_layers = [dense_layer] - - model = Model(base_model.inputs, output_layers) - model.compile(loss='categorical_crossentropy', - optimizer='adam', - metrics=['accuracy']) - self.model = model - self.model.summary() - - -if __name__ == "__main__": - from kashgari.utils.logger import init_logger - from kashgari.corpus import TencentDingdangSLUCorpus - - init_logger() - - x_data, y_data = TencentDingdangSLUCorpus.get_classification_data() - classifier = CNNLSTMModel() - classifier.fit(x_data, y_data, epochs=1) - classifier.save('./classifier_saved2') - - model = ClassificationModel.load_model('./classifier_saved2') - logging.info(model.predict('我要听音乐')) diff --git a/kashgari/tasks/classification/cnn_model.py b/kashgari/tasks/classification/cnn_model.py deleted file mode 100644 index 9512d0df..00000000 --- a/kashgari/tasks/classification/cnn_model.py +++ /dev/null @@ -1,62 +0,0 @@ -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: cnn_model.py -@time: 2019-01-21 17:49 - -""" -import logging -from keras.layers import Dense, Conv1D, GlobalMaxPooling1D -from keras.models import Model - -from kashgari.tasks.classification.base_model import ClassificationModel - - 
-class CNNModel(ClassificationModel): - __architect_name__ = 'CNNModel' - __base_hyper_parameters__ = { - 'conv1d_layer': { - 'filters': 128, - 'kernel_size': 5, - 'activation': 'relu' - }, - 'max_pool_layer': {}, - 'dense_1_layer': { - 'units': 64, - 'activation': 'relu' - } - } - - def build_model(self): - base_model = self.embedding.model - conv1d_layer = Conv1D(**self.hyper_parameters['conv1d_layer'])(base_model.output) - max_pool_layer = GlobalMaxPooling1D(**self.hyper_parameters['max_pool_layer'])(conv1d_layer) - dense_1_layer = Dense(**self.hyper_parameters['dense_1_layer'])(max_pool_layer) - dense_2_layer = Dense(len(self.label2idx), activation='sigmoid')(dense_1_layer) - - model = Model(base_model.inputs, dense_2_layer) - model.compile(loss='categorical_crossentropy', - optimizer='adam', - metrics=['accuracy']) - self.model = model - self.model.summary() - - -if __name__ == "__main__": - from kashgari.utils.logger import init_logger - from kashgari.corpus import TencentDingdangSLUCorpus - - init_logger() - - x_data, y_data = TencentDingdangSLUCorpus.get_classification_data() - classifier = CNNModel() - classifier.fit(x_data, y_data, epochs=1) - classifier.save('./classifier_saved2') - - model = ClassificationModel.load_model('./classifier_saved2') - logging.info(model.predict('我要听音乐')) diff --git a/kashgari/tasks/classification/deep_models.py b/kashgari/tasks/classification/deep_models.py deleted file mode 100644 index 57e251d3..00000000 --- a/kashgari/tasks/classification/deep_models.py +++ /dev/null @@ -1,592 +0,0 @@ -# encoding: utf-8 -""" -@author: Alex -@contact: ialexwwang@gmail.com - -@version: 0.1 -@license: Apache Licence -@file: deep_models.py -@time: 2019-02-21 17:54 - -@Reference: https://github.com/zake7749/DeepToxic/blob/master/sotoxic/models/keras/model_zoo.py -""" -from __future__ import absolute_import, division -import logging - -import tensorflow as tf -from keras.layers import Dense, Input, Embedding, Lambda, Activation, Reshape, Flatten -from keras.layers import Dropout, SpatialDropout1D -from keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D -from keras.layers import Bidirectional, GRU, Conv1D -from keras.layers import add, concatenate -from keras.models import Model -from keras.layers.normalization import BatchNormalization -from keras import optimizers -from keras import initializers -from keras.engine import InputSpec, Layer -from keras import backend as K - -from kashgari.tasks.classification.base_model import ClassificationModel - - -class AttentionWeightedAverage(Layer): - ''' - Computes a weighted average of the different channels across timesteps. - Uses 1 parameter pr. channel to compute the attention value for a single timestep. 
- ''' - - def __init__(self, return_attention=False, **kwargs): - self.init = initializers.get('uniform') - self.supports_masking = True - self.return_attention = return_attention - super(AttentionWeightedAverage, self).__init__(**kwargs) - - - def build(self, input_shape): - self.input_spec = [InputSpec(ndim=3)] - assert len(input_shape) == 3 - - self.W = self.add_weight(shape=(input_shape[2], 1), - name='{}_w'.format(self.name), - initializer=self.init) - self.trainable_weights = [self.W] - super(AttentionWeightedAverage, self).build(input_shape) - - - def call(self, x, mask=None): - # computes a probability distribution over the timesteps - # uses 'max trick' for numerical stability - # reshape is done to avoid issue with Tensorflow - # and 1-dimensional weights - logits = K.dot(x, self.W) - x_shape = K.shape(x) - logits = K.reshape(logits, (x_shape[0], x_shape[1])) - ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True)) - - # masked timesteps have zero weight - if mask is not None: - mask = K.cast(mask, K.floatx()) - ai = ai * mask - att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon()) - weighted_input = x * K.expand_dims(att_weights) - result = K.sum(weighted_input, axis=1) - if self.return_attention: - return [result, att_weights] - return result - - - def get_output_shape_for(self, input_shape): - return self.compute_output_shape(input_shape) - - - def compute_output_shape(self, input_shape): - output_len = input_shape[2] - if self.return_attention: - return [(input_shape[0], output_len), (input_shape[0], input_shape[1])] - return (input_shape[0], output_len) - - - def compute_mask(self, input, input_mask=None): - if isinstance(input_mask, list): - return [None] * len(input_mask) - else: - return None - - -class KMaxPooling(Layer): - ''' - K-max pooling layer that extracts the k-highest activation from a sequence (2nd dimension). - TensorFlow backend. 
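-
-    Note: `tf.nn.top_k` returns the k values for each channel sorted in
-    descending order, not in their original sequence positions, so for an
-    input of shape (batch, timesteps, channels) this layer yields a tensor
-    of shape (batch, channels * k).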
- ''' - - def __init__(self, k=1, **kwargs): - super().__init__(**kwargs) - self.input_spec = InputSpec(ndim=3) - self.k = k - - - def compute_output_shape(self, input_shape): - return (input_shape[0], (input_shape[2] * self.k)) - - - def call(self, inputs): - # swap last two dimensions since top_k will be applied along the last dimension - shifted_input = tf.transpose(inputs, [0, 2,1]) - - # extract top_k, returns two tensors [values, indices] - top_k = tf.nn.top_k(shifted_input, k=self.k, sorted=True, name=None)[0] - - # return flattened output - return Flatten()(top_k) - - -class AVCNNModel(ClassificationModel): - __architect_name__ = 'AVCNNModel' - __base_hyper_parameters__ = { - 'conv_0': { - 'filters': 300, - 'kernel_size':1, - 'kernel_initializer': 'normal', - 'padding': 'valid', - 'activation': 'relu' - }, - 'conv_1': { - 'filters': 300, - 'kernel_size':2, - 'kernel_initializer': 'normal', - 'padding': 'valid', - 'activation': 'relu' - }, - 'conv_2': { - 'filters': 300, - 'kernel_size':3, - 'kernel_initializer': 'normal', - 'padding': 'valid', - 'activation': 'relu' - }, - 'conv_3': { - 'filters': 300, - 'kernel_size':4, - 'kernel_initializer': 'normal', - 'padding': 'valid', - 'activation': 'relu' - }, - # --- - 'attn_0': {}, - 'avg_0': {}, - 'maxpool_0': {}, - # --- - 'maxpool_1': {}, - 'attn_1': {}, - 'avg_1': {}, - # --- - 'maxpool_2': {}, - 'attn_2': {}, - 'avg_2': {}, - # --- - 'maxpool_3': {}, - 'attn_3': {}, - 'avg_3': {}, - # --- - 'v0_col':{ - #'mode': 'concat', - 'axis': 1 - }, - 'v1_col':{ - #'mode': 'concat', - 'axis': 1 - }, - 'v2_col':{ - #'mode': 'concat', - 'axis': 1 - }, - 'merged_tensor':{ - #'mode': 'concat', - 'axis': 1 - }, - 'dropout':{ - 'rate': 0.7 - }, - 'dense':{ - 'units': 144, - 'activation': 'relu' - }, - 'activation_layer':{ - 'activation': 'softmax' - }, - 'adam_optimizer':{ - 'lr': 1e-3, - 'decay': 1e-7 - } - } - - def build_model(self): - base_model = self.embedding.model - conv_0 = Conv1D(**self.hyper_parameters['conv_0'])(base_model.output) - conv_1 = Conv1D(**self.hyper_parameters['conv_1'])(base_model.output) - conv_2 = Conv1D(**self.hyper_parameters['conv_2'])(base_model.output) - conv_3 = Conv1D(**self.hyper_parameters['conv_3'])(base_model.output) - - maxpool_0 = GlobalMaxPooling1D()(conv_0) - attn_0 = AttentionWeightedAverage()(conv_0) - avg_0 = GlobalAveragePooling1D()(conv_0) - - maxpool_1 = GlobalMaxPooling1D()(conv_1) - attn_1 = AttentionWeightedAverage()(conv_1) - avg_1 = GlobalAveragePooling1D()(conv_1) - - maxpool_2 = GlobalMaxPooling1D()(conv_2) - attn_2 = AttentionWeightedAverage()(conv_2) - avg_2 = GlobalAveragePooling1D()(conv_2) - - maxpool_3 = GlobalMaxPooling1D()(conv_3) - attn_3 = AttentionWeightedAverage()(conv_3) - avg_3 = GlobalAveragePooling1D()(conv_3) - - v0_col = concatenate([maxpool_0, maxpool_1, maxpool_2, maxpool_3], - **self.hyper_parameters['v0_col']) - v1_col = concatenate([attn_0, attn_1, attn_2, attn_3], - **self.hyper_parameters['v1_col']) - v2_col = concatenate([avg_1, avg_2, avg_0, avg_3], - **self.hyper_parameters['v2_col']) - merged_tensor = concatenate([v0_col, v1_col, v2_col], - **self.hyper_parameters['merged_tensor']) - output = Dropout(**self.hyper_parameters['dropout'])(merged_tensor) - output = Dense(**self.hyper_parameters['dense'])(output) - output = Dense(len(self.label2idx), - **self.hyper_parameters['activation_layer'])(output) - - model = Model(base_model.inputs, output) - adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) - 
model.compile(loss='categorical_crossentropy', - optimizer=adam_optimizer, - metrics=['accuracy']) - self.model = model - self.model.summary() - - -class KMaxCNNModel(ClassificationModel): - __architect_name__ = 'KMaxCNNModel' - __base_hyper_parameters__ = { - 'conv_0': { - 'filters': 180, - 'kernel_size': 1, - 'kernel_initializer': 'normal', - 'padding': 'valid', - 'activation': 'relu' - }, - 'conv_1': { - 'filters': 180, - 'kernel_size': 2, - 'kernel_initializer': 'normal', - 'padding': 'valid', - 'activation': 'relu' - }, - 'conv_2': { - 'filters': 180, - 'kernel_size': 3, - 'kernel_initializer': 'normal', - 'padding': 'valid', - 'activation': 'relu' - }, - 'conv_3': { - 'filters': 180, - 'kernel_size': 4, - 'kernel_initializer': 'normal', - 'padding': 'valid', - 'activation': 'relu' - }, - 'maxpool_0': { - 'k': 3 - }, - 'maxpool_1': { - 'k': 3 - }, - 'maxpool_2': { - 'k': 3 - }, - 'maxpool_3': { - 'k': 3 - }, - 'merged_tensor': { - #'mode': 'concat', - 'axis': 1 - }, - 'dropout':{ - 'rate': 0.6 - }, - 'dense':{ - 'units': 144, - 'activation': 'relu' - }, - 'activation_layer':{ - 'activation': 'softmax' - }, - 'adam_optimizer':{ - 'lr': 1e-3, - 'decay': 1e-7 - } - } - - def build_model(self): - base_model = self.embedding.model - conv_0 = Conv1D(**self.hyper_parameters['conv_0'])(base_model.output) - conv_1 = Conv1D(**self.hyper_parameters['conv_1'])(base_model.output) - conv_2 = Conv1D(**self.hyper_parameters['conv_2'])(base_model.output) - conv_3 = Conv1D(**self.hyper_parameters['conv_3'])(base_model.output) - - maxpool_0 = KMaxPooling(**self.hyper_parameters['maxpool_0'])(conv_0) - maxpool_1 = KMaxPooling(**self.hyper_parameters['maxpool_1'])(conv_1) - maxpool_2 = KMaxPooling(**self.hyper_parameters['maxpool_2'])(conv_2) - maxpool_3 = KMaxPooling(**self.hyper_parameters['maxpool_3'])(conv_3) - - merged_tensor = concatenate([maxpool_0, maxpool_1, maxpool_2, maxpool_3], - **self.hyper_parameters['merged_tensor']) - output = Dropout(**self.hyper_parameters['dropout'])(merged_tensor) - output = Dense(**self.hyper_parameters['dense'])(output) - output = Dense(len(self.label2idx), - **self.hyper_parameters['activation_layer'])(output) - - model = Model(base_model.inputs, output) - adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) - model.compile(loss='categorical_crossentropy', - optimizer=adam_optimizer, - metrics=['accuracy']) - self.model = model - self.model.summary() - - -class RCNNModel(ClassificationModel): - __architect_name__ = 'RCNNModel' - __base_hyper_parameters__ = { - 'rnn_0': { - 'units': 64, - 'return_sequences': True - }, - 'conv_0': { - 'filters': 128, - 'kernel_size': 2, - 'kernel_initializer': 'normal', - 'padding': 'valid', - 'activation': 'relu', - 'strides': 1 - }, - 'maxpool': {}, - 'attn': {}, - 'average': {}, - 'concat': { - 'axis': 1 - }, - 'dropout':{ - 'rate': 0.5 - }, - 'dense':{ - 'units': 120, - 'activation': 'relu' - }, - 'activation_layer':{ - 'activation': 'softmax' - }, - 'adam_optimizer':{ - 'lr': 1e-3, - 'clipvalue': 5, - 'decay': 1e-5 - } - } - - def build_model(self): - base_model = self.embedding.model - rnn_0 = Bidirectional(GRU(**self.hyper_parameters['rnn_0']))(base_model.output) - conv_0 = Conv1D(**self.hyper_parameters['conv_0'])(rnn_0) - maxpool = GlobalMaxPooling1D()(conv_0) - attn = AttentionWeightedAverage()(conv_0) - average = GlobalAveragePooling1D()(conv_0) - - concatenated = concatenated([maxpool, attn, average], - **self.hyper_parameters['concat']) - output = 
Dropout(**self.hyper_parameters['dropout'])(concatenated) - output = Dense(**self.hyper_parameters['dense'])(output) - output = Dense(len(self.label2idx), - **self.hyper_parameters['activation_layer'])(output) - - model = Model(base_model.inputs, output) - adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) - model.compile(loss='categorical_crossentropy', - optimizer=adam_optimizer, - metrics=['accuracy']) - self.model = model - self.model.summary() - - -class AVRNNModel(ClassificationModel): - __architect_name__ = 'AVRNNModel' - __base_hyper_parameters__ = { - 'rnn_0': { - 'units': 60, - 'return_sequences': True - }, - 'rnn_1': { - 'units': 60, - 'return_sequences': True - }, - 'concat_rnn': { - 'axis': 2 - }, - 'last': {}, - 'maxpool': {}, - 'attn': {}, - 'average': {}, - 'all_views':{ - 'axis': 1 - }, - 'dropout':{ - 'rate': 0.5 - }, - 'dense':{ - 'units': 144, - 'activation': 'relu' - }, - 'activation_layer':{ - 'activation': 'softmax' - }, - 'adam_optimizer':{ - 'lr': 1e-3, - 'clipvalue': 5, - 'decay': 1e-6 - } - } - - def build_model(self): - base_model = self.embedding.model - rnn_0 = Bidirectional(GRU(**self.hyper_parameters['rnn_0']))(base_model.output) - rnn_1 = Bidirectional(GRU(**self.hyper_parameters['rnn_1']))(rnn_0) - concat_rnn = concatenate([rnn_0, rnn_1], - **self.hyper_parameters['concat_rnn']) - - last = Lambda(lambda t: t[:, -1], name='last')(concat_rnn) - maxpool = GlobalMaxPooling1D()(concat_rnn) - attn = AttentionWeightedAverage()(concat_rnn) - average = GlobalAveragePooling1D()(concat_rnn) - - all_views = concatenate([last, maxpool, attn, average], - **self.hyper_parameters['all_views']) - output = Dropout(**self.hyper_parameters['dropout'])(all_views) - output = Dense(**self.hyper_parameters['dense'])(output) - output = Dense(len(self.label2idx), - **self.hyper_parameters['activation_layer'])(output) - - model = Model(base_model.inputs, output) - adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) - model.compile(loss='categorical_crossentropy', - optimizer=adam_optimizer, - metrics=['accuracy']) - self.model = model - self.model.summary() - - -class DropoutBGRUModel(ClassificationModel): - __architect_name__ = 'DropoutBGRUModel' - __base_hyper_parameters__ = { - 'rnn_0': { - 'units': 64, - 'return_sequences': True - }, - 'dropout_rnn':{ - 'rate': 0.35 - }, - 'rnn_1': { - 'units': 64, - 'return_sequences': True - }, - 'last': {}, - 'maxpool': {}, - 'average': {}, - 'all_views': { - 'axis': 1 - }, - 'dropout':{ - 'rate': 0.5 - }, - 'dense':{ - 'units': 72, - 'activation': 'relu' - }, - 'activation_layer':{ - 'activation': 'softmax' - } - } - - def build_model(self): - base_model = self.embedding.model - rnn_0 = Bidirectional(GRU(**self.hyper_parameters['rnn_0']))(base_model.output) - dropout_rnn = Dropout(**self.hyper_parameters['dropout_rnn'])(rnn_0) - rnn_1 = Bidirectional(GRU(**self.hyper_parameters['rnn_1']))(dropout_rnn) - last = Lambda(lambda t: t[:, -1], name='last')(rnn_1) - maxpool = GlobalMaxPooling1D()(rnn_1) - #attn = AttentionWeightedAverage()(rnn_1) - average = GlobalAveragePooling1D()(rnn_1) - - all_views = concatenate([last, maxpool, average], - **self.hyper_parameters['all_views']) - output = Dropout(**self.hyper_parameters['dropout'])(all_views) - output = Dense(**self.hyper_parameters['dense'])(output) - output = Dense(len(self.label2idx), - **self.hyper_parameters['activation_layer'])(output) - - model = Model(base_model.inputs, output) - #adam_optimizer = 
optimizers.Adam(**self.hyper_parameters['adam_optimizer']) - model.compile(loss='categorical_crossentropy', - optimizer='adam', - metrics=['accuracy']) - self.model = model - self.model.summary() - - -class DropoutAVRNNModel(ClassificationModel): - __architect_name__ = 'DropoutAVRNNModel' - __base_hyper_parameters__ = { - 'rnn_0': { - 'units': 56, - 'return_sequences': True - }, - 'rnn_dropout': { - 'rate': 0.3 - }, - 'rnn_1': { - 'units': 56, - 'return_sequences': True - }, - 'last': {}, - 'maxpool': {}, - 'attn': {}, - 'average': {}, - 'all_views':{ - 'axis': 1 - }, - 'dropout_0':{ - 'rate': 0.5 - }, - 'dense':{ - 'units': 128, - 'activation': 'relu' - }, - 'dropout_1':{ - 'rate': 0.25 - }, - 'activation_layer':{ - 'activation': 'softmax' - }, - 'adam_optimizer':{ - 'lr': 1e-3, - 'clipvalue': 5, - 'decay': 1e-7 - } - } - - def build_model(self): - base_model = self.embedding.model - rnn_0 = Bidirectional(GRU(**self.hyper_parameters['rnn_0']))(base_model.output) - rnn_dropout = SpatialDropout1D(**self.hyper_parameters['rnn_dropout'])(rnn_0) - rnn_1 = Bidirectional(GRU(**self.hyper_parameters['rnn_dropout']))(rnn_dropout) - - last = Lambda(lambda t: t[:, -1], name='last')(rnn_1) - maxpool = GlobalMaxPooling1D()(rnn_1) - attn = AttentionWeightedAverage()(rnn_1) - average = GlobalAveragePooling1D()(rnn_1) - - all_views = concatenate([last, maxpool, attn, average], - **self.hyper_parameters) - output = Dropout(**self.hyper_parameters['dropout_0'])(all_views) - output = Dense(**self.hyper_parameters['dense'])(output) - output = Dropout(**self.hyper_parameters['dropout_1'])(output) - output = Dense(len(self.label2idx), - **self.hyper_parameters['activation_layer'])(output) - - model = Model(base_model.inputs, output) - adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) - model.compile(loss='categorical_crossentropy', - optimizer=adam_optimizer, - metrics=['accuracy']) - self.model = model - self.model.summary() - diff --git a/kashgari/tasks/classification/shadow_models.py b/kashgari/tasks/classification/shadow_models.py deleted file mode 100644 index 68e6ee83..00000000 --- a/kashgari/tasks/classification/shadow_models.py +++ /dev/null @@ -1,117 +0,0 @@ -# encoding: utf-8 -""" -@author: Alex -@contact: ialexwwang@gmail.com - -@version: 0.1 -@license: Apache Licence -@file: shadow_models.py -@time: 2019-02-20 16:40 -""" -import logging -from keras.layers import Dense, Bidirectional -from keras.layers import Conv1D, MaxPooling1D, GlobalMaxPooling1D -from keras.layers.recurrent import LSTM -from keras.models import Model - -from kashgari.tasks.classification.base_model import ClassificationModel - - -class BLSTMModel(ClassificationModel): - __architect_name__ = 'BLSTMModel' - __base_hyper_parameters__ = { - 'lstm_layer': { - 'units': 256, - 'return_sequences': False - }, - 'activation_layer': { - 'activation': 'softmax' - } - } - - def build_model(self): - base_model = self.embedding.model - bilstm_layer = Bidirectional(LSTM(**self.hyper_parameters['lstm_layer']) - )(base_model.output) - dense_layer = Dense(len(self.label2idx), - **self.hyper_parameters['activation_layer'])(bilstm_layer) - output_layers = [dense_layer] - - model = Model(base_model.inputs, output_layers) - model.compile(loss='categorical_crossentropy', - optimizer='adam', - metrics=['accuracy']) - self.model = model - self.model.summary() - - -class CNNLSTMModel(ClassificationModel): - __architect_name__ = 'CNNLSTMModel' - __base_hyper_parameters__ = { - 'conv_layer': { - 'filters': 32, - 'kernel_size': 3, - 
'padding': 'same', - 'activation': 'relu' - }, - 'max_pool_layer': { - 'pool_size': 2 - }, - 'lstm_layer': { - 'units': 100 - }, - 'activation_layer': { - 'activation': 'softmax' - } - } - - def build_model(self): - base_model = self.embedding.model - conv_layer = Conv1D(**self.hyper_parameters['conv_layer'])(base_model.output) - max_pool_layer = MaxPooling1D(**self.hyper_parameters['max_pool_layer'])(conv_layer) - lstm_layer = LSTM(**self.hyper_parameters['lstm_layer'])(max_pool_layer) - dense_layer = Dense(len(self.label2idx), - **self.hyper_parameters['activation_layer'])(lstm_layer) - output_layers = [dense_layer] - - model = Model(base_model.inputs, output_layers) - model.compile(loss='categorical_crossentropy', - optimizer='adam', - metrics=['accuracy']) - self.model = model - self.model.summary() - - -class CNNModel(ClassificationModel): - __architect_name__ = 'CNNModel' - __base_hyper_parameters__ = { - 'conv1d_layer': { - 'filters': 128, - 'kernel_size': 5, - 'activation': 'relu' - }, - 'max_pool_layer': {}, - 'dense_1_layer': { - 'units': 64, - 'activation': 'relu' - }, - 'activation_layer': { - 'activation': 'softmax' - } - } - - def build_model(self): - base_model = self.embedding.model - conv1d_layer = Conv1D(**self.hyper_parameters['conv1d_layer'])(base_model.output) - max_pool_layer = GlobalMaxPooling1D(**self.hyper_parameters['max_pool_layer'])(conv1d_layer) - dense_1_layer = Dense(**self.hyper_parameters['dense_1_layer'])(max_pool_layer) - dense_2_layer = Dense(len(self.label2idx), - **self.hyper_parameters['activation_layer'])(dense_1_layer) - - model = Model(base_model.inputs, dense_2_layer) - model.compile(loss='categorical_crossentropy', - optimizer='adam', - metrics=['accuracy']) - self.model = model - self.model.summary() - diff --git a/tests/test_classifier_models.py b/tests/test_classifier_models.py index e20ed1a2..cf6af33a 100644 --- a/tests/test_classifier_models.py +++ b/tests/test_classifier_models.py @@ -18,7 +18,12 @@ import unittest from kashgari.embeddings import WordEmbeddings, BERTEmbedding -from kashgari.tasks.classification import BLSTMModel, CNNModel, CNNLSTMModel, ClassificationModel + +from kashgari.tasks.classification import BLSTMModel, CNNLSTMModel, CNNModel +from kashgari.tasks.classification import AVCNNModel, KMaxCNNModel, RCNNModel, AVRNNModel +from kashgari.tasks.classification import DropoutBGRUModel, DropoutAVRNNModel + + from kashgari.utils.logger import init_logger init_logger() @@ -54,9 +59,7 @@ class EmbeddingManager(object): @classmethod def get_bert(cls): if cls.bert_embedding is None: - dir_path = os.path.dirname(os.path.realpath(__file__)) - bert_path = os.path.join(dir_path, 'data', 'test_bert_checkpoint') - cls.bert_embedding = BERTEmbedding(bert_path, sequence_length=15) + cls.bert_embedding = BERTEmbedding('bert-base-chinese', sequence_length=15) logging.info('bert_embedding seq len: {}'.format(cls.bert_embedding.sequence_length)) return cls.bert_embedding @@ -68,12 +71,12 @@ def get_w2v(cls): class TestBLSTMModelModel(unittest.TestCase): - model: ClassificationModel = None @classmethod def setUpClass(cls): - cls.epochs = 3 - cls.model = BLSTMModel() + cls.epochs = 2 + cls.model_class = BLSTMModel + cls.model = cls.model_class() def test_build(self): self.model.fit(train_x, train_y, epochs=1) @@ -119,86 +122,107 @@ def test_save_and_load(self): result = new_model.predict(sentence) assert isinstance(result, str) + def test_bert_embedding(self): + embedding = EmbeddingManager.get_bert() + bert_model = self.model_class(embedding) + 
bert_model.fit(train_x, train_y, epochs=1) + assert len(bert_model.label2idx) == 4 + assert len(bert_model.token2idx) > 4 + + sentence = list('语言学包含了几种分支领域。') + assert isinstance(bert_model.predict(sentence), str) + assert isinstance(bert_model.predict([sentence]), list) + logging.info('test predict: {} -> {}'.format(sentence, self.model.predict(sentence))) + bert_model.predict(sentence, output_dict=True) + bert_model.predict(sentence, output_dict=False) + + def test_w2v_embedding(self): + embedding = EmbeddingManager.get_w2v() + w2v_model = self.model_class(embedding) + w2v_model.fit(train_x, train_y, epochs=1) + assert len(w2v_model.label2idx) == 4 + assert len(w2v_model.token2idx) > 4 + + sentence = list('语言学包含了几种分支领域。') + assert isinstance(w2v_model.predict(sentence), str) + assert isinstance(w2v_model.predict([sentence]), list) + logging.info('test predict: {} -> {}'.format(sentence, self.model.predict(sentence))) + w2v_model.predict(sentence, output_dict=True) + w2v_model.predict(sentence, output_dict=False) + @classmethod def tearDownClass(cls): del cls.model logging.info('tearDownClass {}'.format(cls)) -class TestBLSTMModelWithWord2Vec(TestBLSTMModelModel): +class TestCNNLSTMModel(TestBLSTMModelModel): @classmethod def setUpClass(cls): - cls.epochs = 3 - embedding = EmbeddingManager.get_w2v() - cls.model = BLSTMModel(embedding) + cls.epochs = 2 + cls.model_class = CNNLSTMModel + cls.model = cls.model_class() -class TestBLSTMModelWithBERT(TestBLSTMModelModel): +class TestCNNModel(TestBLSTMModelModel): @classmethod def setUpClass(cls): - cls.epochs = 1 - embedding = EmbeddingManager.get_bert() - cls.model = BLSTMModel(embedding) + cls.epochs = 2 + cls.model_class = CNNModel + cls.model = cls.model_class() - def test_save_and_load(self): - super(TestBLSTMModelWithBERT, self).test_save_and_load() - -class TestCNNModel(TestBLSTMModelModel): +class TestAVCNNModel(TestBLSTMModelModel): @classmethod def setUpClass(cls): - cls.epochs = 3 - TestCNNModel.model = CNNModel() - - def test_fit(self): - super(TestCNNModel, self).test_fit() + cls.epochs = 2 + cls.model_class = AVCNNModel + cls.model = cls.model_class() -class TestCNNModelWithWord2Vec(TestBLSTMModelModel): +class TestKMaxCNNModel(TestBLSTMModelModel): @classmethod def setUpClass(cls): - cls.epochs = 3 - embedding = EmbeddingManager.get_w2v() - cls.model = CNNModel(embedding) + cls.epochs = 2 + cls.model_class = KMaxCNNModel + cls.model = cls.model_class() + +class TestRCNNModel(TestBLSTMModelModel): -class TestCNNModelWithBERT(TestBLSTMModelModel): @classmethod def setUpClass(cls): - cls.epochs = 1 - embedding = EmbeddingManager.get_bert() - TestCNNModelWithBERT.model = CNNModel(embedding) + cls.epochs = 2 + cls.model_class = RCNNModel + cls.model = cls.model_class() -class TestLSTMCNNModel(TestBLSTMModelModel): +class TestAVRNNModel(TestBLSTMModelModel): @classmethod def setUpClass(cls): - cls.epochs = 3 - cls.model = CNNLSTMModel() + cls.epochs = 2 + cls.model_class = AVRNNModel + cls.model = cls.model_class() -class TestLSTMCNNModelWithWord2Vec(TestBLSTMModelModel): +class TestDropoutBGRUModel(TestBLSTMModelModel): @classmethod def setUpClass(cls): - cls.epochs = 3 - embedding = EmbeddingManager.get_w2v() - cls.model = CNNLSTMModel(embedding) + cls.epochs = 2 + cls.model_class = DropoutBGRUModel + cls.model = cls.model_class() -class TestLSTMCNNModelWithBERT(TestBLSTMModelModel): +class TestDropoutAVRNNModel(TestBLSTMModelModel): @classmethod def setUpClass(cls): - cls.epochs = 1 - embedding = EmbeddingManager.get_bert() - 
cls.model = CNNLSTMModel(embedding) - - -if __name__ == "__main__": - unittest.main() + cls.epochs = 2 + cls.model_class = DropoutAVRNNModel + cls.model = cls.model_class() From 1eb0efb86326111df74d4a270f8e5a22d1042175 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Sat, 23 Feb 2019 23:35:03 +0800 Subject: [PATCH 04/20] try testcases --- kashgari/tasks/classification/__init__.py | 6 +- kashgari/tasks/classification/models.py | 609 ++++++++++++++++++++++ tests/test_classifier_models.py | 26 +- 3 files changed, 625 insertions(+), 16 deletions(-) create mode 100644 kashgari/tasks/classification/models.py diff --git a/kashgari/tasks/classification/__init__.py b/kashgari/tasks/classification/__init__.py index 62c53768..a24d9593 100644 --- a/kashgari/tasks/classification/__init__.py +++ b/kashgari/tasks/classification/__init__.py @@ -11,6 +11,6 @@ """ from .base_model import ClassificationModel -from .models import BLSTMModel, CNNLSTMModel, CNNModel -from .models import AVCNNModel, KMaxCNNModel, RCNNModel, AVRNNModel -from .models import DropoutBGRUModel, DropoutAVRNNModel +from kashgari.tasks.classification.models import BLSTMModel, CNNLSTMModel, CNNModel +from kashgari.tasks.classification.models import AVCNNModel, KMaxCNNModel, RCNNModel, AVRNNModel +from kashgari.tasks.classification.models import DropoutBGRUModel, DropoutAVRNNModel diff --git a/kashgari/tasks/classification/models.py b/kashgari/tasks/classification/models.py new file mode 100644 index 00000000..cb4cf62e --- /dev/null +++ b/kashgari/tasks/classification/models.py @@ -0,0 +1,609 @@ +# encoding: utf-8 +""" +@author: Alex +@contact: ialexwwang@gmail.com + +@version: 0.1 +@license: Apache Licence +@file: deep_models.py +@time: 2019-02-21 17:54 + +@Reference: https://github.com/zake7749/DeepToxic/blob/master/sotoxic/models/keras/model_zoo.py +""" +from __future__ import absolute_import, division + +import logging + +from keras import optimizers + +from keras.models import Model +from keras.layers import Dense, Lambda +from keras.layers import Dropout, SpatialDropout1D +from keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D, MaxPooling1D +from keras.layers import Bidirectional, Conv1D +from keras.layers import concatenate + +from kashgari.layers import AttentionWeightedAverage, KMaxPooling, LSTMLayer, GRULayer + +from kashgari.tasks.classification.base_model import ClassificationModel + + +class CNNModel(ClassificationModel): + __architect_name__ = 'CNNModel' + __base_hyper_parameters__ = { + 'conv1d_layer': { + 'filters': 128, + 'kernel_size': 5, + 'activation': 'relu' + }, + 'max_pool_layer': {}, + 'dense_1_layer': { + 'units': 64, + 'activation': 'relu' + }, + 'activation_layer': { + 'activation': 'softmax' + } + } + + def build_model(self): + base_model = self.embedding.model + conv1d_layer = Conv1D(**self.hyper_parameters['conv1d_layer'])(base_model.output) + max_pool_layer = GlobalMaxPooling1D(**self.hyper_parameters['max_pool_layer'])(conv1d_layer) + dense_1_layer = Dense(**self.hyper_parameters['dense_1_layer'])(max_pool_layer) + dense_2_layer = Dense(len(self.label2idx), **self.hyper_parameters['activation_layer'])(dense_1_layer) + + model = Model(base_model.inputs, dense_2_layer) + model.compile(loss='categorical_crossentropy', + optimizer='adam', + metrics=['accuracy']) + self.model = model + self.model.summary() + + +class BLSTMModel(ClassificationModel): + __architect_name__ = 'BLSTMModel' + __base_hyper_parameters__ = { + 'lstm_layer': { + 'units': 256, + 'return_sequences': False + }, + 
'activation_layer': { + 'activation': 'softmax' + } + } + + def build_model(self): + base_model = self.embedding.model + blstm_layer = Bidirectional(LSTMLayer(**self.hyper_parameters['lstm_layer']))(base_model.output) + dense_layer = Dense(len(self.label2idx), **self.hyper_parameters['activation_layer'])(blstm_layer) + output_layers = [dense_layer] + + model = Model(base_model.inputs, output_layers) + model.compile(loss='categorical_crossentropy', + optimizer='adam', + metrics=['accuracy']) + self.model = model + self.model.summary() + + +class CNNLSTMModel(ClassificationModel): + __architect_name__ = 'CNNLSTMModel' + __base_hyper_parameters__ = { + 'conv_layer': { + 'filters': 32, + 'kernel_size': 3, + 'padding': 'same', + 'activation': 'relu' + }, + 'max_pool_layer': { + 'pool_size': 2 + }, + 'lstm_layer': { + 'units': 100 + }, + 'activation_layer': { + 'activation': 'softmax' + } + } + + def build_model(self): + base_model = self.embedding.model + conv_layer = Conv1D(**self.hyper_parameters['conv_layer'])(base_model.output) + max_pool_layer = MaxPooling1D(**self.hyper_parameters['max_pool_layer'])(conv_layer) + lstm_layer = LSTMLayer(**self.hyper_parameters['lstm_layer'])(max_pool_layer) + dense_layer = Dense(len(self.label2idx), + **self.hyper_parameters['activation_layer'])(lstm_layer) + output_layers = [dense_layer] + + model = Model(base_model.inputs, output_layers) + model.compile(loss='categorical_crossentropy', + optimizer='adam', + metrics=['accuracy']) + self.model = model + self.model.summary() + + +class AVCNNModel(ClassificationModel): + __architect_name__ = 'AVCNNModel' + __base_hyper_parameters__ = { + 'conv_0': { + 'filters': 300, + 'kernel_size':1, + 'kernel_initializer': 'normal', + 'padding': 'valid', + 'activation': 'relu' + }, + 'conv_1': { + 'filters': 300, + 'kernel_size':2, + 'kernel_initializer': 'normal', + 'padding': 'valid', + 'activation': 'relu' + }, + 'conv_2': { + 'filters': 300, + 'kernel_size':3, + 'kernel_initializer': 'normal', + 'padding': 'valid', + 'activation': 'relu' + }, + 'conv_3': { + 'filters': 300, + 'kernel_size':4, + 'kernel_initializer': 'normal', + 'padding': 'valid', + 'activation': 'relu' + }, + # --- + 'attn_0': {}, + 'avg_0': {}, + 'maxpool_0': {}, + # --- + 'maxpool_1': {}, + 'attn_1': {}, + 'avg_1': {}, + # --- + 'maxpool_2': {}, + 'attn_2': {}, + 'avg_2': {}, + # --- + 'maxpool_3': {}, + 'attn_3': {}, + 'avg_3': {}, + # --- + 'v0_col': { + #'mode': 'concat', + 'axis': 1 + }, + 'v1_col': { + #'mode': 'concat', + 'axis': 1 + }, + 'v2_col': { + #'mode': 'concat', + 'axis': 1 + }, + 'merged_tensor': { + #'mode': 'concat', + 'axis': 1 + }, + 'dropout': { + 'rate': 0.7 + }, + 'dense': { + 'units': 144, + 'activation': 'relu' + }, + 'activation_layer': { + 'activation': 'softmax' + }, + 'adam_optimizer': { + 'lr': 1e-3, + 'decay': 1e-7 + } + } + + def build_model(self): + base_model = self.embedding.model + conv_0 = Conv1D(**self.hyper_parameters['conv_0'])(base_model.output) + conv_1 = Conv1D(**self.hyper_parameters['conv_1'])(base_model.output) + conv_2 = Conv1D(**self.hyper_parameters['conv_2'])(base_model.output) + conv_3 = Conv1D(**self.hyper_parameters['conv_3'])(base_model.output) + + maxpool_0 = GlobalMaxPooling1D()(conv_0) + attn_0 = AttentionWeightedAverage()(conv_0) + avg_0 = GlobalAveragePooling1D()(conv_0) + + maxpool_1 = GlobalMaxPooling1D()(conv_1) + attn_1 = AttentionWeightedAverage()(conv_1) + avg_1 = GlobalAveragePooling1D()(conv_1) + + maxpool_2 = GlobalMaxPooling1D()(conv_2) + attn_2 = 
AttentionWeightedAverage()(conv_2) + avg_2 = GlobalAveragePooling1D()(conv_2) + + maxpool_3 = GlobalMaxPooling1D()(conv_3) + attn_3 = AttentionWeightedAverage()(conv_3) + avg_3 = GlobalAveragePooling1D()(conv_3) + + v0_col = concatenate([maxpool_0, maxpool_1, maxpool_2, maxpool_3], + **self.hyper_parameters['v0_col']) + v1_col = concatenate([attn_0, attn_1, attn_2, attn_3], + **self.hyper_parameters['v1_col']) + v2_col = concatenate([avg_1, avg_2, avg_0, avg_3], + **self.hyper_parameters['v2_col']) + merged_tensor = concatenate([v0_col, v1_col, v2_col], + **self.hyper_parameters['merged_tensor']) + output = Dropout(**self.hyper_parameters['dropout'])(merged_tensor) + output = Dense(**self.hyper_parameters['dense'])(output) + output = Dense(len(self.label2idx), + **self.hyper_parameters['activation_layer'])(output) + + model = Model(base_model.inputs, output) + adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) + model.compile(loss='categorical_crossentropy', + optimizer=adam_optimizer, + metrics=['accuracy']) + self.model = model + self.model.summary() + + +class KMaxCNNModel(ClassificationModel): + __architect_name__ = 'KMaxCNNModel' + __base_hyper_parameters__ = { + 'conv_0': { + 'filters': 180, + 'kernel_size': 1, + 'kernel_initializer': 'normal', + 'padding': 'valid', + 'activation': 'relu' + }, + 'conv_1': { + 'filters': 180, + 'kernel_size': 2, + 'kernel_initializer': 'normal', + 'padding': 'valid', + 'activation': 'relu' + }, + 'conv_2': { + 'filters': 180, + 'kernel_size': 3, + 'kernel_initializer': 'normal', + 'padding': 'valid', + 'activation': 'relu' + }, + 'conv_3': { + 'filters': 180, + 'kernel_size': 4, + 'kernel_initializer': 'normal', + 'padding': 'valid', + 'activation': 'relu' + }, + 'maxpool_0': { + 'k': 3 + }, + 'maxpool_1': { + 'k': 3 + }, + 'maxpool_2': { + 'k': 3 + }, + 'maxpool_3': { + 'k': 3 + }, + 'merged_tensor': { + #'mode': 'concat', + 'axis': 1 + }, + 'dropout': { + 'rate': 0.6 + }, + 'dense': { + 'units': 144, + 'activation': 'relu' + }, + 'activation_layer': { + 'activation': 'softmax' + }, + 'adam_optimizer': { + 'lr': 1e-3, + 'decay': 1e-7 + } + } + + def build_model(self): + base_model = self.embedding.model + conv_0 = Conv1D(**self.hyper_parameters['conv_0'])(base_model.output) + conv_1 = Conv1D(**self.hyper_parameters['conv_1'])(base_model.output) + conv_2 = Conv1D(**self.hyper_parameters['conv_2'])(base_model.output) + conv_3 = Conv1D(**self.hyper_parameters['conv_3'])(base_model.output) + + maxpool_0 = KMaxPooling(**self.hyper_parameters['maxpool_0'])(conv_0) + maxpool_1 = KMaxPooling(**self.hyper_parameters['maxpool_1'])(conv_1) + maxpool_2 = KMaxPooling(**self.hyper_parameters['maxpool_2'])(conv_2) + maxpool_3 = KMaxPooling(**self.hyper_parameters['maxpool_3'])(conv_3) + + merged_tensor = concatenate([maxpool_0, maxpool_1, maxpool_2, maxpool_3], + **self.hyper_parameters['merged_tensor']) + output = Dropout(**self.hyper_parameters['dropout'])(merged_tensor) + output = Dense(**self.hyper_parameters['dense'])(output) + output = Dense(len(self.label2idx), + **self.hyper_parameters['activation_layer'])(output) + + model = Model(base_model.inputs, output) + adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) + model.compile(loss='categorical_crossentropy', + optimizer=adam_optimizer, + metrics=['accuracy']) + self.model = model + self.model.summary() + + +class RCNNModel(ClassificationModel): + __architect_name__ = 'RCNNModel' + __base_hyper_parameters__ = { + 'rnn_0': { + 'units': 64, + 
'return_sequences': True + }, + 'conv_0': { + 'filters': 128, + 'kernel_size': 2, + 'kernel_initializer': 'normal', + 'padding': 'valid', + 'activation': 'relu', + 'strides': 1 + }, + 'maxpool': {}, + 'attn': {}, + 'average': {}, + 'concat': { + 'axis': 1 + }, + 'dropout': { + 'rate': 0.5 + }, + 'dense': { + 'units': 120, + 'activation': 'relu' + }, + 'activation_layer': { + 'activation': 'softmax' + }, + 'adam_optimizer': { + 'lr': 1e-3, + 'clipvalue': 5, + 'decay': 1e-5 + } + } + + def build_model(self): + base_model = self.embedding.model + rnn_0 = Bidirectional(GRULayer(**self.hyper_parameters['rnn_0']))(base_model.output) + conv_0 = Conv1D(**self.hyper_parameters['conv_0'])(rnn_0) + maxpool = GlobalMaxPooling1D()(conv_0) + attn = AttentionWeightedAverage()(conv_0) + average = GlobalAveragePooling1D()(conv_0) + + concatenated = concatenate([maxpool, attn, average], + **self.hyper_parameters['concat']) + output = Dropout(**self.hyper_parameters['dropout'])(concatenated) + output = Dense(**self.hyper_parameters['dense'])(output) + output = Dense(len(self.label2idx), + **self.hyper_parameters['activation_layer'])(output) + + model = Model(base_model.inputs, output) + adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) + model.compile(loss='categorical_crossentropy', + optimizer=adam_optimizer, + metrics=['accuracy']) + self.model = model + self.model.summary() + + +class AVRNNModel(ClassificationModel): + __architect_name__ = 'AVRNNModel' + __base_hyper_parameters__ = { + 'rnn_0': { + 'units': 60, + 'return_sequences': True + }, + 'rnn_1': { + 'units': 60, + 'return_sequences': True + }, + 'concat_rnn': { + 'axis': 2 + }, + 'last': {}, + 'maxpool': {}, + 'attn': {}, + 'average': {}, + 'all_views': { + 'axis': 1 + }, + 'dropout': { + 'rate': 0.5 + }, + 'dense': { + 'units': 144, + 'activation': 'relu' + }, + 'activation_layer': { + 'activation': 'softmax' + }, + 'adam_optimizer': { + 'lr': 1e-3, + 'clipvalue': 5, + 'decay': 1e-6 + } + } + + def build_model(self): + base_model = self.embedding.model + rnn_0 = Bidirectional(GRULayer(**self.hyper_parameters['rnn_0']))(base_model.output) + rnn_1 = Bidirectional(GRULayer(**self.hyper_parameters['rnn_1']))(rnn_0) + concat_rnn = concatenate([rnn_0, rnn_1], + **self.hyper_parameters['concat_rnn']) + + last = Lambda(lambda t: t[:, -1], name='last')(concat_rnn) + maxpool = GlobalMaxPooling1D()(concat_rnn) + attn = AttentionWeightedAverage()(concat_rnn) + average = GlobalAveragePooling1D()(concat_rnn) + + all_views = concatenate([last, maxpool, attn, average], + **self.hyper_parameters['all_views']) + output = Dropout(**self.hyper_parameters['dropout'])(all_views) + output = Dense(**self.hyper_parameters['dense'])(output) + output = Dense(len(self.label2idx), + **self.hyper_parameters['activation_layer'])(output) + + model = Model(base_model.inputs, output) + adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) + model.compile(loss='categorical_crossentropy', + optimizer=adam_optimizer, + metrics=['accuracy']) + self.model = model + self.model.summary() + + +class DropoutBGRUModel(ClassificationModel): + __architect_name__ = 'DropoutBGRUModel' + __base_hyper_parameters__ = { + 'rnn_0': { + 'units': 64, + 'return_sequences': True + }, + 'dropout_rnn': { + 'rate': 0.35 + }, + 'rnn_1': { + 'units': 64, + 'return_sequences': True + }, + 'last': {}, + 'maxpool': {}, + 'average': {}, + 'all_views': { + 'axis': 1 + }, + 'dropout': { + 'rate': 0.5 + }, + 'dense': { + 'units': 72, + 'activation': 'relu' + }, + 
'activation_layer': { + 'activation': 'softmax' + } + } + + def build_model(self): + base_model = self.embedding.model + rnn_0 = Bidirectional(GRULayer(**self.hyper_parameters['rnn_0']))(base_model.output) + dropout_rnn = Dropout(**self.hyper_parameters['dropout_rnn'])(rnn_0) + rnn_1 = Bidirectional(GRULayer(**self.hyper_parameters['rnn_1']))(dropout_rnn) + last = Lambda(lambda t: t[:, -1], name='last')(rnn_1) + maxpool = GlobalMaxPooling1D()(rnn_1) + # attn = AttentionWeightedAverage()(rnn_1) + average = GlobalAveragePooling1D()(rnn_1) + + all_views = concatenate([last, maxpool, average], + **self.hyper_parameters['all_views']) + output = Dropout(**self.hyper_parameters['dropout'])(all_views) + output = Dense(**self.hyper_parameters['dense'])(output) + output = Dense(len(self.label2idx), + **self.hyper_parameters['activation_layer'])(output) + + model = Model(base_model.inputs, output) + # adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) + model.compile(loss='categorical_crossentropy', + optimizer='adam', + metrics=['accuracy']) + self.model = model + self.model.summary() + + +class DropoutAVRNNModel(ClassificationModel): + __architect_name__ = 'DropoutAVRNNModel' + __base_hyper_parameters__ = { + 'rnn_0': { + 'units': 56, + 'return_sequences': True + }, + 'rnn_dropout': { + 'rate': 0.3 + }, + 'rnn_1': { + 'units': 56, + 'return_sequences': True + }, + 'last': {}, + 'maxpool': {}, + 'attn': {}, + 'average': {}, + 'all_views': { + 'axis': 1 + }, + 'dropout_0': { + 'rate': 0.5 + }, + 'dense': { + 'units': 128, + 'activation': 'relu' + }, + 'dropout_1': { + 'rate': 0.25 + }, + 'activation_layer': { + 'activation': 'softmax' + }, + 'adam_optimizer': { + 'lr': 1e-3, + 'clipvalue': 5, + 'decay': 1e-7 + } + } + + def build_model(self): + base_model = self.embedding.model + rnn_0 = Bidirectional(GRULayer(**self.hyper_parameters['rnn_0']))(base_model.output) + rnn_dropout = SpatialDropout1D(**self.hyper_parameters['rnn_dropout'])(rnn_0) + rnn_1 = Bidirectional(GRULayer(**self.hyper_parameters['rnn_1']))(rnn_dropout) + + last = Lambda(lambda t: t[:, -1], name='last')(rnn_1) + maxpool = GlobalMaxPooling1D()(rnn_1) + attn = AttentionWeightedAverage()(rnn_1) + average = GlobalAveragePooling1D()(rnn_1) + + all_views = concatenate([last, maxpool, attn, average], + **self.hyper_parameters) + output = Dropout(**self.hyper_parameters['dropout_0'])(all_views) + output = Dense(**self.hyper_parameters['dense'])(output) + output = Dropout(**self.hyper_parameters['dropout_1'])(output) + output = Dense(len(self.label2idx), + **self.hyper_parameters['activation_layer'])(output) + + model = Model(base_model.inputs, output) + adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) + model.compile(loss='categorical_crossentropy', + optimizer=adam_optimizer, + metrics=['accuracy']) + self.model = model + self.model.summary() + + +if __name__ == '__main__': + from kashgari.corpus import TencentDingdangSLUCorpus + from kashgari.embeddings import WordEmbeddings, BERTEmbedding + + train_x, train_y = TencentDingdangSLUCorpus.get_classification_data() + + w2v = WordEmbeddings('sgns.weibo.bigram', + sequence_length=15, + limit=5000) + bert = BERTEmbedding('bert-base-chinese', sequence_length=15) + model = CNNModel(bert) + model.fit(train_x, train_y, epochs=1) diff --git a/tests/test_classifier_models.py b/tests/test_classifier_models.py index cf6af33a..41ecf2df 100644 --- a/tests/test_classifier_models.py +++ b/tests/test_classifier_models.py @@ -122,19 +122,19 @@ def 
test_save_and_load(self): result = new_model.predict(sentence) assert isinstance(result, str) - def test_bert_embedding(self): - embedding = EmbeddingManager.get_bert() - bert_model = self.model_class(embedding) - bert_model.fit(train_x, train_y, epochs=1) - assert len(bert_model.label2idx) == 4 - assert len(bert_model.token2idx) > 4 - - sentence = list('语言学包含了几种分支领域。') - assert isinstance(bert_model.predict(sentence), str) - assert isinstance(bert_model.predict([sentence]), list) - logging.info('test predict: {} -> {}'.format(sentence, self.model.predict(sentence))) - bert_model.predict(sentence, output_dict=True) - bert_model.predict(sentence, output_dict=False) + # def test_bert_embedding(self): + # embedding = EmbeddingManager.get_bert() + # bert_model = self.model_class(embedding) + # bert_model.fit(train_x, train_y, epochs=1) + # assert len(bert_model.label2idx) == 4 + # assert len(bert_model.token2idx) > 4 + # + # sentence = list('语言学包含了几种分支领域。') + # assert isinstance(bert_model.predict(sentence), str) + # assert isinstance(bert_model.predict([sentence]), list) + # logging.info('test predict: {} -> {}'.format(sentence, self.model.predict(sentence))) + # bert_model.predict(sentence, output_dict=True) + # bert_model.predict(sentence, output_dict=False) def test_w2v_embedding(self): embedding = EmbeddingManager.get_w2v() From 2ed5b5d15fcf1c68bc86acccf09b43a1c143c761 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Sat, 23 Feb 2019 23:44:30 +0800 Subject: [PATCH 05/20] fix DropoutAVRNNModel error --- kashgari/tasks/classification/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kashgari/tasks/classification/models.py b/kashgari/tasks/classification/models.py index cb4cf62e..fc6214d3 100644 --- a/kashgari/tasks/classification/models.py +++ b/kashgari/tasks/classification/models.py @@ -579,7 +579,7 @@ def build_model(self): average = GlobalAveragePooling1D()(rnn_1) all_views = concatenate([last, maxpool, attn, average], - **self.hyper_parameters) + **self.hyper_parameters['all_views']) output = Dropout(**self.hyper_parameters['dropout_0'])(all_views) output = Dense(**self.hyper_parameters['dense'])(output) output = Dropout(**self.hyper_parameters['dropout_1'])(output) From e67cc8ad9adf59b04ffbddfbaff22d23399b792f Mon Sep 17 00:00:00 2001 From: Eliyar Eziz Date: Sat, 23 Feb 2019 23:54:18 +0800 Subject: [PATCH 06/20] Update README.md --- README.md | 81 +++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 73 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 1515dd86..a7ebd88d 100644 --- a/README.md +++ b/README.md @@ -25,19 +25,84 @@ Kashgare is: * Embedding support * Classic word2vec embedding * BERT embedding -* Text Classification Models - * CNN Classification Model - * CNN LSTM Classification Model - * Bidirectional LSTM Classification Model -* Text Labeling Models (NER, PoS) - * Bidirectional LSTM Labeling Model - * Bidirectional LSTM CRF Labeling Model - * CNN LSTM Labeling Model +* Sequence(Text) Classification Models +* Sequence(Text) Labeling Models (NER, PoS) * Model Training * Model Evaluate * GPU Support * Customize Model + + + + + + + + + + + + + + + +
+<table>
+  <tr>
+    <th>task</th>
+    <th>Models</th>
+  </tr>
+  <tr>
+    <td>Text Classification</td>
+    <td>
+      CNNModel<br>
+      BLSTMModel<br>
+      CNNLSTMModel<br>
+      AVCNNModel<br>
+      KMaxCNNModel<br>
+      RCNNModel<br>
+      AVRNNModel<br>
+      DropoutBGRUModel<br>
+      DropoutAVRNNModel
+    </td>
+  </tr>
+  <tr>
+    <td>Sequence Labeling</td>
+    <td>
+      CNNLSTMModel<br>
+      BLSTMModel<br>
+      BLSTMCRFModel
+    </td>
+  </tr>
+</table>
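+
+A minimal usage sketch for the classification models (illustrative only; it assumes
+the bundled `SMP2017ECDTClassificationCorpus` helper, the same corpus used in
+`examples/run_flask_api.py`):
+
+```python
+from kashgari.corpus import SMP2017ECDTClassificationCorpus
+from kashgari.tasks.classification import CNNModel
+
+# tokenized sentences and their labels
+train_x, train_y = SMP2017ECDTClassificationCorpus.get_classification_data()
+
+model = CNNModel()
+model.fit(train_x, train_y, epochs=2)
+
+# predict on a single tokenized sentence; returns the predicted label as a str
+print(model.predict(list('语言学包含了几种分支领域。')))
+```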
+ ## Performance | Task | Language | Dataset | Score | Detail | From 5fac9c8a19a2334888afa0a7d0613165c666b5c8 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Sun, 24 Feb 2019 11:13:21 +0800 Subject: [PATCH 07/20] simplify the testcases, speed up test --- .travis.yml | 10 +++++- kashgari/utils/helper.py | 63 ++++++++++++--------------------- tests/test_classifier_models.py | 59 +++++++++++++++++++++++++----- 3 files changed, 81 insertions(+), 51 deletions(-) diff --git a/.travis.yml b/.travis.yml index 44754af3..296f29b9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,14 +4,22 @@ python: - "3.6" cache: pip # command to install dependencies +env: + - TEST_FILE=tests/test_classifier_models.py + - TEST_FILE=tests/test_corpus.py + - TEST_FILE=tests/test_embeddings.py + - TEST_FILE=tests/test_seq_labeling_models.py before_install: - export BOTO_CONFIG=/dev/null install: - pip install python-coveralls - pip install -r requirements.txt - pip install . + - pip install coverage + - pip install nose # command to run tests script: - - sh test.sh +# - sh test.sh + - nosetests --cover-erase --with-coverage --cover-html --cover-html-dir=htmlcov --cover-package="kashgari" $TEST_FILE after_success: - coveralls \ No newline at end of file diff --git a/kashgari/utils/helper.py b/kashgari/utils/helper.py index f5df1089..531ac2c9 100644 --- a/kashgari/utils/helper.py +++ b/kashgari/utils/helper.py @@ -27,47 +27,28 @@ from kashgari.macros import STORAGE_HOST -def h5f_generator(h5path: str, - # indices: List[int], - num_classes: int, - batch_size: int = 128): - """ - fit generator for h5 file - :param h5path: target f5file - :param num_classes: label counts to covert y label to one hot array - :param batch_size: - :return: - """ - - db = h5py.File(h5path, "r") - while True: - page_list = list(range(len(db['x']) // batch_size + 1)) - random.shuffle(page_list) - for page in page_list: - x = db["x"][page: (page + 1) * batch_size] - y = to_categorical(db["y"][page: (page + 1) * batch_size], - num_classes=num_classes, - dtype=np.int) - yield (x, y) - - -def classification_list_generator(x_data: List, - y_data: List, - sequence_lenght: int, - num_classes: int, - batch_size: int = 128): - assert len(x_data) == len(y_data) - while True: - page_list = list(range(len(x_data) // batch_size + 1)) - random.shuffle(page_list) - for page in page_list: - x = x_data[page: (page + 1) * batch_size] - x = sequence.pad_sequences(x, - maxlen=sequence_lenght) - y = to_categorical(y_data[page: (page + 1) * batch_size], - num_classes=num_classes, - dtype=np.int) - yield (x, y) +# def h5f_generator(h5path: str, +# # indices: List[int], +# num_classes: int, +# batch_size: int = 128): +# """ +# fit generator for h5 file +# :param h5path: target f5file +# :param num_classes: label counts to covert y label to one hot array +# :param batch_size: +# :return: +# """ +# +# db = h5py.File(h5path, "r") +# while True: +# page_list = list(range(len(db['x']) // batch_size + 1)) +# random.shuffle(page_list) +# for page in page_list: +# x = db["x"][page: (page + 1) * batch_size] +# y = to_categorical(db["y"][page: (page + 1) * batch_size], +# num_classes=num_classes, +# dtype=np.int) +# yield (x, y) def unison_shuffled_copies(a, b): diff --git a/tests/test_classifier_models.py b/tests/test_classifier_models.py index 41ecf2df..162331e5 100644 --- a/tests/test_classifier_models.py +++ b/tests/test_classifier_models.py @@ -70,7 +70,48 @@ def get_w2v(cls): return cls.word2vec_embedding -class TestBLSTMModelModel(unittest.TestCase): +class 
TestBLSTMModelModelBasic(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.epochs = 2 + cls.model_class = BLSTMModel + cls.model = cls.model_class() + + def test_fit(self): + self.model.fit(train_x, train_y, eval_x, eval_y, epochs=self.epochs) + + def test_save_and_load(self): + self.test_fit() + model_path = os.path.join(tempfile.gettempdir(), 'kashgari_model', str(time.time())) + self.model.save(model_path) + new_model = BLSTMModel.load_model(model_path) + assert new_model is not None + sentence = list('语言学包含了几种分支领域。') + result = new_model.predict(sentence) + assert isinstance(result, str) + + def test_w2v_embedding(self): + embedding = EmbeddingManager.get_w2v() + w2v_model = self.model_class(embedding) + w2v_model.fit(train_x, train_y, epochs=1) + assert len(w2v_model.label2idx) == 4 + assert len(w2v_model.token2idx) > 4 + + sentence = list('语言学包含了几种分支领域。') + assert isinstance(w2v_model.predict(sentence), str) + assert isinstance(w2v_model.predict([sentence]), list) + logging.info('test predict: {} -> {}'.format(sentence, self.model.predict(sentence))) + w2v_model.predict(sentence, output_dict=True) + w2v_model.predict(sentence, output_dict=False) + + @classmethod + def tearDownClass(cls): + del cls.model + logging.info('tearDownClass {}'.format(cls)) + + +class TestAllBLSTMModelModel(unittest.TestCase): @classmethod def setUpClass(cls): @@ -156,7 +197,7 @@ def tearDownClass(cls): logging.info('tearDownClass {}'.format(cls)) -class TestCNNLSTMModel(TestBLSTMModelModel): +class TestCNNLSTMModelBasic(TestBLSTMModelModelBasic): @classmethod def setUpClass(cls): @@ -165,7 +206,7 @@ def setUpClass(cls): cls.model = cls.model_class() -class TestCNNModel(TestBLSTMModelModel): +class TestCNNModelBasic(TestBLSTMModelModelBasic): @classmethod def setUpClass(cls): @@ -174,7 +215,7 @@ def setUpClass(cls): cls.model = cls.model_class() -class TestAVCNNModel(TestBLSTMModelModel): +class TestAVCNNModelBasic(TestBLSTMModelModelBasic): @classmethod def setUpClass(cls): @@ -183,7 +224,7 @@ def setUpClass(cls): cls.model = cls.model_class() -class TestKMaxCNNModel(TestBLSTMModelModel): +class TestKMaxCNNModelBasic(TestBLSTMModelModelBasic): @classmethod def setUpClass(cls): @@ -192,7 +233,7 @@ def setUpClass(cls): cls.model = cls.model_class() -class TestRCNNModel(TestBLSTMModelModel): +class TestRCNNModelBasic(TestBLSTMModelModelBasic): @classmethod def setUpClass(cls): @@ -201,7 +242,7 @@ def setUpClass(cls): cls.model = cls.model_class() -class TestAVRNNModel(TestBLSTMModelModel): +class TestAVRNNModelBasic(TestBLSTMModelModelBasic): @classmethod def setUpClass(cls): @@ -210,7 +251,7 @@ def setUpClass(cls): cls.model = cls.model_class() -class TestDropoutBGRUModel(TestBLSTMModelModel): +class TestDropoutBGRUModelBasic(TestBLSTMModelModelBasic): @classmethod def setUpClass(cls): @@ -219,7 +260,7 @@ def setUpClass(cls): cls.model = cls.model_class() -class TestDropoutAVRNNModel(TestBLSTMModelModel): +class TestDropoutAVRNNModelBasic(TestBLSTMModelModelBasic): @classmethod def setUpClass(cls): From e949bcc350147609e52ad14cc8725158371e6ce5 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Sun, 24 Feb 2019 11:36:47 +0800 Subject: [PATCH 08/20] update coverage config --- .coveragerc | 1 + .travis.yml | 5 ++--- tests/test_classifier_models.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.coveragerc b/.coveragerc index 4b37f0b1..0d1279e0 100644 --- a/.coveragerc +++ b/.coveragerc @@ -12,6 +12,7 @@ exclude_lines = # Don't complain if tests don't hit defensive assertion 
code: raise AssertionError raise NotImplementedError + raise ValueError # Don't complain if non-runnable code isn't run: if __name__ == .__main__.: diff --git a/.travis.yml b/.travis.yml index 296f29b9..2046c5a6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,9 +6,9 @@ cache: pip # command to install dependencies env: - TEST_FILE=tests/test_classifier_models.py + - TEST_FILE=tests/test_seq_labeling_models.py - TEST_FILE=tests/test_corpus.py - TEST_FILE=tests/test_embeddings.py - - TEST_FILE=tests/test_seq_labeling_models.py before_install: - export BOTO_CONFIG=/dev/null install: @@ -19,7 +19,6 @@ install: - pip install nose # command to run tests script: -# - sh test.sh - - nosetests --cover-erase --with-coverage --cover-html --cover-html-dir=htmlcov --cover-package="kashgari" $TEST_FILE + - nosetests --with-coverage --cover-html --cover-html-dir=htmlcov --cover-package="kashgari" $TEST_FILE after_success: - coveralls \ No newline at end of file diff --git a/tests/test_classifier_models.py b/tests/test_classifier_models.py index 162331e5..19fd5b0f 100644 --- a/tests/test_classifier_models.py +++ b/tests/test_classifier_models.py @@ -111,12 +111,12 @@ def tearDownClass(cls): logging.info('tearDownClass {}'.format(cls)) -class TestAllBLSTMModelModel(unittest.TestCase): +class TestAllCNNModelModel(unittest.TestCase): @classmethod def setUpClass(cls): cls.epochs = 2 - cls.model_class = BLSTMModel + cls.model_class = CNNModel cls.model = cls.model_class() def test_build(self): From 46604c9fecb9ee8625a2007b8644d39a94987c75 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Sun, 24 Feb 2019 14:06:59 +0800 Subject: [PATCH 09/20] remove acc metrics from crf model, fix confusion at #10 --- kashgari/tasks/seq_labeling/blstm_crf_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kashgari/tasks/seq_labeling/blstm_crf_model.py b/kashgari/tasks/seq_labeling/blstm_crf_model.py index a3a14383..c9796d99 100644 --- a/kashgari/tasks/seq_labeling/blstm_crf_model.py +++ b/kashgari/tasks/seq_labeling/blstm_crf_model.py @@ -41,7 +41,7 @@ def build_model(self): model = Model(base_model.inputs, crf_layer) model.compile(loss=crf_loss, optimizer='adam', - metrics=[crf_accuracy, 'acc']) + metrics=[crf_accuracy]) self.model = model self.model.summary() From 10d3ae20c0160e157e43e2ca186720461cd93930 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Sun, 24 Feb 2019 22:44:43 +0800 Subject: [PATCH 10/20] add flask-api example, fix #24 --- examples/run_flask_api.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 examples/run_flask_api.py diff --git a/examples/run_flask_api.py b/examples/run_flask_api.py new file mode 100644 index 00000000..1d894321 --- /dev/null +++ b/examples/run_flask_api.py @@ -0,0 +1,37 @@ +# encoding: utf-8 +""" +@author: BrikerMan +@contact: eliyar917@gmail.com +@blog: https://eliyar.biz + +@version: 1.0 +@license: Apache Licence +@file: run_flask_api +@time: 2019-02-24 + +""" +import random +from flask import Flask, jsonify +from kashgari.tasks.classification import KMaxCNNModel +from kashgari.corpus import SMP2017ECDTClassificationCorpus + +train_x, train_y = SMP2017ECDTClassificationCorpus.get_classification_data() + +model = KMaxCNNModel() +model.fit(train_x, train_y) + + +app = Flask(__name__) + + +@app.route('/predict', methods=['GET']) +def get_tasks(): + x = random.choice(train_x) + y = model.predict(x, output_dict=True) + return jsonify({'x': x, 'y': y}) + + +if __name__ == '__main__': + # must run predict once before `app.run` 
to prevent predict error + model.predict(train_x[10]) + app.run(debug=True, port=8080) From be74d113b79c95693bea5f86f0c1d3fd59fdfdf2 Mon Sep 17 00:00:00 2001 From: Alex Wang Date: Wed, 27 Feb 2019 10:59:56 +0800 Subject: [PATCH 11/20] some adjust to `KMaxPooling` layer to suit more models. --- kashgari/layers.py | 57 +++++++++++++++++++++---- kashgari/tasks/classification/models.py | 34 ++++++++++++--- 2 files changed, 75 insertions(+), 16 deletions(-) diff --git a/kashgari/layers.py b/kashgari/layers.py index 67c79bee..349e0678 100644 --- a/kashgari/layers.py +++ b/kashgari/layers.py @@ -94,25 +94,64 @@ class KMaxPooling(Layer): ''' K-max pooling layer that extracts the k-highest activation from a sequence (2nd dimension). TensorFlow backend. + + # Arguments + k: An int scale, + indicate k max steps of features to pool. + sorted: A bool, + if output is sorted (default) or not. + data_format: A string, + one of `channels_last` (default) or `channels_first`. + The ordering of the dimensions in the inputs. + `channels_last` corresponds to inputs with shape + `(batch, steps, features)` while `channels_first` + corresponds to inputs with shape + `(batch, features, steps)`. + # Input shape + - If `data_format='channels_last'`: + 3D tensor with shape: + `(batch_size, steps, features)` + - If `data_format='channels_first'`: + 3D tensor with shape: + `(batch_size, features, steps)` + # Output shape + 3D tensor with shape: + `(batch_size, top-k-steps, features)` ''' - def __init__(self, k=1, **kwargs): - super().__init__(**kwargs) + def __init__(self, k=1, sorted=True, data_format='channels_last', **kwargs): + super(KMaxPooling, self).__init__(**kwargs) self.input_spec = InputSpec(ndim=3) self.k = k + self.sorted = sorted + self.data_format = K.normalize_data_format(data_format) + + def build(self, input_shape): + assert len(input_shape) == 3 + super(KMaxPooling, self).build(input_shape) def compute_output_shape(self, input_shape): - return (input_shape[0], (input_shape[2] * self.k)) + if self.data_format == 'channels_first': + return (input_shape[0], self.k, input_shape[1]) + else: + return (input_shape[0], self.k, input_shape[2]) def call(self, inputs): - # swap last two dimensions since top_k will be applied along the last dimension - shifted_input = tf.transpose(inputs, [0, 2,1]) - - # extract top_k, returns two tensors [values, indices] - top_k = tf.nn.top_k(shifted_input, k=self.k, sorted=True, name=None)[0] + if self.data_format == 'channels_last': + # swap last two dimensions since top_k will be applied along the last dimension + shifted_input = tf.transpose(inputs, [0, 2, 1]) + # extract top_k, returns two tensors [values, indices] + top_k = tf.nn.top_k(shifted_input, k=self.k, sorted=self.sorted)[0] + else: + top_k = tf.nn.top_k(inputs, k=self.k, sorted=self.sorted)[0] # return flattened output - return Flatten()(top_k) + return tf.transpose(top_k, [0, 2, 1]) + + def get_config(self): + config = {'data_format': self.data_format} + base_config = super(KMaxPooling, self).get_config() + return dict(list(base_config.items()) + list(config.items())) if __name__ == '__main__': diff --git a/kashgari/tasks/classification/models.py b/kashgari/tasks/classification/models.py index fc6214d3..7d1f0562 100644 --- a/kashgari/tasks/classification/models.py +++ b/kashgari/tasks/classification/models.py @@ -17,7 +17,7 @@ from keras import optimizers from keras.models import Model -from keras.layers import Dense, Lambda +from keras.layers import Dense, Lambda, Flatten, Reshape from keras.layers import 
Dropout, SpatialDropout1D from keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D, MaxPooling1D from keras.layers import Bidirectional, Conv1D @@ -252,6 +252,9 @@ def build_model(self): class KMaxCNNModel(ClassificationModel): __architect_name__ = 'KMaxCNNModel' __base_hyper_parameters__ = { + 'spatial_dropout': { + 'rate': 0.2 + }, 'conv_0': { 'filters': 180, 'kernel_size': 1, @@ -314,18 +317,35 @@ class KMaxCNNModel(ClassificationModel): def build_model(self): base_model = self.embedding.model - conv_0 = Conv1D(**self.hyper_parameters['conv_0'])(base_model.output) - conv_1 = Conv1D(**self.hyper_parameters['conv_1'])(base_model.output) - conv_2 = Conv1D(**self.hyper_parameters['conv_2'])(base_model.output) - conv_3 = Conv1D(**self.hyper_parameters['conv_3'])(base_model.output) + embedded_seq = SpatialDropout1D(**self.hyper_parameters['spatial_dropout'])(base_model.output) + conv_0 = Conv1D(**self.hyper_parameters['conv_0'])(embedded_seq) + conv_1 = Conv1D(**self.hyper_parameters['conv_1'])(embedded_seq) + conv_2 = Conv1D(**self.hyper_parameters['conv_2'])(embedded_seq) + conv_3 = Conv1D(**self.hyper_parameters['conv_3'])(embedded_seq) maxpool_0 = KMaxPooling(**self.hyper_parameters['maxpool_0'])(conv_0) + #maxpool_0f = Reshape((-1,))(maxpool_0) + maxpool_0f = Flatten()(maxpool_0) maxpool_1 = KMaxPooling(**self.hyper_parameters['maxpool_1'])(conv_1) + #maxpool_1f = Reshape((-1,))(maxpool_1) + maxpool_0f = Flatten()(maxpool_0) maxpool_2 = KMaxPooling(**self.hyper_parameters['maxpool_2'])(conv_2) + #maxpool_2f = Reshape((-1,))(maxpool_2) + maxpool_0f = Flatten()(maxpool_0) maxpool_3 = KMaxPooling(**self.hyper_parameters['maxpool_3'])(conv_3) - - merged_tensor = concatenate([maxpool_0, maxpool_1, maxpool_2, maxpool_3], + #maxpool_3f = Reshape((-1,))(maxpool_3) + maxpool_0f = Flatten()(maxpool_0) + #maxpool_0 = GlobalMaxPooling1D()(conv_0) + #maxpool_1 = GlobalMaxPooling1D()(conv_1) + #maxpool_2 = GlobalMaxPooling1D()(conv_2) + #maxpool_3 = GlobalMaxPooling1D()(conv_3) + + #merged_tensor = concatenate([maxpool_0, maxpool_1, maxpool_2, maxpool_3], + # **self.hyper_parameters['merged_tensor']) + merged_tensor = concatenate([maxpool_0f, maxpool_1f, maxpool_2f, maxpool_3f], **self.hyper_parameters['merged_tensor']) + #flatten = Reshape((-1,))(merged_tensor) + #output = Dropout(**self.hyper_parameters['dropout'])(flatten) output = Dropout(**self.hyper_parameters['dropout'])(merged_tensor) output = Dense(**self.hyper_parameters['dense'])(output) output = Dense(len(self.label2idx), From 3443c02e7bf35c6f83e9882c51d48d43f8f7bc86 Mon Sep 17 00:00:00 2001 From: Alex Wang Date: Wed, 27 Feb 2019 11:25:41 +0800 Subject: [PATCH 12/20] fix a model bug caused by some typos --- kashgari/tasks/classification/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kashgari/tasks/classification/models.py b/kashgari/tasks/classification/models.py index 7d1f0562..4a2fba4a 100644 --- a/kashgari/tasks/classification/models.py +++ b/kashgari/tasks/classification/models.py @@ -328,13 +328,13 @@ def build_model(self): maxpool_0f = Flatten()(maxpool_0) maxpool_1 = KMaxPooling(**self.hyper_parameters['maxpool_1'])(conv_1) #maxpool_1f = Reshape((-1,))(maxpool_1) - maxpool_0f = Flatten()(maxpool_0) + maxpool_1f = Flatten()(maxpool_1) maxpool_2 = KMaxPooling(**self.hyper_parameters['maxpool_2'])(conv_2) #maxpool_2f = Reshape((-1,))(maxpool_2) - maxpool_0f = Flatten()(maxpool_0) + maxpool_2f = Flatten()(maxpool_2) maxpool_3 = KMaxPooling(**self.hyper_parameters['maxpool_3'])(conv_3) 
#maxpool_3f = Reshape((-1,))(maxpool_3) - maxpool_0f = Flatten()(maxpool_0) + maxpool_3f = Flatten()(maxpool_3) #maxpool_0 = GlobalMaxPooling1D()(conv_0) #maxpool_1 = GlobalMaxPooling1D()(conv_1) #maxpool_2 = GlobalMaxPooling1D()(conv_2) From a517ab8f31407e612c4be4f55c64647fb7687446 Mon Sep 17 00:00:00 2001 From: Alex Wang Date: Wed, 27 Feb 2019 12:09:25 +0800 Subject: [PATCH 13/20] support change optimizer type when initialize a model and add missed drop layer to some models --- kashgari/tasks/classification/models.py | 122 +++++++++++++++++------- 1 file changed, 87 insertions(+), 35 deletions(-) diff --git a/kashgari/tasks/classification/models.py b/kashgari/tasks/classification/models.py index 4a2fba4a..a898d9e9 100644 --- a/kashgari/tasks/classification/models.py +++ b/kashgari/tasks/classification/models.py @@ -43,6 +43,11 @@ class CNNModel(ClassificationModel): }, 'activation_layer': { 'activation': 'softmax' + }, + 'optimizer_param': { + 'loss': 'categorical_crossentropy', + 'optimizer': 'adam', + 'metrics': ['accuracy'] } } @@ -54,9 +59,7 @@ def build_model(self): dense_2_layer = Dense(len(self.label2idx), **self.hyper_parameters['activation_layer'])(dense_1_layer) model = Model(base_model.inputs, dense_2_layer) - model.compile(loss='categorical_crossentropy', - optimizer='adam', - metrics=['accuracy']) + model.compile(**self.hyper_parameters['optimizer_param']) self.model = model self.model.summary() @@ -70,6 +73,11 @@ class BLSTMModel(ClassificationModel): }, 'activation_layer': { 'activation': 'softmax' + }, + 'optimizer_param': { + 'loss': 'categorical_crossentropy', + 'optimizer': 'adam', + 'metrics': ['accuracy'] } } @@ -80,9 +88,7 @@ def build_model(self): output_layers = [dense_layer] model = Model(base_model.inputs, output_layers) - model.compile(loss='categorical_crossentropy', - optimizer='adam', - metrics=['accuracy']) + model.compile(**self.hyper_parameters['optimizer_param']) self.model = model self.model.summary() @@ -104,6 +110,11 @@ class CNNLSTMModel(ClassificationModel): }, 'activation_layer': { 'activation': 'softmax' + }, + 'optimizer_param': { + 'loss': 'categorical_crossentropy', + 'optimizer': 'adam', + 'metrics': ['accuracy'] } } @@ -117,9 +128,7 @@ def build_model(self): output_layers = [dense_layer] model = Model(base_model.inputs, output_layers) - model.compile(loss='categorical_crossentropy', - optimizer='adam', - metrics=['accuracy']) + model.compile(**self.hyper_parameters['optimizer_param']) self.model = model self.model.summary() @@ -127,6 +136,9 @@ def build_model(self): class AVCNNModel(ClassificationModel): __architect_name__ = 'AVCNNModel' __base_hyper_parameters__ = { + 'spatial_dropout': { + 'rate': 0.25 + }, 'conv_0': { 'filters': 300, 'kernel_size':1, @@ -201,15 +213,21 @@ class AVCNNModel(ClassificationModel): 'adam_optimizer': { 'lr': 1e-3, 'decay': 1e-7 + }, + 'optimizer_param': { + 'loss': 'categorical_crossentropy', + #'optimizer': 'adam', + 'metrics': ['accuracy'] } } def build_model(self): base_model = self.embedding.model - conv_0 = Conv1D(**self.hyper_parameters['conv_0'])(base_model.output) - conv_1 = Conv1D(**self.hyper_parameters['conv_1'])(base_model.output) - conv_2 = Conv1D(**self.hyper_parameters['conv_2'])(base_model.output) - conv_3 = Conv1D(**self.hyper_parameters['conv_3'])(base_model.output) + embedded_seq = SpatialDropout1D(**self.hyper_parameters['spatial_dropout'])(base_model.output) + conv_0 = Conv1D(**self.hyper_parameters['conv_0'])(embedded_seq) + conv_1 = Conv1D(**self.hyper_parameters['conv_1'])(embedded_seq) + 
conv_2 = Conv1D(**self.hyper_parameters['conv_2'])(embedded_seq) + conv_3 = Conv1D(**self.hyper_parameters['conv_3'])(embedded_seq) maxpool_0 = GlobalMaxPooling1D()(conv_0) attn_0 = AttentionWeightedAverage()(conv_0) @@ -242,9 +260,8 @@ def build_model(self): model = Model(base_model.inputs, output) adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) - model.compile(loss='categorical_crossentropy', - optimizer=adam_optimizer, - metrics=['accuracy']) + model.compile(optimizer=adam_optimizer, + **self.hyper_parameters['optimizer_param']) self.model = model self.model.summary() @@ -312,6 +329,11 @@ class KMaxCNNModel(ClassificationModel): 'adam_optimizer': { 'lr': 1e-3, 'decay': 1e-7 + }, + 'optimizer_param': { + 'loss': 'categorical_crossentropy', + #'optimizer': 'adam', + 'metrics': ['accuracy'] } } @@ -353,9 +375,8 @@ def build_model(self): model = Model(base_model.inputs, output) adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) - model.compile(loss='categorical_crossentropy', - optimizer=adam_optimizer, - metrics=['accuracy']) + model.compile(optimizer=adam_optimizer, + **self.hyper_parameters['optimizer_param']) self.model = model self.model.summary() @@ -363,6 +384,9 @@ def build_model(self): class RCNNModel(ClassificationModel): __architect_name__ = 'RCNNModel' __base_hyper_parameters__ = { + 'spatial_dropout': { + 'rate': 0.2 + }, 'rnn_0': { 'units': 64, 'return_sequences': True @@ -395,12 +419,18 @@ class RCNNModel(ClassificationModel): 'lr': 1e-3, 'clipvalue': 5, 'decay': 1e-5 + }, + 'optimizer_param': { + 'loss': 'categorical_crossentropy', + #'optimizer': 'adam', + 'metrics': ['accuracy'] } } def build_model(self): base_model = self.embedding.model - rnn_0 = Bidirectional(GRULayer(**self.hyper_parameters['rnn_0']))(base_model.output) + embedded_seq = SpatialDropout1D(**self.hyper_parameters['spatial_dropout'])(base_model.output) + rnn_0 = Bidirectional(GRULayer(**self.hyper_parameters['rnn_0']))(embedded_seq) conv_0 = Conv1D(**self.hyper_parameters['conv_0'])(rnn_0) maxpool = GlobalMaxPooling1D()(conv_0) attn = AttentionWeightedAverage()(conv_0) @@ -415,9 +445,8 @@ def build_model(self): model = Model(base_model.inputs, output) adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) - model.compile(loss='categorical_crossentropy', - optimizer=adam_optimizer, - metrics=['accuracy']) + model.compile(optimizer=adam_optimizer, + **self.hyper_parameters['optimizer_param']) self.model = model self.model.summary() @@ -425,6 +454,9 @@ def build_model(self): class AVRNNModel(ClassificationModel): __architect_name__ = 'AVRNNModel' __base_hyper_parameters__ = { + 'spatial_dropout': { + 'rate': 0.25 + }, 'rnn_0': { 'units': 60, 'return_sequences': True @@ -457,12 +489,18 @@ class AVRNNModel(ClassificationModel): 'lr': 1e-3, 'clipvalue': 5, 'decay': 1e-6 + }, + 'optimizer_param': { + 'loss': 'categorical_crossentropy', + #'optimizer': 'adam', + 'metrics': ['accuracy'] } } def build_model(self): base_model = self.embedding.model - rnn_0 = Bidirectional(GRULayer(**self.hyper_parameters['rnn_0']))(base_model.output) + embedded_seq = SpatialDropout1D(**self.hyper_parameters['spatial_dropout'])(base_model.output) + rnn_0 = Bidirectional(GRULayer(**self.hyper_parameters['rnn_0']))(embedded_seq) rnn_1 = Bidirectional(GRULayer(**self.hyper_parameters['rnn_1']))(rnn_0) concat_rnn = concatenate([rnn_0, rnn_1], **self.hyper_parameters['concat_rnn']) @@ -481,9 +519,8 @@ def build_model(self): model = Model(base_model.inputs, output) 
adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) - model.compile(loss='categorical_crossentropy', - optimizer=adam_optimizer, - metrics=['accuracy']) + model.compile(optimizer=adam_optimizer, + **self.hyper_parameters['optimizer_param']) self.model = model self.model.summary() @@ -491,6 +528,9 @@ def build_model(self): class DropoutBGRUModel(ClassificationModel): __architect_name__ = 'DropoutBGRUModel' __base_hyper_parameters__ = { + 'spatial_dropout': { + 'rate': 0.15 + }, 'rnn_0': { 'units': 64, 'return_sequences': True @@ -517,12 +557,18 @@ class DropoutBGRUModel(ClassificationModel): }, 'activation_layer': { 'activation': 'softmax' + }, + 'optimizer_param': { + 'loss': 'categorical_crossentropy', + 'optimizer': 'adam', + 'metrics': ['accuracy'] } } def build_model(self): base_model = self.embedding.model - rnn_0 = Bidirectional(GRULayer(**self.hyper_parameters['rnn_0']))(base_model.output) + embedded_seq = SpatialDropout1D(**self.hyper_parameters['spatial_dropout'])(base_model.output) + rnn_0 = Bidirectional(GRULayer(**self.hyper_parameters['rnn_0']))(embedded_seq) dropout_rnn = Dropout(**self.hyper_parameters['dropout_rnn'])(rnn_0) rnn_1 = Bidirectional(GRULayer(**self.hyper_parameters['rnn_1']))(dropout_rnn) last = Lambda(lambda t: t[:, -1], name='last')(rnn_1) @@ -539,9 +585,7 @@ def build_model(self): model = Model(base_model.inputs, output) # adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) - model.compile(loss='categorical_crossentropy', - optimizer='adam', - metrics=['accuracy']) + model.compile(**self.hyper_parameters['optimizer_param']) self.model = model self.model.summary() @@ -549,6 +593,9 @@ def build_model(self): class DropoutAVRNNModel(ClassificationModel): __architect_name__ = 'DropoutAVRNNModel' __base_hyper_parameters__ = { + 'spatial_dropout': { + 'rate': 0.25 + }, 'rnn_0': { 'units': 56, 'return_sequences': True @@ -584,12 +631,18 @@ class DropoutAVRNNModel(ClassificationModel): 'lr': 1e-3, 'clipvalue': 5, 'decay': 1e-7 + }, + 'optimizer_param': { + 'loss': 'categorical_crossentropy', + #'optimizer': 'adam', + 'metrics': ['accuracy'] } } def build_model(self): base_model = self.embedding.model - rnn_0 = Bidirectional(GRULayer(**self.hyper_parameters['rnn_0']))(base_model.output) + embedded_seq = SpatialDropout1D(**self.hyper_parameters['spatial_dropout'])(base_model.output) + rnn_0 = Bidirectional(GRULayer(**self.hyper_parameters['rnn_0']))(embedded_seq) rnn_dropout = SpatialDropout1D(**self.hyper_parameters['rnn_dropout'])(rnn_0) rnn_1 = Bidirectional(GRULayer(**self.hyper_parameters['rnn_1']))(rnn_dropout) @@ -608,9 +661,8 @@ def build_model(self): model = Model(base_model.inputs, output) adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer']) - model.compile(loss='categorical_crossentropy', - optimizer=adam_optimizer, - metrics=['accuracy']) + model.compile(optimizer=adam_optimizer, + **self.hyper_parameters['optimizer_param']) self.model = model self.model.summary() From 5415f9cb5fe8e916c1a5a00db702b05bd52bfc42 Mon Sep 17 00:00:00 2001 From: Alex Wang Date: Wed, 27 Feb 2019 19:13:26 +0800 Subject: [PATCH 14/20] fix the shape mismatch error when load KMaxPooling model from storage --- kashgari/layers.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kashgari/layers.py b/kashgari/layers.py index 349e0678..fcaf5426 100644 --- a/kashgari/layers.py +++ b/kashgari/layers.py @@ -126,9 +126,9 @@ def __init__(self, k=1, sorted=True, data_format='channels_last', 
**kwargs): self.sorted = sorted self.data_format = K.normalize_data_format(data_format) - def build(self, input_shape): - assert len(input_shape) == 3 - super(KMaxPooling, self).build(input_shape) + # def build(self, input_shape): + # assert len(input_shape) == 3 + # super(KMaxPooling, self).build(input_shape) def compute_output_shape(self, input_shape): if self.data_format == 'channels_first': @@ -149,7 +149,9 @@ def call(self, inputs): return tf.transpose(top_k, [0, 2, 1]) def get_config(self): - config = {'data_format': self.data_format} + config = {'k': self.k, + 'sorted': self.sorted, + 'data_format': self.data_format} base_config = super(KMaxPooling, self).get_config() return dict(list(base_config.items()) + list(config.items())) From 623d73f79cde9a057dd7f31e39fea31012f42f3a Mon Sep 17 00:00:00 2001 From: Alex Wang Date: Wed, 27 Feb 2019 19:15:45 +0800 Subject: [PATCH 15/20] improved the robust of save load process and optimized the key-value pairs in hyper_parameters dict to be more flexible to config at model initialization --- kashgari/tasks/base/base_model.py | 84 +++++++++++- kashgari/tasks/classification/models.py | 171 +++++++++++++++++------- 2 files changed, 198 insertions(+), 57 deletions(-) diff --git a/kashgari/tasks/base/base_model.py b/kashgari/tasks/base/base_model.py index af949a0f..970d1361 100644 --- a/kashgari/tasks/base/base_model.py +++ b/kashgari/tasks/base/base_model.py @@ -12,13 +12,18 @@ """ import os import json +import pickle import pathlib +import traceback import logging +logger = logging.getLogger(__name__) import numpy as np from typing import Dict import keras from keras.models import Model +from keras import backend as K + from kashgari.utils import helper from kashgari.embeddings import CustomEmbedding, BaseEmbedding from kashgari.utils.crf import CRF, crf_loss, crf_accuracy @@ -73,8 +78,24 @@ def save(self, model_path: str): with open(os.path.join(model_path, 'model.json'), 'w', encoding='utf-8') as f: f.write(json.dumps(model_info, indent=2, ensure_ascii=False)) + with open(os.path.join(model_path, 'struct.json'), 'w', encoding='utf-8') as f: + f.write(self.model.to_json()) + + #self.model.save_weights(os.path.join(model_path, 'weights.h5')) + optimizer_weight_values = None + try: + symbolic_weights = getattr(self.model.optimizer, 'weights') + optimizer_weight_values = K.batch_get_value(symbolic_weights) + except Exception as e: + logger.warn('error occur: {}'.format(e)) + traceback.print_tb(e.__traceback__) + logger.warn('No optimizer weights found.') + if optimizer_weight_values is not None: + with open(os.path.join(model_path, 'optimizer.pkl'), 'wb') as f: + pickle.dump(optimizer_weight_values, f) + self.model.save(os.path.join(model_path, 'model.model')) - logging.info('model saved to {}'.format(os.path.abspath(model_path))) + logger.info('model saved to {}'.format(os.path.abspath(model_path))) @staticmethod def create_custom_objects(model_info): @@ -113,15 +134,66 @@ def load_model(cls, model_path: str): custom_objects = cls.create_custom_objects(model_info) if custom_objects: - logging.debug('prepared custom objects: {}'.format(custom_objects)) - - agent.model = keras.models.load_model(os.path.join(model_path, 'model.model'), - custom_objects=custom_objects) + logger.debug('prepared custom objects: {}'.format(custom_objects)) + + try: + agent.model = keras.models.load_model(os.path.join(model_path, 'model.model'), + custom_objects=custom_objects) + except Exception as e: + logger.warn('Error `{}` occured trying directly model loading. 
Try to rebuild.'.format(e)) + logger.debug('Load model structure from json.') + with open(os.path.join(model_path, 'struct.json'), 'r', encoding='utf-8') as f: + model_struct = f.read() + agent.model = keras.models.model_from_json(model_struct, + custom_objects=custom_objects) + logger.debug('Build optimizer with model info.') + optimizer_conf = model_info['hyper_parameters'].get('optimizer', None) + optimizer = 'adam' #default + if optimizer_conf is not None and isinstance(optimizer_conf, dict): + module_str = optimizer_conf.get('module', 'None') + name_str = optimizer_conf.get('name', 'None') + params = optimizer_conf.get('params', None) + invalid_set = [None, 'None', '', {}] + if not any([module_str.strip() in invalid_set, + name_str.strip() in invalid_set, + params in invalid_set]): + try: + optimizer = getattr(eval(module_str), name_str)(**params) + except: + logger.warn('Invalid optimizer configuration in model info. Use `adam` as default.') + else: + logger.warn('No optimizer configuration found in model info. Use `adam` as default.') + + default_compile_params = {'loss': 'categorical_crossentropy', 'metrics':['accuracy']} + compile_params = model_info['hyper_parameters'].get('compile_params', default_compile_params) + logger.debug('Compile model from scratch.') + try: + agent.model.compile(optimizer=optimizer, **compile_params) + except: + logger.warn('Failed to compile model. Compile params seems incorrect.') + logger.warn('Use default options `{}` to compile.'.format(default_compile_params)) + agent.model.compile(optimizer=optimizer, **default_compile_params) + logger.debug('Load model weights.') + agent.model.summary() + agent.model.load_weights(os.path.join(model_path, 'model.model')) + agent.model._make_train_function() + optimizer_weight_values = None + logger.debug('Load optimizer weights.') + try: + with open(os.path.join(model_path, 'optimizer.pkl'), 'rb') as f: + optimizer_weight_values = pickle.load(f) + except Exception as e: + logger.warn('Try to load optimizer weights but no optimizer weights file found.') + if optimizer_weight_values is not None: + agent.model.optimizer.set_weights(optimizer_weight_values) + else: + logger.warn('Rebuild model but optimizer weights missed. 
Retrain needed.') + logger.info('Model rebuild finished.') agent.embedding.update(model_info.get('embedding', {})) agent.model.summary() agent.label2idx = label2idx agent.embedding.token2idx = token2idx - logging.info('loaded model from {}'.format(os.path.abspath(model_path))) + logger.info('loaded model from {}'.format(os.path.abspath(model_path))) return agent diff --git a/kashgari/tasks/classification/models.py b/kashgari/tasks/classification/models.py index a898d9e9..3b740d59 100644 --- a/kashgari/tasks/classification/models.py +++ b/kashgari/tasks/classification/models.py @@ -14,7 +14,8 @@ import logging -from keras import optimizers +import keras +#from keras import optimizers from keras.models import Model from keras.layers import Dense, Lambda, Flatten, Reshape @@ -44,9 +45,17 @@ class CNNModel(ClassificationModel): 'activation_layer': { 'activation': 'softmax' }, - 'optimizer_param': { + 'optimizer': { + 'module': 'keras.optimizers', + 'name': 'Adam', + 'params': { + 'lr': 1e-3, + 'decay': 0.0 + } + }, + 'compile_params': { 'loss': 'categorical_crossentropy', - 'optimizer': 'adam', + #'optimizer': 'adam', 'metrics': ['accuracy'] } } @@ -59,7 +68,10 @@ def build_model(self): dense_2_layer = Dense(len(self.label2idx), **self.hyper_parameters['activation_layer'])(dense_1_layer) model = Model(base_model.inputs, dense_2_layer) - model.compile(**self.hyper_parameters['optimizer_param']) + optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']), + self.hyper_parameters['optimizer']['name'])( + **self.hyper_parameters['optimizer']['params']) + model.compile(optimizer=optimizer, **self.hyper_parameters['compile_params']) self.model = model self.model.summary() @@ -74,9 +86,17 @@ class BLSTMModel(ClassificationModel): 'activation_layer': { 'activation': 'softmax' }, - 'optimizer_param': { + 'optimizer': { + 'module': 'keras.optimizers', + 'name': 'Adam', + 'params': { + 'lr': 1e-3, + 'decay': 0.0 + } + }, + 'compile_params': { 'loss': 'categorical_crossentropy', - 'optimizer': 'adam', + #'optimizer': 'adam', 'metrics': ['accuracy'] } } @@ -88,7 +108,10 @@ def build_model(self): output_layers = [dense_layer] model = Model(base_model.inputs, output_layers) - model.compile(**self.hyper_parameters['optimizer_param']) + optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']), + self.hyper_parameters['optimizer']['name'])( + **self.hyper_parameters['optimizer']['params']) + model.compile(optimizer=optimizer, **self.hyper_parameters['compile_params']) self.model = model self.model.summary() @@ -111,7 +134,15 @@ class CNNLSTMModel(ClassificationModel): 'activation_layer': { 'activation': 'softmax' }, - 'optimizer_param': { + 'optimizer': { + 'module': 'keras.optimizers', + 'name': 'Adam', + 'params': { + 'lr': 1e-3, + 'decay': 0.0 + } + }, + 'compile_params': { 'loss': 'categorical_crossentropy', 'optimizer': 'adam', 'metrics': ['accuracy'] @@ -128,7 +159,10 @@ def build_model(self): output_layers = [dense_layer] model = Model(base_model.inputs, output_layers) - model.compile(**self.hyper_parameters['optimizer_param']) + optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']), + self.hyper_parameters['optimizer']['name'])( + **self.hyper_parameters['optimizer']['params']) + model.compile(optimizer=optimizer, **self.hyper_parameters['compile_params']) self.model = model self.model.summary() @@ -210,11 +244,15 @@ class AVCNNModel(ClassificationModel): 'activation_layer': { 'activation': 'softmax' }, - 'adam_optimizer': { - 'lr': 1e-3, - 'decay': 1e-7 + 
+        'optimizer': {
+            'module': 'keras.optimizers',
+            'name': 'Adam',
+            'params': {
+                'lr': 1e-3,
+                'decay': 1e-7
+            }
         },
-        'optimizer_param': {
+        'compile_params': {
             'loss': 'categorical_crossentropy',
             #'optimizer': 'adam',
             'metrics': ['accuracy']
@@ -259,9 +297,10 @@ def build_model(self):
                        **self.hyper_parameters['activation_layer'])(output)
 
         model = Model(base_model.inputs, output)
-        adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer'])
-        model.compile(optimizer=adam_optimizer,
-                      **self.hyper_parameters['optimizer_param'])
+        optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']),
+                            self.hyper_parameters['optimizer']['name'])(
+                                **self.hyper_parameters['optimizer']['params'])
+        model.compile(optimizer=optimizer, **self.hyper_parameters['compile_params'])
         self.model = model
         self.model.summary()
 
@@ -326,11 +365,15 @@ class KMaxCNNModel(ClassificationModel):
         'activation_layer': {
             'activation': 'softmax'
         },
-        'adam_optimizer': {
-            'lr': 1e-3,
-            'decay': 1e-7
+        'optimizer': {
+            'module': 'keras.optimizers',
+            'name': 'Adam',
+            'params': {
+                'lr': 1e-3,
+                'decay': 1e-7
+            }
         },
-        'optimizer_param': {
+        'compile_params': {
             'loss': 'categorical_crossentropy',
             #'optimizer': 'adam',
             'metrics': ['accuracy']
@@ -374,9 +417,10 @@ def build_model(self):
                        **self.hyper_parameters['activation_layer'])(output)
 
         model = Model(base_model.inputs, output)
-        adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer'])
-        model.compile(optimizer=adam_optimizer,
-                      **self.hyper_parameters['optimizer_param'])
+        optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']),
+                            self.hyper_parameters['optimizer']['name'])(
+                                **self.hyper_parameters['optimizer']['params'])
+        model.compile(optimizer=optimizer, **self.hyper_parameters['compile_params'])
         self.model = model
         self.model.summary()
 
@@ -415,12 +459,16 @@ class RCNNModel(ClassificationModel):
         'activation_layer': {
             'activation': 'softmax'
         },
-        'adam_optimizer': {
-            'lr': 1e-3,
-            'clipvalue': 5,
-            'decay': 1e-5
+        'optimizer': {
+            'module': 'keras.optimizers',
+            'name': 'Adam',
+            'params': {
+                'lr': 1e-3,
+                'clipvalue': 5,
+                'decay': 1e-5
+            }
         },
-        'optimizer_param': {
+        'compile_params': {
             'loss': 'categorical_crossentropy',
             #'optimizer': 'adam',
             'metrics': ['accuracy']
@@ -444,9 +492,10 @@ def build_model(self):
                        **self.hyper_parameters['activation_layer'])(output)
 
         model = Model(base_model.inputs, output)
-        adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer'])
-        model.compile(optimizer=adam_optimizer,
-                      **self.hyper_parameters['optimizer_param'])
+        optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']),
+                            self.hyper_parameters['optimizer']['name'])(
+                                **self.hyper_parameters['optimizer']['params'])
+        model.compile(optimizer=optimizer, **self.hyper_parameters['compile_params'])
         self.model = model
         self.model.summary()
 
@@ -485,12 +534,16 @@ class AVRNNModel(ClassificationModel):
         'activation_layer': {
             'activation': 'softmax'
         },
-        'adam_optimizer': {
-            'lr': 1e-3,
-            'clipvalue': 5,
-            'decay': 1e-6
+        'optimizer': {
+            'module': 'keras.optimizers',
+            'name': 'Adam',
+            'params': {
+                'lr': 1e-3,
+                'clipvalue': 5,
+                'decay': 1e-6
+            }
         },
-        'optimizer_param': {
+        'compile_params': {
             'loss': 'categorical_crossentropy',
             #'optimizer': 'adam',
             'metrics': ['accuracy']
@@ -518,9 +571,10 @@ def build_model(self):
                        **self.hyper_parameters['activation_layer'])(output)
 
         model = Model(base_model.inputs, output)
-        adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer'])
-        model.compile(optimizer=adam_optimizer,
-                      **self.hyper_parameters['optimizer_param'])
+        optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']),
+                            self.hyper_parameters['optimizer']['name'])(
+                                **self.hyper_parameters['optimizer']['params'])
+        model.compile(optimizer=optimizer, **self.hyper_parameters['compile_params'])
         self.model = model
         self.model.summary()
 
@@ -558,9 +612,17 @@ class DropoutBGRUModel(ClassificationModel):
         'activation_layer': {
             'activation': 'softmax'
         },
-        'optimizer_param': {
+        'optimizer': {
+            'module': 'keras.optimizers',
+            'name': 'Adam',
+            'params': {
+                'lr': 1e-3,
+                'decay': 0.0
+            }
+        },
+        'compile_params': {
             'loss': 'categorical_crossentropy',
-            'optimizer': 'adam',
+            #'optimizer': 'adam',
             'metrics': ['accuracy']
         }
     }
@@ -584,8 +646,10 @@ def build_model(self):
                        **self.hyper_parameters['activation_layer'])(output)
 
         model = Model(base_model.inputs, output)
-        # adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer'])
-        model.compile(**self.hyper_parameters['optimizer_param'])
+        optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']),
+                            self.hyper_parameters['optimizer']['name'])(
+                                **self.hyper_parameters['optimizer']['params'])
+        model.compile(optimizer=optimizer, **self.hyper_parameters['compile_params'])
         self.model = model
         self.model.summary()
 
@@ -627,12 +691,16 @@ class DropoutAVRNNModel(ClassificationModel):
         'activation_layer': {
             'activation': 'softmax'
         },
-        'adam_optimizer': {
-            'lr': 1e-3,
-            'clipvalue': 5,
-            'decay': 1e-7
+        'optimizer': {
+            'module': 'keras.optimizers',
+            'name': 'Adam',
+            'params': {
+                'lr': 1e-3,
+                'clipvalue': 5,
+                'decay': 1e-7
+            }
         },
-        'optimizer_param': {
+        'compile_params': {
             'loss': 'categorical_crossentropy',
             #'optimizer': 'adam',
             'metrics': ['accuracy']
@@ -660,9 +728,10 @@ def build_model(self):
                        **self.hyper_parameters['activation_layer'])(output)
 
         model = Model(base_model.inputs, output)
-        adam_optimizer = optimizers.Adam(**self.hyper_parameters['adam_optimizer'])
-        model.compile(optimizer=adam_optimizer,
-                      **self.hyper_parameters['optimizer_param'])
+        optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']),
+                            self.hyper_parameters['optimizer']['name'])(
+                                **self.hyper_parameters['optimizer']['params'])
+        model.compile(optimizer=optimizer, **self.hyper_parameters['compile_params'])
         self.model = model
         self.model.summary()
 
From 6f4c39da8e2871b42437bb263a5aa5e08dbc7766 Mon Sep 17 00:00:00 2001
From: Alex Wang
Date: Wed, 27 Feb 2019 21:50:57 +0800
Subject: [PATCH 16/20] remove duplicate keys in hyper_parameters config in
 model CNNLSTMModel

---
 kashgari/tasks/classification/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kashgari/tasks/classification/models.py b/kashgari/tasks/classification/models.py
index 3b740d59..0bb1eabc 100644
--- a/kashgari/tasks/classification/models.py
+++ b/kashgari/tasks/classification/models.py
@@ -144,7 +144,7 @@ class CNNLSTMModel(ClassificationModel):
         },
         'compile_params': {
             'loss': 'categorical_crossentropy',
-            'optimizer': 'adam',
+            #'optimizer': 'adam',
             'metrics': ['accuracy']
         }
     }
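Taken together, patches 15 and 16 move every classification model onto the same two-part scheme: an `optimizer` block describing how to construct the optimizer, and a `compile_params` block passed straight to `model.compile()`. One practical consequence is that swapping optimizers no longer requires touching `build_model()`. A small sketch of what an override could look like; the SGD settings here are illustrative, not from the patch:

```python
# Hypothetical optimizer block (sketch): any optimizer importable by module
# path fits the {'module', 'name', 'params'} convention read in build_model().
import keras

sgd_optimizer_conf = {
    'module': 'keras.optimizers',
    'name': 'SGD',
    'params': {'lr': 1e-2, 'momentum': 0.9, 'nesterov': True}
}

# build_model() resolves the block like this (eval() follows the patch; an
# importlib variant is sketched after patch 15's loader hunk above):
optimizer = getattr(eval(sgd_optimizer_conf['module']),
                    sgd_optimizer_conf['name'])(**sgd_optimizer_conf['params'])
```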
From c6e0af6b1b000c22fb9e6a06e0757e47667edaf1 Mon Sep 17 00:00:00 2001
From: BrikerMan
Date: Thu, 28 Feb 2019 10:42:27 +0800
Subject: [PATCH 17/20] clean up

---
 .coveragerc                             |  1 +
 kashgari/tasks/classification/models.py | 80 ++++++++++++-------------
 tests/test_classifier_models.py         |  2 +-
 3 files changed, 42 insertions(+), 41 deletions(-)

diff --git a/.coveragerc b/.coveragerc
index 0d1279e0..ae01d6a2 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -13,6 +13,7 @@ exclude_lines =
     raise AssertionError
     raise NotImplementedError
     raise ValueError
+    except Exception as e:
 
     # Don't complain if non-runnable code isn't run:
     if __name__ == .__main__.:

diff --git a/kashgari/tasks/classification/models.py b/kashgari/tasks/classification/models.py
index 0bb1eabc..410864b0 100644
--- a/kashgari/tasks/classification/models.py
+++ b/kashgari/tasks/classification/models.py
@@ -55,7 +55,7 @@ class CNNModel(ClassificationModel):
         },
         'compile_params': {
             'loss': 'categorical_crossentropy',
-            #'optimizer': 'adam',
+            # 'optimizer': 'adam',
             'metrics': ['accuracy']
         }
     }
@@ -70,7 +70,7 @@ def build_model(self):
         model = Model(base_model.inputs, dense_2_layer)
         optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']),
                             self.hyper_parameters['optimizer']['name'])(
-                                **self.hyper_parameters['optimizer']['params'])
+            **self.hyper_parameters['optimizer']['params'])
         model.compile(optimizer=optimizer, **self.hyper_parameters['compile_params'])
         self.model = model
         self.model.summary()
@@ -96,7 +96,7 @@ class BLSTMModel(ClassificationModel):
         },
         'compile_params': {
             'loss': 'categorical_crossentropy',
-            #'optimizer': 'adam',
+            # 'optimizer': 'adam',
             'metrics': ['accuracy']
         }
     }
@@ -110,7 +110,7 @@ def build_model(self):
         model = Model(base_model.inputs, output_layers)
         optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']),
                             self.hyper_parameters['optimizer']['name'])(
-                                **self.hyper_parameters['optimizer']['params'])
+            **self.hyper_parameters['optimizer']['params'])
         model.compile(optimizer=optimizer, **self.hyper_parameters['compile_params'])
         self.model = model
         self.model.summary()
@@ -144,7 +144,7 @@ class CNNLSTMModel(ClassificationModel):
         },
         'compile_params': {
             'loss': 'categorical_crossentropy',
-            #'optimizer': 'adam',
+            # 'optimizer': 'adam',
             'metrics': ['accuracy']
         }
     }
@@ -161,7 +161,7 @@ def build_model(self):
         model = Model(base_model.inputs, output_layers)
         optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']),
                             self.hyper_parameters['optimizer']['name'])(
-                                **self.hyper_parameters['optimizer']['params'])
+            **self.hyper_parameters['optimizer']['params'])
         model.compile(optimizer=optimizer, **self.hyper_parameters['compile_params'])
         self.model = model
         self.model.summary()
@@ -175,28 +175,28 @@ class AVCNNModel(ClassificationModel):
         },
         'conv_0': {
             'filters': 300,
-            'kernel_size':1,
+            'kernel_size': 1,
             'kernel_initializer': 'normal',
             'padding': 'valid',
             'activation': 'relu'
         },
         'conv_1': {
             'filters': 300,
-            'kernel_size':2,
+            'kernel_size': 2,
             'kernel_initializer': 'normal',
             'padding': 'valid',
             'activation': 'relu'
         },
         'conv_2': {
             'filters': 300,
-            'kernel_size':3,
+            'kernel_size': 3,
             'kernel_initializer': 'normal',
             'padding': 'valid',
             'activation': 'relu'
         },
         'conv_3': {
             'filters': 300,
-            'kernel_size':4,
+            'kernel_size': 4,
             'kernel_initializer': 'normal',
             'padding': 'valid',
             'activation': 'relu'
@@ -219,19 +219,19 @@ class AVCNNModel(ClassificationModel):
         'avg_3': {},
         # ---
         'v0_col': {
-            #'mode': 'concat',
+            # 'mode': 'concat',
             'axis': 1
         },
         'v1_col': {
-            #'mode': 'concat',
+            # 'mode': 'concat',
             'axis': 1
         },
         'v2_col': {
-            #'mode': 'concat',
+            # 'mode': 'concat',
             'axis': 1
         },
         'merged_tensor': {
-            #'mode': 'concat',
+            # 'mode': 'concat',
             'axis': 1
         },
         'dropout': {
@@ -254,7 +254,7 @@ class AVCNNModel(ClassificationModel):
         },
         'compile_params': {
             'loss': 'categorical_crossentropy',
-            #'optimizer': 'adam',
+            # 'optimizer': 'adam',
             'metrics': ['accuracy']
         }
     }
@@ -299,7 +299,7 @@ def build_model(self):
         model = Model(base_model.inputs, output)
         optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']),
                             self.hyper_parameters['optimizer']['name'])(
-                                **self.hyper_parameters['optimizer']['params'])
+            **self.hyper_parameters['optimizer']['params'])
         model.compile(optimizer=optimizer, **self.hyper_parameters['compile_params'])
         self.model = model
         self.model.summary()
@@ -352,7 +352,7 @@ class KMaxCNNModel(ClassificationModel):
             'k': 3
         },
         'merged_tensor': {
-            #'mode': 'concat',
+            # 'mode': 'concat',
             'axis': 1
         },
         'dropout': {
@@ -375,7 +375,7 @@ class KMaxCNNModel(ClassificationModel):
         },
         'compile_params': {
             'loss': 'categorical_crossentropy',
-            #'optimizer': 'adam',
+            # 'optimizer': 'adam',
             'metrics': ['accuracy']
         }
     }
@@ -389,28 +389,28 @@ def build_model(self):
         conv_3 = Conv1D(**self.hyper_parameters['conv_3'])(embedded_seq)
 
         maxpool_0 = KMaxPooling(**self.hyper_parameters['maxpool_0'])(conv_0)
-        #maxpool_0f = Reshape((-1,))(maxpool_0)
+        # maxpool_0f = Reshape((-1,))(maxpool_0)
         maxpool_0f = Flatten()(maxpool_0)
         maxpool_1 = KMaxPooling(**self.hyper_parameters['maxpool_1'])(conv_1)
-        #maxpool_1f = Reshape((-1,))(maxpool_1)
+        # maxpool_1f = Reshape((-1,))(maxpool_1)
         maxpool_1f = Flatten()(maxpool_1)
         maxpool_2 = KMaxPooling(**self.hyper_parameters['maxpool_2'])(conv_2)
-        #maxpool_2f = Reshape((-1,))(maxpool_2)
+        # maxpool_2f = Reshape((-1,))(maxpool_2)
         maxpool_2f = Flatten()(maxpool_2)
         maxpool_3 = KMaxPooling(**self.hyper_parameters['maxpool_3'])(conv_3)
-        #maxpool_3f = Reshape((-1,))(maxpool_3)
+        # maxpool_3f = Reshape((-1,))(maxpool_3)
         maxpool_3f = Flatten()(maxpool_3)
 
-        #maxpool_0 = GlobalMaxPooling1D()(conv_0)
-        #maxpool_1 = GlobalMaxPooling1D()(conv_1)
-        #maxpool_2 = GlobalMaxPooling1D()(conv_2)
-        #maxpool_3 = GlobalMaxPooling1D()(conv_3)
+        # maxpool_0 = GlobalMaxPooling1D()(conv_0)
+        # maxpool_1 = GlobalMaxPooling1D()(conv_1)
+        # maxpool_2 = GlobalMaxPooling1D()(conv_2)
+        # maxpool_3 = GlobalMaxPooling1D()(conv_3)
 
-        #merged_tensor = concatenate([maxpool_0, maxpool_1, maxpool_2, maxpool_3],
+        # merged_tensor = concatenate([maxpool_0, maxpool_1, maxpool_2, maxpool_3],
         #                             **self.hyper_parameters['merged_tensor'])
         merged_tensor = concatenate([maxpool_0f, maxpool_1f, maxpool_2f, maxpool_3f],
                                     **self.hyper_parameters['merged_tensor'])
-        #flatten = Reshape((-1,))(merged_tensor)
-        #output = Dropout(**self.hyper_parameters['dropout'])(flatten)
+        # flatten = Reshape((-1,))(merged_tensor)
+        # output = Dropout(**self.hyper_parameters['dropout'])(flatten)
         output = Dropout(**self.hyper_parameters['dropout'])(merged_tensor)
         output = Dense(**self.hyper_parameters['dense'])(output)
         output = Dense(len(self.label2idx),
                        **self.hyper_parameters['activation_layer'])(output)
 
@@ -419,7 +419,7 @@ def build_model(self):
         model = Model(base_model.inputs, output)
         optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']),
                             self.hyper_parameters['optimizer']['name'])(
-                                **self.hyper_parameters['optimizer']['params'])
+            **self.hyper_parameters['optimizer']['params'])
         model.compile(optimizer=optimizer, **self.hyper_parameters['compile_params'])
         self.model = model
         self.model.summary()
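Aside on the KMaxCNNModel hunk above: each `KMaxPooling` output has shape `(batch, k, channels)`, so the code now pipes it through `Flatten()` (rather than the commented-out `Reshape((-1,))`) before the dense layers. For intuition, here is a minimal TensorFlow sketch of the pooling operation itself; this is not the library's `KMaxPooling` implementation, which may differ in ordering details:

```python
import tensorflow as tf


def k_max_pool(x, k=3):
    # x: (batch, timesteps, channels) -> (batch, k, channels)
    # Keep the k largest activations per channel along the time axis.
    swapped = tf.transpose(x, [0, 2, 1])                   # (batch, channels, timesteps)
    top_k = tf.nn.top_k(swapped, k=k, sorted=True).values  # (batch, channels, k)
    return tf.transpose(top_k, [0, 2, 1])                  # (batch, k, channels)
```

Note that `tf.nn.top_k` returns values sorted by magnitude; the original k-max pooling of Kalchbrenner et al. keeps the k values in their sequence order, which is one of the details an exact layer has to handle.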
@@ -470,7 +470,7 @@ class RCNNModel(ClassificationModel):
         },
         'compile_params': {
             'loss': 'categorical_crossentropy',
-            #'optimizer': 'adam',
+            # 'optimizer': 'adam',
             'metrics': ['accuracy']
         }
     }
@@ -494,7 +494,7 @@ def build_model(self):
         model = Model(base_model.inputs, output)
         optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']),
                             self.hyper_parameters['optimizer']['name'])(
-                                **self.hyper_parameters['optimizer']['params'])
+            **self.hyper_parameters['optimizer']['params'])
         model.compile(optimizer=optimizer, **self.hyper_parameters['compile_params'])
         self.model = model
         self.model.summary()
@@ -545,7 +545,7 @@ class AVRNNModel(ClassificationModel):
         },
         'compile_params': {
             'loss': 'categorical_crossentropy',
-            #'optimizer': 'adam',
+            # 'optimizer': 'adam',
             'metrics': ['accuracy']
         }
     }
@@ -573,7 +573,7 @@ def build_model(self):
         model = Model(base_model.inputs, output)
         optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']),
                             self.hyper_parameters['optimizer']['name'])(
-                                **self.hyper_parameters['optimizer']['params'])
+            **self.hyper_parameters['optimizer']['params'])
         model.compile(optimizer=optimizer, **self.hyper_parameters['compile_params'])
         self.model = model
         self.model.summary()
@@ -622,7 +622,7 @@ class DropoutBGRUModel(ClassificationModel):
         },
         'compile_params': {
             'loss': 'categorical_crossentropy',
-            #'optimizer': 'adam',
+            # 'optimizer': 'adam',
             'metrics': ['accuracy']
         }
     }
@@ -648,7 +648,7 @@ def build_model(self):
         model = Model(base_model.inputs, output)
         optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']),
                             self.hyper_parameters['optimizer']['name'])(
-                                **self.hyper_parameters['optimizer']['params'])
+            **self.hyper_parameters['optimizer']['params'])
         model.compile(optimizer=optimizer, **self.hyper_parameters['compile_params'])
         self.model = model
         self.model.summary()
@@ -702,7 +702,7 @@ class DropoutAVRNNModel(ClassificationModel):
         },
         'compile_params': {
             'loss': 'categorical_crossentropy',
-            #'optimizer': 'adam',
+            # 'optimizer': 'adam',
             'metrics': ['accuracy']
         }
     }
@@ -730,7 +730,7 @@ def build_model(self):
         model = Model(base_model.inputs, output)
         optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']),
                             self.hyper_parameters['optimizer']['name'])(
-                                **self.hyper_parameters['optimizer']['params'])
+            **self.hyper_parameters['optimizer']['params'])
         model.compile(optimizer=optimizer, **self.hyper_parameters['compile_params'])
         self.model = model
         self.model.summary()
@@ -746,5 +746,5 @@
                                       sequence_length=15,
                                       limit=5000)
     bert = BERTEmbedding('bert-base-chinese', sequence_length=15)
-    model = CNNModel(bert)
-    model.fit(train_x, train_y, epochs=1)
+    t_model = CNNModel(bert)
+    t_model.fit(train_x, train_y, epochs=1)

diff --git a/tests/test_classifier_models.py b/tests/test_classifier_models.py
index 19fd5b0f..4653fb3f 100644
--- a/tests/test_classifier_models.py
+++ b/tests/test_classifier_models.py
@@ -229,7 +229,7 @@ class TestKMaxCNNModelBasic(TestBLSTMModelModelBasic):
     @classmethod
     def setUpClass(cls):
         cls.epochs = 2
-        cls .model_class = KMaxCNNModel
+        cls.model_class = KMaxCNNModel
         cls.model = cls.model_class()

From d6d9194be441d047950d113d90a61f98d7d509ca Mon Sep 17 00:00:00 2001
From: BrikerMan
Date: Thu, 28 Feb 2019 10:42:37 +0800
Subject: [PATCH 18/20] update version to 0.1.9

---
 version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/version.py b/version.py
index b9eed60e..44da0019 100644
--- a/version.py
+++ b/version.py
@@ -11,4 +11,4 @@
 """
 
-__version__ = '0.1.8'
+__version__ = '0.1.9'

From 3ffb9ced84c03d48fe88abaf3f87321b877848ab Mon Sep 17 00:00:00 2001
From: BrikerMan
Date: Thu, 28 Feb 2019 10:42:43 +0800
Subject: [PATCH 19/20] update readme

---
 README.md | 85 +++++++++----------------------------------------------
 1 file changed, 13 insertions(+), 72 deletions(-)

diff --git a/README.md b/README.md
index a7ebd88d..41fb076f 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
 [![Issues](https://img.shields.io/github/issues/BrikerMan/Kashgari.svg)](https://github.com/BrikerMan/Kashgari/issues)
 [![Contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CONTRIBUTING.md)
 ![](https://img.shields.io/pypi/l/kashgari.svg?style=flat)
-[![](https://img.shields.io/pypi/dw/kashgari.svg)](https://pypi.org/project/kashgari/)
+[![](https://img.shields.io/pypi/dm/kashgari.svg)](https://pypi.org/project/kashgari/)
 
 Simple and powerful NLP framework, build your state-of-art model in 5 minutes for named entity recognition (NER), part-of-speech tagging (PoS) and text classification tasks.
 
@@ -26,83 +26,24 @@ Kashgare is:
   * Classic word2vec embedding
   * BERT embedding
 * Sequence(Text) Classification Models
+  * CNNModel
+  * BLSTMModel
+  * CNNLSTMModel
+  * AVCNNModel
+  * KMaxCNNModel
+  * RCNNModel
+  * AVRNNModel
+  * DropoutBGRUModel
+  * DropoutAVRNNModel
 * Sequence(Text) Labeling Models (NER, PoS)
+  * CNNLSTMModel
+  * BLSTMModel
+  * BLSTMCRFModel
 * Model Training
 * Model Evaluate
 * GPU Support
 * Customize Model
-<table>
-  [deleted: HTML feature table with columns "task" and "Models" -- "Text
-   Classification" listed CNNModel, BLSTMModel, CNNLSTMModel, AVCNNModel,
-   KMaxCNNModel, RCNNModel, AVRNNModel, DropoutBGRUModel and DropoutAVRNNModel;
-   "Sequence Labeling" listed CNNLSTMModel, BLSTMModel and BLSTMCRFModel --
-   the same lists the bullet points above now carry]
-</table>
 
 ## Performance
 
 | Task | Language | Dataset | Score | Detail |

From 9fc6c2e5d115af6373968eb567c0c38f1b3b8868 Mon Sep 17 00:00:00 2001
From: BrikerMan
Date: Thu, 28 Feb 2019 11:08:20 +0800
Subject: [PATCH 20/20] fix embedding tokenize error, add sequence length
 check at sequence labeling model

---
 kashgari/embeddings/embeddings.py         | 2 +-
 kashgari/tasks/seq_labeling/base_model.py | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/kashgari/embeddings/embeddings.py b/kashgari/embeddings/embeddings.py
index 6dc943f2..59cdf393 100644
--- a/kashgari/embeddings/embeddings.py
+++ b/kashgari/embeddings/embeddings.py
@@ -133,7 +133,7 @@ def tokenize(self,
         def tokenize_sentence(text: TextSeqType) -> TokenSeqType:
             tokens = [self.token2idx.get(token, self.token2idx[k.UNK]) for token in text]
             if add_bos_eos:
-                tokens = [self.token2idx[k.BOS]] + tokens + [self.token2idx[k.BOS]]
+                tokens = [self.token2idx[k.BOS]] + tokens + [self.token2idx[k.EOS]]
             return tokens
 
         if is_list:
diff --git a/kashgari/tasks/seq_labeling/base_model.py b/kashgari/tasks/seq_labeling/base_model.py
index 42d434c2..565551f1 100644
--- a/kashgari/tasks/seq_labeling/base_model.py
+++ b/kashgari/tasks/seq_labeling/base_model.py
@@ -59,6 +59,8 @@ def build_token2id_label2id_dict(self,
                                      y_train: List[List[str]],
                                      x_validate: List[List[str]] = None,
                                      y_validate: List[List[str]] = None):
+        for index in range(len(x_train)):
+            assert len(x_train[index]) == len(y_train[index])
         x_data = x_train
         y_data = y_train
         if x_validate:
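The one-character embeddings fix matters more than it looks: before it, `tokenize` wrapped every sequence as `BOS ... BOS`, so models never saw a real end-of-sequence marker. A toy illustration; the vocabulary and index values below are made up, not the library's:

```python
# Toy illustration of the tokenize fix; token2idx values are made up.
token2idx = {'<PAD>': 0, '<UNK>': 1, '<BOS>': 2, '<EOS>': 3, 'hello': 4, 'world': 5}


def tokenize_sentence(text, add_bos_eos=True):
    tokens = [token2idx.get(token, token2idx['<UNK>']) for token in text]
    if add_bos_eos:
        # fixed behaviour: BOS at the front, EOS (not a second BOS) at the end
        tokens = [token2idx['<BOS>']] + tokens + [token2idx['<EOS>']]
    return tokens


assert tokenize_sentence(['hello', 'world']) == [2, 4, 5, 3]
```

The companion change in `build_token2id_label2id_dict` is a guard rather than a fix: the added `assert` fails fast when a labeling sample's token sequence and tag sequence have drifted out of alignment, instead of letting the mismatch surface later as a shape error or silently corrupted training batches.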