Merge 622229a into 692e2c2

BrikerMan · Feb 24, 2019 · 4311a17 · 4311a17
2 parents 692e2c2 + 622229a
commit 4311a17
Show file tree

Hide file tree

Showing 14 changed files with 968 additions and 289 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -4,14 +4,22 @@ python:
   - "3.6"
 cache: pip
 # command to install dependencies
+env:
+  - TEST_FILE=tests/test_classifier_models.py
+  - TEST_FILE=tests/test_corpus.py
+  - TEST_FILE=tests/test_embeddings.py
+  - TEST_FILE=tests/test_seq_labeling_models.py
 before_install:
   - export BOTO_CONFIG=/dev/null
 install:
   - pip install python-coveralls
   - pip install -r requirements.txt
   - pip install .
+  - pip install coverage
+  - pip install nose
 # command to run tests
 script:
-  - sh test.sh
+#  - sh test.sh
+  - nosetests --cover-erase --with-coverage --cover-html --cover-html-dir=htmlcov --cover-package="kashgari" $TEST_FILE
 after_success:
   - coveralls
diff --git a/README.md b/README.md
@@ -25,19 +25,84 @@ Kashgare is:
 * Embedding support
     * Classic word2vec embedding
     * BERT embedding
-* Text Classification Models
-    * CNN Classification Model
-    * CNN LSTM Classification Model
-    * Bidirectional LSTM Classification Model
-* Text Labeling Models (NER, PoS)
-    * Bidirectional LSTM Labeling Model
-    * Bidirectional LSTM CRF Labeling Model
-    * CNN LSTM Labeling Model
+* Sequence(Text) Classification Models
+* Sequence(Text) Labeling Models (NER, PoS)
 * Model Training
 * Model Evaluation
 * GPU Support
 * Customize Model
 
+<table>
+		<tbody>
+			<tr>
+				<td valign="top">
+					<p>
+						<span>Task</span>
+					</p>
+				</td>
+				<td valign="top">
+					<p>
+						<span>Models</span>
+					</p>
+				</td>
+			</tr>
+			<tr>
+				<td valign="top">
+					<p>
+						<span>Text Classification</span>
+					</p>
+				</td>
+				<td valign="top">
+					<p>
+						<span>* CNNModel</span>
+					</p>
+					<p>
+						<span>* BLSTMModel</span>
+					</p>
+					<p>
+						<span>* CNNLSTMModel</span>
+					</p>
+					<p>
+						<span>* AVCNNModel</span>
+					</p>
+					<p>
+						<span>* KMaxCNNModel</span>
+					</p>
+					<p>
+						<span>* RCNNModel</span>
+					</p>
+					<p>
+						<span>* AVRNNModel</span>
+					</p>
+					<p>
+						<span>* DropoutBGRUModel</span>
+					</p>
+					<p>
+						<span>* DropoutAVRNNModel</span>
+					</p>
+				</td>
+			</tr>
+			<tr>
+				<td valign="top">
+					<p>
+						<span>Sequence Labeling</span>
+					</p>
+				</td>
+				<td valign="top">
+					<p>
+						<span>* CNNLSTMModel</span>
+					</p>
+					<p>
+						<span>* BLSTMModel</span>
+					</p>
+					<p>
+						<span>* BLSTMCRFModel</span>
+					</p>
+				</td>
+			</tr>
+		</tbody>
+	</table>
+
 ## Performance
 
 | Task                     | Language | Dataset                   | Score          | Detail                                                                         |

diff --git a/kashgari/__init__.py b/kashgari/__init__.py
@@ -17,6 +17,8 @@
 from kashgari.tasks import classification
 from kashgari.tasks import seq_labeling
 
+from kashgari.macros import config
+
 
 if __name__ == "__main__":
     print("Hello world")
diff --git a/kashgari/layers.py b/kashgari/layers.py
@@ -0,0 +1,119 @@
+# encoding: utf-8
+"""
+@author: BrikerMan
+@contact: eliyar917@gmail.com
+@blog: https://eliyar.biz
+
+@version: 1.0
+@license: Apache Licence
+@file: layers
+@time: 2019-02-23
+
+"""
+from __future__ import absolute_import, division
+import logging
+
+import tensorflow as tf
+from keras.layers import Flatten
+from keras.layers import GRU, LSTM
+from keras.layers import CuDNNGRU, CuDNNLSTM
+from keras import initializers
+from keras.engine import InputSpec, Layer
+from keras import backend as K
+
+from kashgari.macros import config
+
+# Bind the recurrent layer classes once, when this module is imported:
+# the CuDNN variants if config.use_CuDNN_cell is truthy, the standard
+# Keras GRU / LSTM otherwise.
+# NOTE(review): because this runs at import time, changing
+# config.use_CuDNN_cell afterwards does not rebind GRULayer / LSTMLayer;
+# confirm that is the intended behavior.
+if config.use_CuDNN_cell:
+    GRULayer = CuDNNGRU
+    LSTMLayer = CuDNNLSTM
+else:
+    GRULayer = GRU
+    LSTMLayer = LSTM
+
+
+class AttentionWeightedAverage(Layer):
+    '''
+    Computes a weighted average of the different channels across timesteps.
+    Uses 1 parameter pr. channel to compute the attention value for a single timestep.
+    '''
+
+    def __init__(self, return_attention=False, **kwargs):
+        self.init = initializers.get('uniform')
+        self.supports_masking = True
+        self.return_attention = return_attention
+        super(AttentionWeightedAverage, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        self.input_spec = [InputSpec(ndim=3)]
+        assert len(input_shape) == 3
+
+        self.W = self.add_weight(shape=(input_shape[2], 1),
+                                 name='{}_w'.format(self.name),
+                                 initializer=self.init)
+        self.trainable_weights = [self.W]
+        super(AttentionWeightedAverage, self).build(input_shape)
+
+    def call(self, x, mask=None):
+        # computes a probability distribution over the timesteps
+        # uses 'max trick' for numerical stability
+        # reshape is done to avoid issue with Tensorflow
+        # and 1-dimensional weights
+        logits = K.dot(x, self.W)
+        x_shape = K.shape(x)
+        logits = K.reshape(logits, (x_shape[0], x_shape[1]))
+        ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))
+
+        # masked timesteps have zero weight
+        if mask is not None:
+            mask = K.cast(mask, K.floatx())
+            ai = ai * mask
+        att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
+        weighted_input = x * K.expand_dims(att_weights)
+        result = K.sum(weighted_input, axis=1)
+        if self.return_attention:
+            return [result, att_weights]
+        return result
+
+    def get_output_shape_for(self, input_shape):
+        return self.compute_output_shape(input_shape)
+
+    def compute_output_shape(self, input_shape):
+        output_len = input_shape[2]
+        if self.return_attention:
+            return [(input_shape[0], output_len), (input_shape[0], input_shape[1])]
+        return (input_shape[0], output_len)
+
+    def compute_mask(self, input, input_mask=None):
+        if isinstance(input_mask, list):
+            return [None] * len(input_mask)
+        else:
+            return None
+
+
+class KMaxPooling(Layer):
+    '''
+    K-max pooling layer that extracts the k-highest activation from a sequence (2nd dimension).
+    TensorFlow backend.
+    '''
+
+    def __init__(self, k=1, **kwargs):
+        super().__init__(**kwargs)
+        self.input_spec = InputSpec(ndim=3)
+        self.k = k
+
+    def compute_output_shape(self, input_shape):
+        return (input_shape[0], (input_shape[2] * self.k))
+
+    def call(self, inputs):
+        # swap last two dimensions since top_k will be applied along the last dimension
+        shifted_input = tf.transpose(inputs, [0, 2,1])
+
+        # extract top_k, returns two tensors [values, indices]
+        top_k = tf.nn.top_k(shifted_input, k=self.k, sorted=True, name=None)[0]
+
+        # return flattened output
+        return Flatten()(top_k)
+
+
+if __name__ == '__main__':
+    print("hello, world")
diff --git a/kashgari/macros.py b/kashgari/macros.py
@@ -36,6 +36,14 @@
 pathlib.Path(PROCESSED_CORPUS_PATH).mkdir(parents=True, exist_ok=True)
 
 
+class _Config(object):
+    """Container for runtime settings; a single instance is exposed as ``config``."""
+
+    def __init__(self):
+        # Off by default; kashgari.layers reads this flag to choose between
+        # the CuDNN and the standard recurrent layer classes.
+        self.use_CuDNN_cell = False
+
+
+config = _Config()
+
+
 class CustomEmbedding(object):
     def __init__(self, embedding_size=100):
         self.embedding_size = embedding_size

diff --git a/kashgari/tasks/base/base_model.py b/kashgari/tasks/base/base_model.py
@@ -23,6 +23,7 @@
 from kashgari.embeddings import CustomEmbedding, BaseEmbedding
 from kashgari.utils.crf import CRF, crf_loss, crf_accuracy
 from keras_bert.bert import get_custom_objects as get_bert_custom_objects
+from kashgari.layers import AttentionWeightedAverage, KMaxPooling
 
 
 class BaseModel(object):
@@ -94,7 +95,8 @@ def create_custom_objects(model_info):
         if embedding and embedding['embedding_type'] == 'bert':
             custom_objects['NonMaskingLayer'] = helper.NonMaskingLayer
             custom_objects.update(get_bert_custom_objects())
-
+        custom_objects['AttentionWeightedAverage'] = AttentionWeightedAverage
+        custom_objects['KMaxPooling'] = KMaxPooling
         return custom_objects
 
     @classmethod

diff --git a/kashgari/tasks/classification/__init__.py b/kashgari/tasks/classification/__init__.py
@@ -11,6 +11,6 @@
 
 """
 from .base_model import ClassificationModel
-from .blstm_model import BLSTMModel
-from .cnn_lstm_model import CNNLSTMModel
-from .cnn_model import CNNModel
+from kashgari.tasks.classification.models import BLSTMModel, CNNLSTMModel, CNNModel
+from kashgari.tasks.classification.models import AVCNNModel, KMaxCNNModel, RCNNModel, AVRNNModel
+from kashgari.tasks.classification.models import DropoutBGRUModel, DropoutAVRNNModel
diff --git a/kashgari/tasks/classification/blstm_model.py b/kashgari/tasks/classification/blstm_model.py