Commit
Showing 5 changed files with 383 additions and 3 deletions.
@@ -0,0 +1,89 @@
# encoding: utf-8

# author: AlexWang
# contact: ialexwwang@gmail.com

# file: attention_weighted_average.py
# time: 2019-06-24 19:35

import kashgari
import tensorflow as tf
from tensorflow.python import keras
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.engine.input_spec import InputSpec

L = keras.layers
initializers = keras.initializers

# Use the CuDNN-accelerated LSTM kernel whenever a CUDA GPU is available.
if tf.test.is_gpu_available(cuda_only=True):
    L.LSTM = L.CuDNNLSTM


class AttentionWeightedAverageLayer(L.Layer):
    '''
    Computes a weighted average of the channels across timesteps.
    Uses one parameter per channel to compute the attention value for a single timestep.
    '''

    def __init__(self, return_attention=False, **kwargs):
        self.init = initializers.get('uniform')
        self.supports_masking = True
        self.return_attention = return_attention
        super(AttentionWeightedAverageLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.input_spec = [InputSpec(ndim=3)]
        assert len(input_shape) == 3

        # add_weight registers self.W as a trainable weight
        # (trainable_weights is a read-only property in tf.keras).
        self.W = self.add_weight(shape=(input_shape[2], 1),
                                 name='{}_w'.format(self.name),
                                 initializer=self.init)
        super(AttentionWeightedAverageLayer, self).build(input_shape)

    def call(self, x, mask=None):
        # Computes a probability distribution over the timesteps.
        # Uses the 'max trick' for numerical stability.
        # The reshape works around a TensorFlow issue with 1-dimensional weights.
        logits = K.dot(x, self.W)
        x_shape = K.shape(x)
        logits = K.reshape(logits, (x_shape[0], x_shape[1]))
        ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

        # Masked timesteps get zero weight.
        if mask is not None:
            mask = K.cast(mask, K.floatx())
            ai = ai * mask
        att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
        weighted_input = x * K.expand_dims(att_weights)
        result = K.sum(weighted_input, axis=1)
        if self.return_attention:
            return [result, att_weights]
        return result

    def get_output_shape_for(self, input_shape):
        return self.compute_output_shape(input_shape)

    def compute_output_shape(self, input_shape):
        output_len = input_shape[2]
        if self.return_attention:
            return [(input_shape[0], output_len), (input_shape[0], input_shape[1])]
        return (input_shape[0], output_len)

    def compute_mask(self, inputs, input_mask=None):
        if isinstance(input_mask, list):
            return [None] * len(input_mask)
        return None

    def get_config(self):
        # Serialize return_attention so the layer survives a
        # save/load round trip through kashgari.custom_objects.
        config = {'return_attention': self.return_attention}
        base_config = super(AttentionWeightedAverageLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))


AttentionWeightedAverage = AttentionWeightedAverageLayer
AttWgtAvgLayer = AttentionWeightedAverageLayer

kashgari.custom_objects['AttentionWeightedAverageLayer'] = AttentionWeightedAverageLayer
kashgari.custom_objects['AttentionWeightedAverage'] = AttentionWeightedAverage
kashgari.custom_objects['AttWgtAvgLayer'] = AttWgtAvgLayer

if __name__ == '__main__':
    print('Hello world, AttentionWeightedAverageLayer/AttWgtAvgLayer.')
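For context on how this layer is typically wired in: `call` computes a masked softmax over timesteps (logits `x · W`, max-subtracted for stability, normalized with `K.epsilon()` guarding against all-masked rows) and averages the inputs with those weights. Below is a minimal usage sketch; it assumes the TF 1.x-era imports above, and the shapes, layer sizes, and names are illustrative rather than part of this commit.

```python
# Minimal usage sketch (assumption: TF 1.x with the imports above;
# shapes and layer sizes are illustrative, not from the commit).
inputs = L.Input(shape=(30, 128))                     # (batch, timesteps, channels)
seq = L.Bidirectional(L.LSTM(64, return_sequences=True))(inputs)

# Collapse the timestep axis to a single attention-weighted vector.
pooled = AttentionWeightedAverageLayer()(seq)         # -> (batch, 128)
outputs = L.Dense(5, activation='softmax')(pooled)

model = keras.Model(inputs, outputs)
model.compile(optimizer='adam', loss='categorical_crossentropy')

# With return_attention=True the layer also exposes the per-timestep weights:
# vec, att = AttentionWeightedAverageLayer(return_attention=True)(seq)
```

Because the layer and both aliases are registered in `kashgari.custom_objects`, a saved model should reload through kashgari's loading utilities; `get_config` above is what carries `return_attention` across that round trip.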
@@ -0,0 +1,94 @@
# encoding: utf-8

# author: AlexWang
# contact: ialexwwang@gmail.com

# file: k_max_pooling.py
# time: 2019-06-25 16:35

import kashgari
import tensorflow as tf
from tensorflow.python import keras
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.engine.input_spec import InputSpec

L = keras.layers

# Use the CuDNN-accelerated LSTM kernel whenever a CUDA GPU is available.
if tf.test.is_gpu_available(cuda_only=True):
    L.LSTM = L.CuDNNLSTM


class KMaxPoolingLayer(L.Layer):
    '''
    K-max pooling layer that extracts the k highest activations from a sequence (2nd dimension).
    TensorFlow backend.

    # Arguments
        k: An int scalar,
            the number of top timesteps of features to pool.
        sorted: A bool,
            whether the output is sorted (default) or not.
        data_format: A string,
            one of `channels_last` (default) or `channels_first`.
            The ordering of the dimensions in the inputs.
            `channels_last` corresponds to inputs with shape
            `(batch, steps, features)` while `channels_first`
            corresponds to inputs with shape
            `(batch, features, steps)`.

    # Input shape
        - If `data_format='channels_last'`:
            3D tensor with shape `(batch_size, steps, features)`
        - If `data_format='channels_first'`:
            3D tensor with shape `(batch_size, features, steps)`

    # Output shape
        3D tensor with shape `(batch_size, top_k_steps, features)`
    '''

    def __init__(self, k=1, sorted=True, data_format='channels_last', **kwargs):  # noqa: A002
        super(KMaxPoolingLayer, self).__init__(**kwargs)
        self.input_spec = InputSpec(ndim=3)
        self.k = k
        self.sorted = sorted
        if data_format.lower() in ['channels_first', 'channels_last']:
            self.data_format = data_format.lower()
        else:
            self.data_format = K.image_data_format()

    def compute_output_shape(self, input_shape):
        if self.data_format == 'channels_first':
            return (input_shape[0], self.k, input_shape[1])
        return (input_shape[0], self.k, input_shape[2])

    def call(self, inputs):
        if self.data_format == 'channels_last':
            # Swap the last two dimensions, since top_k is applied along the last one.
            shifted_input = tf.transpose(inputs, [0, 2, 1])

            # Extract the top k values; tf.nn.top_k returns a (values, indices) pair.
            top_k = tf.nn.top_k(shifted_input, k=self.k, sorted=self.sorted)[0]
        else:
            top_k = tf.nn.top_k(inputs, k=self.k, sorted=self.sorted)[0]
        # Swap the dimensions back so the kept steps precede the features.
        return tf.transpose(top_k, [0, 2, 1])

    def get_config(self):
        config = {'k': self.k,
                  'sorted': self.sorted,
                  'data_format': self.data_format}
        base_config = super(KMaxPoolingLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))


KMaxPooling = KMaxPoolingLayer
KMaxPoolLayer = KMaxPoolingLayer

kashgari.custom_objects['KMaxPoolingLayer'] = KMaxPoolingLayer
kashgari.custom_objects['KMaxPooling'] = KMaxPooling
kashgari.custom_objects['KMaxPoolLayer'] = KMaxPoolLayer

if __name__ == '__main__':
    print('Hello world, KMaxPoolLayer/KMaxPoolingLayer.')
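To make the shape contract above concrete, here is a hedged sketch of the layer in a small Conv1D stack; it assumes the TF 1.x-era imports above, and all sizes and layer names are illustrative, not part of the commit.

```python
# Shape sketch (assumption: TF 1.x with the imports above; sizes are illustrative).
inputs = L.Input(shape=(50, 100))                                # (batch, steps, features)
conv = L.Conv1D(64, kernel_size=3, activation='relu')(inputs)    # -> (batch, 48, 64)

# Keep the 4 largest activations per feature channel.
pooled = KMaxPoolingLayer(k=4)(conv)                             # -> (batch, 4, 64)
flat = L.Flatten()(pooled)
outputs = L.Dense(2, activation='softmax')(flat)

model = keras.Model(inputs, outputs)
model.compile(optimizer='adam', loss='categorical_crossentropy')
```

Note that, per `tf.nn.top_k`, `sorted=True` orders the k kept values by magnitude, not by their original positions in the sequence.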