
Commit

Merge 3b83f4b into 69580a4
alexwwang committed Jun 26, 2019
2 parents 69580a4 + 3b83f4b commit 5d59beb
Showing 5 changed files with 383 additions and 3 deletions.
3 changes: 3 additions & 0 deletions kashgari/layers/__init__.py
@@ -10,6 +10,9 @@
import tensorflow as tf
from tensorflow.python import keras
from kashgari.layers.non_masking_layer import NonMaskingLayer
from kashgari.layers.att_wgt_avg_layer import AttentionWeightedAverageLayer
from kashgari.layers.att_wgt_avg_layer import AttentionWeightedAverage, AttWgtAvgLayer
from kashgari.layers.kmax_pool_layer import KMaxPoolingLayer, KMaxPoolLayer, KMaxPooling

L = keras.layers

89 changes: 89 additions & 0 deletions kashgari/layers/att_wgt_avg_layer.py
@@ -0,0 +1,89 @@
# encoding: utf-8

# author: AlexWang
# contact: ialexwwang@gmail.com

# file: att_wgt_avg_layer.py
# time: 2019-06-24 19:35

import kashgari
import tensorflow as tf
from tensorflow.python import keras
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.engine.input_spec import InputSpec

L = keras.layers
initializers = keras.initializers

if tf.test.is_gpu_available(cuda_only=True):
L.LSTM = L.CuDNNLSTM


class AttentionWeightedAverageLayer(L.Layer):
    '''
    Computes a weighted average of the channels across timesteps.
    Uses one parameter per channel to compute the attention value for a single timestep.
    '''

def __init__(self, return_attention=False, **kwargs):
self.init = initializers.get('uniform')
self.supports_masking = True
self.return_attention = return_attention
super(AttentionWeightedAverageLayer, self).__init__(**kwargs)

def build(self, input_shape):
self.input_spec = [InputSpec(ndim=3)]
assert len(input_shape) == 3

self.W = self.add_weight(shape=(input_shape[2], 1),
name='{}_w'.format(self.name),
initializer=self.init)
        # add_weight() above already registers self.W as a trainable weight;
        # assigning to self.trainable_weights directly is unsupported in newer tf.keras
super(AttentionWeightedAverageLayer, self).build(input_shape)

def call(self, x, mask=None):
# computes a probability distribution over the timesteps
# uses 'max trick' for numerical stability
# reshape is done to avoid issue with Tensorflow
# and 1-dimensional weights
logits = K.dot(x, self.W)
x_shape = K.shape(x)
logits = K.reshape(logits, (x_shape[0], x_shape[1]))
ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

# masked timesteps have zero weight
if mask is not None:
mask = K.cast(mask, K.floatx())
ai = ai * mask
att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
weighted_input = x * K.expand_dims(att_weights)
result = K.sum(weighted_input, axis=1)
if self.return_attention:
return [result, att_weights]
return result

def get_output_shape_for(self, input_shape):
return self.compute_output_shape(input_shape)

def compute_output_shape(self, input_shape):
output_len = input_shape[2]
if self.return_attention:
return [(input_shape[0], output_len), (input_shape[0], input_shape[1])]
return (input_shape[0], output_len)

def compute_mask(self, inputs, input_mask=None):
if isinstance(input_mask, list):
return [None] * len(input_mask)
else:
return None


AttentionWeightedAverage = AttentionWeightedAverageLayer
AttWgtAvgLayer = AttentionWeightedAverageLayer

kashgari.custom_objects['AttentionWeightedAverageLayer'] = AttentionWeightedAverageLayer
kashgari.custom_objects['AttentionWeightedAverage'] = AttentionWeightedAverage
kashgari.custom_objects['AttWgtAvgLayer'] = AttWgtAvgLayer

if __name__ == '__main__':
print('Hello world, AttentionWeightedAverageLayer/AttWgtAvgLayer.')
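A minimal usage sketch for the new layer (not part of this commit; the shapes and the toy model below are illustrative and assume the same tensorflow.python.keras API imported above):

import numpy as np
from tensorflow.python import keras
from kashgari.layers import AttentionWeightedAverageLayer

# toy sequence input: 50 timesteps, 128 features per step (illustrative sizes)
seq_input = keras.layers.Input(shape=(50, 128))
# collapse the timesteps into one 128-dim vector via learned attention weights
pooled = AttentionWeightedAverageLayer()(seq_input)
output = keras.layers.Dense(3, activation='softmax')(pooled)
model = keras.Model(seq_input, output)
model.compile(optimizer='adam', loss='categorical_crossentropy')

x = np.random.random((4, 50, 128)).astype('float32')
print(model.predict(x).shape)  # expected: (4, 3)
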
94 changes: 94 additions & 0 deletions kashgari/layers/kmax_pool_layer.py
@@ -0,0 +1,94 @@
# encoding: utf-8

# author: AlexWang
# contact: ialexwwang@gmail.com

# file: kmax_pool_layer.py
# time: 2019-06-25 16:35

import kashgari
import tensorflow as tf
from tensorflow.python import keras
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.engine.input_spec import InputSpec

L = keras.layers

if tf.test.is_gpu_available(cuda_only=True):
L.LSTM = L.CuDNNLSTM


class KMaxPoolingLayer(L.Layer):
'''
    K-max pooling layer that extracts the k highest activations from a sequence (2nd dimension).
TensorFlow backend.
# Arguments
        k: An int scalar,
            the number of top timesteps (per feature channel) to keep.
        sorted: A bool,
            whether the output is sorted (default) or not.
data_format: A string,
one of `channels_last` (default) or `channels_first`.
The ordering of the dimensions in the inputs.
`channels_last` corresponds to inputs with shape
`(batch, steps, features)` while `channels_first`
corresponds to inputs with shape
`(batch, features, steps)`.
# Input shape
- If `data_format='channels_last'`:
3D tensor with shape:
`(batch_size, steps, features)`
- If `data_format='channels_first'`:
3D tensor with shape:
`(batch_size, features, steps)`
# Output shape
3D tensor with shape:
`(batch_size, top-k-steps, features)`
'''

def __init__(self, k=1, sorted=True, data_format='channels_last', **kwargs): # noqa: A002
super(KMaxPoolingLayer, self).__init__(**kwargs)
self.input_spec = InputSpec(ndim=3)
self.k = k
self.sorted = sorted
if data_format.lower() in ['channels_first', 'channels_last']:
self.data_format = data_format.lower()
else:
self.data_format = K.image_data_format()

def compute_output_shape(self, input_shape):
if self.data_format == 'channels_first':
return (input_shape[0], self.k, input_shape[1])
else:
return (input_shape[0], self.k, input_shape[2])

def call(self, inputs):
if self.data_format == 'channels_last':
# swap last two dimensions since top_k will be applied along the last dimension
shifted_input = tf.transpose(inputs, [0, 2, 1])

# extract top_k, returns two tensors [values, indices]
top_k = tf.nn.top_k(shifted_input, k=self.k, sorted=self.sorted)[0]
else:
top_k = tf.nn.top_k(inputs, k=self.k, sorted=self.sorted)[0]
        # swap the last two dimensions back so the output shape is (batch, k, features)
return tf.transpose(top_k, [0, 2, 1])

def get_config(self):
config = {'k': self.k,
'sorted': self.sorted,
'data_format': self.data_format}
base_config = super(KMaxPoolingLayer, self).get_config()
return dict(list(base_config.items()) + list(config.items()))


KMaxPooling = KMaxPoolingLayer
KMaxPoolLayer = KMaxPoolingLayer

kashgari.custom_objects['KMaxPoolingLayer'] = KMaxPoolingLayer
kashgari.custom_objects['KMaxPooling'] = KMaxPooling
kashgari.custom_objects['KMaxPoolLayer'] = KMaxPoolLayer

if __name__ == '__main__':
print('Hello world, KMaxPoolLayer/KMaxPoolingLayer.')
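A similar sketch for the pooling layer (again illustrative, not part of the commit): a Conv1D feature map with 128 channels is reduced to its 2 strongest timesteps per channel.

from tensorflow.python import keras
from kashgari.layers import KMaxPoolingLayer

seq_input = keras.layers.Input(shape=(50, 64))   # (steps, features)
conv = keras.layers.Conv1D(128, kernel_size=3, activation='relu')(seq_input)
top2 = KMaxPoolingLayer(k=2)(conv)               # -> (batch, 2, 128)
flat = keras.layers.Flatten()(top2)
output = keras.layers.Dense(5, activation='softmax')(flat)
model = keras.Model(seq_input, output)
model.summary()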

9 changes: 8 additions & 1 deletion kashgari/tasks/classification/__init__.py
@@ -13,7 +13,14 @@
from kashgari.tasks.classification.models import CNN_Model
from kashgari.tasks.classification.models import CNN_LSTM_Model
from kashgari.tasks.classification.models import CNN_GRU_Model
from kashgari.tasks.classification.models import AVCNN_Model
from kashgari.tasks.classification.models import KMax_CNN_Model


BLSTMModel = BiLSTM_Model
BGRUModel = BiGRU_Model
CNNModel = CNN_Model
CNNLSTMModel = CNN_LSTM_Model
CNNGRUModel = CNN_GRU_Model
AVCNNModel = AVCNN_Model
KMaxCNNModel = KMax_CNN_Model
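The new models are exported under both their canonical names and the alias spellings above. A quick usage sketch (the toy corpus is illustrative and assumes kashgari's usual fit/predict interface of tokenized sentences and string labels):

from kashgari.tasks.classification import AVCNN_Model, KMax_CNN_Model

# tiny illustrative corpus: tokenized sentences paired with string labels
train_x = [['this', 'movie', 'was', 'great'],
           ['terrible', 'plot', 'and', 'acting'],
           ['really', 'enjoyed', 'it']]
train_y = ['pos', 'neg', 'pos']

model = KMax_CNN_Model()          # or AVCNN_Model()
model.fit(train_x, train_y, epochs=2)
print(model.predict([['great', 'acting']]))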
