In [0]:
import sys
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [0]:
max_seg = 5
max_word = 100

train_amount = 5000
test_amount = 1000

w2v_len = 30
level_class_cnt = 5

dropout_rate = 0.5
hidden_feature_dim = 70
gru_feature_dim = 150

In [32]:
x_train = np.random.randint(0, 100, (train_amount, max_seg, max_word))
x_test = np.random.randint(0, 100, (test_amount, max_seg, max_word))

y_train = np.random.randint(0, 5, (train_amount))
y_test = np.random.randint(0, 5, (test_amount))

fake_w2v = np.random.rand(100, w2v_len)

x_train = np.expand_dims(fake_w2v[x_train], axis=-1)
x_test = np.expand_dims(fake_w2v[x_test], axis=-1)

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

(5000, 5, 100, 30, 1) (5000,)
(1000, 5, 100, 30, 1) (1000,)


In [0]:
''' A CNN unit to encode segment with single kernel height.

The unit would apply a convolution to its input to get a 2-dimensional 
tensor, then apply max overtime pooling to get a single dimensional tensor.

Shape_In: (batch_size, seg_max, word_max, vec_len, 1)
Shape_Out: (batch_size, 1, hidden_feature_dim, 1)

Args:
    input_shape ((int, int)): The shape of segment matrix. (word_max * vec_len)
    kernel_height (int): The height of the convolution kernel.
    index (int): The index of the segment in its belonging document.

Returns:
    (Model): The CNN model to encode the segment matrix.
'''
def __get_sentence_encode_unit(input_shape, kernel_height, index):
    cnned_height = input_shape[0] - kernel_height + 1
    def doc_filter(tensor_in, index):
        begin, size = [0, index, 0, 0, 0], [-1, 1, -1, -1, -1]
        return tf.squeeze(tf.slice(tensor_in, begin, size), axis=1)
    return tf.keras.models.Sequential([
        tf.keras.layers.Lambda(doc_filter, arguments={'index':index}),
        tf.keras.layers.Conv2D(hidden_feature_dim, (kernel_height, input_shape[1])),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        tf.keras.layers.Reshape((cnned_height, hidden_feature_dim, 1)),
        tf.keras.layers.MaxPool2D((cnned_height, 1))
    ])


''' A CNN unit to encode segment with multiple kernel heights

The unit would apply operation defined in `__get_sentence_encode_unit` for 
different kernel heights, then concat the result as a 1-dimensional tensor.

Shape_In: (batch_size, seg_max, word_max, vec_len, 1)
Shape_Out: (batch_size, hidden_feature_dim * len(kernel_heights))

Args:
    input_shape ((int, int, int)): The shape of the document. (seg_max * word_max * vec_len)
    kernel_heights ([int]): The list of the kernel heights.
    index: The index of the segment in its belonging document.

Returns:
    (Model): The CNN model to encode the segment matrix.
'''
def __get_multi_kernel_encode_unit(input_shape, kernel_heights, index):
    model_input = tf.keras.Input((*input_shape, 1))
    cnn_layers = [__get_sentence_encode_unit(input_shape[1:], h, index)
                     (model_input) for h in kernel_heights]
    concated_layers = tf.keras.layers.concatenate(cnn_layers)
    model_output = tf.keras.layers.Flatten()(concated_layers)
    return tf.keras.Model(model_input, model_output)


''' The softmax linear classifier for predicting segment sentiment.

Shape_In: (batch_size, hidden_feature_dim * len(kernel_heights))
Shape_Out: (batch_size, level_class_cnt)

Args:
    index (int): The index of the segment in its belonging document.

Returns:
    (Model): The softmax linear classifier to predict segment sentiment.
'''
def __get_seg_classifier_unit(index):
    def doc_filter(tensor_in, index):
        begin, size = [0, index, 0], [-1, 1, -1]
        return tf.squeeze(tf.slice(tensor_in, begin, size), axis=1)
    return tf.keras.models.Sequential([
        tf.keras.layers.Lambda(doc_filter, arguments={'index':index}),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(level_class_cnt, activation='softmax')
    ])


''' The unit to get the attention weight for a segment from hidden feature.

Shape_In: (batch_size, seg_max, gru_feature_dim * 2)
Shape_Out: (batch_size, 1)

Args:
    index (int): The index of the segment in its belonging document.

Returns:
    (Model): The model for predicting attention weight for a segment.

'''
def __get_attention_unit(index):
    def doc_filter(tensor_in, index):
        begin, size = [0, index, 0], [-1, 1, -1]
        return tf.squeeze(tf.slice(tensor_in, begin, size), axis=1)
    return tf.keras.models.Sequential([
        tf.keras.layers.Lambda(doc_filter, arguments={'index':index}),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(2 * gru_feature_dim, activation='tanh'),
        tf.keras.layers.Dense(1, use_bias=False, activation='softmax')
    ])


''' A bidirectional-GRU unit to extract the hidden vectors.

The hidden vectors are used to predict the attention weights of the model.

Shape_In: (batch_size, seg_max, hidden_feature_dim * len(kernel_heights))
Shape_Out: (batch_size, seg_max, gru_feature_dim * 2)

Returns:
    (Model): The bidirectional-GRU unit to predict the hidden vectors.
'''
def __get_bidirectional_gru_unit():
    return tf.keras.models.Sequential([
        tf.keras.layers.Bidirectional(
            tf.keras.layers.GRU(gru_feature_dim, return_sequences=True)
        )
    ])


''' A CNN module to encode segment with multiple kernel heights.

The module would create feature matrices for a document.

Shape_In: (batch_size, seg_max, word_max, vec_len, 1)
Shape_Out: (batch_size, seg_max, hidden_feature_dim * len(kernel_heights))

Args:
    input_size ((int, int)): The size of the input. (seg_max * word_max * vec_len)
    kernel_heights ([int]): The list of the kernel heights.

Returns:
    (Model): The CNN model to encode the segment matrix.
'''
def get_sentence_encode_module(input_shape, kernel_heights):
    model_input = tf.keras.Input((*input_shape, 1))
    encoder_layers = [__get_multi_kernel_encode_unit(input_shape, 
                                                     kernel_heights, i)
                      (model_input) for i in range(input_shape[0])]
    concated_layers = tf.keras.layers.Concatenate()(encoder_layers)
    new_shape = (input_shape[0], hidden_feature_dim * len(kernel_heights))
    model_output = tf.keras.layers.Reshape(new_shape)(concated_layers)
    return tf.keras.Model(model_input, model_output)


''' The softmax linear classifier for predicting segment sentiment in document.

Shape_In: (batch_size, seg_max, hidden_feature_dim * len(kernel_heights))
Shape_Out: (batch_size, seg_max, level_class_cnt)

Args:
    input_shape ((int, int)): The shape of the document. (seg_max * 
        (hidden_feature_dim * len(kernel_heights)))

Returns:
    (Model): The sentiment classifier for all documents.
'''
def get_seg_classifier_module(input_shape):
    model_input = tf.keras.Input(input_shape)
    softmax_layers = [__get_seg_classifier_unit(i)(model_input) 
                      for i in range(input_shape[0])]
    concated_layers = tf.keras.layers.Concatenate()(softmax_layers)
    new_shape = (input_shape[0], level_class_cnt)
    model_output = tf.keras.layers.Reshape(new_shape)(concated_layers)
    return tf.keras.Model(model_input, model_output)


''' The attentioal model for predicting the attention weights for each segment.

Shape_In: (batch_size, seg_max, hidden_feature_dim * len(kernel_heights))
Shape_Out: (batch_size, seg_max, 1)

Args:
    input_shape ((int, int)): The shape of the document. (seg_max * 
        (hidden_feature_dim * len(kernel_heights)))

Returns:
    (Model): The model for predicting attention weights.
'''
def get_attention_module(input_shape):
    model_input = tf.keras.Input(input_shape)
    biglu_layer = __get_bidirectional_gru_unit()(model_input)
    weight_layers = [__get_attention_unit(i)(biglu_layer) 
                     for i in range(input_shape[0])]
    concated_layers = tf.keras.layers.Concatenate()(weight_layers)
    new_shape = (input_shape[0], 1)
    model_output = tf.keras.layers.Reshape(new_shape)(concated_layers)
    return tf.keras.Model(model_input, model_output)

In [36]:
model_input = tf.keras.Input((5, 100, 30, 1))
encoding_model = get_sentence_encode_module((5, 100, 30), [3, 4, 5])(model_input)

attention_model = get_attention_module((5, 210))(encoding_model)
classification_model = get_seg_classifier_module((5, 210))(encoding_model)

matmul_layer = tf.keras.layers.Lambda(tf.matmul, arguments={'transpose_a': True, 'b': attention_model})(classification_model)
squeeze_layer = tf.keras.layers.Lambda(tf.squeeze, arguments={'axis': -1})(matmul_layer)

model = tf.keras.Model(model_input, squeeze_layer)

model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['accuracy'])

print('Model Compiled.')

model.fit(x_train, y_train, epochs=1)

model.predict(x_test)

Model Compiled.


array([[0.9766311 , 1.0833453 , 0.8760857 , 1.0538265 , 1.0101113 ],
       [1.0264621 , 1.084794  , 0.86409116, 1.0326786 , 0.9919741 ],
       [0.994607  , 1.0902966 , 0.86998445, 1.0479234 , 0.99718827],
       ...,
       [0.98818886, 1.1085814 , 0.8810852 , 1.0568564 , 0.9652882 ],
       [0.9786608 , 1.0765887 , 0.8596323 , 1.0734288 , 1.0116892 ],
       [1.0153153 , 1.0630189 , 0.85709107, 1.0715418 , 0.9930332 ]],
      dtype=float32)