In [32]:
import os
from random import shuffle
from time import time

import h5py
import numpy as np
import tensorflow as tf
# !pip install tensorflow==2.0.0-beta1

In [2]:
# --- Input geometry -------------------------------------------------------
max_seg = 20     # segments per document (first axis of the model input)
max_word = 30    # words per segment (second axis of the model input)

# --- Dataset sizes --------------------------------------------------------
train_amount = 400000
test_amount = 200

# --- Model hyper-parameters ----------------------------------------------
# Number of output units of the per-segment classifier.
# NOTE(review): label_map only ever emits classes 0..2 — confirm 5 is intended.
level_class_cnt = 5

dropout_rate = 0.5
hidden_feature_dim = 70        # convolution filters per kernel height
gru_feature_dim = 150          # GRU units per direction
kernel_heights = [3, 4, 5]     # convolution kernel heights, in words

batch_size = 256

# --- Paths ----------------------------------------------------------------
# The defaults reproduce the original hard-coded locations; set the
# MILNET_DATA_DIR / MILNET_LOG_DIR environment variables to run the notebook
# on another machine without editing this cell.
data_dir = os.environ.get(
    'MILNET_DATA_DIR', '/Volumes/CCsChunk2/datasets/nlp/milnet/nn_input/1'
)
w2v_weights_path = os.path.join(data_dir, 'weights.npy')
input_path = os.path.join(data_dir, 'electronics.hdf5')

# Joining with '' guarantees a trailing separator, which the training cell
# relies on when it appends the run timestamp by plain string concatenation.
tensorboard_log_dir = os.path.join(
    os.environ.get('MILNET_LOG_DIR', '/Users/Frost/Desktop/log/'), ''
)

In [5]:
# Pre-trained embedding matrix: one row per vocabulary entry, one column per
# embedding dimension (it is fed to the Embedding layer as its weights).
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, so
# only point this at a trusted file; drop the flag if the array is plain numeric.
w2v = np.load(w2v_weights_path, allow_pickle=True)
w2v_len = w2v.shape[1]

# Random placeholder batch with the same per-document shape as the model
# input, sized from the config constants instead of repeating 20 / 30.
fake_x = np.random.rand(1000, max_seg, max_word)

def label_map(raw_label):
    """Map a raw numeric label onto one of three class indices.

    Args:
        raw_label: Numeric label to bucket.

    Returns:
        int: 0 when `raw_label < 2`, 1 when `raw_label < 3` (and not below 2),
        and 2 for everything else.
    """
    # Guard clauses are evaluated in the same order as the thresholds, so any
    # value that fails both comparisons falls through to the last class.
    if raw_label < 2:
        return 0
    return 1 if raw_label < 3 else 2

def data_generator(h5_input_path):
    """Yield (document, label) pairs from an HDF5 file in shuffled order.

    The file is expected to hold two groups, `document` and `label`, whose
    members are named by consecutive integers starting at 0. The members are
    visited in random order, and the rows inside each member are visited in
    random order as well.

    Args:
        h5_input_path (str): Path of the HDF5 file to read.

    Yields:
        tuple(np.ndarray, np.ndarray): The document row cast to float64 and
        its label mapped through `label_map`, each wrapped in a leading axis
        of length 1 (i.e. one sample per yielded batch).

    Note:
        The generator makes a single pass over the file and then stops.
    """
    # Open explicitly read-only: older h5py versions default to an
    # append/create mode, which could create or modify the file by accident.
    with h5py.File(h5_input_path, 'r') as in_file:
        group_order = list(range(len(in_file['document/'].keys())))
        shuffle(group_order)
        for index in group_order:
            doc = in_file['document/' + str(index)]
            label = in_file['label/' + str(index)]
            row_order = list(range(len(doc)))
            shuffle(row_order)
            for i in row_order:
                yield np.array([doc[i].astype('float64')]), np.array([label_map(label[i])])

In [26]:
def __get_filter_layer(total_dim, target_dim, index):
    ''' Build a layer that keeps one position of one axis and drops that axis.

    The returned model slices its input at position `index` along axis
    `target_dim` (every other axis is kept whole), then squeezes `target_dim`
    away, so the output has one dimension fewer than the input.

    Args:
        total_dim (int): The number of dimensions of the input tensor; the
            slice specification is built with this many entries.
        target_dim (int): The axis to slice along.
        index (int): The position along `target_dim` to keep.

    Returns:
        (Model): A keras Sequential model wrapping a single Lambda layer that
            implements the operation.
    '''
    # The slice specification only depends on the arguments, so build it once
    # here rather than on every call of the Lambda.
    # In `size`, -1 means "take everything remaining along this axis".
    begin = [index if axis == target_dim else 0 for axis in range(total_dim)]
    size = [1 if axis == target_dim else -1 for axis in range(total_dim)]

    def tensor_filter(tensor_in):
        return tf.squeeze(tf.slice(tensor_in, begin, size), axis=target_dim)

    return tf.keras.models.Sequential([
        tf.keras.layers.Lambda(tensor_filter)
    ])


def __get_branch_model(input_shape, branch_index, output_shape, submodel, args=None):
    ''' Apply one shared `submodel` instance to every slice of a tensor.

    The model cuts its input into slices along axis `branch_index` using
    `__get_filter_layer`, feeds every slice through the same instance of
    `submodel` (so the slices share weights), concatenates the results and
    reshapes them to `output_shape`.

    Args:
        input_shape tuple(int): The shape of the input tensor, without the
            sample (batch) axis.
        branch_index (int): The index of the axis to slice, counted with the
            sample axis as 0.
        output_shape tuple(int): The shape of the output tensor, without the
            sample axis.
        submodel (callable): A factory returning the model to apply to the
            slices; it is called exactly once.
        args (dict): Keyword arguments for `submodel`. Defaults to no
            arguments.

    Returns:
        (Model): A keras model implementing the operation.
    '''
    # Avoid a shared mutable default argument.
    submodel_kwargs = {} if args is None else args

    # `input_shape` has no batch axis, hence the -1 offset when looking up how
    # many slices there are, and the +1 when telling the filter layer the rank.
    branch_count = input_shape[branch_index - 1]

    model_input = tf.keras.Input(input_shape)
    sliced_inputs = [
        __get_filter_layer(len(input_shape) + 1, branch_index, i)(model_input)
        for i in range(branch_count)
    ]
    sub_instance = submodel(**submodel_kwargs)
    branch_outputs = [sub_instance(piece) for piece in sliced_inputs]
    concated_layers = tf.keras.layers.Concatenate()(branch_outputs)
    model_output = tf.keras.layers.Reshape(output_shape)(concated_layers)
    return tf.keras.Model(model_input, model_output)


def __get_sentence_encode_unit(input_shape, kernel_height):
    ''' A CNN unit to encode a segment with a single kernel height.

    The unit adds a channel axis, convolves the segment matrix with
    `hidden_feature_dim` filters whose kernel spans the full width of the
    matrix, applies batch normalization and ReLU, then max-pools over the
    whole remaining height (max-over-time pooling).

    Args:
        input_shape ((int, int)): The shape of the segment matrix,
            (word_max, w2v_len).
        kernel_height (int): The height of the convolution kernel.

    Returns:
        (Model): The CNN model encoding the segment matrix; its output has
            shape (1, hidden_feature_dim, 1) per sample.
    '''
    seg_height, seg_width = input_shape[0], input_shape[1]
    # Height left after the convolution (Conv2D defaults are used here).
    cnned_height = seg_height - kernel_height + 1
    return tf.keras.models.Sequential([
        tf.keras.layers.Reshape((*input_shape, 1)),
        # The kernel width is taken from the input itself rather than from a
        # global, so the convolution always collapses the width axis to 1,
        # which the Reshape below relies on.
        tf.keras.layers.Conv2D(hidden_feature_dim, (kernel_height, seg_width)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        # Move the filter axis into the "width" position so pooling can run
        # over the height axis only.
        tf.keras.layers.Reshape((cnned_height, hidden_feature_dim, 1)),
        tf.keras.layers.MaxPool2D((cnned_height, 1))
    ])


def __get_multi_kernel_encode_unit(input_shape, kernel_heights):
    ''' A CNN unit to encode a segment with multiple kernel heights.

    The unit applies the operation defined in `__get_sentence_encode_unit`
    once per kernel height to the same input, concatenates the results and
    flattens them into a 1-dimensional tensor per sample.

    Args:
        input_shape ((int, int)): The shape of the segment matrix,
            (word_max, w2v_len).
        kernel_heights ([int]): The list of the kernel heights.

    Returns:
        (Model): The CNN model encoding the segment matrix.
    '''
    model_input = tf.keras.Input(input_shape)
    # One independent encoder per kernel height, all reading the same input.
    encoded_per_height = [
        __get_sentence_encode_unit(input_shape, height)(model_input)
        for height in kernel_heights
    ]
    concated_layers = tf.keras.layers.concatenate(encoded_per_height)
    model_output = tf.keras.layers.Flatten()(concated_layers)
    return tf.keras.Model(model_input, model_output)


def __get_seg_classifier_unit():
    ''' Build the softmax linear classifier applied to one encoded segment.

    The unit is a dropout layer (rate `dropout_rate`) followed by a dense
    softmax layer with `level_class_cnt` outputs.

    Returns:
        (Model): The softmax linear classifier to predict segment sentiment.
    '''
    regulariser = tf.keras.layers.Dropout(dropout_rate)
    class_scores = tf.keras.layers.Dense(level_class_cnt, activation='softmax')
    return tf.keras.models.Sequential([regulariser, class_scores])


def __get_attention_unit():
    ''' The unit to get the attention weight for a segment from its hidden feature.

    The unit is a dropout layer, a tanh dense layer of width
    `2 * gru_feature_dim`, and a bias-free single-unit dense layer that
    squashes the score into (0, 1).

    Returns:
        (Model): The model predicting one attention weight per segment.
    '''
    return tf.keras.models.Sequential([
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(2 * gru_feature_dim, activation='tanh'),
        # A softmax over a single unit is constantly 1.0, which would make
        # every attention weight identical and leave this branch untrainable.
        # Sigmoid keeps the weight bounded while letting it vary per segment.
        # Weights that sum to 1 across segments would instead need a softmax
        # over the segment axis, applied where the per-segment scores are
        # stacked.
        tf.keras.layers.Dense(1, use_bias=False, activation='sigmoid')
    ])


def __get_bidirectional_gru_unit():
    ''' Build the bidirectional-GRU unit that extracts the hidden vectors.

    The GRU has `gru_feature_dim` units per direction and returns the full
    output sequence. The hidden vectors are used to predict the attention
    weights of the model.

    Returns:
        (Model): The bidirectional-GRU unit to predict the hidden vectors.
    '''
    recurrent_core = tf.keras.layers.GRU(gru_feature_dim, return_sequences=True)
    two_way = tf.keras.layers.Bidirectional(recurrent_core)
    return tf.keras.models.Sequential([two_way])

In [29]:
print('Constructing Model ...')

# One document per sample: max_seg segments of max_word word indices each.
model_input = tf.keras.Input((max_seg, max_word))

# Frozen embedding lookup initialised from the pre-trained w2v matrix.
embedding_layer = tf.keras.layers.Embedding(
    input_dim=w2v.shape[0], 
    output_dim=w2v_len, 
    weights=[w2v], 
    input_length=max_word, 
    trainable=False
)(model_input)

# Encode every segment with the shared multi-kernel CNN.
encoding_model = __get_branch_model(
    input_shape=(max_seg, max_word, w2v_len), 
    branch_index=1, 
    output_shape=(max_seg, len(kernel_heights) * hidden_feature_dim), 
    submodel=__get_multi_kernel_encode_unit, 
    args={'kernel_heights': kernel_heights, 'input_shape': (max_word, w2v_len)}
)(embedding_layer)

# Run a bidirectional GRU over the sequence of segment encodings.
biglu_model = __get_bidirectional_gru_unit()(encoding_model)

# One attention weight per segment, computed from the GRU hidden vectors.
attention_model = __get_branch_model(
    input_shape=(max_seg, 2 * gru_feature_dim), 
    branch_index=1, 
    output_shape=(max_seg, 1), 
    submodel=__get_attention_unit
)(biglu_model)

# One class distribution per segment, computed from the CNN encodings.
classification_model = __get_branch_model(
    input_shape=(max_seg, len(kernel_heights) * hidden_feature_dim), 
    branch_index=1, 
    output_shape=(max_seg, level_class_cnt), 
    submodel=__get_seg_classifier_unit
)(encoding_model)

# Scale each segment's distribution by its attention weight (the weight
# broadcasts over the class axis), then average over the segment axis.
weighted_layer = tf.keras.layers.Multiply()([attention_model, classification_model])

reduce_layer = tf.keras.layers.Lambda(tf.reduce_mean, arguments={'axis': 1})(weighted_layer)

model = tf.keras.Model(model_input, reduce_layer)

print('Model Constructed. Compiling ...')

# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
# NOTE(review): 0.1 is two orders of magnitude above Adam's default of 0.001
# — confirm this value is intended.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])

print('Model Compiled.')

model.summary()

Constructing Model ...
Model Constructed. Compiling ...
Model Compiled.
Model: "model_84"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_91 (InputLayer)           [(None, 20, 30)]     0                                            
__________________________________________________________________________________________________
embedding_10 (Embedding)        (None, 20, 30, 300)  58749600    input_91[0][0]                   
__________________________________________________________________________________________________
model_81 (Model)                (None, 20, 210)      253050      embedding_10[0][0]               
__________________________________________________________________________________________________
sequential_403 (Sequential)     (None, 20, 300)      325800      model_81[1][0]                   
___________________

In [None]:
# Each run logs to its own directory, named by appending the current
# timestamp to the configured log prefix.
tensorboard = tf.keras.callbacks.TensorBoard(
    log_dir=tensorboard_log_dir + str(time())
)

# NOTE(review): data_generator yields one sample per step, so an epoch of
# train_amount // batch_size steps sees that many samples rather than
# train_amount of them — confirm this is intended.
model.fit_generator(
    generator=data_generator(input_path),
    epochs=25,
    steps_per_epoch=train_amount // batch_size,
    callbacks=[tensorboard],
)

# model.predict(fake_x).shape

Epoch 1/25


W0625 01:27:51.649326 140734800324032 deprecation.py:323] From /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow_core/python/ops/math_grad.py:1251: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
