### Package Preparation

In [0]:
!pip install tf_sentencepiece
import tensorflow as tf
import numpy as np
import tensorflow_hub as hub
import tf_sentencepiece
from random import shuffle, choice
import re
import os
import datetime
from functools import reduce
from operator import itemgetter
from google.colab import drive
drive.mount('/content/gdrive')

Collecting tf_sentencepiece
[?25l  Downloading https://files.pythonhosted.org/packages/72/fe/363d78d29c556d0da642ffe285c2c7573b6a83239a9b0d08d83376c9fbac/tf_sentencepiece-0.1.82.1-py2.py3-none-manylinux1_x86_64.whl (2.8MB)
[K     |████████████████████████████████| 2.8MB 2.8MB/s 
[?25hInstalling collected packages: tf-sentencepiece
Successfully installed tf-sentencepiece-0.1.82.1


W0716 08:03:34.886610 140485658814336 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/tf_sentencepiece/sentencepiece_processor_ops.py:259: The name tf.NotDifferentiable is deprecated. Please use tf.no_gradient instead.



Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


### Configurations

In [0]:
xling_encoding_len = 512
max_seg = 10
level_class_cnt = 3

test_percentage = 0.1
validation_percentage = 0.1

dropout_rate = 0.5
eta = 1e-4
hidden_feature_dim = 100
attention_key_dim = 100
gru_feature_dim = 50

batch_size = 512
epochs = 8

label_re = re.compile('(\d+)\.\d+')
sentence_re = re.compile('(?:\.|!|\?)\s')

input_path = '/content/gdrive/My Drive/data_source/milnet/raw_text/gourmet.txt'
model_out_path = '/content/gdrive/My Drive/data_source/milnet/results/food_xling_c3.h5'
log_out_dir = '/content/gdrive/My Drive/data_source/milnet/log/'

sample_amount = 0
with open(input_path) as in_file:
    sample_amount = len(in_file.read().split('\n\n')) - 1
sample_indices = [*range(sample_amount)]
shuffle(sample_indices)
train_samples = sample_indices[0:int(sample_amount * (1 - test_percentage - validation_percentage))]
validation_samples = sample_indices[int(sample_amount * (1 - test_percentage - validation_percentage)): int(sample_amount * (1 - test_percentage))]
test_samples = sample_indices[int(sample_amount * (1 - test_percentage)):]

### Data Preloading

In [0]:
g = tf.Graph()
with g.as_default():
    text_input = tf.placeholder(dtype=tf.string, shape=[None])
    en_de_embed = hub.Module("https://tfhub.dev/google/universal-sentence-encoder-xling/en-de/1")
    embedded_text = en_de_embed(text_input)
    init_op = tf.group([tf.global_variables_initializer(), tf.tables_initializer()])
g.finalize()
session = tf.Session(graph=g)
session.run(init_op)

In [0]:
def __pad_doc_encoding(doc_encoding, max_seg):
    if doc_encoding.shape[0] > max_seg:
        return doc_encoding[:max_seg]
    elif doc_encoding.shape[0] < max_seg:
        topad_len = max_seg - doc_encoding.shape[0]
        pad_width = [(0, 0) if i != 0 else (0, topad_len) for i in range(len(doc_encoding.shape))]
        return np.pad(doc_encoding, pad_width, 'constant', constant_values=0)
    else:
        return doc_encoding

def __label_map(raw_label):
    if raw_label == 1 or raw_label == 2:
        return 0
    elif raw_label == 3:
        return 1
    else:
        return 2

def __balance_data(feature_array, label_array):
    to_balance_indices = np.concatenate([np.where(label_array == 2)[0], np.where(label_array == 4)[0]])
    return np.delete(feature_array, to_balance_indices, axis=0), np.delete(label_array, to_balance_indices, axis=0)

def data_generator(sample_indices, input_path, segment_re, label_re, 
                   batch_size=batch_size, max_seg=max_seg, xling_len=xling_encoding_len, epochs=epochs, use_balance=True):
    global session, embedded_text, text_input
    with open(input_path) as in_file:
        file_content = [*itemgetter(*sample_indices)(in_file.read().split('\n\n'))]
        for _ in range(epochs):
            shuffle(file_content)
            feature_cache, label_cache = [], []
            batch_index = 0
            for sample in file_content:
                label_cache.append(sample.split('\n')[0])
                feature_cache.append([*filter(lambda x: len(x) > 1, segment_re.split(' '.join(sample.split('\n')[1:])))])
                batch_index += 1
                if batch_index == batch_size:
                    len_lst = [*map(len, feature_cache)]
                    batch_features = session.run(embedded_text, feed_dict={text_input: reduce(lambda x, y: x + y, feature_cache)})
                    label_array = np.array([np.array([int(label_re.findall(l)[0])]) for l in label_cache])
                    feature_array = np.zeros((batch_size, max_seg, xling_len))
                    for index, length in enumerate(len_lst):
                        feature_array[index] = __pad_doc_encoding(np.array(batch_features[:length]), max_seg)
                        batch_features = batch_features[length:]
                    feature_array = np.array(feature_array)
                    if use_balance:
                        feature_array, label_array = __balance_data(feature_array, label_array)
                    yield feature_array, np.array([np.array([__label_map(l[0])]) for l in label_array])
                    feature_cache, label_cache = [], []
                    batch_index = 0

In [0]:
shared_sublayer_cache = {}

def branch_execute(layer_in, sublayer, args={}):
    instance_cnt = layer_in.shape[1]
    sliced_inputs = [tf.keras.layers.Lambda(lambda x: x[:,i])(layer_in) 
                     for i in range(instance_cnt)]
    branch_layers = [sublayer(**{**{'layer_in': sliced_inputs[i]}, **args}) 
                     for i in range(instance_cnt)]
    expand_layer = tf.keras.layers.Lambda(lambda x: tf.keras.backend.expand_dims(x, axis=1))
    expanded_layers = [expand_layer(branch_layers[i]) for i in range(instance_cnt)]
    concated_layer = tf.keras.layers.Concatenate(axis=1)(expanded_layers)
    return concated_layer

def __seg_classifier_layer_share(layer_in, class_cnt, dropout_rate, eta):
    global shared_sublayer_cache
    if 'shared_seg_classifier_sublayers' not in shared_sublayer_cache:
        shared_sublayer_cache['shared_seg_classifier_sublayers'] = {
            'drop_out_layer': tf.keras.layers.Dropout(
                dropout_rate
            ),
            'dense_layer': tf.keras.layers.Dense(
                units=class_cnt,
                activation='softmax',
                kernel_regularizer=tf.keras.regularizers.l2(eta),
                bias_regularizer=tf.keras.regularizers.l2(eta)
            )
        }
    shared_layers = shared_sublayer_cache['shared_seg_classifier_sublayers']
    drop_out_layer = shared_layers['drop_out_layer'](layer_in)
    dense_layer = shared_layers['dense_layer'](drop_out_layer)
    return dense_layer

def __attention_layer_share(layer_in, attention_key_dim, dropout_rate, eta):
    global shared_sublayer_cache
    if 'shared_attention_sublayers' not in shared_sublayer_cache:
        shared_sublayer_cache['shared_attention_sublayers'] = {
            'drop_out_layer': tf.keras.layers.Dropout(
                dropout_rate
            ),
            'dense_layer': tf.keras.layers.Dense(
                units=attention_key_dim, 
                activation='tanh',
                kernel_regularizer=tf.keras.regularizers.l2(eta),
                bias_regularizer=tf.keras.regularizers.l2(eta)
            ),
            'nobias_dense_layer': tf.keras.layers.Dense(
                units=1, 
                use_bias=False, 
                bias_regularizer=tf.keras.regularizers.l2(eta)
            )
        }
    shared_layers = shared_sublayer_cache['shared_attention_sublayers']
    drop_out_layer = shared_layers['drop_out_layer'](layer_in)
    dense_layer = shared_layers['dense_layer'](drop_out_layer)
    nobias_dense_layer = shared_layers['nobias_dense_layer'](dense_layer)
    return nobias_dense_layer

def bidirectional_gru_layer(layer_in, gru_feature_dim):
    bidirectional_layer = tf.keras.layers.Bidirectional(
        tf.keras.layers.GRU(gru_feature_dim, return_sequences=True)
    )(layer_in)
    return bidirectional_layer

def merge_layer(layer_in, class_cnt, eta):
    dot_layer = tf.keras.layers.Dot(axes=1)(layer_in)
    flatten_layer = tf.keras.layers.Flatten()(dot_layer)
    dense_layer = tf.keras.layers.Dense(
        units=class_cnt, 
        activation='softmax',
        kernel_regularizer=tf.keras.regularizers.l2(eta),
        bias_regularizer=tf.keras.regularizers.l2(eta)
    )(flatten_layer)
    return dense_layer

def performance_judge(model, generator, class_cnt):
    eps = np.finfo(float).eps
    accuracy, precisions, recalls, f1s = [], [], [], []
    for i, (features, labels) in enumerate(generator):
        predicted = model.predict(features)
        precisions.append([])
        recalls.append([])
        f1s.append([])
        contingency_table = np.zeros((class_cnt, class_cnt))
        for index in range(features.shape[0]):
            contingency_table[int(labels[index][0])][np.argmax(predicted[index])] += 1
        accuracy.append(np.trace(contingency_table) / features.shape[0])
        for index in range(class_cnt):
            precisions[i].append(contingency_table[index][index] / (np.sum(contingency_table[:, index]) + eps))
            recalls[i].append(contingency_table[index][index] / (np.sum(contingency_table[index, :]) + eps))
            f1s[i].append(2 * precisions[i][-1] * recalls[i][-1] / ((precisions[i][-1] + recalls[i][-1]) + eps))
    precisions = [float(sum(l))/len(l) for l in zip(*precisions)]
    recalls = [float(sum(l))/len(l) for l in zip(*recalls)]
    f1s = [float(sum(l))/len(l) for l in zip(*f1s)]
    print('Accuracy:', round(reduce(lambda x, y: x + y, accuracy) / len(accuracy), 3))
    for index in range(class_cnt):
        print('_____ Class', index, '_____')
        print('Precision\t', round(precisions[index], 3))
        print('Recall\t\t', round(recalls[index], 3))
        print('F1 Score\t', round(f1s[index], 3))

In [0]:
print('Constructing Model ...', end='')

model_input = tf.keras.Input((max_seg, xling_encoding_len))

biglu_layer = bidirectional_gru_layer(
    model_input, 
    gru_feature_dim=50
)

attention_layer = branch_execute(
    biglu_layer, 
    sublayer=__attention_layer_share, 
    args={
        'attention_key_dim': 100,
        'dropout_rate': 0.5,
        'eta': 1e-4
    }
)

softmaxed_attention_layer = tf.keras.layers.Softmax(
    axis=1
)(attention_layer)

classification_layer = branch_execute(
    model_input, 
    sublayer=__seg_classifier_layer_share, 
    args={
        'class_cnt': level_class_cnt,
        'dropout_rate': 0.5,
        'eta': 1e-4
    }
)

merge_layer = merge_layer(
    [softmaxed_attention_layer, classification_layer],
    class_cnt=level_class_cnt,
    eta=1e-4
)

model = tf.keras.Model(model_input, merge_layer)

print('\rModel Constructed. Compiling ...', end='')

model.compile(
    optimizer=tf.keras.optimizers.Adam(clipvalue=0.5),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy']
)

print('\rModel Compiled.')

model.summary()

W0716 08:08:03.067019 140485658814336 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0716 08:08:03.093594 140485658814336 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/init_ops.py:97: calling GlorotUniform.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0716 08:08:03.095370 140485658814336 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/init_ops.py:97: calling Orthogonal.__init__ (from tensorflow.python.ops.init_ops) with

Model Compiled.
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 10, 512)]    0                                            
__________________________________________________________________________________________________
bidirectional (Bidirectional)   (None, 10, 100)      168900      input_1[0][0]                    
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 100)          0           bidirectional[0][0]              
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 100)          0           bidirectional[0][0]              
______________________________________________________________________________

In [0]:
logdir = os.path.join(log_out_dir, datetime.datetime.now().strftime("%Y%m%d_%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=0)

model.fit_generator(
    data_generator(train_samples, input_path, sentence_re, label_re, use_balance=True),
    validation_data=data_generator(validation_samples, input_path, sentence_re, label_re, use_balance=True),
    steps_per_epoch=(sample_amount * (1 - test_percentage - validation_percentage) // batch_size) - 1,
    validation_steps=(sample_amount * (validation_percentage) // batch_size) - 1,
    validation_freq=2,
    epochs=epochs,
    callbacks=[tensorboard_callback]
)

model.save(model_out_path)

print('########## Training Error ##########')
performance_judge(model, data_generator(train_samples, input_path, sentence_re, label_re, epochs=1, use_balance=True), level_class_cnt)
print('')
print('############ Test Error ############')
performance_judge(model, data_generator(train_samples, input_path, sentence_re, label_re, epochs=1, use_balance=True), level_class_cnt)

print(logdir)

Epoch 1/8


W0716 08:08:46.489855 140485658814336 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


  2/701 [..............................] - ETA: 1:22:20 - loss: 1.1507 - acc: 0.3211

W0716 08:08:55.576535 140485658814336 callbacks.py:257] Method (on_train_batch_end) is slow compared to the batch update (0.105707). Check your callbacks.


Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
########## Training Error ##########
Accuracy: 0.78
_____ Class 0 _____
Precision	 0.792
Recall		 0.783
F1 Score	 0.787
_____ Class 1 _____
Precision	 0.687
Recall		 0.699
F1 Score	 0.692
_____ Class 2 _____
Precision	 0.864
Recall		 0.86
F1 Score	 0.861

############ Test Error ############
Accuracy: 0.78
_____ Class 0 _____
Precision	 0.792
Recall		 0.783
F1 Score	 0.786
_____ Class 1 _____
Precision	 0.687
Recall		 0.698
F1 Score	 0.692
_____ Class 2 _____
Precision	 0.864
Recall		 0.859
F1 Score	 0.861
/content/gdrive/My Drive/data_source/milnet/log/20190716_080839
