In [0]:
import tensorflow as tf
from tensorflow.keras import backend as K
import numpy as np
import h5py
import os
import pickle
import datetime
from random import shuffle
from functools import reduce
import matplotlib.pyplot as plt
from google.colab import drive
drive.mount('/content/gdrive')
# !pip install tensorflow-gpu

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
max_seg = 7
max_word = 18
# max_seg = 11
# max_word = 7
max_doc = 60
level_class_cnt = 3

test_percentage = 0.1
validation_percentage = 0.1

batch_size = 512
epochs = 8

input_path = '/content/gdrive/My Drive/data_source/milnet/model_inputs/gourmet.hdf5'
w2v_weights_path = '/content/gdrive/My Drive/data_source/milnet/model_inputs/w2v_weights.npy'

model_out_path = '/content/gdrive/My Drive/data_source/milnet/results/gourmet_sentence_c3_w2v.h5'
log_out_dir = '/content/gdrive/My Drive/data_source/milnet/log/'

sample_amount = 0
mini_batch_cnt = 0
with h5py.File(input_path) as in_file:
    for index in range(len(in_file['label/'].keys())):
        mini_batch_cnt += 1
        sample_amount += len(in_file['label/' + str(index)])
batch_indices = [*range(mini_batch_cnt)]
shuffle(batch_indices)

train_batches = batch_indices[0:int(mini_batch_cnt * (1 - test_percentage - validation_percentage))]
validation_batches = batch_indices[int(mini_batch_cnt * (1 - test_percentage - validation_percentage)): int(mini_batch_cnt * (1 - test_percentage))]
test_batches = batch_indices[int(mini_batch_cnt * (1 - test_percentage)):]

In [0]:
w2v = np.load(w2v_weights_path, allow_pickle=True)
w2v_len = w2v.shape[1]

In [0]:
def __label_map(raw_label):
    if raw_label == 1 or raw_label == 2:
        return 0
    elif raw_label == 3:
        return 1
    else:
        return 2

def __balance_data(feature_array, label_array):
    to_balance_indices = np.concatenate([np.where(label_array == 2)[0], np.where(label_array == 4)[0]])
    return np.delete(feature_array, to_balance_indices, axis=0), np.delete(label_array, to_balance_indices, axis=0)

def data_generator(batch_indices, max_doc=max_doc, max_seg=max_seg, max_word=max_word, epochs=epochs, use_balance=True):
    global batch_size, input_path
    with h5py.File(input_path) as in_file:
        feature_array, label_array = np.zeros((batch_size, max_seg, max_word)), np.zeros((batch_size, 1))
        batch_index = 0
        for _ in range(epochs):
            shuffle(batch_indices)
            for index in batch_indices:
                doc, label = in_file['document/' + str(index)], in_file['label/' + str(index)]
                random_doc_order = [*range(len(doc))]
                shuffle(random_doc_order)
                for i in random_doc_order:
                    feature_array[batch_index] = doc[i][:max_seg, :max_word]
                    label_array[batch_index] = label[i]
                    batch_index += 1
                    if batch_index == batch_size:
                        if use_balance:
                            feature_array, label_array = __balance_data(feature_array, label_array)
                        yield feature_array, np.array([np.array([__label_map(l[0])]) for l in label_array])
                        batch_index = 0
                        feature_array, label_array = np.zeros((batch_size, max_seg, max_word)), np.zeros((batch_size, 1))

def get_data(batch_indices, max_seg=max_seg, max_word=max_word):
    global input_path
    with h5py.File(input_path) as in_file:
        sample_amount = sum([len(in_file['document/' + str(i)]) for i in batch_indices])
        feature_array, label_array = np.zeros((sample_amount, max_seg, max_word)), np.zeros((sample_amount, 1))
        batch_index = 0
        cnt = 0
        for index in batch_indices:
            doc, label = in_file['document/' + str(index)], in_file['label/' + str(index)]
            for i in range(len(doc)):
                feature_array[cnt] = doc[i][:max_seg, :max_word]
                label_array[cnt] = __label_map(label[i])
                cnt += 1
        return feature_array, label_array

In [0]:
shared_sublayer_cache = {}

def branch_execute(layer_in, sublayer, args={}):
    instance_cnt = layer_in.shape[1]
    sliced_inputs = [tf.keras.layers.Lambda(lambda x: x[:,i])(layer_in) 
                     for i in range(instance_cnt)]
    branch_layers = [sublayer(**{**{'layer_in': sliced_inputs[i]}, **args}) 
                     for i in range(instance_cnt)]
    expand_layer = tf.keras.layers.Lambda(lambda x: tf.keras.backend.expand_dims(x, axis=1))
    expanded_layers = [expand_layer(branch_layers[i]) for i in range(instance_cnt)]
    concated_layer = tf.keras.layers.Concatenate(axis=1)(expanded_layers)
    return concated_layer

def __sentence_encode_layer_share(layer_in, hidden_feature_dim, kernel_height, eta):
    cnned_height = layer_in.shape[1] - kernel_height + 1
    global shared_sublayer_cache
    if 'shared_sentence_encode_sublayers' + str(kernel_height) not in shared_sublayer_cache:
        shared_sublayer_cache['shared_sentence_encode_sublayers' + str(kernel_height)] = {
            'conv_layer': tf.keras.layers.Conv1D(
                filters=hidden_feature_dim,
                kernel_size=kernel_height,
                kernel_regularizer=tf.keras.regularizers.l2(eta)
            ),
            'batch_normalize_layer': tf.keras.layers.BatchNormalization(
            ),
            'relu_layer': tf.keras.layers.ReLU(
            ),
            'max_pool_layer': tf.keras.layers.MaxPool1D(
                (cnned_height,)
            )
        }
    shared_layers = shared_sublayer_cache['shared_sentence_encode_sublayers' + str(kernel_height)]
    conv_layer = shared_layers['conv_layer'](layer_in)
    batch_normalize_layer = shared_layers['batch_normalize_layer'](conv_layer)
    relu_layer = shared_layers['relu_layer'](batch_normalize_layer)
    max_pool_layer = shared_layers['max_pool_layer'](relu_layer)
    return max_pool_layer

def __multi_kernel_encode_layer(layer_in, hidden_feature_dim, kernel_heights, eta):
    cnn_layers = [__sentence_encode_layer_share(layer_in, hidden_feature_dim, h, eta) 
                  for h in kernel_heights]
    concated_layer = tf.keras.layers.Concatenate()(cnn_layers)
    flatten_layer = tf.keras.layers.Flatten()(concated_layer)
    return flatten_layer

def __seg_classifier_layer_share(layer_in, class_cnt, dropout_rate, eta):
    global shared_sublayer_cache
    if 'shared_seg_classifier_sublayers' not in shared_sublayer_cache:
        shared_sublayer_cache['shared_seg_classifier_sublayers'] = {
            'drop_out_layer': tf.keras.layers.Dropout(
                dropout_rate
            ),
            'dense_layer': tf.keras.layers.Dense(
                units=class_cnt,
                activation='softmax',
                kernel_regularizer=tf.keras.regularizers.l2(eta),
                bias_regularizer=tf.keras.regularizers.l2(eta)
            )
        }
    shared_layers = shared_sublayer_cache['shared_seg_classifier_sublayers']
    drop_out_layer = shared_layers['drop_out_layer'](layer_in)
    dense_layer = shared_layers['dense_layer'](drop_out_layer)
    return dense_layer

def __attention_layer_share(layer_in, attention_key_dim, dropout_rate, eta):
    global shared_sublayer_cache
    if 'shared_attention_sublayers' not in shared_sublayer_cache:
        shared_sublayer_cache['shared_attention_sublayers'] = {
            'drop_out_layer': tf.keras.layers.Dropout(
                dropout_rate
            ),
            'dense_layer': tf.keras.layers.Dense(
                units=attention_key_dim, 
                activation='tanh',
                kernel_regularizer=tf.keras.regularizers.l2(eta),
                bias_regularizer=tf.keras.regularizers.l2(eta)
            ),
            'nobias_dense_layer': tf.keras.layers.Dense(
                units=1, 
                use_bias=False, 
                bias_regularizer=tf.keras.regularizers.l2(eta)
            )
        }
    shared_layers = shared_sublayer_cache['shared_attention_sublayers']
    drop_out_layer = shared_layers['drop_out_layer'](layer_in)
    dense_layer = shared_layers['dense_layer'](drop_out_layer)
    nobias_dense_layer = shared_layers['nobias_dense_layer'](dense_layer)
    return nobias_dense_layer

def bidirectional_gru_layer(layer_in, gru_feature_dim):
    bidirectional_layer = tf.keras.layers.Bidirectional(
        tf.keras.layers.GRU(gru_feature_dim, return_sequences=True)
    )(layer_in)
    return bidirectional_layer

def merge_layer(layer_in, class_cnt, eta):
    dot_layer = tf.keras.layers.Dot(axes=1)(layer_in)
    flatten_layer = tf.keras.layers.Flatten()(dot_layer)
    dense_layer = tf.keras.layers.Dense(
        units=class_cnt, 
        activation='softmax',
        kernel_regularizer=tf.keras.regularizers.l2(eta),
        bias_regularizer=tf.keras.regularizers.l2(eta)
    )(flatten_layer)
    return dense_layer

def performance_judge(model, generator, class_cnt):
    eps = np.finfo(float).eps
    accuracy, precisions, recalls, f1s = [], [], [], []
    for i, (features, labels) in enumerate(generator):
        predicted = model.predict(features)
        precisions.append([])
        recalls.append([])
        f1s.append([])
        contingency_table = np.zeros((class_cnt, class_cnt))
        for index in range(features.shape[0]):
            contingency_table[int(labels[index][0])][np.argmax(predicted[index])] += 1
        accuracy.append(np.trace(contingency_table) / features.shape[0])
        for index in range(class_cnt):
            precisions[i].append(contingency_table[index][index] / (np.sum(contingency_table[:, index]) + eps))
            recalls[i].append(contingency_table[index][index] / (np.sum(contingency_table[index, :]) + eps))
            f1s[i].append(2 * precisions[i][-1] * recalls[i][-1] / ((precisions[i][-1] + recalls[i][-1]) + eps))
    precisions = [float(sum(l))/len(l) for l in zip(*precisions)]
    recalls = [float(sum(l))/len(l) for l in zip(*recalls)]
    f1s = [float(sum(l))/len(l) for l in zip(*f1s)]
    print('Accuracy:', round(reduce(lambda x, y: x + y, accuracy) / len(accuracy), 3))
    for index in range(class_cnt):
        print('_____ Class', index, '_____')
        print('Precision\t', round(precisions[index], 3))
        print('Recall\t\t', round(recalls[index], 3))
        print('F1 Score\t', round(f1s[index], 3))

In [0]:
print('Constructing Model ...', end='')

model_input = tf.keras.Input((max_seg, max_word))

embedding_layer = tf.keras.layers.Embedding(
    input_dim=w2v.shape[0], 
    output_dim=w2v_len, 
    weights=[w2v], 
    input_length=max_word, 
    trainable=False
)(model_input)

encoding_layer = branch_execute(
    embedding_layer, 
    sublayer=__multi_kernel_encode_layer, 
    args={
        'hidden_feature_dim': 100,
        'kernel_heights': [3, 4, 5],
        'eta': 1e-4
    }
)

biglu_layer = bidirectional_gru_layer(
    encoding_layer, 
    gru_feature_dim=50
)

attention_layer = branch_execute(
    biglu_layer, 
    sublayer=__attention_layer_share, 
    args={
        'attention_key_dim': 100,
        'dropout_rate': 0.5,
        'eta': 1e-4
    }
)

softmaxed_attention_layer = tf.keras.layers.Softmax(
    axis=1
)(attention_layer)

classification_layer = branch_execute(
    encoding_layer, 
    sublayer=__seg_classifier_layer_share, 
    args={
        'class_cnt': level_class_cnt,
        'dropout_rate': 0.5,
        'eta': 1e-4
    }
)

merge_layer = merge_layer(
    [softmaxed_attention_layer, classification_layer],
    class_cnt=level_class_cnt,
    eta=1e-4
)

model = tf.keras.Model(model_input, merge_layer)

print('\rModel Constructed. Compiling ...', end='')

model.compile(
    optimizer=tf.keras.optimizers.Adam(clipvalue=0.5),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy']
)

print('\rModel Compiled.')

model.summary()

W0716 21:49:14.589535 139727445047168 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/initializers.py:119: calling RandomUniform.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Constructing Model ...

W0716 21:49:18.106675 139727445047168 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0716 21:49:19.845074 139727445047168 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/init_ops.py:97: calling GlorotUniform.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0716 21:49:19.846743 139727445047168 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/init_ops.py:97: calling Orthogonal.__init__ (from tensorflow.python.ops.init_ops) with

Model Constructed. Compiling ...Model Compiled.
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 7, 18)]      0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 7, 18, 300)   49505700    input_1[0][0]                    
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 18, 300)      0           embedding[0][0]                  
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 18, 300)      0           embedding[0][0]                  
____________________________________________

In [0]:
logdir = os.path.join(log_out_dir, datetime.datetime.now().strftime("%Y%m%d_%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=0)

model.fit_generator(
    data_generator(train_batches, use_balance=True), 
    validation_data=data_generator(validation_batches, use_balance=True),
    steps_per_epoch=(sample_amount * (1 - test_percentage - validation_percentage) // batch_size) - 1,
    validation_steps=(sample_amount * (validation_percentage) // batch_size) - 1,
    validation_freq=2,
    epochs=epochs,
    callbacks=[tensorboard_callback],
)

model.save(model_out_path)

print('########## Training Error ##########')
performance_judge(model, data_generator(train_batches, epochs=1, use_balance=True), level_class_cnt)
print('')
print('############ Test Error ############')
performance_judge(model, data_generator(test_batches, epochs=1, use_balance=True), level_class_cnt)

print(logdir)

Epoch 1/8


W0716 21:49:34.033973 139727445047168 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
########## Training Error ##########
Accuracy: 0.82
_____ Class 0 _____
Precision	 0.811
Recall		 0.742
F1 Score	 0.774
_____ Class 1 _____
Precision	 0.821
Recall		 0.777
F1 Score	 0.798
_____ Class 2 _____
Precision	 0.827
Recall		 0.93
F1 Score	 0.875

############ Test Error ############
Accuracy: 0.77
_____ Class 0 _____
Precision	 0.75
Recall		 0.678
F1 Score	 0.711
_____ Class 1 _____
Precision	 0.776
Recall		 0.73
F1 Score	 0.752
_____ Class 2 _____
Precision	 0.777
Recall		 0.886
F1 Score	 0.827
/content/gdrive/My Drive/data_source/milnet/log/20190716_214930
