In [1]:
import numpy as np
import pandas as pd
from bert4keras.backend import keras, set_gelu, search_layer, K
from bert4keras.tokenizers import Tokenizer
from bert4keras.models import build_transformer_model
from bert4keras.optimizers import Adam, extend_with_piecewise_linear_lr
from bert4keras.snippets import sequence_padding

from keras.layers import Lambda, Dense, Dropout
from keras.utils import to_categorical

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score
from tqdm import tqdm

Using TensorFlow backend.


In [2]:
import tensorflow as tf
import os
# Restrict this process to the CUDA device with index 0.
os.environ["CUDA_VISIBLE_DEVICES"]="0"

# Session config asking TensorFlow to use at most 24% of the GPU memory for this process.
config=tf.compat.v1.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.24
# NOTE(review): `sess` is created here but no set_session(...) call is visible in this
# notebook — confirm that Keras actually picks up this configuration.
sess=tf.compat.v1.Session(config=config)

2021-10-25 20:10:15.404659: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 AVX512F FMA
2021-10-25 20:10:15.424162: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2200000000 Hz
2021-10-25 20:10:15.427207: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x556ca3e8b6b0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2021-10-25 20:10:15.427290: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2021-10-25 20:10:15.430561: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2021-10-25 20:10:15.605369: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x556ca44bae70 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2021-10-25 20:10:15.605473: I tensorflow/compil

In [3]:
# All three pre-trained artefacts live in the same checkpoint directory.
_pretrained_dir = 'RoBERTa-tiny3L312-clue'
config_path, checkpoint_path, dict_path = [
    '{}/{}'.format(_pretrained_dir, _file_name)
    for _file_name in ('bert_config.json', 'bert_model.ckpt', 'vocab.txt')
]

In [2]:
def _load_split(csv_path):
    """Read one CSV split, drop its first column and discard rows with missing values."""
    frame = pd.read_csv(csv_path)
    without_first_column = frame.iloc[:, 1:]
    return without_first_column.dropna()


train_data = _load_split('data/sentiment_analysis_trainingset.csv')
valid_data = _load_split('data/sentiment_analysis_validationset.csv')

In [5]:
train_data.head()

Unnamed: 0,content,location_traffic_convenience,location_distance_from_business_district,location_easy_to_find,service_wait_time,service_waiters_attitude,service_parking_convenience,service_serving_speed,price_level,price_cost_effective,...,environment_decoration,environment_noise,environment_space,environment_cleaness,dish_portion,dish_taste,dish_look,dish_recommendation,others_overall_experience,others_willing_to_consume_again
0,"""吼吼吼，萌死人的棒棒糖，中了大众点评的霸王餐，太可爱了。一直就好奇这个棒棒糖是怎么个东西，...",-2,-2,-2,-2,1,-2,-2,-2,-2,...,-2,-2,-2,-2,-2,-2,1,-2,1,-2
1,"""第三次参加大众点评网霸王餐的活动。这家店给人整体感觉一般。首先环境只能算中等，其次霸王餐提...",-2,-2,-2,-2,-2,-2,-2,0,-2,...,0,0,0,0,1,-2,-2,-2,1,-2
2,"""4人同行 点了10个小吃\n榴莲酥 榴莲味道不足 松软 奶味浓\n虾饺 好吃 两颗大虾仁\...",-2,-2,-2,-2,0,-2,1,0,-2,...,-2,-2,1,-2,0,1,-2,-2,0,-2
3,"""之前评价了莫名其妙被删 果断继续差评！ 换了菜单 价格更低 开始砸牌子 但套餐还是有150...",-2,-2,-2,-2,-2,-2,-2,0,-2,...,-2,-2,-2,-2,-2,-1,-2,-2,-1,-1
4,"""出乎意料地惊艳，椰子鸡清热降火，美容养颜，大大满足了爱吃火锅怕上火星人。椰子冻是帅帅的老板...",-2,-2,-2,-2,-2,-2,-2,-2,-2,...,-2,-2,-2,-2,-2,1,1,-2,1,-2


In [5]:
# The raw label columns hold values from -2 to 1 (see the `head()` preview above);
# adding 2 maps them onto the class ids 0..3 used by the classifier heads.
# The shift is guarded so that re-running this cell does not add the offset a second
# time: once no negative label is left, the frame is considered already shifted.
LABEL_OFFSET = 2
for _frame in (train_data, valid_data):
    _label_block = _frame.iloc[:, 1:]  # every column after `content` is a label column
    if (_label_block.values < 0).any():
        _frame.iloc[:, 1:] = _label_block + LABEL_OFFSET

In [6]:
train_data.columns

Index(['content', 'location_traffic_convenience',
       'location_distance_from_business_district', 'location_easy_to_find',
       'service_wait_time', 'service_waiters_attitude',
       'service_parking_convenience', 'service_serving_speed', 'price_level',
       'price_cost_effective', 'price_discount', 'environment_decoration',
       'environment_noise', 'environment_space', 'environment_cleaness',
       'dish_portion', 'dish_taste', 'dish_look', 'dish_recommendation',
       'others_overall_experience', 'others_willing_to_consume_again'],
      dtype='object')

In [7]:
# for column in train_data.columns:
#     print(valid_data[column].value_counts())

In [8]:
len(train_data)

105000

In [9]:
len(valid_data)

15000

In [10]:
# Every column after the first one (`content`) is a label column.
tags = train_data.columns[1:]
tags

Index(['location_traffic_convenience',
       'location_distance_from_business_district', 'location_easy_to_find',
       'service_wait_time', 'service_waiters_attitude',
       'service_parking_convenience', 'service_serving_speed', 'price_level',
       'price_cost_effective', 'price_discount', 'environment_decoration',
       'environment_noise', 'environment_space', 'environment_cleaness',
       'dish_portion', 'dish_taste', 'dish_look', 'dish_recommendation',
       'others_overall_experience', 'others_willing_to_consume_again'],
      dtype='object')

In [11]:
# Number of label columns; one classification head is built per column further down.
num_classes_topic = len(tags)
num_classes_topic

20

In [12]:
set_gelu('tanh')  # switch the GELU implementation

maxlen = 256  # passed as `maxlen` to tokenizer.encode
batch_size = 128  # training batch size; also used to size the learning-rate schedule
epochs = 25  # number of training epochs
dropout_rate = 0.1  # dropout applied in front of every classification head
num_classes_sentiment = 4  # output width of every classification head

bert

In [13]:
# Build the tokenizer
tokenizer = Tokenizer(dict_path, do_lower_case=True)

In [14]:
def get_data(data):
    """Turn a review DataFrame into model inputs and per-tag label rows.

    Args:
        data: DataFrame with a ``content`` column and one column per entry of
            the module-level ``tags``.

    Returns:
        A pair ``([token_ids, segment_ids], label)``. The two id arrays are the
        outputs of ``sequence_padding``; ``label`` stacks one ``uint8`` row per
        tag, in the order of ``tags``.
    """
    # Tokenise every sentence and convert its tokens to ids.
    encoded = [
        tokenizer.encode(sentence, maxlen=maxlen)
        for sentence in tqdm(data['content'].astype(str))
    ]
    token_ids = sequence_padding([ids for ids, _ in encoded])
    segment_ids = sequence_padding([segments for _, segments in encoded])

    # Collect the labels of every tag column.
    label = np.array([np.array(data[tag]).astype('uint8') for tag in tags])
    return [token_ids, segment_ids], label

In [15]:
# Encode both splits once up front; each call returns ([token_ids, segment_ids], labels).
train_input, train_label = get_data(train_data)
valid_input, valid_label = get_data(valid_data)

100%|██████████| 105000/105000 [04:22<00:00, 400.00it/s]
100%|██████████| 15000/15000 [00:37<00:00, 404.61it/s]


In [16]:
train_label.shape

(20, 105000)

In [17]:
len(train_input)

2

In [18]:
# Load the pre-trained model. With return_keras_model=False the bert4keras wrapper
# is returned, so both `.model` and `.initializer` can be used below.
bert = build_transformer_model(
    config_path=config_path,
    checkpoint_path=checkpoint_path,
    model='bert',
    return_keras_model=False,
)

# Keep only the vector at sequence position 0 of the encoder output.
pooler = Lambda(lambda x: x[:, 0], name='CLS-token')(bert.model.output)


def _build_head(index):
    """Create the dropout + softmax head named ``preds_<index>`` on top of ``pooler``."""
    dropped = Dropout(dropout_rate)(pooler)
    classifier = Dense(
        num_classes_sentiment,
        activation='softmax',
        kernel_initializer=bert.initializer,
        name='preds_{}'.format(index),
    )
    return classifier(dropped)


# Multi-task learning: one head per label column.
mutil_layers = [_build_head(i) for i in range(num_classes_topic)]

model = keras.models.Model(bert.model.input, mutil_layers)
# model.summary()

2021-10-25 20:15:46.168830: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: 
pciBusID: 0000:af:00.0 name: Tesla V100-PCIE-32GB computeCapability: 7.0
coreClock: 1.38GHz coreCount: 80 deviceMemorySize: 31.72GiB deviceMemoryBandwidth: 836.37GiB/s
2021-10-25 20:15:46.169317: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2021-10-25 20:15:46.169362: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2021-10-25 20:15:46.169395: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2021-10-25 20:15:46.169423: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2021-10-25 20:15:46.169452: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolv

In [19]:
# Every head uses the same loss and the same weight, keyed by its output layer name.
_head_names = ['preds_{}'.format(i) for i in range(num_classes_topic)]
loss_dict = dict.fromkeys(_head_names, 'sparse_categorical_crossentropy')
loss_weights_dict = dict.fromkeys(_head_names, 1.)

In [20]:
# Derive an optimizer with a piecewise-linear learning rate.
# The `name` argument is optional, but it is best to set it so that different
# derived optimizers can be told apart.
# `loss_weights` holds the weight of each task and can be tuned as needed.

AdamLR = extend_with_piecewise_linear_lr(Adam, name='AdamLR')

# Number of full batches over the whole run; the schedule breakpoints are placed
# at 20% and 30% of it.
# NOTE(review): the dict values are presumably learning-rate multipliers — confirm
# against the bert4keras optimizer documentation.
_total_steps = len(train_input[0]) // batch_size * epochs
_lr_schedule = {
    int(_total_steps * 0.2): 1,
    int(_total_steps * 0.3): 0.1,
}

model.compile(
    loss=loss_dict,
    loss_weights=loss_weights_dict,
    optimizer=AdamLR(learning_rate=1e-4, lr_schedule=_lr_schedule),
    metrics=['accuracy'],
)

In [21]:
def adversarial_training(model, embedding_name, epsilon=1):
    """Add adversarial training to a Keras model.

    Must be called after the model has been compiled. The model's training
    function is replaced by a wrapper that perturbs the embedding matrix
    before every training step and removes the perturbation afterwards.

    Args:
        model: the Keras model that adversarial training is added to.
        embedding_name: name of the Embedding layer inside ``model``.
        epsilon: scale applied to the norm-normalised embedding gradient when
            building the perturbation.

    Raises:
        Exception: if no layer named ``embedding_name`` can be found.
    """
    if model.train_function is None:  # no training function exists yet
        model._make_train_function()  # build it manually
    old_train_function = model.train_function  # keep the original training function

    # Look up the Embedding layer
    for output in model.outputs:
        embedding_layer = search_layer(output, embedding_name)
        if embedding_layer is not None:
            break
    if embedding_layer is None:
        raise Exception('Embedding layer not found')

    # Compute the gradient with respect to the embeddings
    embeddings = embedding_layer.embeddings  # embedding matrix
    gradients = K.gradients(model.total_loss, [embeddings])  # embedding gradient
    gradients = K.zeros_like(embeddings) + gradients[0]  # convert to a dense tensor

    # Wrap as a function
    inputs = (
        model._feed_inputs + model._feed_targets + model._feed_sample_weights
    )  # all inputs
    embedding_gradients = K.function(
        inputs=inputs,
        outputs=[gradients],
        name='embedding_gradients',
    )  # wrap as a function

    def train_function(inputs):  # redefined training function
        grads = embedding_gradients(inputs)[0]  # embedding gradient
        delta = epsilon * grads / (np.sqrt((grads**2).sum()) + 1e-8)  # compute the perturbation
        K.set_value(embeddings, K.eval(embeddings) + delta)  # inject the perturbation
        outputs = old_train_function(inputs)  # gradient-descent step
        K.set_value(embeddings, K.eval(embeddings) - delta)  # remove the perturbation
        return outputs

    model.train_function = train_function  # override the original training function

In [22]:
# With the function written, enabling adversarial training takes a single line
adversarial_training(model, 'Embedding-Token', 0.5)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [26]:
def evaluate(x_trues, y_trues, batch_size=128):
    """Score the module-level ``model`` on topic detection and sentiment.

    Two views of the same predictions are scored:

    * topic: for every sample and tag, whether the class id is greater than 0
      (class 0 is reported as '未提及', i.e. "not mentioned");
    * sentiment: the class id itself, pooled over all samples and tags.

    Args:
        x_trues: model inputs, forwarded unchanged to ``model.predict``.
        y_trues: integer class ids with one row per tag and one column per
            sample, i.e. the layout returned by ``get_data``.
        batch_size: batch size used for prediction; defaults to 128, the value
            that used to be hard-coded here.

    Returns:
        The mean of the two micro-averaged F1 scores, rounded to 4 decimals.
    """
    y_trues = np.asarray(y_trues)

    # model.predict returns one probability array per head; argmax over the last
    # axis therefore yields class ids laid out as (tag, sample).
    y_preds = np.argmax(
        model.predict(x_trues, batch_size=batch_size, verbose=1), axis=-1
    )

    # Switch to (sample, tag) so that every row describes one review.
    gold_by_sample = y_trues.T
    pred_by_sample = y_preds.T

    # Vectorised replacements for the former per-element Python loops.
    golds_topic = (gold_by_sample > 0).astype(int)
    preds_topic = (pred_by_sample > 0).astype(int)
    golds_sentiment = gold_by_sample.ravel()
    preds_sentiment = pred_by_sample.ravel()

    # zero_division=0 keeps the reported numbers unchanged but silences the
    # warning emitted whenever a label receives no prediction.
    print(classification_report(y_true=golds_topic, y_pred=preds_topic, target_names=tags, digits=4, zero_division=0))
    print(classification_report(y_true=golds_sentiment, y_pred=preds_sentiment, target_names=['未提及', '负', '中', '正'], digits=4, zero_division=0))

    f1_score_topic = f1_score(y_true=golds_topic, y_pred=preds_topic, average='micro')
    f1_score_sentiment = f1_score(y_true=golds_sentiment, y_pred=preds_sentiment, average='micro')
    f1_score_average = float(format((f1_score_topic + f1_score_sentiment) / 2, '.4f'))
    print("f1_score_average", f1_score_average, type(f1_score_average))
    return f1_score_average

In [27]:
class Evaluator(keras.callbacks.Callback):
    """Evaluate after every epoch and keep the best-scoring model on disk.

    After each epoch the module-level ``model`` is scored with ``evaluate`` on
    the validation split. Whenever the score improves on the best one seen so
    far, the weights and the JSON architecture are written under ``model/``.
    """
    def __init__(self):
        # Run the base-class initialisation so the callback attributes Keras
        # expects are set up.
        super(Evaluator, self).__init__()
        self.best_val_f1 = 0.  # best validation score seen so far

    def on_epoch_end(self, epoch, logs=None):
        """Score the validation split and save the model if the score improved."""
        val_f1 = evaluate(valid_input, valid_label)
        if val_f1 > self.best_val_f1:
            self.best_val_f1 = val_f1
            # Make sure the output directory exists; otherwise the first save fails.
            os.makedirs('model', exist_ok=True)
            model.save_weights('model/best_model_tag_sentiment.weights')
            model_json = model.to_json()
            with open('model/best_model_tag_sentiment.json', 'w') as json_file:
                json_file.write(model_json)
        print(
            u'val_f1: %.5f, best_val_f1: %.5f\n' %
            (val_f1, self.best_val_f1)
        )

In [28]:
evaluator = Evaluator()

# Iterating over `train_label` yields one target array per head, in head order.
model.fit(
    train_input,
    list(train_label),
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[evaluator],
)

Epoch 1/25


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.8863    0.8338    0.8592      3243
location_distance_from_business_district     0.7832    0.4407    0.5640      2968
                   location_easy_to_find     0.8982    0.6912    0.7812      3484
                       service_wait_time     0.0000    0.0000    0.0000      1763
                service_waiters_attitude     0.9034    0.7930    0.8446      9008
             service_parking_convenience     0.9400    0.7222    0.8168       954
                   service_serving_speed     0.8324    0.3186    0.4608      2323
                             price_level     0.8595    0.5958    0.7037      7503
                    price_cost_effective     0.7911    0.2343    0.3616      3572
                          price_discount     0.8926    0.6675    0.7638      5738
                  environment_decoration     0.8431    0.8600    0.8515      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.8993    0.8369    0.8670      3243
location_distance_from_business_district     0.7556    0.5822    0.6577      2968
                   location_easy_to_find     0.8816    0.7609    0.8168      3484
                       service_wait_time     0.7699    0.2411    0.3672      1763
                service_waiters_attitude     0.9120    0.8089    0.8574      9008
             service_parking_convenience     0.9322    0.7212    0.8132       954
                   service_serving_speed     0.8981    0.5045    0.6461      2323
                             price_level     0.8853    0.7025    0.7834      7503
                    price_cost_effective     0.8335    0.4569    0.5902      3572
                          price_discount     0.8928    0.7285    0.8023      5738
                  environment_decoration     0.8738    0.8423    0.8578      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.9228    0.8076    0.8614      3243
location_distance_from_business_district     0.7919    0.5526    0.6509      2968
                   location_easy_to_find     0.8994    0.7566    0.8218      3484
                       service_wait_time     0.7852    0.3670    0.5002      1763
                service_waiters_attitude     0.9269    0.7901    0.8531      9008
             service_parking_convenience     0.9355    0.7453    0.8296       954
                   service_serving_speed     0.8745    0.5760    0.6945      2323
                             price_level     0.8914    0.7024    0.7857      7503
                    price_cost_effective     0.8252    0.4997    0.6225      3572
                          price_discount     0.8920    0.7553    0.8180      5738
                  environment_decoration     0.8972    0.8357    0.8654      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.8957    0.8501    0.8723      3243
location_distance_from_business_district     0.7134    0.6792    0.6959      2968
                   location_easy_to_find     0.8654    0.7988    0.8307      3484
                       service_wait_time     0.7901    0.4719    0.5909      1763
                service_waiters_attitude     0.9093    0.8214    0.8631      9008
             service_parking_convenience     0.9112    0.7956    0.8495       954
                   service_serving_speed     0.8119    0.6728    0.7359      2323
                             price_level     0.8941    0.7124    0.7930      7503
                    price_cost_effective     0.8242    0.5120    0.6317      3572
                          price_discount     0.8911    0.7719    0.8272      5738
                  environment_decoration     0.8851    0.8551    0.8699      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.8813    0.8656    0.8734      3243
location_distance_from_business_district     0.7666    0.6176    0.6841      2968
                   location_easy_to_find     0.8978    0.7844    0.8373      3484
                       service_wait_time     0.7865    0.5014    0.6124      1763
                service_waiters_attitude     0.9303    0.7956    0.8577      9008
             service_parking_convenience     0.9327    0.7841    0.8519       954
                   service_serving_speed     0.8929    0.6281    0.7374      2323
                             price_level     0.8727    0.7656    0.8156      7503
                    price_cost_effective     0.8278    0.5154    0.6353      3572
                          price_discount     0.8761    0.8121    0.8429      5738
                  environment_decoration     0.9026    0.8419    0.8712      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.9037    0.8449    0.8733      3243
location_distance_from_business_district     0.7438    0.6769    0.7088      2968
                   location_easy_to_find     0.8934    0.7890    0.8380      3484
                       service_wait_time     0.7780    0.5445    0.6406      1763
                service_waiters_attitude     0.9300    0.8022    0.8614      9008
             service_parking_convenience     0.9352    0.7862    0.8542       954
                   service_serving_speed     0.8878    0.6371    0.7419      2323
                             price_level     0.8698    0.7710    0.8174      7503
                    price_cost_effective     0.8026    0.5770    0.6713      3572
                          price_discount     0.8767    0.8142    0.8443      5738
                  environment_decoration     0.8902    0.8612    0.8755      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.8935    0.8566    0.8747      3243
location_distance_from_business_district     0.7550    0.6553    0.7017      2968
                   location_easy_to_find     0.8932    0.7922    0.8397      3484
                       service_wait_time     0.7832    0.5615    0.6541      1763
                service_waiters_attitude     0.9196    0.8149    0.8641      9008
             service_parking_convenience     0.9390    0.7746    0.8489       954
                   service_serving_speed     0.8598    0.6733    0.7552      2323
                             price_level     0.8786    0.7582    0.8140      7503
                    price_cost_effective     0.8019    0.5711    0.6671      3572
                          price_discount     0.8735    0.8181    0.8449      5738
                  environment_decoration     0.8990    0.8532    0.8755      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.8988    0.8514    0.8744      3243
location_distance_from_business_district     0.7573    0.6540    0.7019      2968
                   location_easy_to_find     0.8942    0.7905    0.8391      3484
                       service_wait_time     0.7926    0.5377    0.6408      1763
                service_waiters_attitude     0.9172    0.8203    0.8660      9008
             service_parking_convenience     0.9352    0.7872    0.8549       954
                   service_serving_speed     0.8806    0.6543    0.7508      2323
                             price_level     0.8760    0.7626    0.8154      7503
                    price_cost_effective     0.8029    0.5736    0.6692      3572
                          price_discount     0.8734    0.8179    0.8447      5738
                  environment_decoration     0.8912    0.8632    0.8769      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.8898    0.8615    0.8755      3243
location_distance_from_business_district     0.7471    0.6698    0.7063      2968
                   location_easy_to_find     0.8840    0.8002    0.8400      3484
                       service_wait_time     0.7806    0.5752    0.6623      1763
                service_waiters_attitude     0.9189    0.8163    0.8646      9008
             service_parking_convenience     0.9352    0.7862    0.8542       954
                   service_serving_speed     0.8876    0.6526    0.7522      2323
                             price_level     0.8766    0.7613    0.8149      7503
                    price_cost_effective     0.8019    0.5759    0.6704      3572
                          price_discount     0.8724    0.8189    0.8448      5738
                  environment_decoration     0.8936    0.8597    0.8763      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.8952    0.8532    0.8737      3243
location_distance_from_business_district     0.7453    0.6664    0.7037      2968
                   location_easy_to_find     0.8890    0.7979    0.8410      3484
                       service_wait_time     0.7862    0.5508    0.6478      1763
                service_waiters_attitude     0.9230    0.8122    0.8641      9008
             service_parking_convenience     0.9348    0.7820    0.8516       954
                   service_serving_speed     0.8546    0.6733    0.7532      2323
                             price_level     0.8794    0.7580    0.8142      7503
                    price_cost_effective     0.8058    0.5647    0.6640      3572
                          price_discount     0.8779    0.8132    0.8443      5738
                  environment_decoration     0.8988    0.8575    0.8777      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.8965    0.8545    0.8750      3243
location_distance_from_business_district     0.7625    0.6479    0.7005      2968
                   location_easy_to_find     0.8924    0.7931    0.8398      3484
                       service_wait_time     0.7882    0.5530    0.6500      1763
                service_waiters_attitude     0.9266    0.8045    0.8613      9008
             service_parking_convenience     0.9437    0.7736    0.8502       954
                   service_serving_speed     0.8778    0.6616    0.7545      2323
                             price_level     0.8828    0.7506    0.8114      7503
                    price_cost_effective     0.8114    0.5602    0.6628      3572
                          price_discount     0.8807    0.8095    0.8436      5738
                  environment_decoration     0.9023    0.8515    0.8762      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.8960    0.8554    0.8752      3243
location_distance_from_business_district     0.7612    0.6509    0.7018      2968
                   location_easy_to_find     0.8839    0.8017    0.8408      3484
                       service_wait_time     0.7861    0.5649    0.6574      1763
                service_waiters_attitude     0.9156    0.8206    0.8655      9008
             service_parking_convenience     0.9405    0.7788    0.8521       954
                   service_serving_speed     0.8681    0.6685    0.7554      2323
                             price_level     0.8809    0.7562    0.8138      7503
                    price_cost_effective     0.8140    0.5476    0.6547      3572
                          price_discount     0.8780    0.8130    0.8443      5738
                  environment_decoration     0.9002    0.8544    0.8767      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.8966    0.8557    0.8757      3243
location_distance_from_business_district     0.7426    0.6803    0.7100      2968
                   location_easy_to_find     0.8810    0.8074    0.8426      3484
                       service_wait_time     0.7795    0.5876    0.6701      1763
                service_waiters_attitude     0.9159    0.8206    0.8656      9008
             service_parking_convenience     0.9370    0.7945    0.8599       954
                   service_serving_speed     0.8646    0.6707    0.7554      2323
                             price_level     0.8811    0.7549    0.8132      7503
                    price_cost_effective     0.7984    0.5876    0.6770      3572
                          price_discount     0.8792    0.8120    0.8443      5738
                  environment_decoration     0.9016    0.8528    0.8765      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.8979    0.8569    0.8769      3243
location_distance_from_business_district     0.7543    0.6600    0.7040      2968
                   location_easy_to_find     0.8885    0.7985    0.8411      3484
                       service_wait_time     0.7797    0.5842    0.6680      1763
                service_waiters_attitude     0.9207    0.8154    0.8648      9008
             service_parking_convenience     0.9369    0.7778    0.8499       954
                   service_serving_speed     0.8630    0.6724    0.7559      2323
                             price_level     0.8789    0.7561    0.8129      7503
                    price_cost_effective     0.8011    0.5817    0.6740      3572
                          price_discount     0.8787    0.8158    0.8461      5738
                  environment_decoration     0.8963    0.8573    0.8764      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.8881    0.8615    0.8746      3243
location_distance_from_business_district     0.7455    0.6681    0.7047      2968
                   location_easy_to_find     0.8852    0.7991    0.8399      3484
                       service_wait_time     0.7819    0.5837    0.6684      1763
                service_waiters_attitude     0.9113    0.8280    0.8677      9008
             service_parking_convenience     0.9342    0.7893    0.8557       954
                   service_serving_speed     0.8599    0.6737    0.7555      2323
                             price_level     0.8691    0.7716    0.8174      7503
                    price_cost_effective     0.7972    0.5820    0.6728      3572
                          price_discount     0.8791    0.8133    0.8449      5738
                  environment_decoration     0.8909    0.8651    0.8778      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.9064    0.8480    0.8762      3243
location_distance_from_business_district     0.7520    0.6621    0.7042      2968
                   location_easy_to_find     0.8911    0.7942    0.8399      3484
                       service_wait_time     0.7805    0.5910    0.6727      1763
                service_waiters_attitude     0.9105    0.8280    0.8673      9008
             service_parking_convenience     0.9438    0.7746    0.8509       954
                   service_serving_speed     0.8576    0.6767    0.7565      2323
                             price_level     0.8839    0.7520    0.8126      7503
                    price_cost_effective     0.8171    0.5529    0.6595      3572
                          price_discount     0.8817    0.8078    0.8431      5738
                  environment_decoration     0.9015    0.8540    0.8771      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.8936    0.8600    0.8765      3243
location_distance_from_business_district     0.7297    0.6968    0.7129      2968
                   location_easy_to_find     0.8907    0.7976    0.8416      3484
                       service_wait_time     0.7877    0.5746    0.6645      1763
                service_waiters_attitude     0.9156    0.8217    0.8661      9008
             service_parking_convenience     0.9400    0.7715    0.8474       954
                   service_serving_speed     0.8757    0.6672    0.7574      2323
                             price_level     0.8802    0.7558    0.8133      7503
                    price_cost_effective     0.8013    0.5848    0.6762      3572
                          price_discount     0.8786    0.8133    0.8447      5738
                  environment_decoration     0.8985    0.8596    0.8786      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.8961    0.8566    0.8759      3243
location_distance_from_business_district     0.7363    0.6830    0.7086      2968
                   location_easy_to_find     0.8810    0.8051    0.8413      3484
                       service_wait_time     0.7818    0.5791    0.6654      1763
                service_waiters_attitude     0.9133    0.8252    0.8670      9008
             service_parking_convenience     0.9334    0.7935    0.8578       954
                   service_serving_speed     0.8630    0.6750    0.7575      2323
                             price_level     0.8782    0.7585    0.8140      7503
                    price_cost_effective     0.7998    0.5859    0.6764      3572
                          price_discount     0.8704    0.8231    0.8461      5738
                  environment_decoration     0.8942    0.8621    0.8779      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.8886    0.8612    0.8747      3243
location_distance_from_business_district     0.7447    0.6732    0.7071      2968
                   location_easy_to_find     0.8896    0.7956    0.8400      3484
                       service_wait_time     0.7778    0.5995    0.6771      1763
                service_waiters_attitude     0.9150    0.8230    0.8666      9008
             service_parking_convenience     0.9378    0.7904    0.8578       954
                   service_serving_speed     0.8575    0.6789    0.7578      2323
                             price_level     0.8764    0.7592    0.8136      7503
                    price_cost_effective     0.7946    0.5957    0.6810      3572
                          price_discount     0.8727    0.8186    0.8448      5738
                  environment_decoration     0.9047    0.8504    0.8767      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.9074    0.8495    0.8775      3243
location_distance_from_business_district     0.7320    0.6883    0.7095      2968
                   location_easy_to_find     0.8834    0.8022    0.8409      3484
                       service_wait_time     0.7845    0.5740    0.6630      1763
                service_waiters_attitude     0.9211    0.8141    0.8643      9008
             service_parking_convenience     0.9434    0.7862    0.8576       954
                   service_serving_speed     0.8689    0.6707    0.7570      2323
                             price_level     0.8753    0.7652    0.8165      7503
                    price_cost_effective     0.8073    0.5733    0.6705      3572
                          price_discount     0.8762    0.8188    0.8465      5738
                  environment_decoration     0.9038    0.8539    0.8781      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.9015    0.8526    0.8764      3243
location_distance_from_business_district     0.7401    0.6850    0.7115      2968
                   location_easy_to_find     0.8824    0.8031    0.8409      3484
                       service_wait_time     0.7784    0.5995    0.6773      1763
                service_waiters_attitude     0.9109    0.8273    0.8671      9008
             service_parking_convenience     0.9420    0.7830    0.8552       954
                   service_serving_speed     0.8633    0.6741    0.7571      2323
                             price_level     0.8765    0.7630    0.8158      7503
                    price_cost_effective     0.8004    0.5848    0.6758      3572
                          price_discount     0.8753    0.8207    0.8471      5738
                  environment_decoration     0.9028    0.8532    0.8773      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.8979    0.8569    0.8769      3243
location_distance_from_business_district     0.7580    0.6584    0.7047      2968
                   location_easy_to_find     0.8858    0.8014    0.8415      3484
                       service_wait_time     0.7795    0.5956    0.6752      1763
                service_waiters_attitude     0.9128    0.8255    0.8670      9008
             service_parking_convenience     0.9430    0.7809    0.8544       954
                   service_serving_speed     0.8651    0.6763    0.7591      2323
                             price_level     0.8744    0.7682    0.8179      7503
                    price_cost_effective     0.8029    0.5815    0.6745      3572
                          price_discount     0.8753    0.8182    0.8458      5738
                  environment_decoration     0.9006    0.8560    0.8777      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.8983    0.8554    0.8763      3243
location_distance_from_business_district     0.7464    0.6715    0.7070      2968
                   location_easy_to_find     0.8834    0.8042    0.8419      3484
                       service_wait_time     0.7965    0.5440    0.6464      1763
                service_waiters_attitude     0.9143    0.8223    0.8659      9008
             service_parking_convenience     0.9428    0.7778    0.8524       954
                   service_serving_speed     0.8790    0.6599    0.7539      2323
                             price_level     0.8807    0.7557    0.8134      7503
                    price_cost_effective     0.8048    0.5759    0.6713      3572
                          price_discount     0.8809    0.8099    0.8439      5738
                  environment_decoration     0.8978    0.8607    0.8789      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.8945    0.8603    0.8771      3243
location_distance_from_business_district     0.7400    0.6819    0.7098      2968
                   location_easy_to_find     0.8815    0.8074    0.8428      3484
                       service_wait_time     0.7746    0.6120    0.6838      1763
                service_waiters_attitude     0.9193    0.8204    0.8670      9008
             service_parking_convenience     0.9346    0.7945    0.8589       954
                   service_serving_speed     0.8441    0.6948    0.7622      2323
                             price_level     0.8790    0.7597    0.8150      7503
                    price_cost_effective     0.7980    0.5907    0.6789      3572
                          price_discount     0.8752    0.8186    0.8459      5738
                  environment_decoration     0.9039    0.8530    0.8777      7199
               

  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.8996    0.8566    0.8776      3243
location_distance_from_business_district     0.7600    0.6584    0.7055      2968
                   location_easy_to_find     0.8860    0.8031    0.8425      3484
                       service_wait_time     0.7720    0.6222    0.6891      1763
                service_waiters_attitude     0.9194    0.8173    0.8654      9008
             service_parking_convenience     0.9359    0.7956    0.8601       954
                   service_serving_speed     0.8498    0.6870    0.7598      2323
                             price_level     0.8768    0.7610    0.8148      7503
                    price_cost_effective     0.8007    0.5882    0.6782      3572
                          price_discount     0.8751    0.8203    0.8468      5738
                  environment_decoration     0.9011    0.8551    0.8775      7199
               

<keras.callbacks.callbacks.History at 0x7f806c20a650>

In [29]:
# Reload the weights stored at 'model/best_model_tag_sentiment.weights' and
# score the validation split once more.
# NOTE(review): presumably this file is the best checkpoint written during
# training -- the saving logic is outside this cell; confirm.
model.load_weights('model/best_model_tag_sentiment.weights')

# Keep the score in a variable so it stays available to later cells instead of
# being consumed by the print call.
final_valid_f1 = evaluate(valid_input, valid_label)

# The previous spec '%05f' only requested a zero-padded minimum width of 5 and
# left the precision at the default 6, so it never changed the output.
# '%.5f' prints the score with five decimal places.
print('final valid f1: %.5f\n' % final_valid_f1)



  _warn_prf(average, modifier, msg_start, len(result))


                                          precision    recall  f1-score   support

            location_traffic_convenience     0.8996    0.8566    0.8776      3243
location_distance_from_business_district     0.7600    0.6584    0.7055      2968
                   location_easy_to_find     0.8860    0.8031    0.8425      3484
                       service_wait_time     0.7720    0.6222    0.6891      1763
                service_waiters_attitude     0.9194    0.8173    0.8654      9008
             service_parking_convenience     0.9359    0.7956    0.8601       954
                   service_serving_speed     0.8498    0.6870    0.7598      2323
                             price_level     0.8768    0.7610    0.8148      7503
                    price_cost_effective     0.8007    0.5882    0.6782      3572
                          price_discount     0.8751    0.8203    0.8468      5738
                  environment_decoration     0.9011    0.8551    0.8775      7199
               