In [1]:
import tensorflow as tf
import os
from bert import run_classifier, modeling, tokenization

In [3]:
# --- Filesystem layout -------------------------------------------------------
bert_base_dir = "./bert_base"  # holds vocab.txt, bert_config.json, bert_model.ckpt
data_dir = "./data"  # not referenced by the cells visible in this notebook
model_dir = "./output"  # handed to the estimator as its model_dir

# The three BERT asset paths, each resolved relative to bert_base_dir.
vocab_file, bert_config_file, init_checkpoint = (
    os.path.join(bert_base_dir, asset_name)
    for asset_name in ("vocab.txt", "bert_config.json", "bert_model.ckpt"))

# --- Hyperparameters ---------------------------------------------------------
max_seq_length = 128
learning_rate = 2e-5
num_train_steps = 12184
num_warmup_steps = 1218  # roughly 10% of num_train_steps

In [4]:
def build_estimator():
    """Create the BERT sequence classifier wrapped in a ``TPUEstimator``.

    Every setting is read from the notebook-level configuration
    (``bert_config_file``, ``init_checkpoint``, ``learning_rate``,
    ``num_train_steps``, ``num_warmup_steps`` and ``model_dir``). TPU use is
    switched off both in the model function and in the estimator itself.

    Returns:
        The constructed ``tf.contrib.tpu.TPUEstimator``.
    """
    # 7 equals the number of entries in the label list used by
    # get_prediction_for_sentences.
    classifier_model_fn = run_classifier.model_fn_builder(
        bert_config=modeling.BertConfig.from_json_file(bert_config_file),
        num_labels=7,
        init_checkpoint=init_checkpoint,
        learning_rate=learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=False,
        use_one_hot_embeddings=False)

    tpu = tf.contrib.tpu
    tpu_settings = tpu.TPUConfig(
        iterations_per_loop=50,
        num_shards=8,
        per_host_input_for_training=tpu.InputPipelineConfig.PER_HOST_V2)
    run_settings = tpu.RunConfig(
        cluster=None,
        master=None,
        model_dir=model_dir,
        save_checkpoints_steps=500,
        tpu_config=tpu_settings)

    # NOTE(review): this is the path of the file literally named "checkpoint"
    # inside model_dir, not model_dir itself or a checkpoint prefix -- confirm
    # that this is what warm_start_from is meant to receive.
    warm_start_path = os.path.join(model_dir, "checkpoint")

    return tpu.TPUEstimator(
        model_fn=classifier_model_fn,
        model_dir=model_dir,
        config=run_settings,
        use_tpu=False,
        predict_batch_size=8,
        eval_batch_size=8,
        warm_start_from=warm_start_path)

In [5]:
def output_test_eval_result_to_file(file_path, result):
    """Log each entry of ``result`` and write the same entries to a file.

    Args:
        file_path: Destination path, opened for writing via ``tf.gfile.GFile``.
        result: Mapping of name to value. Entries are emitted in sorted key
            order, one ``name = value`` line each.
    """
    with tf.gfile.GFile(file_path, "w") as out_file:
        tf.logging.info("***** Eval results *****")
        for metric_name in sorted(result.keys()):
            # Stringify once; the same text goes to the log and to the file.
            metric_text = str(result[metric_name])
            tf.logging.info("  %s = %s", metric_name, metric_text)
            out_file.write("%s = %s\n" % (metric_name, metric_text))

In [48]:
def get_prediction_for_sentences(estimator, sentences):
    """Run the classifier on raw sentences and pair each with its scores.

    Args:
        estimator: Object exposing ``predict(input_fn)``, e.g. the value
            returned by ``build_estimator()``.
        sentences: Iterable of text strings. One-shot iterables such as
            generators are accepted; the input is materialized once.

    Returns:
        A list of ``(sentence, probabilities)`` tuples in input order, where
        ``probabilities`` is ``prediction['probabilities']`` for that
        sentence. An empty input yields an empty list.
    """
    # The input is traversed twice (building examples, then pairing results),
    # so a generator would be exhausted by the first pass and the function
    # would silently return nothing. Materialize it up front.
    sentences = list(sentences)
    if not sentences:
        # Nothing to classify: skip loading the vocabulary and the model.
        return []

    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file,
                                           do_lower_case=True)
    # NOTE(review): the position of each label here defines the meaning of
    # each probability index, so this order presumably has to match the one
    # used for training. The output saved in this notebook peaks at index 4
    # for the two safety-feature sentences, while index 4 of this sorted list
    # is 'performance' -- verify the ordering against the training setup.
    label_list = sorted([
        'driving', 'exterior', 'fuel economy', 'interior', 'performance',
        'safety', 'space'
    ])
    # Every example carries label_list[0] as a placeholder label; the true
    # label is unknown at prediction time (presumably the feature conversion
    # still requires a valid label -- confirm).
    placeholder_label = tokenization.convert_to_unicode(label_list[0])
    input_examples = [
        run_classifier.InputExample(guid="test-" + str(i),
                                    text_a=tokenization.convert_to_unicode(x),
                                    text_b=None,
                                    label=placeholder_label)
        for (i, x) in enumerate(sentences)
    ]
    input_features = run_classifier.convert_examples_to_features(
        examples=input_examples,
        label_list=label_list,
        max_seq_length=max_seq_length,
        tokenizer=tokenizer)
    input_fn = run_classifier.input_fn_builder(features=input_features,
                                               seq_length=max_seq_length,
                                               is_training=False,
                                               drop_remainder=False)
    predictions = estimator.predict(input_fn)
    return [(sentence, prediction['probabilities'])
            for sentence, prediction in zip(sentences, predictions)]

In [55]:
# Sample inputs for the prediction demo in the cells below. The text is
# lowercase and reads as if stop words were already stripped.
# NOTE(review): presumably this mirrors the preprocessing applied to the
# training data -- confirm before feeding raw, unprocessed sentences.
sentences = [
    "standard safety features 2016 gmc sierra 1500 include antilock disc brakes traction stability control trailer sway control front rear side airbags side curtain airbags",
    "standard base sierra rearview camera driver blind-spot mirror onstar includes services automatic crash notification roadside assistance remote door unlocking stolen vehicle assistance",
    "high level confidence encourages spirited driving car stumbles traffic-filled commuting unsavory lurches low speeds optional six-speed automatic transmission",
    "terms performance driver engagement lexus nx 300 merely average among current crop luxury crossover suvs",
    "tested front-drive kia soul achieved better fuel economy also provided whopping 63-hp advantage trax accelerated 60 mph nearly three seconds quicker initial highway fuel-consumption test acadia denali v-6 all-wheel drive returned disappointing 20 mpg",
    "im 14 0-gallon fuel tank gives range roughly 500 miles neighborhood chevrolet cruze hatchback volkswagen golf devised fuel-economy test attempt replicate people drive highway"
]

In [56]:
# Build the estimator once; the prediction cell below reuses this object.
estimator = build_estimator()

INFO:tensorflow:Using config: {'_keep_checkpoint_every_n_hours': 10000, '_device_fn': None, '_train_distribute': None, '_evaluation_master': '', '_save_checkpoints_steps': 500, '_master': '', '_is_chief': True, '_task_id': 0, '_num_worker_replicas': 1, '_save_checkpoints_secs': None, '_log_step_count_steps': None, '_session_config': None, '_tf_random_seed': None, '_global_id_in_cluster': 0, '_tpu_config': TPUConfig(iterations_per_loop=50, num_shards=8, num_cores_per_replica=None, per_host_input_for_training=3, tpu_job_name=None, initial_infeed_sleep_secs=None), '_save_summary_steps': 100, '_cluster': None, '_task_type': 'worker', '_model_dir': './output', '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f7ff57aba20>}
INFO:tensorflow:_TPUContext: eval_on_tpu True


In [57]:
# Classify the sample sentences. The call is the cell's last expression, so
# its return value (a list of (sentence, probabilities) tuples) is displayed.
get_prediction_for_sentences(estimator=estimator, sentences=sentences)

INFO:tensorflow:Writing example 0 of 6
INFO:tensorflow:*** Example ***
INFO:tensorflow:guid: test-0
INFO:tensorflow:tokens: [CLS] standard safety features 2016 gm ##c sierra 1500 include anti ##lock disc brakes traction stability control trailer sway control front rear side air ##bags side curtain air ##bags [SEP]
INFO:tensorflow:input_ids: 101 3115 3808 2838 2355 13938 2278 7838 10347 2421 3424 7878 5860 13627 16493 9211 2491 9117 17812 2491 2392 4373 2217 2250 26813 2217 11002 2250 26813 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:tensorflow:segme

INFO:tensorflow:  name = bert/encoder/layer_0/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = be

INFO:tensorflow:  name = bert/encoder/layer_5/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_5/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_5/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_5/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_5/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_5/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_5/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_5/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_5/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CK

INFO:tensorflow:  name = bert/encoder/layer_9/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*

[('standard safety features 2016 gmc sierra 1500 include antilock disc brakes traction stability control trailer sway control front rear side airbags side curtain airbags',
  array([9.1294078e-06, 1.0322535e-05, 1.6186996e-05, 1.0102983e-05,
         9.9993420e-01, 1.0549163e-05, 9.5704136e-06], dtype=float32)),
 ('standard base sierra rearview camera driver blind-spot mirror onstar includes services automatic crash notification roadside assistance remote door unlocking stolen vehicle assistance',
  array([1.02110571e-05, 1.10906685e-05, 1.86515990e-05, 9.37573350e-06,
         9.99927759e-01, 1.01205260e-05, 1.27108224e-05], dtype=float32)),
 ('high level confidence encourages spirited driving car stumbles traffic-filled commuting unsavory lurches low speeds optional six-speed automatic transmission',
  array([1.5291871e-05, 1.5034661e-05, 9.9992836e-01, 1.0594954e-05,
         1.0793233e-05, 9.7715847e-06, 1.0184658e-05], dtype=float32)),
 ('terms performance driver engagement lexus 