In [1]:
import tensorflow as tf
from bert import run_classifier, modeling, optimization, tokenization
from datetime import datetime
import os

In [2]:
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [3]:
OUTPUT_DIR = "./output/car_reviews"
VOCAB_FILE = "./bert_base/vocab.txt"
BERT_CONFIG_FILE = "./bert_base/bert_config.json"
INIT_CHECKPOINT = "./bert_base/bert_model.ckpt"
TRAIN_FILE = "./data/car_reviews_final/train.tf_record"
TEST_FILE = "./data/car_reviews_final/test.tf_record"
VALIDATE_FILE = "./data/car_reviews_final/validate.tf_record"
MAX_SEQ_LENGTH = 128
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 10.0
WARMUP_PROPORTION = 0.1
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 50
NUM_TRAIN_SIZE = 4462
LABELS_LIST = [
    'control', 'interior', 'power', 'appearance', 'safety', 'energy', 'space'
]

In [4]:
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    """Creates a classification model."""
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    output_layer = model.get_pooled_output()
    hidden_size = output_layer.shape[-1].value
    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())
    with tf.variable_scope("loss"):
        if is_training:
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        probabilities = tf.nn.softmax(logits, axis=-1)
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        predicted_labels = tf.squeeze(
            tf.argmax(log_probs, axis=-1, output_type=tf.int32))
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)
        return (loss, per_example_loss, logits, probabilities,
                predicted_labels)

In [5]:
def model_fn_builder(bert_config, num_labels, init_checkpoint, learning_rate,
                     num_train_steps, num_warmup_steps, use_tpu,
                     use_one_hot_embeddings):
    """Returns `model_fn` closure for TPUEstimator."""
    def model_fn(features, labels, mode, params):
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_real_example = None
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits, probabilities,
         predicted_labels) = create_model(bert_config, is_training, input_ids,
                                          input_mask, segment_ids, label_ids,
                                          num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            tf.logging.info("**** Do Train ****")
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            tf.logging.info("**** Do Validate ****")

            def metric_fn(per_example_loss, label_ids, logits,
                          is_real_example):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                accuracy = tf.metrics.accuracy(labels=label_ids,
                                               predictions=predictions,
                                               weights=is_real_example)
                loss = tf.metrics.mean(values=per_example_loss,
                                       weights=is_real_example)
                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                }

            eval_metrics = (metric_fn, [
                per_example_loss, label_ids, logits, is_real_example
            ])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.PREDICT:
            tf.logging.info("**** Do Predict ****")
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={
                    "probabilities": probabilities,
                    "predicted_labels": predicted_labels
                },
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError(
                "Only Train, Validate and Predict modes are supported: %s" %
                (mode))
        return output_spec

    return model_fn

In [6]:
def bulid_estimator():
    tf.logging.set_verbosity(tf.logging.INFO)
    bert_config = modeling.BertConfig.from_json_file(BERT_CONFIG_FILE)
    if MAX_SEQ_LENGTH > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model was only trained up to sequence length %d"
            % (MAX_SEQ_LENGTH, bert_config.max_position_embeddings))
    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.contrib.tpu.RunConfig(
        cluster=None,
        master=None,
        model_dir=OUTPUT_DIR,
        save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=SAVE_SUMMARY_STEPS,
            num_shards=8,
            per_host_input_for_training=is_per_host))
    num_train_step = int(NUM_TRAIN_SIZE / BATCH_SIZE * NUM_TRAIN_EPOCHS)
    num_warmup_steps = int(num_train_step * WARMUP_PROPORTION)

    model_fn = model_fn_builder(bert_config=bert_config,
                                num_labels=len(LABELS_LIST),
                                init_checkpoint=INIT_CHECKPOINT,
                                learning_rate=LEARNING_RATE,
                                num_train_steps=num_train_step,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=False,
                                use_one_hot_embeddings=False)

    estimator = tf.contrib.tpu.TPUEstimator(use_tpu=False,
                                            model_fn=model_fn,
                                            config=run_config,
                                            train_batch_size=BATCH_SIZE,
                                            eval_batch_size=8,
                                            predict_batch_size=8)
    return estimator, num_train_step

In [7]:
def do_train():
    estimator, num_train_step = bulid_estimator()
    train_input_fn = run_classifier.file_based_input_fn_builder(
        input_file=TRAIN_FILE,
        seq_length=MAX_SEQ_LENGTH,
        is_training=True,
        drop_remainder=True)
    print("=============== begin to train ===============")
    current_time = datetime.now()
    estimator.train(input_fn=train_input_fn, max_steps=num_train_step)
    print("=============== end of train ===============")
    print("Training took time ", datetime.now() - current_time)

In [8]:
do_train()

INFO:tensorflow:Using config: {'_log_step_count_steps': None, '_model_dir': './output/car_reviews', '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_num_ps_replicas': 0, '_is_chief': True, '_cluster': None, '_task_id': 0, '_service': None, '_train_distribute': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_keep_checkpoint_every_n_hours': 10000, '_session_config': None, '_num_worker_replicas': 1, '_save_checkpoints_secs': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fe293bccf60>, '_tf_random_seed': None, '_tpu_config': TPUConfig(iterations_per_loop=50, num_shards=8, num_cores_per_replica=None, per_host_input_for_training=3, tpu_job_name=None, initial_infeed_sleep_secs=None), '_device_fn': None}
INFO:tensorflow:_TPUContext: eval_on_tpu True
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Running train on CPU
INFO:tensorflow:*** Features ***
INFO:tensor

INFO:tensorflow:  name = bert/encoder/layer_3/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_3/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_3/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_3/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_3/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_3/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_3/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_3/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  nam

INFO:tensorflow:  name = bert/encoder/layer_8/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_8/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_8/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_8/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_8/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_8/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_8/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_8/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_8/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKP

INFO:tensorflow:examples/sec: 62.5369
INFO:tensorflow:global_step/sec: 2.08123
INFO:tensorflow:examples/sec: 66.5992
INFO:tensorflow:global_step/sec: 2.08156
INFO:tensorflow:examples/sec: 66.6098
INFO:tensorflow:global_step/sec: 2.08148
INFO:tensorflow:examples/sec: 66.6075
INFO:tensorflow:Saving checkpoints for 1394 into ./output/car_reviews/model.ckpt.
INFO:tensorflow:Loss for final step: 0.0016567977.
Training took time  0:12:14.579225


In [9]:
def do_test():
    estimator, _ = bulid_estimator()
    predict_input_fn = run_classifier.file_based_input_fn_builder(
        input_file=TEST_FILE,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    result = estimator.predict(input_fn=predict_input_fn)
    return result

In [31]:
result = do_test()

INFO:tensorflow:Using config: {'_train_distribute': None, '_is_chief': True, '_keep_checkpoint_max': 5, '_master': '', '_cluster': None, '_task_type': 'worker', '_evaluation_master': '', '_task_id': 0, '_model_dir': './output/car_reviews', '_service': None, '_keep_checkpoint_every_n_hours': 10000, '_num_worker_replicas': 1, '_save_checkpoints_secs': None, '_tf_random_seed': None, '_num_ps_replicas': 0, '_device_fn': None, '_save_checkpoints_steps': 500, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f3cfbae2d30>, '_tpu_config': TPUConfig(iterations_per_loop=50, num_shards=8, num_cores_per_replica=None, per_host_input_for_training=3, tpu_job_name=None, initial_infeed_sleep_secs=None), '_log_step_count_steps': None, '_global_id_in_cluster': 0, '_session_config': None, '_save_summary_steps': 100}
INFO:tensorflow:_TPUContext: eval_on_tpu True


In [20]:
def get_prediction_for_sentences(estimator, sentences):
    tokenizer = tokenization.FullTokenizer(vocab_file=VOCAB_FILE,
                                           do_lower_case=True)
    input_examples = [
        run_classifier.InputExample(guid="test-" + str(i),
                                    text_a=tokenization.convert_to_unicode(x),
                                    text_b=None,
                                    label=LABELS_LIST[0])
        for (i, x) in enumerate(sentences)
    ]
    input_features = run_classifier.convert_examples_to_features(
        examples=input_examples,
        label_list=LABELS_LIST,
        max_seq_length=MAX_SEQ_LENGTH,
        tokenizer=tokenizer)
    input_fn = run_classifier.input_fn_builder(features=input_features,
                                               seq_length=MAX_SEQ_LENGTH,
                                               is_training=False,
                                               drop_remainder=False)
    predictions = estimator.predict(input_fn)
    return [(sentence, prediction['probabilities'],
             LABELS_LIST[prediction['predicted_labels']])
            for sentence, prediction in zip(sentences, predictions)]

In [21]:
sentences = [
    "Workmanship is not bad. only limited to the cost of plastic. but it is also law-abiding. The car imitation leather seat is soft and tough. sitting comfortably and looking at the grade.",
    "The truth is really fuel-efficient. running long distances. basically 100 kilometers. around 6. I ran the best. once. 5.5 is very good.",
    "Steering is precise. the chassis is solid; once I tried a 70-mile speed over a steep bend. maybe the Watt link really played a big role. and the overall feeling was particularly stable when the car was cornering.",
    "The interior workmanship is also more meticulous. but the materials used are not as high-grade as the mid-level cars.",
    "The direction is light and heavy. pointing to the exact gear. It’s a bit boring. The chassis is generally running fast. It feels like floating because of the high chassis.",
    "Power is OK. overall. commercial. 1.5 power is completely enough.",
    "Possible personal problems. It feels a bit off the mark. The steering wheel is heavy  the hydraulic pressure assistant is common . and the direction of the road is strong. It is suitable for the title of small guns. Others have no problem.",
    "The space is relatively large in the compact level. which is slightly better than the Excelle horizontal space.",
    "1.6T did not say. it is definitely the highest level of power. 80 points after the acceleration of the lever",
    "It may be that the running-in period plus the new car is more cherished. anyway. the fuel consumption is higher than I expected.",
    "There is nothing to say about the appearance. I like it. I think it is not bad.",
    "The appearance is not said. I was attracted by its shape. I saw it at first sight. I believe many people are like this.",
    "There is no bad smell of domestic cars. the interior work can be done. not rough.",
    "The design of each function is very user-friendly and the workmanship is not bad. Basically no problem",
    "Rich in configuration. very technical. bringing more relaxed and happy feelings to the car"
]

In [22]:
estimator, _ = bulid_estimator()

INFO:tensorflow:Using config: {'_log_step_count_steps': None, '_model_dir': './output/car_reviews', '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_num_ps_replicas': 0, '_is_chief': True, '_cluster': None, '_task_id': 0, '_service': None, '_train_distribute': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_keep_checkpoint_every_n_hours': 10000, '_session_config': None, '_num_worker_replicas': 1, '_save_checkpoints_secs': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fe276057710>, '_tf_random_seed': None, '_tpu_config': TPUConfig(iterations_per_loop=50, num_shards=8, num_cores_per_replica=None, per_host_input_for_training=3, tpu_job_name=None, initial_infeed_sleep_secs=None), '_device_fn': None}
INFO:tensorflow:_TPUContext: eval_on_tpu True


In [23]:
predictions = get_prediction_for_sentences(estimator, sentences)

INFO:tensorflow:Writing example 0 of 15
INFO:tensorflow:*** Example ***
INFO:tensorflow:guid: test-0
INFO:tensorflow:tokens: [CLS] work ##manship is not bad . only limited to the cost of plastic . but it is also law - ab ##idi ##ng . the car imitation leather seat is soft and tough . sitting comfortably and looking at the grade . [SEP]
INFO:tensorflow:input_ids: 101 2147 21530 2003 2025 2919 1012 2069 3132 2000 1996 3465 1997 6081 1012 2021 2009 2003 2036 2375 1011 11113 28173 3070 1012 1996 2482 20017 5898 2835 2003 3730 1998 7823 1012 3564 18579 1998 2559 2012 1996 3694 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

INFO:tensorflow:  name = bert/encoder/layer_0/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  nam

INFO:tensorflow:  name = bert/encoder/layer_5/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_5/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_5/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_5/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_5/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_5/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_5/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_5/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_5/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKP

INFO:tensorflow:  name = bert/encoder/layer_9/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:t

In [24]:
predictions

[('Workmanship is not bad. only limited to the cost of plastic. but it is also law-abiding. The car imitation leather seat is soft and tough. sitting comfortably and looking at the grade.',
  array([1.0210979e-04, 9.9887127e-01, 1.9428215e-04, 1.7886628e-04,
         3.4286772e-04, 1.2171197e-04, 1.8877473e-04], dtype=float32),
  'interior'),
 ('The truth is really fuel-efficient. running long distances. basically 100 kilometers. around 6. I ran the best. once. 5.5 is very good.',
  array([1.20789206e-04, 1.07856227e-04, 1.53992951e-04, 1.04713756e-04,
         4.34577320e-04, 9.98938978e-01, 1.39106967e-04], dtype=float32),
  'energy'),
 ('Steering is precise. the chassis is solid; once I tried a 70-mile speed over a steep bend. maybe the Watt link really played a big role. and the overall feeling was particularly stable when the car was cornering.',
  array([9.9922109e-01, 6.2890031e-05, 1.4882813e-04, 1.5693081e-04,
         1.3572088e-04, 1.3878632e-04, 1.3568361e-04], dtype=float3