In [40]:
import tensorflow as tf
import tensorflow_hub as hub
from bert import tokenization, optimization, run_classifier
import os
import csv
from datetime import datetime

In [44]:
def create_model(is_training, input_ids, input_mask, segment_ids, labels,
                 num_labels, bert_hub_module_handle):
    """
    Creates a classification model
    """
    tags = set()
    if is_training:
        tags.add("train")
    bert_module = hub.Module(bert_hub_module_handle,
                            tags=tags,
                            trainable=True)
    bert_inputs = dict(input_ids=input_ids,
                       input_mask=input_mask,
                       segment_ids=segment_ids)
    bert_outputs = bert_module(inputs=bert_inputs,
                               signature="tokens",
                               as_dict=True)
    output_layer = bert_outputs["pooled_output"]
    hidden_size = output_layer.shape[-1].value
    output_weight = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())
    with tf.variable_scope("loss"):
        if is_training:
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
        logits = tf.matmul(output_layer, output_weight, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        probabilities = tf.nn.softmax(logits, axis=-1)
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        predicted_labels = tf.squeeze(
            tf.argmax(log_probs, axis=-1, output_type=tf.int32))
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)
        return (loss, per_example_loss, logits, probabilities, log_probs,
                predicted_labels)

In [35]:
def model_fn_build(num_labels, learning_rate, num_train_steps,
                   num_warmup_steps, bert_hub_module_handle):
    def model_fn(features, labels, mode, params):
        tf.logging.info("****** Features ******")
        for name in sorted(features.keys()):
            tf.logging.info("name = %s, shape = %s" %
                            (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        (total_loss, per_example_loss, logits, probabilities, log_probs,
         predicted_labels) = create_model(is_training, input_ids, input_mask,
                                          segment_ids, label_ids, num_labels,
                                          bert_hub_module_handle)
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss,
                                                     learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps,
                                                     use_tpu=False)
            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     loss=total_loss,
                                                     train_op=train_op)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                accuracy = tf.metrics.accuracy(labels=label_ids,
                                               predictions=predictions)
                loss = tf.metrics.mean(per_example_loss)
                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                }

            eval_metrics = metric_fn(per_example_loss, label_ids, logits)
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode, loss=total_loss, eval_metric_ops=eval_metrics)
        elif mode == tf.estimator.ModeKeys.PREDICT:
            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     predictions={
                                                         "probabilities":
                                                         probabilities,
                                                         "log_probs":
                                                         log_probs,
                                                         "predicted_labels":
                                                         predicted_labels
                                                     })
        else:
            raise ValueError(
                "Only TRAIN, EVAL and PREDICT modes are supported: %s" %
                (mode))
        return output_spec

    return model_fn

In [5]:
def bbc_data_processor(data_dir, set_type, test_label):
    file_path = os.path.join(data_dir, "bbc_" + set_type + ".csv")
    with tf.gfile.Open(file_path, "r") as f:
        reader = csv.reader(f, delimiter=",", quotechar=None)
        examples = []
        labels = []
        labels_test = []
        for (i, line) in enumerate(reader):
            if i == 0:
                continue
            guid = set_type + "-" + str(i)
            text_a = tokenization.convert_to_unicode(line[1])
            label = tokenization.convert_to_unicode(line[0])
            labels.append(label)
            if set_type == "test":
                label = test_label
            labels_test.append(label)
            examples.append(
                run_classifier.InputExample(guid=guid,
                                            text_a=text_a,
                                            text_b=None,
                                            label=label))
        return examples, labels, labels_test

In [6]:
DATA_DIR = "./data"

In [7]:
train_InputExamples, train_labels, train_labels_test = bbc_data_processor(data_dir=DATA_DIR,set_type="train",test_label="tech")

In [8]:
train_labels_list = list(set(train_labels))

In [10]:
test_InputExamples,test_labels, test_labels_test = bbc_data_processor(data_dir=DATA_DIR,set_type="test",test_label="tech")

In [11]:
test_labels_list = list(set(test_labels))

In [12]:
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

In [13]:
def create_tokenizer_from_hub_module(bert_hub_module_handle):
    """Get the vocab file and casing info from the Hub module."""
    with tf.Graph().as_default():
        bert_module = hub.Module(bert_hub_module_handle)
        tokenization_info = bert_module(signature="tokenization_info",
                                        as_dict=True)
        with tf.Session() as sess:
            vocab_file, do_lower_case = sess.run([
                tokenization_info["vocab_file"],
                tokenization_info["do_lower_case"]
            ])

    return tokenization.FullTokenizer(vocab_file=vocab_file,
                                           do_lower_case=do_lower_case)

In [14]:
tokenizer = create_tokenizer_from_hub_module(BERT_MODEL_HUB)

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore








In [15]:
tokenizer.tokenize("This here's an example of using the BERT tokenizer")

['this',
 'here',
 "'",
 's',
 'an',
 'example',
 'of',
 'using',
 'the',
 'bert',
 'token',
 '##izer']

In [17]:
MAX_SEQ_LENGTH = 128

In [18]:
train_features = run_classifier.convert_examples_to_features(train_InputExamples, train_labels_list, MAX_SEQ_LENGTH, tokenizer)







INFO:tensorflow:Writing example 0 of 1780


INFO:tensorflow:Writing example 0 of 1780


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: train-1


INFO:tensorflow:guid: train-1


INFO:tensorflow:tokens: [CLS] tv future in the hands of viewers with home theatre systems plasma high - definition tv ##s and digital video recorder ##s moving into the living room the way people watch tv will be radically different in five years time . that is according to an expert panel which gathered at the annual consumer electronics show in las vegas to discuss how these new technologies will impact one of our favourite past ##ime ##s . with the us leading the trend programmes and other content will be delivered to viewers via home networks through cable satellite telecom ##s companies and broadband service providers to front rooms and portable devices . one of the most talked - about technologies of ce ##s has been digital and personal [SEP]


INFO:tensorflow:tokens: [CLS] tv future in the hands of viewers with home theatre systems plasma high - definition tv ##s and digital video recorder ##s moving into the living room the way people watch tv will be radically different in five years time . that is according to an expert panel which gathered at the annual consumer electronics show in las vegas to discuss how these new technologies will impact one of our favourite past ##ime ##s . with the us leading the trend programmes and other content will be delivered to viewers via home networks through cable satellite telecom ##s companies and broadband service providers to front rooms and portable devices . one of the most talked - about technologies of ce ##s has been digital and personal [SEP]


INFO:tensorflow:input_ids: 101 2694 2925 1999 1996 2398 1997 7193 2007 2188 3004 3001 12123 2152 1011 6210 2694 2015 1998 3617 2678 14520 2015 3048 2046 1996 2542 2282 1996 2126 2111 3422 2694 2097 2022 25796 2367 1999 2274 2086 2051 1012 2008 2003 2429 2000 2019 6739 5997 2029 5935 2012 1996 3296 7325 8139 2265 1999 5869 7136 2000 6848 2129 2122 2047 6786 2097 4254 2028 1997 2256 8837 2627 14428 2015 1012 2007 1996 2149 2877 1996 9874 8497 1998 2060 4180 2097 2022 5359 2000 7193 3081 2188 6125 2083 5830 5871 18126 2015 3316 1998 19595 2326 11670 2000 2392 4734 1998 12109 5733 1012 2028 1997 1996 2087 5720 1011 2055 6786 1997 8292 2015 2038 2042 3617 1998 3167 102


INFO:tensorflow:input_ids: 101 2694 2925 1999 1996 2398 1997 7193 2007 2188 3004 3001 12123 2152 1011 6210 2694 2015 1998 3617 2678 14520 2015 3048 2046 1996 2542 2282 1996 2126 2111 3422 2694 2097 2022 25796 2367 1999 2274 2086 2051 1012 2008 2003 2429 2000 2019 6739 5997 2029 5935 2012 1996 3296 7325 8139 2265 1999 5869 7136 2000 6848 2129 2122 2047 6786 2097 4254 2028 1997 2256 8837 2627 14428 2015 1012 2007 1996 2149 2877 1996 9874 8497 1998 2060 4180 2097 2022 5359 2000 7193 3081 2188 6125 2083 5830 5871 18126 2015 3316 1998 19595 2326 11670 2000 2392 4734 1998 12109 5733 1012 2028 1997 1996 2087 5720 1011 2055 6786 1997 8292 2015 2038 2042 3617 1998 3167 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: train-2


INFO:tensorflow:guid: train-2


INFO:tensorflow:tokens: [CLS] world ##com boss left books alone former world ##com boss bernie e ##bber ##s who is accused of overseeing an $ 11 ##bn ( £5 . 8 ##bn ) fraud never made accounting decisions a witness has told ju ##ror ##s . david myers made the comments under questioning by defence lawyers who have been arguing that mr e ##bber ##s was not responsible for world ##com s problems . the phone company collapsed in 2002 and prosecutors claim that losses were hidden to protect the firm s shares . mr myers has already pleaded guilty to fraud and is assisting prosecutors . on monday defence lawyer reid wei ##nga ##rte ##n tried to distance his client from the allegations . during cross examination he asked [SEP]


INFO:tensorflow:tokens: [CLS] world ##com boss left books alone former world ##com boss bernie e ##bber ##s who is accused of overseeing an $ 11 ##bn ( £5 . 8 ##bn ) fraud never made accounting decisions a witness has told ju ##ror ##s . david myers made the comments under questioning by defence lawyers who have been arguing that mr e ##bber ##s was not responsible for world ##com s problems . the phone company collapsed in 2002 and prosecutors claim that losses were hidden to protect the firm s shares . mr myers has already pleaded guilty to fraud and is assisting prosecutors . on monday defence lawyer reid wei ##nga ##rte ##n tried to distance his client from the allegations . during cross examination he asked [SEP]


INFO:tensorflow:input_ids: 101 2088 9006 5795 2187 2808 2894 2280 2088 9006 5795 15941 1041 29325 2015 2040 2003 5496 1997 19642 2019 1002 2340 24700 1006 27813 1012 1022 24700 1007 9861 2196 2081 9529 6567 1037 7409 2038 2409 18414 29165 2015 1012 2585 13854 2081 1996 7928 2104 11242 2011 4721 9559 2040 2031 2042 9177 2008 2720 1041 29325 2015 2001 2025 3625 2005 2088 9006 1055 3471 1012 1996 3042 2194 7798 1999 2526 1998 19608 4366 2008 6409 2020 5023 2000 4047 1996 3813 1055 6661 1012 2720 13854 2038 2525 12254 5905 2000 9861 1998 2003 13951 19608 1012 2006 6928 4721 5160 9027 11417 13807 19731 2078 2699 2000 3292 2010 7396 2013 1996 9989 1012 2076 2892 7749 2002 2356 102


INFO:tensorflow:input_ids: 101 2088 9006 5795 2187 2808 2894 2280 2088 9006 5795 15941 1041 29325 2015 2040 2003 5496 1997 19642 2019 1002 2340 24700 1006 27813 1012 1022 24700 1007 9861 2196 2081 9529 6567 1037 7409 2038 2409 18414 29165 2015 1012 2585 13854 2081 1996 7928 2104 11242 2011 4721 9559 2040 2031 2042 9177 2008 2720 1041 29325 2015 2001 2025 3625 2005 2088 9006 1055 3471 1012 1996 3042 2194 7798 1999 2526 1998 19608 4366 2008 6409 2020 5023 2000 4047 1996 3813 1055 6661 1012 2720 13854 2038 2525 12254 5905 2000 9861 1998 2003 13951 19608 1012 2006 6928 4721 5160 9027 11417 13807 19731 2078 2699 2000 3292 2010 7396 2013 1996 9989 1012 2076 2892 7749 2002 2356 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: business (id = 3)


INFO:tensorflow:label: business (id = 3)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: train-3


INFO:tensorflow:guid: train-3


INFO:tensorflow:tokens: [CLS] tigers wary of farrell gamble leicester say they will not be rushed into making a bid for andy farrell should the great britain rugby league captain decide to switch codes . we and anybody else involved in the process are still some way away from going to the next stage tigers boss john wells told bbc radio leicester . at the moment there are still a lot of unknown ##s about andy farrell not least his medical situation . whoever does take him on is going to take a big big gamble . farrell who has had persistent knee problems had an operation on his knee five weeks ago and is expected to be out for another three months . leicester and sara ##cens are believed [SEP]


INFO:tensorflow:tokens: [CLS] tigers wary of farrell gamble leicester say they will not be rushed into making a bid for andy farrell should the great britain rugby league captain decide to switch codes . we and anybody else involved in the process are still some way away from going to the next stage tigers boss john wells told bbc radio leicester . at the moment there are still a lot of unknown ##s about andy farrell not least his medical situation . whoever does take him on is going to take a big big gamble . farrell who has had persistent knee problems had an operation on his knee five weeks ago and is expected to be out for another three months . leicester and sara ##cens are believed [SEP]


INFO:tensorflow:input_ids: 101 7600 15705 1997 16248 18503 11258 2360 2027 2097 2025 2022 6760 2046 2437 1037 7226 2005 5557 16248 2323 1996 2307 3725 4043 2223 2952 5630 2000 6942 9537 1012 2057 1998 10334 2842 2920 1999 1996 2832 2024 2145 2070 2126 2185 2013 2183 2000 1996 2279 2754 7600 5795 2198 7051 2409 4035 2557 11258 1012 2012 1996 2617 2045 2024 2145 1037 2843 1997 4242 2015 2055 5557 16248 2025 2560 2010 2966 3663 1012 9444 2515 2202 2032 2006 2003 2183 2000 2202 1037 2502 2502 18503 1012 16248 2040 2038 2018 14516 6181 3471 2018 2019 3169 2006 2010 6181 2274 3134 3283 1998 2003 3517 2000 2022 2041 2005 2178 2093 2706 1012 11258 1998 7354 19023 2024 3373 102


INFO:tensorflow:input_ids: 101 7600 15705 1997 16248 18503 11258 2360 2027 2097 2025 2022 6760 2046 2437 1037 7226 2005 5557 16248 2323 1996 2307 3725 4043 2223 2952 5630 2000 6942 9537 1012 2057 1998 10334 2842 2920 1999 1996 2832 2024 2145 2070 2126 2185 2013 2183 2000 1996 2279 2754 7600 5795 2198 7051 2409 4035 2557 11258 1012 2012 1996 2617 2045 2024 2145 1037 2843 1997 4242 2015 2055 5557 16248 2025 2560 2010 2966 3663 1012 9444 2515 2202 2032 2006 2003 2183 2000 2202 1037 2502 2502 18503 1012 16248 2040 2038 2018 14516 6181 3471 2018 2019 3169 2006 2010 6181 2274 3134 3283 1998 2003 3517 2000 2022 2041 2005 2178 2093 2706 1012 11258 1998 7354 19023 2024 3373 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: sport (id = 2)


INFO:tensorflow:label: sport (id = 2)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: train-4


INFO:tensorflow:guid: train-4


INFO:tensorflow:tokens: [CLS] ye ##ading face newcastle in fa cup premiership side newcastle united face a trip to ry ##man premier league leaders ye ##ading in the fa cup third round . the game - arguably the highlight of the draw - is a potential money - spin ##ner for non - league ye ##ading who beat sl ##ough in the second round . conference side exeter city who knocked out doncaster on saturday will travel to old trafford to meet holders manchester united in january . arsenal were drawn at home to stoke and chelsea will play host to sc ##unt ##horpe . the only other non - league side in the draw are hi ##nc ##kley united who held brentford to a goal ##less draw on sunday [SEP]


INFO:tensorflow:tokens: [CLS] ye ##ading face newcastle in fa cup premiership side newcastle united face a trip to ry ##man premier league leaders ye ##ading in the fa cup third round . the game - arguably the highlight of the draw - is a potential money - spin ##ner for non - league ye ##ading who beat sl ##ough in the second round . conference side exeter city who knocked out doncaster on saturday will travel to old trafford to meet holders manchester united in january . arsenal were drawn at home to stoke and chelsea will play host to sc ##unt ##horpe . the only other non - league side in the draw are hi ##nc ##kley united who held brentford to a goal ##less draw on sunday [SEP]


INFO:tensorflow:input_ids: 101 6300 23782 2227 8142 1999 6904 2452 11264 2217 8142 2142 2227 1037 4440 2000 29431 2386 4239 2223 4177 6300 23782 1999 1996 6904 2452 2353 2461 1012 1996 2208 1011 15835 1996 12944 1997 1996 4009 1011 2003 1037 4022 2769 1011 6714 3678 2005 2512 1011 2223 6300 23782 2040 3786 22889 10593 1999 1996 2117 2461 1012 3034 2217 12869 2103 2040 6573 2041 18895 2006 5095 2097 3604 2000 2214 26894 2000 3113 13304 5087 2142 1999 2254 1012 9433 2020 4567 2012 2188 2000 13299 1998 9295 2097 2377 3677 2000 8040 16671 22044 1012 1996 2069 2060 2512 1011 2223 2217 1999 1996 4009 2024 7632 12273 22315 2142 2040 2218 26550 2000 1037 3125 3238 4009 2006 4465 102


INFO:tensorflow:input_ids: 101 6300 23782 2227 8142 1999 6904 2452 11264 2217 8142 2142 2227 1037 4440 2000 29431 2386 4239 2223 4177 6300 23782 1999 1996 6904 2452 2353 2461 1012 1996 2208 1011 15835 1996 12944 1997 1996 4009 1011 2003 1037 4022 2769 1011 6714 3678 2005 2512 1011 2223 6300 23782 2040 3786 22889 10593 1999 1996 2117 2461 1012 3034 2217 12869 2103 2040 6573 2041 18895 2006 5095 2097 3604 2000 2214 26894 2000 3113 13304 5087 2142 1999 2254 1012 9433 2020 4567 2012 2188 2000 13299 1998 9295 2097 2377 3677 2000 8040 16671 22044 1012 1996 2069 2060 2512 1011 2223 2217 1999 1996 4009 2024 7632 12273 22315 2142 2040 2218 26550 2000 1037 3125 3238 4009 2006 4465 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: sport (id = 2)


INFO:tensorflow:label: sport (id = 2)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: train-5


INFO:tensorflow:guid: train-5


INFO:tensorflow:tokens: [CLS] ocean s twelve raids box office ocean s twelve the crime cape ##r sequel starring george cl ##oon ##ey brad pitt and julia roberts has gone straight to number one in the us box office chart . it took $ 40 . 8 ##m ( £2 ##1 ##m ) in weekend ticket sales according to studio estimates . the sequel follows the master criminals as they try to pull off three major he ##ists across europe . it knocked last week s number one national treasure into third place . wesley s ##ni ##pes blade : trinity was in second taking $ 16 . 1 ##m ( £ ##8 . 4 ##m ) . rounding out the top five was animated fable the polar express starring [SEP]


INFO:tensorflow:tokens: [CLS] ocean s twelve raids box office ocean s twelve the crime cape ##r sequel starring george cl ##oon ##ey brad pitt and julia roberts has gone straight to number one in the us box office chart . it took $ 40 . 8 ##m ( £2 ##1 ##m ) in weekend ticket sales according to studio estimates . the sequel follows the master criminals as they try to pull off three major he ##ists across europe . it knocked last week s number one national treasure into third place . wesley s ##ni ##pes blade : trinity was in second taking $ 16 . 1 ##m ( £ ##8 . 4 ##m ) . rounding out the top five was animated fable the polar express starring [SEP]


INFO:tensorflow:input_ids: 101 4153 1055 4376 11217 3482 2436 4153 1055 4376 1996 4126 4880 2099 8297 4626 2577 18856 7828 3240 8226 15091 1998 6423 7031 2038 2908 3442 2000 2193 2028 1999 1996 2149 3482 2436 3673 1012 2009 2165 1002 2871 1012 1022 2213 1006 21853 2487 2213 1007 1999 5353 7281 4341 2429 2000 2996 10035 1012 1996 8297 4076 1996 3040 12290 2004 2027 3046 2000 4139 2125 2093 2350 2002 5130 2408 2885 1012 2009 6573 2197 2733 1055 2193 2028 2120 8813 2046 2353 2173 1012 11482 1055 3490 10374 6085 1024 7124 2001 1999 2117 2635 1002 2385 1012 1015 2213 1006 1069 2620 1012 1018 2213 1007 1012 26939 2041 1996 2327 2274 2001 6579 28458 1996 11508 4671 4626 102


INFO:tensorflow:input_ids: 101 4153 1055 4376 11217 3482 2436 4153 1055 4376 1996 4126 4880 2099 8297 4626 2577 18856 7828 3240 8226 15091 1998 6423 7031 2038 2908 3442 2000 2193 2028 1999 1996 2149 3482 2436 3673 1012 2009 2165 1002 2871 1012 1022 2213 1006 21853 2487 2213 1007 1999 5353 7281 4341 2429 2000 2996 10035 1012 1996 8297 4076 1996 3040 12290 2004 2027 3046 2000 4139 2125 2093 2350 2002 5130 2408 2885 1012 2009 6573 2197 2733 1055 2193 2028 2120 8813 2046 2353 2173 1012 11482 1055 3490 10374 6085 1024 7124 2001 1999 2117 2635 1002 2385 1012 1015 2213 1006 1069 2620 1012 1018 2213 1007 1012 26939 2041 1996 2327 2274 2001 6579 28458 1996 11508 4671 4626 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: entertainment (id = 1)


INFO:tensorflow:label: entertainment (id = 1)


In [19]:
len(train_features)

1780

In [22]:
test_features = run_classifier.convert_examples_to_features(test_InputExamples, test_labels_list, MAX_SEQ_LENGTH, tokenizer)

INFO:tensorflow:Writing example 0 of 445


INFO:tensorflow:Writing example 0 of 445


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: test-1


INFO:tensorflow:guid: test-1


INFO:tensorflow:tokens: [CLS] ho ##bb ##it picture four years away lord of the rings director peter jackson has said that it will be up to four years before he starts work on a film version of the ho ##bb ##it . the oscar winner said on a visit to sydney there was a desire to make it but not before lengthy negotiations . i think it s gonna be a lot of lawyers sitting in a room trying to thrash out a deal before it will ever happen said the new zealand ##er . the rights to jr ##r tolkien s book are split between two major film studios . jackson who is currently filming a remake of hollywood classic king kong said he thought that the sale of [SEP]


INFO:tensorflow:tokens: [CLS] ho ##bb ##it picture four years away lord of the rings director peter jackson has said that it will be up to four years before he starts work on a film version of the ho ##bb ##it . the oscar winner said on a visit to sydney there was a desire to make it but not before lengthy negotiations . i think it s gonna be a lot of lawyers sitting in a room trying to thrash out a deal before it will ever happen said the new zealand ##er . the rights to jr ##r tolkien s book are split between two major film studios . jackson who is currently filming a remake of hollywood classic king kong said he thought that the sale of [SEP]


INFO:tensorflow:input_ids: 101 7570 10322 4183 3861 2176 2086 2185 2935 1997 1996 7635 2472 2848 4027 2038 2056 2008 2009 2097 2022 2039 2000 2176 2086 2077 2002 4627 2147 2006 1037 2143 2544 1997 1996 7570 10322 4183 1012 1996 7436 3453 2056 2006 1037 3942 2000 3994 2045 2001 1037 4792 2000 2191 2009 2021 2025 2077 12401 7776 1012 1045 2228 2009 1055 6069 2022 1037 2843 1997 9559 3564 1999 1037 2282 2667 2000 27042 2041 1037 3066 2077 2009 2097 2412 4148 2056 1996 2047 3414 2121 1012 1996 2916 2000 3781 2099 23602 1055 2338 2024 3975 2090 2048 2350 2143 4835 1012 4027 2040 2003 2747 7467 1037 12661 1997 5365 4438 2332 4290 2056 2002 2245 2008 1996 5096 1997 102


INFO:tensorflow:input_ids: 101 7570 10322 4183 3861 2176 2086 2185 2935 1997 1996 7635 2472 2848 4027 2038 2056 2008 2009 2097 2022 2039 2000 2176 2086 2077 2002 4627 2147 2006 1037 2143 2544 1997 1996 7570 10322 4183 1012 1996 7436 3453 2056 2006 1037 3942 2000 3994 2045 2001 1037 4792 2000 2191 2009 2021 2025 2077 12401 7776 1012 1045 2228 2009 1055 6069 2022 1037 2843 1997 9559 3564 1999 1037 2282 2667 2000 27042 2041 1037 3066 2077 2009 2097 2412 4148 2056 1996 2047 3414 2121 1012 1996 2916 2000 3781 2099 23602 1055 2338 2024 3975 2090 2048 2350 2143 4835 1012 4027 2040 2003 2747 7467 1037 12661 1997 5365 4438 2332 4290 2056 2002 2245 2008 1996 5096 1997 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: test-2


INFO:tensorflow:guid: test-2


INFO:tensorflow:tokens: [CLS] game firm holds cast auditions video game firm bio ##ware is to hold open auditions for people to become cast members for future games . the company which makes role playing games such as knights of the old republic and never ##win ##ter nights is seeking people aged 18 to 99 . the canada - based company says it was looking for a wide variety of people to use as face models for characters . everyone chosen to appear in a video game will receive a performer s fee for the use of their image . the company is inviting people to come along to a shopping mall in west edmonton alberta on friday and saturday bringing along a piece of photo identification . there are [SEP]


INFO:tensorflow:tokens: [CLS] game firm holds cast auditions video game firm bio ##ware is to hold open auditions for people to become cast members for future games . the company which makes role playing games such as knights of the old republic and never ##win ##ter nights is seeking people aged 18 to 99 . the canada - based company says it was looking for a wide variety of people to use as face models for characters . everyone chosen to appear in a video game will receive a performer s fee for the use of their image . the company is inviting people to come along to a shopping mall in west edmonton alberta on friday and saturday bringing along a piece of photo identification . there are [SEP]


INFO:tensorflow:input_ids: 101 2208 3813 4324 3459 21732 2678 2208 3813 16012 8059 2003 2000 2907 2330 21732 2005 2111 2000 2468 3459 2372 2005 2925 2399 1012 1996 2194 2029 3084 2535 2652 2399 2107 2004 7307 1997 1996 2214 3072 1998 2196 10105 3334 6385 2003 6224 2111 4793 2324 2000 5585 1012 1996 2710 1011 2241 2194 2758 2009 2001 2559 2005 1037 2898 3528 1997 2111 2000 2224 2004 2227 4275 2005 3494 1012 3071 4217 2000 3711 1999 1037 2678 2208 2097 4374 1037 9256 1055 7408 2005 1996 2224 1997 2037 3746 1012 1996 2194 2003 15085 2111 2000 2272 2247 2000 1037 6023 6670 1999 2225 10522 7649 2006 5958 1998 5095 5026 2247 1037 3538 1997 6302 8720 1012 2045 2024 102


INFO:tensorflow:input_ids: 101 2208 3813 4324 3459 21732 2678 2208 3813 16012 8059 2003 2000 2907 2330 21732 2005 2111 2000 2468 3459 2372 2005 2925 2399 1012 1996 2194 2029 3084 2535 2652 2399 2107 2004 7307 1997 1996 2214 3072 1998 2196 10105 3334 6385 2003 6224 2111 4793 2324 2000 5585 1012 1996 2710 1011 2241 2194 2758 2009 2001 2559 2005 1037 2898 3528 1997 2111 2000 2224 2004 2227 4275 2005 3494 1012 3071 4217 2000 3711 1999 1037 2678 2208 2097 4374 1037 9256 1055 7408 2005 1996 2224 1997 2037 3746 1012 1996 2194 2003 15085 2111 2000 2272 2247 2000 1037 6023 6670 1999 2225 10522 7649 2006 5958 1998 5095 5026 2247 1037 3538 1997 6302 8720 1012 2045 2024 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: test-3


INFO:tensorflow:guid: test-3


INFO:tensorflow:tokens: [CLS] clarke plans migrant point scheme anyone planning to move to the uk will have to pass a test to prove they can contribute to the country home secretary charles clarke has said . he is proposing a points system similar to australia s but would avoid the quota system planned by the tori ##es . mr clarke who will un ##ve ##il his plans on monday said economic migration helped the uk but needed proper policing . the li ##b dem ##s say they will look at his plans but tory liam fox said his party offered a clear choice on the issue . the conservative party co - chairman said the british electorate had a choice between a labour government that had done nothing for [SEP]


INFO:tensorflow:tokens: [CLS] clarke plans migrant point scheme anyone planning to move to the uk will have to pass a test to prove they can contribute to the country home secretary charles clarke has said . he is proposing a points system similar to australia s but would avoid the quota system planned by the tori ##es . mr clarke who will un ##ve ##il his plans on monday said economic migration helped the uk but needed proper policing . the li ##b dem ##s say they will look at his plans but tory liam fox said his party offered a clear choice on the issue . the conservative party co - chairman said the british electorate had a choice between a labour government that had done nothing for [SEP]


INFO:tensorflow:input_ids: 101 8359 3488 20731 2391 5679 3087 4041 2000 2693 2000 1996 2866 2097 2031 2000 3413 1037 3231 2000 6011 2027 2064 9002 2000 1996 2406 2188 3187 2798 8359 2038 2056 1012 2002 2003 21991 1037 2685 2291 2714 2000 2660 1055 2021 2052 4468 1996 20563 2291 3740 2011 1996 23413 2229 1012 2720 8359 2040 2097 4895 3726 4014 2010 3488 2006 6928 2056 3171 9230 3271 1996 2866 2021 2734 5372 21107 1012 1996 5622 2497 17183 2015 2360 2027 2097 2298 2012 2010 3488 2021 17117 8230 4419 2056 2010 2283 3253 1037 3154 3601 2006 1996 3277 1012 1996 4603 2283 2522 1011 3472 2056 1996 2329 13694 2018 1037 3601 2090 1037 4428 2231 2008 2018 2589 2498 2005 102


INFO:tensorflow:input_ids: 101 8359 3488 20731 2391 5679 3087 4041 2000 2693 2000 1996 2866 2097 2031 2000 3413 1037 3231 2000 6011 2027 2064 9002 2000 1996 2406 2188 3187 2798 8359 2038 2056 1012 2002 2003 21991 1037 2685 2291 2714 2000 2660 1055 2021 2052 4468 1996 20563 2291 3740 2011 1996 23413 2229 1012 2720 8359 2040 2097 4895 3726 4014 2010 3488 2006 6928 2056 3171 9230 3271 1996 2866 2021 2734 5372 21107 1012 1996 5622 2497 17183 2015 2360 2027 2097 2298 2012 2010 3488 2021 17117 8230 4419 2056 2010 2283 3253 1037 3154 3601 2006 1996 3277 1012 1996 4603 2283 2522 1011 3472 2056 1996 2329 13694 2018 1037 3601 2090 1037 4428 2231 2008 2018 2589 2498 2005 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: test-4


INFO:tensorflow:guid: test-4


INFO:tensorflow:tokens: [CLS] radcliffe will compete in london paula radcliffe will compete in the flora london marathon this year after deciding her schedule for 2005 . the 31 - year - old won the race in 2002 on her marathon debut defended her title 12 months later and will now seek a third title in the 17 april race . it doesn t get any better than this for the 25th anniversary said race director david bedford . after announcing the greatest men s field ever we now have the greatest women s distance runner ever . three years ago radcliffe smashed the women s world record in two hours 18 minutes 15 seconds . the bedford star returned to london 12 months later lowering her mixed - race [SEP]


INFO:tensorflow:tokens: [CLS] radcliffe will compete in london paula radcliffe will compete in the flora london marathon this year after deciding her schedule for 2005 . the 31 - year - old won the race in 2002 on her marathon debut defended her title 12 months later and will now seek a third title in the 17 april race . it doesn t get any better than this for the 25th anniversary said race director david bedford . after announcing the greatest men s field ever we now have the greatest women s distance runner ever . three years ago radcliffe smashed the women s world record in two hours 18 minutes 15 seconds . the bedford star returned to london 12 months later lowering her mixed - race [SEP]


INFO:tensorflow:input_ids: 101 22603 2097 5566 1999 2414 13723 22603 2097 5566 1999 1996 10088 2414 8589 2023 2095 2044 10561 2014 6134 2005 2384 1012 1996 2861 1011 2095 1011 2214 2180 1996 2679 1999 2526 2006 2014 8589 2834 8047 2014 2516 2260 2706 2101 1998 2097 2085 6148 1037 2353 2516 1999 1996 2459 2258 2679 1012 2009 2987 1056 2131 2151 2488 2084 2023 2005 1996 10965 5315 2056 2679 2472 2585 12003 1012 2044 13856 1996 4602 2273 1055 2492 2412 2057 2085 2031 1996 4602 2308 1055 3292 5479 2412 1012 2093 2086 3283 22603 14368 1996 2308 1055 2088 2501 1999 2048 2847 2324 2781 2321 3823 1012 1996 12003 2732 2513 2000 2414 2260 2706 2101 13845 2014 3816 1011 2679 102


INFO:tensorflow:input_ids: 101 22603 2097 5566 1999 2414 13723 22603 2097 5566 1999 1996 10088 2414 8589 2023 2095 2044 10561 2014 6134 2005 2384 1012 1996 2861 1011 2095 1011 2214 2180 1996 2679 1999 2526 2006 2014 8589 2834 8047 2014 2516 2260 2706 2101 1998 2097 2085 6148 1037 2353 2516 1999 1996 2459 2258 2679 1012 2009 2987 1056 2131 2151 2488 2084 2023 2005 1996 10965 5315 2056 2679 2472 2585 12003 1012 2044 13856 1996 4602 2273 1055 2492 2412 2057 2085 2031 1996 4602 2308 1055 3292 5479 2412 1012 2093 2086 3283 22603 14368 1996 2308 1055 2088 2501 1999 2048 2847 2324 2781 2321 3823 1012 1996 12003 2732 2513 2000 2414 2260 2706 2101 13845 2014 3816 1011 2679 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: test-5


INFO:tensorflow:guid: test-5


INFO:tensorflow:tokens: [CLS] serena becomes world number two serena williams has moved up five places to second in the world rankings after her australian open win . williams won her first grand slam title since 2003 with victory over lindsay davenport the world number one . men s champion mara ##t sa ##fin remains fourth in the atp rankings while beaten finalist ll ##ey ##ton hewitt replaces andy rod ##dick as world number two . roger federer retains top spot but sa ##fin has over ##taken hewitt to become the new leader of the champions race . alicia mo ##lik who lost a three - set thriller against davenport in the quarter - finals is in the women s top 10 for the first time in her career . [SEP]


INFO:tensorflow:tokens: [CLS] serena becomes world number two serena williams has moved up five places to second in the world rankings after her australian open win . williams won her first grand slam title since 2003 with victory over lindsay davenport the world number one . men s champion mara ##t sa ##fin remains fourth in the atp rankings while beaten finalist ll ##ey ##ton hewitt replaces andy rod ##dick as world number two . roger federer retains top spot but sa ##fin has over ##taken hewitt to become the new leader of the champions race . alicia mo ##lik who lost a three - set thriller against davenport in the quarter - finals is in the women s top 10 for the first time in her career . [SEP]


INFO:tensorflow:input_ids: 101 14419 4150 2088 2193 2048 14419 3766 2038 2333 2039 2274 3182 2000 2117 1999 1996 2088 10385 2044 2014 2827 2330 2663 1012 3766 2180 2014 2034 2882 9555 2516 2144 2494 2007 3377 2058 12110 16273 1996 2088 2193 2028 1012 2273 1055 3410 13955 2102 7842 16294 3464 2959 1999 1996 12649 10385 2096 7854 9914 2222 3240 2669 19482 20736 5557 8473 24066 2004 2088 2193 2048 1012 5074 28294 14567 2327 3962 2021 7842 16294 2038 2058 25310 19482 2000 2468 1996 2047 3003 1997 1996 3966 2679 1012 15935 9587 18393 2040 2439 1037 2093 1011 2275 10874 2114 16273 1999 1996 4284 1011 4399 2003 1999 1996 2308 1055 2327 2184 2005 1996 2034 2051 1999 2014 2476 1012 102


INFO:tensorflow:input_ids: 101 14419 4150 2088 2193 2048 14419 3766 2038 2333 2039 2274 3182 2000 2117 1999 1996 2088 10385 2044 2014 2827 2330 2663 1012 3766 2180 2014 2034 2882 9555 2516 2144 2494 2007 3377 2058 12110 16273 1996 2088 2193 2028 1012 2273 1055 3410 13955 2102 7842 16294 3464 2959 1999 1996 12649 10385 2096 7854 9914 2222 3240 2669 19482 20736 5557 8473 24066 2004 2088 2193 2048 1012 5074 28294 14567 2327 3962 2021 7842 16294 2038 2058 25310 19482 2000 2468 1996 2047 3003 1997 1996 3966 2679 1012 15935 9587 18393 2040 2439 1037 2093 1011 2275 10874 2114 16273 1999 1996 4284 1011 4399 2003 1999 1996 2308 1055 2327 2184 2005 1996 2034 2051 1999 2014 2476 1012 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:label: tech (id = 0)


In [23]:
len(test_features)

445

In [24]:
# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 10.0
# Warmup is a period of time where hte learning rate 
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 100
SAVE_SUMMARY_STEPS = 10

In [25]:
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)

In [26]:
num_train_steps

556

In [31]:
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

In [32]:
num_warmup_steps

55

In [29]:
OUTPUT_DIR = "./output_based_on_private_model/bbc"

In [30]:
run_config = tf.estimator.RunConfig(
    model_dir=OUTPUT_DIR,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

In [37]:
model_fn = model_fn_build(num_labels=len(train_labels),
                          learning_rate=LEARNING_RATE,
                          num_train_steps=num_train_steps,
                          num_warmup_steps=num_warmup_steps,
                          bert_hub_module_handle=BERT_MODEL_HUB)

In [38]:
estimator = tf.estimator.Estimator(model_fn=model_fn,
                                   config=run_config,
                                   params={"batch_size": BATCH_SIZE})

INFO:tensorflow:Using config: {'_tf_random_seed': None, '_model_dir': './output_based_on_private_model/bbc', '_experimental_distribute': None, '_is_chief': True, '_num_ps_replicas': 0, '_keep_checkpoint_every_n_hours': 10000, '_experimental_max_worker_delay_secs': None, '_save_checkpoints_steps': 100, '_task_type': 'worker', '_global_id_in_cluster': 0, '_service': None, '_task_id': 0, '_eval_distribute': None, '_save_checkpoints_secs': None, '_save_summary_steps': 10, '_evaluation_master': '', '_device_fn': None, '_keep_checkpoint_max': 5, '_num_worker_replicas': 1, '_train_distribute': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f5370515710>, '_protocol': None, '_master': '', '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_log_step_count_steps': 100}


INFO:tensorflow:Using config: {'_tf_random_seed': None, '_model_dir': './output_based_on_private_model/bbc', '_experimental_distribute': None, '_is_chief': True, '_num_ps_replicas': 0, '_keep_checkpoint_every_n_hours': 10000, '_experimental_max_worker_delay_secs': None, '_save_checkpoints_steps': 100, '_task_type': 'worker', '_global_id_in_cluster': 0, '_service': None, '_task_id': 0, '_eval_distribute': None, '_save_checkpoints_secs': None, '_save_summary_steps': 10, '_evaluation_master': '', '_device_fn': None, '_keep_checkpoint_max': 5, '_num_worker_replicas': 1, '_train_distribute': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f5370515710>, '_protocol': None, '_master': '', '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_log_step_count_steps': 100}


In [39]:
train_input_fn = run_classifier.input_fn_builder(features=train_features,
                                                 seq_length=MAX_SEQ_LENGTH,
                                                 is_training=True,
                                                 drop_remainder=False)

In [45]:
print('Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)

Beginning Training!
INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:****** Features ******


INFO:tensorflow:****** Features ******


INFO:tensorflow:name = input_ids, shape = (?, 128)


INFO:tensorflow:name = input_ids, shape = (?, 128)


INFO:tensorflow:name = input_mask, shape = (?, 128)


INFO:tensorflow:name = input_mask, shape = (?, 128)


INFO:tensorflow:name = label_ids, shape = (?,)


INFO:tensorflow:name = label_ids, shape = (?,)


INFO:tensorflow:name = segment_ids, shape = (?, 128)


INFO:tensorflow:name = segment_ids, shape = (?, 128)


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.














Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


Instructions for updating:
Deprecated in favor of operator or tf.math.divide.








Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where






  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Saving checkpoints for 0 into ./output_based_on_private_model/bbc/model.ckpt.


INFO:tensorflow:Saving checkpoints for 0 into ./output_based_on_private_model/bbc/model.ckpt.


INFO:tensorflow:loss = 7.6226935, step = 1


INFO:tensorflow:loss = 7.6226935, step = 1






INFO:tensorflow:Saving checkpoints for 100 into ./output_based_on_private_model/bbc/model.ckpt.


INFO:tensorflow:Saving checkpoints for 100 into ./output_based_on_private_model/bbc/model.ckpt.


INFO:tensorflow:global_step/sec: 0.0947817


INFO:tensorflow:global_step/sec: 0.0947817


INFO:tensorflow:loss = 0.08171087, step = 101 (1055.062 sec)


INFO:tensorflow:loss = 0.08171087, step = 101 (1055.062 sec)


INFO:tensorflow:Saving checkpoints for 200 into ./output_based_on_private_model/bbc/model.ckpt.


INFO:tensorflow:Saving checkpoints for 200 into ./output_based_on_private_model/bbc/model.ckpt.


INFO:tensorflow:global_step/sec: 0.0961926


INFO:tensorflow:global_step/sec: 0.0961926


INFO:tensorflow:loss = 0.01232374, step = 201 (1039.578 sec)


INFO:tensorflow:loss = 0.01232374, step = 201 (1039.578 sec)


INFO:tensorflow:Saving checkpoints for 300 into ./output_based_on_private_model/bbc/model.ckpt.


INFO:tensorflow:Saving checkpoints for 300 into ./output_based_on_private_model/bbc/model.ckpt.


INFO:tensorflow:global_step/sec: 0.0965575


INFO:tensorflow:global_step/sec: 0.0965575


INFO:tensorflow:loss = 0.007597042, step = 301 (1035.652 sec)


INFO:tensorflow:loss = 0.007597042, step = 301 (1035.652 sec)






INFO:tensorflow:Saving checkpoints for 400 into ./output_based_on_private_model/bbc/model.ckpt.


INFO:tensorflow:Saving checkpoints for 400 into ./output_based_on_private_model/bbc/model.ckpt.


INFO:tensorflow:global_step/sec: 0.0963292


INFO:tensorflow:global_step/sec: 0.0963292


INFO:tensorflow:loss = 0.00611158, step = 401 (1038.106 sec)


INFO:tensorflow:loss = 0.00611158, step = 401 (1038.106 sec)






INFO:tensorflow:Saving checkpoints for 500 into ./output_based_on_private_model/bbc/model.ckpt.


INFO:tensorflow:Saving checkpoints for 500 into ./output_based_on_private_model/bbc/model.ckpt.


Instructions for updating:
Use standard file APIs to delete files with this prefix.


Instructions for updating:
Use standard file APIs to delete files with this prefix.


INFO:tensorflow:global_step/sec: 0.096636


INFO:tensorflow:global_step/sec: 0.096636


INFO:tensorflow:loss = 0.0054200483, step = 501 (1034.811 sec)


INFO:tensorflow:loss = 0.0054200483, step = 501 (1034.811 sec)


INFO:tensorflow:Saving checkpoints for 556 into ./output_based_on_private_model/bbc/model.ckpt.


INFO:tensorflow:Saving checkpoints for 556 into ./output_based_on_private_model/bbc/model.ckpt.


INFO:tensorflow:Loss for final step: 0.004494274.


INFO:tensorflow:Loss for final step: 0.004494274.


Training took time  1:37:40.456834


In [46]:
test_input_fn = run_classifier.input_fn_builder(features=test_features,
                                                seq_length=MAX_SEQ_LENGTH,
                                                is_training=False,
                                                drop_remainder=False)

In [47]:
estimator.evaluate(input_fn=test_input_fn, steps=None)

INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:****** Features ******


INFO:tensorflow:****** Features ******


INFO:tensorflow:name = input_ids, shape = (?, 128)


INFO:tensorflow:name = input_ids, shape = (?, 128)


INFO:tensorflow:name = input_mask, shape = (?, 128)


INFO:tensorflow:name = input_mask, shape = (?, 128)


INFO:tensorflow:name = label_ids, shape = (?,)


INFO:tensorflow:name = label_ids, shape = (?,)


INFO:tensorflow:name = segment_ids, shape = (?, 128)


INFO:tensorflow:name = segment_ids, shape = (?, 128)


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Starting evaluation at 2019-09-04T13:31:42Z


INFO:tensorflow:Starting evaluation at 2019-09-04T13:31:42Z


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


Instructions for updating:
Use standard file APIs to check for files with this prefix.


Instructions for updating:
Use standard file APIs to check for files with this prefix.


INFO:tensorflow:Restoring parameters from ./output_based_on_private_model/bbc/model.ckpt-556


INFO:tensorflow:Restoring parameters from ./output_based_on_private_model/bbc/model.ckpt-556


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Finished evaluation at 2019-09-04-13:32:41


INFO:tensorflow:Finished evaluation at 2019-09-04-13:32:41


INFO:tensorflow:Saving dict for global step 556: eval_accuracy = 0.2, eval_loss = 6.877123, global_step = 556, loss = 6.8728127


INFO:tensorflow:Saving dict for global step 556: eval_accuracy = 0.2, eval_loss = 6.877123, global_step = 556, loss = 6.8728127


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 556: ./output_based_on_private_model/bbc/model.ckpt-556


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 556: ./output_based_on_private_model/bbc/model.ckpt-556


{'eval_accuracy': 0.2,
 'eval_loss': 6.877123,
 'global_step': 556,
 'loss': 6.8728127}

In [48]:
train_labels_list

['tech', 'entertainment', 'sport', 'business', 'politics']

In [55]:
def getPrediction(in_sentences):
    label_list = ['tech', 'entertainment', 'sport', 'business', 'politics']
    input_examples = [
        run_classifier.InputExample(guid="",
                                    text_a=x,
                                    text_b=None,
                                    label=label_list[0]) for x in in_sentences
    ]
    input_features = run_classifier.convert_examples_to_features(
        input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(
        features=input_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    predictions = estimator.predict(predict_input_fn)
    return [(sentence, prediction['probabilities'], prediction['log_probs'],
             label_list[prediction['predicted_labels']])
            for sentence, prediction in zip(in_sentences, predictions)]

In [52]:
pred_sentences = [
    "hobbit picture  four years away  lord of the rings director peter jackson has said that it will be up to four years before he starts work on a film version of the hobbit.  the oscar winner said on a visit to sydney there was a  desire  to make it  but not before lengthy negotiations.  i think it s gonna be a lot of lawyers sitting in a room trying to thrash out a deal before it will ever happen   said the new zealander. the rights to jrr tolkien s book are split between two major film studios. jackson  who is currently filming a remake of hollywood classic king kong  said he thought that the sale of mgm studios to the sony corporation would cast further uncertainty on the project. the 43-year-old was in the australian city to visit a lord of the rings exhibition  which has attracted 140 000 visitors since it opened in december.  the film-maker recently sued film company new line cinema for undisclosed damages over alleged withheld profits and lost revenue from the first part of the middle earth trilogy. the fellowship of the ring from 2001 went on to make worldwide profits of $291 million (£152 million). jackson is thought to have secured the most lucrative film directing deal in history to remake king kong  which is currently in production in wellington. the picture  which stars naomi watts and oscar winner adrien brody  is due to be released in december. jackson has also committed to making a film version of lovely bones  based on the best-selling book by alice sebold.",
    "game firm holds  cast  auditions video game firm bioware is to hold open auditions for people to become cast members for future games.  the company  which makes role playing games such as knights of the old republic and neverwinter nights  is seeking people aged 18 to 99. the canada-based company says it was looking for  a wide variety of people to use as face models for characters . everyone chosen to appear in a video game will receive a performer s fee for the use of their image. the company is inviting people to come along to a shopping mall in west edmonton  alberta  on friday and saturday  bringing along a piece of photo identification.   there are hundreds and hundreds of characters in a typical bioware game   said shauna perry  bioware s audio and external resources producer.  those people live in any city and village and so we need ordinary people  people with interesting faces.  she added:  not everyone is a model in the world so we don t want just models in our games.  people chosen to appear in a game will have their head scanned in three dimensions. hundreds of photos of the person s head are taken so that a model of the head can be generated in 3d.  the 3d model will look exactly like the person - it s really quite incredible how detailed they are   said ms perry. she said chosen participants will have no control over how the image is used in a computer game.  we cannot give people any control over how the images are used.  but their face could be used in multiple games - so they could be the hero in one  the villain in another and just a merchant in a third.",
    "clarke plans migrant point scheme anyone planning to move to the uk will have to pass a test to prove they can contribute to the country  home secretary charles clarke has said.  he is proposing a points system similar to australia s but would avoid the quota system planned by the tories. mr clarke  who will unveil his plans on monday  said economic migration helped the uk but  needed proper policing . the lib dems say they will look at his plans  but tory liam fox said his party offered a  clear choice  on the issue. the conservative party co-chairman said the british electorate had a choice between a labour government that had  done nothing for eight years and will not set a limit  on immigration and a tory one that would impose quotas.  the home secretary said  by 2008  he wanted everyone given a visa and entering the uk to have their fingerprints taken  to  ensure we can know everybody who is in the country . speaking on bbc one s breakfast with frost  he said  economic migrants are of great value to this country   but stressed that proper policing was needed to ensure that they do not become a  burden on society . he said:  we will establish a system ... which looks at the skills  talents and abilities of people seeking to come and work in this country  and ensures that when they come here they have a job and can contribute to the economy of the country.   the home secretary  whose five-year blueprint for immigration and asylum is expected to be published on monday  also rejected claims that the immigration debate encouraged bigotry.  the issue of who does come into this country  and whether they are entitled to be in this country  who does settle here  how we have border controls  is a perfectly legitimate aspect of public debate   he said. liberal democrat home affairs spokesman mark oaten said:  whilst it is good that labour has rejected the tory idea of quotas on asylum  the jury is still out on the home office s ability to deliver a fair and efficient asylum system.   mr howard has said britain should take its fair share of the world s  genuine refugees . but he claims the current asylum system is being abused - and with it britain s generosity. trevor phillips  chairman of the commission for racial equality  called on mr clarke to denounce the suggestion britain s hospitality was being tested by immigration.  tell that to the 44 000 doctors in the nhs and the 70 000 nurses without whom we would really see what pressure on the health service means   he said.  ditto the teachers  from south africa  australia  jamaica  who are reducing the sizes of our classes and schools.  the refugee council said mr howard s proposals would mean there would be no safe haven in the uk.",
    "radcliffe will compete in london paula radcliffe will compete in the flora london marathon this year after deciding her schedule for 2005.  the 31-year-old won the race in 2002 on her marathon debut  defended her title 12 months later and will now seek a third title in the 17 april race.  it doesn t get any better than this for the 25th anniversary   said race director david bedford.  after announcing the greatest men s field ever we now have the greatest women s distance runner ever.  three years ago radcliffe smashed the women s world record in two hours 18 minutes 15 seconds.  the bedford star returned to london 12 months later  lowering her mixed-race world record of 2:17:18  which she set in chicago in october 2003  by one minute 53 secs. radcliffe s career took a setback when she failed to complete the olympic marathon and later dropped out of the athens 10 000m last august. but the 31-year-old bounced back to win the new york marathon in november. radcliffe  however  passed up the chance to go for the  big city  marathon grand slam. with wins in chicago  london and new york  only the boston marathon remains to be conquered but that takes place a day after london.  boston is definitely a race i want to do at some point  but london is very special to me   said radcliffe.  i don t pick races thinking about things like pressure. i pick the ones in my heart i really want to do.  i love the atmosphere  crowds and course and know it will always be a great quality race.  it is also the 25th anniversary this year which adds to the occasion."
]

In [53]:
len(pred_sentences)

4

In [56]:
predictions = getPrediction(pred_sentences)

INFO:tensorflow:Writing example 0 of 4


INFO:tensorflow:Writing example 0 of 4


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: 


INFO:tensorflow:guid: 


INFO:tensorflow:tokens: [CLS] ho ##bb ##it picture four years away lord of the rings director peter jackson has said that it will be up to four years before he starts work on a film version of the ho ##bb ##it . the oscar winner said on a visit to sydney there was a desire to make it but not before lengthy negotiations . i think it s gonna be a lot of lawyers sitting in a room trying to thrash out a deal before it will ever happen said the new zealand ##er . the rights to jr ##r tolkien s book are split between two major film studios . jackson who is currently filming a remake of hollywood classic king kong said he thought that the sale of [SEP]


INFO:tensorflow:tokens: [CLS] ho ##bb ##it picture four years away lord of the rings director peter jackson has said that it will be up to four years before he starts work on a film version of the ho ##bb ##it . the oscar winner said on a visit to sydney there was a desire to make it but not before lengthy negotiations . i think it s gonna be a lot of lawyers sitting in a room trying to thrash out a deal before it will ever happen said the new zealand ##er . the rights to jr ##r tolkien s book are split between two major film studios . jackson who is currently filming a remake of hollywood classic king kong said he thought that the sale of [SEP]


INFO:tensorflow:input_ids: 101 7570 10322 4183 3861 2176 2086 2185 2935 1997 1996 7635 2472 2848 4027 2038 2056 2008 2009 2097 2022 2039 2000 2176 2086 2077 2002 4627 2147 2006 1037 2143 2544 1997 1996 7570 10322 4183 1012 1996 7436 3453 2056 2006 1037 3942 2000 3994 2045 2001 1037 4792 2000 2191 2009 2021 2025 2077 12401 7776 1012 1045 2228 2009 1055 6069 2022 1037 2843 1997 9559 3564 1999 1037 2282 2667 2000 27042 2041 1037 3066 2077 2009 2097 2412 4148 2056 1996 2047 3414 2121 1012 1996 2916 2000 3781 2099 23602 1055 2338 2024 3975 2090 2048 2350 2143 4835 1012 4027 2040 2003 2747 7467 1037 12661 1997 5365 4438 2332 4290 2056 2002 2245 2008 1996 5096 1997 102


INFO:tensorflow:input_ids: 101 7570 10322 4183 3861 2176 2086 2185 2935 1997 1996 7635 2472 2848 4027 2038 2056 2008 2009 2097 2022 2039 2000 2176 2086 2077 2002 4627 2147 2006 1037 2143 2544 1997 1996 7570 10322 4183 1012 1996 7436 3453 2056 2006 1037 3942 2000 3994 2045 2001 1037 4792 2000 2191 2009 2021 2025 2077 12401 7776 1012 1045 2228 2009 1055 6069 2022 1037 2843 1997 9559 3564 1999 1037 2282 2667 2000 27042 2041 1037 3066 2077 2009 2097 2412 4148 2056 1996 2047 3414 2121 1012 1996 2916 2000 3781 2099 23602 1055 2338 2024 3975 2090 2048 2350 2143 4835 1012 4027 2040 2003 2747 7467 1037 12661 1997 5365 4438 2332 4290 2056 2002 2245 2008 1996 5096 1997 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: 


INFO:tensorflow:guid: 


INFO:tensorflow:tokens: [CLS] game firm holds cast auditions video game firm bio ##ware is to hold open auditions for people to become cast members for future games . the company which makes role playing games such as knights of the old republic and never ##win ##ter nights is seeking people aged 18 to 99 . the canada - based company says it was looking for a wide variety of people to use as face models for characters . everyone chosen to appear in a video game will receive a performer s fee for the use of their image . the company is inviting people to come along to a shopping mall in west edmonton alberta on friday and saturday bringing along a piece of photo identification . there are [SEP]


INFO:tensorflow:tokens: [CLS] game firm holds cast auditions video game firm bio ##ware is to hold open auditions for people to become cast members for future games . the company which makes role playing games such as knights of the old republic and never ##win ##ter nights is seeking people aged 18 to 99 . the canada - based company says it was looking for a wide variety of people to use as face models for characters . everyone chosen to appear in a video game will receive a performer s fee for the use of their image . the company is inviting people to come along to a shopping mall in west edmonton alberta on friday and saturday bringing along a piece of photo identification . there are [SEP]


INFO:tensorflow:input_ids: 101 2208 3813 4324 3459 21732 2678 2208 3813 16012 8059 2003 2000 2907 2330 21732 2005 2111 2000 2468 3459 2372 2005 2925 2399 1012 1996 2194 2029 3084 2535 2652 2399 2107 2004 7307 1997 1996 2214 3072 1998 2196 10105 3334 6385 2003 6224 2111 4793 2324 2000 5585 1012 1996 2710 1011 2241 2194 2758 2009 2001 2559 2005 1037 2898 3528 1997 2111 2000 2224 2004 2227 4275 2005 3494 1012 3071 4217 2000 3711 1999 1037 2678 2208 2097 4374 1037 9256 1055 7408 2005 1996 2224 1997 2037 3746 1012 1996 2194 2003 15085 2111 2000 2272 2247 2000 1037 6023 6670 1999 2225 10522 7649 2006 5958 1998 5095 5026 2247 1037 3538 1997 6302 8720 1012 2045 2024 102


INFO:tensorflow:input_ids: 101 2208 3813 4324 3459 21732 2678 2208 3813 16012 8059 2003 2000 2907 2330 21732 2005 2111 2000 2468 3459 2372 2005 2925 2399 1012 1996 2194 2029 3084 2535 2652 2399 2107 2004 7307 1997 1996 2214 3072 1998 2196 10105 3334 6385 2003 6224 2111 4793 2324 2000 5585 1012 1996 2710 1011 2241 2194 2758 2009 2001 2559 2005 1037 2898 3528 1997 2111 2000 2224 2004 2227 4275 2005 3494 1012 3071 4217 2000 3711 1999 1037 2678 2208 2097 4374 1037 9256 1055 7408 2005 1996 2224 1997 2037 3746 1012 1996 2194 2003 15085 2111 2000 2272 2247 2000 1037 6023 6670 1999 2225 10522 7649 2006 5958 1998 5095 5026 2247 1037 3538 1997 6302 8720 1012 2045 2024 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: 


INFO:tensorflow:guid: 


INFO:tensorflow:tokens: [CLS] clarke plans migrant point scheme anyone planning to move to the uk will have to pass a test to prove they can contribute to the country home secretary charles clarke has said . he is proposing a points system similar to australia s but would avoid the quota system planned by the tori ##es . mr clarke who will un ##ve ##il his plans on monday said economic migration helped the uk but needed proper policing . the li ##b dem ##s say they will look at his plans but tory liam fox said his party offered a clear choice on the issue . the conservative party co - chairman said the british electorate had a choice between a labour government that had done nothing for [SEP]


INFO:tensorflow:tokens: [CLS] clarke plans migrant point scheme anyone planning to move to the uk will have to pass a test to prove they can contribute to the country home secretary charles clarke has said . he is proposing a points system similar to australia s but would avoid the quota system planned by the tori ##es . mr clarke who will un ##ve ##il his plans on monday said economic migration helped the uk but needed proper policing . the li ##b dem ##s say they will look at his plans but tory liam fox said his party offered a clear choice on the issue . the conservative party co - chairman said the british electorate had a choice between a labour government that had done nothing for [SEP]


INFO:tensorflow:input_ids: 101 8359 3488 20731 2391 5679 3087 4041 2000 2693 2000 1996 2866 2097 2031 2000 3413 1037 3231 2000 6011 2027 2064 9002 2000 1996 2406 2188 3187 2798 8359 2038 2056 1012 2002 2003 21991 1037 2685 2291 2714 2000 2660 1055 2021 2052 4468 1996 20563 2291 3740 2011 1996 23413 2229 1012 2720 8359 2040 2097 4895 3726 4014 2010 3488 2006 6928 2056 3171 9230 3271 1996 2866 2021 2734 5372 21107 1012 1996 5622 2497 17183 2015 2360 2027 2097 2298 2012 2010 3488 2021 17117 8230 4419 2056 2010 2283 3253 1037 3154 3601 2006 1996 3277 1012 1996 4603 2283 2522 1011 3472 2056 1996 2329 13694 2018 1037 3601 2090 1037 4428 2231 2008 2018 2589 2498 2005 102


INFO:tensorflow:input_ids: 101 8359 3488 20731 2391 5679 3087 4041 2000 2693 2000 1996 2866 2097 2031 2000 3413 1037 3231 2000 6011 2027 2064 9002 2000 1996 2406 2188 3187 2798 8359 2038 2056 1012 2002 2003 21991 1037 2685 2291 2714 2000 2660 1055 2021 2052 4468 1996 20563 2291 3740 2011 1996 23413 2229 1012 2720 8359 2040 2097 4895 3726 4014 2010 3488 2006 6928 2056 3171 9230 3271 1996 2866 2021 2734 5372 21107 1012 1996 5622 2497 17183 2015 2360 2027 2097 2298 2012 2010 3488 2021 17117 8230 4419 2056 2010 2283 3253 1037 3154 3601 2006 1996 3277 1012 1996 4603 2283 2522 1011 3472 2056 1996 2329 13694 2018 1037 3601 2090 1037 4428 2231 2008 2018 2589 2498 2005 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: 


INFO:tensorflow:guid: 


INFO:tensorflow:tokens: [CLS] radcliffe will compete in london paula radcliffe will compete in the flora london marathon this year after deciding her schedule for 2005 . the 31 - year - old won the race in 2002 on her marathon debut defended her title 12 months later and will now seek a third title in the 17 april race . it doesn t get any better than this for the 25th anniversary said race director david bedford . after announcing the greatest men s field ever we now have the greatest women s distance runner ever . three years ago radcliffe smashed the women s world record in two hours 18 minutes 15 seconds . the bedford star returned to london 12 months later lowering her mixed - race [SEP]


INFO:tensorflow:tokens: [CLS] radcliffe will compete in london paula radcliffe will compete in the flora london marathon this year after deciding her schedule for 2005 . the 31 - year - old won the race in 2002 on her marathon debut defended her title 12 months later and will now seek a third title in the 17 april race . it doesn t get any better than this for the 25th anniversary said race director david bedford . after announcing the greatest men s field ever we now have the greatest women s distance runner ever . three years ago radcliffe smashed the women s world record in two hours 18 minutes 15 seconds . the bedford star returned to london 12 months later lowering her mixed - race [SEP]


INFO:tensorflow:input_ids: 101 22603 2097 5566 1999 2414 13723 22603 2097 5566 1999 1996 10088 2414 8589 2023 2095 2044 10561 2014 6134 2005 2384 1012 1996 2861 1011 2095 1011 2214 2180 1996 2679 1999 2526 2006 2014 8589 2834 8047 2014 2516 2260 2706 2101 1998 2097 2085 6148 1037 2353 2516 1999 1996 2459 2258 2679 1012 2009 2987 1056 2131 2151 2488 2084 2023 2005 1996 10965 5315 2056 2679 2472 2585 12003 1012 2044 13856 1996 4602 2273 1055 2492 2412 2057 2085 2031 1996 4602 2308 1055 3292 5479 2412 1012 2093 2086 3283 22603 14368 1996 2308 1055 2088 2501 1999 2048 2847 2324 2781 2321 3823 1012 1996 12003 2732 2513 2000 2414 2260 2706 2101 13845 2014 3816 1011 2679 102


INFO:tensorflow:input_ids: 101 22603 2097 5566 1999 2414 13723 22603 2097 5566 1999 1996 10088 2414 8589 2023 2095 2044 10561 2014 6134 2005 2384 1012 1996 2861 1011 2095 1011 2214 2180 1996 2679 1999 2526 2006 2014 8589 2834 8047 2014 2516 2260 2706 2101 1998 2097 2085 6148 1037 2353 2516 1999 1996 2459 2258 2679 1012 2009 2987 1056 2131 2151 2488 2084 2023 2005 1996 10965 5315 2056 2679 2472 2585 12003 1012 2044 13856 1996 4602 2273 1055 2492 2412 2057 2085 2031 1996 4602 2308 1055 3292 5479 2412 1012 2093 2086 3283 22603 14368 1996 2308 1055 2088 2501 1999 2048 2847 2324 2781 2321 3823 1012 1996 12003 2732 2513 2000 2414 2260 2706 2101 13845 2014 3816 1011 2679 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:****** Features ******


INFO:tensorflow:****** Features ******


INFO:tensorflow:name = input_ids, shape = (?, 128)


INFO:tensorflow:name = input_ids, shape = (?, 128)


INFO:tensorflow:name = input_mask, shape = (?, 128)


INFO:tensorflow:name = input_mask, shape = (?, 128)


INFO:tensorflow:name = label_ids, shape = (?,)


INFO:tensorflow:name = label_ids, shape = (?,)


INFO:tensorflow:name = segment_ids, shape = (?, 128)


INFO:tensorflow:name = segment_ids, shape = (?, 128)


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Restoring parameters from ./output_based_on_private_model/bbc/model.ckpt-556


INFO:tensorflow:Restoring parameters from ./output_based_on_private_model/bbc/model.ckpt-556


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


In [57]:
predictions

[('hobbit picture  four years away  lord of the rings director peter jackson has said that it will be up to four years before he starts work on a film version of the hobbit.  the oscar winner said on a visit to sydney there was a  desire  to make it  but not before lengthy negotiations.  i think it s gonna be a lot of lawyers sitting in a room trying to thrash out a deal before it will ever happen   said the new zealander. the rights to jrr tolkien s book are split between two major film studios. jackson  who is currently filming a remake of hollywood classic king kong  said he thought that the sale of mgm studios to the sony corporation would cast further uncertainty on the project. the 43-year-old was in the australian city to visit a lord of the rings exhibition  which has attracted 140 000 visitors since it opened in december.  the film-maker recently sued film company new line cinema for undisclosed damages over alleged withheld profits and lost revenue from the first part of the 

In [65]:
def list_from_csv(file_path):
    sentences_list = []
    label_list = []
    with tf.gfile.Open(file_path, "r") as f:
        reader = csv.reader(f, delimiter=",", quotechar=None)
        for (i, line) in enumerate(reader):
            if i == 0:
                continue
            label_list.append(line[0])
            sentences_list.append(line[1])
    return label_list, sentences_list

In [66]:
file_path = "./data/bbc_test.csv"

In [67]:
label_list, sentences_list = list_from_csv(file_path)

In [68]:
len(sentences_list)

445

In [69]:
len(label_list)

445

In [70]:
label_list

['entertainment',
 'tech',
 'politics',
 'sport',
 'sport',
 'tech',
 'business',
 'tech',
 'entertainment',
 'entertainment',
 'politics',
 'politics',
 'business',
 'entertainment',
 'sport',
 'entertainment',
 'entertainment',
 'business',
 'sport',
 'politics',
 'tech',
 'business',
 'sport',
 'entertainment',
 'tech',
 'politics',
 'politics',
 'sport',
 'sport',
 'business',
 'business',
 'business',
 'business',
 'business',
 'entertainment',
 'business',
 'politics',
 'politics',
 'tech',
 'tech',
 'entertainment',
 'politics',
 'entertainment',
 'business',
 'politics',
 'sport',
 'sport',
 'business',
 'tech',
 'business',
 'tech',
 'sport',
 'business',
 'business',
 'politics',
 'sport',
 'sport',
 'politics',
 'politics',
 'entertainment',
 'entertainment',
 'politics',
 'business',
 'politics',
 'politics',
 'business',
 'tech',
 'politics',
 'politics',
 'politics',
 'politics',
 'politics',
 'entertainment',
 'entertainment',
 'tech',
 'politics',
 'sport',
 'politics',

In [71]:
pred_list = getPrediction(sentences_list)

INFO:tensorflow:Writing example 0 of 445


INFO:tensorflow:Writing example 0 of 445


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: 


INFO:tensorflow:guid: 


INFO:tensorflow:tokens: [CLS] ho ##bb ##it picture four years away lord of the rings director peter jackson has said that it will be up to four years before he starts work on a film version of the ho ##bb ##it . the oscar winner said on a visit to sydney there was a desire to make it but not before lengthy negotiations . i think it s gonna be a lot of lawyers sitting in a room trying to thrash out a deal before it will ever happen said the new zealand ##er . the rights to jr ##r tolkien s book are split between two major film studios . jackson who is currently filming a remake of hollywood classic king kong said he thought that the sale of [SEP]


INFO:tensorflow:tokens: [CLS] ho ##bb ##it picture four years away lord of the rings director peter jackson has said that it will be up to four years before he starts work on a film version of the ho ##bb ##it . the oscar winner said on a visit to sydney there was a desire to make it but not before lengthy negotiations . i think it s gonna be a lot of lawyers sitting in a room trying to thrash out a deal before it will ever happen said the new zealand ##er . the rights to jr ##r tolkien s book are split between two major film studios . jackson who is currently filming a remake of hollywood classic king kong said he thought that the sale of [SEP]


INFO:tensorflow:input_ids: 101 7570 10322 4183 3861 2176 2086 2185 2935 1997 1996 7635 2472 2848 4027 2038 2056 2008 2009 2097 2022 2039 2000 2176 2086 2077 2002 4627 2147 2006 1037 2143 2544 1997 1996 7570 10322 4183 1012 1996 7436 3453 2056 2006 1037 3942 2000 3994 2045 2001 1037 4792 2000 2191 2009 2021 2025 2077 12401 7776 1012 1045 2228 2009 1055 6069 2022 1037 2843 1997 9559 3564 1999 1037 2282 2667 2000 27042 2041 1037 3066 2077 2009 2097 2412 4148 2056 1996 2047 3414 2121 1012 1996 2916 2000 3781 2099 23602 1055 2338 2024 3975 2090 2048 2350 2143 4835 1012 4027 2040 2003 2747 7467 1037 12661 1997 5365 4438 2332 4290 2056 2002 2245 2008 1996 5096 1997 102


INFO:tensorflow:input_ids: 101 7570 10322 4183 3861 2176 2086 2185 2935 1997 1996 7635 2472 2848 4027 2038 2056 2008 2009 2097 2022 2039 2000 2176 2086 2077 2002 4627 2147 2006 1037 2143 2544 1997 1996 7570 10322 4183 1012 1996 7436 3453 2056 2006 1037 3942 2000 3994 2045 2001 1037 4792 2000 2191 2009 2021 2025 2077 12401 7776 1012 1045 2228 2009 1055 6069 2022 1037 2843 1997 9559 3564 1999 1037 2282 2667 2000 27042 2041 1037 3066 2077 2009 2097 2412 4148 2056 1996 2047 3414 2121 1012 1996 2916 2000 3781 2099 23602 1055 2338 2024 3975 2090 2048 2350 2143 4835 1012 4027 2040 2003 2747 7467 1037 12661 1997 5365 4438 2332 4290 2056 2002 2245 2008 1996 5096 1997 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: 


INFO:tensorflow:guid: 


INFO:tensorflow:tokens: [CLS] game firm holds cast auditions video game firm bio ##ware is to hold open auditions for people to become cast members for future games . the company which makes role playing games such as knights of the old republic and never ##win ##ter nights is seeking people aged 18 to 99 . the canada - based company says it was looking for a wide variety of people to use as face models for characters . everyone chosen to appear in a video game will receive a performer s fee for the use of their image . the company is inviting people to come along to a shopping mall in west edmonton alberta on friday and saturday bringing along a piece of photo identification . there are [SEP]


INFO:tensorflow:tokens: [CLS] game firm holds cast auditions video game firm bio ##ware is to hold open auditions for people to become cast members for future games . the company which makes role playing games such as knights of the old republic and never ##win ##ter nights is seeking people aged 18 to 99 . the canada - based company says it was looking for a wide variety of people to use as face models for characters . everyone chosen to appear in a video game will receive a performer s fee for the use of their image . the company is inviting people to come along to a shopping mall in west edmonton alberta on friday and saturday bringing along a piece of photo identification . there are [SEP]


INFO:tensorflow:input_ids: 101 2208 3813 4324 3459 21732 2678 2208 3813 16012 8059 2003 2000 2907 2330 21732 2005 2111 2000 2468 3459 2372 2005 2925 2399 1012 1996 2194 2029 3084 2535 2652 2399 2107 2004 7307 1997 1996 2214 3072 1998 2196 10105 3334 6385 2003 6224 2111 4793 2324 2000 5585 1012 1996 2710 1011 2241 2194 2758 2009 2001 2559 2005 1037 2898 3528 1997 2111 2000 2224 2004 2227 4275 2005 3494 1012 3071 4217 2000 3711 1999 1037 2678 2208 2097 4374 1037 9256 1055 7408 2005 1996 2224 1997 2037 3746 1012 1996 2194 2003 15085 2111 2000 2272 2247 2000 1037 6023 6670 1999 2225 10522 7649 2006 5958 1998 5095 5026 2247 1037 3538 1997 6302 8720 1012 2045 2024 102


INFO:tensorflow:input_ids: 101 2208 3813 4324 3459 21732 2678 2208 3813 16012 8059 2003 2000 2907 2330 21732 2005 2111 2000 2468 3459 2372 2005 2925 2399 1012 1996 2194 2029 3084 2535 2652 2399 2107 2004 7307 1997 1996 2214 3072 1998 2196 10105 3334 6385 2003 6224 2111 4793 2324 2000 5585 1012 1996 2710 1011 2241 2194 2758 2009 2001 2559 2005 1037 2898 3528 1997 2111 2000 2224 2004 2227 4275 2005 3494 1012 3071 4217 2000 3711 1999 1037 2678 2208 2097 4374 1037 9256 1055 7408 2005 1996 2224 1997 2037 3746 1012 1996 2194 2003 15085 2111 2000 2272 2247 2000 1037 6023 6670 1999 2225 10522 7649 2006 5958 1998 5095 5026 2247 1037 3538 1997 6302 8720 1012 2045 2024 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: 


INFO:tensorflow:guid: 


INFO:tensorflow:tokens: [CLS] clarke plans migrant point scheme anyone planning to move to the uk will have to pass a test to prove they can contribute to the country home secretary charles clarke has said . he is proposing a points system similar to australia s but would avoid the quota system planned by the tori ##es . mr clarke who will un ##ve ##il his plans on monday said economic migration helped the uk but needed proper policing . the li ##b dem ##s say they will look at his plans but tory liam fox said his party offered a clear choice on the issue . the conservative party co - chairman said the british electorate had a choice between a labour government that had done nothing for [SEP]


INFO:tensorflow:tokens: [CLS] clarke plans migrant point scheme anyone planning to move to the uk will have to pass a test to prove they can contribute to the country home secretary charles clarke has said . he is proposing a points system similar to australia s but would avoid the quota system planned by the tori ##es . mr clarke who will un ##ve ##il his plans on monday said economic migration helped the uk but needed proper policing . the li ##b dem ##s say they will look at his plans but tory liam fox said his party offered a clear choice on the issue . the conservative party co - chairman said the british electorate had a choice between a labour government that had done nothing for [SEP]


INFO:tensorflow:input_ids: 101 8359 3488 20731 2391 5679 3087 4041 2000 2693 2000 1996 2866 2097 2031 2000 3413 1037 3231 2000 6011 2027 2064 9002 2000 1996 2406 2188 3187 2798 8359 2038 2056 1012 2002 2003 21991 1037 2685 2291 2714 2000 2660 1055 2021 2052 4468 1996 20563 2291 3740 2011 1996 23413 2229 1012 2720 8359 2040 2097 4895 3726 4014 2010 3488 2006 6928 2056 3171 9230 3271 1996 2866 2021 2734 5372 21107 1012 1996 5622 2497 17183 2015 2360 2027 2097 2298 2012 2010 3488 2021 17117 8230 4419 2056 2010 2283 3253 1037 3154 3601 2006 1996 3277 1012 1996 4603 2283 2522 1011 3472 2056 1996 2329 13694 2018 1037 3601 2090 1037 4428 2231 2008 2018 2589 2498 2005 102


INFO:tensorflow:input_ids: 101 8359 3488 20731 2391 5679 3087 4041 2000 2693 2000 1996 2866 2097 2031 2000 3413 1037 3231 2000 6011 2027 2064 9002 2000 1996 2406 2188 3187 2798 8359 2038 2056 1012 2002 2003 21991 1037 2685 2291 2714 2000 2660 1055 2021 2052 4468 1996 20563 2291 3740 2011 1996 23413 2229 1012 2720 8359 2040 2097 4895 3726 4014 2010 3488 2006 6928 2056 3171 9230 3271 1996 2866 2021 2734 5372 21107 1012 1996 5622 2497 17183 2015 2360 2027 2097 2298 2012 2010 3488 2021 17117 8230 4419 2056 2010 2283 3253 1037 3154 3601 2006 1996 3277 1012 1996 4603 2283 2522 1011 3472 2056 1996 2329 13694 2018 1037 3601 2090 1037 4428 2231 2008 2018 2589 2498 2005 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: 


INFO:tensorflow:guid: 


INFO:tensorflow:tokens: [CLS] radcliffe will compete in london paula radcliffe will compete in the flora london marathon this year after deciding her schedule for 2005 . the 31 - year - old won the race in 2002 on her marathon debut defended her title 12 months later and will now seek a third title in the 17 april race . it doesn t get any better than this for the 25th anniversary said race director david bedford . after announcing the greatest men s field ever we now have the greatest women s distance runner ever . three years ago radcliffe smashed the women s world record in two hours 18 minutes 15 seconds . the bedford star returned to london 12 months later lowering her mixed - race [SEP]


INFO:tensorflow:tokens: [CLS] radcliffe will compete in london paula radcliffe will compete in the flora london marathon this year after deciding her schedule for 2005 . the 31 - year - old won the race in 2002 on her marathon debut defended her title 12 months later and will now seek a third title in the 17 april race . it doesn t get any better than this for the 25th anniversary said race director david bedford . after announcing the greatest men s field ever we now have the greatest women s distance runner ever . three years ago radcliffe smashed the women s world record in two hours 18 minutes 15 seconds . the bedford star returned to london 12 months later lowering her mixed - race [SEP]


INFO:tensorflow:input_ids: 101 22603 2097 5566 1999 2414 13723 22603 2097 5566 1999 1996 10088 2414 8589 2023 2095 2044 10561 2014 6134 2005 2384 1012 1996 2861 1011 2095 1011 2214 2180 1996 2679 1999 2526 2006 2014 8589 2834 8047 2014 2516 2260 2706 2101 1998 2097 2085 6148 1037 2353 2516 1999 1996 2459 2258 2679 1012 2009 2987 1056 2131 2151 2488 2084 2023 2005 1996 10965 5315 2056 2679 2472 2585 12003 1012 2044 13856 1996 4602 2273 1055 2492 2412 2057 2085 2031 1996 4602 2308 1055 3292 5479 2412 1012 2093 2086 3283 22603 14368 1996 2308 1055 2088 2501 1999 2048 2847 2324 2781 2321 3823 1012 1996 12003 2732 2513 2000 2414 2260 2706 2101 13845 2014 3816 1011 2679 102


INFO:tensorflow:input_ids: 101 22603 2097 5566 1999 2414 13723 22603 2097 5566 1999 1996 10088 2414 8589 2023 2095 2044 10561 2014 6134 2005 2384 1012 1996 2861 1011 2095 1011 2214 2180 1996 2679 1999 2526 2006 2014 8589 2834 8047 2014 2516 2260 2706 2101 1998 2097 2085 6148 1037 2353 2516 1999 1996 2459 2258 2679 1012 2009 2987 1056 2131 2151 2488 2084 2023 2005 1996 10965 5315 2056 2679 2472 2585 12003 1012 2044 13856 1996 4602 2273 1055 2492 2412 2057 2085 2031 1996 4602 2308 1055 3292 5479 2412 1012 2093 2086 3283 22603 14368 1996 2308 1055 2088 2501 1999 2048 2847 2324 2781 2321 3823 1012 1996 12003 2732 2513 2000 2414 2260 2706 2101 13845 2014 3816 1011 2679 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: 


INFO:tensorflow:guid: 


INFO:tensorflow:tokens: [CLS] serena becomes world number two serena williams has moved up five places to second in the world rankings after her australian open win . williams won her first grand slam title since 2003 with victory over lindsay davenport the world number one . men s champion mara ##t sa ##fin remains fourth in the atp rankings while beaten finalist ll ##ey ##ton hewitt replaces andy rod ##dick as world number two . roger federer retains top spot but sa ##fin has over ##taken hewitt to become the new leader of the champions race . alicia mo ##lik who lost a three - set thriller against davenport in the quarter - finals is in the women s top 10 for the first time in her career . [SEP]


INFO:tensorflow:tokens: [CLS] serena becomes world number two serena williams has moved up five places to second in the world rankings after her australian open win . williams won her first grand slam title since 2003 with victory over lindsay davenport the world number one . men s champion mara ##t sa ##fin remains fourth in the atp rankings while beaten finalist ll ##ey ##ton hewitt replaces andy rod ##dick as world number two . roger federer retains top spot but sa ##fin has over ##taken hewitt to become the new leader of the champions race . alicia mo ##lik who lost a three - set thriller against davenport in the quarter - finals is in the women s top 10 for the first time in her career . [SEP]


INFO:tensorflow:input_ids: 101 14419 4150 2088 2193 2048 14419 3766 2038 2333 2039 2274 3182 2000 2117 1999 1996 2088 10385 2044 2014 2827 2330 2663 1012 3766 2180 2014 2034 2882 9555 2516 2144 2494 2007 3377 2058 12110 16273 1996 2088 2193 2028 1012 2273 1055 3410 13955 2102 7842 16294 3464 2959 1999 1996 12649 10385 2096 7854 9914 2222 3240 2669 19482 20736 5557 8473 24066 2004 2088 2193 2048 1012 5074 28294 14567 2327 3962 2021 7842 16294 2038 2058 25310 19482 2000 2468 1996 2047 3003 1997 1996 3966 2679 1012 15935 9587 18393 2040 2439 1037 2093 1011 2275 10874 2114 16273 1999 1996 4284 1011 4399 2003 1999 1996 2308 1055 2327 2184 2005 1996 2034 2051 1999 2014 2476 1012 102


INFO:tensorflow:input_ids: 101 14419 4150 2088 2193 2048 14419 3766 2038 2333 2039 2274 3182 2000 2117 1999 1996 2088 10385 2044 2014 2827 2330 2663 1012 3766 2180 2014 2034 2882 9555 2516 2144 2494 2007 3377 2058 12110 16273 1996 2088 2193 2028 1012 2273 1055 3410 13955 2102 7842 16294 3464 2959 1999 1996 12649 10385 2096 7854 9914 2222 3240 2669 19482 20736 5557 8473 24066 2004 2088 2193 2048 1012 5074 28294 14567 2327 3962 2021 7842 16294 2038 2058 25310 19482 2000 2468 1996 2047 3003 1997 1996 3966 2679 1012 15935 9587 18393 2040 2439 1037 2093 1011 2275 10874 2114 16273 1999 1996 4284 1011 4399 2003 1999 1996 2308 1055 2327 2184 2005 1996 2034 2051 1999 2014 2476 1012 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:label: tech (id = 0)


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:****** Features ******


INFO:tensorflow:****** Features ******


INFO:tensorflow:name = input_ids, shape = (?, 128)


INFO:tensorflow:name = input_ids, shape = (?, 128)


INFO:tensorflow:name = input_mask, shape = (?, 128)


INFO:tensorflow:name = input_mask, shape = (?, 128)


INFO:tensorflow:name = label_ids, shape = (?,)


INFO:tensorflow:name = label_ids, shape = (?,)


INFO:tensorflow:name = segment_ids, shape = (?, 128)


INFO:tensorflow:name = segment_ids, shape = (?, 128)
Exception ignored in: <generator object Estimator.predict at 0x7f53706aa1a8>
Traceback (most recent call last):
  File "/home/I342202/virtualenv_py3/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 647, in predict
    for key, value in six.iteritems(preds_evaluated)
  File "//usr/lib/python3.5/contextlib.py", line 77, in __exit__
    self.gen.throw(type, value, traceback)
  File "/home/I342202/virtualenv_py3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 5652, in get_controller
    yield g
  File "//usr/lib/python3.5/contextlib.py", line 77, in __exit__
    self.gen.throw(type, value, traceback)
  File "/home/I342202/virtualenv_py3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 5467, in get_controller
    type(default))
AssertionError: Nesting violated for default stack of <class 'tensorflow.python.framework.ops.Graph'> objects


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Restoring parameters from ./output_based_on_private_model/bbc/model.ckpt-556


INFO:tensorflow:Restoring parameters from ./output_based_on_private_model/bbc/model.ckpt-556


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


In [72]:
len(pred_list)

445

In [73]:
pred_list

[('hobbit picture  four years away  lord of the rings director peter jackson has said that it will be up to four years before he starts work on a film version of the hobbit.  the oscar winner said on a visit to sydney there was a  desire  to make it  but not before lengthy negotiations.  i think it s gonna be a lot of lawyers sitting in a room trying to thrash out a deal before it will ever happen   said the new zealander. the rights to jrr tolkien s book are split between two major film studios. jackson  who is currently filming a remake of hollywood classic king kong  said he thought that the sale of mgm studios to the sony corporation would cast further uncertainty on the project. the 43-year-old was in the australian city to visit a lord of the rings exhibition  which has attracted 140 000 visitors since it opened in december.  the film-maker recently sued film company new line cinema for undisclosed damages over alleged withheld profits and lost revenue from the first part of the 

In [84]:
count = 0
wrong_index_list = []

In [85]:
for i in range(len(label_list)):
    if pred_list[i][3] == label_list[i]:
        count += 1
    else:
        wrong_index_list.append(i)

In [86]:
count

437

In [87]:
wrong_index_list

[29, 153, 278, 290, 292, 327, 403, 421]

In [89]:
for i in wrong_index_list:
    print(pred_list[i][1],pred_list[i][3])
    print(label_list[i])

[2.8299203e-04 7.4471376e-05 2.6795189e-04 ... 2.5635704e-06 1.6707902e-06
 1.5673857e-06] politics
business
[9.9222106e-01 2.1787777e-03 2.4587207e-04 ... 3.1907182e-06 1.9897223e-06
 1.4694775e-06] tech
entertainment
[9.9074239e-01 3.5093157e-03 2.4908755e-04 ... 3.4512168e-06 2.1912283e-06
 1.6739731e-06] tech
entertainment
[9.9209714e-01 2.0372153e-03 2.4601843e-04 ... 3.0164433e-06 1.9516806e-06
 1.4926341e-06] tech
business
[3.4128563e-04 9.9238074e-05 2.6661201e-04 ... 1.6293405e-06 1.4105582e-06
 2.0499401e-06] business
politics
[9.9129522e-01 2.3980618e-03 2.5787676e-04 ... 3.2859991e-06 2.0351442e-06
 1.5659091e-06] tech
politics
[3.4595803e-05 1.1230022e-04 9.9694616e-01 ... 9.0983224e-07 1.1827166e-06
 1.3515552e-06] sport
tech
[2.1589080e-04 5.9627640e-05 2.0736073e-04 ... 2.4009939e-06 1.5489435e-06
 1.4986083e-06] politics
business
