In [652]:
# Copyright 2019 Google Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Predicting Movie Review Sentiment with BERT on TF Hub

If you’ve been following Natural Language Processing over the past year, you’ve probably heard of BERT: Bidirectional Encoder Representations from Transformers. It’s a neural network architecture designed by Google researchers that’s totally transformed what’s state-of-the-art for NLP tasks, like text classification, translation, summarization, and question answering.

Now that BERT's been added to [TF Hub](https://www.tensorflow.org/hub) as a loadable module, it's easy(ish) to add into existing TensorFlow text pipelines. In an existing pipeline, BERT can replace text embedding layers like ELMo and GloVe. Alternatively, [finetuning](http://wiki.fast.ai/index.php/Fine_tuning) BERT can provide both an accuracy boost and faster training time in many cases.

Here, we'll train a model to predict whether an IMDB movie review is positive or negative using BERT in Tensorflow with tf hub. Some code was adapted from [this colab notebook](https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb). Let's get started!

In [653]:
from sklearn.model_selection import train_test_split
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime

In addition to the standard libraries we imported above, we'll need to install BERT's python package.

In [654]:
!pip install bert-tensorflow

[33mYou are using pip version 18.1, however version 19.1.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [655]:
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization

Below, we'll set an output directory location to store our model output and checkpoints. This can be a local directory, in which case you'd set OUTPUT_DIR to the name of the directory you'd like to create. If you're running this code in Google's hosted Colab, the directory won't persist after the Colab session ends.

Alternatively, if you're a GCP user, you can store output in a GCP bucket. To do that, set a directory name in OUTPUT_DIR and the name of the GCP bucket in the BUCKET field.

Set DO_DELETE to rewrite the OUTPUT_DIR if it exists. Otherwise, Tensorflow will load existing model checkpoints from that directory (if they exist).

In [656]:
# Set the output directory for saving model file
# Optionally, set a GCP bucket location
# (The trailing `#@param` / `#@markdown` annotations are Colab form annotations.)

OUTPUT_DIR = 'OUTPUT_DIR_NAME'#@param {type:"string"}
#@markdown Whether or not to clear/delete the directory and create a new one
DO_DELETE = True#@param {type:"boolean"}
#@markdown Set USE_BUCKET and BUCKET if you want to (optionally) store model output on GCP bucket.
USE_BUCKET = False #@param {type:"boolean"}
BUCKET = 'BUCKET_NAME' #@param {type:"string"}

if USE_BUCKET:
  # Re-point OUTPUT_DIR at gs://<BUCKET>/<OUTPUT_DIR> and authenticate the user.
  OUTPUT_DIR = 'gs://{}/{}'.format(BUCKET, OUTPUT_DIR)
  # Imported here because google.colab is only needed on the bucket path.
  from google.colab import auth
  auth.authenticate_user()

# Only attempt the delete when the directory is actually there. A genuine
# failure (instead of being swallowed by a bare `except`) now surfaces, so a
# requested clean-up can never silently leave old checkpoints behind.
if DO_DELETE and tf.gfile.Exists(OUTPUT_DIR):
  tf.gfile.DeleteRecursively(OUTPUT_DIR)
tf.gfile.MakeDirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))


***** Model output directory: OUTPUT_DIR_NAME *****


# Data

First, let's download the dataset, hosted by Stanford. The code below, which downloads, extracts, and imports the IMDB Large Movie Review Dataset, is borrowed from [this Tensorflow tutorial](https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub).

In [657]:
from tensorflow import keras
import os
import re

def load_directory_data(directory):
  """Read every review file in `directory` into a DataFrame.

  File names are expected to look like `<digits>_<digits>.txt`. The second
  digit group is stored, as a string, in the "sentiment" column and the file's
  full text in the "sentence" column.

  Args:
    directory: Path of the directory whose entries are listed and read.

  Returns:
    A pandas DataFrame with "sentence" and "sentiment" columns, one row per
    file.

  Raises:
    ValueError: If an entry's name does not match the expected pattern.
  """
  # Raw string: "\d" and "\." are invalid escape sequences in a plain literal.
  name_pattern = re.compile(r"\d+_(\d+)\.txt")
  data = {"sentence": [], "sentiment": []}
  for file_name in os.listdir(directory):
    name_match = name_pattern.match(file_name)
    if name_match is None:
      # Fail with a readable message instead of an AttributeError on None.
      raise ValueError(
          "Unexpected file name {!r} in {!r}".format(file_name, directory))
    with tf.gfile.GFile(os.path.join(directory, file_name), "r") as f:
      data["sentence"].append(f.read())
    data["sentiment"].append(name_match.group(1))
  return pd.DataFrame.from_dict(data)

def load_dataset(directory):
  """Combine the "pos" and "neg" sub-directories into one shuffled DataFrame.

  Each sub-directory is loaded with `load_directory_data`; rows from "pos" get
  polarity 1 and rows from "neg" get polarity 0. The concatenated frame is
  shuffled and re-indexed from zero.
  """
  labelled_frames = []
  for subdir, polarity in (("pos", 1), ("neg", 0)):
    frame = load_directory_data(os.path.join(directory, subdir))
    frame["polarity"] = polarity
    labelled_frames.append(frame)
  merged = pd.concat(labelled_frames)
  shuffled = merged.sample(frac=1)
  return shuffled.reset_index(drop=True)

def download_and_load_datasets(force_download=False):
  """Fetch the aclImdb archive and load its train and test splits.

  The archive is obtained (and extracted) through `tf.keras.utils.get_file`;
  the "aclImdb/train" and "aclImdb/test" directories next to the returned path
  are then loaded with `load_dataset`.

  Args:
    force_download: Accepted for the caller's convenience; the body does not
      read it.

  Returns:
    A `(train_df, test_df)` tuple of DataFrames.
  """
  archive_path = tf.keras.utils.get_file(
      fname="aclImdb.tar.gz",
      origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
      extract=True)

  # The extracted tree sits beside the downloaded archive.
  data_root = os.path.join(os.path.dirname(archive_path), "aclImdb")
  train_df = load_dataset(os.path.join(data_root, "train"))
  test_df = load_dataset(os.path.join(data_root, "test"))

  return train_df, test_df


In [658]:
# Corpus selector handed to `load_local_data` as its `data` argument.
dataset = 'agreement'
# Which text columns are fed to the model: the `org` text and/or the `response` text.
use_org, use_resp = False, True
# String label -> integer class id.
convert_dict = dict(agreement=0, disagreement=1, unrelated=2)
# Alternative mappings, kept for the attack/support-labelled corpora:
#convert_dict = {"attack": 1, "support": 0, "unrelated": 2}
#convert_dict = {"attack": 0, "support": 1, "unrelated": 2}
#convert_dict = {"attack": 0, "support": 0, "unrelated": 1}

In [659]:
#train, test = download_and_load_datasets()
def load_local_data(filename, data='node', random_state=None):
    """Load the tab-separated corpus and split it into training and validation frames.

    As a side effect, prints the mean whitespace-token count of the `org` and
    `response` columns for every value of `org_dataset`.

    Args:
        filename: Path of a TSV file with at least the columns `org_dataset`,
            `org` and `response` (and `label` for the 'political' split).
        data: Which subset/split to build:
            * 'node'      - rows from 'debate_test', 'debate_train' and 'procon';
              'debate_test' rows form the validation set, the rest the
              training set. All labels are kept.
            * 'political' - 'political' rows without the 'unrelated' label;
              the last 200 shuffled rows form the validation set.
            * 'agreement' - 'agreement' rows with rows containing missing
              values dropped; the last 2000 shuffled rows form the validation
              set.
        random_state: Forwarded to `DataFrame.sample` for the shuffle. The
            default `None` keeps the original unseeded behaviour; pass an int
            for a reproducible split.

    Returns:
        A `(data_train, data_val)` tuple of DataFrames. When a positional split
        has no more rows than the validation size, the training frame is empty.

    Raises:
        ValueError: If `data` is not one of the supported selectors.
    """
    valid_choices = ('node', 'political', 'agreement')
    if data not in valid_choices:
        # Raise instead of calling sys.exit: `sys` is not imported by the
        # notebook, and killing the kernel is the wrong reaction to a typo.
        raise ValueError(
            'Invalid dataset {!r}; expected one of {}'.format(data, valid_choices))

    df = pd.read_csv(filename, sep='\t')
    # Mean number of whitespace-separated tokens per source dataset.
    print(df.groupby('org_dataset').org.apply(lambda x: x.str.split().str.len().mean()))
    print(df.groupby('org_dataset').response.apply(lambda x: x.str.split().str.len().mean()))

    # Split in Training and Validation data
    if data == 'node':
        dataset = df.loc[df['org_dataset'].isin(['debate_test', 'debate_train', 'procon'])]
        dataset = dataset.sample(frac=1, random_state=random_state)
        is_validation = dataset['org_dataset'].isin(['debate_test'])
        data_train = dataset.loc[~is_validation]
        data_val = dataset.loc[is_validation]
    elif data == 'political':
        dataset = df.loc[df['org_dataset'].isin(['political'])]
        dataset = dataset[dataset['label'] != 'unrelated']  # Filter only support/attack
        dataset = dataset.sample(frac=1, random_state=random_state)
        data_train = dataset.iloc[:-200]
        data_val = dataset.iloc[-200:]
    else:  # data == 'agreement'
        dataset = df.loc[df['org_dataset'].isin(['agreement'])]
        dataset = dataset.sample(frac=1, random_state=random_state).dropna()
        data_train = dataset.iloc[:-2000]
        data_val = dataset.iloc[-2000:]
    return data_train, data_val

# Read the TSV and split it according to the `dataset` selector.
train_df, test_df = load_local_data('../complete_data.tsv', data=dataset)
print(train_df.head())

# Swap the string labels for their integer ids from `convert_dict`.
train_df, test_df = (
    split.replace({'label': convert_dict}) for split in (train_df, test_df))

# Per-label summary: the training split is printed, the validation split is
# the cell's displayed value.
print(train_df.groupby('label').describe())
test_df.groupby('label').describe()

org_dataset
agreement              74.945753
comargGM               13.558755
comargUGIP              9.486594
debate_ext_attacks     36.043478
debate_ext_media       53.533333
debate_ext_second      11.043478
debate_ext_supp        18.885714
debate_extended        35.772358
debate_test            13.860000
debate_train           17.280000
political             102.759589
procon                 13.950000
Name: org, dtype: float64
org_dataset
agreement              69.926330
comargGM              115.659922
comargUGIP             83.567031
debate_ext_attacks     32.782609
debate_ext_media       53.168254
debate_ext_second      60.717391
debate_ext_supp        45.428571
debate_extended        59.934959
debate_test            51.240000
debate_train           48.080000
political             102.171233
procon                 30.366667
Name: response, dtype: float64
      org_dataset     id                                                org  \
12468   agreement   9492  The choice to use mari

Unnamed: 0_level_0,org_dataset,org_dataset,org_dataset,org_dataset,id,id,id,id,org,org,...,response,response,response_stance,response_stance,response_stance,response_stance,topic,topic,topic,topic
Unnamed: 0_level_1,count,unique,top,freq,count,unique,top,freq,count,unique,...,top,freq,count,unique,top,freq,count,unique,top,freq
label,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0,954,1,agreement,954,954,954,23076,1,954,857,...,Regional powers would provide limited support ...,4,954,1,unknown,954,954,134,animal,55
1,1046,1,agreement,1046,1046,1046,8406,1,1046,889,...,Legalized polygamy opens a slippery slope to l...,4,1046,1,unknown,1046,1046,140,energy,53


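Next, we shuffle the training and test sets. Note that `sample(frac=1)` keeps every row, so no subsampling takes place here; pass a smaller `frac` (or `n`) if you want to train on a subset to keep training fast.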

In [660]:
# Shuffle both splits; frac=1 returns every row in a new random order (no subsampling).
train, test = (split.sample(frac=1) for split in (train_df, test_df))

In [682]:
# Summary statistics of the shuffled test split, grouped by the `topic` column.
test.groupby('topic').describe()

Unnamed: 0_level_0,org_dataset,org_dataset,org_dataset,org_dataset,id,id,id,id,org,org,...,response,response,response_stance,response_stance,response_stance,response_stance,label,label,label,label
Unnamed: 0_level_1,count,unique,top,freq,count,unique,top,freq,count,unique,...,top,freq,count,unique,top,freq,count,unique,top,freq
topic,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
abortion,40,1,agreement,40,40,40,11595,1,40,30,...,Alcoholism and drug-use are common after abort...,3,40,1,unknown,40,40,2,1,20
advertising,1,1,agreement,1,1,1,2177,1,1,1,...,Advertising boosts the economy. The economy is...,1,1,1,unknown,1,1,1,1,1
alga,3,1,agreement,3,3,3,16075,1,3,3,...,Industrial algae biofuel requires too many nut...,1,3,1,unknown,3,3,2,0,2
algae,15,1,agreement,15,15,15,800,1,15,12,...,Industrial algae biofuel requires too many nut...,3,15,1,unknown,15,15,2,1,10
animal,95,1,agreement,95,95,95,969,1,95,71,...,Humans have a choice and thus responsibility t...,4,95,1,unknown,95,95,2,0,55
bailout,18,1,agreement,18,18,18,22972,1,18,17,...,Most economists support the $700b US economic ...,2,18,1,unknown,18,18,2,0,10
ban,37,1,agreement,37,37,37,17675,1,37,29,...,The DC handgun ban is made ineffectual by lega...,3,37,1,unknown,37,37,2,1,19
biofuel,5,1,agreement,5,5,5,2571,1,5,5,...,The main proponents of biofuel are farmers and...,2,5,1,unknown,5,5,1,1,5
biofuels,1,1,agreement,1,1,1,2487,1,1,1,...,Developing new land for biofuels can release g...,1,1,1,unknown,1,1,1,1,1
blockade,11,1,agreement,11,11,11,19957,1,11,9,...,General statements against Israeli blockade of...,2,11,1,unknown,11,11,2,0,6


In [661]:
# Show the column names available in the training frame.
train.columns

Index(['org_dataset', 'id', 'org', 'org_stance', 'response', 'response_stance',
       'label', 'topic'],
      dtype='object')

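For us, the input data is the 'org' and 'response' columns (which of them is used is controlled by `use_org` and `use_resp`), and our label is the 'label' column (0, 1, and 2 for agreement, disagreement, and unrelated, respectively).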

In [662]:
# DataFrame columns holding the two texts and the target label.
ORG_COLUMN, RESP_COLUMN, LABEL_COLUMN = 'org', 'response', 'label'
# label_list enumerates every label value an example may carry
# (in general e.g. True/False, 0/1 or 'dog'/'cat'; here the integer class ids).
label_list = [0, 1, 2]

# Data Preprocessing
We'll need to transform our data into a format BERT understands. This involves two steps. First, we create  `InputExample`'s using the constructor provided in the BERT library.

- `text_a` is the text we want to classify, which in this case is the `org` column of our DataFrame when `use_org` is set, and the `response` column otherwise.
- `text_b` is used if we're training a model to understand the relationship between sentences (i.e. is `text_b` a translation of `text_a`? Is `text_b` an answer to the question asked by `text_a`?). When both `use_org` and `use_resp` are set, `text_b` holds the `response` column; otherwise we leave `text_b` as `None`.
- `label` is the label for our example, i.e. 0, 1, or 2

In [663]:
def _build_input_examples(frame, text_a_column, text_b_column=None):
    """Turn every row of `frame` into a BERT `InputExample`.

    Args:
        frame: DataFrame holding the text column(s) and the `LABEL_COLUMN`.
        text_a_column: Name of the column used as `text_a`.
        text_b_column: Name of the column used as `text_b`, or `None` to leave
            `text_b` unset.

    Returns:
        The result of `frame.apply(..., axis=1)`: one `InputExample` per row.
    """
    def _row_to_example(row):
        text_b = None if text_b_column is None else row[text_b_column]
        # Use the InputExample class from BERT's run_classifier code
        return bert.run_classifier.InputExample(
            guid=None,  # Globally unique ID for bookkeeping, unused in this example
            text_a=row[text_a_column],
            text_b=text_b,
            label=row[LABEL_COLUMN])

    return frame.apply(_row_to_example, axis=1)


# Choose the text column(s) once instead of repeating the construction per case.
if use_org and use_resp:
    # Use org + response
    _text_a_column, _text_b_column = ORG_COLUMN, RESP_COLUMN
elif use_org:
    # Use only org
    _text_a_column, _text_b_column = ORG_COLUMN, None
else:
    # Use only resp (this is also the fallback when both flags are False)
    _text_a_column, _text_b_column = RESP_COLUMN, None

train_InputExamples = _build_input_examples(train, _text_a_column, _text_b_column)
test_InputExamples = _build_input_examples(test, _text_a_column, _text_b_column)

Next, we need to preprocess our data so that it matches the data BERT was trained on. For this, we'll need to do a couple of things (but don't worry--this is also included in the Python library):


1. Lowercase our text (if we're using a BERT lowercase model)
2. Tokenize it (i.e. "sally says hi" -> ["sally", "says", "hi"])
3. Break words into WordPieces (i.e. "calling" -> ["call", "##ing"])
4. Map our words to indexes using a vocab file that BERT provides
5. Add special "CLS" and "SEP" tokens (see the [readme](https://github.com/google-research/bert))
6. Append "index" and "segment" tokens to each input (see the [BERT paper](https://arxiv.org/pdf/1810.04805.pdf))

Happily, we don't have to worry about most of these details.




To start, we'll need to load a vocabulary file and lowercasing information directly from the BERT tf hub module:

In [664]:
# TF Hub handle of the uncased (all lowercase) BERT module used below.
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
  """Build a `FullTokenizer` from the Hub module's own tokenization info.

  The module's "tokenization_info" signature is evaluated in a throwaway graph
  and session to obtain the vocab file and the lowercasing flag, which are then
  passed to `bert.tokenization.FullTokenizer`.
  """
  scratch_graph = tf.Graph()
  with scratch_graph.as_default():
    module = hub.Module(BERT_MODEL_HUB)
    info = module(signature="tokenization_info", as_dict=True)
    fetches = [info["vocab_file"], info["do_lower_case"]]
    with tf.Session() as session:
      vocab_file, do_lower_case = session.run(fetches)

  return bert.tokenization.FullTokenizer(
      vocab_file=vocab_file, do_lower_case=do_lower_case)

tokenizer = create_tokenizer_from_hub_module()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0606 19:16:47.848185 139790959666944 tf_logging.py:115] Saver not created because there are no variables in the graph to restore


Great--we just learned that the BERT model we're using expects lowercase data (that's what is stored in tokenization_info["do_lower_case"]), and we also loaded BERT's vocab file. We also created a tokenizer, which breaks words into word pieces:

In [665]:
# Run the tokenizer on a sample sentence to inspect the pieces it produces.
tokenizer.tokenize("This here's an example of using the BERT tokenizer")

['this',
 'here',
 "'",
 's',
 'an',
 'example',
 'of',
 'using',
 'the',
 'bert',
 'token',
 '##izer']

Using our tokenizer, we'll call `run_classifier.convert_examples_to_features` on our InputExamples to convert them into features BERT understands.

In [666]:
# Upper bound on the number of tokens per example, passed to the feature converter.
MAX_SEQ_LENGTH = 128
# Turn the train and test InputExamples into the InputFeatures BERT consumes,
# using the shared label list and tokenizer.
_to_features = bert.run_classifier.convert_examples_to_features
train_features = _to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = _to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)

INFO:tensorflow:Writing example 0 of 22831


I0606 19:16:48.218993 139790959666944 tf_logging.py:115] Writing example 0 of 22831


INFO:tensorflow:*** Example ***


I0606 19:16:48.220726 139790959666944 tf_logging.py:115] *** Example ***


INFO:tensorflow:guid: None


I0606 19:16:48.221617 139790959666944 tf_logging.py:115] guid: None


INFO:tensorflow:tokens: [CLS] geneva conventions regulates use of land ##mine ##s ; ban is excessive . the appropriate use of land ##mine ##s is governed by the geneva convention . this ensures that the use of land ##mine ##s in specific instances is consistent with international humanitarian law and norms . the use of land ##mine ##s in the d ##ms of korea , for instance , can be justified under the geneva convention , because they pose no real threat to civilians . [SEP]


I0606 19:16:48.222265 139790959666944 tf_logging.py:115] tokens: [CLS] geneva conventions regulates use of land ##mine ##s ; ban is excessive . the appropriate use of land ##mine ##s is governed by the geneva convention . this ensures that the use of land ##mine ##s in specific instances is consistent with international humanitarian law and norms . the use of land ##mine ##s in the d ##ms of korea , for instance , can be justified under the geneva convention , because they pose no real threat to civilians . [SEP]


INFO:tensorflow:input_ids: 101 9810 12472 26773 2224 1997 2455 11233 2015 1025 7221 2003 11664 1012 1996 6413 2224 1997 2455 11233 2015 2003 9950 2011 1996 9810 4680 1012 2023 21312 2008 1996 2224 1997 2455 11233 2015 1999 3563 12107 2003 8335 2007 2248 11470 2375 1998 17606 1012 1996 2224 1997 2455 11233 2015 1999 1996 1040 5244 1997 4420 1010 2005 6013 1010 2064 2022 15123 2104 1996 9810 4680 1010 2138 2027 13382 2053 2613 5081 2000 9272 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:16:48.222849 139790959666944 tf_logging.py:115] input_ids: 101 9810 12472 26773 2224 1997 2455 11233 2015 1025 7221 2003 11664 1012 1996 6413 2224 1997 2455 11233 2015 2003 9950 2011 1996 9810 4680 1012 2023 21312 2008 1996 2224 1997 2455 11233 2015 1999 3563 12107 2003 8335 2007 2248 11470 2375 1998 17606 1012 1996 2224 1997 2455 11233 2015 1999 1996 1040 5244 1997 4420 1010 2005 6013 1010 2064 2022 15123 2104 1996 9810 4680 1010 2138 2027 13382 2053 2613 5081 2000 9272 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:16:48.223438 139790959666944 tf_logging.py:115] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:16:48.223998 139790959666944 tf_logging.py:115] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


I0606 19:16:48.224477 139790959666944 tf_logging.py:115] label: 1 (id = 1)


INFO:tensorflow:*** Example ***


I0606 19:16:48.225808 139790959666944 tf_logging.py:115] *** Example ***


INFO:tensorflow:guid: None


I0606 19:16:48.226426 139790959666944 tf_logging.py:115] guid: None


INFO:tensorflow:tokens: [CLS] [ [ argument : fin . reform will st ##if ##le small business , community banks . " mcconnell : go ##p has votes to stop bill " . ms ##nbc . april 2010 : " establishes new and unlimited regulatory powers that will st ##if ##le small businesses and community banks . " [SEP]


I0606 19:16:48.238958 139790959666944 tf_logging.py:115] tokens: [CLS] [ [ argument : fin . reform will st ##if ##le small business , community banks . " mcconnell : go ##p has votes to stop bill " . ms ##nbc . april 2010 : " establishes new and unlimited regulatory powers that will st ##if ##le small businesses and community banks . " [SEP]


INFO:tensorflow:input_ids: 101 1031 1031 6685 1024 10346 1012 5290 2097 2358 10128 2571 2235 2449 1010 2451 5085 1012 1000 28514 1024 2175 2361 2038 4494 2000 2644 3021 1000 1012 5796 28957 1012 2258 2230 1024 1000 21009 2047 1998 14668 10738 4204 2008 2097 2358 10128 2571 2235 5661 1998 2451 5085 1012 1000 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:16:48.239709 139790959666944 tf_logging.py:115] input_ids: 101 1031 1031 6685 1024 10346 1012 5290 2097 2358 10128 2571 2235 2449 1010 2451 5085 1012 1000 28514 1024 2175 2361 2038 4494 2000 2644 3021 1000 1012 5796 28957 1012 2258 2230 1024 1000 21009 2047 1998 14668 10738 4204 2008 2097 2358 10128 2571 2235 5661 1998 2451 5085 1012 1000 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:16:48.240348 139790959666944 tf_logging.py:115] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:16:48.240860 139790959666944 tf_logging.py:115] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


I0606 19:16:48.241331 139790959666944 tf_logging.py:115] label: 1 (id = 1)


INFO:tensorflow:*** Example ***


I0606 19:16:48.242580 139790959666944 tf_logging.py:115] *** Example ***


INFO:tensorflow:guid: None


I0606 19:16:48.243205 139790959666944 tf_logging.py:115] guid: None


INFO:tensorflow:tokens: [CLS] aging countries cannot afford universal health care . when a country ' s population ages , the strain of a universal health care program grows , with a larger percentage of the population reaching an age in which they require health care . therefore , in countries where the population is aging , it may be important to avoid a universal health care program . [SEP]


I0606 19:16:48.243679 139790959666944 tf_logging.py:115] tokens: [CLS] aging countries cannot afford universal health care . when a country ' s population ages , the strain of a universal health care program grows , with a larger percentage of the population reaching an age in which they require health care . therefore , in countries where the population is aging , it may be important to avoid a universal health care program . [SEP]


INFO:tensorflow:input_ids: 101 12520 3032 3685 8984 5415 2740 2729 1012 2043 1037 2406 1005 1055 2313 5535 1010 1996 10178 1997 1037 5415 2740 2729 2565 7502 1010 2007 1037 3469 7017 1997 1996 2313 4285 2019 2287 1999 2029 2027 5478 2740 2729 1012 3568 1010 1999 3032 2073 1996 2313 2003 12520 1010 2009 2089 2022 2590 2000 4468 1037 5415 2740 2729 2565 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:16:48.244292 139790959666944 tf_logging.py:115] input_ids: 101 12520 3032 3685 8984 5415 2740 2729 1012 2043 1037 2406 1005 1055 2313 5535 1010 1996 10178 1997 1037 5415 2740 2729 2565 7502 1010 2007 1037 3469 7017 1997 1996 2313 4285 2019 2287 1999 2029 2027 5478 2740 2729 1012 3568 1010 1999 3032 2073 1996 2313 2003 12520 1010 2009 2089 2022 2590 2000 4468 1037 5415 2740 2729 2565 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:16:48.244801 139790959666944 tf_logging.py:115] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:16:48.245287 139790959666944 tf_logging.py:115] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


I0606 19:16:48.245781 139790959666944 tf_logging.py:115] label: 1 (id = 1)


INFO:tensorflow:*** Example ***


I0606 19:16:48.247061 139790959666944 tf_logging.py:115] *** Example ***


INFO:tensorflow:guid: None


I0606 19:16:48.247674 139790959666944 tf_logging.py:115] guid: None


INFO:tensorflow:tokens: [CLS] unnecessary to train whole nation to prepare for threats . su ##hai ##l al - en ##iz ##i , aged 28 , argued in 2010 that military service in kuwait should not be mandatory : " i am certain that we have enough soldiers in the army . we don ' t need to train the entire nation in order to be ready for threats ; we are not in a police state . this is a democracy . " [ 11 ] [SEP]


I0606 19:16:48.248636 139790959666944 tf_logging.py:115] tokens: [CLS] unnecessary to train whole nation to prepare for threats . su ##hai ##l al - en ##iz ##i , aged 28 , argued in 2010 that military service in kuwait should not be mandatory : " i am certain that we have enough soldiers in the army . we don ' t need to train the entire nation in order to be ready for threats ; we are not in a police state . this is a democracy . " [ 11 ] [SEP]


INFO:tensorflow:input_ids: 101 14203 2000 3345 2878 3842 2000 7374 2005 8767 1012 10514 10932 2140 2632 1011 4372 10993 2072 1010 4793 2654 1010 5275 1999 2230 2008 2510 2326 1999 13085 2323 2025 2022 10915 1024 1000 1045 2572 3056 2008 2057 2031 2438 3548 1999 1996 2390 1012 2057 2123 1005 1056 2342 2000 3345 1996 2972 3842 1999 2344 2000 2022 3201 2005 8767 1025 2057 2024 2025 1999 1037 2610 2110 1012 2023 2003 1037 7072 1012 1000 1031 2340 1033 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:16:48.249137 139790959666944 tf_logging.py:115] input_ids: 101 14203 2000 3345 2878 3842 2000 7374 2005 8767 1012 10514 10932 2140 2632 1011 4372 10993 2072 1010 4793 2654 1010 5275 1999 2230 2008 2510 2326 1999 13085 2323 2025 2022 10915 1024 1000 1045 2572 3056 2008 2057 2031 2438 3548 1999 1996 2390 1012 2057 2123 1005 1056 2342 2000 3345 1996 2972 3842 1999 2344 2000 2022 3201 2005 8767 1025 2057 2024 2025 1999 1037 2610 2110 1012 2023 2003 1037 7072 1012 1000 1031 2340 1033 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:16:48.258059 139790959666944 tf_logging.py:115] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:16:48.258785 139790959666944 tf_logging.py:115] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


I0606 19:16:48.259364 139790959666944 tf_logging.py:115] label: 0 (id = 0)


INFO:tensorflow:*** Example ***


I0606 19:16:48.261951 139790959666944 tf_logging.py:115] *** Example ***


INFO:tensorflow:guid: None


I0606 19:16:48.262957 139790959666944 tf_logging.py:115] guid: None


INFO:tensorflow:tokens: [CLS] progressive taxes are unfair to the short - lived high earn ##er basketball players , for example , make substantial incomes for around eight years . they are tax ##ed , during this period , as if this was their constant , long - term income , which means they are often left having to find other jobs later in life . [SEP]


I0606 19:16:48.263855 139790959666944 tf_logging.py:115] tokens: [CLS] progressive taxes are unfair to the short - lived high earn ##er basketball players , for example , make substantial incomes for around eight years . they are tax ##ed , during this period , as if this was their constant , long - term income , which means they are often left having to find other jobs later in life . [SEP]


INFO:tensorflow:input_ids: 101 6555 7773 2024 15571 2000 1996 2460 1011 2973 2152 7796 2121 3455 2867 1010 2005 2742 1010 2191 6937 29373 2005 2105 2809 2086 1012 2027 2024 4171 2098 1010 2076 2023 2558 1010 2004 2065 2023 2001 2037 5377 1010 2146 1011 2744 3318 1010 2029 2965 2027 2024 2411 2187 2383 2000 2424 2060 5841 2101 1999 2166 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:16:48.264471 139790959666944 tf_logging.py:115] input_ids: 101 6555 7773 2024 15571 2000 1996 2460 1011 2973 2152 7796 2121 3455 2867 1010 2005 2742 1010 2191 6937 29373 2005 2105 2809 2086 1012 2027 2024 4171 2098 1010 2076 2023 2558 1010 2004 2065 2023 2001 2037 5377 1010 2146 1011 2744 3318 1010 2029 2965 2027 2024 2411 2187 2383 2000 2424 2060 5841 2101 1999 2166 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:16:48.264998 139790959666944 tf_logging.py:115] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:16:48.265587 139790959666944 tf_logging.py:115] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


I0606 19:16:48.266171 139790959666944 tf_logging.py:115] label: 1 (id = 1)


INFO:tensorflow:Writing example 10000 of 22831


I0606 19:16:57.241299 139790959666944 tf_logging.py:115] Writing example 10000 of 22831


INFO:tensorflow:Writing example 20000 of 22831


I0606 19:17:06.009556 139790959666944 tf_logging.py:115] Writing example 20000 of 22831


INFO:tensorflow:Writing example 0 of 2000


I0606 19:17:08.523621 139790959666944 tf_logging.py:115] Writing example 0 of 2000


INFO:tensorflow:*** Example ***


I0606 19:17:08.524566 139790959666944 tf_logging.py:115] *** Example ***


INFO:tensorflow:guid: None


I0606 19:17:08.525911 139790959666944 tf_logging.py:115] guid: None


INFO:tensorflow:tokens: [CLS] a laser ban en ##tails a large bureaucracy for enforcement . [SEP]


I0606 19:17:08.526722 139790959666944 tf_logging.py:115] tokens: [CLS] a laser ban en ##tails a large bureaucracy for enforcement . [SEP]


INFO:tensorflow:input_ids: 101 1037 9138 7221 4372 22081 1037 2312 25934 2005 7285 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:17:08.527907 139790959666944 tf_logging.py:115] input_ids: 101 1037 9138 7221 4372 22081 1037 2312 25934 2005 7285 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:17:08.528822 139790959666944 tf_logging.py:115] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:17:08.529399 139790959666944 tf_logging.py:115] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


I0606 19:17:08.530455 139790959666944 tf_logging.py:115] label: 0 (id = 0)


INFO:tensorflow:*** Example ***


I0606 19:17:08.532761 139790959666944 tf_logging.py:115] *** Example ***


INFO:tensorflow:guid: None


I0606 19:17:08.534234 139790959666944 tf_logging.py:115] guid: None


INFO:tensorflow:tokens: [CLS] mandatory health insurance cannot be effectively enforced " mandatory health insurance ? " a dragon in sheep ' s clothing . july 3rd , 2009 : " consider that those who refuse to get insurance may be fined . how will the rebels be found out – when they show up at the hospital ? will you have police stationed at the er now , to fine the un ##ins ##ured ? or will there be a door - to - door search ? " [SEP]


I0606 19:17:08.534812 139790959666944 tf_logging.py:115] tokens: [CLS] mandatory health insurance cannot be effectively enforced " mandatory health insurance ? " a dragon in sheep ' s clothing . july 3rd , 2009 : " consider that those who refuse to get insurance may be fined . how will the rebels be found out – when they show up at the hospital ? will you have police stationed at the er now , to fine the un ##ins ##ured ? or will there be a door - to - door search ? " [SEP]


INFO:tensorflow:input_ids: 101 10915 2740 5427 3685 2022 6464 16348 1000 10915 2740 5427 1029 1000 1037 5202 1999 8351 1005 1055 5929 1012 2251 3822 1010 2268 1024 1000 5136 2008 2216 2040 10214 2000 2131 5427 2089 2022 16981 1012 2129 2097 1996 8431 2022 2179 2041 1516 2043 2027 2265 2039 2012 1996 2902 1029 2097 2017 2031 2610 8895 2012 1996 9413 2085 1010 2000 2986 1996 4895 7076 12165 1029 2030 2097 2045 2022 1037 2341 1011 2000 1011 2341 3945 1029 1000 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:17:08.535437 139790959666944 tf_logging.py:115] input_ids: 101 10915 2740 5427 3685 2022 6464 16348 1000 10915 2740 5427 1029 1000 1037 5202 1999 8351 1005 1055 5929 1012 2251 3822 1010 2268 1024 1000 5136 2008 2216 2040 10214 2000 2131 5427 2089 2022 16981 1012 2129 2097 1996 8431 2022 2179 2041 1516 2043 2027 2265 2039 2012 1996 2902 1029 2097 2017 2031 2610 8895 2012 1996 9413 2085 1010 2000 2986 1996 4895 7076 12165 1029 2030 2097 2045 2022 1037 2341 1011 2000 1011 2341 3945 1029 1000 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:17:08.536010 139790959666944 tf_logging.py:115] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:17:08.536513 139790959666944 tf_logging.py:115] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


I0606 19:17:08.536991 139790959666944 tf_logging.py:115] label: 1 (id = 1)


INFO:tensorflow:*** Example ***


I0606 19:17:08.538540 139790959666944 tf_logging.py:115] *** Example ***


INFO:tensorflow:guid: None


I0606 19:17:08.539510 139790959666944 tf_logging.py:115] guid: None


INFO:tensorflow:tokens: [CLS] one can support eu and central ##ization , while object ##ing to lisbon " an alternative guide to the lisbon treaty " . sinn fein , liberal irish political party . - " you can support the eu and be against the lisbon treaty . you can support the eu and still want to see democracy and accountability . you can support the eu and still believe that our government should use their position positively and not go along with what suits the larger countries . " [SEP]


I0606 19:17:08.540464 139790959666944 tf_logging.py:115] tokens: [CLS] one can support eu and central ##ization , while object ##ing to lisbon " an alternative guide to the lisbon treaty " . sinn fein , liberal irish political party . - " you can support the eu and be against the lisbon treaty . you can support the eu and still want to see democracy and accountability . you can support the eu and still believe that our government should use their position positively and not go along with what suits the larger countries . " [SEP]


INFO:tensorflow:input_ids: 101 2028 2064 2490 7327 1998 2430 3989 1010 2096 4874 2075 2000 11929 1000 2019 4522 5009 2000 1996 11929 5036 1000 1012 26403 27132 1010 4314 3493 2576 2283 1012 1011 1000 2017 2064 2490 1996 7327 1998 2022 2114 1996 11929 5036 1012 2017 2064 2490 1996 7327 1998 2145 2215 2000 2156 7072 1998 17842 1012 2017 2064 2490 1996 7327 1998 2145 2903 2008 2256 2231 2323 2224 2037 2597 13567 1998 2025 2175 2247 2007 2054 11072 1996 3469 3032 1012 1000 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:17:08.540987 139790959666944 tf_logging.py:115] input_ids: 101 2028 2064 2490 7327 1998 2430 3989 1010 2096 4874 2075 2000 11929 1000 2019 4522 5009 2000 1996 11929 5036 1000 1012 26403 27132 1010 4314 3493 2576 2283 1012 1011 1000 2017 2064 2490 1996 7327 1998 2022 2114 1996 11929 5036 1012 2017 2064 2490 1996 7327 1998 2145 2215 2000 2156 7072 1998 17842 1012 2017 2064 2490 1996 7327 1998 2145 2903 2008 2256 2231 2323 2224 2037 2597 13567 1998 2025 2175 2247 2007 2054 11072 1996 3469 3032 1012 1000 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:17:08.541543 139790959666944 tf_logging.py:115] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:17:08.542087 139790959666944 tf_logging.py:115] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


I0606 19:17:08.542947 139790959666944 tf_logging.py:115] label: 1 (id = 1)


INFO:tensorflow:*** Example ***


I0606 19:17:08.544280 139790959666944 tf_logging.py:115] *** Example ***


INFO:tensorflow:guid: None


I0606 19:17:08.544875 139790959666944 tf_logging.py:115] guid: None


INFO:tensorflow:tokens: [CLS] super ##del ##ega ##tes were not elected or appointed to elect presidential nominee super ##del ##ega ##tes are a group of representatives , senators , governors , party members and ex - officials , these folks represent 20 % of all the delegates needed to be nominated but are not bound to vote according to any constituency . [SEP]


I0606 19:17:08.545448 139790959666944 tf_logging.py:115] tokens: [CLS] super ##del ##ega ##tes were not elected or appointed to elect presidential nominee super ##del ##ega ##tes are a group of representatives , senators , governors , party members and ex - officials , these folks represent 20 % of all the delegates needed to be nominated but are not bound to vote according to any constituency . [SEP]


INFO:tensorflow:input_ids: 101 3565 9247 29107 4570 2020 2025 2700 2030 2805 2000 11322 4883 9773 3565 9247 29107 4570 2024 1037 2177 1997 4505 1010 10153 1010 11141 1010 2283 2372 1998 4654 1011 4584 1010 2122 12455 5050 2322 1003 1997 2035 1996 10284 2734 2000 2022 4222 2021 2024 2025 5391 2000 3789 2429 2000 2151 5540 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:17:08.546111 139790959666944 tf_logging.py:115] input_ids: 101 3565 9247 29107 4570 2020 2025 2700 2030 2805 2000 11322 4883 9773 3565 9247 29107 4570 2024 1037 2177 1997 4505 1010 10153 1010 11141 1010 2283 2372 1998 4654 1011 4584 1010 2122 12455 5050 2322 1003 1997 2035 1996 10284 2734 2000 2022 4222 2021 2024 2025 5391 2000 3789 2429 2000 2151 5540 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:17:08.546631 139790959666944 tf_logging.py:115] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:17:08.547135 139790959666944 tf_logging.py:115] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


I0606 19:17:08.547618 139790959666944 tf_logging.py:115] label: 1 (id = 1)


INFO:tensorflow:*** Example ***


I0606 19:17:08.549563 139790959666944 tf_logging.py:115] *** Example ***


INFO:tensorflow:guid: None


I0606 19:17:08.550687 139790959666944 tf_logging.py:115] guid: None


INFO:tensorflow:tokens: [CLS] no universal health care causes " job lock " " the case for universal health care " . american medical student association ( am ##sa ) . 2005 - 2006 - " “ job lock ” : job lock refers to the idea that people stay with their jobs when they would rather work elsewhere because their current job offers health insurance . for example , many individuals opt to stay with their job instead of starting their own business because they are unsure of whether they can get health insurance on the individual market , which has higher premium ##s and often denies people with pre - existing conditions . " [SEP]


I0606 19:17:08.551378 139790959666944 tf_logging.py:115] tokens: [CLS] no universal health care causes " job lock " " the case for universal health care " . american medical student association ( am ##sa ) . 2005 - 2006 - " “ job lock ” : job lock refers to the idea that people stay with their jobs when they would rather work elsewhere because their current job offers health insurance . for example , many individuals opt to stay with their job instead of starting their own business because they are unsure of whether they can get health insurance on the individual market , which has higher premium ##s and often denies people with pre - existing conditions . " [SEP]


INFO:tensorflow:input_ids: 101 2053 5415 2740 2729 5320 1000 3105 5843 1000 1000 1996 2553 2005 5415 2740 2729 1000 1012 2137 2966 3076 2523 1006 2572 3736 1007 1012 2384 1011 2294 1011 1000 1523 3105 5843 1524 1024 3105 5843 5218 2000 1996 2801 2008 2111 2994 2007 2037 5841 2043 2027 2052 2738 2147 6974 2138 2037 2783 3105 4107 2740 5427 1012 2005 2742 1010 2116 3633 23569 2000 2994 2007 2037 3105 2612 1997 3225 2037 2219 2449 2138 2027 2024 12422 1997 3251 2027 2064 2131 2740 5427 2006 1996 3265 3006 1010 2029 2038 3020 12882 2015 1998 2411 23439 2111 2007 3653 1011 4493 3785 1012 1000 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:17:08.551977 139790959666944 tf_logging.py:115] input_ids: 101 2053 5415 2740 2729 5320 1000 3105 5843 1000 1000 1996 2553 2005 5415 2740 2729 1000 1012 2137 2966 3076 2523 1006 2572 3736 1007 1012 2384 1011 2294 1011 1000 1523 3105 5843 1524 1024 3105 5843 5218 2000 1996 2801 2008 2111 2994 2007 2037 5841 2043 2027 2052 2738 2147 6974 2138 2037 2783 3105 4107 2740 5427 1012 2005 2742 1010 2116 3633 23569 2000 2994 2007 2037 3105 2612 1997 3225 2037 2219 2449 2138 2027 2024 12422 1997 3251 2027 2064 2131 2740 5427 2006 1996 3265 3006 1010 2029 2038 3020 12882 2015 1998 2411 23439 2111 2007 3653 1011 4493 3785 1012 1000 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:17:08.552541 139790959666944 tf_logging.py:115] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0606 19:17:08.557087 139790959666944 tf_logging.py:115] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


I0606 19:17:08.557775 139790959666944 tf_logging.py:115] label: 0 (id = 0)


# Creating a model

Now that we've prepared our data, let's focus on building a model. `create_model` below does just this. First, it loads the BERT TF Hub module again (this time to extract the computation graph). Next, it creates a single new layer that will be trained to adapt BERT to our sentiment task (i.e. classifying whether a movie review is positive or negative). This strategy of starting from a mostly trained model and training it further on a new task is called [fine-tuning](http://wiki.fast.ai/index.php/Fine_tuning).

In [667]:
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
  """Creates a classification model: the BERT hub module plus one dense layer.

  The BERT module named by `BERT_MODEL_HUB` is loaded with `trainable=True`
  and its "pooled_output" is fed through a single new linear layer
  (`output_weights`, `output_bias`) followed by a log-softmax.

  Args:
    is_predicting: Python bool. When True no loss is built, dropout is
      skipped and `labels` is not used.
    input_ids: Passed to the BERT module's "tokens" signature as `input_ids`.
    input_mask: Passed to the BERT module's "tokens" signature as `input_mask`.
    segment_ids: Passed to the BERT module's "tokens" signature as
      `segment_ids`.
    labels: Integer class ids; one-hot encoded with depth `num_labels` to
      compute the loss.
    num_labels: Number of output classes.

  Returns:
    `(predicted_labels, log_probs)` when `is_predicting` is True, otherwise
    `(loss, predicted_labels, log_probs)`. `predicted_labels` is the argmax of
    `log_probs` over its last axis and `loss` is the mean per-example
    cross-entropy.
  """

  bert_module = hub.Module(
      BERT_MODEL_HUB,
      trainable=True)
  bert_inputs = dict(
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids)
  bert_outputs = bert_module(
      inputs=bert_inputs,
      signature="tokens",
      as_dict=True)

  # Use "pooled_output" for classification tasks on an entire sentence.
  # Use "sequence_output" for token-level output.
  output_layer = bert_outputs["pooled_output"]

  hidden_size = output_layer.shape[-1].value

  # Create our own layer to tune for our classification task.
  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):

    # Dropout helps prevent overfitting, but it must not run at prediction
    # time: it would make the predictions random from call to call.
    # NOTE(review): evaluation still applies dropout because this function is
    # only told whether it is predicting, not whether it is training.
    if not is_predicting:
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    # No tf.squeeze here: argmax already removes the class axis, and squeezing
    # would also drop the batch axis whenever a batch holds a single example.
    predicted_labels = tf.argmax(log_probs, axis=-1, output_type=tf.int32)

    # If we're predicting, we want predicted labels and the log probabilities.
    if is_predicting:
      return (predicted_labels, log_probs)

    # If we're train/eval, compute loss between predicted and actual label.
    # Convert labels into one-hot encoding first.
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, predicted_labels, log_probs)


Next we'll wrap our model function in a `model_fn_builder` function that adapts our model to work for training, evaluation, and prediction.

In [668]:
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
  """Returns a `model_fn` closure for `tf.estimator.Estimator`.

  Args:
    num_labels: Number of target classes, forwarded to `create_model`.
    learning_rate: Forwarded to `bert.optimization.create_optimizer`.
    num_train_steps: Forwarded to `bert.optimization.create_optimizer`.
    num_warmup_steps: Forwarded to `bert.optimization.create_optimizer`.

  Returns:
    A `model_fn(features, labels, mode, params)` callable that returns a
    `tf.estimator.EstimatorSpec` for TRAIN, EVAL or PREDICT mode.
  """

  def metric_fn(label_ids, predicted_labels):
    """Builds the evaluation metric ops from true and predicted labels."""
    # NOTE(review): "auc" is computed from hard predicted labels rather than
    # from scores, so it only reflects a single operating point.
    return {
        "eval_accuracy": tf.metrics.accuracy(label_ids, predicted_labels),
        "f1_score": tf.contrib.metrics.f1_score(label_ids, predicted_labels),
        "auc": tf.metrics.auc(label_ids, predicted_labels),
        "precision": tf.metrics.precision(label_ids, predicted_labels),
        "recall": tf.metrics.recall(label_ids, predicted_labels),
        "true_positives": tf.metrics.true_positives(
            label_ids, predicted_labels),
        "true_negatives": tf.metrics.true_negatives(
            label_ids, predicted_labels),
        "false_positives": tf.metrics.false_positives(
            label_ids, predicted_labels),
        "false_negatives": tf.metrics.false_negatives(
            label_ids, predicted_labels),
    }

  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` handed to the Estimator."""

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    # Labels travel inside `features`; the `labels` argument is not used.
    label_ids = features["label_ids"]

    is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)

    # PREDICT: no loss, optimizer or metrics are needed.
    if is_predicting:
      (predicted_labels, log_probs) = create_model(
          is_predicting, input_ids, input_mask, segment_ids, label_ids,
          num_labels)

      # NOTE: the 'probabilities' entry holds *log* probabilities; the key is
      # kept as-is because code reading the predictions relies on it.
      predictions = {
          'probabilities': log_probs,
          'labels': predicted_labels
      }
      return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # TRAIN and EVAL share the loss computation.
    (loss, predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids,
        num_labels)

    if mode == tf.estimator.ModeKeys.TRAIN:
      # Build the optimizer only when training; evaluation has no use for it.
      train_op = bert.optimization.create_optimizer(
          loss, learning_rate, num_train_steps, num_warmup_steps,
          use_tpu=False)
      return tf.estimator.EstimatorSpec(mode=mode,
                                        loss=loss,
                                        train_op=train_op)

    # EVAL: build the metric ops only here; training does not report them.
    eval_metrics = metric_fn(label_ids, predicted_labels)
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      eval_metric_ops=eval_metrics)

  # Return the actual model function in the closure
  return model_fn


In [669]:
# Fine-tuning hyperparameters.
# These values are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
LEARNING_RATE = 2e-5
BATCH_SIZE = 8
NUM_TRAIN_EPOCHS = 3.0
# Fraction of the training steps spent on warmup, a period during which the
# learning rate starts small and gradually increases -- usually helps training.
WARMUP_PROPORTION = 0.1
# Estimator bookkeeping: how often (in steps) summaries and checkpoints are saved.
SAVE_SUMMARY_STEPS = 100
SAVE_CHECKPOINTS_STEPS = 5000

In [670]:
# Total number of optimizer steps: batches per epoch times the number of
# epochs, truncated to a whole number.
num_train_steps = int((len(train_features) / BATCH_SIZE) * NUM_TRAIN_EPOCHS)
# The warmup phase covers the first WARMUP_PROPORTION of those steps.
num_warmup_steps = int(WARMUP_PROPORTION * num_train_steps)

In [671]:
# Tell the Estimator where to write its output (model_dir) and how often,
# in steps, to save summaries and checkpoints.
run_config = tf.estimator.RunConfig(
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    model_dir=OUTPUT_DIR)

In [672]:
# Bind the hyperparameters into a model function ...
model_fn = model_fn_builder(
    num_train_steps=num_train_steps,
    num_warmup_steps=num_warmup_steps,
    learning_rate=LEARNING_RATE,
    num_labels=len(label_list))

# ... and hand it to an Estimator together with the run configuration.
estimator = tf.estimator.Estimator(
    config=run_config,
    params={"batch_size": BATCH_SIZE},
    model_fn=model_fn)


INFO:tensorflow:Using config: {'_model_dir': 'OUTPUT_DIR_NAME', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f20d6083438>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


I0606 19:17:10.424511 139790959666944 tf_logging.py:115] Using config: {'_model_dir': 'OUTPUT_DIR_NAME', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f20d6083438>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


Next we use BERT's input builder function, which takes our training feature set (`train_features`) and produces the input function that the Estimator calls to get its training data. This is a pretty standard design pattern for working with Tensorflow [Estimators](https://www.tensorflow.org/guide/estimators).

In [673]:
# Build the input function used for training from the prepared features.
# drop_remainder stays False here; set it to True when using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
    is_training=True,
    drop_remainder=False,
    seq_length=MAX_SEQ_LENGTH,
    features=train_features)

Now we train our model! For me, using a Colab notebook running on Google's GPUs, my training time was about 14 minutes.

In [674]:
# Fine-tune the model and report how long the training call took.
# (Plain string literal: the message has no placeholders, so no f-string.)
print('Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)

Beginning Training!
INFO:tensorflow:Calling model_fn.


I0606 19:17:17.575931 139790959666944 tf_logging.py:115] Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0606 19:17:20.011276 139790959666944 tf_logging.py:115] Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Done calling model_fn.


I0606 19:17:26.322711 139790959666944 tf_logging.py:115] Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


I0606 19:17:26.325135 139790959666944 tf_logging.py:115] Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


I0606 19:17:27.281296 139790959666944 tf_logging.py:115] Graph was finalized.


INFO:tensorflow:Running local_init_op.


I0606 19:17:31.835702 139790959666944 tf_logging.py:115] Running local_init_op.


INFO:tensorflow:Done running local_init_op.


I0606 19:17:31.951875 139790959666944 tf_logging.py:115] Done running local_init_op.


INFO:tensorflow:Saving checkpoints for 0 into OUTPUT_DIR_NAME/model.ckpt.


I0606 19:17:42.602069 139790959666944 tf_logging.py:115] Saving checkpoints for 0 into OUTPUT_DIR_NAME/model.ckpt.


INFO:tensorflow:loss = 1.2688377, step = 0


I0606 19:17:54.611202 139790959666944 tf_logging.py:115] loss = 1.2688377, step = 0


INFO:tensorflow:global_step/sec: 2.37292


I0606 19:18:36.753058 139790959666944 tf_logging.py:115] global_step/sec: 2.37292


INFO:tensorflow:loss = 0.7121042, step = 100 (42.143 sec)


I0606 19:18:36.754344 139790959666944 tf_logging.py:115] loss = 0.7121042, step = 100 (42.143 sec)


INFO:tensorflow:global_step/sec: 3.27138


I0606 19:19:07.321218 139790959666944 tf_logging.py:115] global_step/sec: 3.27138


INFO:tensorflow:loss = 0.77386045, step = 200 (30.568 sec)


I0606 19:19:07.322456 139790959666944 tf_logging.py:115] loss = 0.77386045, step = 200 (30.568 sec)


INFO:tensorflow:global_step/sec: 3.26319


I0606 19:19:37.966033 139790959666944 tf_logging.py:115] global_step/sec: 3.26319


INFO:tensorflow:loss = 0.80902123, step = 300 (30.647 sec)


I0606 19:19:37.969411 139790959666944 tf_logging.py:115] loss = 0.80902123, step = 300 (30.647 sec)


INFO:tensorflow:global_step/sec: 3.23205


I0606 19:20:08.906174 139790959666944 tf_logging.py:115] global_step/sec: 3.23205


INFO:tensorflow:loss = 0.72712386, step = 400 (30.939 sec)


I0606 19:20:08.908167 139790959666944 tf_logging.py:115] loss = 0.72712386, step = 400 (30.939 sec)


INFO:tensorflow:global_step/sec: 3.26747


I0606 19:20:39.510900 139790959666944 tf_logging.py:115] global_step/sec: 3.26747


INFO:tensorflow:loss = 0.55919373, step = 500 (30.604 sec)


I0606 19:20:39.512381 139790959666944 tf_logging.py:115] loss = 0.55919373, step = 500 (30.604 sec)


INFO:tensorflow:global_step/sec: 3.23083


I0606 19:21:10.462672 139790959666944 tf_logging.py:115] global_step/sec: 3.23083


INFO:tensorflow:loss = 0.8352276, step = 600 (30.951 sec)


I0606 19:21:10.463714 139790959666944 tf_logging.py:115] loss = 0.8352276, step = 600 (30.951 sec)


INFO:tensorflow:global_step/sec: 3.24081


I0606 19:21:41.319161 139790959666944 tf_logging.py:115] global_step/sec: 3.24081


INFO:tensorflow:loss = 0.3622378, step = 700 (30.857 sec)


I0606 19:21:41.320871 139790959666944 tf_logging.py:115] loss = 0.3622378, step = 700 (30.857 sec)


INFO:tensorflow:global_step/sec: 3.263


I0606 19:22:11.965764 139790959666944 tf_logging.py:115] global_step/sec: 3.263


INFO:tensorflow:loss = 0.53239185, step = 800 (30.646 sec)


I0606 19:22:11.967152 139790959666944 tf_logging.py:115] loss = 0.53239185, step = 800 (30.646 sec)


INFO:tensorflow:global_step/sec: 3.24575


I0606 19:22:42.775262 139790959666944 tf_logging.py:115] global_step/sec: 3.24575


INFO:tensorflow:loss = 0.6653476, step = 900 (30.810 sec)


I0606 19:22:42.776712 139790959666944 tf_logging.py:115] loss = 0.6653476, step = 900 (30.810 sec)


INFO:tensorflow:global_step/sec: 3.23784


I0606 19:23:13.660032 139790959666944 tf_logging.py:115] global_step/sec: 3.23784


INFO:tensorflow:loss = 0.62683326, step = 1000 (30.884 sec)


I0606 19:23:13.661059 139790959666944 tf_logging.py:115] loss = 0.62683326, step = 1000 (30.884 sec)


INFO:tensorflow:global_step/sec: 3.16866


I0606 19:23:45.219089 139790959666944 tf_logging.py:115] global_step/sec: 3.16866


INFO:tensorflow:loss = 0.8627588, step = 1100 (31.559 sec)


I0606 19:23:45.220185 139790959666944 tf_logging.py:115] loss = 0.8627588, step = 1100 (31.559 sec)


INFO:tensorflow:global_step/sec: 3.2454


I0606 19:24:16.031988 139790959666944 tf_logging.py:115] global_step/sec: 3.2454


INFO:tensorflow:loss = 0.47119826, step = 1200 (30.813 sec)


I0606 19:24:16.033132 139790959666944 tf_logging.py:115] loss = 0.47119826, step = 1200 (30.813 sec)


INFO:tensorflow:global_step/sec: 3.24028


I0606 19:24:46.893545 139790959666944 tf_logging.py:115] global_step/sec: 3.24028


INFO:tensorflow:loss = 0.58708817, step = 1300 (30.862 sec)


I0606 19:24:46.894840 139790959666944 tf_logging.py:115] loss = 0.58708817, step = 1300 (30.862 sec)


INFO:tensorflow:global_step/sec: 3.21859


I0606 19:25:17.963047 139790959666944 tf_logging.py:115] global_step/sec: 3.21859


INFO:tensorflow:loss = 0.5983794, step = 1400 (31.071 sec)


I0606 19:25:17.965491 139790959666944 tf_logging.py:115] loss = 0.5983794, step = 1400 (31.071 sec)


INFO:tensorflow:global_step/sec: 3.22972


I0606 19:25:48.925487 139790959666944 tf_logging.py:115] global_step/sec: 3.22972


INFO:tensorflow:loss = 0.4219638, step = 1500 (30.962 sec)


I0606 19:25:48.927065 139790959666944 tf_logging.py:115] loss = 0.4219638, step = 1500 (30.962 sec)


INFO:tensorflow:global_step/sec: 3.24757


I0606 19:26:19.717712 139790959666944 tf_logging.py:115] global_step/sec: 3.24757


INFO:tensorflow:loss = 0.8383572, step = 1600 (30.792 sec)


I0606 19:26:19.719053 139790959666944 tf_logging.py:115] loss = 0.8383572, step = 1600 (30.792 sec)


INFO:tensorflow:global_step/sec: 3.25836


I0606 19:26:50.407984 139790959666944 tf_logging.py:115] global_step/sec: 3.25836


INFO:tensorflow:loss = 0.65894175, step = 1700 (30.690 sec)


I0606 19:26:50.409220 139790959666944 tf_logging.py:115] loss = 0.65894175, step = 1700 (30.690 sec)


INFO:tensorflow:global_step/sec: 3.25802


I0606 19:27:21.101520 139790959666944 tf_logging.py:115] global_step/sec: 3.25802


INFO:tensorflow:loss = 0.76303315, step = 1800 (30.694 sec)


I0606 19:27:21.103050 139790959666944 tf_logging.py:115] loss = 0.76303315, step = 1800 (30.694 sec)


INFO:tensorflow:global_step/sec: 3.25952


I0606 19:27:51.780865 139790959666944 tf_logging.py:115] global_step/sec: 3.25952


INFO:tensorflow:loss = 0.5768133, step = 1900 (30.679 sec)


I0606 19:27:51.782194 139790959666944 tf_logging.py:115] loss = 0.5768133, step = 1900 (30.679 sec)


INFO:tensorflow:global_step/sec: 3.25415


I0606 19:28:22.510860 139790959666944 tf_logging.py:115] global_step/sec: 3.25415


INFO:tensorflow:loss = 0.8510957, step = 2000 (30.730 sec)


I0606 19:28:22.512392 139790959666944 tf_logging.py:115] loss = 0.8510957, step = 2000 (30.730 sec)


INFO:tensorflow:global_step/sec: 3.22584


I0606 19:28:53.510543 139790959666944 tf_logging.py:115] global_step/sec: 3.22584


INFO:tensorflow:loss = 0.48835894, step = 2100 (31.000 sec)


I0606 19:28:53.512191 139790959666944 tf_logging.py:115] loss = 0.48835894, step = 2100 (31.000 sec)


INFO:tensorflow:global_step/sec: 3.25608


I0606 19:29:24.222332 139790959666944 tf_logging.py:115] global_step/sec: 3.25608


INFO:tensorflow:loss = 0.6187762, step = 2200 (30.711 sec)


I0606 19:29:24.223670 139790959666944 tf_logging.py:115] loss = 0.6187762, step = 2200 (30.711 sec)


INFO:tensorflow:global_step/sec: 3.25717


I0606 19:29:54.923795 139790959666944 tf_logging.py:115] global_step/sec: 3.25717


INFO:tensorflow:loss = 0.57382274, step = 2300 (30.702 sec)


I0606 19:29:54.925291 139790959666944 tf_logging.py:115] loss = 0.57382274, step = 2300 (30.702 sec)


INFO:tensorflow:global_step/sec: 3.25488


I0606 19:30:25.646851 139790959666944 tf_logging.py:115] global_step/sec: 3.25488


INFO:tensorflow:loss = 0.58680516, step = 2400 (30.723 sec)


I0606 19:30:25.648043 139790959666944 tf_logging.py:115] loss = 0.58680516, step = 2400 (30.723 sec)


INFO:tensorflow:global_step/sec: 3.2359


I0606 19:30:56.550177 139790959666944 tf_logging.py:115] global_step/sec: 3.2359


INFO:tensorflow:loss = 0.48665968, step = 2500 (30.903 sec)


I0606 19:30:56.551200 139790959666944 tf_logging.py:115] loss = 0.48665968, step = 2500 (30.903 sec)


INFO:tensorflow:global_step/sec: 3.25981


I0606 19:31:27.226839 139790959666944 tf_logging.py:115] global_step/sec: 3.25981


INFO:tensorflow:loss = 0.65281653, step = 2600 (30.677 sec)


I0606 19:31:27.228682 139790959666944 tf_logging.py:115] loss = 0.65281653, step = 2600 (30.677 sec)


INFO:tensorflow:global_step/sec: 3.25836


I0606 19:31:57.917140 139790959666944 tf_logging.py:115] global_step/sec: 3.25836


INFO:tensorflow:loss = 0.5284121, step = 2700 (30.690 sec)


I0606 19:31:57.918830 139790959666944 tf_logging.py:115] loss = 0.5284121, step = 2700 (30.690 sec)


INFO:tensorflow:global_step/sec: 3.25872


I0606 19:32:28.604005 139790959666944 tf_logging.py:115] global_step/sec: 3.25872


INFO:tensorflow:loss = 0.47115988, step = 2800 (30.686 sec)


I0606 19:32:28.604966 139790959666944 tf_logging.py:115] loss = 0.47115988, step = 2800 (30.686 sec)


INFO:tensorflow:global_step/sec: 3.26169


I0606 19:32:59.262963 139790959666944 tf_logging.py:115] global_step/sec: 3.26169


INFO:tensorflow:loss = 0.5618776, step = 2900 (30.660 sec)


I0606 19:32:59.264508 139790959666944 tf_logging.py:115] loss = 0.5618776, step = 2900 (30.660 sec)


INFO:tensorflow:global_step/sec: 3.26096


I0606 19:33:29.928814 139790959666944 tf_logging.py:115] global_step/sec: 3.26096


INFO:tensorflow:loss = 0.92122704, step = 3000 (30.666 sec)


I0606 19:33:29.930287 139790959666944 tf_logging.py:115] loss = 0.92122704, step = 3000 (30.666 sec)


INFO:tensorflow:global_step/sec: 3.25602


I0606 19:34:00.641137 139790959666944 tf_logging.py:115] global_step/sec: 3.25602


INFO:tensorflow:loss = 0.5226813, step = 3100 (30.712 sec)


I0606 19:34:00.642756 139790959666944 tf_logging.py:115] loss = 0.5226813, step = 3100 (30.712 sec)


INFO:tensorflow:global_step/sec: 3.25423


I0606 19:34:31.370406 139790959666944 tf_logging.py:115] global_step/sec: 3.25423


INFO:tensorflow:loss = 0.56229705, step = 3200 (30.729 sec)


I0606 19:34:31.372247 139790959666944 tf_logging.py:115] loss = 0.56229705, step = 3200 (30.729 sec)


INFO:tensorflow:global_step/sec: 3.25811


I0606 19:35:02.063123 139790959666944 tf_logging.py:115] global_step/sec: 3.25811


INFO:tensorflow:loss = 0.4886484, step = 3300 (30.694 sec)


I0606 19:35:02.065773 139790959666944 tf_logging.py:115] loss = 0.4886484, step = 3300 (30.694 sec)


INFO:tensorflow:global_step/sec: 3.25918


I0606 19:35:32.745585 139790959666944 tf_logging.py:115] global_step/sec: 3.25918


INFO:tensorflow:loss = 0.34607124, step = 3400 (30.681 sec)


I0606 19:35:32.747192 139790959666944 tf_logging.py:115] loss = 0.34607124, step = 3400 (30.681 sec)


INFO:tensorflow:global_step/sec: 3.26058


I0606 19:36:03.415023 139790959666944 tf_logging.py:115] global_step/sec: 3.26058


INFO:tensorflow:loss = 0.851954, step = 3500 (30.669 sec)


I0606 19:36:03.416584 139790959666944 tf_logging.py:115] loss = 0.851954, step = 3500 (30.669 sec)


INFO:tensorflow:global_step/sec: 3.25905


I0606 19:36:34.098850 139790959666944 tf_logging.py:115] global_step/sec: 3.25905


INFO:tensorflow:loss = 0.63055134, step = 3600 (30.688 sec)


I0606 19:36:34.104630 139790959666944 tf_logging.py:115] loss = 0.63055134, step = 3600 (30.688 sec)


INFO:tensorflow:global_step/sec: 3.25772


I0606 19:37:04.795112 139790959666944 tf_logging.py:115] global_step/sec: 3.25772


INFO:tensorflow:loss = 0.44793886, step = 3700 (30.692 sec)


I0606 19:37:04.796740 139790959666944 tf_logging.py:115] loss = 0.44793886, step = 3700 (30.692 sec)


INFO:tensorflow:global_step/sec: 3.26014


I0606 19:37:35.468583 139790959666944 tf_logging.py:115] global_step/sec: 3.26014


INFO:tensorflow:loss = 0.66300833, step = 3800 (30.674 sec)


I0606 19:37:35.470562 139790959666944 tf_logging.py:115] loss = 0.66300833, step = 3800 (30.674 sec)


INFO:tensorflow:global_step/sec: 3.2563


I0606 19:38:06.178291 139790959666944 tf_logging.py:115] global_step/sec: 3.2563


INFO:tensorflow:loss = 0.6197716, step = 3900 (30.710 sec)


I0606 19:38:06.180510 139790959666944 tf_logging.py:115] loss = 0.6197716, step = 3900 (30.710 sec)


INFO:tensorflow:global_step/sec: 3.25673


I0606 19:38:36.883914 139790959666944 tf_logging.py:115] global_step/sec: 3.25673


INFO:tensorflow:loss = 0.89031696, step = 4000 (30.705 sec)


I0606 19:38:36.885548 139790959666944 tf_logging.py:115] loss = 0.89031696, step = 4000 (30.705 sec)


INFO:tensorflow:global_step/sec: 3.25801


I0606 19:39:07.577450 139790959666944 tf_logging.py:115] global_step/sec: 3.25801


INFO:tensorflow:loss = 0.117079064, step = 4100 (30.693 sec)


I0606 19:39:07.578900 139790959666944 tf_logging.py:115] loss = 0.117079064, step = 4100 (30.693 sec)


INFO:tensorflow:global_step/sec: 3.26098


I0606 19:39:38.243072 139790959666944 tf_logging.py:115] global_step/sec: 3.26098


INFO:tensorflow:loss = 0.8968463, step = 4200 (30.666 sec)


I0606 19:39:38.244714 139790959666944 tf_logging.py:115] loss = 0.8968463, step = 4200 (30.666 sec)


INFO:tensorflow:global_step/sec: 3.25664


I0606 19:40:08.949623 139790959666944 tf_logging.py:115] global_step/sec: 3.25664


INFO:tensorflow:loss = 0.31351885, step = 4300 (30.707 sec)


I0606 19:40:08.951425 139790959666944 tf_logging.py:115] loss = 0.31351885, step = 4300 (30.707 sec)


INFO:tensorflow:global_step/sec: 3.25972


I0606 19:40:39.627091 139790959666944 tf_logging.py:115] global_step/sec: 3.25972


INFO:tensorflow:loss = 0.44416296, step = 4400 (30.677 sec)


I0606 19:40:39.628505 139790959666944 tf_logging.py:115] loss = 0.44416296, step = 4400 (30.677 sec)


INFO:tensorflow:global_step/sec: 3.25818


I0606 19:41:10.319080 139790959666944 tf_logging.py:115] global_step/sec: 3.25818


INFO:tensorflow:loss = 0.73311955, step = 4500 (30.692 sec)


I0606 19:41:10.320529 139790959666944 tf_logging.py:115] loss = 0.73311955, step = 4500 (30.692 sec)


INFO:tensorflow:global_step/sec: 3.25983


I0606 19:41:40.995474 139790959666944 tf_logging.py:115] global_step/sec: 3.25983


INFO:tensorflow:loss = 0.7188789, step = 4600 (30.676 sec)


I0606 19:41:40.996707 139790959666944 tf_logging.py:115] loss = 0.7188789, step = 4600 (30.676 sec)


INFO:tensorflow:global_step/sec: 3.26111


I0606 19:42:11.659885 139790959666944 tf_logging.py:115] global_step/sec: 3.26111


INFO:tensorflow:loss = 0.28467745, step = 4700 (30.664 sec)


I0606 19:42:11.661191 139790959666944 tf_logging.py:115] loss = 0.28467745, step = 4700 (30.664 sec)


INFO:tensorflow:global_step/sec: 3.25612


I0606 19:42:42.371258 139790959666944 tf_logging.py:115] global_step/sec: 3.25612


INFO:tensorflow:loss = 0.6989491, step = 4800 (30.711 sec)


I0606 19:42:42.372667 139790959666944 tf_logging.py:115] loss = 0.6989491, step = 4800 (30.711 sec)


INFO:tensorflow:global_step/sec: 3.25799


I0606 19:43:13.065075 139790959666944 tf_logging.py:115] global_step/sec: 3.25799


INFO:tensorflow:loss = 0.87658286, step = 4900 (30.694 sec)


I0606 19:43:13.066706 139790959666944 tf_logging.py:115] loss = 0.87658286, step = 4900 (30.694 sec)


INFO:tensorflow:Saving checkpoints for 5000 into OUTPUT_DIR_NAME/model.ckpt.


I0606 19:43:43.457406 139790959666944 tf_logging.py:115] Saving checkpoints for 5000 into OUTPUT_DIR_NAME/model.ckpt.


INFO:tensorflow:global_step/sec: 2.68942


I0606 19:43:50.247756 139790959666944 tf_logging.py:115] global_step/sec: 2.68942


INFO:tensorflow:loss = 0.7001285, step = 5000 (37.182 sec)


I0606 19:43:50.248768 139790959666944 tf_logging.py:115] loss = 0.7001285, step = 5000 (37.182 sec)


INFO:tensorflow:global_step/sec: 3.26158


I0606 19:44:20.907778 139790959666944 tf_logging.py:115] global_step/sec: 3.26158


INFO:tensorflow:loss = 0.38835973, step = 5100 (30.660 sec)


I0606 19:44:20.908811 139790959666944 tf_logging.py:115] loss = 0.38835973, step = 5100 (30.660 sec)


INFO:tensorflow:global_step/sec: 3.25966


I0606 19:44:51.585803 139790959666944 tf_logging.py:115] global_step/sec: 3.25966


INFO:tensorflow:loss = 0.7712732, step = 5200 (30.678 sec)


I0606 19:44:51.586952 139790959666944 tf_logging.py:115] loss = 0.7712732, step = 5200 (30.678 sec)


INFO:tensorflow:global_step/sec: 3.26153


I0606 19:45:22.246292 139790959666944 tf_logging.py:115] global_step/sec: 3.26153


INFO:tensorflow:loss = 0.61818105, step = 5300 (30.661 sec)


I0606 19:45:22.248081 139790959666944 tf_logging.py:115] loss = 0.61818105, step = 5300 (30.661 sec)


INFO:tensorflow:global_step/sec: 3.25743


I0606 19:45:52.945326 139790959666944 tf_logging.py:115] global_step/sec: 3.25743


INFO:tensorflow:loss = 0.4594947, step = 5400 (30.700 sec)


I0606 19:45:52.948086 139790959666944 tf_logging.py:115] loss = 0.4594947, step = 5400 (30.700 sec)


INFO:tensorflow:global_step/sec: 3.25773


I0606 19:46:23.641526 139790959666944 tf_logging.py:115] global_step/sec: 3.25773


INFO:tensorflow:loss = 1.0588839, step = 5500 (30.695 sec)


I0606 19:46:23.643194 139790959666944 tf_logging.py:115] loss = 1.0588839, step = 5500 (30.695 sec)


INFO:tensorflow:global_step/sec: 3.25674


I0606 19:46:54.347144 139790959666944 tf_logging.py:115] global_step/sec: 3.25674


INFO:tensorflow:loss = 0.6173221, step = 5600 (30.706 sec)


I0606 19:46:54.349143 139790959666944 tf_logging.py:115] loss = 0.6173221, step = 5600 (30.706 sec)


INFO:tensorflow:global_step/sec: 3.25979


I0606 19:47:25.023882 139790959666944 tf_logging.py:115] global_step/sec: 3.25979


INFO:tensorflow:loss = 0.41648608, step = 5700 (30.676 sec)


I0606 19:47:25.025189 139790959666944 tf_logging.py:115] loss = 0.41648608, step = 5700 (30.676 sec)


INFO:tensorflow:global_step/sec: 3.25591


I0606 19:47:55.737291 139790959666944 tf_logging.py:115] global_step/sec: 3.25591


INFO:tensorflow:loss = 0.3028623, step = 5800 (30.713 sec)


I0606 19:47:55.738678 139790959666944 tf_logging.py:115] loss = 0.3028623, step = 5800 (30.713 sec)


INFO:tensorflow:global_step/sec: 3.25692


I0606 19:48:26.441183 139790959666944 tf_logging.py:115] global_step/sec: 3.25692


INFO:tensorflow:loss = 0.109999925, step = 5900 (30.704 sec)


I0606 19:48:26.442729 139790959666944 tf_logging.py:115] loss = 0.109999925, step = 5900 (30.704 sec)


INFO:tensorflow:global_step/sec: 3.25938


I0606 19:48:57.121876 139790959666944 tf_logging.py:115] global_step/sec: 3.25938


INFO:tensorflow:loss = 0.8223293, step = 6000 (30.681 sec)


I0606 19:48:57.123894 139790959666944 tf_logging.py:115] loss = 0.8223293, step = 6000 (30.681 sec)


INFO:tensorflow:global_step/sec: 3.25966


I0606 19:49:27.799830 139790959666944 tf_logging.py:115] global_step/sec: 3.25966


INFO:tensorflow:loss = 0.3489812, step = 6100 (30.678 sec)


I0606 19:49:27.801450 139790959666944 tf_logging.py:115] loss = 0.3489812, step = 6100 (30.678 sec)


INFO:tensorflow:global_step/sec: 3.25402


I0606 19:49:58.531022 139790959666944 tf_logging.py:115] global_step/sec: 3.25402


INFO:tensorflow:loss = 0.701541, step = 6200 (30.731 sec)


I0606 19:49:58.532561 139790959666944 tf_logging.py:115] loss = 0.701541, step = 6200 (30.731 sec)


INFO:tensorflow:global_step/sec: 3.25946


I0606 19:50:29.210989 139790959666944 tf_logging.py:115] global_step/sec: 3.25946


INFO:tensorflow:loss = 0.21504828, step = 6300 (30.680 sec)


I0606 19:50:29.212419 139790959666944 tf_logging.py:115] loss = 0.21504828, step = 6300 (30.680 sec)


INFO:tensorflow:global_step/sec: 3.25709


I0606 19:50:59.913300 139790959666944 tf_logging.py:115] global_step/sec: 3.25709


INFO:tensorflow:loss = 0.2866449, step = 6400 (30.703 sec)


I0606 19:50:59.915577 139790959666944 tf_logging.py:115] loss = 0.2866449, step = 6400 (30.703 sec)


INFO:tensorflow:global_step/sec: 3.25899


I0606 19:51:30.597628 139790959666944 tf_logging.py:115] global_step/sec: 3.25899


INFO:tensorflow:loss = 0.4032441, step = 6500 (30.684 sec)


I0606 19:51:30.599113 139790959666944 tf_logging.py:115] loss = 0.4032441, step = 6500 (30.684 sec)


INFO:tensorflow:global_step/sec: 3.25659


I0606 19:52:01.304623 139790959666944 tf_logging.py:115] global_step/sec: 3.25659


INFO:tensorflow:loss = 0.7989726, step = 6600 (30.707 sec)


I0606 19:52:01.306168 139790959666944 tf_logging.py:115] loss = 0.7989726, step = 6600 (30.707 sec)


INFO:tensorflow:global_step/sec: 3.25835


I0606 19:52:31.995005 139790959666944 tf_logging.py:115] global_step/sec: 3.25835


INFO:tensorflow:loss = 0.7396882, step = 6700 (30.690 sec)


I0606 19:52:31.996605 139790959666944 tf_logging.py:115] loss = 0.7396882, step = 6700 (30.690 sec)


INFO:tensorflow:global_step/sec: 3.25632


I0606 19:53:02.704450 139790959666944 tf_logging.py:115] global_step/sec: 3.25632


INFO:tensorflow:loss = 0.23813413, step = 6800 (30.710 sec)


I0606 19:53:02.706187 139790959666944 tf_logging.py:115] loss = 0.23813413, step = 6800 (30.710 sec)


INFO:tensorflow:global_step/sec: 3.25897


I0606 19:53:33.389063 139790959666944 tf_logging.py:115] global_step/sec: 3.25897


INFO:tensorflow:loss = 0.25518, step = 6900 (30.685 sec)


I0606 19:53:33.390973 139790959666944 tf_logging.py:115] loss = 0.25518, step = 6900 (30.685 sec)


INFO:tensorflow:global_step/sec: 3.25999


I0606 19:54:04.064037 139790959666944 tf_logging.py:115] global_step/sec: 3.25999


INFO:tensorflow:loss = 0.8384721, step = 7000 (30.675 sec)


I0606 19:54:04.065570 139790959666944 tf_logging.py:115] loss = 0.8384721, step = 7000 (30.675 sec)


INFO:tensorflow:global_step/sec: 3.25561


I0606 19:54:34.780211 139790959666944 tf_logging.py:115] global_step/sec: 3.25561


INFO:tensorflow:loss = 0.46630514, step = 7100 (30.716 sec)


I0606 19:54:34.781507 139790959666944 tf_logging.py:115] loss = 0.46630514, step = 7100 (30.716 sec)


INFO:tensorflow:global_step/sec: 3.25799


I0606 19:55:05.473932 139790959666944 tf_logging.py:115] global_step/sec: 3.25799


INFO:tensorflow:loss = 0.5383043, step = 7200 (30.694 sec)


I0606 19:55:05.475897 139790959666944 tf_logging.py:115] loss = 0.5383043, step = 7200 (30.694 sec)


INFO:tensorflow:global_step/sec: 3.25537


I0606 19:55:36.192429 139790959666944 tf_logging.py:115] global_step/sec: 3.25537


INFO:tensorflow:loss = 0.68786657, step = 7300 (30.718 sec)


I0606 19:55:36.193884 139790959666944 tf_logging.py:115] loss = 0.68786657, step = 7300 (30.718 sec)


INFO:tensorflow:global_step/sec: 3.26053


I0606 19:56:06.862329 139790959666944 tf_logging.py:115] global_step/sec: 3.26053


INFO:tensorflow:loss = 0.5444917, step = 7400 (30.670 sec)


I0606 19:56:06.863940 139790959666944 tf_logging.py:115] loss = 0.5444917, step = 7400 (30.670 sec)


INFO:tensorflow:global_step/sec: 3.25699


I0606 19:56:37.565564 139790959666944 tf_logging.py:115] global_step/sec: 3.25699


INFO:tensorflow:loss = 0.48841628, step = 7500 (30.704 sec)


I0606 19:56:37.568394 139790959666944 tf_logging.py:115] loss = 0.48841628, step = 7500 (30.704 sec)


INFO:tensorflow:global_step/sec: 3.25914


I0606 19:57:08.248368 139790959666944 tf_logging.py:115] global_step/sec: 3.25914


INFO:tensorflow:loss = 0.6988239, step = 7600 (30.681 sec)


I0606 19:57:08.249600 139790959666944 tf_logging.py:115] loss = 0.6988239, step = 7600 (30.681 sec)


INFO:tensorflow:global_step/sec: 3.25775


I0606 19:57:38.944371 139790959666944 tf_logging.py:115] global_step/sec: 3.25775


INFO:tensorflow:loss = 0.2820016, step = 7700 (30.697 sec)


I0606 19:57:38.946243 139790959666944 tf_logging.py:115] loss = 0.2820016, step = 7700 (30.697 sec)


INFO:tensorflow:global_step/sec: 3.25521


I0606 19:58:09.664358 139790959666944 tf_logging.py:115] global_step/sec: 3.25521


INFO:tensorflow:loss = 0.48170096, step = 7800 (30.720 sec)


I0606 19:58:09.665848 139790959666944 tf_logging.py:115] loss = 0.48170096, step = 7800 (30.720 sec)


INFO:tensorflow:global_step/sec: 3.25841


I0606 19:58:40.354130 139790959666944 tf_logging.py:115] global_step/sec: 3.25841


INFO:tensorflow:loss = 0.74757844, step = 7900 (30.689 sec)


I0606 19:58:40.355264 139790959666944 tf_logging.py:115] loss = 0.74757844, step = 7900 (30.689 sec)


INFO:tensorflow:global_step/sec: 3.2596


I0606 19:59:11.032719 139790959666944 tf_logging.py:115] global_step/sec: 3.2596


INFO:tensorflow:loss = 0.1750385, step = 8000 (30.679 sec)


I0606 19:59:11.034521 139790959666944 tf_logging.py:115] loss = 0.1750385, step = 8000 (30.679 sec)


INFO:tensorflow:global_step/sec: 3.2594


I0606 19:59:41.713183 139790959666944 tf_logging.py:115] global_step/sec: 3.2594


INFO:tensorflow:loss = 0.39228967, step = 8100 (30.681 sec)


I0606 19:59:41.716018 139790959666944 tf_logging.py:115] loss = 0.39228967, step = 8100 (30.681 sec)


INFO:tensorflow:global_step/sec: 3.25946


I0606 20:00:12.393155 139790959666944 tf_logging.py:115] global_step/sec: 3.25946


INFO:tensorflow:loss = 0.25617385, step = 8200 (30.679 sec)


I0606 20:00:12.395416 139790959666944 tf_logging.py:115] loss = 0.25617385, step = 8200 (30.679 sec)


INFO:tensorflow:global_step/sec: 3.25184


I0606 20:00:43.144924 139790959666944 tf_logging.py:115] global_step/sec: 3.25184


INFO:tensorflow:loss = 0.5711311, step = 8300 (30.751 sec)


I0606 20:00:43.146203 139790959666944 tf_logging.py:115] loss = 0.5711311, step = 8300 (30.751 sec)


INFO:tensorflow:global_step/sec: 3.21624


I0606 20:01:14.237192 139790959666944 tf_logging.py:115] global_step/sec: 3.21624


INFO:tensorflow:loss = 0.48967844, step = 8400 (31.093 sec)


I0606 20:01:14.238969 139790959666944 tf_logging.py:115] loss = 0.48967844, step = 8400 (31.093 sec)


INFO:tensorflow:global_step/sec: 3.25416


I0606 20:01:44.967077 139790959666944 tf_logging.py:115] global_step/sec: 3.25416


INFO:tensorflow:loss = 0.5545676, step = 8500 (30.730 sec)


I0606 20:01:44.968462 139790959666944 tf_logging.py:115] loss = 0.5545676, step = 8500 (30.730 sec)


INFO:tensorflow:Saving checkpoints for 8561 into OUTPUT_DIR_NAME/model.ckpt.


I0606 20:02:03.415354 139790959666944 tf_logging.py:115] Saving checkpoints for 8561 into OUTPUT_DIR_NAME/model.ckpt.


INFO:tensorflow:Loss for final step: 0.63267195.


I0606 20:02:10.321399 139790959666944 tf_logging.py:115] Loss for final step: 0.63267195.


Training took time  0:44:59.877039


Now let's use our test data to see how well our model did:

In [675]:
# Build the input function that feeds `test_features` to the Estimator for
# evaluation (is_training=False). The builder is referenced through the `bert`
# package, exactly as the training input function is, so both cells resolve
# the helper the same way.
test_input_fn = bert.run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)

In [676]:
# Set up prediction over the test set. In the recorded run, the prediction
# logs ("Calling model_fn", "Restoring parameters ...") only show up under the
# following cell, i.e. the actual work happens when `predictions` is iterated,
# not when this line runs.
predictions = estimator.predict(input_fn=test_input_fn)

In [677]:
# Collect the 'labels' entry of every prediction into a plain list.
pred_label = [pred['labels'] for pred in predictions]

INFO:tensorflow:Calling model_fn.


I0606 20:02:10.947963 139790959666944 tf_logging.py:115] Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0606 20:02:13.297445 139790959666944 tf_logging.py:115] Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Done calling model_fn.


I0606 20:02:13.419023 139790959666944 tf_logging.py:115] Done calling model_fn.


INFO:tensorflow:Graph was finalized.


I0606 20:02:13.753352 139790959666944 tf_logging.py:115] Graph was finalized.


INFO:tensorflow:Restoring parameters from OUTPUT_DIR_NAME/model.ckpt-8561


I0606 20:02:13.758899 139790959666944 tf_logging.py:115] Restoring parameters from OUTPUT_DIR_NAME/model.ckpt-8561


INFO:tensorflow:Running local_init_op.


I0606 20:02:14.234652 139790959666944 tf_logging.py:115] Running local_init_op.


INFO:tensorflow:Done running local_init_op.


I0606 20:02:14.280484 139790959666944 tf_logging.py:115] Done running local_init_op.


In [678]:
import numpy as np

In [679]:
# Show true labels and predicted labels one above the other. Both are printed
# as NumPy arrays so that long sequences are summarized the same way instead
# of dumping every single prediction into the cell output.
print(test['label'].values.astype(int))
print(np.array(pred_label))

[0 1 1 ... 0 1 1]
[0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 

In [680]:
from sklearn.metrics import classification_report, confusion_matrix

# Materialize ground truth and predictions once so that both metrics are
# computed from exactly the same arrays.
y_true = test['label'].values.astype(int)
y_pred = np.array(pred_label)

# sklearn reports on every label that occurs in y_true OR y_pred, so the
# display names must be chosen from that union. Counting only the test labels
# leads to a target_names size mismatch whenever the model predicts a class
# that is absent from the test labels.
n_classes = len(np.union1d(y_true, y_pred))
if n_classes == 3:
    target_names = ["attack", "support", "unrelated"]
else:
    # Alternative binary naming schemes kept from the original cell:
    #   ["agreement", "disagreement"]
    #   ["relation", "unrelated"]
    target_names = ["attack", "support"]

print("Confusion Matrix:")
print(confusion_matrix(y_true, y_pred))
print("Classification Report:")
print(classification_report(y_true, y_pred, target_names=target_names))


Confusion Matrix:
[[556 398]
 [212 834]]
Classification Report:
              precision    recall  f1-score   support

      attack       0.72      0.58      0.65       954
     support       0.68      0.80      0.73      1046

    accuracy                           0.69      2000
   macro avg       0.70      0.69      0.69      2000
weighted avg       0.70      0.69      0.69      2000

