# Relation Prediction in Argument Mining with Pre-trained Deep Bidirectional Transformers

Code for BA thesis.

In [74]:
# All imports
# import os 
# import re

import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub

# from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import KFold


from bert import run_classifier
from bert import optimization
from bert import tokenization
from bert import modeling
from datetime import datetime

In [75]:
def load_local_data(filename, data):
    """Load the tab-separated corpus and split it into train/validation frames.

    The file is read with pandas and must provide at least the columns
    ``org_dataset``, ``org`` and ``response``.  Before splitting, the mean
    whitespace-token count of ``org`` and ``response`` is printed for every
    value of ``org_dataset``.

    Args:
        filename: Path to the TSV file.
        data: Which sub-corpus to use. One of:
            ``'node'``      - rows from ``debate_train``, ``debate_test`` and
                              ``procon``; ``debate_test`` is the validation
                              set, the rest is the training set.
            ``'political'`` - rows from ``political``; after shuffling, the
                              last 200 rows are the validation set.
            ``'agreement'`` - rows from ``agreement`` with incomplete rows
                              dropped; after shuffling, the last 2000 rows
                              are the validation set.

    Returns:
        A ``(data_train, data_val)`` tuple of DataFrames.

    Raises:
        SystemExit: With exit code -1 if ``data`` is not a known name.

    Note:
        Shuffling uses ``DataFrame.sample`` without a seed, so the
        'political' and 'agreement' splits differ from run to run.
    """
    # Local import: the notebook's import cell does not bring `sys` into scope,
    # which previously turned the error branch below into a NameError.
    import sys

    df = pd.read_csv(filename, sep='\t')

    # Mean number of whitespace-separated tokens per source corpus.
    for text_column in ('org', 'response'):
        print(df.groupby('org_dataset')[text_column].apply(
            lambda x: x.str.split().str.len().mean()))

    # Split in Training and Validation data
    if data == 'node':
        # Training data: debate_train + procon
        # Validation data: debate_test
        dataset = df.loc[df['org_dataset'].isin(['debate_test', 'debate_train', 'procon'])]  # Use original data
        # dataset = dataset[dataset['label'] != 'unrelated'] # Filter only support/attack
        dataset = dataset.sample(frac=1)
        is_validation = dataset['org_dataset'].isin(['debate_test'])
        data_train = dataset.loc[~is_validation]
        data_val = dataset.loc[is_validation]
    elif data == 'political':
        dataset = df.loc[df['org_dataset'].isin(['political'])]
        # dataset = dataset[dataset['label'] != 'unrelated'] # Filter only support/attack
        dataset = dataset.sample(frac=1)
        data_train = dataset.iloc[:-200]
        data_val = dataset.iloc[-200:]
    elif data == 'agreement':
        dataset = df.loc[df['org_dataset'].isin(['agreement'])]
        dataset = dataset.sample(frac=1).dropna()
        data_train = dataset.iloc[:-2000]
        data_val = dataset.iloc[-2000:]
    else:
        print('Invalid dataset')
        sys.exit(-1)
    return data_train, data_val

def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
    """Build the classification graph: BERT encoder plus one softmax layer.

    The encoder is loaded from the TF-Hub module named by the module-level
    ``BERT_MODEL_HUB`` and is fine-tuned (``trainable=True``).  A single
    linear layer over the encoder's pooled output produces the class logits.

    Args:
        is_predicting: If true, only predictions are returned and neither
            dropout nor the hub module's "train" tag is used.
        input_ids: Token-id tensor fed to the hub module's "tokens" signature
            (presumably [batch, MAX_SEQ_LENGTH] - confirm against the input_fn).
        input_mask: Attention-mask tensor for the same signature.
        segment_ids: Segment-id tensor for the same signature.
        labels: Integer class ids; one-hot encoded with depth ``num_labels``.
        num_labels: Number of output classes.

    Returns:
        ``(predicted_labels, log_probs)`` if ``is_predicting`` is true,
        otherwise ``(loss, predicted_labels, log_probs)`` where ``loss`` is
        the mean cross-entropy over the batch.
    """
    # NOTE(review): this is also true in EVAL mode, so dropout and the "train"
    # graph are used during evaluation as well - confirm that this is intended.
    is_training = not is_predicting

    # Alternative encoder construction from a local checkpoint configuration,
    # kept for reference:
    #
    # model = modeling.BertModel(
    #     config=BERT_CONFIG,
    #     is_training=is_training,
    #     input_ids=input_ids,
    #     input_mask=input_mask,
    #     token_type_ids=segment_ids,
    #     use_one_hot_embeddings=False)
    #
    # # Use "pooled_output" for classification tasks on an entire sentence.
    # # Use "sequence_outputs" for token-level output.
    # output_layer = model.get_pooled_output()

    # Request the module's "train" graph variant whenever we are not predicting.
    tags = set()
    if is_training:
        tags.add("train")
    bert_module = hub.Module(
        BERT_MODEL_HUB,
        tags=tags,
        trainable=True)
    bert_inputs = dict(
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids)
    bert_outputs = bert_module(
        inputs=bert_inputs,
        signature="tokens",
        as_dict=True)

    # Use "pooled_output" for classification tasks on an entire sentence.
    # Use "sequence_outputs" for token-level output.
    output_layer = bert_outputs["pooled_output"]

    hidden_size = output_layer.shape[-1].value

    # Task-specific classification layer trained on top of the encoder.
    output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # Dropout helps prevent overfitting
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        # Convert labels into one-hot encoding
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

        # argmax over the class axis already yields one label per example.
        # The former tf.squeeze() around it removed the batch dimension as
        # well whenever a batch contained a single example, producing a
        # scalar instead of a length-1 vector.
        predicted_labels = tf.argmax(log_probs, axis=-1, output_type=tf.int32)
        # If we're predicting, we want predicted labels and the probabilities.
        if is_predicting:
            return (predicted_labels, log_probs)

        # If we're train/eval, compute loss between predicted and actual label
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)
        return (loss, predicted_labels, log_probs)

# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps, num_warmup_steps):
    """Return a ``model_fn`` closure usable with ``tf.estimator.Estimator``.

    Args:
        num_labels: Number of output classes, forwarded to ``create_model``.
        learning_rate: Learning rate forwarded to
            ``optimization.create_optimizer``.
        num_train_steps: Total number of training steps, forwarded likewise.
        num_warmup_steps: Number of warm-up steps, forwarded likewise.

    Returns:
        A function ``model_fn(features, labels, mode, params)`` that returns a
        ``tf.estimator.EstimatorSpec`` for TRAIN, EVAL and PREDICT modes.
    """

    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Build the graph for ``mode`` and wrap it in an ``EstimatorSpec``.

        The label ids are read from ``features["label_ids"]``; the ``labels``
        and ``params`` arguments are not used.
        """

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)

        # PREDICT
        if is_predicting:
            (predicted_labels, log_probs) = create_model(
                is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

            # The 'probabilities' entry holds log-probabilities; the key is
            # kept unchanged so that existing consumers keep working.
            predictions = {
                'probabilities': log_probs,
                'labels': predicted_labels
            }
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)

        # TRAIN / EVAL share the loss computation.
        (loss, predicted_labels, log_probs) = create_model(
            is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

        if mode == tf.estimator.ModeKeys.TRAIN:
            # The optimizer is only needed (and only built) for training.
            train_op = optimization.create_optimizer(
              loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)
            return tf.estimator.EstimatorSpec(mode=mode,
              loss=loss,
              train_op=train_op)

        # EVAL: previously this path fell through and returned None, which
        # the Estimator cannot use.  Report the loss and the accuracy instead.
        eval_metric_ops = {
            'eval_accuracy': tf.metrics.accuracy(
                labels=label_ids, predictions=predicted_labels)
        }
        return tf.estimator.EstimatorSpec(mode=mode,
          loss=loss,
          eval_metric_ops=eval_metric_ops)

    # Return the actual model function in the closure
    return model_fn

In [76]:
# All parameters
cross_val = False
data_path = '../data/complete_data.tsv'
dataset = 'political'  # One of 'agreement', 'node' and 'political'
use_org = True   # Feed the original statement ('org' column) to the model
use_resp = True  # Feed the response ('response' column) to the model
# Per-dataset mapping from the string labels in the TSV to integer class ids.
# For 'political', "attack" and "support" share id 0, which makes the task
# binary: related (0) vs. unrelated (1).
convert_dicts = {'agreement': {"agreement": 0, "disagreement": 1, "unrelated": 2},
                'node': {"attack": 0, "support": 1, "unrelated": 2},
                'political': {"attack": 0, "support": 0, "unrelated": 1}}
                #'political': {"attack": 0, "support": 1, "unrelated": 1}}
convert_dict = convert_dicts[dataset]

ORG_COLUMN = 'org'
RESP_COLUMN = 'response'
LABEL_COLUMN = 'label'
# label_list is the list of class ids the classifier can emit.  It is derived
# from the active label mapping so that it stays correct when `dataset` is
# switched: [0, 1] for 'political', [0, 1, 2] for 'agreement' and 'node'.
label_list = sorted(set(convert_dict.values()))
BERT_VOCAB= './uncased_L-12_H-768_A-12/vocab.txt'
BERT_INIT_CHKPNT = './uncased_L-12_H-768_A-12/bert_model.ckpt'
BERT_CONFIG_PATH = './uncased_L-12_H-768_A-12/bert_config.json'
BERT_CONFIG = modeling.BertConfig.from_json_file(BERT_CONFIG_PATH)
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"



MAX_SEQ_LENGTH = 128

# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 8
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where the learning rate
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 5000
SAVE_SUMMARY_STEPS = 100
# A fresh, timestamped directory per run, suffixed with the dataset name.
OUTPUT_DIR = 'BERT_RUN' + str(datetime.now()) + dataset

# Specify output directory and number of checkpoint steps to save
run_config = tf.estimator.RunConfig(
#run_config = tf.contrib.tpu.RunConfig(
    model_dir=OUTPUT_DIR,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)


In [77]:
# Build the WordPiece tokenizer for the uncased checkpoint.
# First verify that lower-casing is consistent with the checkpoint name,
# then create the tokenizer with the same casing option.
tokenization.validate_case_matches_checkpoint(
    do_lower_case=True,
    init_checkpoint=BERT_INIT_CHKPNT)
tokenizer = tokenization.FullTokenizer(BERT_VOCAB, do_lower_case=True)

In [78]:
# Load local data
train_df, test_df = load_local_data(data_path, dataset)
print(train_df.head())

# Map the string labels to the integer class ids of the selected dataset.
train_df = train_df.replace({LABEL_COLUMN: convert_dict})
test_df = test_df.replace({LABEL_COLUMN: convert_dict})
print(train_df.groupby(LABEL_COLUMN).describe())
print(test_df.groupby(LABEL_COLUMN).describe())
# Shuffle both splits (unseeded, so the order differs between runs).
train = train_df.sample(frac=1)
test = test_df.sample(frac=1)


def _build_input_examples(frame, text_a_column, text_b_column=None):
    """Turn every row of ``frame`` into a ``run_classifier.InputExample``.

    Args:
        frame: DataFrame holding the text column(s) and ``LABEL_COLUMN``.
        text_a_column: Column used as the first text segment.
        text_b_column: Column used as the second text segment, or ``None``
            for single-segment examples.

    Returns:
        The result of the row-wise ``DataFrame.apply``, one example per row.
    """
    def to_example(row):
        return run_classifier.InputExample(
            guid=None,  # Globally unique ID for bookkeeping, unused in this example
            text_a=row[text_a_column],
            text_b=None if text_b_column is None else row[text_b_column],
            label=row[LABEL_COLUMN])

    return frame.apply(to_example, axis=1)


# Select which text column(s) the model sees.
if use_org and use_resp:
    # Use org + response as a sentence pair
    text_a_column, text_b_column = ORG_COLUMN, RESP_COLUMN
elif use_org:
    # Use only org
    text_a_column, text_b_column = ORG_COLUMN, None
else:
    # Use only resp (also the fallback when both flags are off)
    text_a_column, text_b_column = RESP_COLUMN, None

# Use the InputExample class from BERT's run_classifier code to create examples from the data
train_InputExamples = _build_input_examples(train, text_a_column, text_b_column)
test_InputExamples = _build_input_examples(test, text_a_column, text_b_column)

# Convert our train and test features to InputFeatures that BERT understands.
# MAX_SEQ_LENGTH is passed to the conversion as the maximum sequence length.
train_features = run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)

org_dataset
agreement              74.945753
comargGM               13.558755
comargUGIP              9.486594
debate_ext_attacks     36.043478
debate_ext_media       53.533333
debate_ext_second      11.043478
debate_ext_supp        18.885714
debate_extended        35.772358
debate_test            13.860000
debate_train           17.280000
political             102.759589
procon                 13.950000
Name: org, dtype: float64
org_dataset
agreement              69.926330
comargGM              115.659922
comargUGIP             83.567031
debate_ext_attacks     32.782609
debate_ext_media       53.168254
debate_ext_second      60.717391
debate_ext_supp        45.428571
debate_extended        59.934959
debate_test            51.240000
debate_train           48.080000
political             102.171233
procon                 30.366667
Name: response, dtype: float64
     org_dataset    id                                                org org_stance                                           

I0619 23:12:50.805358 139826748040960 tf_logging.py:115] Writing example 0 of 531


INFO:tensorflow:*** Example ***


I0619 23:12:50.808983 139826748040960 tf_logging.py:115] *** Example ***


INFO:tensorflow:guid: None


I0619 23:12:50.811480 139826748040960 tf_logging.py:115] guid: None


INFO:tensorflow:tokens: [CLS] when i said that i had no idea that you would make even a greater blu ##nder about cuba . consequently , i will talk about not only que ##mo ##y and mats ##u but also about cuba every day during the next 2 weeks - i will do this because i am deeply concerned about the policies you advocate in those two [SEP] take my opponent , for example . he says on the cuban situation that he did not intend to suggest that the american government should intervene in the internal affairs of cuba . and yet the words he used , the u . s . government should support the anti - castro forces in cuba have been interpreted by his friends among [SEP]


I0619 23:12:50.812273 139826748040960 tf_logging.py:115] tokens: [CLS] when i said that i had no idea that you would make even a greater blu ##nder about cuba . consequently , i will talk about not only que ##mo ##y and mats ##u but also about cuba every day during the next 2 weeks - i will do this because i am deeply concerned about the policies you advocate in those two [SEP] take my opponent , for example . he says on the cuban situation that he did not intend to suggest that the american government should intervene in the internal affairs of cuba . and yet the words he used , the u . s . government should support the anti - castro forces in cuba have been interpreted by his friends among [SEP]


INFO:tensorflow:input_ids: 101 2043 1045 2056 2008 1045 2018 2053 2801 2008 2017 2052 2191 2130 1037 3618 14154 11563 2055 7394 1012 8821 1010 1045 2097 2831 2055 2025 2069 10861 5302 2100 1998 22281 2226 2021 2036 2055 7394 2296 2154 2076 1996 2279 1016 3134 1011 1045 2097 2079 2023 2138 1045 2572 6171 4986 2055 1996 6043 2017 8175 1999 2216 2048 102 2202 2026 7116 1010 2005 2742 1012 2002 2758 2006 1996 9642 3663 2008 2002 2106 2025 13566 2000 6592 2008 1996 2137 2231 2323 18793 1999 1996 4722 3821 1997 7394 1012 1998 2664 1996 2616 2002 2109 1010 1996 1057 1012 1055 1012 2231 2323 2490 1996 3424 1011 11794 2749 1999 7394 2031 2042 10009 2011 2010 2814 2426 102


I0619 23:12:50.813020 139826748040960 tf_logging.py:115] input_ids: 101 2043 1045 2056 2008 1045 2018 2053 2801 2008 2017 2052 2191 2130 1037 3618 14154 11563 2055 7394 1012 8821 1010 1045 2097 2831 2055 2025 2069 10861 5302 2100 1998 22281 2226 2021 2036 2055 7394 2296 2154 2076 1996 2279 1016 3134 1011 1045 2097 2079 2023 2138 1045 2572 6171 4986 2055 1996 6043 2017 8175 1999 2216 2048 102 2202 2026 7116 1010 2005 2742 1012 2002 2758 2006 1996 9642 3663 2008 2002 2106 2025 13566 2000 6592 2008 1996 2137 2231 2323 18793 1999 1996 4722 3821 1997 7394 1012 1998 2664 1996 2616 2002 2109 1010 1996 1057 1012 1055 1012 2231 2323 2490 1996 3424 1011 11794 2749 1999 7394 2031 2042 10009 2011 2010 2814 2426 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0619 23:12:50.813732 139826748040960 tf_logging.py:115] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0619 23:12:50.814327 139826748040960 tf_logging.py:115] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:label: 1 (id = 1)


I0619 23:12:50.816131 139826748040960 tf_logging.py:115] label: 1 (id = 1)


INFO:tensorflow:*** Example ***


I0619 23:12:50.819411 139826748040960 tf_logging.py:115] *** Example ***


INFO:tensorflow:guid: None


I0619 23:12:50.820380 139826748040960 tf_logging.py:115] guid: None


INFO:tensorflow:tokens: [CLS] i believe that the most effective method is through a program which will provide a choice whereby people who want to have the protection for catastrophic illnesses or , for that matter , for non ##cat ##ast ##rop ##hic illnesses , if they so desire , can have it , but where they are not forced to have it if they do not desire , and where they can choose getting that protection through government or through private plans , which they might [SEP] we only need a few more . a shift of only four votes would have given us a decent bill to provide medical care for the aged under the social security system . and next january we will have those votes . [SEP]


I0619 23:12:50.821168 139826748040960 tf_logging.py:115] tokens: [CLS] i believe that the most effective method is through a program which will provide a choice whereby people who want to have the protection for catastrophic illnesses or , for that matter , for non ##cat ##ast ##rop ##hic illnesses , if they so desire , can have it , but where they are not forced to have it if they do not desire , and where they can choose getting that protection through government or through private plans , which they might [SEP] we only need a few more . a shift of only four votes would have given us a decent bill to provide medical care for the aged under the social security system . and next january we will have those votes . [SEP]


INFO:tensorflow:input_ids: 101 1045 2903 2008 1996 2087 4621 4118 2003 2083 1037 2565 2029 2097 3073 1037 3601 13557 2111 2040 2215 2000 2031 1996 3860 2005 23546 24757 2030 1010 2005 2008 3043 1010 2005 2512 11266 14083 18981 16066 24757 1010 2065 2027 2061 4792 1010 2064 2031 2009 1010 2021 2073 2027 2024 2025 3140 2000 2031 2009 2065 2027 2079 2025 4792 1010 1998 2073 2027 2064 5454 2893 2008 3860 2083 2231 2030 2083 2797 3488 1010 2029 2027 2453 102 2057 2069 2342 1037 2261 2062 1012 1037 5670 1997 2069 2176 4494 2052 2031 2445 2149 1037 11519 3021 2000 3073 2966 2729 2005 1996 4793 2104 1996 2591 3036 2291 1012 1998 2279 2254 2057 2097 2031 2216 4494 1012 102


I0619 23:12:50.821885 139826748040960 tf_logging.py:115] input_ids: 101 1045 2903 2008 1996 2087 4621 4118 2003 2083 1037 2565 2029 2097 3073 1037 3601 13557 2111 2040 2215 2000 2031 1996 3860 2005 23546 24757 2030 1010 2005 2008 3043 1010 2005 2512 11266 14083 18981 16066 24757 1010 2065 2027 2061 4792 1010 2064 2031 2009 1010 2021 2073 2027 2024 2025 3140 2000 2031 2009 2065 2027 2079 2025 4792 1010 1998 2073 2027 2064 5454 2893 2008 3860 2083 2231 2030 2083 2797 3488 1010 2029 2027 2453 102 2057 2069 2342 1037 2261 2062 1012 1037 5670 1997 2069 2176 4494 2052 2031 2445 2149 1037 11519 3021 2000 3073 2966 2729 2005 1996 4793 2104 1996 2591 3036 2291 1012 1998 2279 2254 2057 2097 2031 2216 4494 1012 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0619 23:12:50.824829 139826748040960 tf_logging.py:115] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0619 23:12:50.825601 139826748040960 tf_logging.py:115] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:label: 0 (id = 0)


I0619 23:12:50.826261 139826748040960 tf_logging.py:115] label: 0 (id = 0)


INFO:tensorflow:*** Example ***


I0619 23:12:50.829724 139826748040960 tf_logging.py:115] *** Example ***


INFO:tensorflow:guid: None


I0619 23:12:50.836994 139826748040960 tf_logging.py:115] guid: None


INFO:tensorflow:tokens: [CLS] i refer , of course , to the social security act of 1935 . it is particularly appropriate that we mark that anniversary by the extension of the system to include medical care insurance for our older citizens . a quarter of a century ago , the people of the united states turned their back on the do ##le and the means test [SEP] i believe that the most effective method is through a program which will provide a choice whereby people who want to have the protection for catastrophic illnesses or , for that matter , for non ##cat ##ast ##rop ##hic illnesses , if they so desire , can have it , but where they are not forced to have it if they do [SEP]


I0619 23:12:50.837808 139826748040960 tf_logging.py:115] tokens: [CLS] i refer , of course , to the social security act of 1935 . it is particularly appropriate that we mark that anniversary by the extension of the system to include medical care insurance for our older citizens . a quarter of a century ago , the people of the united states turned their back on the do ##le and the means test [SEP] i believe that the most effective method is through a program which will provide a choice whereby people who want to have the protection for catastrophic illnesses or , for that matter , for non ##cat ##ast ##rop ##hic illnesses , if they so desire , can have it , but where they are not forced to have it if they do [SEP]


INFO:tensorflow:input_ids: 101 1045 6523 1010 1997 2607 1010 2000 1996 2591 3036 2552 1997 4437 1012 2009 2003 3391 6413 2008 2057 2928 2008 5315 2011 1996 5331 1997 1996 2291 2000 2421 2966 2729 5427 2005 2256 3080 4480 1012 1037 4284 1997 1037 2301 3283 1010 1996 2111 1997 1996 2142 2163 2357 2037 2067 2006 1996 2079 2571 1998 1996 2965 3231 102 1045 2903 2008 1996 2087 4621 4118 2003 2083 1037 2565 2029 2097 3073 1037 3601 13557 2111 2040 2215 2000 2031 1996 3860 2005 23546 24757 2030 1010 2005 2008 3043 1010 2005 2512 11266 14083 18981 16066 24757 1010 2065 2027 2061 4792 1010 2064 2031 2009 1010 2021 2073 2027 2024 2025 3140 2000 2031 2009 2065 2027 2079 102


I0619 23:12:50.838769 139826748040960 tf_logging.py:115] input_ids: 101 1045 6523 1010 1997 2607 1010 2000 1996 2591 3036 2552 1997 4437 1012 2009 2003 3391 6413 2008 2057 2928 2008 5315 2011 1996 5331 1997 1996 2291 2000 2421 2966 2729 5427 2005 2256 3080 4480 1012 1037 4284 1997 1037 2301 3283 1010 1996 2111 1997 1996 2142 2163 2357 2037 2067 2006 1996 2079 2571 1998 1996 2965 3231 102 1045 2903 2008 1996 2087 4621 4118 2003 2083 1037 2565 2029 2097 3073 1037 3601 13557 2111 2040 2215 2000 2031 1996 3860 2005 23546 24757 2030 1010 2005 2008 3043 1010 2005 2512 11266 14083 18981 16066 24757 1010 2065 2027 2061 4792 1010 2064 2031 2009 1010 2021 2073 2027 2024 2025 3140 2000 2031 2009 2065 2027 2079 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0619 23:12:50.839557 139826748040960 tf_logging.py:115] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0619 23:12:50.841785 139826748040960 tf_logging.py:115] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:label: 1 (id = 1)


I0619 23:12:50.842372 139826748040960 tf_logging.py:115] label: 1 (id = 1)


INFO:tensorflow:*** Example ***


I0619 23:12:50.845664 139826748040960 tf_logging.py:115] *** Example ***


INFO:tensorflow:guid: None


I0619 23:12:50.846358 139826748040960 tf_logging.py:115] guid: None


INFO:tensorflow:tokens: [CLS] i just do n ' t believe that the 1960 ' s is a time to stand still . every piece of important legislation of benefit to our people was opposed by the republicans when they first came into being , away back to the federal reserve board , all the reforms of the roosevelt administration , social security , all the others [SEP] i would answer the question , hoping that i am hitting the point , and if it does n ' t , if he will call back in , we will give this question priority . i have answered the question by saying that i favor the raising of the minimum wage to $ 1 . 15 . i also favor extending [SEP]


I0619 23:12:50.846953 139826748040960 tf_logging.py:115] tokens: [CLS] i just do n ' t believe that the 1960 ' s is a time to stand still . every piece of important legislation of benefit to our people was opposed by the republicans when they first came into being , away back to the federal reserve board , all the reforms of the roosevelt administration , social security , all the others [SEP] i would answer the question , hoping that i am hitting the point , and if it does n ' t , if he will call back in , we will give this question priority . i have answered the question by saying that i favor the raising of the minimum wage to $ 1 . 15 . i also favor extending [SEP]


INFO:tensorflow:input_ids: 101 1045 2074 2079 1050 1005 1056 2903 2008 1996 3624 1005 1055 2003 1037 2051 2000 3233 2145 1012 2296 3538 1997 2590 6094 1997 5770 2000 2256 2111 2001 4941 2011 1996 10643 2043 2027 2034 2234 2046 2108 1010 2185 2067 2000 1996 2976 3914 2604 1010 2035 1996 8818 1997 1996 8573 3447 1010 2591 3036 1010 2035 1996 2500 102 1045 2052 3437 1996 3160 1010 5327 2008 1045 2572 7294 1996 2391 1010 1998 2065 2009 2515 1050 1005 1056 1010 2065 2002 2097 2655 2067 1999 1010 2057 2097 2507 2023 3160 9470 1012 1045 2031 4660 1996 3160 2011 3038 2008 1045 5684 1996 6274 1997 1996 6263 11897 2000 1002 1015 1012 2321 1012 1045 2036 5684 8402 102


I0619 23:12:50.847512 139826748040960 tf_logging.py:115] input_ids: 101 1045 2074 2079 1050 1005 1056 2903 2008 1996 3624 1005 1055 2003 1037 2051 2000 3233 2145 1012 2296 3538 1997 2590 6094 1997 5770 2000 2256 2111 2001 4941 2011 1996 10643 2043 2027 2034 2234 2046 2108 1010 2185 2067 2000 1996 2976 3914 2604 1010 2035 1996 8818 1997 1996 8573 3447 1010 2591 3036 1010 2035 1996 2500 102 1045 2052 3437 1996 3160 1010 5327 2008 1045 2572 7294 1996 2391 1010 1998 2065 2009 2515 1050 1005 1056 1010 2065 2002 2097 2655 2067 1999 1010 2057 2097 2507 2023 3160 9470 1012 1045 2031 4660 1996 3160 2011 3038 2008 1045 5684 1996 6274 1997 1996 6263 11897 2000 1002 1015 1012 2321 1012 1045 2036 5684 8402 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0619 23:12:50.848073 139826748040960 tf_logging.py:115] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0619 23:12:50.848575 139826748040960 tf_logging.py:115] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:label: 0 (id = 0)


I0619 23:12:50.849175 139826748040960 tf_logging.py:115] label: 0 (id = 0)


INFO:tensorflow:*** Example ***


I0619 23:12:50.853162 139826748040960 tf_logging.py:115] *** Example ***


INFO:tensorflow:guid: None


I0619 23:12:50.862666 139826748040960 tf_logging.py:115] guid: None


INFO:tensorflow:tokens: [CLS] and what about that future ? we want a future of progress , but above all , the people of this country are concerned that our children may grow up in a world in which we can be free and we can live at peace - and that is the most important thing that americans must think about at this time . now [SEP] what is more important than jobs and progress and these things that i have been mentioning ? well , of course , you know what it is . it ' s just being around to enjoy it , because we can have the best jobs and medical care and housing and all that sort of thing that we can imagine , and [SEP]


I0619 23:12:50.863913 139826748040960 tf_logging.py:115] tokens: [CLS] and what about that future ? we want a future of progress , but above all , the people of this country are concerned that our children may grow up in a world in which we can be free and we can live at peace - and that is the most important thing that americans must think about at this time . now [SEP] what is more important than jobs and progress and these things that i have been mentioning ? well , of course , you know what it is . it ' s just being around to enjoy it , because we can have the best jobs and medical care and housing and all that sort of thing that we can imagine , and [SEP]


INFO:tensorflow:input_ids: 101 1998 2054 2055 2008 2925 1029 2057 2215 1037 2925 1997 5082 1010 2021 2682 2035 1010 1996 2111 1997 2023 2406 2024 4986 2008 2256 2336 2089 4982 2039 1999 1037 2088 1999 2029 2057 2064 2022 2489 1998 2057 2064 2444 2012 3521 1011 1998 2008 2003 1996 2087 2590 2518 2008 4841 2442 2228 2055 2012 2023 2051 1012 2085 102 2054 2003 2062 2590 2084 5841 1998 5082 1998 2122 2477 2008 1045 2031 2042 18625 1029 2092 1010 1997 2607 1010 2017 2113 2054 2009 2003 1012 2009 1005 1055 2074 2108 2105 2000 5959 2009 1010 2138 2057 2064 2031 1996 2190 5841 1998 2966 2729 1998 3847 1998 2035 2008 4066 1997 2518 2008 2057 2064 5674 1010 1998 102


I0619 23:12:50.864795 139826748040960 tf_logging.py:115] input_ids: 101 1998 2054 2055 2008 2925 1029 2057 2215 1037 2925 1997 5082 1010 2021 2682 2035 1010 1996 2111 1997 2023 2406 2024 4986 2008 2256 2336 2089 4982 2039 1999 1037 2088 1999 2029 2057 2064 2022 2489 1998 2057 2064 2444 2012 3521 1011 1998 2008 2003 1996 2087 2590 2518 2008 4841 2442 2228 2055 2012 2023 2051 1012 2085 102 2054 2003 2062 2590 2084 5841 1998 5082 1998 2122 2477 2008 1045 2031 2042 18625 1029 2092 1010 1997 2607 1010 2017 2113 2054 2009 2003 1012 2009 1005 1055 2074 2108 2105 2000 5959 2009 1010 2138 2057 2064 2031 1996 2190 5841 1998 2966 2729 1998 3847 1998 2035 2008 4066 1997 2518 2008 2057 2064 5674 1010 1998 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0619 23:12:50.865611 139826748040960 tf_logging.py:115] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0619 23:12:50.866474 139826748040960 tf_logging.py:115] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:label: 1 (id = 1)


I0619 23:12:50.867441 139826748040960 tf_logging.py:115] label: 1 (id = 1)


INFO:tensorflow:Writing example 0 of 200


I0619 23:12:52.008374 139826748040960 tf_logging.py:115] Writing example 0 of 200


INFO:tensorflow:*** Example ***


I0619 23:12:52.011733 139826748040960 tf_logging.py:115] *** Example ***


INFO:tensorflow:guid: None


I0619 23:12:52.012801 139826748040960 tf_logging.py:115] guid: None


INFO:tensorflow:tokens: [CLS] our chief ne ##go ##tia ##tor . this administration is liable on the whole series of grounds , latin america , africa , asia , outer space , and here in the field of di ##sar ##mament , which involves the security and peace of every person in and out of the state of wisconsin , the head of our mission should say [SEP] they want to go eisenhower ' s way - and that ' s our way , and that ' s the way we ' re going . now , the last point that i make is one that i make particularly because i know of the great concern the people in these university communities have for the cause of di ##sar ##mament [SEP]


I0619 23:12:52.013611 139826748040960 tf_logging.py:115] tokens: [CLS] our chief ne ##go ##tia ##tor . this administration is liable on the whole series of grounds , latin america , africa , asia , outer space , and here in the field of di ##sar ##mament , which involves the security and peace of every person in and out of the state of wisconsin , the head of our mission should say [SEP] they want to go eisenhower ' s way - and that ' s our way , and that ' s the way we ' re going . now , the last point that i make is one that i make particularly because i know of the great concern the people in these university communities have for the cause of di ##sar ##mament [SEP]


INFO:tensorflow:input_ids: 101 2256 2708 11265 3995 10711 4263 1012 2023 3447 2003 20090 2006 1996 2878 2186 1997 5286 1010 3763 2637 1010 3088 1010 4021 1010 6058 2686 1010 1998 2182 1999 1996 2492 1997 4487 10286 28119 1010 2029 7336 1996 3036 1998 3521 1997 2296 2711 1999 1998 2041 1997 1996 2110 1997 5273 1010 1996 2132 1997 2256 3260 2323 2360 102 2027 2215 2000 2175 16551 1005 1055 2126 1011 1998 2008 1005 1055 2256 2126 1010 1998 2008 1005 1055 1996 2126 2057 1005 2128 2183 1012 2085 1010 1996 2197 2391 2008 1045 2191 2003 2028 2008 1045 2191 3391 2138 1045 2113 1997 1996 2307 5142 1996 2111 1999 2122 2118 4279 2031 2005 1996 3426 1997 4487 10286 28119 102


I0619 23:12:52.014381 139826748040960 tf_logging.py:115] input_ids: 101 2256 2708 11265 3995 10711 4263 1012 2023 3447 2003 20090 2006 1996 2878 2186 1997 5286 1010 3763 2637 1010 3088 1010 4021 1010 6058 2686 1010 1998 2182 1999 1996 2492 1997 4487 10286 28119 1010 2029 7336 1996 3036 1998 3521 1997 2296 2711 1999 1998 2041 1997 1996 2110 1997 5273 1010 1996 2132 1997 2256 3260 2323 2360 102 2027 2215 2000 2175 16551 1005 1055 2126 1011 1998 2008 1005 1055 2256 2126 1010 1998 2008 1005 1055 1996 2126 2057 1005 2128 2183 1012 2085 1010 1996 2197 2391 2008 1045 2191 2003 2028 2008 1045 2191 3391 2138 1045 2113 1997 1996 2307 5142 1996 2111 1999 2122 2118 4279 2031 2005 1996 3426 1997 4487 10286 28119 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0619 23:12:52.015011 139826748040960 tf_logging.py:115] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0619 23:12:52.015558 139826748040960 tf_logging.py:115] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:label: 1 (id = 1)


I0619 23:12:52.016077 139826748040960 tf_logging.py:115] label: 1 (id = 1)


INFO:tensorflow:*** Example ***


I0619 23:12:52.021634 139826748040960 tf_logging.py:115] *** Example ***


INFO:tensorflow:guid: None


I0619 23:12:52.022451 139826748040960 tf_logging.py:115] guid: None


INFO:tensorflow:tokens: [CLS] the principles for which dwight eisenhower and i have fought and will continue to fight are the ideals not only of republicans but of democrats as well . all through the campaign my opponent has been trumpet ##ing the false ##hood that i , in effect , feast on human misery - that i oppose minimum wage , help to distressed areas , [SEP] secondly , i have tried in this campaign to set before the american people their unfinished agenda - the tasks which franklin roosevelt did not fore ##see in 1933 - the tasks which harry truman could not complete by 1953 - and the tasks on which in 8 republican years we never even started . we have a minimum wage , but [SEP]


I0619 23:12:52.023235 139826748040960 tf_logging.py:115] tokens: [CLS] the principles for which dwight eisenhower and i have fought and will continue to fight are the ideals not only of republicans but of democrats as well . all through the campaign my opponent has been trumpet ##ing the false ##hood that i , in effect , feast on human misery - that i oppose minimum wage , help to distressed areas , [SEP] secondly , i have tried in this campaign to set before the american people their unfinished agenda - the tasks which franklin roosevelt did not fore ##see in 1933 - the tasks which harry truman could not complete by 1953 - and the tasks on which in 8 republican years we never even started . we have a minimum wage , but [SEP]


INFO:tensorflow:input_ids: 101 1996 6481 2005 2029 14304 16551 1998 1045 2031 4061 1998 2097 3613 2000 2954 2024 1996 15084 2025 2069 1997 10643 2021 1997 8037 2004 2092 1012 2035 2083 1996 3049 2026 7116 2038 2042 9368 2075 1996 6270 9021 2008 1045 1010 1999 3466 1010 9831 2006 2529 14624 1011 2008 1045 15391 6263 11897 1010 2393 2000 24305 2752 1010 102 16378 1010 1045 2031 2699 1999 2023 3049 2000 2275 2077 1996 2137 2111 2037 14342 11376 1011 1996 8518 2029 5951 8573 2106 2025 18921 19763 1999 4537 1011 1996 8518 2029 4302 15237 2071 2025 3143 2011 4052 1011 1998 1996 8518 2006 2029 1999 1022 3951 2086 2057 2196 2130 2318 1012 2057 2031 1037 6263 11897 1010 2021 102


I0619 23:12:52.024124 139826748040960 tf_logging.py:115] input_ids: 101 1996 6481 2005 2029 14304 16551 1998 1045 2031 4061 1998 2097 3613 2000 2954 2024 1996 15084 2025 2069 1997 10643 2021 1997 8037 2004 2092 1012 2035 2083 1996 3049 2026 7116 2038 2042 9368 2075 1996 6270 9021 2008 1045 1010 1999 3466 1010 9831 2006 2529 14624 1011 2008 1045 15391 6263 11897 1010 2393 2000 24305 2752 1010 102 16378 1010 1045 2031 2699 1999 2023 3049 2000 2275 2077 1996 2137 2111 2037 14342 11376 1011 1996 8518 2029 5951 8573 2106 2025 18921 19763 1999 4537 1011 1996 8518 2029 4302 15237 2071 2025 3143 2011 4052 1011 1998 1996 8518 2006 2029 1999 1022 3951 2086 2057 2196 2130 2318 1012 2057 2031 1037 6263 11897 1010 2021 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0619 23:12:52.024788 139826748040960 tf_logging.py:115] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0619 23:12:52.025614 139826748040960 tf_logging.py:115] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:label: 0 (id = 0)


I0619 23:12:52.026269 139826748040960 tf_logging.py:115] label: 0 (id = 0)


INFO:tensorflow:*** Example ***


I0619 23:12:52.028774 139826748040960 tf_logging.py:115] *** Example ***


INFO:tensorflow:guid: None


I0619 23:12:52.029614 139826748040960 tf_logging.py:115] guid: None


INFO:tensorflow:tokens: [CLS] they say the way to progress in every instance is to turn the problem over to the federal government . if you ' ve got a problem , whether it ' s schools or housing or medical care , they say set up a huge government program ; spend a great deal of money , billions of dollars ; weaken the states ' [SEP] and to meet the pressing problem confronting men past working age , and their families , we must put through an effective program of medical care for the aged under the social security system . the present medical care program will not send one penny to needy persons without further action by the congress and the state legislatures . it impose ##s [SEP]


I0619 23:12:52.030333 139826748040960 tf_logging.py:115] tokens: [CLS] they say the way to progress in every instance is to turn the problem over to the federal government . if you ' ve got a problem , whether it ' s schools or housing or medical care , they say set up a huge government program ; spend a great deal of money , billions of dollars ; weaken the states ' [SEP] and to meet the pressing problem confronting men past working age , and their families , we must put through an effective program of medical care for the aged under the social security system . the present medical care program will not send one penny to needy persons without further action by the congress and the state legislatures . it impose ##s [SEP]


INFO:tensorflow:input_ids: 101 2027 2360 1996 2126 2000 5082 1999 2296 6013 2003 2000 2735 1996 3291 2058 2000 1996 2976 2231 1012 2065 2017 1005 2310 2288 1037 3291 1010 3251 2009 1005 1055 2816 2030 3847 2030 2966 2729 1010 2027 2360 2275 2039 1037 4121 2231 2565 1025 5247 1037 2307 3066 1997 2769 1010 25501 1997 6363 1025 23021 1996 2163 1005 102 1998 2000 3113 1996 7827 3291 26964 2273 2627 2551 2287 1010 1998 2037 2945 1010 2057 2442 2404 2083 2019 4621 2565 1997 2966 2729 2005 1996 4793 2104 1996 2591 3036 2291 1012 1996 2556 2966 2729 2565 2097 2025 4604 2028 10647 2000 23927 5381 2302 2582 2895 2011 1996 3519 1998 1996 2110 27977 1012 2009 17607 2015 102


I0619 23:12:52.031031 139826748040960 tf_logging.py:115] input_ids: 101 2027 2360 1996 2126 2000 5082 1999 2296 6013 2003 2000 2735 1996 3291 2058 2000 1996 2976 2231 1012 2065 2017 1005 2310 2288 1037 3291 1010 3251 2009 1005 1055 2816 2030 3847 2030 2966 2729 1010 2027 2360 2275 2039 1037 4121 2231 2565 1025 5247 1037 2307 3066 1997 2769 1010 25501 1997 6363 1025 23021 1996 2163 1005 102 1998 2000 3113 1996 7827 3291 26964 2273 2627 2551 2287 1010 1998 2037 2945 1010 2057 2442 2404 2083 2019 4621 2565 1997 2966 2729 2005 1996 4793 2104 1996 2591 3036 2291 1012 1996 2556 2966 2729 2565 2097 2025 4604 2028 10647 2000 23927 5381 2302 2582 2895 2011 1996 3519 1998 1996 2110 27977 1012 2009 17607 2015 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0619 23:12:52.031572 139826748040960 tf_logging.py:115] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0619 23:12:52.032072 139826748040960 tf_logging.py:115] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:label: 0 (id = 0)


I0619 23:12:52.032557 139826748040960 tf_logging.py:115] label: 0 (id = 0)


INFO:tensorflow:*** Example ***


I0619 23:12:52.037397 139826748040960 tf_logging.py:115] *** Example ***


INFO:tensorflow:guid: None


I0619 23:12:52.038543 139826748040960 tf_logging.py:115] guid: None


INFO:tensorflow:tokens: [CLS] the administration ' s position is in between . we would extend coverage to approximately 3 million not presently covered , and the coverage which we would extend is to those particular businesses which operate interstate and which have larger gross incomes and , therefore , businesses that we think could , therefore , pay the minimum wage , not resulting in unemployment [SEP] this letter from this 12 - year - old boy sums up the concern that i found all over the united states as we have traveled this country north , east , west , and south . this concern and the idea that this is the major of this campaign is held not only by republicans but by democrats and independents , [SEP]


I0619 23:12:52.039534 139826748040960 tf_logging.py:115] tokens: [CLS] the administration ' s position is in between . we would extend coverage to approximately 3 million not presently covered , and the coverage which we would extend is to those particular businesses which operate interstate and which have larger gross incomes and , therefore , businesses that we think could , therefore , pay the minimum wage , not resulting in unemployment [SEP] this letter from this 12 - year - old boy sums up the concern that i found all over the united states as we have traveled this country north , east , west , and south . this concern and the idea that this is the major of this campaign is held not only by republicans but by democrats and independents , [SEP]


INFO:tensorflow:input_ids: 101 1996 3447 1005 1055 2597 2003 1999 2090 1012 2057 2052 7949 6325 2000 3155 1017 2454 2025 12825 3139 1010 1998 1996 6325 2029 2057 2052 7949 2003 2000 2216 3327 5661 2029 5452 7553 1998 2029 2031 3469 7977 29373 1998 1010 3568 1010 5661 2008 2057 2228 2071 1010 3568 1010 3477 1996 6263 11897 1010 2025 4525 1999 12163 102 2023 3661 2013 2023 2260 1011 2095 1011 2214 2879 20571 2039 1996 5142 2008 1045 2179 2035 2058 1996 2142 2163 2004 2057 2031 6158 2023 2406 2167 1010 2264 1010 2225 1010 1998 2148 1012 2023 5142 1998 1996 2801 2008 2023 2003 1996 2350 1997 2023 3049 2003 2218 2025 2069 2011 10643 2021 2011 8037 1998 23756 1010 102


I0619 23:12:52.040353 139826748040960 tf_logging.py:115] input_ids: 101 1996 3447 1005 1055 2597 2003 1999 2090 1012 2057 2052 7949 6325 2000 3155 1017 2454 2025 12825 3139 1010 1998 1996 6325 2029 2057 2052 7949 2003 2000 2216 3327 5661 2029 5452 7553 1998 2029 2031 3469 7977 29373 1998 1010 3568 1010 5661 2008 2057 2228 2071 1010 3568 1010 3477 1996 6263 11897 1010 2025 4525 1999 12163 102 2023 3661 2013 2023 2260 1011 2095 1011 2214 2879 20571 2039 1996 5142 2008 1045 2179 2035 2058 1996 2142 2163 2004 2057 2031 6158 2023 2406 2167 1010 2264 1010 2225 1010 1998 2148 1012 2023 5142 1998 1996 2801 2008 2023 2003 1996 2350 1997 2023 3049 2003 2218 2025 2069 2011 10643 2021 2011 8037 1998 23756 1010 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0619 23:12:52.041008 139826748040960 tf_logging.py:115] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0619 23:12:52.041843 139826748040960 tf_logging.py:115] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:label: 1 (id = 1)


I0619 23:12:52.042496 139826748040960 tf_logging.py:115] label: 1 (id = 1)


INFO:tensorflow:*** Example ***


I0619 23:12:52.048568 139826748040960 tf_logging.py:115] *** Example ***


INFO:tensorflow:guid: None


I0619 23:12:52.050580 139826748040960 tf_logging.py:115] guid: None


INFO:tensorflow:tokens: [CLS] meanwhile , the contradictory statements of senator kennedy still leave him opposed to this position . the miss ##tate ##ment : in the second debate senator kennedy said : the republicans in recent years , not only in the last 25 years , but in the last 8 years , have opposed * * * minimum wage * * the facts : senator [SEP] we are going to have , by the middle of november , 1 million un ##sol ##d cars , the highest inventory of un ##sol ##d cars in the history of the united states . now , anyone who believes that under those conditions we should continue that kind of leadership , that supports leadership that twice , not once , but [SEP]


I0619 23:12:52.051560 139826748040960 tf_logging.py:115] tokens: [CLS] meanwhile , the contradictory statements of senator kennedy still leave him opposed to this position . the miss ##tate ##ment : in the second debate senator kennedy said : the republicans in recent years , not only in the last 25 years , but in the last 8 years , have opposed * * * minimum wage * * the facts : senator [SEP] we are going to have , by the middle of november , 1 million un ##sol ##d cars , the highest inventory of un ##sol ##d cars in the history of the united states . now , anyone who believes that under those conditions we should continue that kind of leadership , that supports leadership that twice , not once , but [SEP]


INFO:tensorflow:input_ids: 101 5564 1010 1996 27894 8635 1997 5205 5817 2145 2681 2032 4941 2000 2023 2597 1012 1996 3335 12259 3672 1024 1999 1996 2117 5981 5205 5817 2056 1024 1996 10643 1999 3522 2086 1010 2025 2069 1999 1996 2197 2423 2086 1010 2021 1999 1996 2197 1022 2086 1010 2031 4941 1008 1008 1008 6263 11897 1008 1008 1996 8866 1024 5205 102 2057 2024 2183 2000 2031 1010 2011 1996 2690 1997 2281 1010 1015 2454 4895 19454 2094 3765 1010 1996 3284 12612 1997 4895 19454 2094 3765 1999 1996 2381 1997 1996 2142 2163 1012 2085 1010 3087 2040 7164 2008 2104 2216 3785 2057 2323 3613 2008 2785 1997 4105 1010 2008 6753 4105 2008 3807 1010 2025 2320 1010 2021 102


I0619 23:12:52.052438 139826748040960 tf_logging.py:115] input_ids: 101 5564 1010 1996 27894 8635 1997 5205 5817 2145 2681 2032 4941 2000 2023 2597 1012 1996 3335 12259 3672 1024 1999 1996 2117 5981 5205 5817 2056 1024 1996 10643 1999 3522 2086 1010 2025 2069 1999 1996 2197 2423 2086 1010 2021 1999 1996 2197 1022 2086 1010 2031 4941 1008 1008 1008 6263 11897 1008 1008 1996 8866 1024 5205 102 2057 2024 2183 2000 2031 1010 2011 1996 2690 1997 2281 1010 1015 2454 4895 19454 2094 3765 1010 1996 3284 12612 1997 4895 19454 2094 3765 1999 1996 2381 1997 1996 2142 2163 1012 2085 1010 3087 2040 7164 2008 2104 2216 3785 2057 2323 3613 2008 2785 1997 4105 1010 2008 6753 4105 2008 3807 1010 2025 2320 1010 2021 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0619 23:12:52.053323 139826748040960 tf_logging.py:115] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0619 23:12:52.053898 139826748040960 tf_logging.py:115] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:label: 0 (id = 0)


I0619 23:12:52.054493 139826748040960 tf_logging.py:115] label: 0 (id = 0)


In [79]:
# Single train/evaluate run; skipped entirely when cross-validation is enabled.
if not cross_val:
    # Number of optimizer steps and warm-up steps, derived from the amount of
    # training data, the batch size and the number of epochs.
    num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
    num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

    model_fn = model_fn_builder(
        num_labels=len(label_list),
        learning_rate=LEARNING_RATE,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
    )

    # Alternative kept for reference (stock builder from the bert package):
    # model_fn = run_classifier.model_fn_builder(
    #     bert_config=BERT_CONFIG,
    #     num_labels=len(label_list),
    #     init_checkpoint=False,
    #     learning_rate=LEARNING_RATE,
    #     num_train_steps=num_train_steps,
    #     num_warmup_steps=num_warmup_steps,
    #     use_tpu=False,
    #     use_one_hot_embeddings=False)

    # TPU alternative kept for reference:
    # tf.contrib.tpu.TPUEstimator(..., train_batch_size=BATCH_SIZE)
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        config=run_config,
        params={"batch_size": BATCH_SIZE},
    )

    # Input function for training. drop_remainder = True for using TPUs.
    train_input_fn = run_classifier.input_fn_builder(
        features=train_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=True,
        drop_remainder=False,
    )

    print('Beginning Training!')
    current_time = datetime.now()
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
    print("Training took time ", datetime.now() - current_time)

    # Input function for prediction on the held-out features.
    test_input_fn = run_classifier.input_fn_builder(
        features=test_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False,
    )

    predictions = estimator.predict(input_fn=test_input_fn)
    pred_label = [p['labels'] for p in predictions]

    print("Confusion Matrix:")
    # Gold labels as integers, shared by both metrics below.
    _gold = test['label'].values.astype(int)
    print(confusion_matrix(_gold, np.array(pred_label)))

    print("Classification Report:")
    # Pick the display names according to how many distinct labels the test
    # set contains.
    if len(set(test['label'])) == 3:
        _class_names = ["attack", "support", "unrelated"]
    else:
        # Other binary naming schemes that were used with this cell:
        #   ["agreement", "disagreement"]
        #   ["relation", "unrelated"]
        _class_names = ["attack", "support"]
    print(classification_report(_gold, pred_label, target_names=_class_names))

INFO:tensorflow:Using config: {'_model_dir': 'BERT_RUN2019-06-19 23:12:50.090822political', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f2b4f156fd0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


I0619 23:12:52.479407 139826748040960 tf_logging.py:115] Using config: {'_model_dir': 'BERT_RUN2019-06-19 23:12:50.090822political', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f2b4f156fd0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


Beginning Training!
INFO:tensorflow:Calling model_fn.


I0619 23:12:52.674186 139826748040960 tf_logging.py:115] Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0619 23:12:55.042183 139826748040960 tf_logging.py:115] Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Done calling model_fn.


I0619 23:13:00.904656 139826748040960 tf_logging.py:115] Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


I0619 23:13:00.907528 139826748040960 tf_logging.py:115] Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


I0619 23:13:03.556878 139826748040960 tf_logging.py:115] Graph was finalized.


INFO:tensorflow:Running local_init_op.


I0619 23:13:08.030962 139826748040960 tf_logging.py:115] Running local_init_op.


INFO:tensorflow:Done running local_init_op.


I0619 23:13:08.149006 139826748040960 tf_logging.py:115] Done running local_init_op.


INFO:tensorflow:Saving checkpoints for 0 into BERT_RUN2019-06-19 23:12:50.090822political/model.ckpt.


I0619 23:13:14.308981 139826748040960 tf_logging.py:115] Saving checkpoints for 0 into BERT_RUN2019-06-19 23:12:50.090822political/model.ckpt.


INFO:tensorflow:loss = 0.70325327, step = 0


I0619 23:13:26.314081 139826748040960 tf_logging.py:115] loss = 0.70325327, step = 0


INFO:tensorflow:global_step/sec: 2.28288


I0619 23:14:10.117917 139826748040960 tf_logging.py:115] global_step/sec: 2.28288


INFO:tensorflow:loss = 0.8432512, step = 100 (43.806 sec)


I0619 23:14:10.119580 139826748040960 tf_logging.py:115] loss = 0.8432512, step = 100 (43.806 sec)


INFO:tensorflow:Saving checkpoints for 199 into BERT_RUN2019-06-19 23:12:50.090822political/model.ckpt.


I0619 23:14:40.950563 139826748040960 tf_logging.py:115] Saving checkpoints for 199 into BERT_RUN2019-06-19 23:12:50.090822political/model.ckpt.


INFO:tensorflow:Loss for final step: 0.07430437.


I0619 23:14:47.424087 139826748040960 tf_logging.py:115] Loss for final step: 0.07430437.


Training took time  0:01:54.939749
INFO:tensorflow:Calling model_fn.


I0619 23:14:47.502920 139826748040960 tf_logging.py:115] Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0619 23:14:49.753039 139826748040960 tf_logging.py:115] Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Done calling model_fn.


I0619 23:14:49.857202 139826748040960 tf_logging.py:115] Done calling model_fn.


INFO:tensorflow:Graph was finalized.


I0619 23:14:50.199453 139826748040960 tf_logging.py:115] Graph was finalized.


INFO:tensorflow:Restoring parameters from BERT_RUN2019-06-19 23:12:50.090822political/model.ckpt-199


I0619 23:14:50.203129 139826748040960 tf_logging.py:115] Restoring parameters from BERT_RUN2019-06-19 23:12:50.090822political/model.ckpt-199


INFO:tensorflow:Running local_init_op.


I0619 23:14:50.704718 139826748040960 tf_logging.py:115] Running local_init_op.


INFO:tensorflow:Done running local_init_op.


I0619 23:14:50.760561 139826748040960 tf_logging.py:115] Done running local_init_op.


Confusion Matrix:
[[86 24]
 [28 62]]
Classification Report:
              precision    recall  f1-score   support

      attack       0.75      0.78      0.77       110
     support       0.72      0.69      0.70        90

    accuracy                           0.74       200
   macro avg       0.74      0.74      0.74       200
weighted avg       0.74      0.74      0.74       200



In [80]:
def run_training(train_features, test_features, test, train, target_names):
    """Train the classifier on one split and score it on the held-out split.

    Relies on notebook-level names defined outside this function:
    ``BATCH_SIZE``, ``NUM_TRAIN_EPOCHS``, ``WARMUP_PROPORTION``,
    ``LEARNING_RATE``, ``MAX_SEQ_LENGTH``, ``label_list``, ``run_config``
    and ``model_fn_builder``.

    Args:
        train_features: Features handed to ``run_classifier.input_fn_builder``
            for training; their count determines the number of train steps.
        test_features: Features handed to ``run_classifier.input_fn_builder``
            for prediction.
        test: Table with a ``'label'`` column holding the gold labels of the
            test examples (cast to ``int`` for scoring).
        train: Table with a ``'label'`` column; only used to print the label
            distribution of the training split.
        target_names: Display names for the classes 0 and 1, in that order.

    Returns:
        A tuple ``(class_rep, acc)``: the classification report as a dict
        (``output_dict=True``) and the accuracy score.
    """
    num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
    num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)
    model_fn = model_fn_builder(
        num_labels=len(label_list),
        learning_rate=LEARNING_RATE,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps)
    train_input_fn = run_classifier.input_fn_builder(
        features=train_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=True,
        drop_remainder=False)
    test_input_fn = run_classifier.input_fn_builder(
        features=test_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    print('Beginning Training!')
    print('test labels', test['label'].value_counts())
    print('train labels', train['label'].value_counts())
    current_time = datetime.now()
    estimator = tf.estimator.Estimator(
              model_fn=model_fn,
              config=run_config,
              params={"batch_size": BATCH_SIZE})
    estimator = estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
    # yield_single_examples=False makes predict() yield one dict per batch,
    # so the per-batch label arrays are flattened and joined into one vector.
    predictions = estimator.predict(input_fn=test_input_fn, yield_single_examples=False)
    print(predictions)
    pred_label = np.concatenate([np.ravel(batch['labels']) for batch in predictions])
    print(pred_label)
    true_label = test['label'].values.astype(int)
    # Use the same explicit label set for the returned and the printed report,
    # so both stay aligned with target_names even if a split lacks a class.
    report_labels = [0, 1]
    class_rep = classification_report(
        true_label, pred_label, labels=report_labels,
        target_names=target_names, output_dict=True)
    acc = accuracy_score(true_label, pred_label)
    print(classification_report(
        true_label, pred_label, labels=report_labels, target_names=target_names))
    print("Accuracy:",  acc)
    print("Training took time ", datetime.now() - current_time)
    return (class_rep, acc)

def cross_validate(data):
    """Run 10-fold cross-validation over ``data`` and collect per-fold results.

    For every fold the output directory ``OUTPUT_DIR`` is wiped and recreated,
    the train/test rows are converted to BERT input features, and
    ``run_training`` is called on them.

    Relies on notebook-level names defined outside this function:
    ``OUTPUT_DIR``, ``ORG_COLUMN``, ``RESP_COLUMN``, ``LABEL_COLUMN``,
    ``label_list``, ``MAX_SEQ_LENGTH`` and ``tokenizer``.

    Args:
        data: DataFrame containing the columns named by ``ORG_COLUMN``,
            ``RESP_COLUMN`` and ``LABEL_COLUMN``.

    Returns:
        A list with one ``run_training`` result per fold.
    """
    def to_input_examples(frame):
        # Use the InputExample class from BERT's run_classifier code to create examples from the data
        return frame.apply(
            lambda row: run_classifier.InputExample(
                guid=None,  # Globally unique ID for bookkeeping, unused in this example
                text_a=row[ORG_COLUMN],
                text_b=row[RESP_COLUMN],
                label=row[LABEL_COLUMN]),
            axis=1)

    results = []
    kf = KFold(n_splits=10)
    for train_idx, val_idx in kf.split(data):
        # Start every fold from an empty output directory. Only a missing
        # directory is tolerated; a failed deletion now surfaces instead of
        # being swallowed (presumably stale checkpoints from the previous fold
        # would otherwise be picked up -- verify that run_config points here).
        if tf.gfile.Exists(OUTPUT_DIR):
            tf.gfile.DeleteRecursively(OUTPUT_DIR)
        tf.gfile.MakeDirs(OUTPUT_DIR)
        train = data.iloc[train_idx]
        test = data.iloc[val_idx]
        print(train.shape, test.shape)
        train_InputExamples = to_input_examples(train)
        test_InputExamples = to_input_examples(test)

        # Convert our train and test features to InputFeatures that BERT understands.
        train_features = run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
        test_features = run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
        results.append(run_training(train_features, test_features, test, train, ["relation", "unrelated"]))
    return results

In [81]:
if cross_val:
    # Cross-validate over the union of the train and test frames.
    # pd.concat replaces DataFrame.append, which is deprecated and was
    # removed in pandas 2.0; the resulting frame is the same.
    result = cross_validate(pd.concat([train_df, test_df]))

In [82]:
if cross_val:
    #print(result)
    # Each fold result is a (classification report dict, accuracy) pair;
    # column 0 keeps only the report dicts.
    res = np.array(result)[:,0]
    # Stack the per-fold report tables in one concat call instead of growing
    # a frame with the deprecated (and quadratic) DataFrame.append in a loop.
    result_df = pd.concat([pd.DataFrame(report) for report in res])
    pd.set_option('display.max_rows', 500)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1000)
    # Rows sharing an index label across folds are grouped together and
    # summarised by mean, max and min.
    print(result_df.groupby(level=0).agg([np.mean, np.max, np.min]).transpose())

In [83]:
#print(estimator.get_variable_names())