In [1]:
import numpy as np
import keras
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from sklearn.model_selection import train_test_split
# Print the Keras, TensorFlow and NumPy versions this notebook was run with.
print(keras.__version__ ,'\t' , tf.__version__, "\t", np.__version__)

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


2.3.1 	 1.14.0 	 1.19.0


In [2]:
import os
import re

# Load all review files from a directory into a DataFrame.
def load_directory_data(directory):
  """Read every `<digits>_<digits>.txt` file in `directory` into a DataFrame.

  Args:
    directory: Path of a folder whose entries are named like `12_7.txt`.

  Returns:
    A pandas DataFrame with a "sentence" column (the full contents of each
    file) and a "sentiment" column (the second number of the file name, kept
    as a string). When no entry matches, the frame is empty but still has
    both columns.
  """
  # Raw string: "\d" inside a normal string literal is an invalid escape.
  name_pattern = re.compile(r"\d+_(\d+)\.txt")
  data = {"sentence": [], "sentiment": []}
  # Sorted so the row order does not depend on the file system's listing order.
  for file_name in sorted(os.listdir(directory)):
    match = name_pattern.match(file_name)
    if match is None:
      # Skip stray entries (hidden files, sub-directories, ...) instead of
      # failing with AttributeError on `None.group(1)`.
      continue
    with tf.gfile.GFile(os.path.join(directory, file_name), "r") as f:
      data["sentence"].append(f.read())
    data["sentiment"].append(match.group(1))
  return pd.DataFrame.from_dict(data)

# Merge positive and negative examples, add a polarity column and shuffle.
def load_dataset(directory, random_state=None):
  """Load the "pos" and "neg" sub-folders of `directory` as one shuffled frame.

  Args:
    directory: Folder containing "pos" and "neg" sub-directories, each read
      with `load_directory_data`.
    random_state: Optional seed forwarded to `DataFrame.sample` so the shuffle
      can be reproduced. The default (`None`) keeps the original unseeded
      behaviour.

  Returns:
    A DataFrame with the columns produced by `load_directory_data` plus a
    "polarity" column (1 for rows from "pos", 0 for rows from "neg"), with
    rows in shuffled order and the index reset to 0..n-1.
  """
  pos_df = load_directory_data(os.path.join(directory, "pos"))
  neg_df = load_directory_data(os.path.join(directory, "neg"))
  pos_df["polarity"] = 1
  neg_df["polarity"] = 0
  combined = pd.concat([pos_df, neg_df])
  # sample(frac=1) returns every row in random order, i.e. a shuffle.
  shuffled = combined.sample(frac=1, random_state=random_state)
  return shuffled.reset_index(drop=True)

# Download and process the dataset files.
def download_and_load_datasets(force_download=False):
  """Fetch the aclImdb archive and return its train and test DataFrames.

  Args:
    force_download: Accepted as part of the signature; the body does not
      read it.

  Returns:
    A `(train_df, test_df)` tuple built by `load_dataset` from the "train"
    and "test" folders of the "aclImdb" directory.
  """
  archive_path = tf.keras.utils.get_file(
      fname="aclImdb.tar.gz",
      origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
      extract=True)

  # The code expects the extracted "aclImdb" folder to sit next to the archive.
  imdb_root = os.path.join(os.path.dirname(archive_path), "aclImdb")
  splits = [load_dataset(os.path.join(imdb_root, split_name))
            for split_name in ("train", "test")]
  return splits[0], splits[1]


In [3]:
# Build the train and test DataFrames; this runs the archive fetch/extraction
# performed inside download_and_load_datasets.
train, test = download_and_load_datasets()

In [4]:
# Keep a random 5000-row subset of each split to shorten the rest of the run.
# No random_state is passed, so the selected rows (and the preview printed
# below) differ between runs. `train` and `test` are overwritten, so re-running
# this cell samples from the already reduced frames.
train = train.sample(5000)
test = test.sample(5000)
print(train.columns)
print(train.head())

Index(['sentence', 'sentiment', 'polarity'], dtype='object')
                                                sentence sentiment  polarity
10847  ===minor spoilers=== <br /><br />I am, like ma...         1         0
14614  I am a relative latecomer to the transcendent ...         4         0
19255  There are some comments about this film that s...        10         1
5212   Yeah, I remember this one! Many years since I ...         7         1
10844  I watched mask in the 80's and it's currently ...         2         0


In [5]:
# Column holding the review text (used as text_a when building InputExamples).
DATA_COLUMN = 'sentence'
# Column holding the target label (used as label when building InputExamples).
LABEL_COLUMN = 'polarity'
# label_list is the list of possible label values, e.g. True/False, 0/1 or 'dog'/'cat'.
label_list = [0, 1]

## Data processing

We'll need to transform our data into a format BERT understands. This involves two steps. First, we create `InputExample`s using the constructor provided in the BERT library:
1. `text_a` is the text we want to classify,
2. `text_b` is only used when training a model to understand the relationship between two sentences (it is `None` here),
3. `label` is the label for our example, e.g. True or False.

In [6]:
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization




In [7]:
# Use the InputExample class from BERT's run_classifier code to create examples from the data.
def _to_input_examples(frame):
  """Turn each row of `frame` into a `bert.run_classifier.InputExample`.

  Args:
    frame: DataFrame containing the DATA_COLUMN and LABEL_COLUMN columns.

  Returns:
    The result of `frame.apply(..., axis=1)`: one InputExample per row,
    indexed like `frame`.
  """
  return frame.apply(
      lambda row: bert.run_classifier.InputExample(
          guid=None,  # Globally unique ID for bookkeeping, unused in this example
          text_a=row[DATA_COLUMN],
          text_b=None,  # no second text: each example is a single review
          label=row[LABEL_COLUMN]),
      axis=1)

# Same conversion for both splits; a single helper replaces two copy-pasted lambdas.
train_InputExamples = _to_input_examples(train)
test_InputExamples = _to_input_examples(test)

Next, we need to preprocess our data so that it matches the data BERT was trained on.
1. Lowercase our text 
2. Tokenize it 
3. Break words into WordPieces
4. Map our words to indexes using a vocab file that BERT provides
5. Add the special "[CLS]" and "[SEP]" tokens
6. Append "index" and "segment" tokens to each input

In [8]:
# This is a path to an uncased (all lowercase) version of BERT.
# It is the module handle passed to hub.Module in create_tokenizer_from_hub_module.
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
  """Build a FullTokenizer from the vocab file and casing flag of the Hub module.

  The module named by BERT_MODEL_HUB is loaded in a throwaway graph, its
  "tokenization_info" signature is evaluated in a session, and the two
  resulting values are handed to `bert.tokenization.FullTokenizer`.

  Returns:
    A `bert.tokenization.FullTokenizer` configured with the module's
    `vocab_file` and `do_lower_case` values.
  """
  scratch_graph = tf.Graph()
  with scratch_graph.as_default():
    module = hub.Module(BERT_MODEL_HUB)
    info = module(signature="tokenization_info", as_dict=True)
    fetches = [info["vocab_file"], info["do_lower_case"]]
    with tf.Session() as session:
      vocab_path, lower_case = session.run(fetches)

  return bert.tokenization.FullTokenizer(
      vocab_file=vocab_path, do_lower_case=lower_case)

# Build the tokenizer once; the following cells reuse this module-level instance.
tokenizer = create_tokenizer_from_hub_module()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore








In [9]:
# Sanity check: the output below shows lowercased text split into WordPieces
# (e.g. "tokenizer" becomes "token", "##izer").
tokenizer.tokenize("This here's an example of using the BERT tokenizer")

['this',
 'here',
 "'",
 's',
 'an',
 'example',
 'of',
 'using',
 'the',
 'bert',
 'token',
 '##izer']

Using our tokenizer, we'll call run_classifier.convert_examples_to_features on our InputExamples to convert them into features BERT understands.

In [10]:
# We'll set sequences to be at most 128 tokens long.
MAX_SEQ_LENGTH = 128
# Convert our train and test InputExamples to InputFeatures that BERT understands.
# Each call logs the first few converted examples at INFO level (see the output below).
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)







INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] = = = minor spoil ##ers = = = < br / > < br / > i am , like many others , a huge jerry br ##uck ##heimer fan . so when i saw all the beautiful posters hanging out front , and the trailer coming by before mi : 2 , expectations were rising . a jerry br ##uck ##heimer production . big cars . nic cage and fresh from an oscar - angelina jo ##lie . what can possibly go wrong ? a lot . < br / > < br / > the script is neither funny ( which it tries really hard to be ) nor exciting . you put in a black person who is constantly making racist jokes [SEP]


INFO:tensorflow:tokens: [CLS] = = = minor spoil ##ers = = = < br / > < br / > i am , like many others , a huge jerry br ##uck ##heimer fan . so when i saw all the beautiful posters hanging out front , and the trailer coming by before mi : 2 , expectations were rising . a jerry br ##uck ##heimer production . big cars . nic cage and fresh from an oscar - angelina jo ##lie . what can possibly go wrong ? a lot . < br / > < br / > the script is neither funny ( which it tries really hard to be ) nor exciting . you put in a black person who is constantly making racist jokes [SEP]


INFO:tensorflow:input_ids: 101 1027 1027 1027 3576 27594 2545 1027 1027 1027 1026 7987 1013 1028 1026 7987 1013 1028 1045 2572 1010 2066 2116 2500 1010 1037 4121 6128 7987 12722 18826 5470 1012 2061 2043 1045 2387 2035 1996 3376 14921 5689 2041 2392 1010 1998 1996 9117 2746 2011 2077 2771 1024 1016 1010 10908 2020 4803 1012 1037 6128 7987 12722 18826 2537 1012 2502 3765 1012 27969 7980 1998 4840 2013 2019 7436 1011 23847 8183 8751 1012 2054 2064 4298 2175 3308 1029 1037 2843 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 5896 2003 4445 6057 1006 2029 2009 5363 2428 2524 2000 2022 1007 4496 10990 1012 2017 2404 1999 1037 2304 2711 2040 2003 7887 2437 16939 13198 102


INFO:tensorflow:input_ids: 101 1027 1027 1027 3576 27594 2545 1027 1027 1027 1026 7987 1013 1028 1026 7987 1013 1028 1045 2572 1010 2066 2116 2500 1010 1037 4121 6128 7987 12722 18826 5470 1012 2061 2043 1045 2387 2035 1996 3376 14921 5689 2041 2392 1010 1998 1996 9117 2746 2011 2077 2771 1024 1016 1010 10908 2020 4803 1012 1037 6128 7987 12722 18826 2537 1012 2502 3765 1012 27969 7980 1998 4840 2013 2019 7436 1011 23847 8183 8751 1012 2054 2064 4298 2175 3308 1029 1037 2843 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 5896 2003 4445 6057 1006 2029 2009 5363 2428 2524 2000 2022 1007 4496 10990 1012 2017 2404 1999 1037 2304 2711 2040 2003 7887 2437 16939 13198 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] i am a relative late ##com ##er to the trans ##cend ##ent work of film au ##te ##ur ya ##su ##ji ##ro oz ##u , whose master ##fully under ##sta ##ted views of japanese life , especially in the post - wwii era , ill ##umi ##nate universal truths . having now seen several of his landmark films such as 1949 ' s " late spring " and 1953 ' s " tokyo story " , i am convinced that oz ##u had a particularly id ##ios ##yn ##cratic gift of convey ##ing the range of feelings arising from inter ##gen ##eration ##al conflict through elliptical narratives and subtle imagery . it is taiwanese director ho ##u hs ##ia ##o - hs ##ien ' s keen [SEP]


INFO:tensorflow:tokens: [CLS] i am a relative late ##com ##er to the trans ##cend ##ent work of film au ##te ##ur ya ##su ##ji ##ro oz ##u , whose master ##fully under ##sta ##ted views of japanese life , especially in the post - wwii era , ill ##umi ##nate universal truths . having now seen several of his landmark films such as 1949 ' s " late spring " and 1953 ' s " tokyo story " , i am convinced that oz ##u had a particularly id ##ios ##yn ##cratic gift of convey ##ing the range of feelings arising from inter ##gen ##eration ##al conflict through elliptical narratives and subtle imagery . it is taiwanese director ho ##u hs ##ia ##o - hs ##ien ' s keen [SEP]


INFO:tensorflow:input_ids: 101 1045 2572 1037 5816 2397 9006 2121 2000 1996 9099 23865 4765 2147 1997 2143 8740 2618 3126 8038 6342 4478 3217 11472 2226 1010 3005 3040 7699 2104 9153 3064 5328 1997 2887 2166 1010 2926 1999 1996 2695 1011 25755 3690 1010 5665 12717 12556 5415 23019 1012 2383 2085 2464 2195 1997 2010 8637 3152 2107 2004 4085 1005 1055 1000 2397 3500 1000 1998 4052 1005 1055 1000 5522 2466 1000 1010 1045 2572 6427 2008 11472 2226 2018 1037 3391 8909 10735 6038 17510 5592 1997 16636 2075 1996 2846 1997 5346 17707 2013 6970 6914 16754 2389 4736 2083 27213 22143 1998 11259 13425 1012 2009 2003 16539 2472 7570 2226 26236 2401 2080 1011 26236 9013 1005 1055 10326 102


INFO:tensorflow:input_ids: 101 1045 2572 1037 5816 2397 9006 2121 2000 1996 9099 23865 4765 2147 1997 2143 8740 2618 3126 8038 6342 4478 3217 11472 2226 1010 3005 3040 7699 2104 9153 3064 5328 1997 2887 2166 1010 2926 1999 1996 2695 1011 25755 3690 1010 5665 12717 12556 5415 23019 1012 2383 2085 2464 2195 1997 2010 8637 3152 2107 2004 4085 1005 1055 1000 2397 3500 1000 1998 4052 1005 1055 1000 5522 2466 1000 1010 1045 2572 6427 2008 11472 2226 2018 1037 3391 8909 10735 6038 17510 5592 1997 16636 2075 1996 2846 1997 5346 17707 2013 6970 6914 16754 2389 4736 2083 27213 22143 1998 11259 13425 1012 2009 2003 16539 2472 7570 2226 26236 2401 2080 1011 26236 9013 1005 1055 10326 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] there are some comments about this film that say that it is a bad and silly one and such an excellent actor as pierre fr ##es ##nay should not have accepted to act in it . < br / > < br / > i think , just the opposite , that , even when the film is strange and has some weaknesses , the performance of pierre fr ##es ##nay is so formidable that it converts the film in something excellent . < br / > < br / > his performance is probably the best in history . < br / > < br / > the film itself has a very pole ##mic scene about the consecration of wine in the cabaret . [SEP]


INFO:tensorflow:tokens: [CLS] there are some comments about this film that say that it is a bad and silly one and such an excellent actor as pierre fr ##es ##nay should not have accepted to act in it . < br / > < br / > i think , just the opposite , that , even when the film is strange and has some weaknesses , the performance of pierre fr ##es ##nay is so formidable that it converts the film in something excellent . < br / > < br / > his performance is probably the best in history . < br / > < br / > the film itself has a very pole ##mic scene about the consecration of wine in the cabaret . [SEP]


INFO:tensorflow:input_ids: 101 2045 2024 2070 7928 2055 2023 2143 2008 2360 2008 2009 2003 1037 2919 1998 10021 2028 1998 2107 2019 6581 3364 2004 5578 10424 2229 16741 2323 2025 2031 3970 2000 2552 1999 2009 1012 1026 7987 1013 1028 1026 7987 1013 1028 1045 2228 1010 2074 1996 4500 1010 2008 1010 2130 2043 1996 2143 2003 4326 1998 2038 2070 21775 1010 1996 2836 1997 5578 10424 2229 16741 2003 2061 18085 2008 2009 19884 1996 2143 1999 2242 6581 1012 1026 7987 1013 1028 1026 7987 1013 1028 2010 2836 2003 2763 1996 2190 1999 2381 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 2143 2993 2038 1037 2200 6536 7712 3496 2055 1996 24730 1997 4511 1999 1996 19685 1012 102


INFO:tensorflow:input_ids: 101 2045 2024 2070 7928 2055 2023 2143 2008 2360 2008 2009 2003 1037 2919 1998 10021 2028 1998 2107 2019 6581 3364 2004 5578 10424 2229 16741 2323 2025 2031 3970 2000 2552 1999 2009 1012 1026 7987 1013 1028 1026 7987 1013 1028 1045 2228 1010 2074 1996 4500 1010 2008 1010 2130 2043 1996 2143 2003 4326 1998 2038 2070 21775 1010 1996 2836 1997 5578 10424 2229 16741 2003 2061 18085 2008 2009 19884 1996 2143 1999 2242 6581 1012 1026 7987 1013 1028 1026 7987 1013 1028 2010 2836 2003 2763 1996 2190 1999 2381 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 2143 2993 2038 1037 2200 6536 7712 3496 2055 1996 24730 1997 4511 1999 1996 19685 1012 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] yeah , i remember this one ! many years since i actually watched it . the story was entirely surreal , but nonetheless great ! what anyone who rates and reviews movies ought to bear in mind is what the respective movie aims at . it ' s the same with " first kid " , which follows a similar pattern . certain movies - like this one here - just aim at plain and comical nonsense . such movies can ' t be rated from the point of view of a hyper ##cr ##itical reviewer . of course these movies lack quality , lack a sophisticated storyline , very often lack first - class acting , but if they do fu ##lf ##il their primary [SEP]


INFO:tensorflow:tokens: [CLS] yeah , i remember this one ! many years since i actually watched it . the story was entirely surreal , but nonetheless great ! what anyone who rates and reviews movies ought to bear in mind is what the respective movie aims at . it ' s the same with " first kid " , which follows a similar pattern . certain movies - like this one here - just aim at plain and comical nonsense . such movies can ' t be rated from the point of view of a hyper ##cr ##itical reviewer . of course these movies lack quality , lack a sophisticated storyline , very often lack first - class acting , but if they do fu ##lf ##il their primary [SEP]


INFO:tensorflow:input_ids: 101 3398 1010 1045 3342 2023 2028 999 2116 2086 2144 1045 2941 3427 2009 1012 1996 2466 2001 4498 16524 1010 2021 9690 2307 999 2054 3087 2040 6165 1998 4391 5691 11276 2000 4562 1999 2568 2003 2054 1996 7972 3185 8704 2012 1012 2009 1005 1055 1996 2168 2007 1000 2034 4845 1000 1010 2029 4076 1037 2714 5418 1012 3056 5691 1011 2066 2023 2028 2182 1011 2074 6614 2012 5810 1998 29257 14652 1012 2107 5691 2064 1005 1056 2022 6758 2013 1996 2391 1997 3193 1997 1037 23760 26775 26116 12027 1012 1997 2607 2122 5691 3768 3737 1010 3768 1037 12138 9994 1010 2200 2411 3768 2034 1011 2465 3772 1010 2021 2065 2027 2079 11865 10270 4014 2037 3078 102


INFO:tensorflow:input_ids: 101 3398 1010 1045 3342 2023 2028 999 2116 2086 2144 1045 2941 3427 2009 1012 1996 2466 2001 4498 16524 1010 2021 9690 2307 999 2054 3087 2040 6165 1998 4391 5691 11276 2000 4562 1999 2568 2003 2054 1996 7972 3185 8704 2012 1012 2009 1005 1055 1996 2168 2007 1000 2034 4845 1000 1010 2029 4076 1037 2714 5418 1012 3056 5691 1011 2066 2023 2028 2182 1011 2074 6614 2012 5810 1998 29257 14652 1012 2107 5691 2064 1005 1056 2022 6758 2013 1996 2391 1997 3193 1997 1037 23760 26775 26116 12027 1012 1997 2607 2122 5691 3768 3737 1010 3768 1037 12138 9994 1010 2200 2411 3768 2034 1011 2465 3772 1010 2021 2065 2027 2079 11865 10270 4014 2037 3078 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] i watched mask in the 80 ' s and it ' s currently showing on fox kids in the uk ( very late at night ) . i remember thinking that it was kinda cool back in the day and had a couple of the toys too but watching it now bore ##s me to tears . i never realised before of how ted ##ious and bland this cartoon show really was . it ' s just plain awful ! it is no where near in the same league as the transformers , he - man or thunder ##cats and was very quickly forgot by nearly everyone once it stopped being made . i only watch it on fox kids because ulysses 31 comes on straight [SEP]


INFO:tensorflow:tokens: [CLS] i watched mask in the 80 ' s and it ' s currently showing on fox kids in the uk ( very late at night ) . i remember thinking that it was kinda cool back in the day and had a couple of the toys too but watching it now bore ##s me to tears . i never realised before of how ted ##ious and bland this cartoon show really was . it ' s just plain awful ! it is no where near in the same league as the transformers , he - man or thunder ##cats and was very quickly forgot by nearly everyone once it stopped being made . i only watch it on fox kids because ulysses 31 comes on straight [SEP]


INFO:tensorflow:input_ids: 101 1045 3427 7308 1999 1996 3770 1005 1055 1998 2009 1005 1055 2747 4760 2006 4419 4268 1999 1996 2866 1006 2200 2397 2012 2305 1007 1012 1045 3342 3241 2008 2009 2001 17704 4658 2067 1999 1996 2154 1998 2018 1037 3232 1997 1996 10899 2205 2021 3666 2009 2085 8501 2015 2033 2000 4000 1012 1045 2196 11323 2077 1997 2129 6945 6313 1998 20857 2023 9476 2265 2428 2001 1012 2009 1005 1055 2074 5810 9643 999 2009 2003 2053 2073 2379 1999 1996 2168 2223 2004 1996 19081 1010 2002 1011 2158 2030 8505 19588 1998 2001 2200 2855 9471 2011 3053 3071 2320 2009 3030 2108 2081 1012 1045 2069 3422 2009 2006 4419 4268 2138 22784 2861 3310 2006 3442 102


INFO:tensorflow:input_ids: 101 1045 3427 7308 1999 1996 3770 1005 1055 1998 2009 1005 1055 2747 4760 2006 4419 4268 1999 1996 2866 1006 2200 2397 2012 2305 1007 1012 1045 3342 3241 2008 2009 2001 17704 4658 2067 1999 1996 2154 1998 2018 1037 3232 1997 1996 10899 2205 2021 3666 2009 2085 8501 2015 2033 2000 4000 1012 1045 2196 11323 2077 1997 2129 6945 6313 1998 20857 2023 9476 2265 2428 2001 1012 2009 1005 1055 2074 5810 9643 999 2009 2003 2053 2073 2379 1999 1996 2168 2223 2004 1996 19081 1010 2002 1011 2158 2030 8505 19588 1998 2001 2200 2855 9471 2011 3053 3071 2320 2009 3030 2108 2081 1012 1045 2069 3422 2009 2006 4419 4268 2138 22784 2861 3310 2006 3442 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] blind date ( columbia pictures , 1934 ) , was a decent film , but i have a few issues with this film . first of all , i don ' t fault the actors in this film at all , but more or less , i have a problem with the script . also , i understand that this film was made in the 1930 ' s and people were looking to escape reality , but the script made ann so ##ther ##n ' s character look weak . she kept going back and forth between suit ##ors and i felt as though she should have stayed with paul kelly ' s character in the end . he truly did care about her and her [SEP]


INFO:tensorflow:tokens: [CLS] blind date ( columbia pictures , 1934 ) , was a decent film , but i have a few issues with this film . first of all , i don ' t fault the actors in this film at all , but more or less , i have a problem with the script . also , i understand that this film was made in the 1930 ' s and people were looking to escape reality , but the script made ann so ##ther ##n ' s character look weak . she kept going back and forth between suit ##ors and i felt as though she should have stayed with paul kelly ' s character in the end . he truly did care about her and her [SEP]


INFO:tensorflow:input_ids: 101 6397 3058 1006 3996 4620 1010 4579 1007 1010 2001 1037 11519 2143 1010 2021 1045 2031 1037 2261 3314 2007 2023 2143 1012 2034 1997 2035 1010 1045 2123 1005 1056 6346 1996 5889 1999 2023 2143 2012 2035 1010 2021 2062 2030 2625 1010 1045 2031 1037 3291 2007 1996 5896 1012 2036 1010 1045 3305 2008 2023 2143 2001 2081 1999 1996 4479 1005 1055 1998 2111 2020 2559 2000 4019 4507 1010 2021 1996 5896 2081 5754 2061 12399 2078 1005 1055 2839 2298 5410 1012 2016 2921 2183 2067 1998 5743 2090 4848 5668 1998 1045 2371 2004 2295 2016 2323 2031 4370 2007 2703 5163 1005 1055 2839 1999 1996 2203 1012 2002 5621 2106 2729 2055 2014 1998 2014 102


INFO:tensorflow:input_ids: 101 6397 3058 1006 3996 4620 1010 4579 1007 1010 2001 1037 11519 2143 1010 2021 1045 2031 1037 2261 3314 2007 2023 2143 1012 2034 1997 2035 1010 1045 2123 1005 1056 6346 1996 5889 1999 2023 2143 2012 2035 1010 2021 2062 2030 2625 1010 1045 2031 1037 3291 2007 1996 5896 1012 2036 1010 1045 3305 2008 2023 2143 2001 2081 1999 1996 4479 1005 1055 1998 2111 2020 2559 2000 4019 4507 1010 2021 1996 5896 2081 5754 2061 12399 2078 1005 1055 2839 2298 5410 1012 2016 2921 2183 2067 1998 5743 2090 4848 5668 1998 1045 2371 2004 2295 2016 2323 2031 4370 2007 2703 5163 1005 1055 2839 1999 1996 2203 1012 2002 5621 2106 2729 2055 2014 1998 2014 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] i really enjoyed this movie . i have a real sense of justice and ' an eye for an eye ' , and this movie delivers that in spade ##s . glenn ford is offered a very low price for his ranch by the big ranch ##er in the valley ; then one of his ranch hands is beaten and shot ' to help him make up his mind about selling ' . when the ranch hand dies , and the sheriff refuses to do anything , ford seems at first reluctant to do anything , caution ##ing his men to not take things into their own hands . but , that ' s just what he is about to do . i knew this movie [SEP]


INFO:tensorflow:tokens: [CLS] i really enjoyed this movie . i have a real sense of justice and ' an eye for an eye ' , and this movie delivers that in spade ##s . glenn ford is offered a very low price for his ranch by the big ranch ##er in the valley ; then one of his ranch hands is beaten and shot ' to help him make up his mind about selling ' . when the ranch hand dies , and the sheriff refuses to do anything , ford seems at first reluctant to do anything , caution ##ing his men to not take things into their own hands . but , that ' s just what he is about to do . i knew this movie [SEP]


INFO:tensorflow:input_ids: 101 1045 2428 5632 2023 3185 1012 1045 2031 1037 2613 3168 1997 3425 1998 1005 2019 3239 2005 2019 3239 1005 1010 1998 2023 3185 18058 2008 1999 23288 2015 1012 9465 4811 2003 3253 1037 2200 2659 3976 2005 2010 8086 2011 1996 2502 8086 2121 1999 1996 3028 1025 2059 2028 1997 2010 8086 2398 2003 7854 1998 2915 1005 2000 2393 2032 2191 2039 2010 2568 2055 4855 1005 1012 2043 1996 8086 2192 8289 1010 1998 1996 6458 10220 2000 2079 2505 1010 4811 3849 2012 2034 11542 2000 2079 2505 1010 14046 2075 2010 2273 2000 2025 2202 2477 2046 2037 2219 2398 1012 2021 1010 2008 1005 1055 2074 2054 2002 2003 2055 2000 2079 1012 1045 2354 2023 3185 102


INFO:tensorflow:input_ids: 101 1045 2428 5632 2023 3185 1012 1045 2031 1037 2613 3168 1997 3425 1998 1005 2019 3239 2005 2019 3239 1005 1010 1998 2023 3185 18058 2008 1999 23288 2015 1012 9465 4811 2003 3253 1037 2200 2659 3976 2005 2010 8086 2011 1996 2502 8086 2121 1999 1996 3028 1025 2059 2028 1997 2010 8086 2398 2003 7854 1998 2915 1005 2000 2393 2032 2191 2039 2010 2568 2055 4855 1005 1012 2043 1996 8086 2192 8289 1010 1998 1996 6458 10220 2000 2079 2505 1010 4811 3849 2012 2034 11542 2000 2079 2505 1010 14046 2075 2010 2273 2000 2025 2202 2477 2046 2037 2219 2398 1012 2021 1010 2008 1005 1055 2074 2054 2002 2003 2055 2000 2079 1012 1045 2354 2023 3185 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] disappointing he ##ist movie indeed , i was actually expecting a pretty cool cat and mouse stuff going on through out the movie and it does have few of those cat and mouse stuff going on , but it was just pretty stupid . and it basically showed all the good scenes in the trailer , as a matter of fact if you seen the trailer to this , you basically seen the whole film cause it is just that predictable . so basically the plot is about a few armored truck company workers that try to steal the 42 million dollars they are suppose to transport , until one of the members grows a conscience . i thought the movie would be better with it [SEP]


INFO:tensorflow:tokens: [CLS] disappointing he ##ist movie indeed , i was actually expecting a pretty cool cat and mouse stuff going on through out the movie and it does have few of those cat and mouse stuff going on , but it was just pretty stupid . and it basically showed all the good scenes in the trailer , as a matter of fact if you seen the trailer to this , you basically seen the whole film cause it is just that predictable . so basically the plot is about a few armored truck company workers that try to steal the 42 million dollars they are suppose to transport , until one of the members grows a conscience . i thought the movie would be better with it [SEP]


INFO:tensorflow:input_ids: 101 15640 2002 2923 3185 5262 1010 1045 2001 2941 8074 1037 3492 4658 4937 1998 8000 4933 2183 2006 2083 2041 1996 3185 1998 2009 2515 2031 2261 1997 2216 4937 1998 8000 4933 2183 2006 1010 2021 2009 2001 2074 3492 5236 1012 1998 2009 10468 3662 2035 1996 2204 5019 1999 1996 9117 1010 2004 1037 3043 1997 2755 2065 2017 2464 1996 9117 2000 2023 1010 2017 10468 2464 1996 2878 2143 3426 2009 2003 2074 2008 21425 1012 2061 10468 1996 5436 2003 2055 1037 2261 10612 4744 2194 3667 2008 3046 2000 8954 1996 4413 2454 6363 2027 2024 6814 2000 3665 1010 2127 2028 1997 1996 2372 7502 1037 13454 1012 1045 2245 1996 3185 2052 2022 2488 2007 2009 102


INFO:tensorflow:input_ids: 101 15640 2002 2923 3185 5262 1010 1045 2001 2941 8074 1037 3492 4658 4937 1998 8000 4933 2183 2006 2083 2041 1996 3185 1998 2009 2515 2031 2261 1997 2216 4937 1998 8000 4933 2183 2006 1010 2021 2009 2001 2074 3492 5236 1012 1998 2009 10468 3662 2035 1996 2204 5019 1999 1996 9117 1010 2004 1037 3043 1997 2755 2065 2017 2464 1996 9117 2000 2023 1010 2017 10468 2464 1996 2878 2143 3426 2009 2003 2074 2008 21425 1012 2061 10468 1996 5436 2003 2055 1037 2261 10612 4744 2194 3667 2008 3046 2000 8954 1996 4413 2454 6363 2027 2024 6814 2000 3665 1010 2127 2028 1997 1996 2372 7502 1037 13454 1012 1045 2245 1996 3185 2052 2022 2488 2007 2009 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] this movie is way too long . i lost interest about one hour into the story . saratoga trunk tells the story of ingrid bergman , who is an the child of a prominent new orleans man and his mistress . after her mother dies in paris , bergman comes back to new orleans to scandal ##ize her father ' s " legitimate " family and to blackmail them . she meets gary cooper , who is likewise seeking revenge against the railroad ty ##co ##ons who cheated his father out of his land in texas . she draws him into her schemes , and the movie climax ##es at a saratoga resort . long and boring , but worth watching if you are a bergman [SEP]


INFO:tensorflow:tokens: [CLS] this movie is way too long . i lost interest about one hour into the story . saratoga trunk tells the story of ingrid bergman , who is an the child of a prominent new orleans man and his mistress . after her mother dies in paris , bergman comes back to new orleans to scandal ##ize her father ' s " legitimate " family and to blackmail them . she meets gary cooper , who is likewise seeking revenge against the railroad ty ##co ##ons who cheated his father out of his land in texas . she draws him into her schemes , and the movie climax ##es at a saratoga resort . long and boring , but worth watching if you are a bergman [SEP]


INFO:tensorflow:input_ids: 101 2023 3185 2003 2126 2205 2146 1012 1045 2439 3037 2055 2028 3178 2046 1996 2466 1012 23136 8260 4136 1996 2466 1997 22093 24544 1010 2040 2003 2019 1996 2775 1997 1037 4069 2047 5979 2158 1998 2010 10414 1012 2044 2014 2388 8289 1999 3000 1010 24544 3310 2067 2000 2047 5979 2000 9446 4697 2014 2269 1005 1055 1000 11476 1000 2155 1998 2000 25044 2068 1012 2016 6010 5639 6201 1010 2040 2003 10655 6224 7195 2114 1996 4296 5939 3597 5644 2040 22673 2010 2269 2041 1997 2010 2455 1999 3146 1012 2016 9891 2032 2046 2014 11683 1010 1998 1996 3185 14463 2229 2012 1037 23136 7001 1012 2146 1998 11771 1010 2021 4276 3666 2065 2017 2024 1037 24544 102


INFO:tensorflow:input_ids: 101 2023 3185 2003 2126 2205 2146 1012 1045 2439 3037 2055 2028 3178 2046 1996 2466 1012 23136 8260 4136 1996 2466 1997 22093 24544 1010 2040 2003 2019 1996 2775 1997 1037 4069 2047 5979 2158 1998 2010 10414 1012 2044 2014 2388 8289 1999 3000 1010 24544 3310 2067 2000 2047 5979 2000 9446 4697 2014 2269 1005 1055 1000 11476 1000 2155 1998 2000 25044 2068 1012 2016 6010 5639 6201 1010 2040 2003 10655 6224 7195 2114 1996 4296 5939 3597 5644 2040 22673 2010 2269 2041 1997 2010 2455 1999 3146 1012 2016 9891 2032 2046 2014 11683 1010 1998 1996 3185 14463 2229 2012 1037 23136 7001 1012 2146 1998 11771 1010 2021 4276 3666 2065 2017 2024 1037 24544 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] this is a big disappointment . the main problem is the acting . sy ##lves ##tre le to ##uze ##l is pretty poor as fanny , and the rest are not much better , everybody is very stil ##ted and unnatural . also the camera ##work is very 1980 ' s ie cramped and jump ##y , compared with the likes of 1995 ' s p & p , for example . < br / > < br / > the script is , if anything too faithful to the book , and there are some cr ##inge worthy expressions that should have been cut . < br / > < br / > in every way this is far inferior to the recent film version [SEP]


INFO:tensorflow:tokens: [CLS] this is a big disappointment . the main problem is the acting . sy ##lves ##tre le to ##uze ##l is pretty poor as fanny , and the rest are not much better , everybody is very stil ##ted and unnatural . also the camera ##work is very 1980 ' s ie cramped and jump ##y , compared with the likes of 1995 ' s p & p , for example . < br / > < br / > the script is , if anything too faithful to the book , and there are some cr ##inge worthy expressions that should have been cut . < br / > < br / > in every way this is far inferior to the recent film version [SEP]


INFO:tensorflow:input_ids: 101 2023 2003 1037 2502 10520 1012 1996 2364 3291 2003 1996 3772 1012 25353 20899 7913 3393 2000 20395 2140 2003 3492 3532 2004 17813 1010 1998 1996 2717 2024 2025 2172 2488 1010 7955 2003 2200 25931 3064 1998 21242 1012 2036 1996 4950 6198 2003 2200 3150 1005 1055 29464 22766 1998 5376 2100 1010 4102 2007 1996 7777 1997 2786 1005 1055 1052 1004 1052 1010 2005 2742 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 5896 2003 1010 2065 2505 2205 11633 2000 1996 2338 1010 1998 2045 2024 2070 13675 23496 11007 11423 2008 2323 2031 2042 3013 1012 1026 7987 1013 1028 1026 7987 1013 1028 1999 2296 2126 2023 2003 2521 14092 2000 1996 3522 2143 2544 102


INFO:tensorflow:input_ids: 101 2023 2003 1037 2502 10520 1012 1996 2364 3291 2003 1996 3772 1012 25353 20899 7913 3393 2000 20395 2140 2003 3492 3532 2004 17813 1010 1998 1996 2717 2024 2025 2172 2488 1010 7955 2003 2200 25931 3064 1998 21242 1012 2036 1996 4950 6198 2003 2200 3150 1005 1055 29464 22766 1998 5376 2100 1010 4102 2007 1996 7777 1997 2786 1005 1055 1052 1004 1052 1010 2005 2742 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 5896 2003 1010 2065 2505 2205 11633 2000 1996 2338 1010 1998 2045 2024 2070 13675 23496 11007 11423 2008 2323 2031 2042 3013 1012 1026 7987 1013 1028 1026 7987 1013 1028 1999 2296 2126 2023 2003 2521 14092 2000 1996 3522 2143 2544 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


In [11]:
### TODO: how can segment_ids be used effectively? (every value is 0 in the examples logged above)

In [12]:
# Peek at the first converted training example, one field per line.
#   input_mask  : masking of the token positions
#   segment_ids : differentiates the two sentences of a pair
print(
    *(getattr(train_features[0], field_name)
      for field_name in ("input_ids", "input_mask", "segment_ids",
                         "label_id", "is_real_example")),
    sep=' \n ')

[101, 1027, 1027, 1027, 3576, 27594, 2545, 1027, 1027, 1027, 1026, 7987, 1013, 1028, 1026, 7987, 1013, 1028, 1045, 2572, 1010, 2066, 2116, 2500, 1010, 1037, 4121, 6128, 7987, 12722, 18826, 5470, 1012, 2061, 2043, 1045, 2387, 2035, 1996, 3376, 14921, 5689, 2041, 2392, 1010, 1998, 1996, 9117, 2746, 2011, 2077, 2771, 1024, 1016, 1010, 10908, 2020, 4803, 1012, 1037, 6128, 7987, 12722, 18826, 2537, 1012, 2502, 3765, 1012, 27969, 7980, 1998, 4840, 2013, 2019, 7436, 1011, 23847, 8183, 8751, 1012, 2054, 2064, 4298, 2175, 3308, 1029, 1037, 2843, 1012, 1026, 7987, 1013, 1028, 1026, 7987, 1013, 1028, 1996, 5896, 2003, 4445, 6057, 1006, 2029, 2009, 5363, 2428, 2524, 2000, 2022, 1007, 4496, 10990, 1012, 2017, 2404, 1999, 1037, 2304, 2711, 2040, 2003, 7887, 2437, 16939, 13198, 102] 
 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

## Creating Model

In [13]:
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
  """Creates a classification model on top of the BERT TF-Hub module.

  Args:
    is_predicting: Python bool. When true, only predictions are returned and
      dropout is disabled; otherwise the loss is computed as well.
    input_ids: token ids fed to the hub module's "tokens" signature.
    input_mask: input mask fed to the hub module's "tokens" signature.
    segment_ids: segment ids fed to the hub module's "tokens" signature.
    labels: integer class ids; one-hot encoded with depth `num_labels`.
      Unused when `is_predicting` is true.
    num_labels: number of output classes.

  Returns:
    `(predicted_labels, log_probs)` when `is_predicting` is true, otherwise
    `(loss, predicted_labels, log_probs)`. `log_probs` are log-softmax
    outputs (log-probabilities), and `predicted_labels` keeps the batch
    dimension.
  """

  bert_module = hub.Module(
      BERT_MODEL_HUB,
      trainable=True)
  bert_inputs = dict(
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids)
  bert_outputs = bert_module(
      inputs=bert_inputs,
      signature="tokens",
      as_dict=True)

  # Use "pooled_output" for classification tasks on an entire sentence.
  # Use "sequence_output" for token-level output.
  output_layer = bert_outputs["pooled_output"]

  hidden_size = output_layer.shape[-1].value

  # Classification head trained on top of BERT. The weights are stored as
  # [num_labels, hidden_size], hence `transpose_b=True` in the matmul below.
  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):

    # Dropout helps prevent overfitting. It is skipped when predicting so that
    # predictions are deterministic.
    # NOTE(review): EVAL also goes through the non-predicting path, so dropout
    # is still active there; this function cannot tell TRAIN from EVAL.
    if not is_predicting:
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    # No tf.squeeze here: squeezing would turn a batch of one example into a
    # scalar and drop the batch dimension.
    predicted_labels = tf.argmax(log_probs, axis=-1, output_type=tf.int32)

    # If we're predicting, we want predicted labels and the (log) probabilities.
    if is_predicting:
      return (predicted_labels, log_probs)

    # Convert labels into one-hot encoding
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    # If we're train/eval, compute the cross-entropy between the one-hot labels
    # and the predicted log-probabilities, averaged over the batch.
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, predicted_labels, log_probs)

In [14]:
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
  """Returns a `model_fn` closure for `tf.estimator.Estimator`.

  Args:
    num_labels: number of output classes, forwarded to `create_model`.
    learning_rate: forwarded to `bert.optimization.create_optimizer`.
    num_train_steps: total number of training steps, forwarded to the
      optimizer.
    num_warmup_steps: number of warmup steps, forwarded to the optimizer.

  Returns:
    A function `model_fn(features, labels, mode, params)` that builds a
    `tf.estimator.EstimatorSpec` for the requested mode.
  """
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Builds the `EstimatorSpec` for TRAIN, EVAL or PREDICT.

    `labels` and `params` are not used; the label ids are read from
    `features["label_ids"]`.
    """

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)

    # PREDICT: no loss, no optimizer, no metrics.
    if is_predicting:
      (predicted_labels, log_probs) = create_model(
          is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      predictions = {
          # NOTE: these are log-probabilities (log-softmax output); the key
          # name is kept so existing consumers keep working.
          'probabilities': log_probs,
          'labels': predicted_labels
      }
      return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # TRAIN and EVAL share the loss computation.
    (loss, predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

    if mode == tf.estimator.ModeKeys.TRAIN:
      # The optimizer is only needed (and only built) when training.
      train_op = bert.optimization.create_optimizer(
          loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)
      return tf.estimator.EstimatorSpec(mode=mode,
                                        loss=loss,
                                        train_op=train_op)

    # EVAL: calculate evaluation metrics.
    def metric_fn(label_ids, predicted_labels):
      """Returns the dict of metric ops reported by `estimator.evaluate`."""
      # NOTE(review): every metric below is computed from the hard predicted
      # labels, not from probabilities, so "auc" only sees 0/1 scores. The
      # precision/recall/TP/FP/TN/FN metrics presumably assume binary labels.
      accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
      f1_score = tf.contrib.metrics.f1_score(
          label_ids,
          predicted_labels)
      auc = tf.metrics.auc(
          label_ids,
          predicted_labels)
      recall = tf.metrics.recall(
          label_ids,
          predicted_labels)
      precision = tf.metrics.precision(
          label_ids,
          predicted_labels)
      true_pos = tf.metrics.true_positives(
          label_ids,
          predicted_labels)
      true_neg = tf.metrics.true_negatives(
          label_ids,
          predicted_labels)
      false_pos = tf.metrics.false_positives(
          label_ids,
          predicted_labels)
      false_neg = tf.metrics.false_negatives(
          label_ids,
          predicted_labels)
      return {
          "eval_accuracy": accuracy,
          "f1_score": f1_score,
          "auc": auc,
          "precision": precision,
          "recall": recall,
          "true_positives": true_pos,
          "true_negatives": true_neg,
          "false_positives": false_pos,
          "false_negatives": false_neg
      }

    eval_metrics = metric_fn(label_ids, predicted_labels)
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      eval_metric_ops=eval_metrics)

  # Return the actual model function in the closure
  return model_fn


In [15]:
# --- Optimisation hyper-parameters ---
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where the learning rate
# is small and gradually increases -- usually helps training.
WARMUP_PROPORTION = 0.1

# --- Checkpoint / summary cadence (model configs) ---
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100

# Derive the number of train and warmup steps from the dataset size,
# the batch size and the number of epochs (both truncated to int).
num_train_steps = int(NUM_TRAIN_EPOCHS * (len(train_features) / BATCH_SIZE))
num_warmup_steps = int(WARMUP_PROPORTION * num_train_steps)

# Disabled: explicit RunConfig with an output directory and checkpoint cadence.
# While this stays commented out, the SAVE_* constants above are not consumed
# by this cell.
# run_config = tf.estimator.RunConfig(
#     model_dir=OUTPUT_DIR,
#     save_summary_steps=SAVE_SUMMARY_STEPS,
#     save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

In [16]:
# Bind the hyper-parameters into the model function used by the Estimator.
model_fn = model_fn_builder(
    len(label_list), LEARNING_RATE, num_train_steps, num_warmup_steps)

# No RunConfig is passed, so the Estimator falls back to its default config.
# `params` is not read by model_fn itself; presumably "batch_size" is consumed
# by the input functions built further down -- verify against the bert package.
estimator = tf.estimator.Estimator(
    model_fn, params={"batch_size": BATCH_SIZE})

INFO:tensorflow:Using default config.


INFO:tensorflow:Using default config.






INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpjvz5lcct', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fb0882abd90>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpjvz5lcct', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fb0882abd90>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [17]:
# Input function that feeds the converted training features to the Estimator.
# drop_remainder would only need to be True when running on TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
    train_features, MAX_SEQ_LENGTH, is_training=True, drop_remainder=False)

In [None]:
from datetime import datetime

# Fine-tune the model for `num_train_steps` steps and report the wall-clock time.
# NOTE: this is a long-running cell -- the captured log below reports roughly
# 2800 seconds per 100 steps on the machine it was run on.
print('Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)

Beginning Training!
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.


Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.














Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


Instructions for updating:
Deprecated in favor of operator or tf.math.divide.








Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where






  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpjvz5lcct/model.ckpt.


INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpjvz5lcct/model.ckpt.


INFO:tensorflow:loss = 0.6932694, step = 1


INFO:tensorflow:loss = 0.6932694, step = 1


INFO:tensorflow:Saving checkpoints for 22 into /tmp/tmpjvz5lcct/model.ckpt.


INFO:tensorflow:Saving checkpoints for 22 into /tmp/tmpjvz5lcct/model.ckpt.


INFO:tensorflow:Saving checkpoints for 44 into /tmp/tmpjvz5lcct/model.ckpt.


INFO:tensorflow:Saving checkpoints for 44 into /tmp/tmpjvz5lcct/model.ckpt.


INFO:tensorflow:Saving checkpoints for 66 into /tmp/tmpjvz5lcct/model.ckpt.


INFO:tensorflow:Saving checkpoints for 66 into /tmp/tmpjvz5lcct/model.ckpt.


INFO:tensorflow:Saving checkpoints for 88 into /tmp/tmpjvz5lcct/model.ckpt.


INFO:tensorflow:Saving checkpoints for 88 into /tmp/tmpjvz5lcct/model.ckpt.


INFO:tensorflow:global_step/sec: 0.0357439


INFO:tensorflow:global_step/sec: 0.0357439


INFO:tensorflow:loss = 0.2895655, step = 101 (2797.684 sec)


INFO:tensorflow:loss = 0.2895655, step = 101 (2797.684 sec)


INFO:tensorflow:Saving checkpoints for 109 into /tmp/tmpjvz5lcct/model.ckpt.


INFO:tensorflow:Saving checkpoints for 109 into /tmp/tmpjvz5lcct/model.ckpt.


Instructions for updating:
Use standard file APIs to delete files with this prefix.


Instructions for updating:
Use standard file APIs to delete files with this prefix.


In [None]:
# Input function over the converted test features, built with is_training=False.
# Accessed through the `bert` package, like the training input function and the
# optimizer, so the cell does not rely on a separate `run_classifier` name.
test_input_fn = bert.run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)

In [None]:
# Score the fine-tuned model on the test input; steps=None puts no explicit
# cap on the number of evaluation steps. The result is the cell's output.
estimator.evaluate(test_input_fn, steps=None)