In [1]:
#!pip install bert-tensorflow  #Python package for Google's BERT implementation.
#!pip uninstall tensorflow
!pip install tensorflow==2.0.0
!pip install tensorflow_hub



In [2]:
import tensorflow as tf
import tensorflow_hub as hub
print("TF version: ", tf.__version__)
print("Hub version: ", hub.__version__)

TF version:  2.0.0
Hub version:  0.7.0


In [0]:
from sklearn.model_selection import train_test_split
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime

https://github.com/tensorflow/models/tree/master/official/nlp/bert (tensorflow 2 implementations for bert)

In [4]:
!pip install bert-for-tf2
!pip install sentencepiece



In [0]:
import bert
#from bert import run_classifier
from bert import tokenization
#from bert import optimization

In [6]:
# Set the output directory for saving model file
# Optionally, set a GCP bucket location

OUTPUT_DIR = 'BERT_OUTPUT'#@param {type:"string"}
#@markdown Whether or not to clear/delete the directory and create a new one
DO_DELETE = False #@param {type:"boolean"}
#@markdown Set USE_BUCKET and BUCKET if you want to (optionally) store model output on GCP bucket.
USE_BUCKET = False #@param {type:"boolean"}
BUCKET = 'BUCKET_NAME' #@param {type:"string"}

if DO_DELETE:
  try:
    tf.io.gfile.DeleteRecursively(OUTPUT_DIR)
  except:
    # Doesn't matter if the directory didn't exist
    pass


tf.io.gfile.makedirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))


***** Model output directory: BERT_OUTPUT *****


#### Data

First, let's download the dataset, hosted by Stanford.
The code below, which downloads, extracts, and imports the IMDB Large Movie Review Dataset, is borrowed from this Tensorflow tutorial.
https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub

In [0]:
from tensorflow import keras
import os
import re

def load_directory_data(directory):
  data = {}
  data['sentence'] = []
  data['sentiment'] = []
  for file_name in os.listdir(directory):
    with tf.io.gfile.GFile(os.path.join(directory, file_name), "r") as f:
      data['sentence'].append(f.read())
      data['sentiment'].append(re.match(r"\d+_(\d+).txt", file_name).group(1))
  return pd.DataFrame.from_dict(data)

# Merge positive and negative examples, add a polarity column and shuffle.
def load_dataset(directory):
  pos_df = load_directory_data(os.path.join(directory, "pos"))
  neg_df = load_directory_data(os.path.join(directory, "neg"))
  pos_df['polarity'] = 1
  neg_df['polarity'] = 0
  return pd.concat([pos_df, neg_df]).sample(frac=1).reset_index(drop=True)

# Download and process the dataset files.
def download_and_load_datasets(force_download=False):
  dataset = tf.keras.utils.get_file(
      fname="aclImdb.tar.gz", 
      origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz", 
      extract=True)
  
  print(os.path.dirname(dataset))

  train_df = load_dataset(os.path.join(os.path.dirname(dataset), 
                                       "aclImdb", "train"))
  test_df = load_dataset(os.path.join(os.path.dirname(dataset), 
                                      "aclImdb", "test"))
  return train_df, test_df

In [41]:
train, test = download_and_load_datasets()

/root/.keras/datasets


#### To keep training fast, we'll take a sample of 5000 train and test examples, respectively.

In [0]:
train = train.sample(5000)
test = test.sample(5000)

In [43]:
train.head()

Unnamed: 0,sentence,sentiment,polarity
5308,"Gritty, dusty western from director Richard Br...",4,0
4270,I have to vote this 10 out of 10 in the rare c...,10,1
8468,*I mark where there are spoilers! Overall comm...,10,1
10187,Family problems abound in real life and that i...,10,1
10901,"Sitting, Typing Nothing is the latest ""what i...",7,1


In [11]:
train.columns

Index(['sentence', 'sentiment', 'polarity'], dtype='object')

#### For us, our input data is the 'sentence' column and our label is the 'polarity' column (0, 1 for negative and positive, respecitvely)

In [0]:
DATA_COLUMN = 'sentence'
LABEL_COLUMN = 'polarity'
# label_list is the list of labels, i.e. True, False or 0, 1 or 'dog', 'cat'
label_list = [0, 1]
num_labels = len(label_list)

#### Data Preprocessing
We'll need to transform our data into a format BERT understands. This involves two steps. First, we create InputExample's using the constructor provided in the BERT library.

text_a is the text we want to classify, which in this case, is the Request field in our Dataframe.
text_b is used if we're training a model to understand the relationship between sentences (i.e. is text_b a translation of text_a? Is text_b an answer to the question asked by text_a?). This doesn't apply to our task, so we can leave text_b blank.
label is the label for our example, i.e. True, False

In [0]:
#copied from bert runclassifier.py
class InputExample(object):
  """A single training/test example for simple sequence classification."""
  def __init__(self, guid, text_a, text_b=None, label=None):
    self.guid = guid
    self.text_a = text_a
    self.text_b = text_b
    self.label = label

In [0]:
train_input_examples = train.apply(lambda x: InputExample(guid=None, #Globally unique ID for bookkeeping, unused in this example
                                 text_a = x[DATA_COLUMN],
                                 text_b = None,
                                 label= x[LABEL_COLUMN]), axis = 1)
test_input_examples = test.apply(lambda x: InputExample(guid=None, #Globally unique ID for bookkeeping, unused in this example
                                 text_a = x[DATA_COLUMN],
                                 text_b = None,
                                 label= x[LABEL_COLUMN]), axis = 1)

Next, we need to preprocess our data so that it matches the data BERT was trained on. For this, we'll need to do a couple of things (but don't worry--this is also included in the Python library):

1. Lowercase our text (if we're using a BERT lowercase model)
2. Tokenize it (i.e. "sally says hi" -> ["sally", "says", "hi"])
3. Break words into WordPieces (i.e. "calling" -> ["call", "##ing"])
4. Map our words to indexes using a vocab file that BERT provides
5. Add special "CLS" and "SEP" tokens (see the readme)
6. Append "index" and "segment" tokens to each input (see the BERT paper)

Happily, we don't have to worry about most of these details.

To start, we'll need to load a vocabulary file and lowercasing information directly from the BERT tf hub module:

In [45]:
# This is a path to an uncased (all lowercase) version of BERT
#BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"  #this one supports only tf1 . it was not saved using saved model format

BERT_MODEL_HUB = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1"  #use this for tf2.

#refer https://tfhub.dev/tensorflow/bert_en_cased_L-12_H-768_A-12/1 for example usage
#tokens are stored as signatures
# signatures can be accessed using loaded_module.signatures[' '](*model_inputs)

#Its vocab_file is stored as tf.saved_model.Asset and 
#the do_lower_case flag is stored as a tf.Variable object on the SavedModel.
#They can be retrieved (using TensorFlow Hub 0.7.0 or newer) as follows:
#if using keras layer
#vocab_file = bert_layer.resolved_object.vocab_file.asset_path.numpy()
#do_lower_case = bert_layer.resolved_object.do_lower_case.numpy()

# if using hub.load(model_url)
#  bert_module = hub.load(BERT_MODEL_HUB)
#  vocab_file = bert_module.vocab_file.asset_path.numpy()
#  do_lower_case = bert_module.do_lower_case.numpy()


def create_tokenizer_from_hub_module():
  """Get the vocab file and casing info from the Hub module."""
  #with tf.Graph().as_default():
  bert_module = hub.load(BERT_MODEL_HUB)
  print(bert_module.vocab_file.asset_path.numpy())
  print(bert_module.do_lower_case.numpy())
  vocab_file = bert_module.vocab_file.asset_path.numpy()
  do_lower_case = bert_module.do_lower_case.numpy()
  return bert.bert_tokenization.FullTokenizer(vocab_file, do_lower_case)

tokenizer = create_tokenizer_from_hub_module()

b'/tmp/tfhub_modules/03d6fb3ce1605ad9e5e9ed5346b2fb9623ef4d3d/assets/vocab.txt'
True


Great--we just learned that the BERT model we're using expects lowercase data (that's what stored in tokenization_info["do_lower_case"]) and we also loaded BERT's vocab file. We also created a tokenizer, which breaks words into word pieces:

In [16]:
tokenizer.tokenize("This here's an example of using the BERT tokenizer")

['this',
 'here',
 "'",
 's',
 'an',
 'example',
 'of',
 'using',
 'the',
 'bert',
 'token',
 '##izer']

Using our tokenizer, we'll call run_classifier.convert_examples_to_features on our InputExamples to convert them into features BERT understands.

In [0]:
def convert_examples_to_features(examples, label_list, max_seq_length,
                                 tokenizer):
  """Convert a set of `InputExample`s to a list of `InputFeatures`."""

  features = []
  for (ex_index, example) in enumerate(examples):
    if ex_index % 10000 == 0:
      tf.compat.v1.logging.info("Writing example %d of %d" % (ex_index, len(examples)))

    feature = convert_single_example(ex_index, example, label_list,
                                     max_seq_length, tokenizer)

    features.append(feature)
  return features

def convert_single_example(ex_index, example, label_list, max_seq_length,
                           tokenizer):
  """Converts a single `InputExample` into a single `InputFeatures`."""

  if isinstance(example, PaddingInputExample):
    return InputFeatures(
        input_ids=[0] * max_seq_length,
        input_mask=[0] * max_seq_length,
        segment_ids=[0] * max_seq_length,
        label_id=0,
        is_real_example=False)

  label_map = {}
  for (i, label) in enumerate(label_list):
    label_map[label] = i

  tokens_a = tokenizer.tokenize(example.text_a)
  tokens_b = None
  if example.text_b:
    tokens_b = tokenizer.tokenize(example.text_b)

  if tokens_b:
    # Modifies `tokens_a` and `tokens_b` in place so that the total
    # length is less than the specified length.
    # Account for [CLS], [SEP], [SEP] with "- 3"
    _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
  else:
    # Account for [CLS] and [SEP] with "- 2"
    if len(tokens_a) > max_seq_length - 2:
      tokens_a = tokens_a[0:(max_seq_length - 2)]

  # The convention in BERT is:
  # (a) For sequence pairs:
  #  tokens:   [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
  #  type_ids: 0     0  0    0    0     0       0 0     1  1  1  1   1 1
  # (b) For single sequences:
  #  tokens:   [CLS] the dog is hairy . [SEP]
  #  type_ids: 0     0   0   0  0     0 0
  #
  # Where "type_ids" are used to indicate whether this is the first
  # sequence or the second sequence. The embedding vectors for `type=0` and
  # `type=1` were learned during pre-training and are added to the wordpiece
  # embedding vector (and position vector). This is not *strictly* necessary
  # since the [SEP] token unambiguously separates the sequences, but it makes
  # it easier for the model to learn the concept of sequences.
  #
  # For classification tasks, the first vector (corresponding to [CLS]) is
  # used as the "sentence vector". Note that this only makes sense because
  # the entire model is fine-tuned.
  tokens = []
  segment_ids = []
  tokens.append("[CLS]")
  segment_ids.append(0)
  for token in tokens_a:
    tokens.append(token)
    segment_ids.append(0)
  tokens.append("[SEP]")
  segment_ids.append(0)

  if tokens_b:
    for token in tokens_b:
      tokens.append(token)
      segment_ids.append(1)
    tokens.append("[SEP]")
    segment_ids.append(1)

  input_ids = tokenizer.convert_tokens_to_ids(tokens)

  # The mask has 1 for real tokens and 0 for padding tokens. Only real
  # tokens are attended to.
  input_mask = [1] * len(input_ids)

  # Zero-pad up to the sequence length.
  while len(input_ids) < max_seq_length:
    input_ids.append(0)
    input_mask.append(0)
    segment_ids.append(0)

  assert len(input_ids) == max_seq_length
  assert len(input_mask) == max_seq_length
  assert len(segment_ids) == max_seq_length

  label_id = label_map[example.label]
  if ex_index < 5:
    tf.compat.v1.logging.info("*** Example ***")
    tf.compat.v1.logging.info("guid: %s" % (example.guid))
    tf.compat.v1.logging.info("tokens: %s" % " ".join(
        [bert.bert_tokenization.printable_text(x) for x in tokens]))
    tf.compat.v1.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
    tf.compat.v1.logging.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
    tf.compat.v1.logging.info("segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
    tf.compat.v1.logging.info("label: %s (id = %d)" % (example.label, label_id))

  feature = InputFeatures(
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids,
      label_id=label_id,
      is_real_example=True)
  return feature

class InputFeatures(object):
  """A single set of features of data."""

  def __init__(self,
                input_ids,
                input_mask,
                segment_ids,
                label_id,
                is_real_example=True):
    self.input_ids = input_ids
    self.input_mask = input_mask
    self.segment_ids = segment_ids
    self.label_id = label_id
    self.is_real_example = is_real_example

class PaddingInputExample(object):
  """Fake example so the num input examples is a multiple of the batch size.
  When running eval/predict on the TPU, we need to pad the number of examples
  to be a multiple of the batch size, because the TPU requires a fixed batch
  size. The alternative is to drop the last batch, which is bad because it means
  the entire output data won't be generated.
  We use this class instead of `None` because treating `None` as padding
  battches could cause silent errors.
  """

In [47]:
# We'll set sequences to be at most 128 tokens long.
MAX_SEQ_LENGTH = 128

# Convert our train and test features to InputFeatures that BERT understands.
train_features = convert_examples_to_features(train_input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = convert_examples_to_features(test_input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)

INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] grit ##ty , dusty western from director richard brooks , who seems thoroughly eng ##ross ##ed in the genre while keeping all the usual cl ##iche ##s intact . early 1900s horse race attracts a low - key ##ed cowboy ( gene hack ##man ) , a su ##ave gamble ##r ( james co ##burn ) , a cocky kid ( jan michael vincent ) , and even a female ( a surprisingly game candi ##ce bergen ) . once the pre ##lim ##ina ##ries are out of the way ( with the predictable arguments over whether or not a woman should take part ) , this becomes a fairly eng ##ross ##ing entry , though one which breaks no new ground ( it instead resembles [SEP]


INFO:tensorflow:tokens: [CLS] grit ##ty , dusty western from director richard brooks , who seems thoroughly eng ##ross ##ed in the genre while keeping all the usual cl ##iche ##s intact . early 1900s horse race attracts a low - key ##ed cowboy ( gene hack ##man ) , a su ##ave gamble ##r ( james co ##burn ) , a cocky kid ( jan michael vincent ) , and even a female ( a surprisingly game candi ##ce bergen ) . once the pre ##lim ##ina ##ries are out of the way ( with the predictable arguments over whether or not a woman should take part ) , this becomes a fairly eng ##ross ##ing entry , though one which breaks no new ground ( it instead resembles [SEP]


INFO:tensorflow:input_ids: 101 24842 3723 1010 12727 2530 2013 2472 2957 8379 1010 2040 3849 12246 25540 25725 2098 1999 1996 6907 2096 4363 2035 1996 5156 18856 17322 2015 10109 1012 2220 16430 3586 2679 17771 1037 2659 1011 3145 2098 11762 1006 4962 20578 2386 1007 1010 1037 10514 10696 18503 2099 1006 2508 2522 8022 1007 1010 1037 24995 4845 1006 5553 2745 6320 1007 1010 1998 2130 1037 2931 1006 1037 10889 2208 27467 3401 12674 1007 1012 2320 1996 3653 17960 3981 5134 2024 2041 1997 1996 2126 1006 2007 1996 21425 9918 2058 3251 2030 2025 1037 2450 2323 2202 2112 1007 1010 2023 4150 1037 7199 25540 25725 2075 4443 1010 2295 2028 2029 7807 2053 2047 2598 1006 2009 2612 12950 102


INFO:tensorflow:input_ids: 101 24842 3723 1010 12727 2530 2013 2472 2957 8379 1010 2040 3849 12246 25540 25725 2098 1999 1996 6907 2096 4363 2035 1996 5156 18856 17322 2015 10109 1012 2220 16430 3586 2679 17771 1037 2659 1011 3145 2098 11762 1006 4962 20578 2386 1007 1010 1037 10514 10696 18503 2099 1006 2508 2522 8022 1007 1010 1037 24995 4845 1006 5553 2745 6320 1007 1010 1998 2130 1037 2931 1006 1037 10889 2208 27467 3401 12674 1007 1012 2320 1996 3653 17960 3981 5134 2024 2041 1997 1996 2126 1006 2007 1996 21425 9918 2058 3251 2030 2025 1037 2450 2323 2202 2112 1007 1010 2023 4150 1037 7199 25540 25725 2075 4443 1010 2295 2028 2029 7807 2053 2047 2598 1006 2009 2612 12950 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] i have to vote this 10 out of 10 in the rare chance that she happens to see this review , takes pity on me , w ##his ##ks me to hollywood and involves me in her freak ##y / funny world . but in all seriousness , it was good . first episode is obviously finding it ' s feet , but it ' s got that silver ##man weird ##ness running all the way through it . it ' s not a laugh out loud sort of comedy , but that ' s good thing , too much has a laughter - track to it , and this wouldn ' t be right with cues when to laugh , it ' s to the [SEP]


INFO:tensorflow:tokens: [CLS] i have to vote this 10 out of 10 in the rare chance that she happens to see this review , takes pity on me , w ##his ##ks me to hollywood and involves me in her freak ##y / funny world . but in all seriousness , it was good . first episode is obviously finding it ' s feet , but it ' s got that silver ##man weird ##ness running all the way through it . it ' s not a laugh out loud sort of comedy , but that ' s good thing , too much has a laughter - track to it , and this wouldn ' t be right with cues when to laugh , it ' s to the [SEP]


INFO:tensorflow:input_ids: 101 1045 2031 2000 3789 2023 2184 2041 1997 2184 1999 1996 4678 3382 2008 2016 6433 2000 2156 2023 3319 1010 3138 12063 2006 2033 1010 1059 24158 5705 2033 2000 5365 1998 7336 2033 1999 2014 11576 2100 1013 6057 2088 1012 2021 1999 2035 27994 1010 2009 2001 2204 1012 2034 2792 2003 5525 4531 2009 1005 1055 2519 1010 2021 2009 1005 1055 2288 2008 3165 2386 6881 2791 2770 2035 1996 2126 2083 2009 1012 2009 1005 1055 2025 1037 4756 2041 5189 4066 1997 4038 1010 2021 2008 1005 1055 2204 2518 1010 2205 2172 2038 1037 7239 1011 2650 2000 2009 1010 1998 2023 2876 1005 1056 2022 2157 2007 23391 2043 2000 4756 1010 2009 1005 1055 2000 1996 102


INFO:tensorflow:input_ids: 101 1045 2031 2000 3789 2023 2184 2041 1997 2184 1999 1996 4678 3382 2008 2016 6433 2000 2156 2023 3319 1010 3138 12063 2006 2033 1010 1059 24158 5705 2033 2000 5365 1998 7336 2033 1999 2014 11576 2100 1013 6057 2088 1012 2021 1999 2035 27994 1010 2009 2001 2204 1012 2034 2792 2003 5525 4531 2009 1005 1055 2519 1010 2021 2009 1005 1055 2288 2008 3165 2386 6881 2791 2770 2035 1996 2126 2083 2009 1012 2009 1005 1055 2025 1037 4756 2041 5189 4066 1997 4038 1010 2021 2008 1005 1055 2204 2518 1010 2205 2172 2038 1037 7239 1011 2650 2000 2009 1010 1998 2023 2876 1005 1056 2022 2157 2007 23391 2043 2000 4756 1010 2009 1005 1055 2000 1996 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] * i mark where there are spoil ##ers ! overall comments : if you can take a serious movie , go see this . have an open mind and you will enjoy it . don ' t leave the theater because you get confused as to what is going on ! the movie fits together nicely in the second half . i will be taking my mom to see it again when the movie officially opens . < br / > < br / > i was lucky to see this at a screening a couple of weeks ago , when will was going around promoting the movie . he was great - - spent a lot of time with the fans . thank you for [SEP]


INFO:tensorflow:tokens: [CLS] * i mark where there are spoil ##ers ! overall comments : if you can take a serious movie , go see this . have an open mind and you will enjoy it . don ' t leave the theater because you get confused as to what is going on ! the movie fits together nicely in the second half . i will be taking my mom to see it again when the movie officially opens . < br / > < br / > i was lucky to see this at a screening a couple of weeks ago , when will was going around promoting the movie . he was great - - spent a lot of time with the fans . thank you for [SEP]


INFO:tensorflow:input_ids: 101 1008 1045 2928 2073 2045 2024 27594 2545 999 3452 7928 1024 2065 2017 2064 2202 1037 3809 3185 1010 2175 2156 2023 1012 2031 2019 2330 2568 1998 2017 2097 5959 2009 1012 2123 1005 1056 2681 1996 4258 2138 2017 2131 5457 2004 2000 2054 2003 2183 2006 999 1996 3185 16142 2362 19957 1999 1996 2117 2431 1012 1045 2097 2022 2635 2026 3566 2000 2156 2009 2153 2043 1996 3185 3985 7480 1012 1026 7987 1013 1028 1026 7987 1013 1028 1045 2001 5341 2000 2156 2023 2012 1037 11326 1037 3232 1997 3134 3283 1010 2043 2097 2001 2183 2105 7694 1996 3185 1012 2002 2001 2307 1011 1011 2985 1037 2843 1997 2051 2007 1996 4599 1012 4067 2017 2005 102


INFO:tensorflow:input_ids: 101 1008 1045 2928 2073 2045 2024 27594 2545 999 3452 7928 1024 2065 2017 2064 2202 1037 3809 3185 1010 2175 2156 2023 1012 2031 2019 2330 2568 1998 2017 2097 5959 2009 1012 2123 1005 1056 2681 1996 4258 2138 2017 2131 5457 2004 2000 2054 2003 2183 2006 999 1996 3185 16142 2362 19957 1999 1996 2117 2431 1012 1045 2097 2022 2635 2026 3566 2000 2156 2009 2153 2043 1996 3185 3985 7480 1012 1026 7987 1013 1028 1026 7987 1013 1028 1045 2001 5341 2000 2156 2023 2012 1037 11326 1037 3232 1997 3134 3283 1010 2043 2097 2001 2183 2105 7694 1996 3185 1012 2002 2001 2307 1011 1011 2985 1037 2843 1997 2051 2007 1996 4599 1012 4067 2017 2005 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] family problems ab ##ound in real life and that is what this movie is about . love can hold the members together through out the ordeal ##s and trials and that is what this movie is about . one man , daddy , has the maturity and fort ##itude to sustain the family in the face of ad ##vers ##ity . the kids grow up , one all be it , in the hard way , to realize that no matter how old they or a parent is , the parent still loves their children and are willing to provide them a cushion when they fall . all the actors portraying their characters did outstanding performances . yes , i shed a tear along the way [SEP]


INFO:tensorflow:tokens: [CLS] family problems ab ##ound in real life and that is what this movie is about . love can hold the members together through out the ordeal ##s and trials and that is what this movie is about . one man , daddy , has the maturity and fort ##itude to sustain the family in the face of ad ##vers ##ity . the kids grow up , one all be it , in the hard way , to realize that no matter how old they or a parent is , the parent still loves their children and are willing to provide them a cushion when they fall . all the actors portraying their characters did outstanding performances . yes , i shed a tear along the way [SEP]


INFO:tensorflow:input_ids: 101 2155 3471 11113 28819 1999 2613 2166 1998 2008 2003 2054 2023 3185 2003 2055 1012 2293 2064 2907 1996 2372 2362 2083 2041 1996 23304 2015 1998 7012 1998 2008 2003 2054 2023 3185 2003 2055 1012 2028 2158 1010 8600 1010 2038 1996 16736 1998 3481 18679 2000 15770 1996 2155 1999 1996 2227 1997 4748 14028 3012 1012 1996 4268 4982 2039 1010 2028 2035 2022 2009 1010 1999 1996 2524 2126 1010 2000 5382 2008 2053 3043 2129 2214 2027 2030 1037 6687 2003 1010 1996 6687 2145 7459 2037 2336 1998 2024 5627 2000 3073 2068 1037 22936 2043 2027 2991 1012 2035 1996 5889 17274 2037 3494 2106 5151 4616 1012 2748 1010 1045 8328 1037 7697 2247 1996 2126 102


INFO:tensorflow:input_ids: 101 2155 3471 11113 28819 1999 2613 2166 1998 2008 2003 2054 2023 3185 2003 2055 1012 2293 2064 2907 1996 2372 2362 2083 2041 1996 23304 2015 1998 7012 1998 2008 2003 2054 2023 3185 2003 2055 1012 2028 2158 1010 8600 1010 2038 1996 16736 1998 3481 18679 2000 15770 1996 2155 1999 1996 2227 1997 4748 14028 3012 1012 1996 4268 4982 2039 1010 2028 2035 2022 2009 1010 1999 1996 2524 2126 1010 2000 5382 2008 2053 3043 2129 2214 2027 2030 1037 6687 2003 1010 1996 6687 2145 7459 2037 2336 1998 2024 5627 2000 3073 2068 1037 22936 2043 2027 2991 1012 2035 1996 5889 17274 2037 3494 2106 5151 4616 1012 2748 1010 1045 8328 1037 7697 2247 1996 2126 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] sitting , typing nothing is the latest " what if ? " fest offered by vince ##nz ##io natal ##i , and starring david he ##wl ##itt and andrew miller as two losers . one is having relationship problems , got canned from his job ( because of relationship problems ) and the police are out to get him ( because of his job and his relationship problems ) . the other guy is a ago ##raph ##ob ##ic who refuses to go outside his home , is met by a bother ##some girl guide who calls on her mom to claim she was mole ##sted when he doesn ' t buy cookies from him . oh yeah , the police are after him too , [SEP]


INFO:tensorflow:tokens: [CLS] sitting , typing nothing is the latest " what if ? " fest offered by vince ##nz ##io natal ##i , and starring david he ##wl ##itt and andrew miller as two losers . one is having relationship problems , got canned from his job ( because of relationship problems ) and the police are out to get him ( because of his job and his relationship problems ) . the other guy is a ago ##raph ##ob ##ic who refuses to go outside his home , is met by a bother ##some girl guide who calls on her mom to claim she was mole ##sted when he doesn ' t buy cookies from him . oh yeah , the police are after him too , [SEP]


INFO:tensorflow:input_ids: 101 3564 1010 22868 2498 2003 1996 6745 1000 2054 2065 1029 1000 17037 3253 2011 12159 14191 3695 17489 2072 1010 1998 4626 2585 2002 13668 12474 1998 4080 4679 2004 2048 23160 1012 2028 2003 2383 3276 3471 1010 2288 27141 2013 2010 3105 1006 2138 1997 3276 3471 1007 1998 1996 2610 2024 2041 2000 2131 2032 1006 2138 1997 2010 3105 1998 2010 3276 3471 1007 1012 1996 2060 3124 2003 1037 3283 24342 16429 2594 2040 10220 2000 2175 2648 2010 2188 1010 2003 2777 2011 1037 8572 14045 2611 5009 2040 4455 2006 2014 3566 2000 4366 2016 2001 16709 14701 2043 2002 2987 1005 1056 4965 16324 2013 2032 1012 2821 3398 1010 1996 2610 2024 2044 2032 2205 1010 102


INFO:tensorflow:input_ids: 101 3564 1010 22868 2498 2003 1996 6745 1000 2054 2065 1029 1000 17037 3253 2011 12159 14191 3695 17489 2072 1010 1998 4626 2585 2002 13668 12474 1998 4080 4679 2004 2048 23160 1012 2028 2003 2383 3276 3471 1010 2288 27141 2013 2010 3105 1006 2138 1997 3276 3471 1007 1998 1996 2610 2024 2041 2000 2131 2032 1006 2138 1997 2010 3105 1998 2010 3276 3471 1007 1012 1996 2060 3124 2003 1037 3283 24342 16429 2594 2040 10220 2000 2175 2648 2010 2188 1010 2003 2777 2011 1037 8572 14045 2611 5009 2040 4455 2006 2014 3566 2000 4366 2016 2001 16709 14701 2043 2002 2987 1005 1056 4965 16324 2013 2032 1012 2821 3398 1010 1996 2610 2024 2044 2032 2205 1010 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] christophe lambert once said he was still making movies only to make good and easy money . when i see his latest releases , i can believe that . < br / > < br / > be ##ow ##ulf is , all in all , in the " good " part of the crap movies : there are some good thrill scenes , indeed . the actors themselves aren ' t too bad . but the plot is silly , the " mortal ko ##mba ##t " - like music has nothing to do here , the ending is really s * * * * y . . . < br / > < br / > really , the only good thing about it [SEP]


INFO:tensorflow:tokens: [CLS] christophe lambert once said he was still making movies only to make good and easy money . when i see his latest releases , i can believe that . < br / > < br / > be ##ow ##ulf is , all in all , in the " good " part of the crap movies : there are some good thrill scenes , indeed . the actors themselves aren ' t too bad . but the plot is silly , the " mortal ko ##mba ##t " - like music has nothing to do here , the ending is really s * * * * y . . . < br / > < br / > really , the only good thing about it [SEP]


INFO:tensorflow:input_ids: 101 23978 12838 2320 2056 2002 2001 2145 2437 5691 2069 2000 2191 2204 1998 3733 2769 1012 2043 1045 2156 2010 6745 7085 1010 1045 2064 2903 2008 1012 1026 7987 1013 1028 1026 7987 1013 1028 2022 5004 21007 2003 1010 2035 1999 2035 1010 1999 1996 1000 2204 1000 2112 1997 1996 10231 5691 1024 2045 2024 2070 2204 16959 5019 1010 5262 1012 1996 5889 3209 4995 1005 1056 2205 2919 1012 2021 1996 5436 2003 10021 1010 1996 1000 9801 12849 11201 2102 1000 1011 2066 2189 2038 2498 2000 2079 2182 1010 1996 4566 2003 2428 1055 1008 1008 1008 1008 1061 1012 1012 1012 1026 7987 1013 1028 1026 7987 1013 1028 2428 1010 1996 2069 2204 2518 2055 2009 102


INFO:tensorflow:input_ids: 101 23978 12838 2320 2056 2002 2001 2145 2437 5691 2069 2000 2191 2204 1998 3733 2769 1012 2043 1045 2156 2010 6745 7085 1010 1045 2064 2903 2008 1012 1026 7987 1013 1028 1026 7987 1013 1028 2022 5004 21007 2003 1010 2035 1999 2035 1010 1999 1996 1000 2204 1000 2112 1997 1996 10231 5691 1024 2045 2024 2070 2204 16959 5019 1010 5262 1012 1996 5889 3209 4995 1005 1056 2205 2919 1012 2021 1996 5436 2003 10021 1010 1996 1000 9801 12849 11201 2102 1000 1011 2066 2189 2038 2498 2000 2079 2182 1010 1996 4566 2003 2428 1055 1008 1008 1008 1008 1061 1012 1012 1012 1026 7987 1013 1028 1026 7987 1013 1028 2428 1010 1996 2069 2204 2518 2055 2009 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] this was a television show that i watched during the 1960s as a child and was capt ##ivated by it . in the many years that have passed , i have often thought about this show and how good it would be to watch it again , but being mind ##ful that things of the past are not always as good as you remember them . this was a great show in the 1960s and has lost nothing , even now in 2007 . sure there are a few odd production mistakes that you see when watching old shows , and it takes a couple of episodes to get used to the dubbed voices , but having done that , i was as capt ##ivated now [SEP]


INFO:tensorflow:tokens: [CLS] this was a television show that i watched during the 1960s as a child and was capt ##ivated by it . in the many years that have passed , i have often thought about this show and how good it would be to watch it again , but being mind ##ful that things of the past are not always as good as you remember them . this was a great show in the 1960s and has lost nothing , even now in 2007 . sure there are a few odd production mistakes that you see when watching old shows , and it takes a couple of episodes to get used to the dubbed voices , but having done that , i was as capt ##ivated now [SEP]


INFO:tensorflow:input_ids: 101 2023 2001 1037 2547 2265 2008 1045 3427 2076 1996 4120 2004 1037 2775 1998 2001 14408 21967 2011 2009 1012 1999 1996 2116 2086 2008 2031 2979 1010 1045 2031 2411 2245 2055 2023 2265 1998 2129 2204 2009 2052 2022 2000 3422 2009 2153 1010 2021 2108 2568 3993 2008 2477 1997 1996 2627 2024 2025 2467 2004 2204 2004 2017 3342 2068 1012 2023 2001 1037 2307 2265 1999 1996 4120 1998 2038 2439 2498 1010 2130 2085 1999 2289 1012 2469 2045 2024 1037 2261 5976 2537 12051 2008 2017 2156 2043 3666 2214 3065 1010 1998 2009 3138 1037 3232 1997 4178 2000 2131 2109 2000 1996 9188 5755 1010 2021 2383 2589 2008 1010 1045 2001 2004 14408 21967 2085 102


INFO:tensorflow:input_ids: 101 2023 2001 1037 2547 2265 2008 1045 3427 2076 1996 4120 2004 1037 2775 1998 2001 14408 21967 2011 2009 1012 1999 1996 2116 2086 2008 2031 2979 1010 1045 2031 2411 2245 2055 2023 2265 1998 2129 2204 2009 2052 2022 2000 3422 2009 2153 1010 2021 2108 2568 3993 2008 2477 1997 1996 2627 2024 2025 2467 2004 2204 2004 2017 3342 2068 1012 2023 2001 1037 2307 2265 1999 1996 4120 1998 2038 2439 2498 1010 2130 2085 1999 2289 1012 2469 2045 2024 1037 2261 5976 2537 12051 2008 2017 2156 2043 3666 2214 3065 1010 1998 2009 3138 1037 3232 1997 4178 2000 2131 2109 2000 1996 9188 5755 1010 2021 2383 2589 2008 1010 1045 2001 2004 14408 21967 2085 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] in my opinion of this movie the entire video portion of this movie was absolute trash ! ! ! ! however the soundtrack that was used contained the music of a great heavy metal rock band , i recognized the music as being a band called first ##ry ##ke and the album was " just a nightmare " and it was very well written ! ! and i am curious to see what the rest of you movie buff ##s out there think of it , if can remember back that far i would appreciate the feed back , i collect old movie , and obscure movie sound tracks . it is a very time consuming hobby but is very reward ##ing . i have seen [SEP]


INFO:tensorflow:tokens: [CLS] in my opinion of this movie the entire video portion of this movie was absolute trash ! ! ! ! however the soundtrack that was used contained the music of a great heavy metal rock band , i recognized the music as being a band called first ##ry ##ke and the album was " just a nightmare " and it was very well written ! ! and i am curious to see what the rest of you movie buff ##s out there think of it , if can remember back that far i would appreciate the feed back , i collect old movie , and obscure movie sound tracks . it is a very time consuming hobby but is very reward ##ing . i have seen [SEP]


INFO:tensorflow:input_ids: 101 1999 2026 5448 1997 2023 3185 1996 2972 2678 4664 1997 2023 3185 2001 7619 11669 999 999 999 999 2174 1996 6050 2008 2001 2109 4838 1996 2189 1997 1037 2307 3082 3384 2600 2316 1010 1045 3858 1996 2189 2004 2108 1037 2316 2170 2034 2854 3489 1998 1996 2201 2001 1000 2074 1037 10103 1000 1998 2009 2001 2200 2092 2517 999 999 1998 1045 2572 8025 2000 2156 2054 1996 2717 1997 2017 3185 23176 2015 2041 2045 2228 1997 2009 1010 2065 2064 3342 2067 2008 2521 1045 2052 9120 1996 5438 2067 1010 1045 8145 2214 3185 1010 1998 14485 3185 2614 3162 1012 2009 2003 1037 2200 2051 15077 17792 2021 2003 2200 10377 2075 1012 1045 2031 2464 102


INFO:tensorflow:input_ids: 101 1999 2026 5448 1997 2023 3185 1996 2972 2678 4664 1997 2023 3185 2001 7619 11669 999 999 999 999 2174 1996 6050 2008 2001 2109 4838 1996 2189 1997 1037 2307 3082 3384 2600 2316 1010 1045 3858 1996 2189 2004 2108 1037 2316 2170 2034 2854 3489 1998 1996 2201 2001 1000 2074 1037 10103 1000 1998 2009 2001 2200 2092 2517 999 999 1998 1045 2572 8025 2000 2156 2054 1996 2717 1997 2017 3185 23176 2015 2041 2045 2228 1997 2009 1010 2065 2064 3342 2067 2008 2521 1045 2052 9120 1996 5438 2067 1010 1045 8145 2214 3185 1010 1998 14485 3185 2614 3162 1012 2009 2003 1037 2200 2051 15077 17792 2021 2003 2200 10377 2075 1012 1045 2031 2464 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] david cr ##one ##nberg , much like colleague david lynch , is an acquired taste . a director who plays with themes like reality , per ##version , sex , insanity and death , is bound to get the most extreme re ##ations from audiences . he proved this with films as the fly , naked lunch , crash and exit ##en ##z ( capital x , capital z ) and more recently , spider . it ' s best to see exist ##en ##z with a clear mind . try not to read too much about the plot , or it ' ll be ruined for you . what i can tell you is that cr ##one ##nberg takes you on a trip down into [SEP]


INFO:tensorflow:tokens: [CLS] david cr ##one ##nberg , much like colleague david lynch , is an acquired taste . a director who plays with themes like reality , per ##version , sex , insanity and death , is bound to get the most extreme re ##ations from audiences . he proved this with films as the fly , naked lunch , crash and exit ##en ##z ( capital x , capital z ) and more recently , spider . it ' s best to see exist ##en ##z with a clear mind . try not to read too much about the plot , or it ' ll be ruined for you . what i can tell you is that cr ##one ##nberg takes you on a trip down into [SEP]


INFO:tensorflow:input_ids: 101 2585 13675 5643 11144 1010 2172 2066 11729 2585 11404 1010 2003 2019 3734 5510 1012 1037 2472 2040 3248 2007 6991 2066 4507 1010 2566 27774 1010 3348 1010 19272 1998 2331 1010 2003 5391 2000 2131 1996 2087 6034 2128 10708 2013 9501 1012 2002 4928 2023 2007 3152 2004 1996 4875 1010 6248 6265 1010 5823 1998 6164 2368 2480 1006 3007 1060 1010 3007 1062 1007 1998 2062 3728 1010 6804 1012 2009 1005 1055 2190 2000 2156 4839 2368 2480 2007 1037 3154 2568 1012 3046 2025 2000 3191 2205 2172 2055 1996 5436 1010 2030 2009 1005 2222 2022 9868 2005 2017 1012 2054 1045 2064 2425 2017 2003 2008 13675 5643 11144 3138 2017 2006 1037 4440 2091 2046 102


INFO:tensorflow:input_ids: 101 2585 13675 5643 11144 1010 2172 2066 11729 2585 11404 1010 2003 2019 3734 5510 1012 1037 2472 2040 3248 2007 6991 2066 4507 1010 2566 27774 1010 3348 1010 19272 1998 2331 1010 2003 5391 2000 2131 1996 2087 6034 2128 10708 2013 9501 1012 2002 4928 2023 2007 3152 2004 1996 4875 1010 6248 6265 1010 5823 1998 6164 2368 2480 1006 3007 1060 1010 3007 1062 1007 1998 2062 3728 1010 6804 1012 2009 1005 1055 2190 2000 2156 4839 2368 2480 2007 1037 3154 2568 1012 3046 2025 2000 3191 2205 2172 2055 1996 5436 1010 2030 2009 1005 2222 2022 9868 2005 2017 1012 2054 1045 2064 2425 2017 2003 2008 13675 5643 11144 3138 2017 2006 1037 4440 2091 2046 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] arthur hu ##nni ##cut ##t plays a very stereo ##typical role as a mountain man ( probably the oz ##ark ##s ) who goes hunting with his favorite co ##on dog . however , the dog appears to be drowning when hu ##nni ##cut ##t jumps in after him . it becomes obvious pretty soon that despite hu ##nni ##cut ##t and his dog roaming about after leaving the water that they both died in the water - - as no one responds when he talks to them and sees and hears people talking about his and the dog ' s death . yet , oddly , hu ##nni ##cut ##t is really slow on the up ##take and it takes him a while to understand [SEP]


INFO:tensorflow:tokens: [CLS] arthur hu ##nni ##cut ##t plays a very stereo ##typical role as a mountain man ( probably the oz ##ark ##s ) who goes hunting with his favorite co ##on dog . however , the dog appears to be drowning when hu ##nni ##cut ##t jumps in after him . it becomes obvious pretty soon that despite hu ##nni ##cut ##t and his dog roaming about after leaving the water that they both died in the water - - as no one responds when he talks to them and sees and hears people talking about his and the dog ' s death . yet , oddly , hu ##nni ##cut ##t is really slow on the up ##take and it takes him a while to understand [SEP]


INFO:tensorflow:input_ids: 101 4300 15876 23500 12690 2102 3248 1037 2200 12991 27086 2535 2004 1037 3137 2158 1006 2763 1996 11472 17007 2015 1007 2040 3632 5933 2007 2010 5440 2522 2239 3899 1012 2174 1010 1996 3899 3544 2000 2022 14759 2043 15876 23500 12690 2102 14523 1999 2044 2032 1012 2009 4150 5793 3492 2574 2008 2750 15876 23500 12690 2102 1998 2010 3899 24430 2055 2044 2975 1996 2300 2008 2027 2119 2351 1999 1996 2300 1011 1011 2004 2053 2028 16412 2043 2002 7566 2000 2068 1998 5927 1998 14994 2111 3331 2055 2010 1998 1996 3899 1005 1055 2331 1012 2664 1010 15056 1010 15876 23500 12690 2102 2003 2428 4030 2006 1996 2039 15166 1998 2009 3138 2032 1037 2096 2000 3305 102


INFO:tensorflow:input_ids: 101 4300 15876 23500 12690 2102 3248 1037 2200 12991 27086 2535 2004 1037 3137 2158 1006 2763 1996 11472 17007 2015 1007 2040 3632 5933 2007 2010 5440 2522 2239 3899 1012 2174 1010 1996 3899 3544 2000 2022 14759 2043 15876 23500 12690 2102 14523 1999 2044 2032 1012 2009 4150 5793 3492 2574 2008 2750 15876 23500 12690 2102 1998 2010 3899 24430 2055 2044 2975 1996 2300 2008 2027 2119 2351 1999 1996 2300 1011 1011 2004 2053 2028 16412 2043 2002 7566 2000 2068 1998 5927 1998 14994 2111 3331 2055 2010 1998 1996 3899 1005 1055 2331 1012 2664 1010 15056 1010 15876 23500 12690 2102 2003 2428 4030 2006 1996 2039 15166 1998 2009 3138 2032 1037 2096 2000 3305 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


#### Creating a model

Now that we've prepared our data, let's focus on building a model. create_model does just this below. First, it loads the BERT tf hub module again (this time to extract the computation graph). Next, it creates a single new layer that will be trained to adapt BERT to our sentiment task (i.e. classifying whether a movie review is positive or negative). This strategy of using a mostly trained model is called fine-tuning.

In [48]:
import numpy as np
train_input_ids = []
train_input_mask = []
train_input_type_ids = []
train_labels_list = []

for feature in train_features:  
  train_input_ids.append(feature.input_ids)
  train_input_mask.append(feature.input_mask)
  train_input_type_ids.append(feature.segment_ids)
  train_labels_list.append(feature.label_id)

print(len(train_input_ids))

train_input_ids = np.array(train_input_ids, dtype='int32')
train_input_mask = np.array(train_input_mask, dtype='int32')
train_input_type_ids = np.array(train_input_type_ids, dtype='int32')
train_labels_list = np.array(train_labels_list)

5000


In [0]:
#train_input_ids[0:2]

In [0]:
test_input_ids = []
test_input_mask = []
test_input_type_ids = []
test_labels_list = []

for feature in train_features:  
  test_input_ids.append(feature.input_ids)
  test_input_mask.append(feature.input_mask)
  test_input_type_ids.append(feature.segment_ids)
  test_labels_list.append(feature.label_id)

test_input_ids = np.array(test_input_ids, dtype='int32')
test_input_mask = np.array(test_input_mask, dtype='int32')
test_input_type_ids = np.array(test_input_type_ids, dtype='int32')
test_labels_list = np.array(test_labels_list)


In [0]:
input_ids = tf.keras.layers.Input(shape=(MAX_SEQ_LENGTH, ), dtype=tf.int32, name='input_ids')
input_masks = tf.keras.layers.Input(shape=(MAX_SEQ_LENGTH, ), dtype=tf.int32, name='input_masks')
segment_ids = tf.keras.layers.Input(shape=(MAX_SEQ_LENGTH, ), dtype=tf.int32, name='segment_ids')

bert_layer = hub.KerasLayer(BERT_MODEL_HUB, trainable=True)
pooled_output, sequence_output = bert_layer([input_ids, input_masks, segment_ids])
drop_output = tf.keras.layers.Dropout(rate=0.1)(pooled_output)
dense_output = tf.keras.layers.Dense(2, name='dense_output', activation='softmax')(drop_output)

In [70]:
LEARNING_RATE = 2e-5
model = tf.keras.Model(inputs=[input_ids, input_masks, segment_ids], outputs=[dense_output])
print(model.summary())
#model.compile(optimizer = 'adam', loss='binary_crossentropy', metrics=['accuracy'])
model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_ids (InputLayer)          [(None, 128)]        0                                            
__________________________________________________________________________________________________
input_masks (InputLayer)        [(None, 128)]        0                                            
__________________________________________________________________________________________________
segment_ids (InputLayer)        [(None, 128)]        0                                            
__________________________________________________________________________________________________
keras_layer_3 (KerasLayer)      [(None, 768), (None, 109482241   input_ids[0][0]                  
                                                                 input_masks[0][0]          

In [71]:
import numpy as np
len(train_input_ids)
print(np.array(train_input_ids).shape)
print(np.array(train_input_ids).dtype)


(5000, 128)
int32


In [72]:
train_labels_list.shape

(5000,)

In [73]:
#fit
BATCH_SIZE = 50
EPOCHS = 3
history = model.fit([train_input_ids, train_input_mask, train_input_type_ids], train_labels_list, epochs=EPOCHS, batch_size=BATCH_SIZE)  
#model.fit()

#model.fit({'input_ids': train_input_ids, 'input_masks': train_input_mask, 'segment_ids': train_input_type_ids},
#          {'dense_output': train_labels_list},
#          epochs=EPOCHS, batch_size=BATCH_SIZE)

Train on 5000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [0]:
model.save('BertModel.h5')

In [58]:
test_input_type_ids.shape

(5000, 128)

In [75]:
results = model.evaluate([test_input_ids, test_input_mask, test_input_type_ids],test_labels_list, batch_size=BATCH_SIZE)



In [76]:
for name, value in zip(model.metrics_names, results):
  print("%s: %.3f" % (name, value))

loss: 0.037
sparse_categorical_accuracy: 0.991


In [61]:
test_input_mask.shape

(5000, 128)

In [0]:
#pre_save_preds = model.predict([test_input_ids, test_input_mask, test_input_type_ids])  

#model.predict({'input_ids': test_input_ids, 'input_masks': test_input_mask, 'segment_ids': test_input_type_ids})

In [36]:
len(pre_save_preds)

1000

In [0]:
def getPrediction(in_sentences):
  labels = ["Negative", "Positive"]
  input_examples = [InputExample(guid="", text_a = x, text_b = None, label=0) for x in in_sentences]
  print(label_list)
  input_features = convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)

  pred_input_ids = []
  pred_input_mask = []
  pred_input_type_ids = []
  
  for feature in input_features:  
    pred_input_ids.append(feature.input_ids)
    pred_input_mask.append(feature.input_mask)
    pred_input_type_ids.append(feature.segment_ids)
    
  pred_input_ids = np.array(pred_input_ids, dtype='int32')
  pred_input_mask = np.array(pred_input_mask, dtype='int32')
  pred_input_type_ids = np.array(pred_input_type_ids, dtype='int32')

  print(pred_input_ids.shape)
  #print(model.summary())
  
  predictions = model.predict([pred_input_ids, pred_input_mask, pred_input_type_ids])
  return predictions

In [0]:
pred_sentences = [
  "That movie was absolutely awful",
  "The acting was a bit lacking",
  "The film was creative and surprising",
  "Absolutely fantastic!"
]

In [79]:
predictions = getPrediction(pred_sentences)

[0, 1]
INFO:tensorflow:Writing example 0 of 4


INFO:tensorflow:Writing example 0 of 4


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: 


INFO:tensorflow:guid: 


INFO:tensorflow:tokens: [CLS] that movie was absolutely awful [SEP]


INFO:tensorflow:tokens: [CLS] that movie was absolutely awful [SEP]


INFO:tensorflow:input_ids: 101 2008 3185 2001 7078 9643 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 2008 3185 2001 7078 9643 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: 


INFO:tensorflow:guid: 


INFO:tensorflow:tokens: [CLS] the acting was a bit lacking [SEP]


INFO:tensorflow:tokens: [CLS] the acting was a bit lacking [SEP]


INFO:tensorflow:input_ids: 101 1996 3772 2001 1037 2978 11158 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 1996 3772 2001 1037 2978 11158 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: 


INFO:tensorflow:guid: 


INFO:tensorflow:tokens: [CLS] the film was creative and surprising [SEP]


INFO:tensorflow:tokens: [CLS] the film was creative and surprising [SEP]


INFO:tensorflow:input_ids: 101 1996 2143 2001 5541 1998 11341 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 1996 2143 2001 5541 1998 11341 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: 


INFO:tensorflow:guid: 


INFO:tensorflow:tokens: [CLS] absolutely fantastic ! [SEP]


INFO:tensorflow:tokens: [CLS] absolutely fantastic ! [SEP]


INFO:tensorflow:input_ids: 101 7078 10392 999 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 7078 10392 999 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


(4, 128)


In [80]:
predictions

array([[0.9959984 , 0.00400165],
       [0.8497251 , 0.15027489],
       [0.04698265, 0.9530173 ],
       [0.0581346 , 0.9418654 ]], dtype=float32)