In [1]:
# Copyright 2019 Google Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#Predicting Movie Review Sentiment with BERT on TF Hub

If you’ve been following Natural Language Processing over the past year, you’ve probably heard of BERT: Bidirectional Encoder Representations from Transformers. It’s a neural network architecture designed by Google researchers that’s totally transformed what’s state-of-the-art for NLP tasks, like text classification, translation, summarization, and question answering.

Now that BERT's been added to [TF Hub](https://www.tensorflow.org/hub) as a loadable module, it's easy(ish) to add into existing Tensorflow text pipelines. In an existing pipeline, BERT can replace text embedding layers like ELMO and GloVE. Alternatively, [finetuning](http://wiki.fast.ai/index.php/Fine_tuning) BERT can provide both an accuracy boost and faster training time in many cases.

Here, we'll train a model to predict whether an IMDB movie review is positive or negative using BERT in Tensorflow with tf hub. Some code was adapted from [this colab notebook](https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb). Let's get started!

In [3]:
from sklearn.model_selection import train_test_split
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime

In addition to the standard libraries we imported above, we'll need to install BERT's python package.

In [7]:
!pip3 install bert-tensorflow

Collecting bert-tensorflow
  Using cached https://files.pythonhosted.org/packages/a6/66/7eb4e8b6ea35b7cc54c322c816f976167a43019750279a8473d355800a93/bert_tensorflow-1.0.1-py2.py3-none-any.whl
Collecting six (from bert-tensorflow)
  Using cached https://files.pythonhosted.org/packages/65/eb/1f97cb97bfc2390a276969c6fae16075da282f5058082d4cb10c6c5c1dba/six-1.14.0-py2.py3-none-any.whl
Installing collected packages: six, bert-tensorflow
Successfully installed bert-tensorflow-1.0.1 six-1.14.0


In [8]:
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization




Below, we'll set an output directory location to store our model output and checkpoints. This can be a local directory, in which case you'd set OUTPUT_DIR to the name of the directory you'd like to create. If you're running this code in Google's hosted Colab, the directory won't persist after the Colab session ends.

Alternatively, if you're a GCP user, you can store output in a GCP bucket. To do that, set a directory name in OUTPUT_DIR and the name of the GCP bucket in the BUCKET field.

Set DO_DELETE to rewrite the OUTPUT_DIR if it exists. Otherwise, Tensorflow will load existing model checkpoints from that directory (if they exist).

In [13]:
# Set the output directory for saving model file
# Optionally, set a GCP bucket location

OUTPUT_DIR = 'model'#@param {type:"string"}
#@markdown Whether or not to clear/delete the directory and create a new one
DO_DELETE = False #@param {type:"boolean"}
#@markdown Set USE_BUCKET and BUCKET if you want to (optionally) store model output on GCP bucket.

# USE_BUCKET = True #@param {type:"boolean"}
# BUCKET = 'BUCKET_NAME' #@param {type:"string"}

# if USE_BUCKET:
#   OUTPUT_DIR = 'gs://{}/{}'.format(BUCKET, OUTPUT_DIR)
#    from google.colab import auth
#    auth.authenticate_user()

if DO_DELETE:
  try:
    tf.gfile.DeleteRecursively(OUTPUT_DIR)
  except:
    # Doesn't matter if the directory didn't exist
    pass
tf.gfile.MakeDirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))


***** Model output directory: model *****


#Data

First, let's download the dataset, hosted by Stanford. The code below, which downloads, extracts, and imports the IMDB Large Movie Review Dataset, is borrowed from [this Tensorflow tutorial](https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub).

In [18]:
from tensorflow import keras
import os
import re

# Load all files from a directory in a DataFrame.
def load_directory_data(directory):
  data = {}
  data["sentence"] = []
  data["sentiment"] = []
  for file_path in os.listdir(directory):
    with tf.gfile.GFile(os.path.join(directory, file_path), "r") as f:
      data["sentence"].append(f.read())
      data["sentiment"].append(re.match("\d+_(\d+)\.txt", file_path).group(1))
  return pd.DataFrame.from_dict(data)

# Merge positive and negative examples, add a polarity column and shuffle.
def load_dataset(directory):
  pos_df = load_directory_data(os.path.join(directory, "pos"))
  neg_df = load_directory_data(os.path.join(directory, "neg"))
  pos_df["polarity"] = 1
  neg_df["polarity"] = 0
  return pd.concat([pos_df, neg_df]).sample(frac=1).reset_index(drop=True)

# Download and process the dataset files.
def download_and_load_datasets(force_download=False):
  dataset = tf.keras.utils.get_file(
      fname="aclImdb.tar.gz", 
      origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz", 
      extract=True)
  
  train_df = load_dataset(os.path.join(os.path.dirname(dataset), 
                                       "aclImdb", "train"))
  test_df = load_dataset(os.path.join(os.path.dirname(dataset), 
                                      "aclImdb", "test"))
  
  return train_df, test_df


In [19]:
train, test = download_and_load_datasets()

To keep training fast, we'll take a sample of 5000 train and test examples, respectively.

In [20]:
train = train.sample(5000)
test = test.sample(5000)

In [21]:
train.columns

Index(['sentence', 'sentiment', 'polarity'], dtype='object')

For us, our input data is the 'sentence' column and our label is the 'polarity' column (0, 1 for negative and positive, respecitvely)

In [22]:
DATA_COLUMN = 'sentence'
LABEL_COLUMN = 'polarity'
# label_list is the list of labels, i.e. True, False or 0, 1 or 'dog', 'cat'
label_list = [0, 1]

#Data Preprocessing
We'll need to transform our data into a format BERT understands. This involves two steps. First, we create  `InputExample`'s using the constructor provided in the BERT library.

- `text_a` is the text we want to classify, which in this case, is the `Request` field in our Dataframe. 
- `text_b` is used if we're training a model to understand the relationship between sentences (i.e. is `text_b` a translation of `text_a`? Is `text_b` an answer to the question asked by `text_a`?). This doesn't apply to our task, so we can leave `text_b` blank.
- `label` is the label for our example, i.e. True, False

In [23]:
# Use the InputExample class from BERT's run_classifier code to create examples from the data
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None, 
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

Next, we need to preprocess our data so that it matches the data BERT was trained on. For this, we'll need to do a couple of things (but don't worry--this is also included in the Python library):


1. Lowercase our text (if we're using a BERT lowercase model)
2. Tokenize it (i.e. "sally says hi" -> ["sally", "says", "hi"])
3. Break words into WordPieces (i.e. "calling" -> ["call", "##ing"])
4. Map our words to indexes using a vocab file that BERT provides
5. Add special "CLS" and "SEP" tokens (see the [readme](https://github.com/google-research/bert))
6. Append "index" and "segment" tokens to each input (see the [BERT paper](https://arxiv.org/pdf/1810.04805.pdf))

Happily, we don't have to worry about most of these details.




To start, we'll need to load a vocabulary file and lowercasing information directly from the BERT tf hub module:

In [24]:
# This is a path to an uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
  """Get the vocab file and casing info from the Hub module."""
  with tf.Graph().as_default():
    bert_module = hub.Module(BERT_MODEL_HUB)
    tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
    with tf.Session() as sess:
      vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
                                            tokenization_info["do_lower_case"]])
      
  return bert.tokenization.FullTokenizer(
      vocab_file=vocab_file, do_lower_case=do_lower_case)

tokenizer = create_tokenizer_from_hub_module()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore








Great--we just learned that the BERT model we're using expects lowercase data (that's what stored in tokenization_info["do_lower_case"]) and we also loaded BERT's vocab file. We also created a tokenizer, which breaks words into word pieces:

In [25]:
tokenizer.tokenize("This here's an example of using the BERT tokenizer")

['this',
 'here',
 "'",
 's',
 'an',
 'example',
 'of',
 'using',
 'the',
 'bert',
 'token',
 '##izer']

Using our tokenizer, we'll call `run_classifier.convert_examples_to_features` on our InputExamples to convert them into features BERT understands.

In [26]:
# We'll set sequences to be at most 128 tokens long.
MAX_SEQ_LENGTH = 128
# Convert our train and test features to InputFeatures that BERT understands.
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)







INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] this is the best film version of dick ##en ' s classic tale . i ' ve seen it over and over on vhs , and recently acquired the dvd version , which is format ##ted for tv ( not wide - screen ) . what i find interesting about this tel ##ep ##lay is the cast of english actors who are now recognizable since many have appeared in other films / shows in north america since 1984 . my biggest surprise is edward woodward , " the equal ##izer " , as the ghost of christmas present . [SEP]


INFO:tensorflow:tokens: [CLS] this is the best film version of dick ##en ' s classic tale . i ' ve seen it over and over on vhs , and recently acquired the dvd version , which is format ##ted for tv ( not wide - screen ) . what i find interesting about this tel ##ep ##lay is the cast of english actors who are now recognizable since many have appeared in other films / shows in north america since 1984 . my biggest surprise is edward woodward , " the equal ##izer " , as the ghost of christmas present . [SEP]


INFO:tensorflow:input_ids: 101 2023 2003 1996 2190 2143 2544 1997 5980 2368 1005 1055 4438 6925 1012 1045 1005 2310 2464 2009 2058 1998 2058 2006 17550 1010 1998 3728 3734 1996 4966 2544 1010 2029 2003 4289 3064 2005 2694 1006 2025 2898 1011 3898 1007 1012 2054 1045 2424 5875 2055 2023 10093 13699 8485 2003 1996 3459 1997 2394 5889 2040 2024 2085 20123 2144 2116 2031 2596 1999 2060 3152 1013 3065 1999 2167 2637 2144 3118 1012 2026 5221 4474 2003 3487 19133 1010 1000 1996 5020 17629 1000 1010 2004 1996 5745 1997 4234 2556 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 2023 2003 1996 2190 2143 2544 1997 5980 2368 1005 1055 4438 6925 1012 1045 1005 2310 2464 2009 2058 1998 2058 2006 17550 1010 1998 3728 3734 1996 4966 2544 1010 2029 2003 4289 3064 2005 2694 1006 2025 2898 1011 3898 1007 1012 2054 1045 2424 5875 2055 2023 10093 13699 8485 2003 1996 3459 1997 2394 5889 2040 2024 2085 20123 2144 2116 2031 2596 1999 2060 3152 1013 3065 1999 2167 2637 2144 3118 1012 2026 5221 4474 2003 3487 19133 1010 1000 1996 5020 17629 1000 1010 2004 1996 5745 1997 4234 2556 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] the shame of it ! there i was , comfortable in the arm chair with a beer and a bag of popcorn , bo ##uy ##ant in the hope of another splendid mu ##ppet night of entertainment . what did i get ? disappointed ! how can the mu ##ppet ##s go from the sublime christmas carol to this ? the humour was dreadful , the songs were worse than country and western ( and that ' s saying something ) and the plot was as confused and poor as blind pew . i think the main problem was that they bit off too much in attempting treasure island . a short tale , such as christmas carol , is perfect because you can weave the [SEP]


INFO:tensorflow:tokens: [CLS] the shame of it ! there i was , comfortable in the arm chair with a beer and a bag of popcorn , bo ##uy ##ant in the hope of another splendid mu ##ppet night of entertainment . what did i get ? disappointed ! how can the mu ##ppet ##s go from the sublime christmas carol to this ? the humour was dreadful , the songs were worse than country and western ( and that ' s saying something ) and the plot was as confused and poor as blind pew . i think the main problem was that they bit off too much in attempting treasure island . a short tale , such as christmas carol , is perfect because you can weave the [SEP]


INFO:tensorflow:input_ids: 101 1996 9467 1997 2009 999 2045 1045 2001 1010 6625 1999 1996 2849 3242 2007 1037 5404 1998 1037 4524 1997 24593 1010 8945 26230 4630 1999 1996 3246 1997 2178 21459 14163 29519 2305 1997 4024 1012 2054 2106 1045 2131 1029 9364 999 2129 2064 1996 14163 29519 2015 2175 2013 1996 28341 4234 8594 2000 2023 1029 1996 17211 2001 21794 1010 1996 2774 2020 4788 2084 2406 1998 2530 1006 1998 2008 1005 1055 3038 2242 1007 1998 1996 5436 2001 2004 5457 1998 3532 2004 6397 29071 1012 1045 2228 1996 2364 3291 2001 2008 2027 2978 2125 2205 2172 1999 7161 8813 2479 1012 1037 2460 6925 1010 2107 2004 4234 8594 1010 2003 3819 2138 2017 2064 25308 1996 102


INFO:tensorflow:input_ids: 101 1996 9467 1997 2009 999 2045 1045 2001 1010 6625 1999 1996 2849 3242 2007 1037 5404 1998 1037 4524 1997 24593 1010 8945 26230 4630 1999 1996 3246 1997 2178 21459 14163 29519 2305 1997 4024 1012 2054 2106 1045 2131 1029 9364 999 2129 2064 1996 14163 29519 2015 2175 2013 1996 28341 4234 8594 2000 2023 1029 1996 17211 2001 21794 1010 1996 2774 2020 4788 2084 2406 1998 2530 1006 1998 2008 1005 1055 3038 2242 1007 1998 1996 5436 2001 2004 5457 1998 3532 2004 6397 29071 1012 1045 2228 1996 2364 3291 2001 2008 2027 2978 2125 2205 2172 1999 7161 8813 2479 1012 1037 2460 6925 1010 2107 2004 4234 8594 1010 2003 3819 2138 2017 2064 25308 1996 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] this was playing at our theater in amsterdam and the film we wanted to see was sold - out so we went to this , not knowing anything about it other than it was a documentary about the planet . we were very happy at our mis ##fort ##une as this was a very powerful film about life and the delicate balance we all share with the rest of the inhabitants of earth . this film has some of the most breath ##taking photography i have ever seen in a film and took me places from deserts to oceans to rain forests and displayed things i have never seen in a film , tv or book ! " earth " is a film that every student [SEP]


INFO:tensorflow:tokens: [CLS] this was playing at our theater in amsterdam and the film we wanted to see was sold - out so we went to this , not knowing anything about it other than it was a documentary about the planet . we were very happy at our mis ##fort ##une as this was a very powerful film about life and the delicate balance we all share with the rest of the inhabitants of earth . this film has some of the most breath ##taking photography i have ever seen in a film and took me places from deserts to oceans to rain forests and displayed things i have never seen in a film , tv or book ! " earth " is a film that every student [SEP]


INFO:tensorflow:input_ids: 101 2023 2001 2652 2012 2256 4258 1999 7598 1998 1996 2143 2057 2359 2000 2156 2001 2853 1011 2041 2061 2057 2253 2000 2023 1010 2025 4209 2505 2055 2009 2060 2084 2009 2001 1037 4516 2055 1996 4774 1012 2057 2020 2200 3407 2012 2256 28616 13028 9816 2004 2023 2001 1037 2200 3928 2143 2055 2166 1998 1996 10059 5703 2057 2035 3745 2007 1996 2717 1997 1996 4864 1997 3011 1012 2023 2143 2038 2070 1997 1996 2087 3052 17904 5855 1045 2031 2412 2464 1999 1037 2143 1998 2165 2033 3182 2013 28858 2000 17401 2000 4542 6138 1998 6913 2477 1045 2031 2196 2464 1999 1037 2143 1010 2694 2030 2338 999 1000 3011 1000 2003 1037 2143 2008 2296 3076 102


INFO:tensorflow:input_ids: 101 2023 2001 2652 2012 2256 4258 1999 7598 1998 1996 2143 2057 2359 2000 2156 2001 2853 1011 2041 2061 2057 2253 2000 2023 1010 2025 4209 2505 2055 2009 2060 2084 2009 2001 1037 4516 2055 1996 4774 1012 2057 2020 2200 3407 2012 2256 28616 13028 9816 2004 2023 2001 1037 2200 3928 2143 2055 2166 1998 1996 10059 5703 2057 2035 3745 2007 1996 2717 1997 1996 4864 1997 3011 1012 2023 2143 2038 2070 1997 1996 2087 3052 17904 5855 1045 2031 2412 2464 1999 1037 2143 1998 2165 2033 3182 2013 28858 2000 17401 2000 4542 6138 1998 6913 2477 1045 2031 2196 2464 1999 1037 2143 1010 2694 2030 2338 999 1000 3011 1000 2003 1037 2143 2008 2296 3076 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] five deadly venom ##s is not as bloody and violent as story of ricky or super ninja ##s , but it features some of the best hand - to - hand fight sequences in hong kong film history . director chang che ##h creates what is considered by many to be his masterpiece . this movie launched the careers of the five men who play the venom ##s . meng lo plays yet another bad ass . he would go on to be in super ninja ##s . ku ##o chu ##i who is philip kw ##ok would go on to story of ricky and hard boiled . any chop sock ##y fan can app ##er ##cia ##te this . but i still think it ain [SEP]


INFO:tensorflow:tokens: [CLS] five deadly venom ##s is not as bloody and violent as story of ricky or super ninja ##s , but it features some of the best hand - to - hand fight sequences in hong kong film history . director chang che ##h creates what is considered by many to be his masterpiece . this movie launched the careers of the five men who play the venom ##s . meng lo plays yet another bad ass . he would go on to be in super ninja ##s . ku ##o chu ##i who is philip kw ##ok would go on to story of ricky and hard boiled . any chop sock ##y fan can app ##er ##cia ##te this . but i still think it ain [SEP]


INFO:tensorflow:input_ids: 101 2274 9252 15779 2015 2003 2025 2004 6703 1998 6355 2004 2466 1997 11184 2030 3565 14104 2015 1010 2021 2009 2838 2070 1997 1996 2190 2192 1011 2000 1011 2192 2954 10071 1999 4291 4290 2143 2381 1012 2472 11132 18178 2232 9005 2054 2003 2641 2011 2116 2000 2022 2010 17743 1012 2023 3185 3390 1996 10922 1997 1996 2274 2273 2040 2377 1996 15779 2015 1012 27955 8840 3248 2664 2178 2919 4632 1012 2002 2052 2175 2006 2000 2022 1999 3565 14104 2015 1012 13970 2080 14684 2072 2040 2003 5170 6448 6559 2052 2175 2006 2000 2466 1997 11184 1998 2524 17020 1012 2151 24494 28407 2100 5470 2064 10439 2121 7405 2618 2023 1012 2021 1045 2145 2228 2009 7110 102


INFO:tensorflow:input_ids: 101 2274 9252 15779 2015 2003 2025 2004 6703 1998 6355 2004 2466 1997 11184 2030 3565 14104 2015 1010 2021 2009 2838 2070 1997 1996 2190 2192 1011 2000 1011 2192 2954 10071 1999 4291 4290 2143 2381 1012 2472 11132 18178 2232 9005 2054 2003 2641 2011 2116 2000 2022 2010 17743 1012 2023 3185 3390 1996 10922 1997 1996 2274 2273 2040 2377 1996 15779 2015 1012 27955 8840 3248 2664 2178 2919 4632 1012 2002 2052 2175 2006 2000 2022 1999 3565 14104 2015 1012 13970 2080 14684 2072 2040 2003 5170 6448 6559 2052 2175 2006 2000 2466 1997 11184 1998 2524 17020 1012 2151 24494 28407 2100 5470 2064 10439 2121 7405 2618 2023 1012 2021 1045 2145 2228 2009 7110 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] one the whole , this movie isn ' t perfect . it doesn ' t ' hang well ' together as the story line is basically a bunch of hooks to hang jokes . < br / > < br / > some of these jokes are a little ' too 80s ' and tend to date the picture . < br / > < br / > but some of these jokes are classic . < br / > < br / > you know a movie has something special when you and your friends still reference silly quotes from it over 2 decades later . < br / > < br / > plus , there are a bunch of familiar faces ; michael [SEP]


INFO:tensorflow:tokens: [CLS] one the whole , this movie isn ' t perfect . it doesn ' t ' hang well ' together as the story line is basically a bunch of hooks to hang jokes . < br / > < br / > some of these jokes are a little ' too 80s ' and tend to date the picture . < br / > < br / > but some of these jokes are classic . < br / > < br / > you know a movie has something special when you and your friends still reference silly quotes from it over 2 decades later . < br / > < br / > plus , there are a bunch of familiar faces ; michael [SEP]


INFO:tensorflow:input_ids: 101 2028 1996 2878 1010 2023 3185 3475 1005 1056 3819 1012 2009 2987 1005 1056 1005 6865 2092 1005 2362 2004 1996 2466 2240 2003 10468 1037 9129 1997 18008 2000 6865 13198 1012 1026 7987 1013 1028 1026 7987 1013 1028 2070 1997 2122 13198 2024 1037 2210 1005 2205 16002 1005 1998 7166 2000 3058 1996 3861 1012 1026 7987 1013 1028 1026 7987 1013 1028 2021 2070 1997 2122 13198 2024 4438 1012 1026 7987 1013 1028 1026 7987 1013 1028 2017 2113 1037 3185 2038 2242 2569 2043 2017 1998 2115 2814 2145 4431 10021 16614 2013 2009 2058 1016 5109 2101 1012 1026 7987 1013 1028 1026 7987 1013 1028 4606 1010 2045 2024 1037 9129 1997 5220 5344 1025 2745 102


INFO:tensorflow:input_ids: 101 2028 1996 2878 1010 2023 3185 3475 1005 1056 3819 1012 2009 2987 1005 1056 1005 6865 2092 1005 2362 2004 1996 2466 2240 2003 10468 1037 9129 1997 18008 2000 6865 13198 1012 1026 7987 1013 1028 1026 7987 1013 1028 2070 1997 2122 13198 2024 1037 2210 1005 2205 16002 1005 1998 7166 2000 3058 1996 3861 1012 1026 7987 1013 1028 1026 7987 1013 1028 2021 2070 1997 2122 13198 2024 4438 1012 1026 7987 1013 1028 1026 7987 1013 1028 2017 2113 1037 3185 2038 2242 2569 2043 2017 1998 2115 2814 2145 4431 10021 16614 2013 2009 2058 1016 5109 2101 1012 1026 7987 1013 1028 1026 7987 1013 1028 4606 1010 2045 2024 1037 9129 1997 5220 5344 1025 2745 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] drug runner archie moses introduces his friend rock ke ##ats to his boss , drug king ##pin frank colton . unknown to moses , ke ##ats is actually an undercover police officer . during the bust on colton ' s factory , moses accidentally shoots ke ##ats in the head . he survives the wound & later arrests moses . dod ##ging colton ' s hired assassins , the duo must overcome their mutual hatred to survive . < br / > < br / > adam sand ##ler ' s films are usually a hit - & - miss affair , with his comedies either loved by his fans or hated by everyone . this one is not as stupid as his other films , [SEP]


INFO:tensorflow:tokens: [CLS] drug runner archie moses introduces his friend rock ke ##ats to his boss , drug king ##pin frank colton . unknown to moses , ke ##ats is actually an undercover police officer . during the bust on colton ' s factory , moses accidentally shoots ke ##ats in the head . he survives the wound & later arrests moses . dod ##ging colton ' s hired assassins , the duo must overcome their mutual hatred to survive . < br / > < br / > adam sand ##ler ' s films are usually a hit - & - miss affair , with his comedies either loved by his fans or hated by everyone . this one is not as stupid as his other films , [SEP]


INFO:tensorflow:input_ids: 101 4319 5479 13255 9952 13999 2010 2767 2600 17710 11149 2000 2010 5795 1010 4319 2332 8091 3581 21000 1012 4242 2000 9952 1010 17710 11149 2003 2941 2019 16382 2610 2961 1012 2076 1996 13950 2006 21000 1005 1055 4713 1010 9952 9554 11758 17710 11149 1999 1996 2132 1012 2002 13655 1996 6357 1004 2101 17615 9952 1012 26489 4726 21000 1005 1055 5086 18364 1010 1996 6829 2442 9462 2037 8203 11150 2000 5788 1012 1026 7987 1013 1028 1026 7987 1013 1028 4205 5472 3917 1005 1055 3152 2024 2788 1037 2718 1011 1004 1011 3335 6771 1010 2007 2010 22092 2593 3866 2011 2010 4599 2030 6283 2011 3071 1012 2023 2028 2003 2025 2004 5236 2004 2010 2060 3152 1010 102


INFO:tensorflow:input_ids: 101 4319 5479 13255 9952 13999 2010 2767 2600 17710 11149 2000 2010 5795 1010 4319 2332 8091 3581 21000 1012 4242 2000 9952 1010 17710 11149 2003 2941 2019 16382 2610 2961 1012 2076 1996 13950 2006 21000 1005 1055 4713 1010 9952 9554 11758 17710 11149 1999 1996 2132 1012 2002 13655 1996 6357 1004 2101 17615 9952 1012 26489 4726 21000 1005 1055 5086 18364 1010 1996 6829 2442 9462 2037 8203 11150 2000 5788 1012 1026 7987 1013 1028 1026 7987 1013 1028 4205 5472 3917 1005 1055 3152 2024 2788 1037 2718 1011 1004 1011 3335 6771 1010 2007 2010 22092 2593 3866 2011 2010 4599 2030 6283 2011 3071 1012 2023 2028 2003 2025 2004 5236 2004 2010 2060 3152 1010 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] this is the best show ever no matter what you say ! i have been watching this show since cycle 1 . this show is never boring its wonderful how you see peoples dream come true of being a model . ty ##ra is trying her best to help young women not be ashamed of their bodies and make them believe that they are beautiful in their own way and that you don ' t have to feel beautiful by being an ##ore ##xi ##c . and just as ty ##ra says on the ty ##ra show so what if your cu ##r ##vy so what if you have a big round boot ##y so what if you have a big nose so what if your [SEP]


INFO:tensorflow:tokens: [CLS] this is the best show ever no matter what you say ! i have been watching this show since cycle 1 . this show is never boring its wonderful how you see peoples dream come true of being a model . ty ##ra is trying her best to help young women not be ashamed of their bodies and make them believe that they are beautiful in their own way and that you don ' t have to feel beautiful by being an ##ore ##xi ##c . and just as ty ##ra says on the ty ##ra show so what if your cu ##r ##vy so what if you have a big round boot ##y so what if you have a big nose so what if your [SEP]


INFO:tensorflow:input_ids: 101 2023 2003 1996 2190 2265 2412 2053 3043 2054 2017 2360 999 1045 2031 2042 3666 2023 2265 2144 5402 1015 1012 2023 2265 2003 2196 11771 2049 6919 2129 2017 2156 7243 3959 2272 2995 1997 2108 1037 2944 1012 5939 2527 2003 2667 2014 2190 2000 2393 2402 2308 2025 2022 14984 1997 2037 4230 1998 2191 2068 2903 2008 2027 2024 3376 1999 2037 2219 2126 1998 2008 2017 2123 1005 1056 2031 2000 2514 3376 2011 2108 2019 5686 9048 2278 1012 1998 2074 2004 5939 2527 2758 2006 1996 5939 2527 2265 2061 2054 2065 2115 12731 2099 10736 2061 2054 2065 2017 2031 1037 2502 2461 9573 2100 2061 2054 2065 2017 2031 1037 2502 4451 2061 2054 2065 2115 102


INFO:tensorflow:input_ids: 101 2023 2003 1996 2190 2265 2412 2053 3043 2054 2017 2360 999 1045 2031 2042 3666 2023 2265 2144 5402 1015 1012 2023 2265 2003 2196 11771 2049 6919 2129 2017 2156 7243 3959 2272 2995 1997 2108 1037 2944 1012 5939 2527 2003 2667 2014 2190 2000 2393 2402 2308 2025 2022 14984 1997 2037 4230 1998 2191 2068 2903 2008 2027 2024 3376 1999 2037 2219 2126 1998 2008 2017 2123 1005 1056 2031 2000 2514 3376 2011 2108 2019 5686 9048 2278 1012 1998 2074 2004 5939 2527 2758 2006 1996 5939 2527 2265 2061 2054 2065 2115 12731 2099 10736 2061 2054 2065 2017 2031 1037 2502 2461 9573 2100 2061 2054 2065 2017 2031 1037 2502 4451 2061 2054 2065 2115 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] this movie is silly and very short of being a funny movie . unhappy ' eastern ##ers ' are not pleased with being out west ; so they hire a drunk wagon master ( john candy ) to lead them back east . sight gag ##s were just not funny enough to carry this one . and richard lewis gets on your nerves very quickly ; but then i honestly don ' t like him at anything he does . ed lau ##ter is hilarious as the bum ##bling villain . < br / > < br / > the movie was dedicated to candy . he died from a massive heart attack ten days before the movie was completed . a stand in and digital [SEP]


INFO:tensorflow:tokens: [CLS] this movie is silly and very short of being a funny movie . unhappy ' eastern ##ers ' are not pleased with being out west ; so they hire a drunk wagon master ( john candy ) to lead them back east . sight gag ##s were just not funny enough to carry this one . and richard lewis gets on your nerves very quickly ; but then i honestly don ' t like him at anything he does . ed lau ##ter is hilarious as the bum ##bling villain . < br / > < br / > the movie was dedicated to candy . he died from a massive heart attack ten days before the movie was completed . a stand in and digital [SEP]


INFO:tensorflow:input_ids: 101 2023 3185 2003 10021 1998 2200 2460 1997 2108 1037 6057 3185 1012 12511 1005 2789 2545 1005 2024 2025 7537 2007 2108 2041 2225 1025 2061 2027 10887 1037 7144 9540 3040 1006 2198 9485 1007 2000 2599 2068 2067 2264 1012 4356 18201 2015 2020 2074 2025 6057 2438 2000 4287 2023 2028 1012 1998 2957 4572 4152 2006 2115 10627 2200 2855 1025 2021 2059 1045 9826 2123 1005 1056 2066 2032 2012 2505 2002 2515 1012 3968 21360 3334 2003 26316 2004 1996 26352 9709 12700 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 3185 2001 4056 2000 9485 1012 2002 2351 2013 1037 5294 2540 2886 2702 2420 2077 1996 3185 2001 2949 1012 1037 3233 1999 1998 3617 102


INFO:tensorflow:input_ids: 101 2023 3185 2003 10021 1998 2200 2460 1997 2108 1037 6057 3185 1012 12511 1005 2789 2545 1005 2024 2025 7537 2007 2108 2041 2225 1025 2061 2027 10887 1037 7144 9540 3040 1006 2198 9485 1007 2000 2599 2068 2067 2264 1012 4356 18201 2015 2020 2074 2025 6057 2438 2000 4287 2023 2028 1012 1998 2957 4572 4152 2006 2115 10627 2200 2855 1025 2021 2059 1045 9826 2123 1005 1056 2066 2032 2012 2505 2002 2515 1012 3968 21360 3334 2003 26316 2004 1996 26352 9709 12700 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 3185 2001 4056 2000 9485 1012 2002 2351 2013 1037 5294 2540 2886 2702 2420 2077 1996 3185 2001 2949 1012 1037 3233 1999 1998 3617 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] to be fair , it has been several years since i watched the bile committed to cell ##ulo ##id known as " here on earth , " so forgive me if my memory of the film is a little sketch ##y . i ' ll stick with the main points which plague the soul of the unfortunate viewer . < br / > < br / > scene one : chris klein , after having been thrown out of prep school ( because he looks like a seventeen year old - - yes , very bel ##ie ##vable ) , gives what i assume is his vale ##dict ##oria ##n speech . . . to a field . let me repeat that for you - - [SEP]


INFO:tensorflow:tokens: [CLS] to be fair , it has been several years since i watched the bile committed to cell ##ulo ##id known as " here on earth , " so forgive me if my memory of the film is a little sketch ##y . i ' ll stick with the main points which plague the soul of the unfortunate viewer . < br / > < br / > scene one : chris klein , after having been thrown out of prep school ( because he looks like a seventeen year old - - yes , very bel ##ie ##vable ) , gives what i assume is his vale ##dict ##oria ##n speech . . . to a field . let me repeat that for you - - [SEP]


INFO:tensorflow:input_ids: 101 2000 2022 4189 1010 2009 2038 2042 2195 2086 2144 1045 3427 1996 23974 5462 2000 3526 18845 3593 2124 2004 1000 2182 2006 3011 1010 1000 2061 9641 2033 2065 2026 3638 1997 1996 2143 2003 1037 2210 11080 2100 1012 1045 1005 2222 6293 2007 1996 2364 2685 2029 11629 1996 3969 1997 1996 15140 13972 1012 1026 7987 1013 1028 1026 7987 1013 1028 3496 2028 1024 3782 12555 1010 2044 2383 2042 6908 2041 1997 17463 2082 1006 2138 2002 3504 2066 1037 9171 2095 2214 1011 1011 2748 1010 2200 19337 2666 12423 1007 1010 3957 2054 1045 7868 2003 2010 10380 29201 11069 2078 4613 1012 1012 1012 2000 1037 2492 1012 2292 2033 9377 2008 2005 2017 1011 1011 102


INFO:tensorflow:input_ids: 101 2000 2022 4189 1010 2009 2038 2042 2195 2086 2144 1045 3427 1996 23974 5462 2000 3526 18845 3593 2124 2004 1000 2182 2006 3011 1010 1000 2061 9641 2033 2065 2026 3638 1997 1996 2143 2003 1037 2210 11080 2100 1012 1045 1005 2222 6293 2007 1996 2364 2685 2029 11629 1996 3969 1997 1996 15140 13972 1012 1026 7987 1013 1028 1026 7987 1013 1028 3496 2028 1024 3782 12555 1010 2044 2383 2042 6908 2041 1997 17463 2082 1006 2138 2002 3504 2066 1037 9171 2095 2214 1011 1011 2748 1010 2200 19337 2666 12423 1007 1010 3957 2054 1045 7868 2003 2010 10380 29201 11069 2078 4613 1012 1012 1012 2000 1037 2492 1012 2292 2033 9377 2008 2005 2017 1011 1011 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] i swear , i had never seen such a bad movie as half caste is . not only because it just makes no sense , is a huge piece of ego ##lat ##ry and self - confidence that makes me pu ##ke . < br / > < br / > sebastian ap ##oca ##da ( in spanish ap ##oca ##da has a similar pronunciation to " ap ##oca ##do " which means " out of life and happiness " ) makes here a one man army movie thinking he is sam rai ##mi or the boys who directed the blair witch project . this is the blair kit ##tie project , with an expensive low budget . < br / > < br / > [SEP]


INFO:tensorflow:tokens: [CLS] i swear , i had never seen such a bad movie as half caste is . not only because it just makes no sense , is a huge piece of ego ##lat ##ry and self - confidence that makes me pu ##ke . < br / > < br / > sebastian ap ##oca ##da ( in spanish ap ##oca ##da has a similar pronunciation to " ap ##oca ##do " which means " out of life and happiness " ) makes here a one man army movie thinking he is sam rai ##mi or the boys who directed the blair witch project . this is the blair kit ##tie project , with an expensive low budget . < br / > < br / > [SEP]


INFO:tensorflow:input_ids: 101 1045 8415 1010 1045 2018 2196 2464 2107 1037 2919 3185 2004 2431 14542 2003 1012 2025 2069 2138 2009 2074 3084 2053 3168 1010 2003 1037 4121 3538 1997 13059 20051 2854 1998 2969 1011 7023 2008 3084 2033 16405 3489 1012 1026 7987 1013 1028 1026 7987 1013 1028 6417 9706 24755 2850 1006 1999 3009 9706 24755 2850 2038 1037 2714 15498 2000 1000 9706 24755 3527 1000 2029 2965 1000 2041 1997 2166 1998 8404 1000 1007 3084 2182 1037 2028 2158 2390 3185 3241 2002 2003 3520 15547 4328 2030 1996 3337 2040 2856 1996 10503 6965 2622 1012 2023 2003 1996 10503 8934 9515 2622 1010 2007 2019 6450 2659 5166 1012 1026 7987 1013 1028 1026 7987 1013 1028 102


INFO:tensorflow:input_ids: 101 1045 8415 1010 1045 2018 2196 2464 2107 1037 2919 3185 2004 2431 14542 2003 1012 2025 2069 2138 2009 2074 3084 2053 3168 1010 2003 1037 4121 3538 1997 13059 20051 2854 1998 2969 1011 7023 2008 3084 2033 16405 3489 1012 1026 7987 1013 1028 1026 7987 1013 1028 6417 9706 24755 2850 1006 1999 3009 9706 24755 2850 2038 1037 2714 15498 2000 1000 9706 24755 3527 1000 2029 2965 1000 2041 1997 2166 1998 8404 1000 1007 3084 2182 1037 2028 2158 2390 3185 3241 2002 2003 3520 15547 4328 2030 1996 3337 2040 2856 1996 10503 6965 2622 1012 2023 2003 1996 10503 8934 9515 2622 1010 2007 2019 6450 2659 5166 1012 1026 7987 1013 1028 1026 7987 1013 1028 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


#Creating a model

Now that we've prepared our data, let's focus on building a model. `create_model` does just this below. First, it loads the BERT tf hub module again (this time to extract the computation graph). Next, it creates a single new layer that will be trained to adapt BERT to our sentiment task (i.e. classifying whether a movie review is positive or negative). This strategy of using a mostly trained model is called [fine-tuning](http://wiki.fast.ai/index.php/Fine_tuning).

In [27]:
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
  """Creates a classification model."""

  bert_module = hub.Module(
      BERT_MODEL_HUB,
      trainable=True)
  bert_inputs = dict(
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids)
  bert_outputs = bert_module(
      inputs=bert_inputs,
      signature="tokens",
      as_dict=True)

  # Use "pooled_output" for classification tasks on an entire sentence.
  # Use "sequence_outputs" for token-level output.
  output_layer = bert_outputs["pooled_output"]

  hidden_size = output_layer.shape[-1].value

  # Create our own layer to tune for politeness data.
  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):

    # Dropout helps prevent overfitting
    output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    # Convert labels into one-hot encoding
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
    # If we're predicting, we want predicted labels and the probabiltiies.
    if is_predicting:
      return (predicted_labels, log_probs)

    # If we're train/eval, compute loss between predicted and actual label
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, predicted_labels, log_probs)


Next we'll wrap our model function in a `model_fn_builder` function that adapts our model to work for training, evaluation, and prediction.

In [28]:
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
  """Returns `model_fn` closure for TPUEstimator."""
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)
    
    # TRAIN and EVAL
    if not is_predicting:

      (loss, predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      train_op = bert.optimization.create_optimizer(
          loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)

      # Calculate evaluation metrics. 
      def metric_fn(label_ids, predicted_labels):
        accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
        f1_score = tf.contrib.metrics.f1_score(
            label_ids,
            predicted_labels)
        auc = tf.metrics.auc(
            label_ids,
            predicted_labels)
        recall = tf.metrics.recall(
            label_ids,
            predicted_labels)
        precision = tf.metrics.precision(
            label_ids,
            predicted_labels) 
        true_pos = tf.metrics.true_positives(
            label_ids,
            predicted_labels)
        true_neg = tf.metrics.true_negatives(
            label_ids,
            predicted_labels)   
        false_pos = tf.metrics.false_positives(
            label_ids,
            predicted_labels)  
        false_neg = tf.metrics.false_negatives(
            label_ids,
            predicted_labels)
        return {
            "eval_accuracy": accuracy,
            "f1_score": f1_score,
            "auc": auc,
            "precision": precision,
            "recall": recall,
            "true_positives": true_pos,
            "true_negatives": true_neg,
            "false_positives": false_pos,
            "false_negatives": false_neg
        }

      eval_metrics = metric_fn(label_ids, predicted_labels)

      if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode=mode,
          loss=loss,
          train_op=train_op)
      else:
          return tf.estimator.EstimatorSpec(mode=mode,
            loss=loss,
            eval_metric_ops=eval_metrics)
    else:
      (predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      predictions = {
          'probabilities': log_probs,
          'labels': predicted_labels
      }
      return tf.estimator.EstimatorSpec(mode, predictions=predictions)

  # Return the actual model function in the closure
  return model_fn


In [29]:
# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where hte learning rate 
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100

In [30]:
# Compute # train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

In [31]:
# Specify outpit directory and number of checkpoint steps to save
run_config = tf.estimator.RunConfig(
    model_dir=OUTPUT_DIR,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

In [32]:
model_fn = model_fn_builder(
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps)

estimator = tf.estimator.Estimator(
  model_fn=model_fn,
  config=run_config,
  params={"batch_size": BATCH_SIZE})


INFO:tensorflow:Using config: {'_model_dir': 'model', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fe1cd48cd30>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


INFO:tensorflow:Using config: {'_model_dir': 'model', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fe1cd48cd30>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


Next we create an input builder function that takes our training feature set (`train_features`) and produces a generator. This is a pretty standard design pattern for working with Tensorflow [Estimators](https://www.tensorflow.org/guide/estimators).

In [33]:
# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)

Now we train our model! For me, using a Colab notebook running on Google's GPUs, my training time was about 14 minutes.

In [None]:
print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)

Beginning Training!
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.


Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.














Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


Instructions for updating:
Deprecated in favor of operator or tf.math.divide.








Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where






  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Saving checkpoints for 0 into model/model.ckpt.


INFO:tensorflow:Saving checkpoints for 0 into model/model.ckpt.


INFO:tensorflow:loss = 0.7098901, step = 1


INFO:tensorflow:loss = 0.7098901, step = 1


Now let's use our test data to see how well our model did:

In [0]:
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)

In [59]:
estimator.evaluate(input_fn=test_input_fn, steps=None)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-02-12T21:04:20Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from gs://bert-tfhub/aclImdb_v1/model.ckpt-468
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-02-12-21:06:05
INFO:tensorflow:Saving dict for global step 468: auc = 0.86659324, eval_accuracy = 0.8664, f1_score = 0.8659711, false_negatives = 375.0, false_positives = 293.0, global_step = 468, loss = 0.51870537, precision = 0.880457, recall = 0.8519542, true_negatives = 2174.0, true_positives = 2158.0
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 468: gs://bert-tfhub/aclImdb_v1/model.ckpt-468


{'auc': 0.86659324,
 'eval_accuracy': 0.8664,
 'f1_score': 0.8659711,
 'false_negatives': 375.0,
 'false_positives': 293.0,
 'global_step': 468,
 'loss': 0.51870537,
 'precision': 0.880457,
 'recall': 0.8519542,
 'true_negatives': 2174.0,
 'true_positives': 2158.0}

Now let's write code to make predictions on new sentences:

In [0]:
def getPrediction(in_sentences):
  labels = ["Negative", "Positive"]
  input_examples = [run_classifier.InputExample(guid="", text_a = x, text_b = None, label = 0) for x in in_sentences] # here, "" is just a dummy label
  input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
  predictions = estimator.predict(predict_input_fn)
  return [(sentence, prediction['probabilities'], labels[prediction['labels']]) for sentence, prediction in zip(in_sentences, predictions)]

In [0]:
pred_sentences = [
  "That movie was absolutely awful",
  "The acting was a bit lacking",
  "The film was creative and surprising",
  "Absolutely fantastic!"
]

In [72]:
predictions = getPrediction(pred_sentences)

INFO:tensorflow:Writing example 0 of 4
INFO:tensorflow:*** Example ***
INFO:tensorflow:guid: 
INFO:tensorflow:tokens: [CLS] that movie was absolutely awful [SEP]
INFO:tensorflow:input_ids: 101 2008 3185 2001 7078 9643 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Voila! We have a sentiment classifier!

In [73]:
predictions

[('That movie was absolutely awful',
  array([-4.9142293e-03, -5.3180690e+00], dtype=float32),
  'Negative'),
 ('The acting was a bit lacking',
  array([-0.03325794, -3.4200459 ], dtype=float32),
  'Negative'),
 ('The film was creative and surprising',
  array([-5.3589125e+00, -4.7171740e-03], dtype=float32),
  'Positive'),
 ('Absolutely fantastic!',
  array([-5.0434084 , -0.00647258], dtype=float32),
  'Positive')]