In [None]:
# Copyright 2019 Google Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#Predicting Movie Review Sentiment with BERT on TF Hub

If you’ve been following Natural Language Processing over the past year, you’ve probably heard of BERT: Bidirectional Encoder Representations from Transformers. It’s a neural network architecture designed by Google researchers that’s totally transformed what’s state-of-the-art for NLP tasks, like text classification, translation, summarization, and question answering.

Now that BERT's been added to [TF Hub](https://www.tensorflow.org/hub) as a loadable module, it's easy(ish) to add into existing Tensorflow text pipelines. In an existing pipeline, BERT can replace text embedding layers like ELMO and GloVE. Alternatively, [finetuning](http://wiki.fast.ai/index.php/Fine_tuning) BERT can provide both an accuracy boost and faster training time in many cases.

Here, we'll train a model to predict whether an IMDB movie review is positive or negative using BERT in Tensorflow with tf hub. Some code was adapted from [this colab notebook](https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb). Let's get started!

In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime

In addition to the standard libraries we imported above, we'll need to install BERT's python package.

In [2]:
# !pip install bert-tensorflow==1.0.1
# !pip install tensorflow==1.15.0

In [3]:
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization




Below, we'll set an output directory location to store our model output and checkpoints. This can be a local directory, in which case you'd set OUTPUT_DIR to the name of the directory you'd like to create. If you're running this code in Google's hosted Colab, the directory won't persist after the Colab session ends.

Alternatively, if you're a GCP user, you can store output in a GCP bucket. To do that, set a directory name in OUTPUT_DIR and the name of the GCP bucket in the BUCKET field.

Set DO_DELETE to rewrite the OUTPUT_DIR if it exists. Otherwise, Tensorflow will load existing model checkpoints from that directory (if they exist).

In [4]:
# Set the output directory for saving model file
# Optionally, set a GCP bucket location

OUTPUT_DIR = 'OUTPUT_DIR'#@param {type:"string"}
#@markdown Whether or not to clear/delete the directory and create a new one
# DO_DELETE = True #@param {type:"boolean"}
# #@markdown Set USE_BUCKET and BUCKET if you want to (optionally) store model output on GCP bucket.
# USE_BUCKET = False #@param {type:"boolean"}
# BUCKET = 'BUCKET_NAME' #@param {type:"string"}

# if USE_BUCKET:
#   OUTPUT_DIR = 'gs://{}/{}'.format(BUCKET, OUTPUT_DIR)
#   from google.colab import auth
#   auth.authenticate_user()

# if DO_DELETE:
#   try:
#     tf.gfile.DeleteRecursively(OUTPUT_DIR)
#   except:
#     # Doesn't matter if the directory didn't exist
#     pass
# tf.gfile.MakeDirs(OUTPUT_DIR)
# print('***** Model output directory: {} *****'.format(OUTPUT_DIR))


# Data

First, let's download the dataset, hosted by Stanford. The code below, which downloads, extracts, and imports the IMDB Large Movie Review Dataset, is borrowed from [this Tensorflow tutorial](https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub).

In [5]:
from tensorflow import keras
import os
import re

# Load all files from a directory in a DataFrame.
def load_directory_data(directory):
  data = {}
  data["sentence"] = []
  data["sentiment"] = []
  for file_path in os.listdir(directory):
    with tf.gfile.GFile(os.path.join(directory, file_path), "r") as f:
      data["sentence"].append(f.read())
      data["sentiment"].append(re.match("\d+_(\d+)\.txt", file_path).group(1))
  return pd.DataFrame.from_dict(data)

# Merge positive and negative examples, add a polarity column and shuffle.
def load_dataset(directory):
  pos_df = load_directory_data(os.path.join(directory, "pos"))
  neg_df = load_directory_data(os.path.join(directory, "neg"))
  pos_df["polarity"] = 1
  neg_df["polarity"] = 0
  return pd.concat([pos_df, neg_df]).sample(frac=1).reset_index(drop=True)

# Download and process the dataset files.
def download_and_load_datasets(force_download=False):
  dataset = tf.keras.utils.get_file(
      fname="aclImdb.tar.gz", 
      origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz", 
      extract=True)
  print(os.path.join(os.path.dirname(dataset), 
                                       "aclImdb", "train"))
  
  train_df = load_dataset(os.path.join(os.path.dirname(dataset), 
                                       "aclImdb", "train"))
  test_df = load_dataset(os.path.join(os.path.dirname(dataset), 
                                      "aclImdb", "test"))
  
  return train_df, test_df


In [6]:
train, test = download_and_load_datasets()

Downloading data from http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
C:\Users\10650\.keras\datasets\aclImdb\train


SystemError: <built-in function CreateBufferedInputStream> returned a result with an error set

To keep training fast, we'll take a sample of 5000 train and test examples, respectively.

In [6]:
train = train.sample(5000)
test = test.sample(5000)

In [7]:
train.columns
train.head()

Unnamed: 0,sentence,sentiment,polarity
15223,David Mamet's film debut has been hailed by ma...,4,0
13546,Guy walking around without motive... I will ne...,1,0
1944,I approach films about talking animals with ca...,9,1
4715,"I like Peter Sellers, most of the time. I had ...",7,1
21312,this was a favorite Christmas Special that I w...,9,1


For us, our input data is the 'sentence' column and our label is the 'polarity' column (0, 1 for negative and positive, respecitvely)

In [8]:
DATA_COLUMN = 'sentence'
LABEL_COLUMN = 'polarity'
# label_list is the list of labels, i.e. True, False or 0, 1 or 'dog', 'cat'
label_list = [0, 1]

#Data Preprocessing
We'll need to transform our data into a format BERT understands. This involves two steps. First, we create  `InputExample`'s using the constructor provided in the BERT library.

- `text_a` is the text we want to classify, which in this case, is the `Request` field in our Dataframe. 
- `text_b` is used if we're training a model to understand the relationship between sentences (i.e. is `text_b` a translation of `text_a`? Is `text_b` an answer to the question asked by `text_a`?). This doesn't apply to our task, so we can leave `text_b` blank.
- `label` is the label for our example, i.e. True, False

In [9]:
# Use the InputExample class from BERT's run_classifier code to create examples from the data
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)
print(train_InputExamples)

test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None, 
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

15223    <bert.run_classifier.InputExample object at 0x...
13546    <bert.run_classifier.InputExample object at 0x...
1944     <bert.run_classifier.InputExample object at 0x...
4715     <bert.run_classifier.InputExample object at 0x...
21312    <bert.run_classifier.InputExample object at 0x...
                               ...                        
17928    <bert.run_classifier.InputExample object at 0x...
19881    <bert.run_classifier.InputExample object at 0x...
22681    <bert.run_classifier.InputExample object at 0x...
2140     <bert.run_classifier.InputExample object at 0x...
2172     <bert.run_classifier.InputExample object at 0x...
Length: 5000, dtype: object


Next, we need to preprocess our data so that it matches the data BERT was trained on. For this, we'll need to do a couple of things (but don't worry--this is also included in the Python library):


1. Lowercase our text (if we're using a BERT lowercase model)
2. Tokenize it (i.e. "sally says hi" -> ["sally", "says", "hi"])
3. Break words into WordPieces (i.e. "calling" -> ["call", "##ing"])
4. Map our words to indexes using a vocab file that BERT provides
5. Add special "CLS" and "SEP" tokens (see the [readme](https://github.com/google-research/bert))
6. Append "index" and "segment" tokens to each input (see the [BERT paper](https://arxiv.org/pdf/1810.04805.pdf))

Happily, we don't have to worry about most of these details.




To start, we'll need to load a vocabulary file and lowercasing information directly from the BERT tf hub module:

In [10]:
# This is a path to an uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
  """Get the vocab file and casing info from the Hub module."""
  with tf.Graph().as_default():
    bert_module = hub.Module(BERT_MODEL_HUB)
    tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
    print(tokenization_info)
    with tf.Session() as sess:
      vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
                                            tokenization_info["do_lower_case"]])
      
  return bert.tokenization.FullTokenizer(
      vocab_file=vocab_file, do_lower_case=do_lower_case)

tokenizer = create_tokenizer_from_hub_module()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


{'do_lower_case': <tf.Tensor 'module_apply_tokenization_info/Const:0' shape=() dtype=bool>, 'vocab_file': <tf.Tensor 'module_apply_tokenization_info/vocab_file:0' shape=() dtype=string>}


Great--we just learned that the BERT model we're using expects lowercase data (that's what stored in tokenization_info["do_lower_case"]) and we also loaded BERT's vocab file. We also created a tokenizer, which breaks words into word pieces:

In [11]:
tokenizer.tokenize("This here's an example of using the BERT tokenizer")

['this',
 'here',
 "'",
 's',
 'an',
 'example',
 'of',
 'using',
 'the',
 'bert',
 'token',
 '##izer']

Using our tokenizer, we'll call `run_classifier.convert_examples_to_features` on our InputExamples to convert them into features BERT understands.

In [14]:
# We'll set sequences to be at most 128 tokens long.
MAX_SEQ_LENGTH = 128
# Convert our train and test features to InputFeatures that BERT understands.
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
print(len(train_features))
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)

INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] david ma ##met ' s film debut has been hailed by many as a real thinking - man ' s movie , a movie that makes you question everybody and everything . i saw it for the first time recently and couldn ' t understand what was supposed to be so great about it . < br / > < br / > the movie is about a female psychologist named margaret who is also a best - selling author . margaret has become di ##sil ##lusion ##ed by her profession and her inability to really help anyone . she tries to rec ##tify this by helping settle her patient ' s gambling debt to a shark named mike ( played by joe man ##te ##gna [SEP]


INFO:tensorflow:tokens: [CLS] david ma ##met ' s film debut has been hailed by many as a real thinking - man ' s movie , a movie that makes you question everybody and everything . i saw it for the first time recently and couldn ' t understand what was supposed to be so great about it . < br / > < br / > the movie is about a female psychologist named margaret who is also a best - selling author . margaret has become di ##sil ##lusion ##ed by her profession and her inability to really help anyone . she tries to rec ##tify this by helping settle her patient ' s gambling debt to a shark named mike ( played by joe man ##te ##gna [SEP]


INFO:tensorflow:input_ids: 101 2585 5003 11368 1005 1055 2143 2834 2038 2042 16586 2011 2116 2004 1037 2613 3241 1011 2158 1005 1055 3185 1010 1037 3185 2008 3084 2017 3160 7955 1998 2673 1012 1045 2387 2009 2005 1996 2034 2051 3728 1998 2481 1005 1056 3305 2054 2001 4011 2000 2022 2061 2307 2055 2009 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 3185 2003 2055 1037 2931 15034 2315 5545 2040 2003 2036 1037 2190 1011 4855 3166 1012 5545 2038 2468 4487 27572 24117 2098 2011 2014 9518 1998 2014 13720 2000 2428 2393 3087 1012 2016 5363 2000 28667 27351 2023 2011 5094 7392 2014 5776 1005 1055 12219 7016 2000 1037 11420 2315 3505 1006 2209 2011 3533 2158 2618 16989 102


INFO:tensorflow:input_ids: 101 2585 5003 11368 1005 1055 2143 2834 2038 2042 16586 2011 2116 2004 1037 2613 3241 1011 2158 1005 1055 3185 1010 1037 3185 2008 3084 2017 3160 7955 1998 2673 1012 1045 2387 2009 2005 1996 2034 2051 3728 1998 2481 1005 1056 3305 2054 2001 4011 2000 2022 2061 2307 2055 2009 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 3185 2003 2055 1037 2931 15034 2315 5545 2040 2003 2036 1037 2190 1011 4855 3166 1012 5545 2038 2468 4487 27572 24117 2098 2011 2014 9518 1998 2014 13720 2000 2428 2393 3087 1012 2016 5363 2000 28667 27351 2023 2011 5094 7392 2014 5776 1005 1055 12219 7016 2000 1037 11420 2315 3505 1006 2209 2011 3533 2158 2618 16989 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] guy walking around without motive . . . i will never get those two hours of my life back . the guy kept on assuming identities and cheating on his pregnant wife . what was i thinking ? how did this win a price anywhere ? i understood he loved his father but other than that the movie was completely sense ##less to me . what was the purpose of walking so much and going to the funeral of a stranger for no apparent reason . how did this en ##rich his life ? ? ? why did we have to see the dying old lady on her underwear ? ? ? ? ! ! ! why ? ? ? ! ! ! ! < br [SEP]


INFO:tensorflow:tokens: [CLS] guy walking around without motive . . . i will never get those two hours of my life back . the guy kept on assuming identities and cheating on his pregnant wife . what was i thinking ? how did this win a price anywhere ? i understood he loved his father but other than that the movie was completely sense ##less to me . what was the purpose of walking so much and going to the funeral of a stranger for no apparent reason . how did this en ##rich his life ? ? ? why did we have to see the dying old lady on her underwear ? ? ? ? ! ! ! why ? ? ? ! ! ! ! < br [SEP]


INFO:tensorflow:input_ids: 101 3124 3788 2105 2302 15793 1012 1012 1012 1045 2097 2196 2131 2216 2048 2847 1997 2026 2166 2067 1012 1996 3124 2921 2006 10262 15702 1998 16789 2006 2010 6875 2564 1012 2054 2001 1045 3241 1029 2129 2106 2023 2663 1037 3976 5973 1029 1045 5319 2002 3866 2010 2269 2021 2060 2084 2008 1996 3185 2001 3294 3168 3238 2000 2033 1012 2054 2001 1996 3800 1997 3788 2061 2172 1998 2183 2000 1996 6715 1997 1037 7985 2005 2053 6835 3114 1012 2129 2106 2023 4372 13149 2010 2166 1029 1029 1029 2339 2106 2057 2031 2000 2156 1996 5996 2214 3203 2006 2014 14236 1029 1029 1029 1029 999 999 999 2339 1029 1029 1029 999 999 999 999 1026 7987 102


INFO:tensorflow:input_ids: 101 3124 3788 2105 2302 15793 1012 1012 1012 1045 2097 2196 2131 2216 2048 2847 1997 2026 2166 2067 1012 1996 3124 2921 2006 10262 15702 1998 16789 2006 2010 6875 2564 1012 2054 2001 1045 3241 1029 2129 2106 2023 2663 1037 3976 5973 1029 1045 5319 2002 3866 2010 2269 2021 2060 2084 2008 1996 3185 2001 3294 3168 3238 2000 2033 1012 2054 2001 1996 3800 1997 3788 2061 2172 1998 2183 2000 1996 6715 1997 1037 7985 2005 2053 6835 3114 1012 2129 2106 2023 4372 13149 2010 2166 1029 1029 1029 2339 2106 2057 2031 2000 2156 1996 5996 2214 3203 2006 2014 14236 1029 1029 1029 1029 999 999 999 2339 1029 1029 1029 999 999 999 999 1026 7987 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] i approach films about talking animals with care . for every wonderful one like babe , you get an equally poor one like the dreadful remake of home ##ward bound : the incredible journey . or in the case of cats & dogs , you have a great idea for a film not living up to its potential . when i heard about paul ##ie , the premise of a wise ##cr ##ack ##ing parrot didn ' t exactly fill me with confidence . but i found the film a pleasant surprise . and it manages to sneak its way into your heart without you realising . < br / > < br / > a russian jan ##itor , mis ##ha vi ##ly ##enko ##v [SEP]


INFO:tensorflow:tokens: [CLS] i approach films about talking animals with care . for every wonderful one like babe , you get an equally poor one like the dreadful remake of home ##ward bound : the incredible journey . or in the case of cats & dogs , you have a great idea for a film not living up to its potential . when i heard about paul ##ie , the premise of a wise ##cr ##ack ##ing parrot didn ' t exactly fill me with confidence . but i found the film a pleasant surprise . and it manages to sneak its way into your heart without you realising . < br / > < br / > a russian jan ##itor , mis ##ha vi ##ly ##enko ##v [SEP]


INFO:tensorflow:input_ids: 101 1045 3921 3152 2055 3331 4176 2007 2729 1012 2005 2296 6919 2028 2066 11561 1010 2017 2131 2019 8053 3532 2028 2066 1996 21794 12661 1997 2188 7652 5391 1024 1996 9788 4990 1012 2030 1999 1996 2553 1997 8870 1004 6077 1010 2017 2031 1037 2307 2801 2005 1037 2143 2025 2542 2039 2000 2049 4022 1012 2043 1045 2657 2055 2703 2666 1010 1996 18458 1997 1037 7968 26775 8684 2075 22530 2134 1005 1056 3599 6039 2033 2007 7023 1012 2021 1045 2179 1996 2143 1037 8242 4474 1012 1998 2009 9020 2000 13583 2049 2126 2046 2115 2540 2302 2017 27504 1012 1026 7987 1013 1028 1026 7987 1013 1028 1037 2845 5553 15660 1010 28616 3270 6819 2135 17868 2615 102


INFO:tensorflow:input_ids: 101 1045 3921 3152 2055 3331 4176 2007 2729 1012 2005 2296 6919 2028 2066 11561 1010 2017 2131 2019 8053 3532 2028 2066 1996 21794 12661 1997 2188 7652 5391 1024 1996 9788 4990 1012 2030 1999 1996 2553 1997 8870 1004 6077 1010 2017 2031 1037 2307 2801 2005 1037 2143 2025 2542 2039 2000 2049 4022 1012 2043 1045 2657 2055 2703 2666 1010 1996 18458 1997 1037 7968 26775 8684 2075 22530 2134 1005 1056 3599 6039 2033 2007 7023 1012 2021 1045 2179 1996 2143 1037 8242 4474 1012 1998 2009 9020 2000 13583 2049 2126 2046 2115 2540 2302 2017 27504 1012 1026 7987 1013 1028 1026 7987 1013 1028 1037 2845 5553 15660 1010 28616 3270 6819 2135 17868 2615 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] i like peter sellers , most of the time . i had never seen him portray an upper - class brit until this movie . he pulls it off pretty well , although you see bits of inspector cl ##ouse ##au in the mix . it doesn ' t get interesting until gold ##ie ha ##wn arrives . < br / > < br / > i never expected the youthful ha ##wn to deliver such a solid performance . her timing was great and her expressions were price ##less . the way she alternately shoots sellers le ##cher ##ous character down and seduce ##s him is beautiful to watch . verbal spa ##rring like i ' ve seldom seen from a movie of that era [SEP]


INFO:tensorflow:tokens: [CLS] i like peter sellers , most of the time . i had never seen him portray an upper - class brit until this movie . he pulls it off pretty well , although you see bits of inspector cl ##ouse ##au in the mix . it doesn ' t get interesting until gold ##ie ha ##wn arrives . < br / > < br / > i never expected the youthful ha ##wn to deliver such a solid performance . her timing was great and her expressions were price ##less . the way she alternately shoots sellers le ##cher ##ous character down and seduce ##s him is beautiful to watch . verbal spa ##rring like i ' ve seldom seen from a movie of that era [SEP]


INFO:tensorflow:input_ids: 101 1045 2066 2848 19041 1010 2087 1997 1996 2051 1012 1045 2018 2196 2464 2032 17279 2019 3356 1011 2465 28101 2127 2023 3185 1012 2002 8005 2009 2125 3492 2092 1010 2348 2017 2156 9017 1997 7742 18856 15441 4887 1999 1996 4666 1012 2009 2987 1005 1056 2131 5875 2127 2751 2666 5292 7962 8480 1012 1026 7987 1013 1028 1026 7987 1013 1028 1045 2196 3517 1996 22446 5292 7962 2000 8116 2107 1037 5024 2836 1012 2014 10984 2001 2307 1998 2014 11423 2020 3976 3238 1012 1996 2126 2016 23554 11758 19041 3393 7474 3560 2839 2091 1998 23199 2015 2032 2003 3376 2000 3422 1012 12064 12403 18807 2066 1045 1005 2310 15839 2464 2013 1037 3185 1997 2008 3690 102


INFO:tensorflow:input_ids: 101 1045 2066 2848 19041 1010 2087 1997 1996 2051 1012 1045 2018 2196 2464 2032 17279 2019 3356 1011 2465 28101 2127 2023 3185 1012 2002 8005 2009 2125 3492 2092 1010 2348 2017 2156 9017 1997 7742 18856 15441 4887 1999 1996 4666 1012 2009 2987 1005 1056 2131 5875 2127 2751 2666 5292 7962 8480 1012 1026 7987 1013 1028 1026 7987 1013 1028 1045 2196 3517 1996 22446 5292 7962 2000 8116 2107 1037 5024 2836 1012 2014 10984 2001 2307 1998 2014 11423 2020 3976 3238 1012 1996 2126 2016 23554 11758 19041 3393 7474 3560 2839 2091 1998 23199 2015 2032 2003 3376 2000 3422 1012 12064 12403 18807 2066 1045 1005 2310 15839 2464 2013 1037 3185 1997 2008 3690 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] this was a favorite christmas special that i wish that they would release on vhs or dvd , since my 33 rpm got lost , and any cassette ##s i made are also long gone . < br / > < br / > i am not even a big john denver fan but was very impressed with the music , which was mostly traditional favorites with a mu ##ppet spin ( es ##p little st . nick ! ) it also contained a few little known songs ( original ? ) . < br / > < br / > even though it was done at the end of the ' 70 ' s this show had a timeless feel to it . hoping to [SEP]


INFO:tensorflow:tokens: [CLS] this was a favorite christmas special that i wish that they would release on vhs or dvd , since my 33 rpm got lost , and any cassette ##s i made are also long gone . < br / > < br / > i am not even a big john denver fan but was very impressed with the music , which was mostly traditional favorites with a mu ##ppet spin ( es ##p little st . nick ! ) it also contained a few little known songs ( original ? ) . < br / > < br / > even though it was done at the end of the ' 70 ' s this show had a timeless feel to it . hoping to [SEP]


INFO:tensorflow:input_ids: 101 2023 2001 1037 5440 4234 2569 2008 1045 4299 2008 2027 2052 2713 2006 17550 2030 4966 1010 2144 2026 3943 11575 2288 2439 1010 1998 2151 13903 2015 1045 2081 2024 2036 2146 2908 1012 1026 7987 1013 1028 1026 7987 1013 1028 1045 2572 2025 2130 1037 2502 2198 7573 5470 2021 2001 2200 7622 2007 1996 2189 1010 2029 2001 3262 3151 20672 2007 1037 14163 29519 6714 1006 9686 2361 2210 2358 1012 4172 999 1007 2009 2036 4838 1037 2261 2210 2124 2774 1006 2434 1029 1007 1012 1026 7987 1013 1028 1026 7987 1013 1028 2130 2295 2009 2001 2589 2012 1996 2203 1997 1996 1005 3963 1005 1055 2023 2265 2018 1037 27768 2514 2000 2009 1012 5327 2000 102


INFO:tensorflow:input_ids: 101 2023 2001 1037 5440 4234 2569 2008 1045 4299 2008 2027 2052 2713 2006 17550 2030 4966 1010 2144 2026 3943 11575 2288 2439 1010 1998 2151 13903 2015 1045 2081 2024 2036 2146 2908 1012 1026 7987 1013 1028 1026 7987 1013 1028 1045 2572 2025 2130 1037 2502 2198 7573 5470 2021 2001 2200 7622 2007 1996 2189 1010 2029 2001 3262 3151 20672 2007 1037 14163 29519 6714 1006 9686 2361 2210 2358 1012 4172 999 1007 2009 2036 4838 1037 2261 2210 2124 2774 1006 2434 1029 1007 1012 1026 7987 1013 1028 1026 7987 1013 1028 2130 2295 2009 2001 2589 2012 1996 2203 1997 1996 1005 3963 1005 1055 2023 2265 2018 1037 27768 2514 2000 2009 1012 5327 2000 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


5000
INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] so me and my friend are car ##ous ##ing our local movie rental store and are looking for something to pick up to go along with eternal sunshine of the spot ##less mind , so why not pick up the third installment in the scare ##crow series ! ? ! keep in mind that this is not just scare ##crow three ; this is , scare ##crow : gone wild . now both of us had seen to the first two scare ##crow ##s so we felt ob ##liga ##ted to finish the job . let ' s start with the cover of the dvd first . first we notice a picture of ken shamrock ( " the world ' s most dangerous man " ) [SEP]


INFO:tensorflow:tokens: [CLS] so me and my friend are car ##ous ##ing our local movie rental store and are looking for something to pick up to go along with eternal sunshine of the spot ##less mind , so why not pick up the third installment in the scare ##crow series ! ? ! keep in mind that this is not just scare ##crow three ; this is , scare ##crow : gone wild . now both of us had seen to the first two scare ##crow ##s so we felt ob ##liga ##ted to finish the job . let ' s start with the cover of the dvd first . first we notice a picture of ken shamrock ( " the world ' s most dangerous man " ) [SEP]


INFO:tensorflow:input_ids: 101 2061 2033 1998 2026 2767 2024 2482 3560 2075 2256 2334 3185 12635 3573 1998 2024 2559 2005 2242 2000 4060 2039 2000 2175 2247 2007 10721 9609 1997 1996 3962 3238 2568 1010 2061 2339 2025 4060 2039 1996 2353 18932 1999 1996 12665 24375 2186 999 1029 999 2562 1999 2568 2008 2023 2003 2025 2074 12665 24375 2093 1025 2023 2003 1010 12665 24375 1024 2908 3748 1012 2085 2119 1997 2149 2018 2464 2000 1996 2034 2048 12665 24375 2015 2061 2057 2371 27885 14715 3064 2000 3926 1996 3105 1012 2292 1005 1055 2707 2007 1996 3104 1997 1996 4966 2034 1012 2034 2057 5060 1037 3861 1997 6358 28782 1006 1000 1996 2088 1005 1055 2087 4795 2158 1000 1007 102


INFO:tensorflow:input_ids: 101 2061 2033 1998 2026 2767 2024 2482 3560 2075 2256 2334 3185 12635 3573 1998 2024 2559 2005 2242 2000 4060 2039 2000 2175 2247 2007 10721 9609 1997 1996 3962 3238 2568 1010 2061 2339 2025 4060 2039 1996 2353 18932 1999 1996 12665 24375 2186 999 1029 999 2562 1999 2568 2008 2023 2003 2025 2074 12665 24375 2093 1025 2023 2003 1010 12665 24375 1024 2908 3748 1012 2085 2119 1997 2149 2018 2464 2000 1996 2034 2048 12665 24375 2015 2061 2057 2371 27885 14715 3064 2000 3926 1996 3105 1012 2292 1005 1055 2707 2007 1996 3104 1997 1996 4966 2034 1012 2034 2057 5060 1037 3861 1997 6358 28782 1006 1000 1996 2088 1005 1055 2087 4795 2158 1000 1007 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] brilliant acting , excellent plot , wonderful special effects ! this is what i would say about this movie if i had been watching it with a bag of dia ##rre ##ha on my head for the entire film . instead , i endured a 2 hour crap - o - rama . our " brilliant " story begins with some billion ##are who has nothing better to do than look in volcanoes in a vain attempt to find his lucky charms . instead , he finds a 5 ' 4 " man in a che ##es ##y rubber dinosaur suit and some queer cave - folk . < br / > < br / > in his infinite wisdom , ( along with his infinitely [SEP]


INFO:tensorflow:tokens: [CLS] brilliant acting , excellent plot , wonderful special effects ! this is what i would say about this movie if i had been watching it with a bag of dia ##rre ##ha on my head for the entire film . instead , i endured a 2 hour crap - o - rama . our " brilliant " story begins with some billion ##are who has nothing better to do than look in volcanoes in a vain attempt to find his lucky charms . instead , he finds a 5 ' 4 " man in a che ##es ##y rubber dinosaur suit and some queer cave - folk . < br / > < br / > in his infinite wisdom , ( along with his infinitely [SEP]


INFO:tensorflow:input_ids: 101 8235 3772 1010 6581 5436 1010 6919 2569 3896 999 2023 2003 2054 1045 2052 2360 2055 2023 3185 2065 1045 2018 2042 3666 2009 2007 1037 4524 1997 22939 14343 3270 2006 2026 2132 2005 1996 2972 2143 1012 2612 1010 1045 16753 1037 1016 3178 10231 1011 1051 1011 14115 1012 2256 1000 8235 1000 2466 4269 2007 2070 4551 12069 2040 2038 2498 2488 2000 2079 2084 2298 1999 23694 1999 1037 15784 3535 2000 2424 2010 5341 24044 1012 2612 1010 2002 4858 1037 1019 1005 1018 1000 2158 1999 1037 18178 2229 2100 8903 15799 4848 1998 2070 19483 5430 1011 5154 1012 1026 7987 1013 1028 1026 7987 1013 1028 1999 2010 10709 9866 1010 1006 2247 2007 2010 25773 102


INFO:tensorflow:input_ids: 101 8235 3772 1010 6581 5436 1010 6919 2569 3896 999 2023 2003 2054 1045 2052 2360 2055 2023 3185 2065 1045 2018 2042 3666 2009 2007 1037 4524 1997 22939 14343 3270 2006 2026 2132 2005 1996 2972 2143 1012 2612 1010 1045 16753 1037 1016 3178 10231 1011 1051 1011 14115 1012 2256 1000 8235 1000 2466 4269 2007 2070 4551 12069 2040 2038 2498 2488 2000 2079 2084 2298 1999 23694 1999 1037 15784 3535 2000 2424 2010 5341 24044 1012 2612 1010 2002 4858 1037 1019 1005 1018 1000 2158 1999 1037 18178 2229 2100 8903 15799 4848 1998 2070 19483 5430 1011 5154 1012 1026 7987 1013 1028 1026 7987 1013 1028 1999 2010 10709 9866 1010 1006 2247 2007 2010 25773 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None






INFO:tensorflow:input_ids: 101 4599 1997 27660 5691 2097 7842 14550 2023 2092 1011 2081 2659 1011 5166 10874 2008 2003 7687 1037 12661 1997 1996 4131 2577 14412 4438 1000 2043 8484 8902 24198 1012 1000 1037 15699 2003 3753 2005 1037 2379 1011 12365 2007 3011 1010 1998 2043 2010 3507 6529 27770 2010 16234 1997 12677 1010 18080 7155 2848 10554 1006 6877 20517 1007 29438 1037 2177 1997 2797 9387 2000 10082 9570 2019 5230 8493 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 2466 4895 10371 2015 2083 1996 2159 1997 6740 1011 5391 6084 1011 2162 8003 5180 14086 1006 2848 21058 8609 2072 1007 2040 21089 9418 10554 1005 1055 5023 8493 1998 2059 7288 2008 10554 2003 3308 2005 4363 102


INFO:tensorflow:input_ids: 101 4599 1997 27660 5691 2097 7842 14550 2023 2092 1011 2081 2659 1011 5166 10874 2008 2003 7687 1037 12661 1997 1996 4131 2577 14412 4438 1000 2043 8484 8902 24198 1012 1000 1037 15699 2003 3753 2005 1037 2379 1011 12365 2007 3011 1010 1998 2043 2010 3507 6529 27770 2010 16234 1997 12677 1010 18080 7155 2848 10554 1006 6877 20517 1007 29438 1037 2177 1997 2797 9387 2000 10082 9570 2019 5230 8493 1012 1026 7987 1013 1028 1026 7987 1013 1028 1996 2466 4895 10371 2015 2083 1996 2159 1997 6740 1011 5391 6084 1011 2162 8003 5180 14086 1006 2848 21058 8609 2072 1007 2040 21089 9418 10554 1005 1055 5023 8493 1998 2059 7288 2008 10554 2003 3308 2005 4363 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] it has started quietly . if your are looking for an action - packed movie this is absolutely not the right choice . all characters are slowly depicted on the scene . stroke after stroke on the scene canvas . none can take away his hands to the priest and so the sisters lifespan devotion can only remain into the village . philipp ##a and martina know their destiny , belong only to the village . so when you understand that , you are on the movie scene , in the village that becomes the whole known world in that time . when , no technology can let you imagine anything else than the campaign , the village , the sea . you feel the rhythm [SEP]


INFO:tensorflow:tokens: [CLS] it has started quietly . if your are looking for an action - packed movie this is absolutely not the right choice . all characters are slowly depicted on the scene . stroke after stroke on the scene canvas . none can take away his hands to the priest and so the sisters lifespan devotion can only remain into the village . philipp ##a and martina know their destiny , belong only to the village . so when you understand that , you are on the movie scene , in the village that becomes the whole known world in that time . when , no technology can let you imagine anything else than the campaign , the village , the sea . you feel the rhythm [SEP]


INFO:tensorflow:input_ids: 101 2009 2038 2318 5168 1012 2065 2115 2024 2559 2005 2019 2895 1011 8966 3185 2023 2003 7078 2025 1996 2157 3601 1012 2035 3494 2024 3254 8212 2006 1996 3496 1012 6909 2044 6909 2006 1996 3496 10683 1012 3904 2064 2202 2185 2010 2398 2000 1996 5011 1998 2061 1996 5208 26462 13347 2064 2069 3961 2046 1996 2352 1012 20765 2050 1998 23508 2113 2037 10461 1010 7141 2069 2000 1996 2352 1012 2061 2043 2017 3305 2008 1010 2017 2024 2006 1996 3185 3496 1010 1999 1996 2352 2008 4150 1996 2878 2124 2088 1999 2008 2051 1012 2043 1010 2053 2974 2064 2292 2017 5674 2505 2842 2084 1996 3049 1010 1996 2352 1010 1996 2712 1012 2017 2514 1996 6348 102


INFO:tensorflow:input_ids: 101 2009 2038 2318 5168 1012 2065 2115 2024 2559 2005 2019 2895 1011 8966 3185 2023 2003 7078 2025 1996 2157 3601 1012 2035 3494 2024 3254 8212 2006 1996 3496 1012 6909 2044 6909 2006 1996 3496 10683 1012 3904 2064 2202 2185 2010 2398 2000 1996 5011 1998 2061 1996 5208 26462 13347 2064 2069 3961 2046 1996 2352 1012 20765 2050 1998 23508 2113 2037 10461 1010 7141 2069 2000 1996 2352 1012 2061 2043 2017 3305 2008 1010 2017 2024 2006 1996 3185 3496 1010 1999 1996 2352 2008 4150 1996 2878 2124 2088 1999 2008 2051 1012 2043 1010 2053 2974 2064 2292 2017 5674 2505 2842 2084 1996 3049 1010 1996 2352 1010 1996 2712 1012 2017 2514 1996 6348 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] u ##gh ##h this movie is awful . the script is stupid and of course chase doesn ' t tell zoe ##y he doesn ' t love her ! ! ! like every episode . . . ill never understand zoe ##y 101 ( the show ) also , why the heck does logan ' s dad act so re ##tar ##ted . and its only about zoe ##y and chase what about the other characters . its always the same in every episode quinn makes and invention something goes terribly wrong with the invention and zoe ##ys brother always gets involved in it . if you haven ' t seen it don ' t waste an hour watching this cu ##z you ' ll be [SEP]


INFO:tensorflow:tokens: [CLS] u ##gh ##h this movie is awful . the script is stupid and of course chase doesn ' t tell zoe ##y he doesn ' t love her ! ! ! like every episode . . . ill never understand zoe ##y 101 ( the show ) also , why the heck does logan ' s dad act so re ##tar ##ted . and its only about zoe ##y and chase what about the other characters . its always the same in every episode quinn makes and invention something goes terribly wrong with the invention and zoe ##ys brother always gets involved in it . if you haven ' t seen it don ' t waste an hour watching this cu ##z you ' ll be [SEP]


INFO:tensorflow:input_ids: 101 1057 5603 2232 2023 3185 2003 9643 1012 1996 5896 2003 5236 1998 1997 2607 5252 2987 1005 1056 2425 11199 2100 2002 2987 1005 1056 2293 2014 999 999 999 2066 2296 2792 1012 1012 1012 5665 2196 3305 11199 2100 7886 1006 1996 2265 1007 2036 1010 2339 1996 17752 2515 6307 1005 1055 3611 2552 2061 2128 7559 3064 1012 1998 2049 2069 2055 11199 2100 1998 5252 2054 2055 1996 2060 3494 1012 2049 2467 1996 2168 1999 2296 2792 8804 3084 1998 11028 2242 3632 16668 3308 2007 1996 11028 1998 11199 7274 2567 2467 4152 2920 1999 2009 1012 2065 2017 4033 1005 1056 2464 2009 2123 1005 1056 5949 2019 3178 3666 2023 12731 2480 2017 1005 2222 2022 102


INFO:tensorflow:input_ids: 101 1057 5603 2232 2023 3185 2003 9643 1012 1996 5896 2003 5236 1998 1997 2607 5252 2987 1005 1056 2425 11199 2100 2002 2987 1005 1056 2293 2014 999 999 999 2066 2296 2792 1012 1012 1012 5665 2196 3305 11199 2100 7886 1006 1996 2265 1007 2036 1010 2339 1996 17752 2515 6307 1005 1055 3611 2552 2061 2128 7559 3064 1012 1998 2049 2069 2055 11199 2100 1998 5252 2054 2055 1996 2060 3494 1012 2049 2467 1996 2168 1999 2296 2792 8804 3084 1998 11028 2242 3632 16668 3308 2007 1996 11028 1998 11199 7274 2567 2467 4152 2920 1999 2009 1012 2065 2017 4033 1005 1056 2464 2009 2123 1005 1056 5949 2019 3178 3666 2023 12731 2480 2017 1005 2222 2022 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


#Creating a model

Now that we've prepared our data, let's focus on building a model. `create_model` does just this below. First, it loads the BERT tf hub module again (this time to extract the computation graph). Next, it creates a single new layer that will be trained to adapt BERT to our sentiment task (i.e. classifying whether a movie review is positive or negative). This strategy of using a mostly trained model is called [fine-tuning](http://wiki.fast.ai/index.php/Fine_tuning).

In [25]:
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
  """Creates a classification model."""

  bert_module = hub.Module(
      BERT_MODEL_HUB,
      trainable=True)
  bert_inputs = dict(
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids)
  bert_outputs = bert_module(
      inputs=bert_inputs,
      signature="tokens",
      as_dict=True)
  
  print(bert_outputs.keys())
  # Use "pooled_output" for classification tasks on an entire sentence.
  # Use "sequence_outputs" for token-level output.
  output_layer = bert_outputs["pooled_output"]

  hidden_size = output_layer.shape[-1].value

  # Create our own layer to tune for politeness data.
  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):

    # Dropout helps prevent overfitting
    output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    # Convert labels into one-hot encoding
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
    # If we're predicting, we want predicted labels and the probabiltiies.
    if is_predicting:
      return (predicted_labels, log_probs)

    # If we're train/eval, compute loss between predicted and actual label
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, predicted_labels, log_probs)


Next we'll wrap our model function in a `model_fn_builder` function that adapts our model to work for training, evaluation, and prediction.

In [17]:
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
  """Returns `model_fn` closure for TPUEstimator."""
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)
    
    # TRAIN and EVAL
    if not is_predicting:

      (loss, predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      train_op = bert.optimization.create_optimizer(
          loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)

      # Calculate evaluation metrics. 
      def metric_fn(label_ids, predicted_labels):
        accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
        f1_score = tf.contrib.metrics.f1_score(
            label_ids,
            predicted_labels)
        auc = tf.metrics.auc(
            label_ids,
            predicted_labels)
        recall = tf.metrics.recall(
            label_ids,
            predicted_labels)
        precision = tf.metrics.precision(
            label_ids,
            predicted_labels) 
        true_pos = tf.metrics.true_positives(
            label_ids,
            predicted_labels)
        true_neg = tf.metrics.true_negatives(
            label_ids,
            predicted_labels)   
        false_pos = tf.metrics.false_positives(
            label_ids,
            predicted_labels)  
        false_neg = tf.metrics.false_negatives(
            label_ids,
            predicted_labels)
        return {
            "eval_accuracy": accuracy,
            "f1_score": f1_score,
            "auc": auc,
            "precision": precision,
            "recall": recall,
            "true_positives": true_pos,
            "true_negatives": true_neg,
            "false_positives": false_pos,
            "false_negatives": false_neg
        }

      eval_metrics = metric_fn(label_ids, predicted_labels)

      if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode=mode,
          loss=loss,
          train_op=train_op)
      else:
          return tf.estimator.EstimatorSpec(mode=mode,
            loss=loss,
            eval_metric_ops=eval_metrics)
    else:
      (predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      predictions = {
          'probabilities': log_probs,
          'labels': predicted_labels
      }
      return tf.estimator.EstimatorSpec(mode, predictions=predictions)

  # Return the actual model function in the closure
  return model_fn


In [18]:
# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where hte learning rate 
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100

In [19]:
# Compute # train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
print(num_train_steps)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

468


In [20]:
# Specify outpit directory and number of checkpoint steps to save
run_config = tf.estimator.RunConfig(
    model_dir=OUTPUT_DIR,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

In [22]:
model_fn = model_fn_builder(
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps)

estimator = tf.estimator.Estimator(
  model_fn=model_fn,
  config=run_config,
  params={"batch_size": BATCH_SIZE})


INFO:tensorflow:Using config: {'_model_dir': 'OUTPUT_DIR_NAME', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f3bad357810>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


INFO:tensorflow:Using config: {'_model_dir': 'OUTPUT_DIR_NAME', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f3bad357810>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


Next we create an input builder function that takes our training feature set (`train_features`) and produces a generator. This is a pretty standard design pattern for working with Tensorflow [Estimators](https://www.tensorflow.org/guide/estimators).

In [23]:
# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)

Now we train our model! For me, using a Colab notebook running on Google's GPUs, my training time was about 14 minutes.

In [None]:
print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)

Beginning Training!
INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


dict_keys(['pooled_output', 'sequence_output'])
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.




















Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Saving checkpoints for 0 into OUTPUT_DIR_NAME/model.ckpt.


INFO:tensorflow:Saving checkpoints for 0 into OUTPUT_DIR_NAME/model.ckpt.


INFO:tensorflow:loss = 0.6940776, step = 1


INFO:tensorflow:loss = 0.6940776, step = 1


Now let's use our test data to see how well our model did:

In [None]:
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)

In [None]:
estimator.evaluate(input_fn=test_input_fn, steps=None)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-02-12T21:04:20Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from gs://bert-tfhub/aclImdb_v1/model.ckpt-468
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-02-12-21:06:05
INFO:tensorflow:Saving dict for global step 468: auc = 0.86659324, eval_accuracy = 0.8664, f1_score = 0.8659711, false_negatives = 375.0, false_positives = 293.0, global_step = 468, loss = 0.51870537, precision = 0.880457, recall = 0.8519542, true_negatives = 2174.0, true_positives = 2158.0
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 468: gs://bert-tfhub/aclImdb_v1/model.ckpt-468


{'auc': 0.86659324,
 'eval_accuracy': 0.8664,
 'f1_score': 0.8659711,
 'false_negatives': 375.0,
 'false_positives': 293.0,
 'global_step': 468,
 'loss': 0.51870537,
 'precision': 0.880457,
 'recall': 0.8519542,
 'true_negatives': 2174.0,
 'true_positives': 2158.0}

Now let's write code to make predictions on new sentences:

In [None]:
def getPrediction(in_sentences):
  labels = ["Negative", "Positive"]
  input_examples = [run_classifier.InputExample(guid="", text_a = x, text_b = None, label = 0) for x in in_sentences] # here, "" is just a dummy label
  input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
  predictions = estimator.predict(predict_input_fn)
  return [(sentence, prediction['probabilities'], labels[prediction['labels']]) for sentence, prediction in zip(in_sentences, predictions)]

In [None]:
pred_sentences = [
  "That movie was absolutely awful",
  "The acting was a bit lacking",
  "The film was creative and surprising",
  "Absolutely fantastic!"
]

In [None]:
predictions = getPrediction(pred_sentences)

INFO:tensorflow:Writing example 0 of 4
INFO:tensorflow:*** Example ***
INFO:tensorflow:guid: 
INFO:tensorflow:tokens: [CLS] that movie was absolutely awful [SEP]
INFO:tensorflow:input_ids: 101 2008 3185 2001 7078 9643 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Voila! We have a sentiment classifier!

In [None]:
predictions

[('That movie was absolutely awful',
  array([-4.9142293e-03, -5.3180690e+00], dtype=float32),
  'Negative'),
 ('The acting was a bit lacking',
  array([-0.03325794, -3.4200459 ], dtype=float32),
  'Negative'),
 ('The film was creative and surprising',
  array([-5.3589125e+00, -4.7171740e-03], dtype=float32),
  'Positive'),
 ('Absolutely fantastic!',
  array([-5.0434084 , -0.00647258], dtype=float32),
  'Positive')]