In [None]:
# Copyright 2019 Google Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Predicting Movie Review Sentiment with BERT on TF Hub

If you’ve been following Natural Language Processing over the past year, you’ve probably heard of BERT: Bidirectional Encoder Representations from Transformers. It’s a neural network architecture designed by Google researchers that’s totally transformed what’s state-of-the-art for NLP tasks, like text classification, translation, summarization, and question answering.

Now that BERT's been added to [TF Hub](https://www.tensorflow.org/hub) as a loadable module, it's easy(ish) to add into existing Tensorflow text pipelines. In an existing pipeline, BERT can replace text embedding layers like ELMO and GloVE. Alternatively, [finetuning](http://wiki.fast.ai/index.php/Fine_tuning) BERT can provide both an accuracy boost and faster training time in many cases.

Here, we'll train a model to predict whether an IMDB movie review is positive or negative using BERT in Tensorflow with tf hub. Some code was adapted from [this colab notebook](https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb). Let's get started!

In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime
import numpy as np
import time
from tensorflow import keras
import os
import re
import json

In addition to the standard libraries we imported above, we'll need to install BERT's python package.

In [None]:
#!pip install bert-tensorflow

In [2]:
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization

Below, we'll set an output directory location to store our model output and checkpoints. This can be a local directory, in which case you'd set OUTPUT_DIR to the name of the directory you'd like to create. If you're running this code in Google's hosted Colab, the directory won't persist after the Colab session ends.

Alternatively, if you're a GCP user, you can store output in a GCP bucket. To do that, set a directory name in OUTPUT_DIR and the name of the GCP bucket in the BUCKET field.

Set DO_DELETE to rewrite the OUTPUT_DIR if it exists. Otherwise, Tensorflow will load existing model checkpoints from that directory (if they exist).

In [3]:
# Set the output directory for saving model file
# Optionally, set a GCP bucket location

OUTPUT_DIR = 'output-amazon-testexport'#@param {type:"string"}
#@markdown Whether or not to clear/delete the directory and create a new one
DO_DELETE = False #@param {type:"boolean"}
#@markdown Set USE_BUCKET and BUCKET if you want to (optionally) store model output on GCP bucket.
USE_BUCKET = False #@param {type:"boolean"}
BUCKET = 'BUCKET_NAME' #@param {type:"string"}

if USE_BUCKET:
  OUTPUT_DIR = 'gs://{}/{}'.format(BUCKET, OUTPUT_DIR)
  # Colab-only import: authenticate so TensorFlow can write to the bucket.
  from google.colab import auth
  auth.authenticate_user()

# Only a missing directory is acceptable when clearing the output location;
# any other failure (permissions, bad bucket, ...) should surface instead of
# being silently swallowed.
if DO_DELETE and tf.gfile.Exists(OUTPUT_DIR):
  tf.gfile.DeleteRecursively(OUTPUT_DIR)
tf.gfile.MakeDirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))


***** Model output directory: output-amazon-testexport *****


# Data

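First, let's load the dataset. This notebook was adapted from an example that downloads, extracts, and imports the IMDB Large Movie Review Dataset (hosted by Stanford) using code borrowed from [this Tensorflow tutorial](https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub); here, the cells below instead read Amazon product reviews from local JSON files under `Data/`, with one JSON object per line.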

In [None]:
def normalize_text(text):
    """Lower-case a review and reduce it to letters, '!' and single spaces.

    Steps, in order: tabs become spaces, ``@mentions`` and http(s) URLs are
    removed, a doubled ``<br/>`` tag becomes a space, every character that is
    not an ASCII letter or ``!`` becomes a space, and runs of spaces are
    collapsed to one.

    Args:
        text: the raw review string.

    Returns:
        The cleaned string (may start or end with a single space).
    """
    lowered = text.lower().replace('\t', ' ')
    without_mentions = re.sub(r'@[A-Za-z0-9]+', '', lowered)
    without_urls = re.sub(r'https?://[A-Za-z0-9./]+', '', without_mentions)
    # Continue from the mention/URL-stripped text; restarting from the
    # lower-cased input here would throw away the two removals above.
    without_breaks = re.sub(r'<br\s*/><br\s*/>', ' ', without_urls)
    # Periods are stripped along with every other non-letter except '!'.
    letters_only = re.sub(r'[^a-zA-Z!]', ' ', without_breaks)
    return re.sub(r' +', ' ', letters_only)

In [None]:
# Scratch check: substring membership, then removal of the same greeting.
string = "Good morning, how are you?"
greeting = "Good morning"
print(greeting in string)
print(re.sub(greeting, "", string))

In [4]:


from pandas.io.json import json_normalize

# Each line of the file holds one JSON-encoded review.
with open('Data/amazonelectronics.json') as f:
    data = [json.loads(line) for line in f]

df_elec = pd.DataFrame.from_dict(json_normalize(data), orient='columns')
df_elec = df_elec.drop(['asin', 'helpful', 'reviewTime', 'reviewerID', 'reviewerName', 'summary', 'unixReviewTime'], axis=1)
# Relabel the two remaining columns by name rather than by position: the
# column order produced by json_normalize differs between pandas versions, and
# a positional relabel can silently swap ratings and texts.
# NOTE(review): assumes the remaining fields are named 'overall' (star rating)
# and 'reviewText', as in the public Amazon review dumps -- confirm; a
# KeyError here means the file uses different field names.
df_elec = df_elec.rename(columns={'overall': 'Tag', 'reviewText': 'Phrase'})[['Tag', 'Phrase']]
print(df_elec.shape)
print(df_elec.head())
print(df_elec.Tag.value_counts())
# Work on a copy so the raw frame stays available for re-runs.
df_elec_clean = df_elec.copy()
# Optional text cleaning (disabled):
#df_elec_clean.Phrase = [normalize_text(row) for row in df_elec.Phrase]
#print(df_elec_clean.head())


(1689188, 2)
   Tag                                             Phrase
0  5.0  We got this GPS for my husband who is an (OTR)...
1  1.0  I'm a professional OTR truck driver, and I bou...
2  3.0  Well, what can I say.  I've had this unit in m...
3  2.0  Not going to write a long review, even thought...
4  1.0  I've had mine for a year and here's what we go...
5.0    1009026
4.0     347041
3.0     142257
1.0     108725
2.0      82139
Name: Tag, dtype: int64
   Tag                                             Phrase
0  5.0  We got this GPS for my husband who is an (OTR)...
1  1.0  I'm a professional OTR truck driver, and I bou...
2  3.0  Well, what can I say.  I've had this unit in m...
3  2.0  Not going to write a long review, even thought...
4  1.0  I've had mine for a year and here's what we go...


In [6]:
# negative: 1 star, neutral: 2, 3, 4 stars, positive: 5 stars
df_elec_5 = df_elec_clean[df_elec_clean.Tag == 5.0]
df_elec_4 = df_elec_clean[df_elec_clean.Tag == 4.0]
df_elec_3 = df_elec_clean[df_elec_clean.Tag == 3.0]
df_elec_2 = df_elec_clean[df_elec_clean.Tag == 2.0]
df_elec_1 = df_elec_clean[df_elec_clean.Tag == 1.0]
df_elec_pos_sample = df_elec_5.sample(n=500, random_state=42)
df_elec_neg_sample = df_elec_1.sample(n=500, random_state=42)
df_elec_neu_sample = pd.concat([df_elec_2, df_elec_3, df_elec_4]).sample(n=5000, random_state=42)

# Three-way frame: 0 = negative, 0.5 = neutral, 1 = positive.
# A single map() on the freshly concatenated frame replaces the chain of
# in-place Series.replace calls, which mutate a temporary column object and
# can silently do nothing under pandas copy-on-write.
df_small_elec = pd.concat([df_elec_pos_sample, df_elec_neg_sample, df_elec_neu_sample])
df_small_elec['Tag'] = df_small_elec['Tag'].map({1.0: 0.0, 2.0: 0.5, 3.0: 0.5, 4.0: 0.5, 5.0: 1.0})

# Binary frame: only 1-star (-> 0) and 5-star (-> 1) reviews, integer labels.
df_binary_small_elec = pd.concat([df_elec_pos_sample, df_elec_neg_sample])
df_binary_small_elec['Tag'] = df_binary_small_elec['Tag'].map({1.0: 0, 5.0: 1}).astype(int)

print(df_small_elec.Tag.value_counts())
print(df_binary_small_elec.Tag.value_counts())
#df_binary_small_elec.to_csv("dataAmazonElecSmall.csv", sep=";", header=0)

0.5    5000
0.0     500
1.0     500
Name: Tag, dtype: int64
1    500
0    500
Name: Tag, dtype: int64


In [None]:
# Dump the balanced binary sample as ';'-separated values without a header row
# (the row index is still written as the first field). `header` is documented
# as a bool or a list of names, so pass False explicitly instead of None.
df_binary_small_elec.to_csv("data_en.csv", sep=";", header=False)

In [8]:
# Draw 800 rows for training; every row whose index was not drawn is held out.
train_init = df_binary_small_elec.sample(n=800, random_state=42)
held_out_mask = ~df_binary_small_elec.index.isin(train_init.index)
test_init = df_binary_small_elec.loc[held_out_mask]
for frame in (train_init, test_init):
    print(frame.shape)
print(train_init.head())
print(train_init.Tag.value_counts())

(800, 2)
(200, 2)
         Tag                                             Phrase
756047     0  It was WONDERFUL ...at first. But it was withi...
275863     0  I noticed that a bunch of people recently had ...
1301264    0  It took quite a bit of troubleshooting and tim...
1191914    0  Since 5 month after purchase it started to pro...
1383043    1  Very impressed. This product just works. Had n...
0    401
1    399
Name: Tag, dtype: int64


In [None]:
from pandas.io.json import json_normalize

# Each line of the file holds one JSON-encoded review.
with open('Data/Pet_Supplies.json') as f:
    data = [json.loads(line) for line in f]

df_pets = pd.DataFrame.from_dict(json_normalize(data), orient='columns')
df_pets = df_pets.drop(['asin', 'helpful', 'reviewTime', 'reviewerID', 'reviewerName', 'summary', 'unixReviewTime'], axis=1)
# Relabel the two remaining columns by name rather than by position: the
# column order produced by json_normalize differs between pandas versions, and
# a positional relabel can silently swap ratings and texts.
# NOTE(review): assumes the remaining fields are named 'overall' (star rating)
# and 'reviewText', as in the public Amazon review dumps -- confirm; a
# KeyError here means the file uses different field names.
df_pets = df_pets.rename(columns={'overall': 'Tag', 'reviewText': 'Phrase'})[['Tag', 'Phrase']]
print(df_pets.shape)
print(df_pets.head())
print(df_pets.Tag.value_counts())

In [None]:
# negative: 1 star, neutral: 2, 3, 4 stars, positive: 5 stars
df_pets_5 = df_pets[df_pets.Tag == 5.0]
df_pets_4 = df_pets[df_pets.Tag == 4.0]
df_pets_3 = df_pets[df_pets.Tag == 3.0]
df_pets_2 = df_pets[df_pets.Tag == 2.0]
df_pets_1 = df_pets[df_pets.Tag == 1.0]
df_pets_pos_sample = df_pets_5.sample(n=8748, random_state=42)
df_pets_neg_sample = df_pets_1.sample(n=8748, random_state=42)
df_pets_neu_sample = pd.concat([df_pets_2, df_pets_3, df_pets_4]).sample(n=5000, random_state=42)

# Three-way frame: 0 = negative, 0.5 = neutral, 1 = positive.
# A single map() on the freshly concatenated frame replaces the chain of
# in-place Series.replace calls, which mutate a temporary column object and
# can silently do nothing under pandas copy-on-write.
df_small_pets = pd.concat([df_pets_pos_sample, df_pets_neg_sample, df_pets_neu_sample])
df_small_pets['Tag'] = df_small_pets['Tag'].map({1.0: 0.0, 2.0: 0.5, 3.0: 0.5, 4.0: 0.5, 5.0: 1.0})

# Binary frame: only 1-star (-> 0) and 5-star (-> 1) reviews, integer labels.
df_binary_small_pets = pd.concat([df_pets_pos_sample, df_pets_neg_sample])
df_binary_small_pets['Tag'] = df_binary_small_pets['Tag'].map({1.0: 0, 5.0: 1}).astype(int)

print(df_small_pets.Tag.value_counts())
print(df_binary_small_pets.Tag.value_counts())

In [None]:
# The whole balanced Pet Supplies sample is used as a test set; the
# commented-out code is the train/held-out split that was not kept.
#train_init_pets = df_binary_small_pets.sample(n=15000, random_state=42)
test_init_pets = df_binary_small_pets#.loc[~df_binary_small_pets.index.isin(train_init_pets.index)]

In [None]:
# Sanity check: size and label balance of the Pet Supplies test frame.
print(test_init_pets.shape)
print(test_init_pets.Tag.value_counts())

To keep training fast, we work with the small samples built above: 800 training examples and 200 test examples.

In [9]:
# Select the frames used for training and evaluation in the rest of the
# notebook; the trailing comments are disabled alternatives (sub-sampling, or
# evaluating on the Pet Supplies frame).
train = train_init#.sample(25000)
test = test_init#_pets#.sample(25000)

In [10]:
# Show the column labels of the training frame.
train.columns

Index(['Tag', 'Phrase'], dtype='object')

For us, our input data is the 'Phrase' column and our label is the 'Tag' column (0, 1 for negative and positive, respectively)

In [11]:
# Column holding the review text fed to BERT.
DATA_COLUMN = 'Phrase'
# Column holding the class label.
LABEL_COLUMN = 'Tag'
# label_list is the list of labels, i.e. True, False or 0, 1 or 'dog', 'cat'
# Here: 0 = negative review, 1 = positive review.
label_list = [0, 1]

# Data Preprocessing
We'll need to transform our data into a format BERT understands. This involves two steps. First, we create  `InputExample`'s using the constructor provided in the BERT library.

- `text_a` is the text we want to classify, which in this case, is the `Phrase` field in our Dataframe. 
- `text_b` is used if we're training a model to understand the relationship between sentences (i.e. is `text_b` a translation of `text_a`? Is `text_b` an answer to the question asked by `text_a`?). This doesn't apply to our task, so we can leave `text_b` blank.
- `label` is the label for our example, i.e. True, False

In [12]:
# Use the InputExample class from BERT's run_classifier code to create examples from the data
def _row_to_input_example(row):
  """Build one single-sentence BERT InputExample from a DataFrame row."""
  return bert.run_classifier.InputExample(
      guid=None,  # Globally unique ID for bookkeeping, unused in this example
      text_a=row[DATA_COLUMN],
      text_b=None,  # no second sentence: this is not a sentence-pair task
      label=row[LABEL_COLUMN])

train_InputExamples = train.apply(_row_to_input_example, axis=1)

test_InputExamples = test.apply(_row_to_input_example, axis=1)

Next, we need to preprocess our data so that it matches the data BERT was trained on. For this, we'll need to do a couple of things (but don't worry--this is also included in the Python library):


1. Lowercase our text (if we're using a BERT lowercase model)
2. Tokenize it (i.e. "sally says hi" -> ["sally", "says", "hi"])
3. Break words into WordPieces (i.e. "calling" -> ["call", "##ing"])
4. Map our words to indexes using a vocab file that BERT provides
5. Add special "CLS" and "SEP" tokens (see the [readme](https://github.com/google-research/bert))
6. Append "index" and "segment" tokens to each input (see the [BERT paper](https://arxiv.org/pdf/1810.04805.pdf))

Happily, we don't have to worry about most of these details.




To start, we'll need to load a vocabulary file and lowercasing information directly from the BERT tf hub module:

In [None]:
# This is a path to an uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
  """Build a FullTokenizer from the vocab file and casing flag of the Hub module."""
  # A throw-away graph keeps the module's ops out of the default graph.
  with tf.Graph().as_default():
    module = hub.Module(BERT_MODEL_HUB)
    info = module(signature="tokenization_info", as_dict=True)
    fetches = [info["vocab_file"], info["do_lower_case"]]
    with tf.Session() as sess:
      vocab_path, lower_case = sess.run(fetches)

  return bert.tokenization.FullTokenizer(
      vocab_file=vocab_path, do_lower_case=lower_case)

tokenizer = create_tokenizer_from_hub_module()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Great--we just learned that the BERT model we're using expects lowercase data (that's what is stored in tokenization_info["do_lower_case"]) and we also loaded BERT's vocab file. We also created a tokenizer, which breaks words into word pieces:

In [None]:
# Demonstrate the tokenizer on a sample sentence.
tokenizer.tokenize("This here's an example of using the BERT tokenizer")

In [None]:
# Only log errors from here on, to keep the cell outputs short.
tf.logging.set_verbosity(tf.logging.ERROR)

Using our tokenizer, we'll call `run_classifier.convert_examples_to_features` on our InputExamples to convert them into features BERT understands.

In [None]:
# We'll set sequences to be at most 128 tokens long.
MAX_SEQ_LENGTH = 128
# Convert our train and test examples (in that order) to InputFeatures that BERT understands.
train_features, test_features = (
    bert.run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    for examples in (train_InputExamples, test_InputExamples)
)

In [None]:
# Print the first converted training feature object.
print(train_features[0])

# Creating a model

Now that we've prepared our data, let's focus on building a model. `create_model` does just this below. First, it loads the BERT tf hub module again (this time to extract the computation graph). Next, it creates a single new layer that will be trained to adapt BERT to our sentiment task (i.e. classifying whether a movie review is positive or negative). This strategy of using a mostly trained model is called [fine-tuning](http://wiki.fast.ai/index.php/Fine_tuning).

In [None]:
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
  """Creates a classification model: BERT plus one dense softmax layer.

  Args:
    is_predicting: True when building the graph for prediction. In that case
      no loss is computed and no dropout is applied.
    input_ids: token id tensor fed to the BERT module's "tokens" signature.
    input_mask: attention mask tensor for the same signature.
    segment_ids: segment id tensor for the same signature.
    labels: integer class ids, one-hot encoded to `num_labels` for the loss.
    num_labels: number of output classes.

  Returns:
    `(predicted_labels, log_probs)` when `is_predicting` is true, otherwise
    `(loss, predicted_labels, log_probs)`. `log_probs` are log-softmax values,
    not probabilities.
  """

  bert_module = hub.Module(
      BERT_MODEL_HUB,
      trainable=True)
  bert_inputs = dict(
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids)
  bert_outputs = bert_module(
      inputs=bert_inputs,
      signature="tokens",
      as_dict=True)

  # Use "pooled_output" for classification tasks on an entire sentence.
  # Use "sequence_outputs" for token-level output.
  output_layer = bert_outputs["pooled_output"]

  hidden_size = output_layer.shape[-1].value

  # Create our own layer to tune for the sentiment data.
  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):

    # Dropout helps prevent overfitting, but it must not run at prediction
    # time: it would randomly zero activations and make predictions
    # non-deterministic.
    # NOTE: only `is_predicting` is known here, so evaluation graphs still
    # apply dropout.
    if not is_predicting:
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    # argmax over the class axis already yields one label per example.
    # Reshaping to [-1] (instead of tf.squeeze) keeps the batch dimension even
    # for a batch of one. The op keeps the name "Squeeze" so graphs exported
    # from this model still expose the 'loss/Squeeze:0' tensor.
    predicted_labels = tf.reshape(
        tf.argmax(log_probs, axis=-1, output_type=tf.int32), [-1],
        name="Squeeze")
    # If we're predicting, we want predicted labels and the log-probabilities.
    if is_predicting:
      return (predicted_labels, log_probs)

    # Convert labels into one-hot encoding
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    # If we're train/eval, compute cross-entropy between predicted and actual label
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, predicted_labels, log_probs)


Next we'll wrap our model function in a `model_fn_builder` function that adapts our model to work for training, evaluation, and prediction.

In [None]:
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
  """Returns a `model_fn` closure for `tf.estimator.Estimator`.

  Args:
    num_labels: number of output classes.
    learning_rate: learning rate passed to BERT's optimizer factory.
    num_train_steps: total number of training steps.
    num_warmup_steps: number of warmup steps.

  Returns:
    A `model_fn(features, labels, mode, params)` that returns the
    `EstimatorSpec` for TRAIN, EVAL or PREDICT mode.
  """
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for the Estimator; labels are read from `features`."""

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)

    # PREDICT: no loss, just labels and log-probabilities.
    if is_predicting:
      (predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      predictions = {
          # NOTE: despite the key name these are log-probabilities; callers
          # apply np.exp to recover probabilities.
          'probabilities': log_probs,
          'labels': predicted_labels
      }
      return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # TRAIN and EVAL share the loss computation.
    (loss, predicted_labels, log_probs) = create_model(
      is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

    if mode == tf.estimator.ModeKeys.TRAIN:
      # The optimizer is only needed (and only built) when training.
      train_op = bert.optimization.create_optimizer(
          loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)
      return tf.estimator.EstimatorSpec(mode=mode,
        loss=loss,
        train_op=train_op)

    # EVAL: metrics are only built here, not in the training graph.
    # All metrics compare hard predicted labels with the true labels.
    eval_metrics = {
        "eval_accuracy": tf.metrics.accuracy(label_ids, predicted_labels),
        "f1_score": tf.contrib.metrics.f1_score(label_ids, predicted_labels),
        "auc": tf.metrics.auc(label_ids, predicted_labels),
        "precision": tf.metrics.precision(label_ids, predicted_labels),
        "recall": tf.metrics.recall(label_ids, predicted_labels),
        "true_positives": tf.metrics.true_positives(label_ids, predicted_labels),
        "true_negatives": tf.metrics.true_negatives(label_ids, predicted_labels),
        "false_positives": tf.metrics.false_positives(label_ids, predicted_labels),
        "false_negatives": tf.metrics.false_negatives(label_ids, predicted_labels)
    }
    return tf.estimator.EstimatorSpec(mode=mode,
      loss=loss,
      eval_metric_ops=eval_metrics)

  # Return the actual model function in the closure
  return model_fn


In [None]:
# Training hyperparameters; the step counts are derived from them in the next cell.
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 10
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where the learning rate
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100

In [None]:
# Derive the total number of training steps and the warmup share of them
# from the dataset size, batch size and epoch count.
steps_per_epoch = len(train_features) / BATCH_SIZE
num_train_steps = int(steps_per_epoch * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

In [None]:
# Specify output directory and number of checkpoint steps to save
# (checkpoints and summaries are written under OUTPUT_DIR).
run_config = tf.estimator.RunConfig(
    model_dir=OUTPUT_DIR,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

In [None]:
# Build the model function for our label set and schedule ...
model_fn = model_fn_builder(
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps)

# ... and wrap it in an Estimator; "batch_size" is passed through `params`.
estimator = tf.estimator.Estimator(
  model_fn=model_fn,
  config=run_config,
  params={"batch_size": BATCH_SIZE})


Next we create an input builder function that takes our training feature set (`train_features`) and produces a generator. This is a pretty standard design pattern for working with Tensorflow [Estimators](https://www.tensorflow.org/guide/estimators).

In [None]:
# Create an input function for training. drop_remainder would be set to True
# when using TPUs; it is False here.
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)

Now we train our model! For me, using a Colab notebook running on Google's GPUs, my training time was about 14 minutes.

In [None]:
# Fine-tune for num_train_steps steps and report the wall-clock duration.
print('Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
elapsed = datetime.now() - current_time
print("Training took time ", elapsed)

Now let's use our test data to see how well our model did:

In [None]:
# Input function over the held-out features, built with is_training=False.
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)

In [None]:
# Evaluate on the test input; steps=None runs until the input is exhausted.
estimator.evaluate(input_fn=test_input_fn, steps=None)

Now let's write code to make predictions on new sentences:

In [None]:
def getPrediction(in_sentences):
    """Classify raw sentences with the trained estimator and print timings.

    Args:
        in_sentences: a sized iterable of strings (list, numpy array,
            pandas Series, ...).

    Returns:
        A list of `(sentence, probabilities, label)` tuples in input order,
        where `probabilities` is `np.exp` of the model's log-probabilities and
        `label` is the matching entry of `label_list`. An empty input yields
        an empty list.
    """
    num_sentences = len(in_sentences)
    if num_sentences == 0:
        # Nothing to predict; also avoids dividing by zero in the
        # per-sentence timings below.
        return []

    result = []
    t0 = time.time()
    # guid="" and label=0 are dummy values: the true label is unknown at
    # prediction time, but InputExample still needs one from label_list.
    input_examples = [run_classifier.InputExample(guid="", text_a = x, text_b = None, label = 0) for x in in_sentences]
    input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
    t1 = time.time()
    # estimator.predict returns a lazy generator: the model only runs while it
    # is consumed in the loop below, so t2 - t1 is just the cost of creating it.
    predictions = estimator.predict(predict_input_fn)
    t2 = time.time()
    for sentence, prediction in zip(in_sentences, predictions):
        result.append((sentence, np.exp(prediction['probabilities']), label_list[prediction['labels']]))
    t3 = time.time()
    print("time total in function: ", t3 - t0)
    print("time preprocess: ", t1 - t0)
    print("time preprocess/sentence: ", (t1 - t0)/num_sentences)
    print("time estimator.predict: ", t2-t1)
    print("time pred: ", t3 - t2)
    print("time pred/sentence: ", (t3 - t2)/num_sentences)
    return result

In [None]:
# A few hand-written sentences to sanity-check the classifier.
pred_sentences = np.array([
  "That movie was absolutely awful",
  "The acting was a bit lacking",
  "The film was creative and surprising",
  "Absolutely fantastic!",
  "I am happy",
  "I am happy!!!"])
#sentence = ["i had few problems with this film and i have heard a lot of criticisms saying it is overlong and overrated .  true    it is over three hours long    but i was amazed that it goes by so quickly .  i don t think it is overrated at all    i think the imdb rating is perfectly decent .  the film looks sumptuous    with gorgeous costumes and excellent effects    and the direction from james cameron rarely slips from focus .  leonardo dicaprio gives one of his best performances as jack    and kate winslet is lovely as rose .  david warner    a great actor    steals every scene he s in .  the story is very rich in detail    and is hot on character development    obvious with the love story which is very moving when it needs to be    though in the first bit of the movie it is a little slow .  the last hour is extremely riveting    and i will confess that i was on the edge of my seat    when the titanic sank .  i will also say that the last five minutes were very moving .  the music score by james horner was lovely    though i never was a huge fan of the song my heart will go on .  the      miniseries was good    but suffered from undeveloped scenarios and some historical inaccuracies .  overall    i give titanic an"]
# Two-element probe array: one mixed-sentiment sentence and an empty string.
sentence= np.array(["The actors were good but the plot was terrible", ""])
print(sentence.shape)
#sentence.reshape(-1, 1)

In [None]:
# Time an end-to-end prediction call on the hand-written sentences.
ct = time.time()
predictions_result = getPrediction(pred_sentences)
print("time total: ", time.time()-ct)

In [None]:
# Display the (sentence, probabilities, label) tuples.
predictions_result

Voila! We have a sentiment classifier!

In [None]:
# Predict every test review and print the elapsed seconds.
# The start stamp must come from time.time(): subtracting a datetime from a
# float raises TypeError.
ct = time.time()
predictions_test = getPrediction(test.Phrase)
print(time.time()-ct)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Spot-check one prediction. The index is clamped to the last element because
# the fixed index 6787 is out of range for the 200-row test frame used here.
if predictions_test:
    sample_idx = min(6787, len(predictions_test) - 1)
    print(predictions_test[sample_idx])
# Third tuple element is the predicted label.
pred = [prediction[2] for prediction in predictions_test]
print(pred[0:2])
print("Accuracy: %s" % accuracy_score(test.Tag, pred))
print("Precision: %s" %precision_score(test.Tag, pred))
print("Recall: %s" %recall_score(test.Tag, pred))
print("f1: %s" %f1_score(test.Tag, pred))
print(confusion_matrix(test.Tag, pred))

In [None]:
# Collect the misclassified test reviews: text, true label, predicted label.
# Built in one vectorised pass; appending rows one at a time with .loc copies
# the frame on every insert.
predicted_tags = [prediction[2] for prediction in predictions_test]
comparison = pd.DataFrame(
    {'Phrase': test.Phrase.values, 'Tag': test.Tag.values, 'Pred': predicted_tags},
    columns=['Phrase', 'Tag', 'Pred'])
errors = comparison[comparison.Tag != comparison.Pred].reset_index(drop=True)
print(errors.shape)
print(errors.head())

In [None]:
# Save the misclassified reviews as ';'-separated values for manual inspection.
errors.to_csv('errors_bert.csv', sep=';')

# Export

In [None]:
# Failed attempt: parsing spec for serialized tf.Example inputs.
# NOTE(review): the keys 'input_masks' and 'label_id' differ from the
# 'input_mask' / 'label_ids' feature keys that model_fn reads -- confirm before
# reusing this spec.
feature_spec = {'input_ids': tf.FixedLenFeature([MAX_SEQ_LENGTH],tf.int64), 
                'input_masks':tf.FixedLenFeature([MAX_SEQ_LENGTH],tf.int64), 
                'segment_ids':tf.FixedLenFeature([MAX_SEQ_LENGTH],tf.int64),
                'label_id': tf.FixedLenFeature([1],tf.int64),
                'is_real_example': tf.FixedLenFeature([1],tf.bool)}

#def serving_input_receiver_fn():
 #   serialized_tf_example = tf.placeholder(dtype=tf.string,
 #                                        shape=[None],
  #                                       name='input_tensors')
   # receiver_tensors = {'inputs': serialized_tf_example}
    
    #features = tf.parse_example(serialized_tf_example, feature_spec)
    #return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

def serving_input_receiver_fn():
    """Serving input_fn that builds features from placeholders

    The token-level inputs are declared as `[batch, MAX_SEQ_LENGTH]` int32
    placeholders and `label_ids` as `[batch]`, the same shapes used by
    `serving_input_fn`. Rank-1 placeholders cannot hold a batch of token
    sequences.

    Returns
    -------
    tf.estimator.export.ServingInputReceiver
        The same placeholders serve as both the model features and the
        receiver tensors, under the keys 'input_ids', 'input_mask',
        'segment_ids' and 'label_ids'.
    """
    input_ids = tf.placeholder(dtype=tf.int32, shape=[None, MAX_SEQ_LENGTH], name='input_ids')
    input_mask = tf.placeholder(dtype=tf.int32, shape=[None, MAX_SEQ_LENGTH], name='input_mask')
    segment_ids = tf.placeholder(dtype=tf.int32, shape=[None, MAX_SEQ_LENGTH], name='segment_ids')
    label_ids = tf.placeholder(dtype=tf.int32, shape=[None], name='label_ids')
    receiver_tensors = {'input_ids': input_ids, 'input_mask': input_mask, 'segment_ids': segment_ids, 'label_ids': label_ids}
    features = {'input_ids': input_ids, 'input_mask': input_mask, 'segment_ids': segment_ids, 'label_ids': label_ids}
    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

In [None]:
def serving_input_fn():
    """Build the raw-tensor serving input receiver used when exporting the model.

    Declares one int32 placeholder per model feature ('label_ids' of shape
    [None]; 'input_ids', 'input_mask' and 'segment_ids' of shape
    [None, MAX_SEQ_LENGTH]) and passes them to
    `build_raw_serving_input_receiver_fn`, whose result is called immediately.

    Returns:
        The receiver object produced by calling the built receiver function.
    """
    sequence_shape = [None, MAX_SEQ_LENGTH]
    label_ids = tf.placeholder(tf.int32, [None], name='label_ids')
    input_ids = tf.placeholder(tf.int32, sequence_shape, name='input_ids')
    input_mask = tf.placeholder(tf.int32, sequence_shape, name='input_mask')
    segment_ids = tf.placeholder(tf.int32, sequence_shape, name='segment_ids')
    raw_features = {
        'label_ids': label_ids,
        'input_ids': input_ids,
        'input_mask': input_mask,
        'segment_ids': segment_ids,
    }
    build_receiver = tf.estimator.export.build_raw_serving_input_receiver_fn(raw_features)
    return build_receiver()

In [None]:
import os
# os.path.dirname('.') evaluates to '', so the export base directory is the
# current working directory (as a relative path).
dir_path = os.path.dirname('.')
# NOTE(review): private Estimator attribute; presumably skips the TPU variant
# of the export -- confirm against the installed TensorFlow version.
estimator._export_to_tpu = False
# Export the trained model as a SavedModel in text format.
estimator.export_savedmodel(dir_path, serving_input_fn, as_text=True)

# essais importation

In [None]:
# NOTE(review): "1551259383" is a hard-coded sub-directory name, presumably the
# timestamped folder written by an earlier export -- update it after re-exporting.
exported_path= os.path.join(dir_path,  "1551259383")
predictor= tf.contrib.predictor.from_saved_model(exported_path)
#tf.contrib not in TF 2.0

In [None]:
# guid="" and label=0 are dummy values; the true labels are not needed to predict.
input_examples = [run_classifier.InputExample(guid="", text_a = x, text_b = None, label = 0) for x in test.Phrase]
input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
# Leftover scratch Example; the predictor call below does not use it.
model_input= tf.train.Example(features=tf.train.Features(feature={
                'x': tf.train.Feature(float_list=tf.train.FloatList(value=[6.4, 3.2, 4.5, 1.5]))        
                })) 
# The exported signature takes batched inputs ([batch, MAX_SEQ_LENGTH] for the
# token tensors, [batch] for label_ids), so the single example is wrapped in a
# one-element batch.
first_features = input_features[0]
output_dict= predictor({'input_ids': [first_features.input_ids], 
                        'input_mask': [first_features.input_mask], 
                        'segment_ids': [first_features.segment_ids], 
                        'label_ids': [first_features.label_id]})
# The model's prediction dict exposes 'labels' and 'probabilities' (the latter
# holds log-probabilities, hence np.exp); it has no 'scores' key.
print(" prediction is " , output_dict['labels'], np.exp(output_dict['probabilities']))

In [None]:
# Inspect the attention mask of the first converted example.
print(input_features[0].input_mask)

In [None]:
def _int64_feature(value):
    """Wrap an iterable of ints in a tf.train.Feature holding an Int64List."""
    int_list = tf.train.Int64List(value=value)
    return tf.train.Feature(int64_list=int_list)

#input_examples = [run_classifier.InputExample(guid="", text_a = x, text_b = None, label = 0) for x in test.Phrase] # here, "" is just a dummy label
#input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
# Create the Example
def create_example(ex):
    """Convert one set of input features into a ``tf.train.Example``.

    Args:
        ex: object exposing ``input_ids``, ``input_mask`` and ``segment_ids``
            (sequences of ints) plus an integer label stored as ``label_id``.
            An object that stores the label as ``label_ids`` is still
            accepted as a fallback.

    Returns:
        A ``tf.train.Example`` with the int64 features 'input_ids',
        'input_mask', 'segment_ids' and 'label_ids'.
    """
    # The feature objects used elsewhere in this notebook expose the label as
    # `label_id` (singular); reading `label_ids` from them raises
    # AttributeError, so prefer `label_id` and only fall back to the old name.
    label = ex.label_id if hasattr(ex, 'label_id') else ex.label_ids
    feature = {
        'input_ids': _int64_feature(ex.input_ids),
        'input_mask': _int64_feature(ex.input_mask),
        'segment_ids': _int64_feature(ex.segment_ids),
        # The label is a scalar, so wrap it in a one-element list.
        'label_ids': _int64_feature([label]),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature))
    return example

In [None]:
# Serialize an Example and write it as a single record to 'data.tfrecord'.
# NOTE(review): `example` is not assigned at module level in the visible cells
# (it only exists as a local inside create_example) -- presumably it should be
# the result of create_example(...); confirm before running on a fresh kernel.
with tf.python_io.TFRecordWriter('data.tfrecord') as writer:
    writer.write(example.SerializeToString())

In [None]:
# Build the BERT input features for all test phrases. The guid is left empty
# and label=0 is only a placeholder value.
input_examples = [
    run_classifier.InputExample(guid="", text_a=phrase, text_b=None, label=0)
    for phrase in test.Phrase
]
input_features = run_classifier.convert_examples_to_features(
    input_examples, label_list, MAX_SEQ_LENGTH, tokenizer
)

In [None]:
from tensorflow.python.saved_model import tag_constants
import time
def pred(sentences, export_dir=None):
    """Run the exported SavedModel on each sentence and print timing statistics.

    The sentences are converted to BERT input features, the SavedModel is
    loaded into a fresh graph and session, and the output tensor is evaluated
    once per sentence (batch size one).

    Args:
        sentences: sized iterable of strings (e.g. list, numpy array or
            pandas Series).
        export_dir: directory of the SavedModel to load. Defaults to
            ``os.path.join(dir_path, "1551259383")``.

    Returns:
        A list with one ``sess.run`` result of the tensor 'loss/Squeeze:0' per
        sentence, in input order. An empty input yields an empty list without
        loading the model.
    """
    # Nothing to predict: return early so the per-sentence timing below does
    # not divide by zero and the model is not loaded for no reason.
    if len(sentences) == 0:
        return []

    if export_dir is None:
        export_dir = os.path.join(dir_path, "1551259383")

    graph = tf.Graph()
    results = []

    t0 = time.time()
    # guid is left empty and label=0 is only a placeholder value.
    input_examples = [
        run_classifier.InputExample(guid="", text_a=x, text_b=None, label=0)
        for x in sentences
    ]
    input_features = run_classifier.convert_examples_to_features(
        input_examples, label_list, MAX_SEQ_LENGTH, tokenizer
    )
    t1 = time.time()

    with graph.as_default():
        with tf.Session() as sess:
            tf.saved_model.loader.load(sess, [tag_constants.SERVING], export_dir)

            # Look up the serving placeholders and the output tensor by name.
            tensor_input_ids = graph.get_tensor_by_name('input_ids_1:0')
            tensor_input_mask = graph.get_tensor_by_name('input_mask_1:0')
            tensor_label_ids = graph.get_tensor_by_name('label_ids_1:0')
            tensor_segment_ids = graph.get_tensor_by_name('segment_ids_1:0')
            tensor_outputs = graph.get_tensor_by_name('loss/Squeeze:0')
            # t2 - t1 therefore covers both loading the model and the lookups.
            t2 = time.time()

            for feature in input_features:
                # Each example is reshaped into a batch of size one.
                result = sess.run(tensor_outputs, feed_dict={
                    tensor_input_ids: np.array(feature.input_ids).reshape(-1, MAX_SEQ_LENGTH),
                    tensor_input_mask: np.array(feature.input_mask).reshape(-1, MAX_SEQ_LENGTH),
                    tensor_label_ids: np.array(feature.label_id).reshape(-1, ),
                    tensor_segment_ids: np.array(feature.segment_ids).reshape(-1, MAX_SEQ_LENGTH),
                })
                results.append(result)
            t3 = time.time()

    print("time total in function: ", t3 - t0)
    print("time preprocess: ", t1 - t0)
    print("time preprocess/sentence: ", (t1 - t0)/len(sentences))
    print("time get tensors: ", t2-t1)
    print("time pred: ", t3 - t2)
    print("time pred/sentence: ", (t3 - t2)/len(sentences))
    return results

In [None]:
# Short sample reviews used to exercise pred().
pred_sentences = np.array([
  "That movie was absolutely awful",
  "The acting was a bit lacking",
  "The film was creative and surprising",
  "Absolutely fantastic!",
  "it's very funny",
  "The actors were good but the plot was terrible"])
# Two-element input: an empty string followed by one long review.
sentence_long = np.array(["", "i had few problems with this film and i have heard a lot of criticisms saying it is overlong and overrated .  true    it is over three hours long    but i was amazed that it goes by so quickly .  i don t think it is overrated at all    i think the imdb rating is perfectly decent .  the film looks sumptuous    with gorgeous costumes and excellent effects    and the direction from james cameron rarely slips from focus .  leonardo dicaprio gives one of his best performances as jack    and kate winslet is lovely as rose .  david warner    a great actor    steals every scene he s in .  the story is very rich in detail    and is hot on character development    obvious with the love story which is very moving when it needs to be    though in the first bit of the movie it is a little slow .  the last hour is extremely riveting    and i will confess that i was on the edge of my seat    when the titanic sank .  i will also say that the last five minutes were very moving .  the music score by james horner was lovely    though i never was a huge fan of the song my heart will go on .  the      miniseries was good    but suffered from undeveloped scenarios and some historical inaccuracies .  overall    i give titanic an"])
# Single short sentence, used as the smallest timing case.
sentence_short = np.array(["That movie was absolutely awful"])
#sentence= np.array(["That movie was absolutely awful"])
# os.path.dirname('.') evaluates to the empty string, so paths joined onto
# dir_path are relative to the current working directory.
dir_path = os.path.dirname('.')

In [None]:
# Time pred() on three inputs: the sample batch, one short sentence and the
# long-review input. Each entry is (label to print, input, whether to print
# the results afterwards).
timing_runs = (
    ("total time:", pred_sentences, False),
    ("total time short:", sentence_short, True),
    ("total time long:", sentence_long, True),
)
for run_index, (timing_label, timing_input, show_results) in enumerate(timing_runs):
    # Separate consecutive runs with the same blank-line marker.
    if run_index > 0:
        print("\n")
    t = time.time()
    results = pred(timing_input)
    print(timing_label, time.time() - t)
    if show_results:
        print(results)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Predict every test phrase, then score the predictions against test.Tag.
results = pred(test.Phrase)

# Each entry pairs the output template with the metric function it reports.
metric_reports = (
    ("Accuracy: %s", accuracy_score),
    ("Precision: %s", precision_score),
    ("Recall: %s", recall_score),
    ("f1: %s", f1_score),
)
for report_template, metric_fn in metric_reports:
    print(report_template % metric_fn(test.Tag, results))
print(confusion_matrix(test.Tag, results))