In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime
import numpy as np
import time
from tensorflow import keras
import os
import re
import json

import bert
from bert import modeling
from bert import run_classifier

from bert import optimization
from bert import tokenization

In [2]:
# Set the output directory for saving model file
# Optionally, set a GCP bucket location

import tensorboard as tb
#tf.io.gfile = tb.compat.tensorflow_stub.io.gfile

OUTPUT_DIR = 'output-amazon-testexport'#@param {type:"string"}
#@markdown Whether or not to clear/delete the directory and create a new one
DO_DELETE = False #@param {type:"boolean"}
#@markdown Set USE_BUCKET and BUCKET if you want to (optionally) store model output on GCP bucket.
USE_BUCKET = False #@param {type:"boolean"}
BUCKET = 'BUCKET_NAME' #@param {type:"string"}

if USE_BUCKET:
  OUTPUT_DIR = 'gs://{}/{}'.format(BUCKET, OUTPUT_DIR)
  from google.colab import auth
  auth.authenticate_user()   
    
if DO_DELETE:
  try:
    tf.io.gfile.DeleteRecursively(OUTPUT_DIR)
  except:
    # Doesn't matter if the directory didn't exist
    pass
tf.io.gfile.makedirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))


***** Model output directory: output-amazon-testexport *****


In [3]:
def normalize_text(text):
    #s = text.split()
    #s = [t for t in s if (t not in stopwords.words('english')) or (t not in other_stopwords)]
    #s = [t for t in s if t not in other_stopwords]
    #s = " ".join(s)
    no_tabs = text.lower().replace('\t', ' ')
    remove_tag = re.sub(r'@[A-Za-z0-9]+', "", no_tabs)
    remove_url = re.sub(r'https?://[A-Za-z0-9./]+', "", remove_tag)
    alpha_only = re.sub("<br\s*/><br\s*/>", " ", no_tabs)
    #multi_spaces = re.sub("[^a-zA-Z\.\!]", " ", alpha_only) !!!!!! temporary
    multi_spaces = re.sub("[^a-zA-Z\!]", " ", alpha_only)
    text_clean = re.sub(" +", " ", multi_spaces);
    return text_clean

In [4]:
classifiedSentimentFile = "app_reviews_txt_short0_out.txt"
comments = pd.read_csv(classifiedSentimentFile, delimiter='\t', encoding='latin1')
comments.head()

label_sentistrength = []
for i,row in comments.iterrows():
    p = row[0]
    n = row[1]
    if p+n == 0:
        label_sentistrength.append(0)
    elif p+n<0:
        label_sentistrength.append(-1)
    elif p+n>0:
        label_sentistrength.append(1)
    if row[2] is np.NaN:
        comments.iloc[i,2] = 'N/A'
comments['label_sentistrength'] = label_sentistrength

print('Total: ',len(label_sentistrength),' Positive: ',np.sum(np.array(label_sentistrength)==1),' Negative: ',np.sum(np.array(label_sentistrength)==-1))

Total:  20000  Positive:  8758  Negative:  5012


In [5]:
comments

Unnamed: 0,Positive,Negative,Text,label_sentistrength
0,2,-2,Every other (always the first time) I open the...,0
1,2,-4,Latest update is a huge dissapointment! The ap...,-1
2,1,-2,Recent update has significantly change the app...,-1
3,2,-4,On the latest update using your mobile it is t...,-1
4,3,-3,The standards have really slipped lately. Now ...,0
...,...,...,...,...
19995,1,-1,Easy booking.,0
19996,2,-2,Apart from once where my booking appeared to b...,0
19997,3,-1,Love this app,1
19998,1,-1,Longtime user and no issues to date. Easy to n...,0


In [6]:
df_elec = pd.DataFrame()
df_elec['Tag'] = comments['label_sentistrength']
df_elec['Phrase'] = comments['Text']

df_elec

Unnamed: 0,Tag,Phrase
0,0,Every other (always the first time) I open the...
1,-1,Latest update is a huge dissapointment! The ap...
2,-1,Recent update has significantly change the app...
3,-1,On the latest update using your mobile it is t...
4,0,The standards have really slipped lately. Now ...
...,...,...
19995,0,Easy booking.
19996,0,Apart from once where my booking appeared to b...
19997,1,Love this app
19998,0,Longtime user and no issues to date. Easy to n...


In [7]:
df_elec_clean = df_elec.copy()
#print(df_elec_clean)
df_elec_pos_sample = df_elec_clean[df_elec_clean.Tag == 1]
df_elec_neg_sample = df_elec_clean[df_elec_clean.Tag == -1]
df_binary_small_elec = pd.concat([df_elec_pos_sample, df_elec_neg_sample])
df_binary_small_elec
#print(df_elec_pos_sample)
#print(df_elec_neg_sample)

Unnamed: 0,Tag,Phrase
9,1,This app is like going back in time. It both l...
17,1,Unable to change between countries. I move bet...
27,1,"This app used to run flawlessly, I don't know ..."
31,1,For two days in a row I'm unable to pick up my...
37,1,Slow and unusable. I have a galaxy note 9 . Ev...
...,...,...
19852,-1,Very easy to book and to cancel. Disappointed ...
19892,-1,Never have any problems with bookings or cance...
19909,-1,Never had a problem booking or cancelling keep...
19916,-1,Have loads of places to offer that are cheap a...


In [8]:
df_elec_pos_sample = df_elec_pos_sample.sample(n=5000, random_state=42)
df_elec_neg_sample = df_elec_neg_sample.sample(n=5000, random_state=42)
print(df_elec_pos_sample)
train_init = df_binary_small_elec.sample(n=8000, random_state=42)
test_init = df_binary_small_elec.loc[~df_binary_small_elec.index.isin(train_init.index)]
print(train_init)

       Tag                                             Phrase
13789    1  Becuse like reels and its efects but this app ...
12706    1  Okay so just today I was trying to open the ap...
18982    1     Excellent staff very economical shop I love it
17613    1  Please let us have at least 10 favourite stick...
7650     1  Good, easy to use app. Some negatives. Can't c...
...    ...                                                ...
4346     1                                    It's a nice app
9672     1  ive been using Google pay for awhile now and i...
14966    1                                     Itâs amazing
3829     1  Great App. Allows me to order up my Prescripti...
169      1                                          Good luck

[5000 rows x 2 columns]
       Tag                                             Phrase
447     -1  I used to really like the Amazon app - primari...
15705    1                                      user friendly
13075   -1  This app used to work flawlessly 

In [9]:
train = train_init#.sample(8000)
test = test_init#_pets#.sample(2000)

DATA_COLUMN = 'Phrase'
LABEL_COLUMN = 'Tag'
# label_list is the list of labels, i.e. True, False or 0, 1 or 'dog', 'cat'
label_list = [-1, 1]

In [10]:
# Use the InputExample class from BERT's run_classifier code to create examples from the data
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None, 
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

In [11]:
import ssl

ssl._create_default_https_context = ssl._create_unverified_context

BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
  """Get the vocab file and casing info from the Hub module."""
  with tf.Graph().as_default():
    bert_module = hub.Module(BERT_MODEL_HUB)
    tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
    with tf.compat.v1.Session() as sess:
      vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
                                            tokenization_info["do_lower_case"]])
      
  return bert.tokenization.FullTokenizer(
      vocab_file=vocab_file, do_lower_case=do_lower_case)

tokenizer = create_tokenizer_from_hub_module()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore
2022-08-12 14:23:12.061798: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-08-12 14:23:12.141861: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled


In [12]:
tokenizer.tokenize("This here's an example of using the BERT tokenizer")

['this',
 'here',
 "'",
 's',
 'an',
 'example',
 'of',
 'using',
 'the',
 'bert',
 'token',
 '##izer']

In [13]:
# We'll set sequences to be at most 128 tokens long.
MAX_SEQ_LENGTH = 128
# Convert our train and test features to InputFeatures that BERT understands.
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)

In [14]:
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
  """Creates a classification model."""

  bert_module = hub.Module(
      BERT_MODEL_HUB,
      trainable=True)
  bert_inputs = dict(
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids)
  bert_outputs = bert_module(
      inputs=bert_inputs,
      signature="tokens",
      as_dict=True)

  # Use "pooled_output" for classification tasks on an entire sentence.
  # Use "sequence_outputs" for token-level output.
  output_layer = bert_outputs["pooled_output"]

  #hidden_size = output_layer.shape[-1].value
  hidden_size = output_layer.shape[-1]
  # Create our own layer to tune for politeness data.
  output_weights = tf.compat.v1.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.compat.v1.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.compat.v1.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.compat.v1.variable_scope("loss"):

    # Dropout helps prevent overfitting
    # keep_prob = tf.compat.v1.placeholder(tf.float32)
    output_layer = tf.nn.dropout(output_layer, rate = 0.5)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    # Convert labels into one-hot encoding
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
    # If we're predicting, we want predicted labels and the probabiltiies.
    if is_predicting:
      return (predicted_labels, log_probs)

    # If we're train/eval, compute loss between predicted and actual label
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, predicted_labels, log_probs)

In [43]:
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
  """Returns `model_fn` closure for TPUEstimator."""
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)
    
    # TRAIN and EVAL
    if not is_predicting:

      (loss, predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      train_op = bert.optimization.create_optimizer(
          loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)

      # Calculate evaluation metrics. 
      def metric_fn(label_ids, predicted_labels):
        accuracy = tf.compat.v1.metrics.accuracy(label_ids, predicted_labels)
        '''
        f1_score = tf.metrics.f1_score(
            label_ids,
            predicted_labels)
            
        
        auc = tf.metrics.AUC(
            label_ids,
            predicted_labels)
        
        recall = tf.metrics.recall(
            label_ids,
            predicted_labels)
        precision = tf.metrics.precision(
            label_ids,
            predicted_labels) 
        
        true_pos = tf.metrics.true_positives(
            label_ids,
            predicted_labels)
        true_neg = tf.metrics.true_negatives(
            label_ids,
            predicted_labels)   
        false_pos = tf.metrics.false_positives(
            label_ids,
            predicted_labels)  
        false_neg = tf.metrics.false_negatives(
            label_ids,
            predicted_labels)
        '''
        
        recall = tf.keras.metrics.Recall()
        recall.update_state(label_ids,predicted_labels)
        #recall.result().numpy()
        
        return {
            "eval_accuracy": accuracy
            #"f1_score": f1_score,
            #"auc": auc,
            #"precision": precision,
            # "recall": recall.result()
            #"true_positives": true_pos,
            #"true_negatives": true_neg,
            #"false_positives": false_pos,
            #"false_negatives": false_neg
        }

      eval_metrics = metric_fn(label_ids, predicted_labels)

      if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode=mode,
          loss=loss,
          train_op=train_op)
      else:
          return tf.estimator.EstimatorSpec(mode=mode,
            loss=loss,
            eval_metric_ops=eval_metrics)
    else:
      (predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      predictions = {
          'probabilities': log_probs,
          'labels': predicted_labels
      }
      return tf.estimator.EstimatorSpec(mode, predictions=predictions)

  # Return the actual model function in the closure
  return model_fn


In [44]:
# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 10
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where hte learning rate 
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100

In [45]:
# Compute # train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

In [46]:
# Specify output directory and number of checkpoint steps to save
run_config = tf.estimator.RunConfig(
    model_dir=OUTPUT_DIR,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

In [47]:
model_fn = model_fn_builder(
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps)

estimator = tf.estimator.Estimator(
  model_fn=model_fn,
  config=run_config,
  params={"batch_size": BATCH_SIZE})


INFO:tensorflow:Using config: {'_model_dir': 'output-amazon-testexport', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


INFO:tensorflow:Using config: {'_model_dir': 'output-amazon-testexport', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [48]:
# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)

In [49]:
print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)

Beginning Training!
INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.
2022-08-12 14:30:02.950252: W tensorflow/core/common_runtime/graph_constructor.cc:1526] Importing a graph with a lower producer version 27 into an existing graph with producer version 1087. Shape inference will have run different parts of the graph with different producer versions.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...


INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...


INFO:tensorflow:Saving checkpoints for 0 into output-amazon-testexport/model.ckpt.


INFO:tensorflow:Saving checkpoints for 0 into output-amazon-testexport/model.ckpt.


INFO:tensorflow:output-amazon-testexport/model.ckpt-0.data-00000-of-00001


INFO:tensorflow:output-amazon-testexport/model.ckpt-0.data-00000-of-00001


INFO:tensorflow:1316300


INFO:tensorflow:1316300


INFO:tensorflow:output-amazon-testexport/model.ckpt-0.index


INFO:tensorflow:output-amazon-testexport/model.ckpt-0.index


INFO:tensorflow:1316300


INFO:tensorflow:1316300


INFO:tensorflow:output-amazon-testexport/model.ckpt-0.meta


INFO:tensorflow:output-amazon-testexport/model.ckpt-0.meta


INFO:tensorflow:1334600


INFO:tensorflow:1334600


INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...


INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...


INFO:tensorflow:loss = 0.89493906, step = 0


INFO:tensorflow:loss = 0.89493906, step = 0


INFO:tensorflow:global_step/sec: 0.0866416


INFO:tensorflow:global_step/sec: 0.0866416


INFO:tensorflow:loss = 0.13429427, step = 100 (1154.278 sec)


INFO:tensorflow:loss = 0.13429427, step = 100 (1154.278 sec)


INFO:tensorflow:global_step/sec: 0.0869333


INFO:tensorflow:global_step/sec: 0.0869333


INFO:tensorflow:loss = 0.16362333, step = 200 (1150.215 sec)


INFO:tensorflow:loss = 0.16362333, step = 200 (1150.215 sec)


INFO:tensorflow:global_step/sec: 0.0827807


INFO:tensorflow:global_step/sec: 0.0827807


ERROR:tensorflow:EventFileWriter writer thread error: output-amazon-testexport/events.out.tfevents.1660285811.Elroy-MacBook-Pro.local; No space left on device
	Failed to flush 2 events to output-amazon-testexport/events.out.tfevents.1660285811.Elroy-MacBook-Pro.local


ERROR:tensorflow:EventFileWriter writer thread error: output-amazon-testexport/events.out.tfevents.1660285811.Elroy-MacBook-Pro.local; No space left on device
	Failed to flush 2 events to output-amazon-testexport/events.out.tfevents.1660285811.Elroy-MacBook-Pro.local


INFO:tensorflow:loss = 0.10880401, step = 300 (1208.081 sec)


INFO:tensorflow:loss = 0.10880401, step = 300 (1208.081 sec)
Exception in thread EventLoggerThread:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/summary/writer/event_file_writer.py", line 215, in run
    self._ev_writer.Flush()
RuntimeError: output-amazon-testexport/events.out.tfevents.1660285811.Elroy-MacBook-Pro.local; No space left on device
	Failed to flush 2 events to output-amazon-testexport/events.out.tfevents.1660285811.Elroy-MacBook-Pro.local







RuntimeError: output-amazon-testexport/events.out.tfevents.1660285811.Elroy-MacBook-Pro.local; No space left on device
	Failed to flush 2 events to output-amazon-testexport/events.out.tfevents.1660285811.Elroy-MacBook-Pro.local

In [50]:
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)

In [51]:
estimator.evaluate(input_fn=test_input_fn, steps=None)

INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.
2022-08-12 15:53:29.629041: W tensorflow/core/common_runtime/graph_constructor.cc:1526] Importing a graph with a lower producer version 27 into an existing graph with producer version 1087. Shape inference will have run different parts of the graph with different producer versions.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


TypeError: Values of eval_metric_ops must be (metric_value, update_op) tuples, given: Tensor("Identity_204:0", shape=(), dtype=float32) for key: recall

In [None]:
def getPrediction(in_sentences):
    labels = [0, 1]
    result = []
    t0 = time.time()
    input_examples = [run_classifier.InputExample(guid="", text_a = x, text_b = None, label = 0) for x in in_sentences] # here, "" is just a dummy label
    input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
    #print(input_features[0].input_ids)
    t1 = time.time()
    predictions = estimator.predict(predict_input_fn)
    #print(predictions.keys)
    t2 = time.time()
    #if len(in_sentences)==1:
        #for prediction in zip(predictions):
     #   prediction = predictions.__next__()
      #  return (in_sentences[0], np.exp(prediction['probabilities']), labels[prediction['labels']])
    #else:
    for sentence, prediction in zip(in_sentences, predictions):
        result.append((sentence, np.exp(prediction['probabilities']), labels[prediction['labels']]))
    #result.extend((sentence, np.exp(prediction['probabilities']), labels[prediction['labels']]) for sentence, prediction in zip(in_sentences, predictions))
    t3 = time.time()
    print("time total in function: ", t3 - t0)
    print("time preprocess: ", t1 - t0)
    print("time preprocess/sentence: ", (t1 - t0)/len(in_sentences))
    print("time estimator.predict: ", t2-t1)
    print("time pred: ", t3 - t2)
    print("time pred/sentence: ", (t3 - t2)/len(in_sentences))
    return result 

In [None]:
pred_sentences = np.array([
  "That movie was absolutely awful",
  "The acting was a bit lacking",
  "The film was creative and surprising",
  "Absolutely fantastic!",
  "I am happy",
  "I am happy!!!"])
#sentence = ["i had few problems with this film and i have heard a lot of criticisms saying it is overlong and overrated .  true    it is over three hours long    but i was amazed that it goes by so quickly .  i don t think it is overrated at all    i think the imdb rating is perfectly decent .  the film looks sumptuous    with gorgeous costumes and excellent effects    and the direction from james cameron rarely slips from focus .  leonardo dicaprio gives one of his best performances as jack    and kate winslet is lovely as rose .  david warner    a great actor    steals every scene he s in .  the story is very rich in detail    and is hot on character development    obvious with the love story which is very moving when it needs to be    though in the first bit of the movie it is a little slow .  the last hour is extremely riveting    and i will confess that i was on the edge of my seat    when the titanic sank .  i will also say that the last five minutes were very moving .  the music score by james horner was lovely    though i never was a huge fan of the song my heart will go on .  the      miniseries was good    but suffered from undeveloped scenarios and some historical inaccuracies .  overall    i give titanic an"]
sentence= np.array(["The actors were good but the plot was terrible", ""])
print(sentence.shape)
#sentence.reshape(-1, 1)

In [None]:
ct = time.time()
predictions_result = getPrediction(pred_sentences)
print("time total: ", time.time()-ct)

In [None]:
predictions_result