In [1]:
# Mount Google Drive at /content/drive; the saved models used later in this
# notebook are loaded from paths under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Install exact, pinned versions of every dependency used by this notebook.
# NOTE(review): `%pip install` is generally preferred over `!pip install`
# because it targets the running kernel's environment — confirm the magic is
# available on this runtime before switching.
!pip install numpy==1.19.4
!pip install tensorflow==2.4.0
!pip install tensorflow-hub==0.10.0
!pip install bert-for-tf2==0.14.7
!pip install sentencepiece==0.1.94

Collecting bert-for-tf2==0.14.7
[?25l  Downloading https://files.pythonhosted.org/packages/18/d3/820ccaf55f1e24b5dd43583ac0da6d86c2d27bbdfffadbba69bafe73ca93/bert-for-tf2-0.14.7.tar.gz (41kB)
[K     |████████████████████████████████| 51kB 5.5MB/s 
[?25hCollecting py-params>=0.9.6
  Downloading https://files.pythonhosted.org/packages/a4/bf/c1c70d5315a8677310ea10a41cfc41c5970d9b37c31f9c90d4ab98021fd1/py-params-0.9.7.tar.gz
Collecting params-flow>=0.8.0
  Downloading https://files.pythonhosted.org/packages/a9/95/ff49f5ebd501f142a6f0aaf42bcfd1c192dc54909d1d9eb84ab031d46056/params-flow-0.8.2.tar.gz
Building wheels for collected packages: bert-for-tf2, py-params, params-flow
  Building wheel for bert-for-tf2 (setup.py) ... [?25l[?25hdone
  Created wheel for bert-for-tf2: filename=bert_for_tf2-0.14.7-cp36-none-any.whl size=30537 sha256=f31f187ee2b2d3e4c209b920070c23056c8d87f4e1a71077be19bddb497651d9
  Stored in directory: /root/.cache/pip/wheels/e1/f8/e2/b98f79a6b8cc898d8e4102b83acb8a098

In [3]:
import numpy as np

try:
    %tensorflow_version 2.x
except Exception:
    pass

import bert
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras import layers

In [4]:
# Frozen (non-trainable) uncased English BERT encoder fetched from TF Hub.
bert_layer = hub.KerasLayer(
    "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1",
    trainable=False,
)

# The hub module carries its own vocabulary file and casing flag; read both
# so the tokenizer is configured from the same module as the encoder.
vocab_file = bert_layer.resolved_object.vocab_file.asset_path.numpy()
do_lower_case = bert_layer.resolved_object.do_lower_case.numpy()

# Notebook-wide tokenizer used by the helper functions defined below.
FullTokenizer = bert.bert_tokenization.FullTokenizer
tokenizer = FullTokenizer(vocab_file, do_lower_case)

In [13]:
def get_ids(tokens):
    """Convert `tokens` to vocabulary ids using the notebook-level `tokenizer`."""
    token_ids = tokenizer.convert_tokens_to_ids(tokens)
    return token_ids

def get_mask(tokens):
    """Build the attention mask for `tokens`.

    Returns an integer NumPy array holding 0 at every position whose token is
    exactly '[PAD]' and 1 at every other position.
    """
    is_real_token = np.char.not_equal(tokens, '[PAD]')
    return is_real_token.astype(int)

def get_segments(tokens):
    """Assign a segment id (0 or 1) to every token.

    Ids start at 0 and flip between 0 and 1 after each '[SEP]' token; the
    '[SEP]' itself keeps the id of the segment it terminates.

    Returns a list of ints with the same length as `tokens`.
    """
    segment_ids = []
    active_segment = 0
    for tok in tokens:
        segment_ids.append(active_segment)
        # Flip only after recording, so the separator stays in its own segment.
        if tok == '[SEP]':
            active_segment = 0 if active_segment else 1
    return segment_ids

In [19]:
# Load both saved classifiers from Drive, in order: the original model first,
# then the v1 model. Each is used by its matching get_predictions_* helper.
old_model, new_model = map(
    tf.keras.models.load_model,
    (
        '/content/drive/MyDrive/Text-Classification/model',
        '/content/drive/MyDrive/Text-Classification-v1/model',
    ),
)

In [23]:
def get_predictions_old(model, sentence):
    """Classify `sentence` with a model that consumes raw token ids.

    Args:
        model: callable invoked as `model(inputs, training=False)`.
        sentence: text to classify.

    Returns:
        'neutral' when the sentence has fewer than three whitespace-separated
        words (the model is not called in that case); otherwise "negative"
        when floor(2 * output) is zero and "positive" when it is not.
    """
    word_count = len(sentence.split())
    if word_count < 3:
        return 'neutral'

    # Word pieces -> vocabulary ids, then add a leading batch axis.
    token_ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(sentence))
    batch = tf.expand_dims(token_ids, 0)
    score = model(batch, training=False)

    # floor(2 * score) is 0 (falsy) exactly when 0 <= score < 0.5.
    if np.floor(score * 2):
        return "positive"
    return "negative"

In [18]:
def get_predictions_new(model, sentence):
    """Classify `sentence` with a model that consumes ids, mask and segments.

    The sentence is tokenized and wrapped in '[CLS]' ... '[SEP]'. Token ids,
    attention mask and segment ids are each cast to int32, stacked along a new
    first axis, and given a leading batch axis before being passed to `model`.

    Args:
        model: callable invoked as `model(inputs, training=False)`.
        sentence: text to classify.

    Returns:
        "negative" when floor(2 * output) is zero, otherwise "positive".
    """
    tokens = ['[CLS]', *tokenizer.tokenize(sentence), '[SEP]']

    # Order matters: ids, then mask, then segment ids.
    features = [
        tf.cast(values, dtype=tf.int32)
        for values in (get_ids(tokens), get_mask(tokens), get_segments(tokens))
    ]
    batch = tf.expand_dims(tf.stack(features, axis=0), 0)
    score = model(batch, training=False)

    # floor(2 * score) is 0 (falsy) exactly when 0 <= score < 0.5.
    return "positive" if np.floor(score * 2) else "negative"

In [26]:
# Spot-check the original model on two hand-written sentences.
for sentence in ('today is my favourite day', 'today is not a awful day'):
    print(get_predictions_old(old_model, sentence))

positive
negative


In [28]:
# Spot-check the v1 model on two hand-written sentences.
# The model is bound to `new_model` when it is loaded; the bare name `model`
# is never defined in this notebook, so using it fails on a fresh kernel.
print(get_predictions_new(new_model, 'today is my favourite day'))
print(get_predictions_new(new_model, 'today is not my favourite day'))

positive
negative
