Install dependencies

In [None]:
pip install -q -U tensorflow-text

In [None]:
pip install -q tf-models-official==2.4.0

In [None]:
pip install transformers

In [1]:
import os
import io
import re
import sys

import numpy as np
import pandas as pd
from time import time
import matplotlib.pyplot as plt

import pickle
from csv import reader

import tensorflow as tf

from transformers import BertTokenizer, TFBertModel

In [2]:
# path = 'drive/MyDrive/MIDS/chemical_patent_cer_ee'

#### Upload sample data

In [3]:
# full_path = f'{path}/data/sre_em/sre_em_sample.csv'
# full_path = '../data/sre_em/sre_em_sample.csv'

In [2]:
max_length = 500

In [3]:
with open(r"../pickles/sample/train_em_pool_base_cased.pickle", "rb") as file:
    train_em_pool = pickle.load(file)

In [36]:
model_inputs_train = [x.astype(np.int) for x in train_em_pool[0][:3]] + [x.astype(np.bool) for x in train_em_pool[0][3:]]
model_labels_train = train_em_pool[1]
model_labels_train = tf.one_hot(model_labels_train, depth=2)

In [27]:
with open(r"../pickles/sample/test_em_pool_base_cased.pickle", "rb") as file:
    test_em_pool = pickle.load(file)

In [35]:
model_inputs_test = [np.array(x).astype(np.int) for x in test_em_pool[0][:3]] + [np.array(x).astype(np.bool) for x in test_em_pool[0][3:]]
model_labels_test = test_em_pool[1]
model_labels_test = tf.one_hot(model_labels_test, depth=2)

Start Entity

In [102]:
with open(r"../pickles/sample/train_em_start_base_cased.pickle", "rb") as file:
    train_em_start = pickle.load(file)

In [103]:
model_inputs_train2 = [x.astype(np.int) for x in train_em_start[0][:3]] + [x.astype(np.bool) for x in train_em_start[0][3:]]
model_labels_train2 = train_em_start[1]
model_labels_train2 = tf.one_hot(model_labels_train2, depth=2)

In [104]:
with open(r"../pickles/sample/test_em_start_base_cased.pickle", "rb") as file:
    test_em_start = pickle.load(file)

In [105]:
model_inputs_test2 = [np.array(x).astype(np.int) for x in test_em_start[0][:3]] + [np.array(x).astype(np.bool) for x in test_em_start[0][3:]]
model_labels_test2 = test_em_start[1]
model_labels_test2 = tf.one_hot(model_labels_test2, depth=2)

#### Build models

In [37]:
def sre_pool_model(max_length):
    
    # input placeholders
    in_id = tf.keras.layers.Input(shape=(max_length,), dtype='int32', name='input_ids')
    in_mask = tf.keras.layers.Input(shape=(max_length,), dtype='int32', name='input_masks')
    in_segment = tf.keras.layers.Input(shape=(max_length,), dtype='int32', name='segment_ids')
    e1_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.bool, name='e1_mask')
    e2_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.bool, name='e2_mask')
    
    inputs = [in_id, in_mask, in_segment, e1_mask, e2_mask]
    
    # bert
    bert_inputs = inputs[:3]
    bert_layer = TFBertModel.from_pretrained('bert-base-cased')
    
    # insert freezing section (optional)
    
    bert_output = bert_layer(bert_inputs)[0]
    
    #post transformer layers (including prediction layer)
    
    e1_mention = tf.ragged.boolean_mask(bert_output, e1_mask, name='e1_mention')
    e1_mention = e1_mention.to_tensor()
    e2_mention = tf.ragged.boolean_mask(bert_output, e2_mask, name='e2_mention')
    e2_mention = e2_mention.to_tensor()
    
    e1_max = tf.math.reduce_max(e1_mention, axis=1, name='e1_max')
    e2_max = tf.math.reduce_max(e2_mention, axis=1, name='e2_max')
    
    dense_input = tf.keras.layers.Concatenate()([e1_max, e2_max])
    
    dense = tf.keras.layers.Dense(256, activation='relu', name='dense')(dense_input)
    dense = tf.keras.layers.Dropout(rate=0.1)(dense_input)
    predictions = tf.keras.layers.Dense(2, activation='softmax', name='sre')(dense)
    
    model = tf.keras.Model(inputs=inputs, outputs=predictions, name='sre_pool')
    
    optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5)
    loss_fn = tf.keras.losses.BinaryCrossentropy()
    accuracy = tf.keras.metrics.Accuracy()
    binary_accuracy = tf.keras.metrics.BinaryAccuracy()
    recall = tf.keras.metrics.Recall()
    precision = tf.keras.metrics.Precision()
    
    model.compile(loss="binary_crossentropy", optimizer=optimizer) #, metrics=[accuracy, binary_accuracy, recall, precision])
    
    print()
    print("=== SRE Max Pool Model ===")
    print('Shape of the BERT layer output:', bert_output)
    print('Prediction:', predictions)
    print()
    
    model.summary()
    
    return model

In [47]:
def sre_cls_model(max_length):
    
    # input placeholders
    in_id = tf.keras.layers.Input(shape=(max_length,), dtype='int32', name='input_ids')
    in_mask = tf.keras.layers.Input(shape=(max_length,), dtype='int32', name='input_masks')
    in_segment = tf.keras.layers.Input(shape=(max_length,), dtype='int32', name='segment_ids')
    
    bert_inputs = [in_id, in_mask, in_segment]
    
    # bert
    bert_layer = TFBertModel.from_pretrained('bert-base-cased')
    
    # insert freezing section (optional)
    
    bert_output = bert_layer(bert_inputs)[1]
    
    #post transformer layers (including prediction layer)
    
    dense = tf.keras.layers.Dense(256, activation='relu', name='dense')(bert_output)
    dense = tf.keras.layers.Dropout(rate=0.1)(dense)
    predictions = tf.keras.layers.Dense(2, activation='softmax', name='sre')(dense)
    
    model = tf.keras.Model(inputs=bert_inputs, outputs=predictions, name='sre_pool')
    
    optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5)
    loss_fn = tf.keras.losses.BinaryCrossentropy()
    accuracy = tf.keras.metrics.Accuracy()
    binary_accuracy = tf.keras.metrics.BinaryAccuracy()
    recall = tf.keras.metrics.Recall()
    precision = tf.keras.metrics.Precision()
    
    model.compile(loss="binary_crossentropy", optimizer=optimizer) #, metrics=[accuracy, binary_accuracy, recall, precision])
    
    print()
    print("=== SRE Max Pool Model ===")
    print('Shape of the BERT layer output:', bert_output)
    print('Prediction:', predictions)
    print()
    
    model.summary()
    
    return model

In [99]:
def sre_start_model(max_length):
    
    # input placeholders
    in_id = tf.keras.layers.Input(shape=(max_length,), dtype='int32', name='input_ids')
    in_mask = tf.keras.layers.Input(shape=(max_length,), dtype='int32', name='input_masks')
    in_segment = tf.keras.layers.Input(shape=(max_length,), dtype='int32', name='segment_ids')
    e1_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.bool, name='e1_mask')
    e2_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.bool, name='e2_mask')
    
    inputs = [in_id, in_mask, in_segment, e1_mask, e2_mask]
    
    # bert
    bert_inputs = inputs[:3]
    bert_layer = TFBertModel.from_pretrained('bert-base-cased')
    
    # insert freezing section (optional)
    
    bert_output = bert_layer(bert_inputs)[0]
    
    #post transformer layers (including prediction layer)
    
    e1_start = tf.ragged.boolean_mask(bert_output, e1_mask, name='e1_mention')
    #e1_start = e1_start.to_tensor()
    e1_start = tf.squeeze(e1_start, axis=1)
    e2_start = tf.ragged.boolean_mask(bert_output, e2_mask, name='e2_mention')
    #e2_start = e2_start.to_tensor()
    e2_start = tf.squeeze(e2_start, axis=1)
    
    dense_input = tf.keras.layers.Concatenate()([e1_start, e2_start])
    
    dense = tf.keras.layers.Dense(256, activation='relu', name='dense')(dense_input)
    dense = tf.keras.layers.Dropout(rate=0.1)(dense_input)
    predictions = tf.keras.layers.Dense(2, activation='softmax', name='sre')(dense)
    
    model = tf.keras.Model(inputs=inputs, outputs=predictions, name='sre_pool')
    
    optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5)
    loss_fn = tf.keras.losses.BinaryCrossentropy()
    accuracy = tf.keras.metrics.Accuracy()
    binary_accuracy = tf.keras.metrics.BinaryAccuracy()
    recall = tf.keras.metrics.Recall()
    precision = tf.keras.metrics.Precision()
    
    model.compile(loss="binary_crossentropy", optimizer=optimizer) #, metrics=[accuracy, binary_accuracy, recall, precision])
    
    print()
    print("=== SRE Max Pool Model ===")
    print('Shape of the BERT layer output:', bert_output)
    print('Prediction:', predictions)
    print()
    
    model.summary()
    
    return model

#### Model Trial Runs

In [38]:
tf.keras.backend.clear_session()

In [39]:
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)


model = sre_pool_model(max_length)

model.fit(
    model_inputs_train, 
    {"sre": model_labels_train},
    validation_data=(model_inputs_test, {"sre": model_labels_test}),
    epochs=5,
    batch_size=16
)

Some layers from the model checkpoint at bert-base-cased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.



=== SRE Max Pool Model ===
Shape of the BERT layer output: KerasTensor(type_spec=TensorSpec(shape=(None, 500, 768), dtype=tf.float32, name=None), name='tf_bert_model/bert/encoder/layer_._11/output/LayerNorm/batchnorm/add_1:0', description="created by layer 'tf_bert_model'")
Prediction: KerasTensor(type_spec=TensorSpec(shape=(None, 2), dtype=tf.float32, name=None), name='sre/Softmax:0', description="created by layer 'sre'")

Model: "sre_pool"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_ids (InputLayer)          [(None, 500)]        0                                            
__________________________________________________________________________________________________
input_masks (InputLayer)        [(None, 500)]        0                                            
__________________________________________________________

KeyboardInterrupt: 

In [45]:
tf.keras.backend.clear_session()

In [48]:
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)


model = sre_cls_model(max_length)

model.fit(
    model_inputs_train[:3], 
    {"sre": model_labels_train},
    validation_data=(model_inputs_test[:3], {"sre": model_labels_test}),
    epochs=5,
    batch_size=16
)

Some layers from the model checkpoint at bert-base-cased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.



=== SRE Max Pool Model ===
Shape of the BERT layer output: KerasTensor(type_spec=TensorSpec(shape=(None, 768), dtype=tf.float32, name=None), name='tf_bert_model_1/bert/pooler/dense/Tanh:0', description="created by layer 'tf_bert_model_1'")
Prediction: KerasTensor(type_spec=TensorSpec(shape=(None, 2), dtype=tf.float32, name=None), name='sre/Softmax:0', description="created by layer 'sre'")

Model: "sre_pool"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_ids (InputLayer)          [(None, 500)]        0                                            
__________________________________________________________________________________________________
input_masks (InputLayer)        [(None, 500)]        0                                            
_____________________________________________________________________________________________

KeyboardInterrupt: 

In [106]:
tf.keras.backend.clear_session()

In [107]:
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)


model = sre_start_model(max_length)

model.fit(
    model_inputs_train2, 
    {"sre": model_labels_train2},
    validation_data=(model_inputs_test2, {"sre": model_labels_test2}),
    epochs=5,
    batch_size=16
)

Some layers from the model checkpoint at bert-base-cased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.



=== SRE Max Pool Model ===
Shape of the BERT layer output: KerasTensor(type_spec=TensorSpec(shape=(None, 500, 768), dtype=tf.float32, name=None), name='tf_bert_model/bert/encoder/layer_._11/output/LayerNorm/batchnorm/add_1:0', description="created by layer 'tf_bert_model'")
Prediction: KerasTensor(type_spec=TensorSpec(shape=(None, 2), dtype=tf.float32, name=None), name='sre/Softmax:0', description="created by layer 'sre'")

Model: "sre_pool"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_ids (InputLayer)          [(None, 500)]        0                                            
__________________________________________________________________________________________________
input_masks (InputLayer)        [(None, 500)]        0                                            
__________________________________________________________

KeyboardInterrupt: 