Using basic Python for MobileBERT: fine-tuning a question-answering model with a hand-written training loop

In [None]:
!pip install tensorflow_text

Collecting tensorflow_text
  Downloading tensorflow_text-2.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.2/5.2 MB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorflow_text
Successfully installed tensorflow_text-2.15.0


In [None]:
from transformers import TFMobileBertForQuestionAnswering, MobileBertConfig
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text

In [None]:
# Load the uncased English BERT preprocessing model from its hosted handle.
preprocessor = hub.load(
    "https://www.kaggle.com/models/tensorflow/bert/frameworks/TensorFlow2/variations/en-uncased-preprocess/versions/3"
)

# Expose the loaded model's `bert_pack_inputs` callable as a Keras layer; it is
# always invoked with seq_length=512.
bert_pack_inputs = hub.KerasLayer(
    preprocessor.bert_pack_inputs,
    arguments={"seq_length": 512},
)

# Expose the loaded model's `tokenize` callable as a Keras layer.
tokenize = hub.KerasLayer(preprocessor.tokenize)

In [None]:
# NOTE: `from_pretrained` is a classmethod. Building a model from `configuration`
# and then calling `.from_pretrained(...)` on that instance discards the instance
# and loads the checkpoint with the checkpoint's own configuration, so these
# overrides never reach the model. The object is kept only so the name stays
# defined; it is NOT applied to `configurated_model`.
# NOTE(review): `max_position_embeddings` / `embedding_size` presumably differ
# from the checkpoint's weight shapes, so passing this config to
# `from_pretrained` would likely fail or re-initialise weights -- confirm before
# wiring it in.
configuration = MobileBertConfig(
    hidden_act="relu",
    hidden_dropout_prob=0.2,
    max_position_embeddings=1024,
    embedding_size=512)

# Load the pretrained question-answering checkpoint directly through the
# classmethod instead of constructing a throwaway model first.
configurated_model = TFMobileBertForQuestionAnswering.from_pretrained(
    "vumichien/mobilebert-uncased-squad-v2")


# Read the raw dataset: each usable line is "question<TAB>answer".
with open('/content/drive/MyDrive/Colab Notebooks/Chatbot/dataset/health-care_qa.txt', 'r', encoding='utf-8') as file:
    dataset = file.readlines()

questions = []
answers = []
skipped_lines = 0

# Only the first 100 lines of the file are considered.
for line in dataset[:100]:
    # Split on the FIRST tab only, so an answer that itself contains a tab stays
    # intact instead of breaking the two-value unpacking.
    q, separator, a = line.partition('\t')
    if not separator:
        # No tab at all (e.g. a blank or malformed line): skip it rather than
        # aborting the whole load with a ValueError.
        skipped_lines += 1
        continue
    questions.append(q)
    answers.append(a.replace('\n', ''))

if skipped_lines:
    print(f"Skipped {skipped_lines} line(s) without a tab separator")


def preprocessing(question, answer):
    """Tokenize a question/answer pair and pack it into model inputs.

    Each string is tokenized on its own as a one-element batch, then both
    token sequences are handed to `bert_pack_inputs` as two segments.

    Args:
        question: question text (first segment).
        answer: answer text (second segment).

    Returns:
        A 3-tuple `(input_ids, attention_mask, token_type_ids)` taken from the
        packed result's "input_word_ids", "input_mask" and "input_type_ids"
        entries, in that order.
    """
    # Question first, answer second: the order fixes which segment is which.
    segments = [tokenize([piece]) for piece in (question, answer)]
    packed = bert_pack_inputs(segments)

    return (
        packed["input_word_ids"],
        packed["input_mask"],
        packed["input_type_ids"],
    )

All model checkpoint layers were used when initializing TFMobileBertForQuestionAnswering.

All the layers of TFMobileBertForQuestionAnswering were initialized from the model checkpoint at vumichien/mobilebert-uncased-squad-v2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFMobileBertForQuestionAnswering for predictions without further training.


In [None]:
# Placeholder answer-span targets: one fixed start index and one fixed end
# index, each as a one-element tensor.
start_answer_positions = tf.constant([0])
end_answer_positions = tf.constant([100])

# `model` is just another name for the loaded QA model; print its layer summary.
model = configurated_model
model.summary()

# RMSprop optimizer with a learning rate of 1e-3.
optimizer = tf.keras.optimizers.RMSprop(learning_rate=1e-3)

Model: "tf_mobile_bert_for_question_answering_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 mobilebert (TFMobileBertMa  multiple                  24581888  
 inLayer)                                                        
                                                                 
 qa_outputs (Dense)          multiple                  1026      
                                                                 
Total params: 24582914 (93.78 MB)
Trainable params: 24582914 (93.78 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
class MyQAModel(tf.keras.Model):
    """Thin Keras wrapper that forwards one QA batch and its span labels to `qamodel`.

    Args:
        qamodel: the question-answering model to wrap. It is called with the
            keyword arguments `input_ids`, `attention_mask`, `token_type_ids`,
            `start_positions`, `end_positions` and `training`.
    """

    def __init__(self, qamodel):
        super().__init__()
        self.model = qamodel

    def call(self, inputs, start_label, end_label, training=False):
        """Run the wrapped model on one batch.

        Args:
            inputs: sequence of exactly three items, in the order
                `(input_ids, attention_mask, token_type_ids)`.
            start_label: forwarded as `start_positions`.
            end_label: forwarded as `end_positions`.
            training: forwarded unchanged to the wrapped model, so callers
                control training vs. inference mode.

        Returns:
            Whatever the wrapped model returns for this batch.
        """
        input_ids, attention_mask, token_type_ids = inputs

        # Pass every tensor by keyword. Wrapping the triple in another list
        # would hand all three tensors to the wrapped model as its first input,
        # so the mask and segment ids would never reach their own arguments.
        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            start_positions=start_label,
            end_positions=end_label,
            training=training,  # honour the caller's flag instead of forcing True
        )

        return outputs

In [None]:
# Wrap the loaded QA model so the training loop can call it with labels.
qamodel_subclass = MyQAModel(qamodel=model)

In [None]:
EPOCHS = 1

# Mean loss of each epoch, in order. Created once so it survives across epochs.
total_losses = []

for epoch in range(EPOCHS):
    # 1-based epoch number, consistent with the summary line printed below.
    print(f"Epoch: {epoch + 1}/{EPOCHS}")

    # Per-example losses of THIS epoch. Reset once per epoch (not once per
    # example) so the epoch mean covers every example.
    losses = []

    for i in range(len(questions)):
        # Tokenization/packing needs no gradient, so keep it off the tape.
        input_ids, attention_mask, token_type_ids = preprocessing(questions[i], answers[i])

        # Record only the forward pass and the loss computation.
        with tf.GradientTape() as tape:
            outputs = qamodel_subclass([input_ids, attention_mask, token_type_ids], start_answer_positions, end_answer_positions, training=True)
            loss = tf.math.reduce_mean(outputs.loss)

        losses.append(round(float(loss), 2))

        # Compute and apply gradients after leaving the tape context, so the
        # backward pass itself is not recorded.
        gradients = tape.gradient(loss, qamodel_subclass.trainable_variables)
        optimizer.apply_gradients(zip(gradients, qamodel_subclass.trainable_variables))

    # Mean over all examples seen in this epoch.
    epoch_loss = tf.math.reduce_mean(losses)
    total_losses.append(epoch_loss)

    print(f'Epoch {epoch + 1}/{EPOCHS}, Loss: {epoch_loss}')

Epoch: 0/1




Epoch 1/1, Loss: 2170.2900390625


# Export / save the model

In [None]:
import pathlib
import os

# Write `model` out in TensorFlow SavedModel format to the relative
# directory "saved_model".
export_dir = "saved_model"
tf.saved_model.save(obj=model, export_dir=export_dir)