# Install

In [None]:
# Install PyPI packages required in this guide, including BentoML
!pip install -q --pre bentoml  # install preview version of BentoML for this guide
!pip install transformers

[K     |████████████████████████████████| 5.0MB 30.3MB/s 
[K     |████████████████████████████████| 133kB 54.6MB/s 
[K     |████████████████████████████████| 112kB 45.1MB/s 
[K     |████████████████████████████████| 174kB 58.2MB/s 
[K     |████████████████████████████████| 71kB 10.0MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
[K     |████████████████████████████████| 1.3MB 42.3MB/s 
[K     |████████████████████████████████| 81kB 9.7MB/s 
[K     |████████████████████████████████| 1.2MB 38.0MB/s 
[K     |████████████████████████████████| 153kB 37.0MB/s 
[K     |████████████████████████████████| 92kB 11.5MB/s 
[K     |████████████████████████████████| 3.2MB 39.8MB/s 
[K     |████████████████████████████████| 133kB 49.2MB/s 
[K     |████████████████████████████████| 7.7MB 42.7MB/s 
[K     |████████████████████████████████| 81kB 9.4MB/s 
[K     |███

# BentoML script

In [None]:
%%writefile bento_service.py
import bentoml
from bentoml.adapters import StringInput
from bentoml.frameworks.transformers import TransformersModelArtifact
from bentoml.frameworks.sklearn import SklearnModelArtifact
import re
import tensorflow as tf
import numpy as np
@bentoml.env(infer_pip_packages=True)
@bentoml.artifacts([TransformersModelArtifact('model'),SklearnModelArtifact('classifier')])
class HatespeechDetector(bentoml.BentoService):
  """BentoML service that classifies a rolling window of recent chat messages.

  Each call to `chat` cleans the incoming text, appends it to `chat_list`,
  joins the window with ' [SEP] ', runs it through the tokenizer and the
  packed model (all layers except the last), and feeds the first position of
  `last_hidden_state` to the packed scikit-learn classifier.

  NOTE(review): `chat_list` lives on the service instance, so every caller of
  `chat` shares one message window - confirm that single-conversation use is
  intended.
  """

  def __init__(self,maximum_len=1000,maximum_sentence=2):
    """Create the service.

    Args:
      maximum_len: character budget for the whole message window.
      maximum_sentence: maximum number of messages kept in the window.
    """
    super().__init__()
    self.maximum_len = maximum_len
    self.maximum_sentence = maximum_sentence
    self.chat_list = []
    # Filled lazily on the first `chat` call, once artifacts are available.
    self.model = None
    self.classifier = None
    self.tokenizer = None

  def _load_artifacts(self):
    """Resolve model, classifier and tokenizer from the packed artifacts once."""
    if self.model is None:
      # Drop the last layer of the packed model so predict() yields hidden states.
      self.model = tf.keras.Sequential(self.artifacts.model.get("model").layers[:-1])
    if self.classifier is None:
      self.classifier = self.artifacts.classifier
    if self.tokenizer is None:
      self.tokenizer = self.artifacts.model.get("tokenizer")

  def chat_stack(self,last_chat):
    """Append `last_chat` and evict the oldest messages until the window fits.

    The window is trimmed from the front until it holds at most
    `maximum_sentence` messages and at most `maximum_len` characters in total.
    The newest message is always kept, even when it alone exceeds
    `maximum_len`; previously such a message emptied the window and the next
    `chat_sum` call raised IndexError.
    """
    self.chat_list.append(last_chat)
    total_chars = sum(len(chat) for chat in self.chat_list)
    while len(self.chat_list) > 1 and (
        total_chars > self.maximum_len
        or len(self.chat_list) > self.maximum_sentence):
      total_chars -= len(self.chat_list.pop(0))

  def chat_sum(self):
    """Return the window as one string, messages separated by ' [SEP] '.

    Returns an empty string when the window is empty.
    """
    return ' [SEP] '.join(self.chat_list)

  def detecting(self):
    """Classify the current window.

    Returns:
      A string built as f"{result} {[text]}", where `result` is whatever
      `classifier.predict` returned and `text` is the joined window.
    """
    text = self.chat_sum()
    encoded = self.tokenizer([text], truncation=True, padding=True)
    dataset = tf.data.Dataset.from_tensor_slices(dict(encoded))
    # Use the hidden state at the first position as the feature vector.
    features = self.model.predict(dataset.batch(1)).last_hidden_state[:,0,:]
    result = self.classifier.predict(features)
    return f"{result} {[text]}"

  def preprocess(self, text):
    """Clean a raw message before it enters the window.

    Steps, in order: remove `http...` tokens, a leading run of two or more
    '!', a leading 'rt', and `@...` tokens; drop every character that is not
    an ASCII letter, '!', '?', '$', '%', an apostrophe or a space; lowercase.
    Surrounding whitespace is stripped after each step.
    """
    text = re.sub(r'http\S+', '', text).strip()
    text = re.sub(r'^!!+', '', text).strip()
    # NOTE(review): this match is case-sensitive and runs before lowercasing,
    # so an upper-case "RT" prefix is not removed - confirm this matches the
    # preprocessing used when the classifier was trained.
    text = re.sub(r'^(rt)', '', text).strip()
    text = re.sub(r'@\S+', '', text).strip()
    text = re.sub(r"[^a-zA-Z!?$%' ]", '', text).strip()
    return text.lower()

  @bentoml.api(input=StringInput(), batch=False)
  def chat(self,text):
    """API endpoint: add one message to the window and classify the window.

    Args:
      text: the raw incoming message.

    Returns:
      The string produced by `detecting`.
    """
    self._load_artifacts()
    self.chat_stack(self.preprocess(text))
    return self.detecting()

Writing bento_service.py


# Load weights

In [None]:
from transformers import TFElectraForSequenceClassification
from transformers import ElectraTokenizer
import tensorflow as tf
import pickle

# Tokenizer and model architecture are created from the public ELECTRA-small checkpoint.
tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-discriminator')
model_electra = TFElectraForSequenceClassification.from_pretrained("google/electra-small-discriminator")
# Replace the model's weights with the ones stored at this path
# (presumably a mounted Google Drive - the path must exist before this cell runs).
model_electra.load_weights("/content/drive/MyDrive/HateSpeech/Weight/ELECTRA")
# SECURITY: pickle.load can execute arbitrary code from the file; only load files you created yourself.
# The context manager closes the file handle, which a bare open() call would leave open.
with open("/content/drive/MyDrive/HateSpeech/Weight/ML/LogisticRegression_electra.sav", 'rb') as classifier_file:
  classifier = pickle.load(classifier_file)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=29.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=466062.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=665.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=54466044.0, style=ProgressStyle(descrip…




Some layers from the model checkpoint at google/electra-small-discriminator were not used when initializing TFElectraForSequenceClassification: ['discriminator_predictions']
- This IS expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Print the layer and parameter summary of the loaded model.
model_electra.summary()

Model: "tf_electra_for_sequence_classification"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
electra (TFElectraMainLayer) multiple                  13483008  
_________________________________________________________________
classifier (TFElectraClassif multiple                  66306     
Total params: 13,549,314
Trainable params: 13,549,314
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Smoke test: run one sample through the same tokenizer -> encoder -> classifier
# steps the service uses. Each stage gets its own name so the cell does not
# rebind `input_text` to four different kinds of object.
input_text = "mother fucker"
encoded = tokenizer([input_text], truncation=True, padding=True)
print("1",encoded)
dataset = tf.data.Dataset.from_tensor_slices((dict(encoded)))
print("2",dataset)
print()
# All layers except the last one, so predict() exposes last_hidden_state.
encoder = tf.keras.Sequential(model_electra.layers[:-1])
# Hidden state at the first position is the feature vector for the classifier.
features = encoder.predict(dataset.batch(1)).last_hidden_state[:,0,:]
print("3",features.shape)
result = classifier.predict(features)
result

1 {'input_ids': [[101, 2388, 6616, 2121, 102]], 'token_type_ids': [[0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1]]}
2 <TensorSliceDataset shapes: {input_ids: (5,), token_type_ids: (5,), attention_mask: (5,)}, types: {input_ids: tf.int32, token_type_ids: tf.int32, attention_mask: tf.int32}>

Consider rewriting this model with the Functional API.
Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.
3 (1, 256)


array([1], dtype=int32)

# Packing

In [None]:
# input_text = tokenizer(["hello"], truncation=True, padding=True)
# input_text = tf.data.Dataset.from_tensor_slices((dict(input_text)))
# input_text = model_electra.predict(input_text).last_hidden_state[:,0,:]

In [None]:
# Import the service class from the bento_service.py module.
from bento_service import HatespeechDetector
# The 'model' artifact is packed as a dict holding both the model and its
# tokenizer; the service reads them back with .get("model") / .get("tokenizer").
artifact = {"model": model_electra, "tokenizer": tokenizer}
chatting = HatespeechDetector()
chatting.pack('model',artifact)
chatting.pack('classifier',classifier)
# Save the packed service; the value returned by save() is kept in saved_path.
saved_path = chatting.save()



  """)


[2021-07-08 11:04:55,394] INFO - BentoService bundle 'HatespeechDetector:20210708110453_4FD7EB' saved to: /root/bentoml/repository/HatespeechDetector/20210708110453_4FD7EB


# Distribution

In [None]:
# Serve the latest saved HatespeechDetector bundle; with --run-with-ngrok the log
# reports an ngrok URL. The cell keeps running until it is interrupted.
!bentoml serve HatespeechDetector:latest --run-with-ngrok

  """)
[2021-07-08 11:08:46,192] INFO - Getting latest version HatespeechDetector:20210708110453_4FD7EB
[2021-07-08 11:08:46,206] INFO - Starting BentoML API proxy in development mode..
[2021-07-08 11:08:46,208] INFO - Starting BentoML API server in development mode..
[2021-07-08 11:08:46,418] INFO - Your system nofile limit is 1048576, which means each instance of microbatch service is able to hold this number of connections at same time. You can increase the number of file descriptors for the server process, or launch more microbatch instances to accept more concurrent connection.
(Press CTRL+C to quit)
2021-07-08 11:08:47.192037: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0
[2021-07-08 11:08:48,216] INFO -  * Running on http://1edb94f3956c.ngrok.io
[2021-07-08 11:08:48,217] INFO -  * Traffic stats available on http://127.0.0.1:4040
2021-07-08 11:08:48.885872: I tensorflow/stream_executor/platform/default/dso_lo