In [1]:
%load_ext autoreload
%autoreload 2

# Converting a Transformers TensorFlow model to TFLite

https://www.tensorflow.org/lite/guide/signatures

In [133]:
import tensorflow as tf
import numpy as np

from transformers import TFAutoModelForSequenceClassification, TFAutoModel, AutoTokenizer

#MODEL_NAME = "j-hartmann/emotion-english-distilroberta-base"
MODEL_NAME = "bhadresh-savani/distilbert-base-uncased-emotion"
SAVED_MODEL_NAME = "bhadresh-emotion-classifier-output"
TOKENIZER_NAME = "bert-base-uncased"
TFLITE_OUTPUT = "tflite/bhadresh-output"


def load_vocab(filename, reverse=False):
    data = load_data(filename, reverse=reverse)
    return dict(data)


def load_data(filename, reverse=False):
    def prepare(i, line):
        line = line.strip()
        return (i, line) if reverse else (line, i)
        
    with open(filename, "r") as f:
        lines = f.readlines()
        #lines = list(filter(lambda l: not l.startswith("##"), lines))
        return [prepare(i, line) for i, line in enumerate(lines)]
 

class DataUtil:
    def __init__(self):
        self.token2id = load_vocab("vocab.txt")
        self.id2token = load_vocab("vocab.txt", reverse=True) 
    
    def tokenize(self, text, padding=True, max_len=87):
        tokens = [line.strip().lower() for line in text.split(" ")]
        # Starting
        token_ids = [101]
        count = 1

        for token in tokens:
            if token == 'ive':
                part1 = self.token2id.get("iv")
                part2 = self.token2id.get("##e")
                token_ids += [part1, part2]
                continue
                
            token_id = self.token2id.get(token)
            token_ids.append(token_id)
            count += 1

        token_ids.append(102)
        count +=1

        # Padding
        if padding:
            for _ in range(count, max_len):
                 token_ids.append(0)
        
        return token_ids
    
    def preprocess(self, text, padding=True, max_len=87):
        token_ids = self.tokenize(text, padding=padding, max_len=max_len)
        return np.array([token_ids], dtype=np.int32) # float32 for text_classification.tflite


data_util = DataUtil()

In [134]:
data_util.tokenize("ive been")

[101,
 4921,
 2063,
 2042,
 102,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

# Load Transformers Model (TF)

In [2]:
model = TFAutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

2023-01-04 16:43:13.162319: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2023-01-04 16:43:13.162343: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: 0701f5cb6f1a
2023-01-04 16:43:13.162348: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: 0701f5cb6f1a
2023-01-04 16:43:13.162402: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 510.73.5
2023-01-04 16:43:13.162416: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 510.73.5
2023-01-04 16:43:13.162420: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 510.73.5
2023-01-04 16:43:13.162601: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instruct

In [3]:
model

<transformers.models.distilbert.modeling_tf_distilbert.TFDistilBertForSequenceClassification at 0x7f55ed3ed630>

In [4]:
model.config

DistilBertConfig {
  "_name_or_path": "bhadresh-savani/distilbert-base-uncased-emotion",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "sadness",
    "1": "joy",
    "2": "love",
    "3": "anger",
    "4": "fear",
    "5": "surprise"
  },
  "initializer_range": 0.02,
  "label2id": {
    "anger": 3,
    "fear": 4,
    "joy": 1,
    "love": 2,
    "sadness": 0,
    "surprise": 5
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.20.0",
  "vocab_size": 30522
}

# Load tokenizer

In [99]:
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)

In [100]:
def tokenize(batch):
    return tokenizer(batch["text"], padding=True, truncation=True)

In [101]:
from datasets import load_dataset
# emotions = load_dataset("emotion")
emotions = load_dataset("SetFit/emotion")  # emotion is broken right now



  0%|          | 0/3 [00:00<?, ?it/s]

In [102]:
emotions_encoded = emotions.map(tokenize, batched=True, batch_size=None)
emotions_encoded.set_format("tf", columns=["input_ids", "attention_mask"])

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

# Test original model

https://huggingface.co/course/chapter2/2?fw=tf#model-heads-making-sense-out-of-numbers

In [139]:
test_id = 9
emotions["train"][test_id]

{'text': 'i feel romantic too', 'label': 2, 'label_text': 'love'}

In [140]:
test_input_data = emotions_encoded["train"][test_id]

In [141]:
test_input_data

{'input_ids': <tf.Tensor: shape=(87,), dtype=int64, numpy=
 array([ 101, 1045, 2514, 6298, 2205,  102,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0])>,
 'attention_mask': <tf.Tensor: shape=(87,), dtype=int64, numpy=
 array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [142]:
outputs = model(test_input_data)

In [143]:
emotions["train"][test_id]  # original input data before preprocessing

{'text': 'i feel romantic too', 'label': 2, 'label_text': 'love'}

In [144]:
outputs.logits.shape

TensorShape([1, 6])

In [145]:
outputs.logits

<tf.Tensor: shape=(1, 6), dtype=float32, numpy=
array([[-1.460196 , -0.5406702,  5.794202 , -1.9637783, -1.5457897,
        -1.2147366]], dtype=float32)>

In [146]:
predictions = tf.math.softmax(outputs.logits, axis=-1)
print(predictions)

tf.Tensor(
[[7.0391811e-04 1.7654973e-03 9.9555922e-01 4.2542111e-04 6.4617366e-04
  8.9975417e-04]], shape=(1, 6), dtype=float32)


In [147]:
predictions

<tf.Tensor: shape=(1, 6), dtype=float32, numpy=
array([[7.0391811e-04, 1.7654973e-03, 9.9555922e-01, 4.2542111e-04,
        6.4617366e-04, 8.9975417e-04]], dtype=float32)>

# Convert and save model as tflite

In [5]:
def convert(model, saved_model_path):
    # Attempt conversion to tflite
    # Set TensorSpec - dynamic shape though
    input_spec = tf.TensorSpec([1, None], tf.int32)
    
    print(input_spec)

    # Save to correct tensor dims
    model._saved_model_inputs_spec = None
    model._set_save_spec(input_spec)

    # Convert
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    tflite_model = converter.convert()

    # Save the model.
    with open(saved_model_path, 'wb') as f:
      f.write(tflite_model)
    
    print(f"Saved to {saved_model_path}")
    
    return tflite_model

In [6]:
tflite_model = convert(model, SAVED_MODEL_NAME)

TensorSpec(shape=(1, None), dtype=tf.int32, name=None)




INFO:tensorflow:Assets written to: /tmp/tmp82_hxel4/assets


INFO:tensorflow:Assets written to: /tmp/tmp82_hxel4/assets
2023-01-04 16:43:38.263331: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2023-01-04 16:43:38.263362: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2023-01-04 16:43:38.263851: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /tmp/tmp82_hxel4
2023-01-04 16:43:38.280089: I tensorflow/cc/saved_model/reader.cc:81] Reading meta graph with tags { serve }
2023-01-04 16:43:38.280107: I tensorflow/cc/saved_model/reader.cc:122] Reading SavedModel debug info (if present) from: /tmp/tmp82_hxel4
2023-01-04 16:43:38.333620: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled
2023-01-04 16:43:38.352236: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2023-01-04 16:43:38.662523: I tensorflow/cc/saved_model/loader.cc:212] Running initializatio

Saved to bhadresh-emotion-classifier-output


In [None]:
# saved_model = tf.saved_model.load(SAVED_MODEL_NAME)
# print(saved_model.signatures)
# print(saved_model.signatures)
# saved_model.signatures[tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY].inputs[0]
# concrete_func = model.signatures[tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
# new_model = tf.keras.models.load_model("output")
# new_model.summary()
# concrete_func = new_model.signatures[tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
# tf.saved_model.save(new_model, "output-new", signatures={"infer": concrete_func})
# converter = tf.lite.TFLiteConverter.from_saved_model("output-new")

"""
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,  # enable TensorFlow Lite ops.
    tf.lite.OpsSet.SELECT_TF_OPS  # enable TensorFlow ops.
]
converter.experimental_enable_resource_variables = True
tflite_model = converter.convert()
"""


# Test tflite model

In [7]:
from pathlib import Path

def run(interpreter, text): 
    token_ids = data_util.tokenize(text)
    input_ids = np.array([token_ids], dtype=np.int32)
    
    # Get input and output tensors.
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    print("---INPUT DETAILS----")
    print(input_details)
    print("---OUTPUT DETAILS----")
    print(output_details)
     
    # Resize input shape based on current input
    interpreter.resize_tensor_input(input_details[0]['index'], input_ids.shape)
    interpreter.allocate_tensors()
    interpreter.set_tensor(input_details[0]['index'], input_ids)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])
    
    return output_data


def load_interpreter(saved_model_path):
    # Load the TFLite model and allocate tensors.
    if Path(saved_model_path).exists():
        print("Loading existing model")
        interpreter = tf.lite.Interpreter(model_path=saved_model_path)
    else:
        print("Converting model")
        interpreter = convert(model)
    return interpreter

In [27]:
interpreter = load_interpreter(SAVED_MODEL_NAME)

Loading existing model


In [148]:
input_data = emotions_encoded["train"][test_id]

In [149]:
# Expected
text = emotions["train"][test_id]["text"]
print(text)
input_data["input_ids"]

i feel romantic too


<tf.Tensor: shape=(87,), dtype=int64, numpy=
array([ 101, 1045, 2514, 6298, 2205,  102,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0])>

In [150]:
input_ids = data_util.tokenize(text)
print(input_ids)

for i, input_id in enumerate(input_ids):
    if input_id is None:
        print(i, input_id)

[101, 1045, 2514, 6298, 2205, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [151]:
output_data = run(interpreter, text)

---INPUT DETAILS----
[{'name': 'serving_default_args_0:0', 'index': 0, 'shape': array([ 1, 87], dtype=int32), 'shape_signature': array([-1, -1], dtype=int32), 'dtype': <class 'numpy.int32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
---OUTPUT DETAILS----
[{'name': 'StatefulPartitionedCall:0', 'index': 720, 'shape': array([1, 6], dtype=int32), 'shape_signature': array([-1,  6], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]


In [152]:
output_data

array([[-1.4168833 , -0.69242686,  5.377131  , -1.7829715 , -1.1952535 ,
        -1.18323   ]], dtype=float32)

In [153]:
outputs.logits

<tf.Tensor: shape=(1, 6), dtype=float32, numpy=
array([[-1.460196 , -0.5406702,  5.794202 , -1.9637783, -1.5457897,
        -1.2147366]], dtype=float32)>

In [154]:
predictions = tf.math.softmax(output_data, axis=-1)

In [155]:
predictions

<tf.Tensor: shape=(1, 6), dtype=float32, numpy=
array([[1.1126471e-03, 2.2960694e-03, 9.9302554e-01, 7.7155605e-04,
        1.3887055e-03, 1.4055034e-03]], dtype=float32)>

In [156]:
predictions[0]

<tf.Tensor: shape=(6,), dtype=float32, numpy=
array([1.1126471e-03, 2.2960694e-03, 9.9302554e-01, 7.7155605e-04,
       1.3887055e-03, 1.4055034e-03], dtype=float32)>

In [179]:
predictions_rounded = tf.math.round(predictions[0])
predictions_rounded

<tf.Tensor: shape=(6,), dtype=float32, numpy=array([0., 0., 1., 0., 0., 0.], dtype=float32)>

In [190]:
max_idx = tf.keras.backend.eval(tf.argmax(predictions_rounded, axis=0))

In [191]:
model.config.id2label[max_idx]

'love'

In [162]:
print(emotions["train"][test_id])

{'text': 'i feel romantic too', 'label': 2, 'label_text': 'love'}


# Export additional files

In [194]:
lines = [f"{token} {token_id}" for token, token_id in data_util.token2id.items()] 

In [195]:
lines

['[PAD] 0',
 '[unused0] 1',
 '[unused1] 2',
 '[unused2] 3',
 '[unused3] 4',
 '[unused4] 5',
 '[unused5] 6',
 '[unused6] 7',
 '[unused7] 8',
 '[unused8] 9',
 '[unused9] 10',
 '[unused10] 11',
 '[unused11] 12',
 '[unused12] 13',
 '[unused13] 14',
 '[unused14] 15',
 '[unused15] 16',
 '[unused16] 17',
 '[unused17] 18',
 '[unused18] 19',
 '[unused19] 20',
 '[unused20] 21',
 '[unused21] 22',
 '[unused22] 23',
 '[unused23] 24',
 '[unused24] 25',
 '[unused25] 26',
 '[unused26] 27',
 '[unused27] 28',
 '[unused28] 29',
 '[unused29] 30',
 '[unused30] 31',
 '[unused31] 32',
 '[unused32] 33',
 '[unused33] 34',
 '[unused34] 35',
 '[unused35] 36',
 '[unused36] 37',
 '[unused37] 38',
 '[unused38] 39',
 '[unused39] 40',
 '[unused40] 41',
 '[unused41] 42',
 '[unused42] 43',
 '[unused43] 44',
 '[unused44] 45',
 '[unused45] 46',
 '[unused46] 47',
 '[unused47] 48',
 '[unused48] 49',
 '[unused49] 50',
 '[unused50] 51',
 '[unused51] 52',
 '[unused52] 53',
 '[unused53] 54',
 '[unused54] 55',
 '[unused55] 56',

In [196]:
with open("text_classification_vocab.txt", "w+") as f:
    f.writelines(lines)