In [1]:
import pandas as pd
import numpy as np

from pathlib import Path
from datetime import datetime

In [2]:
# Run configuration. These values are combined into the run name, which in turn
# names the folder that holds the training artifacts.
DATASET_SIZE = 120              # appears in the run name as "<size>k"
DATASET_IS_BALANCED = True      # chooses the 'bal' / 'imbal' suffix of the run name

MAX_FEATURES = 20000            # vocabulary cap; passed as max_tokens to TextVectorization

# "glove-cnn-<max features>_<dataset size>k_<bal|imbal>"
training_name = '_'.join([
    f'glove-cnn-{MAX_FEATURES}',
    f'{DATASET_SIZE}k',
    'bal' if DATASET_IS_BALANCED else 'imbal',
])

# Date stamp embedded in every artifact file name of this run.
training_args_datetime = datetime(2023, 12, 20)

# Absolute path of the artifact folder, resolved against the current working directory.
training_storing_folder = Path(training_name).resolve()

In [3]:
import pickle

import tensorflow as tf
import keras

# Build an end-to-end model (raw string in, prediction out) from two artifacts
# stored in the training folder: the pickled TextVectorization weights and the
# trained .keras model.

# Both artifact file names start with "<training_name>_<YYYY-MM-DD>".
artifact_prefix = "{}_{}".format(
    training_name,
    training_args_datetime.strftime("%Y-%m-%d")
)

text_vectorizer_path = training_storing_folder / "{}_textvectorizer.pkl".format(artifact_prefix)

# NOTE(review): pickle.load can execute arbitrary code while unpickling; only load
# files produced by our own training run.
# The `with` block closes the file handle as soon as the pickle has been read.
with open(text_vectorizer_path, 'rb') as vectorizer_file:
    vectorizer_from_disk = pickle.load(vectorizer_file)

# Recreate the layer and restore its saved weights.
# NOTE(review): output_sequence_length=512 presumably matches the value used at
# training time — confirm.
vectorizer = tf.keras.layers.TextVectorization(
    max_tokens=MAX_FEATURES,
    output_sequence_length=512)

vectorizer.set_weights(vectorizer_from_disk['weights'])

model_path = training_storing_folder / "{}_model.keras".format(artifact_prefix)
model = keras.models.load_model(model_path)

print('\n\n')
print('Loaded text vectorizer from {}'.format(text_vectorizer_path))
print('Loaded model from {}'.format(model_path))

# Chain the vectorizer and the trained model behind a single string input.
string_input = keras.Input(shape=(1,), dtype="string")
x = vectorizer(string_input)
preds = model(x)
end_to_end_model = keras.Model(string_input, preds)

print('Created end to end model from trained vectorizer and cnn-model')

End to end model not found at /Users/michaelcheng/Documents/MyDocs/HKU/COMP4801 FYP/FYP/NLP/dev-workspace/sa/glove-cnn_2023-12-12/glove-cnn-20000_120k_bal/glove-cnn-20000_120k_bal_2023-12-20_end2end.keras
Attempt to build from existing vectorizer and model


2023-12-22 10:08:13.808253: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Max
2023-12-22 10:08:13.808275: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 32.00 GB
2023-12-22 10:08:13.808280: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 10.67 GB
2023-12-22 10:08:13.808322: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-12-22 10:08:13.808338: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)





Loaded text vectorizer from /Users/michaelcheng/Documents/MyDocs/HKU/COMP4801 FYP/FYP/NLP/dev-workspace/sa/glove-cnn_2023-12-12/glove-cnn-20000_120k_bal/glove-cnn-20000_120k_bal_2023-12-20_textvectorizer.pkl
Loaded model from /Users/michaelcheng/Documents/MyDocs/HKU/COMP4801 FYP/FYP/NLP/dev-workspace/sa/glove-cnn_2023-12-12/glove-cnn-20000_120k_bal/glove-cnn-20000_120k_bal_2023-12-20_model.keras
Created end to end model from trained vectorizer and cnn-model


In [5]:
# Print the layer-by-layer summary (layers, output shapes, parameter counts)
# of the assembled end-to-end model.
end_to_end_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 1)]               0         
                                                                 
 text_vectorization (TextVe  (None, 512)               0         
 ctorization)                                                    
                                                                 
 model_4 (Functional)        (None, 2)                 6808154   
                                                                 
Total params: 6808154 (25.97 MB)
Trainable params: 6808154 (25.97 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [6]:
# Export the end-to-end model in SavedModel format, which includes a .pb file.
# Reference: https://www.tensorflow.org/tutorials/keras/save_and_load#savedmodel_format

end_to_end_model.save(
    training_storing_folder
    / f"{training_name}_{training_args_datetime:%Y-%m-%d}_end2end_savedmodel"
)





INFO:tensorflow:Assets written to: /Users/michaelcheng/Documents/MyDocs/HKU/COMP4801 FYP/FYP/NLP/dev-workspace/sa/glove-cnn_2023-12-12/glove-cnn-20000_120k_bal/glove-cnn-20000_120k_bal_2023-12-20_end2end_savedmodel/assets


INFO:tensorflow:Assets written to: /Users/michaelcheng/Documents/MyDocs/HKU/COMP4801 FYP/FYP/NLP/dev-workspace/sa/glove-cnn_2023-12-12/glove-cnn-20000_120k_bal/glove-cnn-20000_120k_bal_2023-12-20_end2end_savedmodel/assets


In [7]:
# Destination of the ONNX export of the pretrained end-to-end model.

import tf2onnx
import onnx

onnx_model_path = training_storing_folder / "{name}_{date:%Y-%m-%d}_end2end.onnx".format(
    name=training_name,
    date=training_args_datetime,
)

# In-process conversion through the tf2onnx Python API (currently disabled):
# onnx_model, _ = tf2onnx.convert.from_keras(
#     end_to_end_model,
#     input_signature=[tf.TensorSpec([None,1], dtype=tf.string, name='input_3')],
#     extra_opset='ai.onnx.contrib:1',
#     opset=13        # support onnxruntime >= 1.13.0
# )

# onnx.save(onnx_model, onnx_model_path)

In [8]:
# Show the artifact folder and the ONNX target path, separated by blank lines.
print(training_storing_folder, '\n\n', onnx_model_path, sep='\n')

/Users/michaelcheng/Documents/MyDocs/HKU/COMP4801 FYP/FYP/NLP/dev-workspace/sa/glove-cnn_2023-12-12/glove-cnn-20000_120k_bal



/Users/michaelcheng/Documents/MyDocs/HKU/COMP4801 FYP/FYP/NLP/dev-workspace/sa/glove-cnn_2023-12-12/glove-cnn-20000_120k_bal/glove-cnn-20000_120k_bal_2023-12-20_end2end.onnx


In [11]:
# https://github.com/onnx/tensorflow-onnx/issues/1867
# "probably there's no shared-name keyword for hash tables"

!python -m tf2onnx.convert --saved-model "/Users/michaelcheng/Documents/MyDocs/HKU/COMP4801 FYP/FYP/NLP/dev-workspace/sa/glove-cnn_2023-12-12/glove-cnn-20000_120k_bal/glove-cnn-20000_120k_bal_2023-12-20_end2end_savedmodel" --output "/Users/michaelcheng/Documents/MyDocs/HKU/COMP4801 FYP/FYP/NLP/dev-workspace/sa/glove-cnn_2023-12-12/glove-cnn-20000_120k_bal/glove-cnn-20000_120k_bal_2023-12-20_end2end.onnx" --extra_opset ai.onnx.contrib:1 --opset 15

2023-12-22 10:19:03,704 - INFO - Signatures found in model: [serving_default].
2023-12-22 10:19:03,704 - INFO - Output names: ['model_4']
2023-12-22 10:19:04,043 - INFO - Using tensorflow=2.15.0, onnx=1.14.1, tf2onnx=1.15.1/37820d
2023-12-22 10:19:04,043 - INFO - Using opset <onnx, 15>
2023-12-22 10:19:04,118 - INFO - Computed 0 values for constant folding
{}
2023-12-22 10:19:04,180 - ERROR - Failed to convert node 'StatefulPartitionedCall/model/text_vectorization/string_lookup/None_Lookup/LookupTableFindV2' (fct=<bound method LookupTableFind.version_8 of <class 'tf2onnx.custom_opsets.onnx_ml.LookupTableFind'>>)
'OP=LookupTableFindV2\nName=StatefulPartitionedCall/model/text_vectorization/string_lookup/None_Lookup/LookupTableFindV2\nInputs:\n\tunknown:0=Placeholder, [], 7\n\tStatefulPartitionedCall/model/text_vectorization/StringSplit/StringSplitV2:1=StringSplit, [-1], 8\n\tFunc/StatefulPartitionedCall/input/_2:0=Const, [], 7\nOutpus:\n\tStatefulPartitionedCall/model/text_vectorization/

In [10]:
import onnxruntime as rt

# Open the exported ONNX file with ONNX Runtime, using only the CPU provider.
sess = rt.InferenceSession(onnx_model_path, providers=['CPUExecutionProvider'])

# Names of the graph's input and outputs as reported by the session.
input_name = sess.get_inputs()[0].name                      # only one input in this model
label_names = [output.name for output in sess.get_outputs()]

NoSuchFile: [ONNXRuntimeError] : 3 : NO_SUCHFILE : Load model from /Users/michaelcheng/Documents/MyDocs/HKU/COMP4801 FYP/FYP/NLP/dev-workspace/sa/glove-cnn_2023-12-12/glove-cnn-20000_120k_bal/glove-cnn-20000_120k_bal_2023-12-20_end2end.onnx failed:Load model /Users/michaelcheng/Documents/MyDocs/HKU/COMP4801 FYP/FYP/NLP/dev-workspace/sa/glove-cnn_2023-12-12/glove-cnn-20000_120k_bal/glove-cnn-20000_120k_bal_2023-12-20_end2end.onnx failed. File doesn't exist

In [None]:
# Sample inputs for the inference check; each entry is a one-element list
# holding a single review string.
test_data = [
    ["I like the game"],
    ["I do not like it."],
    ["It crashes when I just run on my pc."],
]

In [None]:
# test inference: run every sample through both the Keras model and the ONNX session

pred_keras = []
pred_onnx = []

for sample in test_data:
    pred_keras.append(end_to_end_model.predict(sample))

    # Address the graph through the input/output names queried from the session
    # instead of guessed literals ("input1", "output1", "output2").
    # ONNX Runtime takes string tensors as numpy arrays, so wrap the sample.
    # NOTE(review): assumes the ONNX input keeps the Keras (batch, 1) string shape — confirm.
    onnx_batch = np.array([sample], dtype=object)
    pred_onnx.append(sess.run(label_names, {input_name: onnx_batch}))

# Keep the original (misspelled) name bound in case other cells still refer to it.
perd_onnx = pred_onnx

print(pred_keras)
print(pred_onnx)

2023-12-21 23:27:40.454814: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.




AttributeError: predict