In [1]:
import pandas as pd
import numpy as np

from pathlib import Path
from datetime import datetime

In [2]:
DATASET_SIZE = 120
DATASET_IS_BALANCED = True

# Run identifier: dataset size (with a "k" suffix) followed by a
# balanced / imbalanced tag.
training_name = f"bert-finetune_{DATASET_SIZE}k_{'bal' if DATASET_IS_BALANCED else 'imbal'}"

# Date stamp embedded in the model folder name below.
training_args_datetime = datetime(year=2023, month=12, day=17)

# Absolute folder for this run, resolved against the current working directory.
training_storing_folder = Path(training_name).resolve()

# <run folder>/<training_name>_<YYYY-MM-DD>_model
model_path = training_storing_folder / f"{training_name}_{training_args_datetime:%Y-%m-%d}_model"

In [6]:
# Show the final path component (folder name) of the model directory.
model_path.name

'bert-finetune_120k_bal_2023-12-17_model'

In [7]:
# Export the fine-tuned model to ONNX and save it, together with the tokenizer,
# in a sibling "<model folder name>_onnx" directory.

from transformers import AutoTokenizer, AutoModelForSequenceClassification
from optimum.onnxruntime import ORTModelForSequenceClassification


# The tokenizer comes from the 'bert-base-cased' checkpoint, not from model_path.
tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')

# export=True converts the checkpoint stored at model_path to ONNX while loading.
ort_model = ORTModelForSequenceClassification.from_pretrained(model_path, export=True)

save_directory = training_storing_folder / (model_path.name + '_onnx')

# exist_ok=True replaces the separate exists() check, so re-running the cell
# (or a directory appearing between check and creation) does not raise.
save_directory.mkdir(parents=True, exist_ok=True)

tokenizer.save_pretrained(save_directory)
ort_model.save_pretrained(save_directory)

Framework not specified. Using pt to export to ONNX.
Using the export variant default. Available variants are:
    - default: The default ONNX variant.
Using framework PyTorch: 2.1.0
Overriding 1 configuration item(s)
	- use_cache -> False


In [4]:
# Show the directory the ONNX model and tokenizer are saved to.
# NOTE(review): the recorded output for this cell ends in '/onnx', which differs
# from the '<model_path.name>_onnx' folder the export cell builds — possibly a
# stale output from an earlier run; re-run to confirm.
save_directory

PosixPath('/Users/michaelcheng/Documents/MyDocs/HKU/COMP4801 FYP/FYP/NLP/dev-workspace/sa/bert_2023-12-13/bert-finetune_120k_bal/onnx')

In [15]:
# Open the exported ONNX model with ONNX Runtime and push one sample through it.

from transformers import AutoTokenizer
from onnxruntime import InferenceSession

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

# The exported graph is read from "model.onnx" inside save_directory.
session = InferenceSession(save_directory / "model.onnx")

# Names of the graph's input and output nodes, as reported by the session.
input_names = [node.name for node in session.get_inputs()]
output_names = [node.name for node in session.get_outputs()]

# The session is fed NumPy arrays, hence return_tensors="np".
inputs = tokenizer("Using DistilBERT with ONNX Runtime!", return_tensors="np")
outputs = session.run(output_names, dict(inputs))

In [13]:
# Show the input node names collected from the ONNX session.
input_names

input_ids
attention_mask
token_type_ids


In [14]:
# Show the output node names collected from the ONNX session.
output_names

'logits'

In [22]:
# Load the original Hugging Face model from model_path (the same checkpoint the
# ONNX export reads) so its outputs can be compared with the ONNX session's.

from transformers import Trainer, TrainingArguments  # NOTE(review): neither name is used in this cell
from transformers import AutoModelForSequenceClassification


hg_model = AutoModelForSequenceClassification.from_pretrained(model_path)

In [18]:
# Sample sentences for the parity check; each one is wrapped in its own
# single-element list.
test_data = [
    ['I like the game'],
    ["I do not like it."],
    ["It crashes when I just run on my pc."],
]

In [29]:
# Parity check: run every sample through both the Hugging Face (PyTorch) model
# and the ONNX Runtime session, then verify that the raw logits agree.

import torch  # local import: only needed for no_grad() below

pred_hg = []
perd_onnx = []  # spelling kept as-is; other cells may reference this name

for sample in test_data:

    # hg inference; no_grad() skips autograd bookkeeping, which inference does not need
    hg_inputs = tokenizer(sample, return_tensors="pt", max_length=tokenizer.model_max_length, truncation=True)
    with torch.no_grad():
        hg_outputs = hg_model(**hg_inputs)

    # onnx inference (same tokenizer settings, NumPy tensors instead of PyTorch ones)
    onnx_inputs = tokenizer(sample, return_tensors="np", max_length=tokenizer.model_max_length, truncation=True)
    onnx_outputs = session.run(output_names=output_names, input_feed=dict(onnx_inputs))

    perd_onnx.append(onnx_outputs[0])   # only get the unsoftmaxed logits
    pred_hg.append(hg_outputs.logits.numpy())  # only get the unsoftmaxed logits


print(pred_hg)
print(perd_onnx)

# compare the results pairwise; raises AssertionError if any pair differs
# beyond the given relative/absolute tolerance
for hg_logits, onnx_logits in zip(pred_hg, perd_onnx):
    np.testing.assert_allclose(hg_logits, onnx_logits, rtol=1e-3, atol=1e-3)

[array([[-2.1389656,  1.6192293]], dtype=float32), array([[ 1.5018184, -1.5831153]], dtype=float32), array([[ 1.1016914, -1.3381388]], dtype=float32)]
[array([[-2.1389651,  1.6192296]], dtype=float32), array([[ 1.5018172, -1.5831144]], dtype=float32), array([[ 1.1016904, -1.3381379]], dtype=float32)]
