In [1]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification
import onnx
import onnxruntime
from sklearn.metrics import f1_score
import time
import os
from datasets import load_dataset

import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from onnxruntime import (
    InferenceSession,
    SessionOptions
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def convert_from_torch_to_onnx(
    onnx_path: str,
    tokenizer: "AutoTokenizer",
    model: torch.nn.Module
):
    """Export ``model`` to an ONNX file at ``onnx_path``.

    A fixed sample sentence is tokenized (padded/truncated to 512 tokens) and
    its ``input_ids`` tensor is used as the example input for
    ``torch.onnx.export``. Only ``input_ids`` is exported as a graph input.

    Args:
        onnx_path: Destination path of the ``.onnx`` file.
        tokenizer: Callable tokenizer returning an object that supports
            ``.to(device)`` and ``["input_ids"]``.
        model: Module to export. The declared output names assume it returns
            two tensors in the order (last hidden state, pooled output), as a
            bare BERT encoder does -- TODO confirm for other model types.

    Returns:
        None. The ONNX graph is written to ``onnx_path``.
    """
    # The sample text only drives the export; its content is irrelevant.
    dummy_model_input = tokenizer(
        "Один два три четыре",
        padding="max_length",
        truncation=True,
        max_length=512,
        return_tensors="pt",
    ).to("cpu")

    input_names = ["input_ids"]
    output_names = ["last_hidden_state", "pooler_output"]

    # Every key here must be one of the names declared above; a key that
    # matches no input/output name is ignored by the exporter, leaving that
    # tensor without the requested dynamic dimensions.
    dynamic_axes = {
        "input_ids": {0: "batch_size", 1: "sequence_len"},
        # assumes axis 1 of the first output is the token axis -- TODO confirm
        "last_hidden_state": {0: "batch_size", 1: "sequence_len"},
        "pooler_output": {0: "batch_size"},
    }

    torch.onnx.export(
        model,
        dummy_model_input["input_ids"],
        onnx_path,
        opset_version=12,
        input_names=input_names,
        output_names=output_names,
        dynamic_axes=dynamic_axes,
    )

In [3]:
# Save the BERT model to ONNX.
onnx_path = "bert-base-cased.onnx"
device = torch.device("cpu")

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

model_path = 'baseline.pt'

# SECURITY: the checkpoint is a fully pickled model object (the result is used
# as a module below), so loading it executes arbitrary pickle code. Only load
# files from a trusted source. `weights_only=False` states that intent
# explicitly, since a full-model pickle cannot be read in weights-only mode.
# `map_location` remaps tensors saved on another device (e.g. CUDA) so the load
# also works on a CPU-only machine.
bert_model = torch.load(model_path, map_location=device, weights_only=False).to(device)

# Inference mode: disables dropout so the exported graph is deterministic.
bert_model.eval()

# NOTE(review): only the `.bert` sub-module is exported, so the ONNX graph
# contains whatever that sub-module returns and not the outputs of the full
# loaded model -- confirm this is intended for the downstream evaluation.
bert_model = bert_model.bert
convert_from_torch_to_onnx(onnx_path, tokenizer, bert_model)



verbose: False, log level: Level.ERROR



# Inference

In [4]:
import timeit
import onnxruntime
from transformers import AutoTokenizer
from datasets import load_dataset
from sklearn.metrics import f1_score
import numpy as np

# Evaluation data; the "test" split is what the inference cell iterates over.
dataset = load_dataset("mteb/tweet_sentiment_extraction")

onnx_model_path = 'bert-base-cased.onnx'
# Name the execution provider explicitly: the model was exported on CPU, and
# onnxruntime builds that ship additional providers can refuse to create a
# session when `providers` is omitted.
onnx_session = onnxruntime.InferenceSession(
    onnx_model_path,
    providers=["CPUExecutionProvider"],
)

# Show the graph's input names so the feed dict used for inference can be
# checked against them.
input_names = [input_.name for input_ in onnx_session.get_inputs()]
print("Input Names of the ONNX Model:", input_names)

original_tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")



Input Names of the ONNX Model: ['input_ids']


In [5]:

# Time one forward pass per test example (batch size 1, no padding).
# Tokenization happens inside the timed region, so the reported figure covers
# tokenization plus ONNX inference.
# perf_counter is a monotonic clock meant for measuring intervals, unlike the
# wall-clock time.time().
start_time = time.perf_counter()

onnx_predictions = []
for example in dataset["test"]:
    # Truncate to 512 tokens, the same limit used for the dummy input at
    # export time, so an unusually long text cannot exceed the model's
    # supported sequence length.
    token_ids = original_tokenizer.encode(
        example["text"],
        add_special_tokens=True,
        truncation=True,
        max_length=512,
    )
    # Feed an explicit int64 array of shape (1, seq_len) rather than a nested
    # Python list, so the dtype does not depend on implicit conversion.
    input_ids = np.asarray([token_ids], dtype=np.int64)
    onnx_predictions.append(onnx_session.run(None, {'input_ids': input_ids}))

end_time = time.perf_counter()

onnx_execution_time = end_time - start_time

print(f"Время выполнения для вашей ONNX-модели: {onnx_execution_time} секунд")


Время выполнения для вашей ONNX-модели: 57.742637634277344 секунд


In [11]:
def post_process_predictions(onnx_predictions, num_classes=3):
    """Turn raw ONNX session outputs into per-row probability slices.

    For each prediction, the first output's first batch element is taken,
    a softmax is applied along the last axis, and the first ``num_classes``
    columns are kept.

    Note that the slice happens *after* the softmax: when the last axis has
    more than ``num_classes`` entries the kept values do not sum to 1.

    NOTE(review): the model exported earlier in this notebook is the bare
    ``.bert`` encoder, so ``prediction[0]`` is presumably a hidden-state tensor
    rather than class logits -- confirm the export includes the
    classification head before treating these values as class probabilities.

    Args:
        onnx_predictions: Iterable of ``session.run`` results; each is a
            sequence whose first item is indexable as ``[0]`` and yields a
            2-D array (rows x features).
        num_classes: Number of leading columns to keep. Defaults to 3, the
            value that was previously hard-coded.

    Returns:
        list of ``numpy.ndarray``, one per input prediction, each of shape
        ``(rows, min(num_classes, features))``.
    """

    def _softmax(values):
        # Subtract the row maximum before exponentiating: mathematically a
        # no-op (the factor cancels in the ratio) but it keeps np.exp from
        # overflowing to inf, which would otherwise produce nan.
        shifted = values - np.max(values, axis=-1, keepdims=True)
        exponentials = np.exp(shifted)
        return exponentials / np.sum(exponentials, axis=-1, keepdims=True)

    processed_predictions = []
    for prediction in onnx_predictions:
        # First model output, first (and only) item of the batch.
        logits = np.asarray(prediction[0][0])
        probabilities = _softmax(logits)
        processed_predictions.append(probabilities[:, :num_classes])

    return processed_predictions




# Post-process the raw outputs collected by the timed inference loop
# (one `onnx_session.run` result per test example).
processed_predictions = post_process_predictions(onnx_predictions)

#print("Processed Predictions:", processed_predictions)


In [8]:
# Sanity check: number of processed predictions (one is appended per entry of
# onnx_predictions, which holds one result per example in dataset["test"]).
len(processed_predictions)

3534