In [1]:
from datasets import load_dataset

In [2]:
dataset = load_dataset("glue", "sst2")['validation']

In [3]:
import torch
import torch.utils.benchmark as benchmark
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from optimum.intel import OVModelForSequenceClassification

INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino




In [4]:
model_id = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(model_id)

model_non_optimized = AutoModelForSequenceClassification.from_pretrained(model_id)

model_optimized = OVModelForSequenceClassification.from_pretrained(model_id, export=True)


Framework not specified. Using pt to export to ONNX.
Using framework PyTorch: 2.0.1+cu117
  op1 = operator(*args, **kwargs)


verbose: False, log level: Level.ERROR



Compiling the model...
Set CACHE_DIR to /tmp/tmp642hefn5/model_cache


In [5]:
def run_inference_non_optimized():
    inputs = tokenizer("Hello, how are you?", return_tensors="pt")
    outputs = model_non_optimized(**inputs)

def run_inference_optimized():
    inputs = tokenizer("Hello, how are you?", return_tensors="pt")
    outputs = model_optimized(**inputs)

In [6]:
timer_non_optimized = benchmark.Timer(
    stmt="run_inference_non_optimized()",
    setup="from __main__ import run_inference_non_optimized",
    num_threads=1,
)

timer_optimized = benchmark.Timer(
    stmt="run_inference_optimized()",
    setup="from __main__ import run_inference_optimized",
    num_threads=1,
)

In [7]:
result_non_optimized = timer_non_optimized.timeit(100)
print("Non-optimized model:", result_non_optimized)

Non-optimized model: <torch.utils.benchmark.utils.common.Measurement object at 0x7ff4a0cc1c10>
run_inference_non_optimized()
setup: from __main__ import run_inference_non_optimized
  54.75 ms
  1 measurement, 100 runs , 1 thread


In [8]:
result_optimized = timer_optimized.timeit(100)
print("Optimized model:", result_optimized)

Optimized model: <torch.utils.benchmark.utils.common.Measurement object at 0x7ff674674850>
run_inference_optimized()
setup: from __main__ import run_inference_optimized
  5.34 ms
  1 measurement, 100 runs , 1 thread


In [9]:
def calculate_accuracy(model):
    correct_predictions = 0
    total_predictions = 0

    for item in dataset:
        inputs = tokenizer(item['sentence'], return_tensors="pt")
        outputs = model(**inputs)
        predicted_label = torch.argmax(outputs.logits, dim=1).item()
        correct_predictions += (predicted_label == item['label'])
        total_predictions += 1

    return correct_predictions / total_predictions

In [12]:
accuracy_non_optimized = calculate_accuracy(model_non_optimized)
print(f'Accuracy of non-optimized model: {accuracy_non_optimized * 100:.4f}%')

Accuracy of non-optimized model: 91.0550%


In [13]:
accuracy_optimized = calculate_accuracy(model_optimized)
print(f'Accuracy of optimized model: {accuracy_optimized * 100:.4f}%')

Accuracy of optimized model: 91.0550%
