In [8]:
import os
import time

import torch

from optimum.onnxruntime import ORTModelForImageClassification, ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig
from PIL import Image
from sklearn.metrics import accuracy_score
from transformers import AutoFeatureExtractor, pipeline
from pathlib import Path

In [2]:
model_id = "weights/my_model"
onnx_path = Path("onnx")

# load vanilla transformers and convert to onnx
model = ORTModelForImageClassification.from_pretrained(model_id, from_transformers=True)
preprocessor = AutoFeatureExtractor.from_pretrained(model_id)

# save onnx checkpoint and tokenizer
model.save_pretrained(onnx_path)
preprocessor.save_pretrained(onnx_path)

The argument `from_transformers` is deprecated, and will be removed in optimum 2.0.  Use `export` instead
Framework not specified. Using pt to export to ONNX.
Using the export variant default. Available variants are:
	- default: The default ONNX variant.
Using framework PyTorch: 2.0.1+cpu
  if num_channels != self.num_channels:
  if height != self.image_size[0] or width != self.image_size[1]:


verbose: False, log level: Level.ERROR


['onnx\\preprocessor_config.json']

In [3]:
from onnxruntime.quantization import QuantType
from optimum.onnxruntime.configuration import default_quantization_parameters
from optimum.onnxruntime import QuantizationConfig

# create ORTQuantizer and define quantization configuration
dynamic_quantizer = ORTQuantizer.from_pretrained(model)
#dqconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=False, nodes_to_exclude=['Conv_quant'])

format, mode, operators_to_quantize = default_quantization_parameters(
            False, operators_to_quantize=None
        )
dqconfig = QuantizationConfig(
            is_static=False,
            format=format,
            mode=mode,
            activations_dtype=QuantType.QUInt8,
            activations_symmetric=False,
            weights_dtype=QuantType.QUInt8,
            weights_symmetric=True,
            per_channel=False,
            reduce_range=False,
            nodes_to_quantize=None or [],
            nodes_to_exclude=None or [],
            operators_to_quantize=operators_to_quantize,
        )

# apply the quantization configuration to the model
model_quantized_path = dynamic_quantizer.quantize(
    save_dir=onnx_path,
    quantization_config=dqconfig
)

Creating dynamic quantizer: QOperator (mode: IntegerOps, schema: u8/u8, channel-wise: False)
Quantizing model...
Saving quantized model at: onnx (external data format: False)
Configuration saved in onnx\ort_config.json


In [4]:
# get model file size
size = os.path.getsize(onnx_path / "model.onnx")/(1024*1024)
quantized_model = os.path.getsize(onnx_path / "model_quantized.onnx")/(1024*1024)

print(f"Model file size: {size:.2f} MB")
print(f"Quantized Model file size: {quantized_model:.2f} MB")
#   Model file size: 330.27 MB
#   Quantized Model file size: 84.50 MB

Model file size: 327.55 MB
Quantized Model file size: 83.07 MB


In [9]:
model = ORTModelForImageClassification.from_pretrained(onnx_path, file_name="model_quantized.onnx")
pipe = pipeline("image-classification", model=model, feature_extractor=preprocessor)

In [15]:
path = "data/"
images_list = os.listdir(path)

In [11]:
def model_use(model, img):
    #model.eval()
    #with torch.no_grad():
    logits = model(torch.quantize_per_tensor(img['pixel_values'], 0.1, 10, torch.quint8)).logits
    logits = model(img['pixel_values']).logits
    logits = logits.int_repr().to(torch.float32)

    predicted_label = logits.argmax(-1).item()

    return model.module.config.id2label[predicted_label]

In [27]:
start_time = time.time()

# Собака 1, кошка 0.
target_list = []
predict_list = []

for element in images_list:

    image = Image.open(path + element, mode='r', formats=None)

    inputs = pipe.feature_extractor(images=image, return_tensors="pt")
    predict = pipe.model(**inputs).logits.argmax(-1).item()

    target = element[:element.find(".")]

    if target == "dog":
        label = 1
    else:
        label = 0

    target_list.append(label) 

    predict_list.append(predict)

end_time = time.time()

acc = accuracy_score(target_list, predict_list)
print("Точность квантизированной модели = ", acc)
print("Время обработки изображений квантизированной модели = ", end_time-start_time, " секунд")
print("Скорость обработки изображений у квантизированной модели составила  ", len(images_list)/(end_time-start_time), " картинок в секунду")

Точность квантизированной модели =  0.9875
Время обработки изображений квантизированной модели =  20.706819772720337  секунд
Скорость обработки изображений у квантизированной модели составила   7.7269229054085775  картинок в секунду
