In [21]:
import os
import time

from transformers import AutoFeatureExtractor, AutoModelForImageClassification, ViTImageProcessor
import torch
from PIL import Image
from sklearn.metrics import accuracy_score
import numpy
from openvino.tools.mo import convert_model
import openvino as ov
import numpy as np
from scipy.special import softmax

In [2]:
# Path to our ViT model, which is going to be converted.
path_to_model = "weights/my_model"

# Load the image preprocessor and the classification model from the same directory.
extractor = ViTImageProcessor.from_pretrained(path_to_model)
vit_model = AutoModelForImageClassification.from_pretrained(path_to_model)

In [3]:
def model_use(model, img):
    """Run the model on one preprocessed input and return the predicted label.

    Parameters:
      model: callable invoked as ``model(**img)``; the returned object must
        expose a ``logits`` tensor, and ``model.config.id2label`` is used to
        translate the winning class index into a label.
      img: mapping of keyword arguments forwarded to the model.
    Returns:
      The ``id2label`` entry for the index with the highest logit.
    """
    # Inference only: no gradients are needed for the forward pass.
    with torch.no_grad():
        outputs = model(**img)

    # .item() only succeeds for a single-element result, i.e. one prediction.
    best_index = outputs.logits.argmax(dim=-1).item()
    return model.config.id2label[best_index]

In [4]:
# Path to the test images.
# NOTE(review): os.listdir returns every entry of the directory in arbitrary
# order, so any non-image file in this folder is picked up too — confirm the
# folder contains only images.
path_to_images = "data/"
images_list = os.listdir(path_to_images)

In [5]:
# Helper for measuring the size of a model.
def size_measurement(model):
    """Print the combined size of a model's parameters and buffers in MB.

    Parameters:
      model: object exposing ``parameters()`` and ``buffers()`` iterables whose
        items provide ``nelement()`` and ``element_size()``.
    Returns:
      None; the size is printed with three decimal places.
    """
    def _total_bytes(tensors):
        # Bytes per tensor = number of elements * bytes per element.
        return sum(t.nelement() * t.element_size() for t in tensors)

    total_bytes = _total_bytes(model.parameters()) + _total_bytes(model.buffers())
    print('model size: {:.3f}MB'.format(total_bytes / (1024 ** 2)))

In [6]:
# Report the size of the original model.
size_measurement(vit_model)

model size: 327.302MB


In [7]:
# Convert the PyTorch model into an in-memory OpenVINO model.
# ov.convert_model is used instead of the legacy openvino.tools.mo entry point;
# it ships in the same OpenVINO releases as ov.save_model, which this notebook
# already relies on.
# The random tensor is only an example input for the conversion.
# NOTE(review): assumes the checkpoint expects a single 3x224x224 input —
# confirm against the processor/model config.
ov_model = ov.convert_model(vit_model, example_input=torch.randn(1, 3, 224, 224))

  if num_channels != self.num_channels:
  if height != self.image_size[0] or width != self.image_size[1]:


In [8]:
# Create the OpenVINO Core object and compile the converted model for the "CPU" device.
core = ov.Core()
compiled_model = core.compile_model(ov_model, "CPU")

In [15]:
# Postprocessing function for getting results in the same way for both PyTorch model inference and OpenVINO
def postprocess_result(output_tensor: np.ndarray, top_k: int = 5):
    """
    Postprocess model results. Applies softmax over the last axis of the output
    tensor and returns the ``top_k`` labels with the highest probability for the
    first entry along axis 0.

    Parameters:
      output_tensor (np.ndarray): raw model output scores; softmax is applied
        here and only index 0 along the first axis is used
      top_k (int, *optional*, default 5): number of labels with highest
        probability to return; 0 yields empty results, and a value larger than
        the number of classes returns all classes
    Returns:
      topk_labels: label ids for the selected top_k scores, best first
      topk_scores: the corresponding softmax probabilities
    Raises:
      ValueError: if top_k is negative
    """
    if top_k < 0:
        raise ValueError("top_k must be non-negative, got {}".format(top_k))

    softmaxed_scores = softmax(output_tensor, -1)[0]
    # Reverse to descending order first and then cut. Slicing the ascending
    # order with [-top_k:] would return *every* label for top_k == 0.
    topk_labels = np.argsort(softmaxed_scores)[::-1][:top_k]
    topk_scores = softmaxed_scores[topk_labels]
    return topk_labels, topk_scores

In [28]:
# Evaluate the converted OpenVINO model on the test images.

# Class ids used for scoring: dog is 1, cat is 0.
# NOTE(review): assumes the model's output index 1 corresponds to "dog" —
# confirm against vit_model.config.id2label.
target_list = []
predict_list = []

# perf_counter() is a monotonic, high-resolution clock, so the measurement is
# not affected by system clock adjustments the way time.time() can be.
start_time = time.perf_counter()

for element in images_list:

    # The context manager closes the underlying file once the pixels have been
    # read by the preprocessor, instead of leaking one handle per image.
    with Image.open(os.path.join(path_to_images, element)) as image:
        inputs = extractor(image, return_tensors="pt")["pixel_values"]

    result = compiled_model(inputs)[0]
    predict, score = postprocess_result(result, top_k=1)

    # The class name is the part of the file name before the first dot.
    # split() keeps the whole name when there is no dot, whereas slicing with
    # find() == -1 would silently drop the last character.
    target = element.split(".", 1)[0]
    label = 1 if target == "dog" else 0

    target_list.append(label)
    predict_list.append(predict[0])

end_time = time.perf_counter()
# Covers image loading, preprocessing and inference for the whole folder.
elapsed_time = end_time - start_time

acc = accuracy_score(target_list, predict_list)

print("Точность сконвертированной в OpenVINO модели= ", acc)
print("Время обработки изображений сконвертированной в OpenVINO моделью = ", elapsed_time, " секунд")
print("Скорость обработки изображений у сконвертированной в OpenVINO модели составила  ", len(images_list)/elapsed_time, " картинок в секунду")

Точность сконвертированной в OpenVINO модели=  0.9875
Время обработки изображений сконвертированной в OpenVINO моделью =  7.952617883682251  секунд
Скорость обработки изображений у сконвертированной в OpenVINO модели составила   20.11916105365749  картинок в секунду


In [29]:
# Save the converted model to disk as "openvino.xml" (relative to the current working directory).
# NOTE(review): ov.save_model is documented to compress weights to FP16 by
# default — confirm that is intended for the saved artifact.
ov.save_model(ov_model, "openvino.xml")