# 모델 변환 및 경량화, 평가

In [1]:
!unzip -qq "saved_model.zip"

In [2]:
# Install OpenVINO (used below for model conversion and inference) and NNCF (used below
# for post-training quantization) into the kernel's environment.
%pip install -q "openvino>=2023.1.0" "nncf>=2.6.0"

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37.5/37.5 MB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m52.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m53.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m120.7/120.7 kB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.1/4.1 MB[0m [31m68.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.8/67.8 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m249.1/249.1 kB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.0/76.0 kB[0m [31m9.1 MB/s[0

## saved_model 모델을 OpenVINO로 변환

In [3]:
import openvino as ov

# Convert the TensorFlow SavedModel directory into an OpenVINO model with the input
# shape fixed to [1, 224, 224, 3].
ov_model = ov.convert_model('saved_model', input=[1,224,224,3])
# Serialize the converted model as OpenVINO IR; the timing cell below loads this .xml
# together with the .bin file of the same base name.
ov.save_model(ov_model, "talkFinger_edit.xml")

#### 변환된 모델의 추론시간

In [4]:
from openvino.runtime import Core
from pathlib import Path
import numpy as np
import time

model_path = "talkFinger_edit.xml"
N_TIMING_RUNS = 20  # number of timed inferences averaged into the reported latency

ie = Core()  # initialize the OpenVINO runtime
network = ie.read_model(model=model_path, weights=Path(model_path).with_suffix('.bin'))
executable_network = ie.compile_model(model=network, device_name="CPU")

# Random NHWC input. np.random.randn returns float64, so cast to float32 to match the
# model's f32 input and keep dtype conversion out of the measurement.
im = np.random.randn(1, 224, 224, 3).astype(np.float32)
output_layer = next(iter(executable_network.outputs))  # first output of the compiled model
y = executable_network([im])[output_layer]  # warm-up run, excluded from the timing

# perf_counter is a monotonic high-resolution clock; averaging several runs smooths out
# the jitter a single sample would show.
start = time.perf_counter()
for _ in range(N_TIMING_RUNS):
    y = executable_network([im])[output_layer]
elapsed = (time.perf_counter() - start) / N_TIMING_RUNS
print(f'time lapse: {elapsed}')

time lapse: 0.02854776382446289


## 이미지 불러오는 함수

In [5]:
!pip install Pillow



In [6]:
from PIL import Image

def load_data(directory):
    """Load every image file in ``directory`` and preprocess it with ``transform``.

    ``transform`` is the notebook-level preprocessing pipeline; it must be defined
    before this function is called.

    Args:
        directory: Path of a folder containing image files.

    Returns:
        A list with one transformed image per file, ordered by file name
        (lexicographic), so repeated runs yield the same sequence.

    Raises:
        Whatever ``PIL.Image.open`` raises for a file that is not a readable image.
    """
    # Local import: this function is defined in a cell that runs before the cell
    # importing ``os`` at notebook level.
    import os

    images = []
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(directory)):
        path = os.path.join(directory, filename)
        # Skip sub-directories (e.g. ".ipynb_checkpoints"), which Image.open cannot read.
        if not os.path.isfile(path):
            continue
        # The context manager closes the underlying file once the pixels are consumed.
        with Image.open(path) as img:
            # Force 3 channels so a 3-value mean/std normalisation always applies.
            images.append(transform(img.convert("RGB")))
    return images

## 프레임 단위로 캡처해서 저장하기

In [7]:
import os
import cv2

# (video file, index used for the first frame written from that video)
VIDEO_SOURCES = [
    ('KETI_SL_0000008142.avi', 0),
    ('KETI_SL_0000008143.avi', 76),
    ('KETI_SL_0000008144.avi', 158),
    ('KETI_SL_0000008145.avi', 258),
]

save_dir = 'video_capture_datatset'


def save_video_frames(video_path, output_dir, start_index=0):
    """Write every frame of ``video_path`` to ``output_dir`` as ``frame_<n>.jpg``.

    Args:
        video_path: Path of the video file to read.
        output_dir: Existing directory that receives the JPEG files.
        start_index: Number used in the file name of the first frame; it is
            incremented by one for each following frame.

    Returns:
        The index following the last frame written (``start_index`` plus the
        number of frames saved).

    Raises:
        OSError: If the video cannot be opened or a frame cannot be written.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise OSError(f"Could not open video: {video_path}")

    frame_index = start_index
    try:
        while True:
            ret, frame = cap.read()
            if not ret:  # no more frames
                break

            # Save the frame as an image file
            save_path = os.path.join(output_dir, f'frame_{frame_index}.jpg')
            if not cv2.imwrite(save_path, frame):
                raise OSError(f"Could not write frame: {save_path}")
            frame_index += 1
    finally:
        # Release the capture even if reading or writing failed.
        cap.release()
    return frame_index


os.makedirs(save_dir, exist_ok=True)

previous_end = 0
for video_path, first_index in VIDEO_SOURCES:
    # The start indices are hard-coded; warn when one falls inside the range already
    # written by the previous video, because those files would be overwritten.
    if first_index < previous_end:
        print(f"warning: frames of {video_path} start at {first_index} and overwrite "
              f"files up to frame_{previous_end - 1}.jpg")
    previous_end = save_video_frames(video_path, save_dir, first_index)

## 전처리 및 정규화

In [9]:
import torch
from torchvision import transforms

# Preprocessing applied to every frame by load_data: resize to 224x224, convert to a
# tensor, then normalise each channel with the given mean / std.
# NOTE(review): confirm these mean/std values match the preprocessing the model was
# trained with.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
    ]
)

# Captured frames, preprocessed and held in memory as a list.
dataset = load_data("video_capture_datatset")

# One image per batch, in list order, loaded in the main process.
val_loader = torch.utils.data.DataLoader(
    dataset, batch_size=1, shuffle=False, num_workers=0, pin_memory=True
)

## 직접 구현한 최적화 로직

In [10]:
import nncf

def transform_fn(data_item):
    """Turn one DataLoader batch into the NHWC array fed to the OpenVINO model.

    Args:
        data_item: Batch tensor of shape (1, C, H, W), as produced by a DataLoader
            with ``batch_size=1`` over channel-first image tensors.

    Returns:
        numpy array of shape (1, H, W, C) holding the same pixels in
        channel-last layout.
    """
    # Drop the DataLoader batch dimension: (1, C, H, W) -> (C, H, W).
    image_tensor = data_item[0]
    # Channel-first to channel-last: (C, H, W) -> (H, W, C). Height and width must keep
    # their relative order, otherwise the image is spatially transposed.
    image_tensor = image_tensor.permute(1, 2, 0)
    # Add the batch dimension back: (H, W, C) -> (1, H, W, C).
    image_tensor = image_tensor.unsqueeze(0)
    # permute returns a strided view; make it contiguous before handing it to numpy.
    return image_tensor.contiguous().numpy()

# Wrap the DataLoader so NNCF passes each item through transform_fn before feeding it
# to the model during calibration.
quantization_dataset = nncf.Dataset(val_loader, transform_fn)

# Post-training quantization of ov_model (the OpenVINO model converted earlier in this
# notebook), then save the quantized model as OpenVINO IR.
quant_ov_model = nncf.quantize(ov_model, quantization_dataset)
ov.save_model(quant_ov_model, "quantized_talkFinger.xml")

INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, openvino


Output()

Output()

## 성능 평가

In [11]:
from tqdm.notebook import tqdm
import numpy as np
import ipywidgets as widgets
import openvino as ov

In [12]:
core = ov.Core()
# Dropdown offering every device OpenVINO reports plus "AUTO" (pre-selected).
# The benchmark cells below read the selection through `device.value`.
device = widgets.Dropdown(
    options=core.available_devices + ["AUTO"],
    value='AUTO',
    description='Device:',
    disabled=False,
)

# Last expression of the cell: renders the widget.
device

Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO')

In [13]:
# Inference FP32 model (OpenVINO IR)
!benchmark_app -m "talkFinger_edit.xml" -d $device.value -api async -t 15 -shape "conv2d_input:[1,3,224,224]"

[Step 1/11] Parsing and validating input arguments
[ INFO ] Parsing input parameters
[Step 2/11] Loading OpenVINO Runtime
[ INFO ] OpenVINO:
[ INFO ] Build ................................. 2023.2.0-13089-cfd42bd2cb0-HEAD
[ INFO ] 
[ INFO ] Device info:
[ INFO ] AUTO
[ INFO ] Build ................................. 2023.2.0-13089-cfd42bd2cb0-HEAD
[ INFO ] 
[ INFO ] 
[Step 3/11] Setting device configuration
[Step 4/11] Reading model files
[ INFO ] Loading model files
[ INFO ] Read model took 10.46 ms
[ INFO ] Original model I/O parameters:
[ INFO ] Model inputs:
[ INFO ]     input_2 (node: input_2) : f32 / [...] / [1,224,224,3]
[ INFO ] Model outputs:
[ INFO ]     dense_3 (node: model_1/dense_3/Softmax) : f32 / [...] / [1,50]
[Step 5/11] Resizing model to match image sizes and given batch
[ INFO ] Model batch size: 1
[Step 6/11] Configuring input of the model
[ INFO ] Model inputs:
[ INFO ]     input_2 (node: input_2) : u8 / [N,H,W,C] / [1,224,224,3]
[ INFO ] Model outputs:
[ INFO ]    

In [14]:
# Inference INT8 model (OpenVINO IR)
!benchmark_app -m "quantized_talkFinger.xml" -d $device.value -api async -t 15 -shape "conv2d_input:[32,224,224,3]"

[Step 1/11] Parsing and validating input arguments
[ INFO ] Parsing input parameters
[Step 2/11] Loading OpenVINO Runtime
[ INFO ] OpenVINO:
[ INFO ] Build ................................. 2023.2.0-13089-cfd42bd2cb0-HEAD
[ INFO ] 
[ INFO ] Device info:
[ INFO ] AUTO
[ INFO ] Build ................................. 2023.2.0-13089-cfd42bd2cb0-HEAD
[ INFO ] 
[ INFO ] 
[Step 3/11] Setting device configuration
[Step 4/11] Reading model files
[ INFO ] Loading model files
[ INFO ] Read model took 20.50 ms
[ INFO ] Original model I/O parameters:
[ INFO ] Model inputs:
[ INFO ]     input_2 (node: input_2) : f32 / [...] / [1,224,224,3]
[ INFO ] Model outputs:
[ INFO ]     dense_3 (node: model_1/dense_3/Softmax) : f32 / [...] / [1,50]
[Step 5/11] Resizing model to match image sizes and given batch
[ INFO ] Model batch size: 1
[Step 6/11] Configuring input of the model
[ INFO ] Model inputs:
[ INFO ]     input_2 (node: input_2) : u8 / [N,H,W,C] / [1,224,224,3]
[ INFO ] Model outputs:
[ INFO ]    

## Intel에서 제공한 예시 코드를 변형한 최적화 로직

In [1]:
# Copyright (c) 2023 Intel Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#      http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import re
import subprocess
from pathlib import Path
from typing import List, Optional

import openvino as ov
import tensorflow as tf
import tensorflow_datasets as tfds
from tqdm import tqdm

import nncf

ROOT = os.getcwd()
WEIGHTS_URL = "https://huggingface.co/alexsu52/mobilenet_v2_imagenette/resolve/main/tf_model.h5"
DATASET_CLASSES = 10


def validate(model: ov.Model, val_loader: tf.data.Dataset) -> tf.Tensor:
    """Compute top-1 categorical accuracy of an OpenVINO model over a dataset.

    Args:
        model: OpenVINO model to compile and evaluate.
        val_loader: Iterable yielding ``(images, labels)`` batches; ``images`` must
            expose ``.numpy()``.

    Returns:
        The accumulated ``CategoricalAccuracy`` result.
    """
    compiled = ov.compile_model(model)
    first_output = compiled.outputs[0]
    top1 = tf.keras.metrics.CategoricalAccuracy(name="acc@1")

    for batch_images, batch_labels in tqdm(val_loader):
        # Run inference on the batch and read the tensor of the first model output.
        predictions = compiled(batch_images.numpy())[first_output]
        top1.update_state(batch_labels, predictions)

    return top1.result()


def run_benchmark(model_path: str, shape: Optional[List[int]] = None, verbose: bool = True) -> float:
    """Run ``benchmark_app`` on CPU for 15 seconds and return the measured throughput.

    Args:
        model_path: Path of the model file passed to ``benchmark_app -m``.
        shape: Optional input shape passed as ``-shape [d0,d1,...]``.
        verbose: When true, print the last eight lines of the tool's output.

    Returns:
        Throughput in frames per second, parsed from the tool's output.

    Raises:
        subprocess.CalledProcessError: If ``benchmark_app`` exits with an error.
        RuntimeError: If the output contains no "Throughput: ... FPS" line.
    """
    # Argument list instead of a shell string: no quoting problems with paths that
    # contain spaces, and the "[...]" shape is never subject to shell globbing.
    command = ["benchmark_app", "-m", str(model_path), "-d", "CPU", "-api", "async", "-t", "15"]
    if shape is not None:
        command += ["-shape", "[" + ",".join(str(x) for x in shape) + "]"]
    cmd_output = subprocess.check_output(command)  # nosec

    # Decode the bytes rather than parsing their repr().
    text = cmd_output.decode("utf-8", errors="replace")
    if verbose:
        print(*text.splitlines()[-8:], sep="\n")

    match = re.search(r"Throughput:\s*(\S+)\s*FPS", text)
    if match is None:
        raise RuntimeError(f"benchmark_app output for {model_path} contains no throughput line")
    return float(match.group(1))


def get_model_size(ir_path: str, m_type: str = "Mb", verbose: bool = True) -> float:
    """Return the combined size of an OpenVINO IR model (``.xml`` plus ``.bin``).

    Args:
        ir_path: Path of the ``.xml`` file; the weights are read from the file with
            the same base name and a ``.bin`` extension.
        m_type: Unit of the result: ``"bytes"``, ``"Kb"``, ``"Mb"`` or ``"Gb"``.
        verbose: When true, print the xml, bin and total sizes in the chosen unit.

    Returns:
        Total size of both files expressed in ``m_type``.

    Raises:
        ValueError: If ``m_type`` is not one of the supported units.
        OSError: If either file does not exist.
    """
    unit_divisors = {"bytes": 1, "Kb": 1024, "Mb": 1024**2, "Gb": 1024**3}
    if m_type not in unit_divisors:
        raise ValueError(f"Unsupported m_type {m_type!r}; expected one of {list(unit_divisors)}")
    divisor = unit_divisors[m_type]

    xml_size = os.path.getsize(ir_path) / divisor
    bin_size = os.path.getsize(os.path.splitext(ir_path)[0] + ".bin") / divisor
    model_size = xml_size + bin_size
    if verbose:
        # Label the numbers with the unit actually used instead of a fixed "Mb".
        print(f"Model graph (xml):   {xml_size:.3f} {m_type}")
        print(f"Model weights (bin): {bin_size:.3f} {m_type}")
        print(f"Model size:          {model_size:.3f} {m_type}")
    return model_size


###############################################################################
# Create a Tensorflow model and dataset


def center_crop(image: tf.Tensor, image_size: int, crop_padding: int) -> tf.Tensor:
    """Crop a centred square from ``image`` and resize it to ``image_size``.

    The square's side is ``image_size / (image_size + crop_padding)`` times the
    shorter image side, truncated to an integer.

    Args:
        image: Image tensor whose first two dimensions are height and width.
        image_size: Side length of the returned image.
        crop_padding: Padding term that shrinks the cropped region.

    Returns:
        The cropped image, bilinearly resized to ``image_size`` x ``image_size``.
    """
    dims = tf.shape(image)
    height, width = dims[0], dims[1]

    # Fraction of the shorter side kept by the crop.
    keep_ratio = image_size / (image_size + crop_padding)
    shorter_side = tf.cast(tf.minimum(height, width), tf.float32)
    crop_size = tf.cast(keep_ratio * shorter_side, tf.int32)

    # Top-left corner of the centred square (rounded up when the margin is odd).
    top = ((height - crop_size) + 1) // 2
    left = ((width - crop_size) + 1) // 2

    cropped = tf.image.crop_to_bounding_box(
        image,
        offset_height=top,
        offset_width=left,
        target_height=crop_size,
        target_width=crop_size,
    )

    return tf.compat.v1.image.resize(
        cropped, [image_size, image_size], method=tf.image.ResizeMethod.BILINEAR, align_corners=False
    )


def preprocess_for_eval(image, label):
    """Prepare one ``(image, label)`` sample for evaluation.

    The image is centre-cropped to 224x224 (crop padding 32), passed through the
    MobileNetV2 ``preprocess_input`` function and converted to float32. The label
    is one-hot encoded with ``DATASET_CLASSES`` entries.

    NOTE(review): confirm that ``DATASET_CLASSES`` equals the number of classes the
    evaluated model predicts; otherwise the reported accuracy is not meaningful.

    Returns:
        Tuple ``(image, one_hot_label)``.
    """
    cropped = center_crop(image, 224, 32)
    scaled = tf.keras.applications.mobilenet_v2.preprocess_input(cropped)
    return (
        tf.image.convert_image_dtype(scaled, tf.float32),
        tf.one_hot(label, DATASET_CLASSES),
    )


# Validation split of Imagenette (320px variant), loaded as (image, label) pairs,
# preprocessed with preprocess_for_eval and grouped into batches of 128.
# NOTE(review): confirm this dataset is representative of the inputs the loaded model
# was trained on; it is used below both for calibration and for accuracy validation.
val_dataset = tfds.load("imagenette/320px-v2", split="validation", shuffle_files=False, as_supervised=True)
val_dataset = val_dataset.map(preprocess_for_eval).batch(128)

# Updated model path
weights_path = r"talkfinger.h5"

# Load the Keras model directly from the file path
tf_model = tf.keras.models.load_model(weights_path)

###############################################################################
# Quantize a Tensorflow model
#
# The transformation function transforms a data item into model input data.
#
# To validate the transform function use the following code:
# >> for data_item in val_loader:
# >>    model(transform_fn(data_item))


def transform_fn(data_item):
    """Return the model input from an ``(images, labels)`` data item, dropping the labels."""
    model_input, _unused_labels = data_item
    return model_input


# The calibration dataset is a small, no label, representative dataset
# (~100-500 samples) that is used to estimate the range, i.e. (min, max) of all
# floating point activation tensors in the model, to initialize the quantization
# parameters.
#
# The easiest way to define a calibration dataset is to use a training or
# validation dataset and a transformation function to remove labels from the data
# item and prepare model input data. The quantize method uses a small subset
# (default: 300 samples) of the calibration dataset.

# Pair the validation dataset with transform_fn so that NNCF receives only the images.
calibration_dataset = nncf.Dataset(val_dataset, transform_fn)
# Post-training quantization of the Keras model using the calibration dataset.
tf_quantized_model = nncf.quantize(tf_model, calibration_dataset)

###############################################################################
# Benchmark performance, calculate compression rate and validate accuracy

# Convert the original and the quantized Keras models to OpenVINO models.
ov_model = ov.convert_model(tf_model, share_weights=False)
ov_quantized_model = ov.convert_model(tf_quantized_model, share_weights=False)

# Save both models as OpenVINO IR in the working directory and measure their size on disk.
# NOTE(review): the file names say "mobilenet_v2", but the model loaded above comes
# from talkfinger.h5.
fp32_ir_path = f"{ROOT}/mobilenet_v2_fp32.xml"
ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False)
print(f"[1/7] Save FP32 model: {fp32_ir_path}")
fp32_model_size = get_model_size(fp32_ir_path, verbose=True)

int8_ir_path = f"{ROOT}/mobilenet_v2_int8.xml"
ov.save_model(ov_quantized_model, int8_ir_path, compress_to_fp16=False)
print(f"[2/7] Save INT8 model: {int8_ir_path}")
int8_model_size = get_model_size(int8_ir_path, verbose=True)

# Throughput of both IR files, measured with the same input shape.
print("[3/7] Benchmark FP32 model:")
fp32_fps = run_benchmark(fp32_ir_path, shape=[1, 224, 224, 3], verbose=True)
print("[4/7] Benchmark INT8 model:")
int8_fps = run_benchmark(int8_ir_path, shape=[1, 224, 224, 3], verbose=True)

# Top-1 accuracy of both models on the validation dataset.
print("[5/7] Validate OpenVINO FP32 model:")
fp32_top1 = validate(ov_model, val_dataset)
print(f"Accuracy @ top1: {fp32_top1:.3f}")

print("[6/7] Validate OpenVINO INT8 model:")
int8_top1 = validate(ov_quantized_model, val_dataset)
print(f"Accuracy @ top1: {int8_top1:.3f}")

# Summary: accuracy difference, size ratio and throughput ratio (INT8 relative to FP32).
print("[7/7] Report:")
print(f"Accuracy drop: {fp32_top1 - int8_top1:.3f}")
print(f"Model compression rate: {fp32_model_size / int8_model_size:.3f}")
# https://docs.openvino.ai/latest/openvino_docs_optimization_guide_dldt_optimization_guide.html
print(f"Performance speed up (throughput mode): {int8_fps / fp32_fps:.3f}")

INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, openvino
INFO:nncf:Creating compression algorithm: quantization
INFO:nncf:Overflow issue fix was applied to first convolution weight quantizers.
INFO:nncf:Collecting tensor statistics/data |█████           | 1 / 3
INFO:nncf:Collecting tensor statistics/data |██████████      | 2 / 3
INFO:nncf:Collecting tensor statistics/data |████████████████| 3 / 3
INFO:nncf:BatchNorm statistics adaptation |█████           | 1 / 3
INFO:nncf:BatchNorm statistics adaptation |██████████      | 2 / 3
INFO:nncf:BatchNorm statistics adaptation |████████████████| 3 / 3
[1/7] Save FP32 model: /content/mobilenet_v2_fp32.xml
Model graph (xml):   0.071 Mb
Model weights (bin): 16.391 Mb
Model size:          16.463 Mb
[2/7] Save INT8 model: /content/mobilenet_v2_int8.xml
Model graph (xml):   0.217 Mb
Model weights (bin): 4.224 Mb
Model size:          4.441 Mb
[3/7] Benchmark FP32 model:
[ INFO ] Count:            816 iterati

100%|██████████| 31/31 [02:21<00:00,  4.58s/it]


Accuracy @ top1: 0.006
[6/7] Validate OpenVINO INT8 model:


100%|██████████| 31/31 [01:02<00:00,  2.02s/it]

Accuracy @ top1: 0.003
[7/7] Report:
Accuracy drop: 0.003
Model compression rate: 3.707
Performance speed up (throughput mode): 1.450





In [2]:
# Inference FP32 model (OpenVINO IR)
!benchmark_app -m "mobilenet_v2_fp32.xml" -d $device.value -api async -t 15 -shape "[1,224,224,3]"

[Step 1/11] Parsing and validating input arguments
[ INFO ] Parsing input parameters
[Step 2/11] Loading OpenVINO Runtime
[ INFO ] OpenVINO:
[ INFO ] Build ................................. 2023.2.0-13089-cfd42bd2cb0-HEAD
[ INFO ] 
[ INFO ] Device info:
[ INFO ] 
[ INFO ] 
[Step 3/11] Setting device configuration
[ ERROR ] Exception from src/inference/src/core.cpp:244:
Exception from src/inference/src/dev/core_impl.cpp:1184:
Exception is thrown while trying to call get_property with unsupported property: 'SUPPORTED_PROPERTIES'

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/openvino/tools/benchmark/main.py", line 165, in main
    supported_properties = benchmark.core.get_property(device, properties.supported_properties())
RuntimeError: Exception from src/inference/src/core.cpp:244:
Exception from src/inference/src/dev/core_impl.cpp:1184:
Exception is thrown while trying to call get_property with unsupported property: 'SUPPORTED_PROPERTIES'




In [3]:
# Inference INT8 model (OpenVINO IR)
!benchmark_app -m "mobilenet_v2_int8.xml" -d $device.value -api async -t 15 -shape "[32,224,224,3]"

[Step 1/11] Parsing and validating input arguments
[ INFO ] Parsing input parameters
[Step 2/11] Loading OpenVINO Runtime
[ INFO ] OpenVINO:
[ INFO ] Build ................................. 2023.2.0-13089-cfd42bd2cb0-HEAD
[ INFO ] 
[ INFO ] Device info:
[ INFO ] 
[ INFO ] 
[Step 3/11] Setting device configuration
[ ERROR ] Exception from src/inference/src/core.cpp:244:
Exception from src/inference/src/dev/core_impl.cpp:1184:
Exception is thrown while trying to call get_property with unsupported property: 'SUPPORTED_PROPERTIES'

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/openvino/tools/benchmark/main.py", line 165, in main
    supported_properties = benchmark.core.get_property(device, properties.supported_properties())
RuntimeError: Exception from src/inference/src/core.cpp:244:
Exception from src/inference/src/dev/core_impl.cpp:1184:
Exception is thrown while trying to call get_property with unsupported property: 'SUPPORTED_PROPERTIES'


