In [None]:
import onnx
import nncf
import torch
from torchvision import datasets
from torchvision import transforms

# Load the uncompressed ONNX model that is going to be quantized.
onnx_model = onnx.load_model("mobile.onnx")

# Validation images used to collect the statistics needed by the compression
# algorithm. Every image is resized to 224x224 and converted to a tensor.
val_dataset = datasets.ImageFolder(
    root="/kaggle/input/nclass-dataset",
    transform=transforms.Compose(
        [
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
        ]
    ),
)
dataset_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=1)

# Step 1: Initialize transformation function
# The name of the model's first graph input is the key of the feed dict
# built by the transformation function.
input_name = onnx_model.graph.input[0].name
def transform_fn(data_item):
    """Turn one data-loader item into the input feed for the ONNX model.

    Args:
        data_item: A two-element item; the first element is the image batch
            and the second element is ignored.

    Returns:
        A dict mapping the module-level ``input_name`` to the image batch
        converted with ``.numpy()``.
    """
    image_batch, _unused = data_item
    feed = {input_name: image_batch.numpy()}
    return feed

# Step 2: Wrap the data loader and the transformation function in an NNCF Dataset
calibration_dataset = nncf.Dataset(
    data_source=dataset_loader,
    transform_func=transform_fn,
)
# Step 3: Run the quantization pipeline
# NNCF can quantize an ONNX model directly, and it can also quantize an
# OpenVINO model; which one happens depends on the backend of the model
# object that is passed in.
quantized_model = nncf.quantize(
    model=onnx_model,
    calibration_dataset=calibration_dataset,
)

In [None]:
# Placeholder (Ellipsis): replace with the model to quantize before running
# this cell.
model = ... # openvino.runtime.Model object
# First dimension of INPUT_SHAPE, i.e. the size of each random calibration
# batch produced by load_calibration_dataset().
BATCHSIZE   = 32
# Shape passed to torch.rand() for every calibration batch; presumably
# (batch, channels, height, width) — TODO confirm against the model input.
INPUT_SHAPE = [BATCHSIZE, 3, 224, 224]
# NOTE(review): DEVICE is not read by any of the visible cells.
DEVICE      = 'cuda'
# NOTE(review): TargetPlatform is not imported in any of the visible cells, so
# this line raises NameError unless the name is defined elsewhere. PLATFORM is
# also not read by any of the visible cells.
PLATFORM    = TargetPlatform.TRT_INT8

def load_calibration_dataset(num_batches=32, input_shape=None) -> "Iterable[torch.Tensor]":
    """Build a synthetic calibration dataset made of random tensors.

    Notes carried over from the original (PPQ) sample this helper comes from:
    the quantizer expects roughly 32 ~ 1024 samples as calibration data. They
    should follow the distribution of real samples as closely as possible,
    because quantization can overfit just like training can. Calibration data
    must be correctly preprocessed and representative, otherwise quantization
    will fail. Calibration data needs no labels, and the dataset must not be
    shuffled. Random tensors are therefore only a stand-in for real data.

    Args:
        num_batches: Number of tensors to generate. Defaults to 32, the value
            that was previously hard-coded.
        input_shape: Shape of each generated tensor. When ``None`` (default)
            the module-level ``INPUT_SHAPE`` is used, as before.

    Returns:
        A list of ``num_batches`` tensors drawn with ``torch.rand``.

    Raises:
        ValueError: If ``num_batches`` is smaller than 1.
    """
    # The return annotation is quoted on purpose: `Iterable` is not imported
    # in the visible notebook cells, and an unquoted annotation is evaluated
    # when the `def` statement runs.
    if num_batches < 1:
        raise ValueError(f"num_batches must be >= 1, got {num_batches}")
    # INPUT_SHAPE is only looked up when the caller did not pass a shape.
    shape = INPUT_SHAPE if input_shape is None else list(input_shape)
    return [torch.rand(size=shape) for _ in range(num_batches)]


# Quantize `model` with the calibration dataset; the result is bound to
# `quantized_model`.
quantized_model = nncf.quantize(
    model=model,
    calibration_dataset=calibration_dataset,
)


# Example: exclude specific layers / operation types from quantization and
# choose the target device.
#names = ['layer_1', 'layer_2', 'layer_3']
#types = ['Conv2d', 'Linear']
#nncf.quantize(model, dataset,
#              ignored_scope=nncf.IgnoredScope(names=names, types=types),
#              target_device=nncf.TargetDevice.CPU)

In [None]:
import openvino.runtime as ov
from openvino.tools.mo import convert_model

# Placeholder (Ellipsis): replace with a sample FP32 input before running
# this cell.
input_fp32 = ... # FP32 model input

# Intermediate ONNX file, written only when the quantized model is not
# already an OpenVINO model.
onnx_model_path = "model.onnx"

# nncf.quantize returns a model of the same framework as the one it was
# given, so the route to an OpenVINO model depends on what `quantized_model`
# currently is. torch.onnx.export only accepts PyTorch modules, so it is
# used for that case alone.
if isinstance(quantized_model, ov.Model):
    # Already an OpenVINO model: nothing to export or convert.
    ov_quantized_model = quantized_model
elif isinstance(quantized_model, onnx.ModelProto):
    # Quantized ONNX model: write it to disk, then convert it to OpenVINO.
    onnx.save_model(quantized_model, onnx_model_path)
    ov_quantized_model = convert_model(onnx_model_path)
elif isinstance(quantized_model, torch.nn.Module):
    # export PyTorch model to ONNX model
    torch.onnx.export(quantized_model, input_fp32, onnx_model_path)
    # convert ONNX model to OpenVINO model
    ov_quantized_model = convert_model(onnx_model_path)
else:
    raise TypeError(
        "quantized_model must be an OpenVINO Model, an ONNX ModelProto or a "
        f"torch.nn.Module, got {type(quantized_model).__name__}"
    )

# compile the model to transform quantized operations to int8
model_int8 = ov.compile_model(ov_quantized_model)

# Run one inference on the sample input.
res = model_int8(input_fp32)

# save the model
ov.serialize(ov_quantized_model, "quantized_model.xml")