### 对分类模型进行量化，并测试精度

In [7]:
import torchvision
from ppq import *
from ppq.api import *
from Utilities.Imagenet import (evaluate_mmlab_module_with_imagenet,
                                evaluate_onnx_module_with_imagenet,
                                evaluate_ppq_module_with_imagenet,
                                evaluate_torch_module_with_imagenet,
                                load_imagenet_from_directory)
import os

### 测试全精度模型

In [8]:
CFG_PLATFORM = TargetPlatform.TRT_INT8  # 用来指定目标平台
platform = "TRT"  #记得修改上面两个
QUANT_SETTING = QuantizationSettingFactory.trt_setting() # 用来指定量化配置

CFG_DEVICE = 'cuda'                            # 一个神奇的字符串，用来确定执行设备
CFG_BATCHSIZE = 64                             # 测试与calib时的 batchsize
CFG_INPUT_SHAPE = (CFG_BATCHSIZE, 3, 224, 224) # 用来确定模型输入的尺寸，好像 imagenet 都是这个尺寸
CFG_VALIDATION_DIR = '/home/geng/tinyml/ppq/benchmark/Assets/Imagenet_Valid'   # 用来读取 validation dataset
CFG_TRAIN_DIR = '/home/geng/tinyml/ppq/benchmark/Assets/Imagenet_Train'        # 用来读取 train dataset，注意该集合将被用来 calibrate 你的模型
CFG_DUMP_PATH = '/home/geng/tinyml/ppq/benchmark/classification/'+platform+'_output'    # 所有模型保存的路径名

if not os.path.exists(CFG_DUMP_PATH):
    os.makedirs(CFG_DUMP_PATH)

|model|TargetPlatform|ORT FP32|PPQ INT8|DQD ORT INT8|RealPlatform INT8|
|----|----|----|----|----|----|
|resnet18|OpenVino|69.764|69.466|67.109|-|
|resnet18|TRT|69.764|69.548|69.524|-|
|resnet18|Snpe|69.764|69.278|69.266|-|
|resnet18|Ncnn|69.764|69.106|69.070|-|

In [5]:
with ENABLE_CUDA_KERNEL():
    model_builder, model_name  = torchvision.models.resnet18, 'resnet18'

    print(f'---------------------- PPQ Quantization Test Running with {model_name} ----------------------')
    model = model_builder(pretrained=True).to(CFG_DEVICE)

    #测试FP32精度
    # fp32_report = evaluate_torch_module_with_imagenet(
    #     model=model, imagenet_validation_dir=CFG_VALIDATION_DIR,
    #     batchsize=CFG_BATCHSIZE, device=CFG_DEVICE, verbose=True)

    # 获取校准数据
    dataloader = load_imagenet_from_directory(
        directory=CFG_TRAIN_DIR, batchsize=CFG_BATCHSIZE,
        shuffle=False, subset=5120, require_label=False,
        num_of_workers=8)

    # 量化torch模型
    ppq_quant_ir = quantize_torch_model(
        model=model, calib_dataloader=dataloader, input_shape=CFG_INPUT_SHAPE,
        calib_steps=5120 // CFG_BATCHSIZE, collate_fn=lambda x: x.to(CFG_DEVICE), verbose=1,
        device=CFG_DEVICE, platform=CFG_PLATFORM, setting=QUANT_SETTING,
        onnx_export_file=f'{os.path.join(CFG_DUMP_PATH, model_name)}-FP32.onnx')
        
    # 评估PPQ量化后的模型
    ppq_int8_report = evaluate_ppq_module_with_imagenet(
        model=ppq_quant_ir, imagenet_validation_dir=CFG_VALIDATION_DIR,
        batchsize=CFG_BATCHSIZE, device=CFG_DEVICE, verbose=True)

    # 导出ORT模型
    export_ppq_graph(
        graph=ppq_quant_ir, 
        platform=TargetPlatform.ONNXRUNTIME,
        graph_save_to=f'{os.path.join(CFG_DUMP_PATH, model_name)}-INT8.onnx')
    
    # 导出部署平台模型
    export_ppq_graph(
        graph=ppq_quant_ir, 
        platform=TargetPlatform.TRT_INT8,
        graph_save_to=f'{os.path.join(CFG_DUMP_PATH, model_name)}-INT8.onnx')
    
    # 评估onnx运行模型
    evaluate_onnx_module_with_imagenet(
        onnxruntime_model_path=f'{os.path.join(CFG_DUMP_PATH, model_name)}-INT8.onnx', 
        imagenet_validation_dir=CFG_VALIDATION_DIR, batchsize=CFG_BATCHSIZE, 
        device=CFG_DEVICE)

    # ppq_int8_report.to_csv(f'{os.path.join(CFG_DUMP_PATH, model_name)}-report.csv')


[31mPPQ is compling CUDA Kernels. Please wait...If there is any problem with kernel compilation, feel free to remove ENABLE_CUDA_KERNEL clause.[0m
---------------------- PPQ Quantization Test Running with resnet18 ----------------------


  prec1, prec5 = accuracy(torch.tensor(batch_pred).to('cpu'), batch_label.to('cpu'), topk=(1, 5))
Evaluating Model...:   0%|          | 1/781 [00:00<08:21,  1.56it/s]

Test: [0 / 781]	Prec@1 84.375 (84.375)	Prec@5 95.312 (95.312)


Evaluating Model...:  13%|█▎        | 104/781 [00:05<00:31, 21.38it/s]

Test: [100 / 781]	Prec@1 76.269 (76.269)	Prec@5 92.280 (92.280)


Evaluating Model...:  26%|██▌       | 203/781 [00:10<00:24, 23.51it/s]

Test: [200 / 781]	Prec@1 75.793 (75.793)	Prec@5 93.190 (93.190)


Evaluating Model...:  39%|███▊      | 302/781 [00:15<00:20, 23.76it/s]

Test: [300 / 781]	Prec@1 76.230 (76.230)	Prec@5 93.490 (93.490)


Evaluating Model...:  52%|█████▏    | 404/781 [00:20<00:16, 22.80it/s]

Test: [400 / 781]	Prec@1 73.589 (73.589)	Prec@5 91.732 (91.732)


Evaluating Model...:  65%|██████▍   | 506/781 [00:24<00:11, 24.65it/s]

Test: [500 / 781]	Prec@1 72.040 (72.040)	Prec@5 90.556 (90.556)


Evaluating Model...:  77%|███████▋  | 605/781 [00:29<00:07, 22.96it/s]

Test: [600 / 781]	Prec@1 70.793 (70.793)	Prec@5 89.757 (89.757)


Evaluating Model...:  90%|█████████ | 704/781 [00:33<00:03, 20.40it/s]

Test: [700 / 781]	Prec@1 69.824 (69.824)	Prec@5 89.042 (89.042)


Evaluating Model...: 100%|██████████| 781/781 [00:37<00:00, 20.57it/s]


 * Prec@1 69.764 Prec@5 89.085
[07:45:02] PPQ Quantization Config Refine Pass Running ... Finished.
[07:45:02] PPQ Quantization Fusion Pass Running ...        Finished.
[07:45:02] PPQ Quantize Point Reduce Pass Running ...      Finished.
[07:45:02] PPQ Parameter Quantization Pass Running ...     Finished.
[07:45:02] PPQ Runtime Calibration Pass Running ...        

Calibration Progress(Phase 1): 100%|██████████| 80/80 [00:07<00:00, 10.28it/s]


Finished.
[07:45:10] PPQ Quantization Alignment Pass Running ...     Finished.
[07:45:10] PPQ Passive Parameter Quantization Running ...  Finished.
[07:45:10] PPQ Parameter Baking Pass Running ...           Finished.
--------- Network Snapshot ---------
Num of Op:                    [49]
Num of Quantized Op:          [47]
Num of Variable:              [92]
Num of Quantized Var:         [88]
------- Quantization Snapshot ------
Num of Quant Config:          [142]
BAKED:                        [20]
OVERLAPPED:                   [56]
SLAVE:                        [19]
ACTIVATED:                    [27]
PASSIVE_BAKED:                [20]
Network Quantization Finished.


  model_forward_function = lambda input_tensor: torch.tensor(
  prec1, prec5 = accuracy(torch.tensor(batch_pred).to('cpu'), batch_label.to('cpu'), topk=(1, 5))
Evaluating Model...:   0%|          | 1/781 [00:00<11:45,  1.11it/s]

Test: [0 / 781]	Prec@1 85.938 (85.938)	Prec@5 93.750 (93.750)


Evaluating Model...:  13%|█▎        | 103/781 [00:08<00:48, 14.01it/s]

Test: [100 / 781]	Prec@1 75.541 (75.541)	Prec@5 92.126 (92.126)


Evaluating Model...:  26%|██▌       | 203/781 [00:15<00:41, 14.09it/s]

Test: [200 / 781]	Prec@1 75.241 (75.241)	Prec@5 93.074 (93.074)


Evaluating Model...:  39%|███▉      | 303/781 [00:22<00:33, 14.08it/s]

Test: [300 / 781]	Prec@1 75.903 (75.903)	Prec@5 93.350 (93.350)


Evaluating Model...:  52%|█████▏    | 403/781 [00:29<00:26, 14.10it/s]

Test: [400 / 781]	Prec@1 73.231 (73.231)	Prec@5 91.650 (91.650)


Evaluating Model...:  64%|██████▍   | 503/781 [00:36<00:19, 14.11it/s]

Test: [500 / 781]	Prec@1 71.597 (71.597)	Prec@5 90.472 (90.472)


Evaluating Model...:  77%|███████▋  | 603/781 [00:43<00:12, 13.96it/s]

Test: [600 / 781]	Prec@1 70.289 (70.289)	Prec@5 89.624 (89.624)


Evaluating Model...:  90%|█████████ | 703/781 [00:51<00:05, 14.10it/s]

Test: [700 / 781]	Prec@1 69.318 (69.318)	Prec@5 88.880 (88.880)


Evaluating Model...: 100%|██████████| 781/781 [00:56<00:00, 13.77it/s]


 * Prec@1 69.278 Prec@5 88.936


  prec1, prec5 = accuracy(torch.tensor(batch_pred).to('cpu'), batch_label.to('cpu'), topk=(1, 5))
Evaluating Model...:   0%|          | 1/781 [00:01<17:37,  1.36s/it]

Test: [0 / 781]	Prec@1 85.938 (85.938)	Prec@5 93.750 (93.750)


Evaluating Model...:  13%|█▎        | 101/781 [00:59<06:21,  1.78it/s]

Test: [100 / 781]	Prec@1 75.572 (75.572)	Prec@5 92.141 (92.141)


Evaluating Model...:  26%|██▌       | 201/781 [01:56<05:43,  1.69it/s]

Test: [200 / 781]	Prec@1 75.218 (75.218)	Prec@5 93.097 (93.097)


Evaluating Model...:  39%|███▊      | 301/781 [02:53<04:39,  1.72it/s]

Test: [300 / 781]	Prec@1 75.851 (75.851)	Prec@5 93.355 (93.355)


Evaluating Model...:  51%|█████▏    | 401/781 [03:52<03:48,  1.67it/s]

Test: [400 / 781]	Prec@1 73.200 (73.200)	Prec@5 91.619 (91.619)


Evaluating Model...:  64%|██████▍   | 501/781 [05:01<02:57,  1.58it/s]

Test: [500 / 781]	Prec@1 71.551 (71.551)	Prec@5 90.447 (90.447)


Evaluating Model...:  77%|███████▋  | 601/781 [06:09<02:05,  1.44it/s]

Test: [600 / 781]	Prec@1 70.255 (70.255)	Prec@5 89.588 (89.588)


Evaluating Model...:  90%|████████▉ | 701/781 [08:25<02:09,  1.62s/it]

Test: [700 / 781]	Prec@1 69.307 (69.307)	Prec@5 88.844 (88.844)


Evaluating Model...: 100%|██████████| 781/781 [10:34<00:00,  1.23it/s]


 * Prec@1 69.266 Prec@5 88.896


## 评估Openvino推理精度

In [1]:
import openvino
import openvino.runtime
import cfg
import torch
from ppq.core import *
from Utilities.Imagenet import evaluate_openvino_module_with_imagenet

CFG_BATCHSIZE = 64                             # 测试与calib时的 batchsize
CFG_INPUT_SHAPE = (CFG_BATCHSIZE, 3, 224, 224) # 用来确定模型输入的尺寸，好像 imagenet 都是这个尺寸
CFG_VALIDATION_DIR = '/home/geng/tinyml/ppq/benchmark/Assets/Imagenet_Valid'   # 用来读取 validation dataset
CFG_TRAIN_DIR = '/home/geng/tinyml/ppq/benchmark/Assets/Imagenet_Train'        # 用来读取 train dataset，注意该集合将被用来 calibrate 你的模型



      ____  ____  __   ____                    __              __
     / __ \/ __ \/ /  / __ \__  ______ _____  / /_____  ____  / /
    / /_/ / /_/ / /  / / / / / / / __ `/ __ \/ __/ __ \/ __ \/ /
   / ____/ ____/ /__/ /_/ / /_/ / /_/ / / / / /_/ /_/ / /_/ / /
  /_/   /_/   /_____\___\_\__,_/\__,_/_/ /_/\__/\____/\____/_/


[31mTensorRT is not installed, TRT Exporter is disabled.[0m


In [3]:
name = 'ResNet18'
target = "OpenVino"

model_path = f'{os.path.join(cfg.OPENVINO_BASE_PATH, name)}-{target}-INT8.onnx'

evaluate_openvino_module_with_imagenet(model_path=model_path,imagenet_validation_dir=CFG_VALIDATION_DIR,
batchsize=CFG_BATCHSIZE,device="cpu")

  prec1, prec5 = accuracy(torch.tensor(batch_pred).to('cpu'), batch_label.to('cpu'), topk=(1, 5))
Evaluating Model...:   0%|          | 1/781 [00:01<13:35,  1.05s/it]

Test: [0 / 781]	Prec@1 81.250 (81.250)	Prec@5 92.188 (92.188)


Evaluating Model...:  13%|█▎        | 101/781 [00:33<03:30,  3.24it/s]

Test: [100 / 781]	Prec@1 74.165 (74.165)	Prec@5 91.352 (91.352)


Evaluating Model...:  26%|██▌       | 201/781 [01:04<03:06,  3.10it/s]

Test: [200 / 781]	Prec@1 73.298 (73.298)	Prec@5 92.141 (92.141)


Evaluating Model...:  39%|███▊      | 301/781 [01:35<02:29,  3.22it/s]

Test: [300 / 781]	Prec@1 73.785 (73.785)	Prec@5 92.229 (92.229)


Evaluating Model...:  51%|█████▏    | 401/781 [02:07<01:57,  3.23it/s]

Test: [400 / 781]	Prec@1 71.267 (71.267)	Prec@5 90.290 (90.290)


Evaluating Model...:  64%|██████▍   | 501/781 [02:38<01:29,  3.14it/s]

Test: [500 / 781]	Prec@1 69.586 (69.586)	Prec@5 88.997 (88.997)


Evaluating Model...:  77%|███████▋  | 601/781 [03:09<00:55,  3.22it/s]

Test: [600 / 781]	Prec@1 68.274 (68.274)	Prec@5 88.025 (88.025)


Evaluating Model...:  77%|███████▋  | 605/781 [03:11<00:55,  3.16it/s]


KeyboardInterrupt: 

In [4]:

INPUT_SHAPE = (64, 3, 224, 224) # 用来确定模型输入的尺寸，好像 imagenet 都是这个尺寸
SAMPLES = torch.rand(size=INPUT_SHAPE)
res = torch.tensor(list(model_openvino([convert_any_to_numpy(SAMPLES)]).values())[0])   #openvino获取结推理果

In [7]:
torch.tensor(list(res)[0])

tensor([[-2.0925,  0.9155,  1.4386,  ..., -1.0462,  0.1308,  2.4848],
        [-1.7001,  0.7847,  1.5694,  ..., -0.9155,  0.0000,  2.2233],
        [-1.7001,  0.6539,  1.1770,  ..., -0.9155,  0.1308,  2.2233],
        ...,
        [-1.5694,  0.6539,  1.3078,  ..., -0.7847,  0.1308,  2.6156],
        [-1.7001,  0.7847,  1.4386,  ..., -0.7847,  0.0000,  2.6156],
        [-2.0925,  0.5231,  1.1770,  ..., -0.9155,  0.2616,  2.4848]])

## 测试TensorRT精度

In [8]:
import trt_infer
import tensorrt as trt

def infer_trt(model_path: str, samples: List[np.ndarray]) -> List[np.ndarray]:
    """ Run a tensorrt model with given samples
    你需要注意我这里留了数据 IO，数据总是从 host 送往 device 的
    如果你只关心 GPU 上的运行时间，你应该修改这个方法使得数据不发生迁移
    """
    logger = trt.Logger(trt.Logger.INFO)
    with open(model_path, 'rb') as f, trt.Runtime(logger) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())

    results = []
    with engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = trt_infer.allocate_buffers(context.engine)
        for sample in tqdm(samples, desc='TensorRT is running...'):
            inputs[0].host = convert_any_to_numpy(sample)
            [output] = trt_infer.do_inference(
                context, bindings=bindings, inputs=inputs, 
                outputs=outputs, stream=stream, batch_size=1)
            results.append(convert_any_to_torch_tensor(output).reshape([-1, 1000]))
    return results


ImportError: libnvinfer.so.8: cannot open shared object file: No such file or directory

## 保存中间PPQ graph

In [1]:
platform = "OpenVino"  #记得修改上面两个
CFG_DUMP_PATH = '/home/geng/tinyml/ppq/benchmark/classification/'+platform+'_output'
model_name = "ResNet18"

with open(f"{os.path.join(CFG_DUMP_PATH, model_name)}-PPQ-INT8.graph","rb") as f:
    ppq_ir =  pickle.load(f)

NameError: name 'CFG_DUMP_PATH' is not defined