### 对分类模型进行量化，并测试精度

In [1]:
import torchvision
from ppq import *
from ppq.api import *
from Utilities.Imagenet import (evaluate_mmlab_module_with_imagenet,
                                evaluate_onnx_module_with_imagenet,
                                evaluate_ppq_module_with_imagenet,
                                evaluate_torch_module_with_imagenet,
                                load_imagenet_from_directory)
import os


      ____  ____  __   ____                    __              __
     / __ \/ __ \/ /  / __ \__  ______ _____  / /_____  ____  / /
    / /_/ / /_/ / /  / / / / / / / __ `/ __ \/ __/ __ \/ __ \/ /
   / ____/ ____/ /__/ /_/ / /_/ / /_/ / / / / /_/ /_/ / /_/ / /
  /_/   /_/   /_____\___\_\__,_/\__,_/_/ /_/\__/\____/\____/_/


[31mTensorRT is not installed, TRT Exporter is disabled.[0m


### 配置量化流程，并依次测试 FP32 与 INT8 量化模型的精度

In [4]:
# ---------------------------------------------------------------------------
# Benchmark configuration: everything a reader might want to tune lives here.
# ---------------------------------------------------------------------------

# Target platform handed to PPQ for quantization and export.
CFG_PLATFORM = TargetPlatform.OPENVINO_INT8
# Human-readable platform tag; only used below to name the output directory.
platform = "OpenVino"

# Device string passed to torch / PPQ / the evaluation helpers.
CFG_DEVICE = 'cuda'
# Batch size used for both evaluation and calibration.
CFG_BATCHSIZE = 64
# Model input shape given to PPQ (presumably NCHW; 224x224 is assumed to suit
# the ImageNet classifiers benchmarked here -- confirm before swapping models).
CFG_INPUT_SHAPE = (CFG_BATCHSIZE, 3, 224, 224)
# Directory the validation dataset is read from.
CFG_VALIDATION_DIR = '/home/geng/tinyml/ppq/benchmark/Assets/Imagenet_Valid'
# Directory the training dataset is read from. NOTE: this set is what the model
# gets calibrated on.
CFG_TRAIN_DIR = '/home/geng/tinyml/ppq/benchmark/Assets/Imagenet_Train'
# Directory under which every exported model / config file is saved.
CFG_DUMP_PATH = '/home/geng/tinyml/ppq/benchmark/classification/'+platform+'_output'
# Quantization setting handed to PPQ (library defaults).
QUANT_SETTING = QuantizationSettingFactory.default_setting()

# Create the dump directory if it is missing. exist_ok=True replaces the racy
# "check, then create" pattern and fails fast (FileExistsError) if the path
# exists but is not a directory, instead of failing later at export time.
os.makedirs(CFG_DUMP_PATH, exist_ok=True)

|model|TargetPlatform|ORT FP32|PPQ INT8|QDQ ORT INT8|RealPlatform INT8|
|----|----|----|----|----|----|
|resnet18|OpenVino|69.764|69.466|67.109|-|

In [5]:
with ENABLE_CUDA_KERNEL():
    # Model under test: the torchvision constructor and the name used in
    # log lines and output file names.
    model_builder = torchvision.models.resnet18
    model_name = 'resnet18'

    # All artifacts share one "<dump dir>/<model name>" prefix.
    artifact_prefix = os.path.join(CFG_DUMP_PATH, model_name)
    fp32_onnx_path = f'{artifact_prefix}-FP32.onnx'
    int8_onnx_path = f'{artifact_prefix}-INT8.onnx'
    int8_config_path = f'{artifact_prefix}-INT8.json'

    # Number of training images drawn for calibration; the calibration step
    # count below is derived from the same figure.
    calib_sample_count = 5120

    print(f'---------------------- PPQ Quantization Test Running with {model_name} ----------------------')
    model = model_builder(pretrained=True).to(CFG_DEVICE)

    # Step 1: measure FP32 accuracy of the original torch model.
    fp32_report = evaluate_torch_module_with_imagenet(
        model=model,
        imagenet_validation_dir=CFG_VALIDATION_DIR,
        batchsize=CFG_BATCHSIZE,
        device=CFG_DEVICE,
        verbose=True,
    )

    # Step 2: build the (label-free) calibration dataloader from the train set.
    dataloader = load_imagenet_from_directory(
        directory=CFG_TRAIN_DIR,
        batchsize=CFG_BATCHSIZE,
        shuffle=False,
        subset=calib_sample_count,
        require_label=False,
        num_of_workers=8,
    )

    # Step 3: quantize the torch model with PPQ. Each calibration batch is
    # moved to the execution device by the collate function.
    ppq_quant_ir = quantize_torch_model(
        model=model,
        calib_dataloader=dataloader,
        input_shape=CFG_INPUT_SHAPE,
        calib_steps=calib_sample_count // CFG_BATCHSIZE,
        collate_fn=lambda batch: batch.to(CFG_DEVICE),
        verbose=1,
        device=CFG_DEVICE,
        platform=CFG_PLATFORM,
        setting=QUANT_SETTING,
        onnx_export_file=fp32_onnx_path,
    )

    # Step 4: evaluate the PPQ-quantized graph.
    ppq_int8_report = evaluate_ppq_module_with_imagenet(
        model=ppq_quant_ir,
        imagenet_validation_dir=CFG_VALIDATION_DIR,
        batchsize=CFG_BATCHSIZE,
        device=CFG_DEVICE,
        verbose=True,
    )

    # Step 5: export the deployable model and its quantization config for the
    # target platform.
    export_ppq_graph(
        graph=ppq_quant_ir,
        platform=CFG_PLATFORM,
        graph_save_to=int8_onnx_path,
        config_save_to=int8_config_path,
    )

    # Step 6: evaluate the exported ONNX model through onnxruntime.
    evaluate_onnx_module_with_imagenet(
        onnxruntime_model_path=int8_onnx_path,
        imagenet_validation_dir=CFG_VALIDATION_DIR,
        batchsize=CFG_BATCHSIZE,
        device=CFG_DEVICE,
    )

    # Optional (disabled): persist the PPQ evaluation report next to the models.
    # ppq_int8_report.to_csv(f'{os.path.join(CFG_DUMP_PATH, model_name)}-report.csv')


[31mPPQ is compling CUDA Kernels. Please wait...If there is any problem with kernel compilation, feel free to remove ENABLE_CUDA_KERNEL clause.[0m
---------------------- PPQ Quantization Test Running with resnet18 ----------------------


  prec1, prec5 = accuracy(torch.tensor(batch_pred).to('cpu'), batch_label.to('cpu'), topk=(1, 5))
Evaluating Model...:   0%|          | 3/781 [00:01<03:41,  3.51it/s]

Test: [0 / 781]	Prec@1 84.375 (84.375)	Prec@5 95.312 (95.312)


Evaluating Model...:  13%|█▎        | 104/781 [00:05<00:35, 18.89it/s]

Test: [100 / 781]	Prec@1 76.269 (76.269)	Prec@5 92.280 (92.280)


Evaluating Model...:  26%|██▌       | 203/781 [00:10<00:26, 21.61it/s]

Test: [200 / 781]	Prec@1 75.793 (75.793)	Prec@5 93.190 (93.190)


Evaluating Model...:  39%|███▊      | 302/781 [00:15<00:20, 23.49it/s]

Test: [300 / 781]	Prec@1 76.230 (76.230)	Prec@5 93.490 (93.490)


Evaluating Model...:  52%|█████▏    | 404/781 [00:20<00:17, 21.71it/s]

Test: [400 / 781]	Prec@1 73.589 (73.589)	Prec@5 91.732 (91.732)


Evaluating Model...:  64%|██████▍   | 503/781 [00:24<00:11, 23.78it/s]

Test: [500 / 781]	Prec@1 72.040 (72.040)	Prec@5 90.556 (90.556)


Evaluating Model...:  77%|███████▋  | 605/781 [00:29<00:07, 22.31it/s]

Test: [600 / 781]	Prec@1 70.793 (70.793)	Prec@5 89.757 (89.757)


Evaluating Model...:  90%|█████████ | 704/781 [00:34<00:03, 23.46it/s]

Test: [700 / 781]	Prec@1 69.824 (69.824)	Prec@5 89.042 (89.042)


Evaluating Model...: 100%|██████████| 781/781 [00:38<00:00, 20.44it/s]


 * Prec@1 69.764 Prec@5 89.085
[22:33:35] PPQ Quantization Config Refine Pass Running ... Finished.
[22:33:35] PPQ Quantization Fusion Pass Running ...        Finished.
[22:33:36] PPQ Quantize Point Reduce Pass Running ...      Finished.
[22:33:36] PPQ Parameter Quantization Pass Running ...     [31mNumeric instability detected: ppq find there is a scale value < 1e-7, which probably cause numeric underflow in further computation.[0m
[31mNumeric instability detected: ppq find there is a scale value < 1e-7, which probably cause numeric underflow in further computation.[0m
[31mNumeric instability detected: ppq find there is a scale value < 1e-7, which probably cause numeric underflow in further computation.[0m
[31mNumeric instability detected: ppq find there is a scale value < 1e-7, which probably cause numeric underflow in further computation.[0m
[31mNumeric instability detected: ppq find there is a scale value < 1e-7, which probably cause numeric underflow in further computatio

Calibration Progress(Phase 1): 100%|██████████| 80/80 [00:07<00:00, 10.00it/s]


Finished.
[22:33:44] PPQ Quantization Alignment Pass Running ...     Finished.
[22:33:44] PPQ Passive Parameter Quantization Running ...  Finished.
[22:33:44] PPQ Parameter Baking Pass Running ...           Finished.
--------- Network Snapshot ---------
Num of Op:                    [49]
Num of Quantized Op:          [49]
Num of Variable:              [92]
Num of Quantized Var:         [92]
------- Quantization Snapshot ------
Num of Quant Config:          [148]
BAKED:                        [21]
OVERLAPPED:                   [57]
SLAVE:                        [19]
ACTIVATED:                    [30]
PASSIVE_BAKED:                [21]
Network Quantization Finished.


  model_forward_function = lambda input_tensor: torch.tensor(
  prec1, prec5 = accuracy(torch.tensor(batch_pred).to('cpu'), batch_label.to('cpu'), topk=(1, 5))
Evaluating Model...:   0%|          | 2/781 [00:00<04:05,  3.17it/s]

Test: [0 / 781]	Prec@1 84.375 (84.375)	Prec@5 96.875 (96.875)


Evaluating Model...:  13%|█▎        | 103/781 [00:08<00:48, 13.84it/s]

Test: [100 / 781]	Prec@1 75.820 (75.820)	Prec@5 92.188 (92.188)


Evaluating Model...:  26%|██▌       | 203/781 [00:15<00:42, 13.63it/s]

Test: [200 / 781]	Prec@1 75.319 (75.319)	Prec@5 93.113 (93.113)


Evaluating Model...:  39%|███▉      | 303/781 [00:22<00:35, 13.62it/s]

Test: [300 / 781]	Prec@1 75.810 (75.810)	Prec@5 93.366 (93.366)


Evaluating Model...:  52%|█████▏    | 403/781 [00:30<00:27, 13.75it/s]

Test: [400 / 781]	Prec@1 73.289 (73.289)	Prec@5 91.619 (91.619)


Evaluating Model...:  64%|██████▍   | 503/781 [00:37<00:20, 13.79it/s]

Test: [500 / 781]	Prec@1 71.744 (71.744)	Prec@5 90.435 (90.435)


Evaluating Model...:  77%|███████▋  | 603/781 [00:44<00:13, 13.45it/s]

Test: [600 / 781]	Prec@1 70.435 (70.435)	Prec@5 89.608 (89.608)


Evaluating Model...:  90%|█████████ | 703/781 [00:51<00:05, 13.87it/s]

Test: [700 / 781]	Prec@1 69.512 (69.512)	Prec@5 88.902 (88.902)


Evaluating Model...: 100%|██████████| 781/781 [00:57<00:00, 13.56it/s]


 * Prec@1 69.466 Prec@5 88.934


  model_forward_function = lambda input_tensor: torch.tensor(sess.run(
  prec1, prec5 = accuracy(torch.tensor(batch_pred).to('cpu'), batch_label.to('cpu'), topk=(1, 5))
Evaluating Model...:   0%|          | 1/781 [00:01<20:34,  1.58s/it]

Test: [0 / 781]	Prec@1 81.250 (81.250)	Prec@5 93.750 (93.750)


Evaluating Model...:  13%|█▎        | 101/781 [01:00<06:12,  1.83it/s]

Test: [100 / 781]	Prec@1 74.288 (74.288)	Prec@5 91.290 (91.290)


Evaluating Model...:  26%|██▌       | 201/781 [02:00<05:30,  1.76it/s]

Test: [200 / 781]	Prec@1 73.593 (73.593)	Prec@5 92.211 (92.211)


Evaluating Model...:  39%|███▊      | 301/781 [03:01<05:09,  1.55it/s]

Test: [300 / 781]	Prec@1 74.024 (74.024)	Prec@5 92.333 (92.333)


Evaluating Model...:  51%|█████▏    | 401/781 [04:02<04:07,  1.54it/s]

Test: [400 / 781]	Prec@1 71.372 (71.372)	Prec@5 90.395 (90.395)


Evaluating Model...:  64%|██████▍   | 501/781 [05:01<03:07,  1.49it/s]

Test: [500 / 781]	Prec@1 69.673 (69.673)	Prec@5 89.097 (89.097)


Evaluating Model...:  77%|███████▋  | 601/781 [06:00<01:47,  1.68it/s]

Test: [600 / 781]	Prec@1 68.342 (68.342)	Prec@5 88.150 (88.150)


Evaluating Model...:  90%|████████▉ | 701/781 [06:58<00:48,  1.65it/s]

Test: [700 / 781]	Prec@1 67.306 (67.306)	Prec@5 87.357 (87.357)


Evaluating Model...: 100%|██████████| 781/781 [07:43<00:00,  1.68it/s]

 * Prec@1 67.109 Prec@5 87.246



