In [None]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.2.57-py3-none-any.whl (801 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m801.6/801.6 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.0-py3-none-any.whl (25 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-c

# Before quantization

In [None]:
from ultralytics import YOLO

# Load the YOLOv8n checkpoint that serves as the "before quantization" baseline.
model = YOLO("yolov8n.pt")

In [None]:
# Validate the baseline model on the dataset described by 'coco.yaml'; the metrics are printed in the next cell.
metrics = model.val(data='coco.yaml')

Ultralytics YOLOv8.2.57 🚀 Python-3.10.12 torch-2.3.0+cu121 CUDA:0 (Tesla T4, 15102MiB)
YOLOv8n summary (fused): 168 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs
Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf'...


100%|██████████| 755k/755k [00:00<00:00, 22.1MB/s]
[34m[1mval: [0mScanning /content/datasets/coco/labels/val2017... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:14<00:00, 340.73it/s]


[34m[1mval: [0mNew cache created: /content/datasets/coco/labels/val2017.cache


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 313/313 [01:15<00:00,  4.14it/s]


                   all       5000      36335      0.634      0.474      0.521      0.371
                person       2693      10777      0.755      0.671      0.745      0.514
               bicycle        149        314       0.69      0.392      0.457      0.265
                   car        535       1918      0.648      0.514      0.562      0.364
            motorcycle        159        367      0.715       0.58      0.657      0.414
              airplane         97        143      0.814      0.764      0.832      0.653
                   bus        189        283      0.747      0.643      0.739       0.62
                 train        157        190      0.798       0.77      0.834      0.645
                 truck        250        414      0.547      0.397      0.435      0.293
                  boat        121        424      0.583        0.3      0.378      0.211
         traffic light        191        634      0.647      0.345      0.409      0.211
          fire hydran

In [None]:
# Print the baseline box mAP values (labelled mAP50-95, mAP50, mAP75) to 3 significant digits.
print(f'box map50-95 {metrics.box.map:.3}')
print(f'box map50 {metrics.box.map50:.3}')
print(f'box map75 {metrics.box.map75:.3}')

box map50-95 0.371
box map50 0.521
box map75 0.403


In [None]:
def check_model_size(model):
    """Return the size of ``model`` in MiB, counting parameters and buffers.

    Each tensor yielded by ``model.parameters()`` and ``model.buffers()``
    contributes ``nelement() * element_size()`` bytes; the total is divided
    by ``1024**2``.

    NOTE(review): only tensors exposed through ``parameters()``/``buffers()``
    are counted; weights stored elsewhere by a module would be missed —
    confirm before comparing quantized models with this number.
    """
    def _total_bytes(tensors):
        return sum(t.nelement() * t.element_size() for t in tensors)

    n_bytes = _total_bytes(model.parameters()) + _total_bytes(model.buffers())
    return n_bytes / 1024**2

In [None]:
check_model_size(model.model)  # size in MiB (bytes / 1024**2) of the baseline module

12.0235595703125

# Model quantization
Links:
- https://pytorch.org/tutorials/recipes/quantization.html

In [None]:
# Reload the YOLOv8n checkpoint; the quantization cells below read `model.model` from this object.
model = YOLO("yolov8n.pt")

In [None]:
def fuse_yolo_layers(model):
    """Fuse Conv2d -> BatchNorm2d -> ReLU runs found inside ``nn.Sequential`` containers.

    Every ``torch.nn.Sequential`` reachable from ``model`` is scanned for three
    consecutive children of exactly those types; each match is fused in place
    with ``torch.ao.quantization.fuse_modules``.

    Args:
        model: a ``torch.nn.Module``; it is modified in place.

    Returns:
        The same ``model`` object that was passed in.

    NOTE(review): Conv+BN fusion is expected to require the model to be in
    eval mode — confirm before calling on a training-mode model.
    NOTE(review): only ReLU activations inside ``nn.Sequential`` are matched;
    if the network uses a different activation or container type this is a
    no-op — verify that it actually fuses anything for the model in use.
    """
    # Imported locally: in notebook order this function is called before the
    # first top-level ``import torch``, which raises NameError on a fresh kernel.
    import torch

    # Snapshot the module list because fusing rewrites children while we walk.
    for module in list(model.modules()):
        if not isinstance(module, torch.nn.Sequential):
            continue
        # Use the real child names so containers built from an OrderedDict
        # (non-numeric names) are handled as well as index-named ones.
        children = list(module._modules.items())
        for (conv_name, conv), (bn_name, bn), (act_name, act) in zip(
                children, children[1:], children[2:]):
            if (isinstance(conv, torch.nn.Conv2d)
                    and isinstance(bn, torch.nn.BatchNorm2d)
                    and isinstance(act, torch.nn.ReLU)):
                torch.ao.quantization.fuse_modules(
                    module, [conv_name, bn_name, act_name], inplace=True)
    return model

# fuse_yolo_layers fuses in place and returns its argument, so `model_fused` is the same object as `model`.
model_fused = fuse_yolo_layers(model)


## Using torch

### Linear

#### FP16

In [None]:
import torch
# Request dynamic quantization of the nn.Linear submodules of the underlying module to float16.
model_fp16 = torch.ao.quantization.quantize_dynamic(
    model.model,  # the original model
    {torch.nn.Linear},  # a set of layers to dynamically quantize
    dtype=torch.float16)

In [None]:
# Checkpoint-style dict holding the quantized module under 'model' plus empty 'train_args'.
ckpt = {'model': model_fp16, 'train_args': {}}

# Report the size computed by check_model_size, then write the checkpoint to disk.
print(f'Quant model size: {check_model_size(model_fp16)}')

torch.save(ckpt, './model_quant_fp16_linear.pt')

Quant model size: 6.042518615722656


In [None]:
# `model_fp16` is the bare module returned by quantize_dynamic, not a YOLO wrapper,
# so validation goes through the saved checkpoint like the other sections of this notebook.
quant_model = YOLO("./model_quant_fp16_linear.pt")
results = quant_model.val(data='coco.yaml')

RuntimeError: expected m1 and m2 to have the same dtype, but got: c10::Half != float

In [None]:
# Print the box mAP values held in `results`, to 3 significant digits.
# NOTE(review): the recorded run of the preceding cell ended in an error and the numbers
# recorded here equal the baseline, so `results` was probably stale when this ran — confirm.
print(f'box map50-95 {results.box.map:.3}')
print(f'box map50 {results.box.map50:.3}')
print(f'box map75 {results.box.map75:.3}')

box map50-95 0.371
box map50 0.521
box map75 0.403


#### INT8

In [None]:
import torch
# Request dynamic quantization of the nn.Linear submodules of the underlying module to qint8.
model_int8 = torch.ao.quantization.quantize_dynamic(
    model.model,  # the original model
    {torch.nn.Linear},  # a set of layers to dynamically quantize
    dtype=torch.qint8)



In [None]:
# Checkpoint-style dict holding the quantized module under 'model' plus empty 'train_args'.
ckpt = {'model': model_int8, 'train_args': {}}

# Report the size computed by check_model_size, then write the checkpoint to disk.
print(f'Quant model size: {check_model_size(model_int8)}')

torch.save(ckpt, './model_quant_int8.pt')

# Reload the saved checkpoint through the YOLO wrapper for validation.
# NOTE(review): the recorded run of this cell ended with an AttributeError — confirm the reload works.
quant_model = YOLO("./model_quant_int8.pt")

Quant model size: 0.041492462158203125


  device=storage.device,


AttributeError: 'Conv2d' object has no attribute '_modules'

In [None]:
# Validate through the reloaded YOLO wrapper (`quant_model`), as the other sections do;
# `model_int8` is the bare module returned by quantize_dynamic.
results = quant_model.val(data='coco.yaml')

[34m[1mval: [0mScanning /content/datasets/coco/labels/val2017.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/313 [00:00<?, ?it/s]


NotImplementedError: Could not run 'quantized::conv2d.new' with arguments from the 'CPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'quantized::conv2d.new' is only available for these backends: [Meta, QuantizedCPU, QuantizedCUDA, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradMPS, AutogradXPU, AutogradHPU, AutogradLazy, AutogradMeta, Tracer, AutocastCPU, AutocastCUDA, FuncTorchBatched, BatchedNestedTensor, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PreDispatch, PythonDispatcher].

Meta: registered at ../aten/src/ATen/core/MetaFallbackKernel.cpp:23 [backend fallback]
QuantizedCPU: registered at ../aten/src/ATen/native/quantized/cpu/qconv.cpp:1928 [kernel]
QuantizedCUDA: registered at ../aten/src/ATen/native/quantized/cudnn/Conv.cpp:391 [kernel]
BackendSelect: fallthrough registered at ../aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]
Python: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:154 [backend fallback]
FuncTorchDynamicLayerBackMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:497 [backend fallback]
Functionalize: registered at ../aten/src/ATen/FunctionalizeFallbackKernel.cpp:324 [backend fallback]
Named: registered at ../aten/src/ATen/core/NamedRegistrations.cpp:7 [backend fallback]
Conjugate: registered at ../aten/src/ATen/ConjugateFallback.cpp:17 [backend fallback]
Negative: registered at ../aten/src/ATen/native/NegateFallback.cpp:18 [backend fallback]
ZeroTensor: registered at ../aten/src/ATen/ZeroTensorFallback.cpp:86 [backend fallback]
ADInplaceOrView: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:86 [backend fallback]
AutogradOther: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:53 [backend fallback]
AutogradCPU: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:57 [backend fallback]
AutogradCUDA: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:65 [backend fallback]
AutogradXLA: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:69 [backend fallback]
AutogradMPS: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:77 [backend fallback]
AutogradXPU: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:61 [backend fallback]
AutogradHPU: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:90 [backend fallback]
AutogradLazy: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:73 [backend fallback]
AutogradMeta: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:81 [backend fallback]
Tracer: registered at ../torch/csrc/autograd/TraceTypeManual.cpp:297 [backend fallback]
AutocastCPU: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:378 [backend fallback]
AutocastCUDA: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:244 [backend fallback]
FuncTorchBatched: registered at ../aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:731 [backend fallback]
BatchedNestedTensor: registered at ../aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:758 [backend fallback]
FuncTorchVmapMode: fallthrough registered at ../aten/src/ATen/functorch/VmapModeRegistrations.cpp:27 [backend fallback]
Batched: registered at ../aten/src/ATen/LegacyBatchingRegistrations.cpp:1075 [backend fallback]
VmapMode: fallthrough registered at ../aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]
FuncTorchGradWrapper: registered at ../aten/src/ATen/functorch/TensorWrapper.cpp:202 [backend fallback]
PythonTLSSnapshot: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:162 [backend fallback]
FuncTorchDynamicLayerFrontMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:493 [backend fallback]
PreDispatch: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:166 [backend fallback]
PythonDispatcher: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:158 [backend fallback]


In [None]:
# Print the box mAP values held in `results`, to 3 significant digits.
# NOTE(review): the recorded run of the preceding cell ended in an error and the numbers
# recorded here equal the baseline, so `results` was probably stale when this ran — confirm.
print(f'box map50-95 {results.box.map:.3}')
print(f'box map50 {results.box.map50:.3}')
print(f'box map75 {results.box.map75:.3}')

box map50-95 0.371
box map50 0.521
box map75 0.403


#### UINT8

In [None]:
import torch
# UINT8 variant: the result is bound to `model_uint8` (the name the next cell saves)
# and uses torch.quint8, matching the other UINT8 sections of this notebook.
# NOTE(review): dynamic quantization of nn.Linear may only accept qint8/float16 — confirm
# that quint8 is valid if the model actually contains nn.Linear layers.
model_uint8 = torch.ao.quantization.quantize_dynamic(
    model.model,  # the original model
    {torch.nn.Linear},  # a set of layers to dynamically quantize
    dtype=torch.quint8)

In [None]:
# Checkpoint-style dict holding the quantized module under 'model' plus empty 'train_args'.
ckpt = {'model': model_uint8, 'train_args': {}}

# Report the size computed by check_model_size, then write the checkpoint to disk.
print(f'Quant model size: {check_model_size(model_uint8)}')

torch.save(ckpt, './model_quant_uint8_linear.pt')

# Reload the saved checkpoint through the YOLO wrapper for validation.
quant_model = YOLO("./model_quant_uint8_linear.pt")

Quant model size: 12.084602355957031


In [None]:
# Validate the reloaded checkpoint on the dataset described by 'coco.yaml'; metrics are kept in `results`.
results = quant_model.val(data='coco.yaml')

Ultralytics YOLOv8.2.57 🚀 Python-3.10.12 torch-2.3.0+cu121 CUDA:0 (Tesla T4, 15102MiB)
YOLOv8n summary (fused): 168 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs


[34m[1mval: [0mScanning /content/datasets/coco/labels/val2017.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 313/313 [01:20<00:00,  3.90it/s]


                   all       5000      36335      0.634      0.474      0.521      0.371
                person       2693      10777      0.755      0.671      0.745      0.514
               bicycle        149        314       0.69      0.392      0.457      0.265
                   car        535       1918      0.648      0.514      0.562      0.364
            motorcycle        159        367      0.715       0.58      0.657      0.414
              airplane         97        143      0.814      0.764      0.832      0.653
                   bus        189        283      0.747      0.643      0.739       0.62
                 train        157        190      0.798       0.77      0.834      0.645
                 truck        250        414      0.547      0.397      0.435      0.293
                  boat        121        424      0.583        0.3      0.378      0.211
         traffic light        191        634      0.647      0.345      0.409      0.211
          fire hydran

In [None]:
# Print the box mAP values held in `results`, to 3 significant digits.
# NOTE(review): the recorded numbers equal the baseline — confirm the quantization had any effect.
print(f'box map50-95 {results.box.map:.3}')
print(f'box map50 {results.box.map50:.3}')
print(f'box map75 {results.box.map75:.3}')

box map50-95 0.371
box map50 0.521
box map75 0.403


### Conv2d

#### FP16

In [None]:
import torch
# Request dynamic float16 quantization for the nn.Conv2d submodules of the underlying module.
# NOTE(review): the recorded size and metrics for this variant are essentially the baseline
# values, which suggests no layer was converted — confirm quantize_dynamic handles nn.Conv2d.
model_fp16 = torch.ao.quantization.quantize_dynamic(
    model.model,  # the original model
    {torch.nn.Conv2d},  # a set of layers to dynamically quantize
    dtype=torch.float16)

In [None]:
# Checkpoint-style dict holding the quantized module under 'model' plus empty 'train_args'.
ckpt = {'model': model_fp16, 'train_args': {}}

# Report the size computed by check_model_size, then write the checkpoint to disk.
print(f'Quant model size: {check_model_size(model_fp16)}')

torch.save(ckpt, './model_quant_fp16_conv2d.pt')

# Reload the saved checkpoint through the YOLO wrapper for validation.
quant_model = YOLO("./model_quant_fp16_conv2d.pt")

Quant model size: 12.084602355957031


In [None]:
# Validate the reloaded checkpoint on the dataset described by 'coco.yaml'; metrics are kept in `results`.
results = quant_model.val(data='coco.yaml')

Ultralytics YOLOv8.2.57 🚀 Python-3.10.12 torch-2.3.0+cu121 CUDA:0 (Tesla T4, 15102MiB)
YOLOv8n summary (fused): 168 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs


[34m[1mval: [0mScanning /content/datasets/coco/labels/val2017.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 313/313 [01:24<00:00,  3.72it/s]


                   all       5000      36335      0.634      0.474      0.521      0.371
                person       2693      10777      0.755      0.671      0.745      0.514
               bicycle        149        314       0.69      0.392      0.457      0.265
                   car        535       1918      0.648      0.514      0.562      0.364
            motorcycle        159        367      0.715       0.58      0.657      0.414
              airplane         97        143      0.814      0.764      0.832      0.653
                   bus        189        283      0.747      0.643      0.739       0.62
                 train        157        190      0.798       0.77      0.834      0.645
                 truck        250        414      0.547      0.397      0.435      0.293
                  boat        121        424      0.583        0.3      0.378      0.211
         traffic light        191        634      0.647      0.345      0.409      0.211
          fire hydran

In [None]:
# Print the box mAP values held in `results`, to 3 significant digits.
# NOTE(review): the recorded numbers equal the baseline — confirm the quantization had any effect.
print(f'box map50-95 {results.box.map:.3}')
print(f'box map50 {results.box.map50:.3}')
print(f'box map75 {results.box.map75:.3}')

box map50-95 0.371
box map50 0.521
box map75 0.403


#### INT8

In [None]:
import torch
# Request dynamic qint8 quantization for the nn.Conv2d submodules of the underlying module.
# NOTE(review): the recorded size for this variant is essentially the baseline size,
# which suggests no layer was converted — confirm quantize_dynamic handles nn.Conv2d.
model_int8 = torch.ao.quantization.quantize_dynamic(
    model.model,  # the original model
    {torch.nn.Conv2d},  # a set of layers to dynamically quantize
    dtype=torch.qint8)

In [None]:
# Checkpoint-style dict holding the quantized module under 'model' plus empty 'train_args'.
ckpt = {'model': model_int8, 'train_args': {}}

# Report the size computed by check_model_size, then write the checkpoint to disk.
print(f'Quant model size: {check_model_size(model_int8)}')

torch.save(ckpt, './model_quant_int8_conv2d.pt')

# Reload the saved checkpoint through the YOLO wrapper for validation.
quant_model = YOLO("./model_quant_int8_conv2d.pt")

Quant model size: 12.084602355957031


In [None]:
# Validate the reloaded checkpoint on the dataset described by 'coco.yaml'; metrics are kept in `results`.
# NOTE(review): the recorded run was interrupted (KeyboardInterrupt), so no metrics exist for this variant.
results = quant_model.val(data='coco.yaml')

Ultralytics YOLOv8.2.57 🚀 Python-3.10.12 torch-2.3.0+cu121 CUDA:0 (Tesla T4, 15102MiB)
YOLOv8n summary (fused): 168 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs


[34m[1mval: [0mScanning /content/datasets/coco/labels/val2017.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   5%|▌         | 16/313 [00:09<02:48,  1.77it/s]


KeyboardInterrupt: 

In [None]:
# Print the box mAP values held in `results`, to 3 significant digits.
# NOTE(review): the preceding validation was interrupted in the recorded run, so `results`
# would come from an earlier section unless that cell is re-run to completion.
print(f'box map50-95 {results.box.map:.3}')
print(f'box map50 {results.box.map50:.3}')
print(f'box map75 {results.box.map75:.3}')

#### UINT8

In [None]:
import torch
# Request dynamic quint8 quantization for the nn.Conv2d submodules of the underlying module.
# NOTE(review): no output is recorded for this variant — confirm that quantize_dynamic
# accepts this dtype/layer combination and actually converts layers.
model_uint8 = torch.ao.quantization.quantize_dynamic(
    model.model,  # the original model
    {torch.nn.Conv2d},  # a set of layers to dynamically quantize
    dtype=torch.quint8)

In [None]:
# Checkpoint-style dict holding the quantized module under 'model' plus empty 'train_args'.
ckpt = {'model': model_uint8, 'train_args': {}}

# Report the size computed by check_model_size, then write the checkpoint to disk.
print(f'Quant model size: {check_model_size(model_uint8)}')

torch.save(ckpt, './model_quant_uint8_conv2d.pt')

# Reload the saved checkpoint through the YOLO wrapper for validation.
quant_model = YOLO("./model_quant_uint8_conv2d.pt")

In [None]:
# Validate the reloaded checkpoint on the dataset described by 'coco.yaml'; metrics are kept in `results`.
results = quant_model.val(data='coco.yaml')

In [None]:
# Print the box mAP values held in `results`, to 3 significant digits (no output recorded for this variant).
print(f'box map50-95 {results.box.map:.3}')
print(f'box map50 {results.box.map50:.3}')
print(f'box map75 {results.box.map75:.3}')

### Linear + Conv2d

#### FP16

In [None]:
import torch
# Request dynamic float16 quantization for both nn.Linear and nn.Conv2d submodules.
model_fp16 = torch.ao.quantization.quantize_dynamic(
    model.model,  # the original model
    {torch.nn.Linear, torch.nn.Conv2d},  # a set of layers to dynamically quantize
    dtype=torch.float16)

In [None]:
# Checkpoint-style dict holding the quantized module under 'model' plus empty 'train_args'.
ckpt = {'model': model_fp16, 'train_args': {}}

# Report the size computed by check_model_size, then write the checkpoint to disk.
print(f'Quant model size: {check_model_size(model_fp16)}')

torch.save(ckpt, './model_quant_fp16_linear_conv2d.pt')

# Reload the saved checkpoint through the YOLO wrapper for validation.
quant_model = YOLO("./model_quant_fp16_linear_conv2d.pt")

In [None]:
# Validate the reloaded checkpoint on the dataset described by 'coco.yaml'; metrics are kept in `results`.
results = quant_model.val(data='coco.yaml')

In [None]:
# Print the box mAP values held in `results`, to 3 significant digits (no output recorded for this variant).
print(f'box map50-95 {results.box.map:.3}')
print(f'box map50 {results.box.map50:.3}')
print(f'box map75 {results.box.map75:.3}')

#### INT8

In [None]:
import torch
# Request dynamic qint8 quantization for both nn.Linear and nn.Conv2d submodules.
model_int8 = torch.ao.quantization.quantize_dynamic(
    model.model,  # the original model
    {torch.nn.Linear, torch.nn.Conv2d},  # a set of layers to dynamically quantize
    dtype=torch.qint8)

In [None]:
# Checkpoint-style dict holding the quantized module under 'model' plus empty 'train_args'.
ckpt = {'model': model_int8, 'train_args': {}}

# Report the size computed by check_model_size, then write the checkpoint to disk.
print(f'Quant model size: {check_model_size(model_int8)}')

torch.save(ckpt, './model_quant_int8_linear_conv2d.pt')

# Reload the saved checkpoint through the YOLO wrapper for validation.
quant_model = YOLO("./model_quant_int8_linear_conv2d.pt")

In [None]:
# Validate the reloaded checkpoint on the dataset described by 'coco.yaml'; metrics are kept in `results`.
results = quant_model.val(data='coco.yaml')

In [None]:
# Print the box mAP values held in `results`, to 3 significant digits (no output recorded for this variant).
print(f'box map50-95 {results.box.map:.3}')
print(f'box map50 {results.box.map50:.3}')
print(f'box map75 {results.box.map75:.3}')

#### UINT8

In [None]:
import torch
# Request dynamic quint8 quantization for both nn.Linear and nn.Conv2d submodules.
# NOTE(review): no output is recorded for this variant — confirm that quantize_dynamic
# accepts this dtype/layer combination.
model_uint8 = torch.ao.quantization.quantize_dynamic(
    model.model,  # the original model
    {torch.nn.Linear, torch.nn.Conv2d},  # a set of layers to dynamically quantize
    dtype=torch.quint8)

In [None]:
# Checkpoint-style dict holding the quantized module under 'model' plus empty 'train_args'.
ckpt = {'model': model_uint8, 'train_args': {}}

# Report the size computed by check_model_size, then write the checkpoint to disk.
print(f'Quant model size: {check_model_size(model_uint8)}')

torch.save(ckpt, './model_quant_uint8_linear_conv2d.pt')

# Reload the saved checkpoint through the YOLO wrapper for validation.
quant_model = YOLO("./model_quant_uint8_linear_conv2d.pt")

In [None]:
# Validate the reloaded checkpoint on the dataset described by 'coco.yaml'; metrics are kept in `results`.
results = quant_model.val(data='coco.yaml')

In [None]:
# Print the box mAP values held in `results`, to 3 significant digits (no output recorded for this variant).
print(f'box map50-95 {results.box.map:.3}')
print(f'box map50 {results.box.map50:.3}')
print(f'box map75 {results.box.map75:.3}')

## ONNX dynamic quantization


In [None]:
# Load the pretrained YOLOv8m checkpoint that is exported to ONNX below.
# (The model previously built from "yolov8m.yaml" was overwritten immediately and never used.)
model = YOLO("yolov8m.pt")


                   from  n    params  module                                       arguments                     
  0                  -1  1      1392  ultralytics.nn.modules.conv.Conv             [3, 48, 3, 2]                 
  1                  -1  1     41664  ultralytics.nn.modules.conv.Conv             [48, 96, 3, 2]                
  2                  -1  2    111360  ultralytics.nn.modules.block.C2f             [96, 96, 2, True]             
  3                  -1  1    166272  ultralytics.nn.modules.conv.Conv             [96, 192, 3, 2]               
  4                  -1  4    813312  ultralytics.nn.modules.block.C2f             [192, 192, 4, True]           
  5                  -1  1    664320  ultralytics.nn.modules.conv.Conv             [192, 384, 3, 2]              
  6                  -1  4   3248640  ultralytics.nn.modules.block.C2f             [384, 384, 4, True]           
  7                  -1  1   1991808  ultralytics.nn.modules.conv.Conv             [384

In [None]:
# Export the loaded model to ONNX; the recorded run saved it as 'yolov8m.onnx'.
model.export(format='onnx')

Ultralytics YOLOv8.0.184 🚀 Python-3.10.12 torch-2.0.1+cu118 CPU (Intel Xeon 2.20GHz)
YOLOv8m summary (fused): 218 layers, 25886080 parameters, 0 gradients

[34m[1mPyTorch:[0m starting from 'yolov8m.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (49.7 MB)
[31m[1mrequirements:[0m Ultralytics requirement ['onnx>=1.12.0'] not found, attempting AutoUpdate...
Collecting onnx>=1.12.0
  Downloading onnx-1.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 14.6/14.6 MB 236.7 MB/s eta 0:00:00
Installing collected packages: onnx
Successfully installed onnx-1.14.1

[31m[1mrequirements:[0m AutoUpdate success ✅ 10.4s, installed 1 package: ['onnx>=1.12.0']
[31m[1mrequirements:[0m ⚠️ [1mRestart runtime or rerun command for updates to take effect[0m


[34m[1mONNX:[0m starting export with onnx 1.14.1 opset 17...


verbose: False, log level: Level.ERROR



[34m[1mONNX:[0m export success ✅ 13.8s, saved as 'yolov8m.onnx' (99.0 MB)

Export complete (19.3s)
Results saved to [1m/content[0m
Predict:         yolo predict task=detect model=yolov8m.onnx imgsz=640  
Validate:        yolo val task=detect model=yolov8m.onnx imgsz=640 data=coco.yaml  
Visualize:       https://netron.app


'yolov8m.onnx'

In [None]:
# `os` is imported here because this is the first cell that uses it.
import os

# Size of the PyTorch checkpoint on disk, in MiB.
os.path.getsize('yolov8m.pt') / 1024**2

49.70324993133545

In [None]:
import os

# Size of the exported ONNX file on disk, in MiB.
os.path.getsize('yolov8m.onnx') / 1024**2

98.9661512374878

In [None]:
def quantize_onnx_model(onnx_model_path, quantized_model_path, weight_type=None):
    """Dynamically quantize an ONNX model file with ONNX Runtime.

    Args:
        onnx_model_path: path of the ONNX model to read.
        quantized_model_path: path the quantized model is written to.
        weight_type: optional ``onnxruntime.quantization.QuantType`` for the
            weights; defaults to ``QuantType.QUInt8`` when omitted.

    Prints the destination path once ``quantize_dynamic`` returns.
    """
    # Imported lazily so that defining this function does not require
    # onnxruntime to be installed yet.
    from onnxruntime.quantization import quantize_dynamic, QuantType

    if weight_type is None:
        weight_type = QuantType.QUInt8

    # quantize_dynamic reads the model from its path itself, so the model is
    # not loaded separately here (the earlier onnx.load result was unused).
    quantize_dynamic(onnx_model_path,
                     quantized_model_path,
                     weight_type=weight_type)

    print(f"quantized model saved to:{quantized_model_path}")

In [None]:
!pip install onnxruntime

Collecting onnxruntime
  Downloading onnxruntime-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.2/6.2 MB[0m [31m41.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: humanfriendly, coloredlogs, onnxruntime
Successfully installed coloredlogs-15.0.1 humanfriendly-10.0 onnxruntime-1.16.0


In [None]:
# Quantize the exported ONNX model and write the result to 'quant_yolo8m.onnx'.
quantize_onnx_model('yolov8m.onnx', 'quant_yolo8m.onnx')



quantized model saved to:quant_yolo8m.onnx


In [None]:
# Size of the quantized ONNX file on disk, in MiB.
os.path.getsize('quant_yolo8m.onnx') / 1024**2

25.08861541748047

In [None]:
# Wrap the quantized ONNX file in a YOLO object; note this rebinds `model`, replacing the YOLOv8m checkpoint object.
model = YOLO("quant_yolo8m.onnx")



In [None]:
# Keep the returned metrics: the following cells read `results`, which would otherwise
# still refer to a validation run from an earlier section.
results = model.val(task='detect', imgsz=640, data='coco.yaml')
results

Ultralytics YOLOv8.0.182 🚀 Python-3.10.12 torch-2.0.1+cu118 CPU (AMD EPYC 7B12)
Loading quant_yolo8m.onnx for ONNX Runtime inference...
Forcing batch=1 square inference (1,3,640,640) for non-PyTorch models

Dataset 'coco.yaml' images not found ⚠️, missing path '/content/datasets/coco/val2017.txt'
Downloading https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels-segments.zip to '/content/datasets/coco2017labels-segments.zip'...
100%|██████████| 169M/169M [00:01<00:00, 124MB/s] 
Unzipping /content/datasets/coco2017labels-segments.zip to /content/datasets/coco...: 100%|██████████| 122232/122232 [00:25<00:00, 4787.77file/s]
Downloading http://images.cocodataset.org/zips/val2017.zip to '/content/datasets/coco/images/val2017.zip'...
Dataset download success ✅ (45.4s), saved to [1m/content/datasets[0m

Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf'...
100%|██████████| 755k/755k [00:00<00:00, 15.8MB/s]
[34m[1mval: [0mSca

loading annotations into memory...
Done (t=0.54s)
creating index...
index created!
Loading and preparing results...
DONE (t=1.82s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=43.29s).
Accumulating evaluation results...
DONE (t=9.11s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.496
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.666
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.539
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.314
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.548
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.661
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.378
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.632
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDet

Results saved to [1mruns/detect/val[0m


ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
       78, 79])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7943f786d030>
fitness: 0.5118514555597652
keys: ['metrics/precision(B)', 'metrics/recall(B)', 'metrics/mAP50(B)', 'metrics/mAP50-95(B)']
maps: array([    0.61131,     0.40411,     0.48941,     0.53579,      0.7704,     0.74419,     0.76023,     0.43462,     0.32572,     0.31115,     0.73899,     0.69911,     0.52364,      0.3223,     0.40233,     0.78055,     0.72512,     0.71186,     0.62602,     0.65875,     0.71949,     0.80256,     0.75911,     0.77691,
       

In [None]:
# Read the metrics through `.box`, as every other cell in this notebook does.
# The keys listed in the validation output are 'metrics/mAP50(B)' and 'metrics/mAP50-95(B)',
# so subscripting with 'metrics_mAP50' / 'metrics_mAP5095' does not match them.
map50 = results.box.map50
map95 = results.box.map

In [None]:
# Print the two extracted values (labelled mAP50-95 and mAP50) to 3 significant digits.
print(f'box map50-95 {map95:.3}')
print(f'box map50 {map50:.3}')


box map50-95 0.495
box map50 0.662


In [None]:
from ultralytics import YOLO
import torch

# Load the YOLOv8n model
model = YOLO('yolov8n.pt')

# FP16 evaluation. The module is no longer cast with .half() by hand: the recorded run of
# that approach failed inside val() with "expected mat1 and mat2 to have the same dtype".
# Half precision is requested through the validator's own `half` argument instead.
fp16_model = YOLO('yolov8n.pt')

# Run validation for FP16 model
fp16_model.val(half=True)

# INT8 Quantization
def quantize_int8(model, backend='qnnpack', calibrate_fn=None):
    """Eager-mode post-training static INT8 quantization of ``model``.

    Args:
        model: float ``torch.nn.Module``. It is switched to eval mode; the
            quantization itself is applied to a deep copy.
        backend: quantized engine name used both for the default qconfig and
            for ``torch.backends.quantized.engine`` (default ``'qnnpack'``).
        calibrate_fn: optional callable receiving the prepared (observed)
            model; it should run representative inputs through it. When it is
            omitted the observers see no data, so activation scales are not
            calibrated.

    Returns:
        The converted copy of the model.

    NOTE(review): eager-mode static quantization expects quant/dequant stubs
    around the quantized region of the network — confirm the model provides
    them before running inference on the result.
    """
    import copy

    model.eval()
    # The engine that executes quantized kernels must match the qconfig backend.
    torch.backends.quantized.engine = backend

    model_fp32 = copy.deepcopy(model)
    # The qconfig has to be attached as an attribute: the second positional
    # parameter of prepare() is `inplace`, not a qconfig, so passing it there
    # leaves the model without any quantization configuration.
    model_fp32.qconfig = torch.ao.quantization.get_default_qconfig(backend)
    model_fp32_prepared = torch.ao.quantization.prepare(model_fp32, inplace=True)

    if calibrate_fn is not None:
        with torch.no_grad():
            calibrate_fn(model_fp32_prepared)

    model_int8 = torch.ao.quantization.convert(model_fp32_prepared, inplace=True)
    return model_int8

# Quantize the underlying module on CPU.
int8_model = quantize_int8(model.model.cpu())
# NOTE(review): everywhere else in this notebook YOLO(...) is given a file name; here it
# receives a module object — confirm the constructor accepts that.
int8_yolo = YOLO(int8_model)

# Run validation for INT8 model
int8_yolo.val()

# NOTE(review): these messages print unconditionally; this cell never calls val() on the
# original `model`, so the first message is not backed by a validation run here.
print("Original model validation complete")
print("FP16 model validation complete")
print("INT8 model validation complete")

Ultralytics YOLOv8.2.57 🚀 Python-3.10.12 torch-2.3.0+cu121 CUDA:0 (Tesla T4, 15102MiB)


RuntimeError: expected mat1 and mat2 to have the same dtype, but got: c10::Half != float