In [11]:
# Convert YOLOv8 to ONNX
from ultralytics import YOLO

# Load the 'yolov8n.pt' checkpoint and export it to ONNX.
#
# NOTE: `optimize=True` was previously passed here and described as
# "ONNX optimizations". The Ultralytics export documentation describes
# `optimize` as a mobile optimisation for TorchScript exports and marks it
# as incompatible with CUDA devices, so it contributed nothing to the ONNX
# file and could make this cell fail on a GPU machine. It is dropped;
# graph-level clean-up of the ONNX file is requested via `simplify=True`.
model = YOLO('yolov8n.pt')
model.export(
    format='onnx',
    opset=17,         # ONNX opset version to target
    dynamic=False,    # Static input shapes for better performance
    simplify=True     # Simplify the model graph
)
print("Optimized model converted to ONNX successfully!")

Ultralytics 8.3.203 🚀 Python-3.9.23 torch-2.8.0+cpu CPU (Intel Core i7-10700F 2.90GHz)
YOLOv8n summary (fused): 72 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs
YOLOv8n summary (fused): 72 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs

[34m[1mPyTorch:[0m starting from 'yolov8n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (6.2 MB)

[34m[1mPyTorch:[0m starting from 'yolov8n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (6.2 MB)

[34m[1mONNX:[0m starting export with onnx 1.19.0 opset 17...

[34m[1mONNX:[0m starting export with onnx 1.19.0 opset 17...
[34m[1mONNX:[0m slimming with onnxslim 0.1.69...
[34m[1mONNX:[0m slimming with onnxslim 0.1.69...
[34m[1mONNX:[0m export success ✅ 0.6s, saved as 'yolov8n.onnx' (12.3 MB)

Export complete (0.9s)
Results saved to [1m/home/hassan/Yolov8[0m
Predict:         yolo predict task=detect model=yolov8n.onnx imgsz=640  
Validate:        yolo val task=de

In [17]:
# Inspect the exported ONNX model and open it in Netron
import onnx
import netron
import os

# Path of the ONNX file produced by the export cell
onnx_path = 'yolov8n.onnx'
if os.path.exists(onnx_path):
    # Stage 1: load the model and print a short summary.
    # Kept in its own try-block so that a failure while reading the file is
    # reported as an ONNX problem rather than being blamed on Netron.
    model_loaded = False
    try:
        onnx_model = onnx.load(onnx_path)

        # Print model info
        print(f"Model IR version: {onnx_model.ir_version}")
        print(f"Producer: {onnx_model.producer_name}")
        print(f"Graph name: {onnx_model.graph.name}")
        print(f"Number of nodes: {len(onnx_model.graph.node)}")

        # Print the names of the graph-level inputs and outputs
        for input_tensor in onnx_model.graph.input:
            print(f"Input: {input_tensor.name}")
        for output_tensor in onnx_model.graph.output:
            print(f"Output: {output_tensor.name}")

        model_loaded = True
    except Exception as e:
        print(f"Error reading ONNX model: {e}")

    # Stage 2: start the Netron viewer, only if the model could be read.
    # Still best-effort: a viewer failure is reported, not raised, and the
    # reader is told how to launch Netron by hand for this same file.
    if model_loaded:
        try:
            print("\nStarting Netron visualization...")
            netron.start(onnx_path)
            print("Netron server started! Check your browser for the visualization.")
        except Exception as e:
            print(f"Error with Netron: {e}")
            print(f"You can manually view the model by running: netron {onnx_path}")
else:
    print(f"ONNX file not found: {onnx_path}")
    print("Please run the first cell to create the ONNX model first!")

Model IR version: 8
Producer: pytorch
Graph name: main_graph
Number of nodes: 233
Input: images
Output: output0

Starting Netron visualization...
Netron server started! Check your browser for the visualization.
Netron server started! Check your browser for the visualization.


/usr/bin/x-www-browser: 12: xdg-settings: not found
touch: cannot touch '/home/hassan/snap/firefox/common/.cache/desktop-runtime-date': No such file or directory
touch: cannot touch '/home/hassan/snap/firefox/common/.cache/desktop-runtime-date': No such file or directory










In [15]:
# Dynamic quantization of the exported ONNX model
from onnxruntime.quantization import quantize_dynamic, QuantType
import onnx
import os

# Source model (written by the export cell) and destination for its
# quantized copy.
onnx_path = 'yolov8n.onnx'
quantized_path = 'yolov8n_quantized.onnx'

# Divisor used to report file sizes in MB
BYTES_PER_MB = 1024 * 1024

if not os.path.exists(onnx_path):
    # Nothing to quantize until the export cell has produced the file
    print("Please run the first cell to create the ONNX model first!")
else:
    # Dynamic quantization with unsigned 8-bit weights
    quantize_dynamic(
        model_input=onnx_path,
        model_output=quantized_path,
        weight_type=QuantType.QUInt8,
    )

    # On-disk size of both files, in MB
    original_size = os.path.getsize(onnx_path) / BYTES_PER_MB
    quantized_size = os.path.getsize(quantized_path) / BYTES_PER_MB

    # Relative shrinkage of the quantized file versus the original, in percent
    size_reduction_pct = (original_size - quantized_size) / original_size * 100

    print(f"Original model size: {original_size:.2f} MB")
    print(f"Quantized model size: {quantized_size:.2f} MB")
    print(f"Size reduction: {size_reduction_pct:.1f}%")



Original model size: 12.26 MB
Quantized model size: 3.34 MB
Size reduction: 72.7%
