# Install packages

In [1]:
%%capture

# Clone the SuperGradients sources into /content so they can be patched before installing.
!git clone https://github.com/Deci-AI/super-gradients.git /content/super_gradients_folder
# Replace the host name sghub.deci.ai with sg-hub-nv.s3.amazonaws.com in the two modules below.
# NOTE(review): presumably these modules hold the pretrained-weight download URLs and the
# original host is no longer reachable -- confirm before relying on this patch.
! sed -i 's/sghub.deci.ai/sg-hub-nv.s3.amazonaws.com/' /content/super_gradients_folder/src/super_gradients/training/pretrained_models.py
! sed -i 's/sghub.deci.ai/sg-hub-nv.s3.amazonaws.com/' /content/super_gradients_folder/src/super_gradients/training/utils/checkpoint_utils.py
# Editable install, so the patched checkout above is the code that actually gets imported.
!pip install -e /content/super_gradients_folder
# ONNX Runtime is used further down to smoke-test the exported model.
!pip install onnxruntime

## Select model

In [2]:
# Export settings. The trailing `#@param` annotations are Colab form directives and must stay
# on the same line as the assignment they describe.

# Key into the MODEL_NAMES table used when the model is loaded.
model_name = 'yolonas_s' #@param ["yolonas_s", "yolonas_m", "yolonas_l"]
# Export precision label: "fp16" selects FP16 export; "uint8" selects INT8 quantization.
quantization = 'uint8' #@param ["fp16", "uint8"]
# Input resolution in pixels; the sliders allow only 320 or 640 per axis.
input_width = 320 #@param {type:"slider", min:320, max:640, step:320}
input_height = 320 #@param {type:"slider", min:320, max:640, step:320}

# Output file name, e.g. "yolonas_s_uint8_320x320.onnx" (note the width x height order).
MODEL_FILENAME = f"{model_name}_{quantization}_{input_width}x{input_height}.onnx"

In [3]:
from super_gradients.common.object_names import Models
from super_gradients.training import models

# Lookup table: form selection -> SuperGradients architecture identifier.
MODEL_NAMES = {
    "yolonas_s": Models.YOLO_NAS_S,
    "yolonas_m": Models.YOLO_NAS_M,
    "yolonas_l": Models.YOLO_NAS_L,
}

# Resolve the chosen architecture and load it with COCO-pretrained weights.
selected_architecture = MODEL_NAMES[model_name]
model = models.get(selected_architecture, pretrained_weights="coco")

# Switch to inference mode before preparing the network for conversion.
model.eval()

# Batch of one, three channels, then height and width as chosen in the form.
conversion_input_size = [1, 3, input_height, input_width]
model.prep_model_for_conversion(input_size=conversion_input_size)

The console stream is logged into /root/sg_logs/console.log


[2025-03-13 19:42:59] INFO - crash_tips_setup.py - Crash tips is enabled. You can set your environment variable to CRASH_HANDLER=FALSE to disable it
[2025-03-13 19:43:10] INFO - utils.py - NumExpr defaulting to 2 threads.
DEBUG:2025-03-13 19:43:16,401:jax._src.path:31: etils.epath found. Using etils.epath for file I/O.
  check_for_updates()
 It is your responsibility to determine whether you have permission to use the models for your use case.
 The model you have requested was pre-trained on the coco dataset, published under the following terms: https://cocodataset.org/#termsofuse
[2025-03-13 19:43:21] INFO - checkpoint_utils.py - License Notification: YOLO-NAS pre-trained weights are subjected to the specific license terms and conditions detailed in 
https://github.com/Deci-AI/super-gradients/blob/master/LICENSE.YOLONAS.md
By downloading the pre-trained weight files you agree to comply with these terms.
[2025-03-13 19:43:22] INFO - checkpoint_utils.py - Successfully loaded pretrained 

In [4]:
from super_gradients.conversion import DetectionOutputFormatMode
from super_gradients.conversion.conversion_enums import ExportQuantizationMode

# Translate the form selection into the exporter's quantization mode: "fp16" selects FP16,
# any other value (i.e. the "uint8" option) selects INT8 quantization.
# The form variable is `quantization`; the previously referenced `dtype` is never defined in
# this notebook, so the cell raised NameError on a fresh kernel.
quantization_mode = (
    ExportQuantizationMode.FP16 if quantization == "fp16" else ExportQuantizationMode.INT8
)

# Export to ONNX with NMS post-processing baked into the graph. The call is left as the
# cell's last expression so the export summary it returns is displayed.
model.export(
    MODEL_FILENAME,
    input_image_shape=(input_height, input_width),  # (rows, cols), matching the NCHW layout
    num_pre_nms_predictions=1000,
    max_predictions_per_image=20,
    nms_threshold=0.7,
    confidence_threshold=0.4,
    output_predictions_format=DetectionOutputFormatMode.FLAT_FORMAT,
    quantization_mode=quantization_mode,
)

W0313 19:43:46.409715 140049008185344 tensor_quantizer.py:281] Use Pytorch's native experimental fake quantization.
  if amax.numel() == 1:
  inputs, amax.item() / bound, 0,
  if not keepdims or output.numel() == 1:
  quant_dim = list(amax.shape).index(list(amax_sequeeze.shape)[0])


Model exported successfully to yolonas_s_uint8_320x320.onnx
Model expects input image of shape [1, 3, 320, 320]
Input image dtype is torch.uint8
Exported model already contains preprocessing (normalization) step, so you don't need to do it manually.
Preprocessing steps to be applied to input image are:
Sequential(
  (0): CastTensorTo(dtype=torch.float32)
  (1): ApplyMeanStd(mean=[0.], scale=[255.])
)

Exported model contains postprocessing (NMS) step with the following parameters:
    num_pre_nms_predictions=1000
    max_predictions_per_image=20
    nms_threshold=0.7
    confidence_threshold=0.4
    output_predictions_format=DetectionOutputFormatMode.FLAT_FORMAT

Exported model is in ONNX format and can be used with ONNXRuntime
To run inference with ONNXRuntime, please use the following code snippet:

    import onnxruntime
    import numpy as np
    session = onnxruntime.InferenceSession("yolonas_s_uint8_320x320.onnx", providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
    i

In [5]:
import onnxruntime as ort
import numpy as np

# Random uint8 image batch in NCHW layout. Height comes before width, matching the shape the
# model was exported with ([1, 3, input_height, input_width]); the previous (width, height)
# order only worked for square inputs. The upper bound is exclusive, so 256 is needed for the
# full 0..255 range.
dummy_input = np.random.randint(
    0, 256, size=(1, 3, input_height, input_width), dtype=np.uint8
)

# Prefer ROCm when the installed onnxruntime build provides it; list the CPU provider
# explicitly so the session still has a declared provider on machines without ROCm.
ort_session = ort.InferenceSession(
    MODEL_FILENAME,
    providers=["ROCMExecutionProvider", "CPUExecutionProvider"],
)

# Read the input tensor name from the model instead of hard-coding "input".
input_name = ort_session.get_inputs()[0].name

# Left as the last expression so the raw prediction array is displayed as the cell output.
ort_session.run(None, {input_name: dummy_input})



[array([[ 0.000000e+00,  9.965515e-02, -7.989502e-02,  3.195147e+02,
          3.200067e+02,  8.423366e-01,  5.000000e+01]], dtype=float32)]

In [6]:
# `google.colab.files` exists only inside a Colab runtime; this cell is not expected to run
# in a plain Jupyter environment.
from google.colab import files

# Hand the exported ONNX file (named in the settings cell) to the Colab download helper.
files.download(MODEL_FILENAME)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>