Transfer the data into .npy

In [2]:
import cv2
import numpy as np
import os

img_folder = "/opt/projects/aoi/led_detection/training/scratch/ONNX_TensorRT_INT8/Kalibrierungsdatensatz/RetinaNet&RT_DETR/images/train"
save_folder = "/opt/projects/aoi/led_detection/training/scratch/ONNX_TensorRT_INT8/Kalibrierungsdatensatz/RetinaNet&RT_DETR/images/train_npy"

os.makedirs(save_folder, exist_ok=True)

for root, _, files in os.walk(img_folder):
    for img_file in files:
        if img_file.lower().endswith(('.jpg', '.png', '.jpeg')):  # Process only image files
            img_path = os.path.join(root, img_file)
            img = cv2.imread(img_path)
            if img is None:
                print(f"Warning: Unable to read {img_path}")
                continue
            
            # Convert image format HWC -> CHW
            img = img.transpose(2, 0, 1)  
            img = img.astype(np.float32) / 255.0  
            
            # Generate save path while maintaining directory structure
            relative_path = os.path.relpath(img_path, img_folder)  # Compute relative path
            save_path = os.path.join(save_folder, relative_path).replace(".jpg", ".npy").replace(".png", ".npy").replace(".jpeg", ".npy")
            
            # Ensure the target subdirectory exists
            os.makedirs(os.path.dirname(save_path), exist_ok=True)
            
            # Save as .npy file
            np.save(save_path, img)
            print(f"Processed: {img_path} -> {save_path}")

print("Processing complete!")


Processed: /opt/projects/aoi/led_detection/training/scratch/ONNX_TensorRT_INT8/Kalibrierungsdatensatz/RetinaNet&RT_DETR/images/train/2500K_E15K/004988_2024_10_29_11_27_30_1620_1620.jpg -> /opt/projects/aoi/led_detection/training/scratch/ONNX_TensorRT_INT8/Kalibrierungsdatensatz/RetinaNet&RT_DETR/images/train_npy/2500K_E15K/004988_2024_10_29_11_27_30_1620_1620.npy
Processed: /opt/projects/aoi/led_detection/training/scratch/ONNX_TensorRT_INT8/Kalibrierungsdatensatz/RetinaNet&RT_DETR/images/train/2500K_E15K/004070_2024_10_28_16_37_05_2700_0.jpg -> /opt/projects/aoi/led_detection/training/scratch/ONNX_TensorRT_INT8/Kalibrierungsdatensatz/RetinaNet&RT_DETR/images/train_npy/2500K_E15K/004070_2024_10_28_16_37_05_2700_0.npy
Processed: /opt/projects/aoi/led_detection/training/scratch/ONNX_TensorRT_INT8/Kalibrierungsdatensatz/RetinaNet&RT_DETR/images/train/2500K_E15K/003418_2024_10_28_15_22_30_540_1620.jpg -> /opt/projects/aoi/led_detection/training/scratch/ONNX_TensorRT_INT8/Kalibrierungsdatens

Create the TensorRT INT8 

In [1]:
import tensorrt as trt
import numpy as np
import os
import pycuda.driver as cuda
import pycuda.autoinit

# Enter ONNX and TensorRT engine paths
onnx_file = "/home/guoy/led_detection/training/RT-DETR/model_v1.onnx"
engine_file = "/home/guoy/led_detection/training/yolov8/model_int8.trt"

# Create TensorRT Logger
logger = trt.Logger(trt.Logger.INFO)

# Create TensorRT Builder 和 Network
builder = trt.Builder(logger)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, logger)

with open(onnx_file, 'rb') as model:
    if not parser.parse(model.read()):
        for i in range(parser.num_errors):
            print(f"ONNX Parsing Error {i}: {parser.get_error(i)}")
        raise RuntimeError("Failed to parse ONNX model")
    else:
        print("ONNX model successfully parsed!")

# Create a TensorRT Configuration
config = builder.create_builder_config()
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 8 << 30) 
config.set_flag(trt.BuilderFlag.INT8)  # set INT8 modul

# INT8 Calibrator
class Int8EntropyCalibrator2(trt.IInt8EntropyCalibrator2):
    def __init__(self, calibration_data_path, batch_size=8): #set the batch size
        trt.IInt8EntropyCalibrator2.__init__(self)
        self.batch_size = batch_size
        self.data_path = calibration_data_path
        self.data_files = sorted(os.listdir(calibration_data_path))  
        self.current_index = 0
        self.device_input = None
        
        # Preloaded calibration data
        self.data = []
        for file in self.data_files:
            file_path = os.path.join(calibration_data_path, file)
            img = np.load(file_path,allow_pickle=True)  # load the  the calibration data （.npy Format）
            self.data.append(img)
        
        self.device_input = cuda.mem_alloc(self.batch_size * self.data[0].nbytes)  
        
    def get_batch_size(self):
        return self.batch_size

    def get_batch(self, names):
        if self.current_index + self.batch_size > len(self.data):
            return None  
        batch = np.ascontiguousarray(self.data[self.current_index:self.current_index + self.batch_size])
        cuda.memcpy_htod(self.device_input, batch)
        self.current_index += self.batch_size
        return [self.device_input]

    def read_calibration_cache(self):
        return None  

    def write_calibration_cache(self, cache):
        pass  

# Setting the INT8 calibrator 
calibration_data_path = "/opt/projects/aoi/led_detection/training/scratch/models_TensorRT/Kalibrierungsdatensatz/RetinaNet&RT_DETR/images/train_npy"  # the calibration data in .npy format
config.int8_calibrator = Int8EntropyCalibrator2(calibration_data_path)

# Creating Optimisation Profiles
profile = builder.create_optimization_profile()

# Set the dynamic batch size
BATCH_SIZE_MIN, BATCH_SIZE_OPT, BATCH_SIZE_MAX = 1, 8, 16  
profile.set_shape("images", 
                  (BATCH_SIZE_MIN, 3, 640, 640),  
                  (BATCH_SIZE_OPT, 3, 640, 640),  
                  (BATCH_SIZE_MAX, 3, 640, 640))  

config.add_optimization_profile(profile)

# Building the TensorRT Engine
serialized_engine = builder.build_serialized_network(network, config)

if serialized_engine is None:
    raise RuntimeError("Failed to build TensorRT engine!")

# save TensorRT INT8 Engine
with open(engine_file, 'wb') as f:
    f.write(serialized_engine)

print(f"INT8 TensorRT engine saved at: {engine_file}")


[03/06/2025-09:24:42] [TRT] [I] [MemUsageChange] Init CUDA: CPU -2, GPU +0, now: CPU 40, GPU 4061 (MiB)
[03/06/2025-09:24:44] [TRT] [I] [MemUsageChange] Init builder kernel library: CPU +2755, GPU +446, now: CPU 2997, GPU 4507 (MiB)
ONNX model successfully parsed!
[03/06/2025-09:24:44] [TRT] [W] ModelImporter.cpp:804: Make sure output labels has Int64 binding.


  config.int8_calibrator = Int8EntropyCalibrator2(calibration_data_path)


[03/06/2025-09:25:27] [TRT] [I] Perform graph optimization on calibration graph.
[03/06/2025-09:25:27] [TRT] [I] Local timing cache in use. Profiling results in this builder pass will not be stored.
[03/06/2025-09:25:27] [TRT] [W] Calibration Profile is not defined. Calibrating with Profile 0
[03/06/2025-09:25:27] [TRT] [I] Compiler backend is used during engine build.
[03/06/2025-09:25:32] [TRT] [I] Detected 1 inputs and 7 output network tensors.
[03/06/2025-09:25:34] [TRT] [I] Total Host Persistent Memory: 968320 bytes
[03/06/2025-09:25:34] [TRT] [I] Total Device Persistent Memory: 1745920 bytes
[03/06/2025-09:25:34] [TRT] [I] Max Scratch Memory: 153600 bytes
[03/06/2025-09:25:34] [TRT] [I] [BlockAssignment] Started assigning block shifts. This will take 1064 steps to complete.
[03/06/2025-09:25:34] [TRT] [I] [BlockAssignment] Algorithm ShiftNTopDown took 317.623ms to assign 60 blocks to 1064 nodes requiring 751090688 bytes.
[03/06/2025-09:25:34] [TRT] [I] Total Activation Memory: 75