pyRPP support – Python bindings for RPP (#685)
Conversation
HazarathKumarM
commented
Feb 27, 2026
- Adds Python bindings for 10 RPP augmentations using pybind11
- Brightness
- Contrast
- Resize
- Rotate
- Crop
- Flip
- Gamma Correction
- Hue
- Vignette
- Pixelate
- Adds test suite support to test newly added Python APIs
There was a problem hiding this comment.
Pull request overview
This PR adds comprehensive Python bindings (pyRPP) for AMD's ROCm Performance Primitives (RPP) library using pybind11. The implementation provides a rocAL-style API for 10 core image augmentation operations with support for both CPU (HOST) and GPU (HIP) backends.
Changes:
- Added C++ pybind11 bindings for 10 RPP augmentations across color, geometric, and effects categories
- Implemented Python wrapper API with automatic backend detection and layout conversion utilities
- Created comprehensive test suite supporting unit testing, QA validation, and performance benchmarking
- Added CMake build support for multi-version Python (3.8-3.13) with PyTorch integration
Reviewed changes
Copilot reviewed 14 out of 15 changed files in this pull request and generated 14 comments.
Show a summary per file
| File | Description |
|---|---|
| rpp_pybind/rpp_pybind.cpp | C++ pybind11 module implementing tensor operations and GPU memory management for 10 augmentations |
| rpp_pybind/amd/rpp/fn.py | High-level Python wrappers providing user-friendly API with auto backend/device selection |
| rpp_pybind/amd/rpp/utils.py | Image I/O utilities using TurboJPEG for pixel-accurate testing and layout conversions |
| rpp_pybind/amd/rpp/rpp_types.py | Type definitions and helper functions for backend/layout management |
| rpp_pybind/__init__.py | Main package init exposing C++ functions and type definitions |
| utilities/python_tests/test_suite.py | Comprehensive test suite with unit, QA, and performance testing modes |
| rpp_pybind/CMakeLists.txt | CMake configuration for multi-Python version builds with PyTorch/HIP dependencies |
| CMakeLists.txt | Root CMake updates adding RPP_PYPACKAGE build option |
| rpp_pybind/README.md | Complete documentation with installation instructions, usage examples, and API reference |
| cmake/FindDLPACK.cmake | CMake module for finding DLPack header-only library |
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| frames = [] | ||
| prevLine = "" | ||
| funcCount = 0 | ||
| # current_category = "" # UNUSED: assigned but never read |
There was a problem hiding this comment.
The variable 'current_category' is assigned but never used. This was likely intended for organizing test output but has been replaced by other mechanisms. Consider removing this unused variable assignment.
| # current_category = "" # UNUSED: assigned but never read |
| if(backend == 1) { // Check if backend is HIP (GPU) | ||
| // Allocate GPU memory | ||
| size_t alpha_size = alpha.size() * sizeof(float); | ||
| size_t beta_size = beta.size() * sizeof(float); | ||
| size_t roi_size = batch_size * sizeof(RpptROI); | ||
|
|
||
| hipMalloc(&alpha_gpu_ptr, alpha_size); | ||
| hipMalloc(&beta_gpu_ptr, beta_size); | ||
| hipMalloc(&roi_gpu_ptr, roi_size); | ||
|
|
||
| // Copy CPU data to GPU | ||
| hipMemcpy(alpha_gpu_ptr, alpha_ptr, alpha_size, hipMemcpyHostToDevice); | ||
| hipMemcpy(beta_gpu_ptr, beta_ptr, beta_size, hipMemcpyHostToDevice); | ||
| hipMemcpy(roi_gpu_ptr, roi.data(), batch_size * sizeof(RpptROI), hipMemcpyHostToDevice); | ||
|
|
||
| // Use GPU pointers for the function call | ||
| alpha_ptr = alpha_gpu_ptr; | ||
| beta_ptr = beta_gpu_ptr; | ||
| roi_ptr = roi_gpu_ptr; | ||
| } | ||
|
|
||
| RppStatus status = rppt_brightness(input_data.ptr, &src_desc, | ||
| output_data.ptr, &dst_desc, | ||
| alpha_ptr, | ||
| beta_ptr, | ||
| roi_ptr, RpptRoiType::XYWH, | ||
| rpp_handle, static_cast<RppBackend>(backend)); | ||
|
|
||
| if (status != RPP_SUCCESS) { | ||
| throw std::runtime_error("rppt_brightness failed"); | ||
| } | ||
|
|
||
| hip_cleanup(); // success path cleanup | ||
| } | ||
| catch (...) { | ||
| hip_cleanup(); // error path cleanup | ||
| throw; // rethrow original exception | ||
| } | ||
| } | ||
|
|
||
| // 2. Gamma Correction (Color) | ||
| void gamma_correction(const torch::Tensor& input_tensor, | ||
| torch::Tensor& output_tensor, | ||
| const std::vector<float>& gamma, | ||
| const std::vector<int>& roi_widths, | ||
| const std::vector<int>& roi_heights, | ||
| uintptr_t handle, | ||
| int backend) { | ||
|
|
||
| auto input_data = get_tensor_data(input_tensor); | ||
| auto output_data = get_tensor_data(output_tensor); | ||
|
|
||
| // Detect layouts from tensor shapes | ||
| input_data.layout = detect_layout_from_tensor(input_tensor); | ||
| output_data.layout = detect_layout_from_tensor(output_tensor); | ||
|
|
||
| RpptDesc src_desc, dst_desc; | ||
| setup_tensor_descriptor(src_desc, input_data); | ||
| setup_tensor_descriptor(dst_desc, output_data); | ||
| auto rpp_handle = reinterpret_cast<rppHandle_t>(handle); | ||
|
|
||
| int batch_size = input_data.shape[0]; | ||
| std::vector<RpptROI> roi(batch_size); | ||
| for(int i = 0; i < batch_size; i++) { | ||
| roi[i].xywhROI = {0, 0, roi_widths[i], roi_heights[i]}; | ||
| } | ||
|
|
||
| // Initialize pointers to CPU data by default | ||
| float* gamma_ptr = const_cast<float*>(gamma.data()); | ||
| RpptROI* roi_ptr = roi.data(); | ||
|
|
||
| float* gamma_gpu_ptr = nullptr; | ||
| RpptROI *roi_gpu_ptr = nullptr; | ||
|
|
||
| // RAII cleanup guard | ||
| auto hip_cleanup = [&]() { | ||
| if (gamma_gpu_ptr) hipFree(gamma_gpu_ptr); | ||
| if (roi_gpu_ptr) hipFree(roi_gpu_ptr); | ||
| }; | ||
|
|
||
| try { | ||
| // Check if backend is HIP (GPU) | ||
| if(backend == 1) { | ||
| // Allocate GPU memory | ||
| size_t gamma_size = gamma.size() * sizeof(float); | ||
| size_t roi_size = batch_size * sizeof(RpptROI); | ||
|
|
||
| hipMalloc(&gamma_gpu_ptr, gamma_size); | ||
| hipMalloc(&roi_gpu_ptr, roi_size); | ||
|
|
||
| // Copy CPU data to GPU | ||
| hipMemcpy(gamma_gpu_ptr, gamma_ptr, gamma_size, hipMemcpyHostToDevice); | ||
| hipMemcpy(roi_gpu_ptr, roi.data(), batch_size * sizeof(RpptROI), hipMemcpyHostToDevice); | ||
|
|
||
| // Use GPU pointers for the function call | ||
| gamma_ptr = gamma_gpu_ptr; | ||
| roi_ptr = roi_gpu_ptr; | ||
| } | ||
|
|
||
| RppStatus status = rppt_gamma_correction(input_data.ptr, &src_desc, | ||
| output_data.ptr, &dst_desc, | ||
| gamma_ptr, | ||
| roi_ptr, RpptRoiType::XYWH, | ||
| rpp_handle, static_cast<RppBackend>(backend)); | ||
|
|
||
| if (status != RPP_SUCCESS) { | ||
| throw std::runtime_error("rppt_gamma_correction failed"); | ||
| } | ||
|
|
||
| hip_cleanup(); // success path cleanup | ||
| } | ||
| catch (...) { | ||
| hip_cleanup(); // error path cleanup | ||
| throw; // rethrow original exception | ||
| } | ||
| } | ||
|
|
||
| // 3. Contrast (Color) | ||
| void contrast(const torch::Tensor& input_tensor, | ||
| torch::Tensor& output_tensor, | ||
| const std::vector<float>& contrast_factor, | ||
| const std::vector<float>& contrast_center, | ||
| const std::vector<int>& roi_widths, | ||
| const std::vector<int>& roi_heights, | ||
| uintptr_t handle, | ||
| int backend) { | ||
| auto input_data = get_tensor_data(input_tensor); | ||
| auto output_data = get_tensor_data(output_tensor); | ||
|
|
||
| // Detect layouts from tensor shapes | ||
| input_data.layout = detect_layout_from_tensor(input_tensor); | ||
| output_data.layout = detect_layout_from_tensor(output_tensor); | ||
|
|
||
| RpptDesc src_desc, dst_desc; | ||
| setup_tensor_descriptor(src_desc, input_data); | ||
| setup_tensor_descriptor(dst_desc, output_data); | ||
| auto rpp_handle = reinterpret_cast<rppHandle_t>(handle); | ||
|
|
||
| int batch_size = input_data.shape[0]; | ||
| std::vector<RpptROI> roi(batch_size); | ||
| for(int i = 0; i < batch_size; i++) { | ||
| roi[i].xywhROI = {0, 0, roi_widths[i], roi_heights[i]}; | ||
| } | ||
|
|
||
| // Initialize pointers to CPU data by default | ||
| float* contrast_factor_ptr = const_cast<float*>(contrast_factor.data()); | ||
| float* contrast_center_ptr = const_cast<float*>(contrast_center.data()); | ||
| RpptROI* roi_ptr = roi.data(); | ||
|
|
||
| float* contrast_factor_gpu = nullptr; | ||
| float* contrast_center_gpu = nullptr; | ||
| RpptROI* roi_gpu = nullptr; | ||
|
|
||
| // RAII cleanup guard | ||
| auto hip_cleanup = [&]() { | ||
| if (contrast_factor_gpu) hipFree(contrast_factor_gpu); | ||
| if (contrast_center_gpu) hipFree(contrast_center_gpu); | ||
| if (roi_gpu) hipFree(roi_gpu); | ||
| }; | ||
|
|
||
| try { | ||
| if(backend == 1) { // HIP backend | ||
| // Allocate GPU memory | ||
| size_t contrast_factor_size = contrast_factor.size() * sizeof(float); | ||
| size_t contrast_center_size = contrast_center.size() * sizeof(float); | ||
| size_t roi_size = batch_size * sizeof(RpptROI); | ||
|
|
||
| hipMalloc(&contrast_factor_gpu, contrast_factor_size); | ||
| hipMalloc(&contrast_center_gpu, contrast_center_size); | ||
| hipMalloc(&roi_gpu, roi_size); | ||
|
|
||
| // Copy CPU data to GPU | ||
| hipMemcpy(contrast_factor_gpu, contrast_factor_ptr, contrast_factor_size, hipMemcpyHostToDevice); | ||
| hipMemcpy(contrast_center_gpu, contrast_center_ptr, contrast_center_size, hipMemcpyHostToDevice); | ||
| hipMemcpy(roi_gpu, roi.data(), batch_size * sizeof(RpptROI), hipMemcpyHostToDevice); | ||
|
|
||
| // Use GPU pointers for the function call | ||
| contrast_factor_ptr = contrast_factor_gpu; | ||
| contrast_center_ptr = contrast_center_gpu; | ||
| roi_ptr = roi_gpu; | ||
| } | ||
|
|
||
| RppStatus status = rppt_contrast(input_data.ptr, &src_desc, | ||
| output_data.ptr, &dst_desc, | ||
| contrast_factor_ptr, | ||
| contrast_center_ptr, | ||
| roi_ptr, RpptRoiType::XYWH, | ||
| rpp_handle, static_cast<RppBackend>(backend)); | ||
|
|
||
| if (status != RPP_SUCCESS) { | ||
| throw std::runtime_error("rppt_contrast failed"); | ||
| } | ||
|
|
||
| hip_cleanup(); // success path cleanup | ||
| } | ||
| catch (...) { | ||
| hip_cleanup(); // error path cleanup | ||
| throw; // rethrow original exception | ||
| } | ||
| } | ||
|
|
||
| // 4. Hue (Color) | ||
| void hue(const torch::Tensor& input_tensor, | ||
| torch::Tensor& output_tensor, | ||
| const std::vector<float>& hue_shift, | ||
| const std::vector<int>& roi_widths, | ||
| const std::vector<int>& roi_heights, | ||
| uintptr_t handle, | ||
| int backend) { | ||
| auto input_data = get_tensor_data(input_tensor); | ||
| auto output_data = get_tensor_data(output_tensor); | ||
|
|
||
| // Detect layouts from tensor shapes | ||
| input_data.layout = detect_layout_from_tensor(input_tensor); | ||
| output_data.layout = detect_layout_from_tensor(output_tensor); | ||
|
|
||
| RpptDesc src_desc, dst_desc; | ||
| setup_tensor_descriptor(src_desc, input_data); | ||
| setup_tensor_descriptor(dst_desc, output_data); | ||
| auto rpp_handle = reinterpret_cast<rppHandle_t>(handle); | ||
|
|
||
| int batch_size = input_data.shape[0]; | ||
| std::vector<RpptROI> roi(batch_size); | ||
| for(int i = 0; i < batch_size; i++) { | ||
| roi[i].xywhROI = {0, 0, roi_widths[i], roi_heights[i]}; | ||
| } | ||
|
|
||
| // Initialize pointers to CPU data by default | ||
| float* hue_shift_ptr = const_cast<float*>(hue_shift.data()); | ||
| RpptROI* roi_ptr = roi.data(); | ||
|
|
||
| float* hue_shift_gpu = nullptr; | ||
| RpptROI* roi_gpu = nullptr; | ||
|
|
||
| // RAII cleanup guard | ||
| auto hip_cleanup = [&]() { | ||
| if (hue_shift_gpu) hipFree(hue_shift_gpu); | ||
| if (roi_gpu) hipFree(roi_gpu); | ||
| }; | ||
|
|
||
| try { | ||
| if(backend == 1) { // HIP backend | ||
| // Allocate GPU memory | ||
| size_t hue_shift_size = hue_shift.size() * sizeof(float); | ||
| size_t roi_size = batch_size * sizeof(RpptROI); | ||
|
|
||
| hipMalloc(&hue_shift_gpu, hue_shift_size); | ||
| hipMalloc(&roi_gpu, roi_size); | ||
|
|
||
| // Copy CPU data to GPU | ||
| hipMemcpy(hue_shift_gpu, hue_shift_ptr, hue_shift_size, hipMemcpyHostToDevice); | ||
| hipMemcpy(roi_gpu, roi.data(), roi_size, hipMemcpyHostToDevice); | ||
|
|
||
| // Use GPU pointers | ||
| hue_shift_ptr = hue_shift_gpu; | ||
| roi_ptr = roi_gpu; | ||
| } | ||
|
|
||
| RppStatus status = rppt_hue(input_data.ptr, &src_desc, | ||
| output_data.ptr, &dst_desc, | ||
| hue_shift_ptr, | ||
| roi_ptr, RpptRoiType::XYWH, | ||
| rpp_handle, static_cast<RppBackend>(backend)); | ||
|
|
||
| if (status != RPP_SUCCESS) { | ||
| throw std::runtime_error("rppt_hue failed"); | ||
| } | ||
|
|
||
| hip_cleanup(); // success path cleanup | ||
| } | ||
| catch (...) { | ||
| hip_cleanup(); // error path cleanup | ||
| throw; // rethrow original exception | ||
| } | ||
| } | ||
|
|
||
| // 5. Flip (Geometric) | ||
| void flip(const torch::Tensor& input_tensor, | ||
| torch::Tensor& output_tensor, | ||
| const std::vector<int>& horizontal, | ||
| const std::vector<int>& vertical, | ||
| const std::vector<int>& roi_widths, | ||
| const std::vector<int>& roi_heights, | ||
| uintptr_t handle, | ||
| int backend) { | ||
| auto input_data = get_tensor_data(input_tensor); | ||
| auto output_data = get_tensor_data(output_tensor); | ||
|
|
||
| // Detect layouts from tensor shapes | ||
| input_data.layout = detect_layout_from_tensor(input_tensor); | ||
| output_data.layout = detect_layout_from_tensor(output_tensor); | ||
|
|
||
| RpptDesc src_desc, dst_desc; | ||
| setup_tensor_descriptor(src_desc, input_data); | ||
| setup_tensor_descriptor(dst_desc, output_data); | ||
| auto rpp_handle = reinterpret_cast<rppHandle_t>(handle); | ||
|
|
||
| int batch_size = input_data.shape[0]; | ||
| std::vector<RpptROI> roi(batch_size); | ||
| std::vector<Rpp32u> h_tensor(batch_size); | ||
| std::vector<Rpp32u> v_tensor(batch_size); | ||
|
|
||
| for(int i = 0; i < batch_size; i++) { | ||
| roi[i].xywhROI = {0, 0, roi_widths[i], roi_heights[i]}; | ||
| h_tensor[i] = horizontal[i]; | ||
| v_tensor[i] = vertical[i]; | ||
| } | ||
|
|
||
| // Initialize pointers to CPU data by default | ||
| Rpp32u* h_tensor_ptr = h_tensor.data(); | ||
| Rpp32u* v_tensor_ptr = v_tensor.data(); | ||
| RpptROI* roi_ptr = roi.data(); | ||
|
|
||
| Rpp32u* h_tensor_gpu = nullptr; | ||
| Rpp32u* v_tensor_gpu = nullptr; | ||
| RpptROI* roi_gpu = nullptr; | ||
|
|
||
| // RAII cleanup guard | ||
| auto hip_cleanup = [&]() { | ||
| if (h_tensor_gpu) hipFree(h_tensor_gpu); | ||
| if (v_tensor_gpu) hipFree(v_tensor_gpu); | ||
| if (roi_gpu) hipFree(roi_gpu); | ||
| }; | ||
|
|
||
| try { | ||
| if(backend == 1) { // HIP backend | ||
| // Allocate GPU memory | ||
| size_t h_tensor_size = batch_size * sizeof(Rpp32u); | ||
| size_t v_tensor_size = batch_size * sizeof(Rpp32u); | ||
| size_t roi_size = batch_size * sizeof(RpptROI); | ||
|
|
||
| hipMalloc(&h_tensor_gpu, h_tensor_size); | ||
| hipMalloc(&v_tensor_gpu, v_tensor_size); | ||
| hipMalloc(&roi_gpu, roi_size); | ||
|
|
||
| // Copy CPU data to GPU | ||
| hipMemcpy(h_tensor_gpu, h_tensor_ptr, h_tensor_size, hipMemcpyHostToDevice); | ||
| hipMemcpy(v_tensor_gpu, v_tensor_ptr, v_tensor_size, hipMemcpyHostToDevice); | ||
| hipMemcpy(roi_gpu, roi.data(), roi_size, hipMemcpyHostToDevice); | ||
|
|
||
| // Use GPU pointers | ||
| h_tensor_ptr = h_tensor_gpu; | ||
| v_tensor_ptr = v_tensor_gpu; | ||
| roi_ptr = roi_gpu; | ||
| } | ||
|
|
||
| RppStatus status = rppt_flip(input_data.ptr, &src_desc, | ||
| output_data.ptr, &dst_desc, | ||
| h_tensor_ptr, v_tensor_ptr, | ||
| roi_ptr, RpptRoiType::XYWH, | ||
| rpp_handle, static_cast<RppBackend>(backend)); | ||
|
|
||
| if (status != RPP_SUCCESS) { | ||
| throw std::runtime_error("rppt_flip failed"); | ||
| } | ||
|
|
||
| hip_cleanup(); // success path cleanup | ||
| } | ||
| catch (...) { | ||
| hip_cleanup(); // error path cleanup | ||
| throw; // rethrow original exception | ||
| } | ||
| } | ||
|
|
||
| // 6. Resize (Geometric) | ||
| void resize(const torch::Tensor& input_tensor, | ||
| torch::Tensor& output_tensor, | ||
| const std::vector<int>& dst_width, | ||
| const std::vector<int>& dst_height, | ||
| const std::vector<int>& roi_widths, | ||
| const std::vector<int>& roi_heights, | ||
| uintptr_t handle, | ||
| int backend) { | ||
| auto input_data = get_tensor_data(input_tensor); | ||
| auto output_data = get_tensor_data(output_tensor); | ||
|
|
||
| // Detect layouts from tensor shapes | ||
| input_data.layout = detect_layout_from_tensor(input_tensor); | ||
| output_data.layout = detect_layout_from_tensor(output_tensor); | ||
|
|
||
| RpptDesc src_desc, dst_desc; | ||
| setup_tensor_descriptor(src_desc, input_data); | ||
| setup_tensor_descriptor(dst_desc, output_data); | ||
| auto rpp_handle = reinterpret_cast<rppHandle_t>(handle); | ||
|
|
||
| int batch_size = input_data.shape[0]; | ||
| std::vector<RpptROI> roi(batch_size); | ||
| std::vector<RpptImagePatch> dst_sizes(batch_size); | ||
|
|
||
| for(int i = 0; i < batch_size; i++) { | ||
| roi[i].xywhROI = {0, 0, roi_widths[i], roi_heights[i]}; | ||
| dst_sizes[i].width = dst_width[i]; | ||
| dst_sizes[i].height = dst_height[i]; | ||
| } | ||
|
|
||
| // Initialize pointers to CPU data by default | ||
| RpptImagePatch* dst_sizes_ptr = dst_sizes.data(); | ||
| RpptROI* roi_ptr = roi.data(); | ||
|
|
||
| RpptImagePatch* dst_sizes_gpu = nullptr; | ||
| RpptROI* roi_gpu = nullptr; | ||
|
|
||
| // RAII cleanup guard | ||
| auto hip_cleanup = [&]() { | ||
| if (dst_sizes_gpu) hipFree(dst_sizes_gpu); | ||
| if (roi_gpu) hipFree(roi_gpu); | ||
| }; | ||
|
|
||
| try { | ||
| if(backend == 1) { // HIP backend | ||
| // Allocate GPU memory | ||
| size_t dst_sizes_size = batch_size * sizeof(RpptImagePatch); | ||
| size_t roi_size = batch_size * sizeof(RpptROI); | ||
|
|
||
| hipMalloc(&dst_sizes_gpu, dst_sizes_size); | ||
| hipMalloc(&roi_gpu, roi_size); | ||
|
|
||
| // Copy CPU data to GPU | ||
| hipMemcpy(dst_sizes_gpu, dst_sizes_ptr, dst_sizes_size, hipMemcpyHostToDevice); | ||
| hipMemcpy(roi_gpu, roi.data(), roi_size, hipMemcpyHostToDevice); | ||
|
|
||
| // Use GPU pointers | ||
| dst_sizes_ptr = dst_sizes_gpu; | ||
| roi_ptr = roi_gpu; | ||
| } | ||
|
|
||
| RppStatus status = rppt_resize(input_data.ptr, &src_desc, | ||
| output_data.ptr, &dst_desc, | ||
| dst_sizes_ptr, | ||
| RpptInterpolationType::BILINEAR, | ||
| roi_ptr, RpptRoiType::XYWH, | ||
| rpp_handle, static_cast<RppBackend>(backend)); | ||
|
|
||
| if (status != RPP_SUCCESS) { | ||
| throw std::runtime_error("rppt_resize failed"); | ||
| } | ||
|
|
||
| hip_cleanup(); // success path cleanup | ||
| } | ||
| catch (...) { | ||
| hip_cleanup(); // error path cleanup | ||
| throw; // rethrow original exception | ||
| } | ||
| } | ||
|
|
||
| // 7. Rotate (Geometric) | ||
| void rotate(const torch::Tensor& input_tensor, | ||
| torch::Tensor& output_tensor, | ||
| const std::vector<float>& angle, | ||
| const std::vector<int>& roi_widths, | ||
| const std::vector<int>& roi_heights, | ||
| uintptr_t handle, | ||
| int backend) { | ||
| auto input_data = get_tensor_data(input_tensor); | ||
| auto output_data = get_tensor_data(output_tensor); | ||
|
|
||
| // Detect layouts from tensor shapes | ||
| input_data.layout = detect_layout_from_tensor(input_tensor); | ||
| output_data.layout = detect_layout_from_tensor(output_tensor); | ||
|
|
||
| RpptDesc src_desc, dst_desc; | ||
| setup_tensor_descriptor(src_desc, input_data); | ||
| setup_tensor_descriptor(dst_desc, output_data); | ||
| auto rpp_handle = reinterpret_cast<rppHandle_t>(handle); | ||
|
|
||
| int batch_size = input_data.shape[0]; | ||
| std::vector<RpptROI> roi(batch_size); | ||
| for(int i = 0; i < batch_size; i++) { | ||
| roi[i].xywhROI = {0, 0, roi_widths[i], roi_heights[i]}; | ||
| } | ||
|
|
||
| // Initialize pointers to CPU data by default | ||
| float* angle_ptr = const_cast<float*>(angle.data()); | ||
| RpptROI* roi_ptr = roi.data(); | ||
|
|
||
| float* angle_gpu = nullptr; | ||
| RpptROI* roi_gpu = nullptr; | ||
|
|
||
| // RAII cleanup guard | ||
| auto hip_cleanup = [&]() { | ||
| if (angle_gpu) hipFree(angle_gpu); | ||
| if (roi_gpu) hipFree(roi_gpu); | ||
| }; | ||
|
|
||
| try { | ||
| if(backend == 1) { // HIP backend | ||
| // Allocate GPU memory | ||
| size_t angle_size = angle.size() * sizeof(float); | ||
| size_t roi_size = batch_size * sizeof(RpptROI); | ||
|
|
||
| hipMalloc(&angle_gpu, angle_size); | ||
| hipMalloc(&roi_gpu, roi_size); | ||
|
|
||
| // Copy CPU data to GPU | ||
| hipMemcpy(angle_gpu, angle_ptr, angle_size, hipMemcpyHostToDevice); | ||
| hipMemcpy(roi_gpu, roi.data(), roi_size, hipMemcpyHostToDevice); | ||
|
|
||
| // Use GPU pointers | ||
| angle_ptr = angle_gpu; | ||
| roi_ptr = roi_gpu; | ||
| } | ||
|
|
||
| RppStatus status = rppt_rotate(input_data.ptr, &src_desc, | ||
| output_data.ptr, &dst_desc, | ||
| angle_ptr, | ||
| RpptInterpolationType::BILINEAR, | ||
| roi_ptr, RpptRoiType::XYWH, | ||
| rpp_handle, static_cast<RppBackend>(backend)); | ||
|
|
||
| if (status != RPP_SUCCESS) { | ||
| throw std::runtime_error("rppt_rotate failed"); | ||
| } | ||
|
|
||
| hip_cleanup(); // success path cleanup | ||
| } | ||
| catch (...) { | ||
| hip_cleanup(); // error path cleanup | ||
| throw; // rethrow original exception | ||
| } | ||
| } | ||
|
|
||
| // 8. Crop (Geometric) | ||
| void crop(const torch::Tensor& input_tensor, | ||
| torch::Tensor& output_tensor, | ||
| const std::vector<int>& x1, | ||
| const std::vector<int>& y1, | ||
| const std::vector<int>& crop_width, | ||
| const std::vector<int>& crop_height, | ||
| uintptr_t handle, | ||
| int backend) { | ||
| auto input_data = get_tensor_data(input_tensor); | ||
| auto output_data = get_tensor_data(output_tensor); | ||
|
|
||
| // Detect layouts from tensor shapes | ||
| input_data.layout = detect_layout_from_tensor(input_tensor); | ||
| output_data.layout = detect_layout_from_tensor(output_tensor); | ||
|
|
||
| RpptDesc src_desc, dst_desc; | ||
| setup_tensor_descriptor(src_desc, input_data); | ||
| setup_tensor_descriptor(dst_desc, output_data); | ||
| auto rpp_handle = reinterpret_cast<rppHandle_t>(handle); | ||
|
|
||
| int batch_size = input_data.shape[0]; | ||
| std::vector<RpptROI> roi(batch_size); | ||
|
|
||
| for(int i = 0; i < batch_size; i++) { | ||
| roi[i].xywhROI = {x1[i], | ||
| y1[i], | ||
| crop_width[i], | ||
| crop_height[i]}; | ||
| } | ||
|
|
||
| // Initialize pointers to CPU data by default | ||
| RpptROI* roi_ptr = roi.data(); | ||
|
|
||
| RpptROI* roi_gpu = nullptr; | ||
| // RAII cleanup guard | ||
| auto hip_cleanup = [&]() { | ||
| if (roi_gpu) hipFree(roi_gpu); | ||
| }; | ||
|
|
||
| try { | ||
| if(backend == 1) { // HIP backend | ||
| // Allocate GPU memory | ||
| size_t roi_size = batch_size * sizeof(RpptROI); | ||
|
|
||
| hipMalloc(&roi_gpu, roi_size); | ||
|
|
||
| // Copy CPU data to GPU | ||
| hipMemcpy(roi_gpu, roi.data(), roi_size, hipMemcpyHostToDevice); | ||
|
|
||
| // Use GPU pointers | ||
| roi_ptr = roi_gpu; | ||
| } | ||
|
|
||
| RppStatus status = rppt_crop(input_data.ptr, &src_desc, | ||
| output_data.ptr, &dst_desc, | ||
| roi_ptr, RpptRoiType::XYWH, | ||
| rpp_handle, static_cast<RppBackend>(backend)); | ||
|
|
||
| if (status != RPP_SUCCESS) { | ||
| throw std::runtime_error("rppt_crop failed"); | ||
| } | ||
|
|
||
| hip_cleanup(); // success path cleanup | ||
| } | ||
| catch (...) { | ||
| hip_cleanup(); // error path cleanup | ||
| throw; // rethrow original exception | ||
| } | ||
| } | ||
|
|
||
| // 9. Vignette (Effects) | ||
| void vignette(const torch::Tensor& input_tensor, | ||
| torch::Tensor& output_tensor, | ||
| const std::vector<float>& intensity, | ||
| const std::vector<int>& roi_widths, | ||
| const std::vector<int>& roi_heights, | ||
| uintptr_t handle, | ||
| int backend) { | ||
| auto input_data = get_tensor_data(input_tensor); | ||
| auto output_data = get_tensor_data(output_tensor); | ||
|
|
||
| // Detect layouts from tensor shapes | ||
| input_data.layout = detect_layout_from_tensor(input_tensor); | ||
| output_data.layout = detect_layout_from_tensor(output_tensor); | ||
|
|
||
| RpptDesc src_desc, dst_desc; | ||
| setup_tensor_descriptor(src_desc, input_data); | ||
| setup_tensor_descriptor(dst_desc, output_data); | ||
| auto rpp_handle = reinterpret_cast<rppHandle_t>(handle); | ||
|
|
||
| int batch_size = input_data.shape[0]; | ||
| std::vector<RpptROI> roi(batch_size); | ||
| for(int i = 0; i < batch_size; i++) { | ||
| roi[i].xywhROI = {0, 0, roi_widths[i], roi_heights[i]}; | ||
| } | ||
|
|
||
| // Initialize pointers to CPU data by default | ||
| float* intensity_ptr = const_cast<float*>(intensity.data()); | ||
| RpptROI* roi_ptr = roi.data(); | ||
|
|
||
| float* intensity_gpu = nullptr; | ||
| RpptROI* roi_gpu = nullptr; | ||
|
|
||
| // RAII cleanup guard | ||
| auto hip_cleanup = [&]() { | ||
| if (intensity_gpu) hipFree(intensity_gpu); | ||
| if (roi_gpu) hipFree(roi_gpu); | ||
| }; | ||
|
|
||
| try { | ||
| if(backend == 1) { // HIP backend | ||
| // Allocate GPU memory | ||
| size_t intensity_size = intensity.size() * sizeof(float); | ||
| size_t roi_size = batch_size * sizeof(RpptROI); | ||
|
|
||
| hipMalloc(&intensity_gpu, intensity_size); | ||
| hipMalloc(&roi_gpu, roi_size); | ||
|
|
||
| // Copy CPU data to GPU | ||
| hipMemcpy(intensity_gpu, intensity_ptr, intensity_size, hipMemcpyHostToDevice); | ||
| hipMemcpy(roi_gpu, roi.data(), roi_size, hipMemcpyHostToDevice); | ||
|
|
||
| // Use GPU pointers | ||
| intensity_ptr = intensity_gpu; | ||
| roi_ptr = roi_gpu; | ||
| } | ||
|
|
||
| RppStatus status = rppt_vignette(input_data.ptr, &src_desc, | ||
| output_data.ptr, &dst_desc, | ||
| intensity_ptr, | ||
| roi_ptr, RpptRoiType::XYWH, | ||
| rpp_handle, static_cast<RppBackend>(backend)); | ||
|
|
||
| if (status != RPP_SUCCESS) { | ||
| throw std::runtime_error("rppt_vignette failed"); | ||
| } | ||
|
|
||
| hip_cleanup(); // success path cleanup | ||
| } | ||
| catch (...) { | ||
| hip_cleanup(); // error path cleanup | ||
| throw; // rethrow original exception | ||
| } | ||
| } | ||
|
|
||
| // 10. Pixelate (Effects) | ||
| void pixelate(const torch::Tensor& input_tensor, | ||
| torch::Tensor& output_tensor, | ||
| const torch::Tensor& scratch_tensor, | ||
| float pixelation_pct, | ||
| const std::vector<int>& roi_widths, | ||
| const std::vector<int>& roi_heights, | ||
| uintptr_t handle, | ||
| int backend) { | ||
| auto input_data = get_tensor_data(input_tensor); | ||
| auto output_data = get_tensor_data(output_tensor); | ||
| auto scratch_data = get_tensor_data(scratch_tensor); | ||
|
|
||
| // Detect layouts from tensor shapes | ||
| input_data.layout = detect_layout_from_tensor(input_tensor); | ||
| output_data.layout = detect_layout_from_tensor(output_tensor); | ||
|
|
||
| RpptDesc src_desc, dst_desc; | ||
| setup_tensor_descriptor(src_desc, input_data); | ||
| setup_tensor_descriptor(dst_desc, output_data); | ||
| auto rpp_handle = reinterpret_cast<rppHandle_t>(handle); | ||
|
|
||
| int batch_size = input_data.shape[0]; | ||
|
|
||
| std::vector<RpptROI> roi(batch_size); | ||
| for(int i = 0; i < batch_size; i++) { | ||
| roi[i].xywhROI = {0, 0, roi_widths[i], roi_heights[i]}; | ||
| } | ||
|
|
||
| // Initialize pointers to CPU data by default | ||
| RpptROI* roi_ptr = roi.data(); | ||
|
|
||
| RpptROI* roi_gpu = nullptr; | ||
|
|
||
| // RAII cleanup guard | ||
| auto hip_cleanup = [&]() { | ||
| if (roi_gpu) hipFree(roi_gpu); | ||
| }; | ||
|
|
||
| try { | ||
| if(backend == 1) { // HIP backend | ||
| // Allocate GPU memory | ||
| size_t roi_size = batch_size * sizeof(RpptROI); | ||
|
|
||
| hipMalloc(&roi_gpu, roi_size); | ||
|
|
||
| // Copy CPU data to GPU | ||
| hipMemcpy(roi_gpu, roi.data(), roi_size, hipMemcpyHostToDevice); | ||
|
|
||
| // Use GPU pointers | ||
| roi_ptr = roi_gpu; | ||
| } | ||
|
|
||
| RppStatus status = rppt_pixelate(input_data.ptr, &src_desc, | ||
| output_data.ptr, &dst_desc, | ||
| scratch_data.ptr, | ||
| pixelation_pct, | ||
| roi_ptr, RpptRoiType::XYWH, | ||
| rpp_handle, static_cast<RppBackend>(backend)); | ||
|
|
||
| if (status != RPP_SUCCESS) { | ||
| throw std::runtime_error("rppt_pixelate failed"); | ||
| } | ||
|
|
||
| hip_cleanup(); // success path cleanup | ||
| } | ||
| catch (...) { | ||
| hip_cleanup(); // error path cleanup | ||
| throw; // rethrow original exception | ||
| } | ||
| } |
There was a problem hiding this comment.
All hipMalloc and hipMemcpy calls throughout this file lack error checking. This affects all 10 augmentation functions (brightness, gamma_correction, contrast, hue, flip, resize, rotate, crop, vignette, pixelate). If GPU memory allocation or transfer fails, the code will proceed with null or invalid pointers, leading to undefined behavior or crashes. Each hipMalloc and hipMemcpy call should be followed by error checking to verify success before proceeding with the operation.
There was a problem hiding this comment.
Done — added CHECK_RETURN_STATUS for error handling.
def brightness(images, alpha=1.0, beta=0.0, roi_widths=None, roi_heights=None, input_layout=None, output_layout=None, backend=None):
    """
    Adjust image brightness.

    Args:
        images: Input PyTorch tensor with the batch on dimension 0. Default ROI
            sizes are read from dims (3, 2) when the resolved layout is "NCHW"
            and from dims (2, 1) otherwise.
        alpha: Brightness multiplier (default 1.0), replicated for every image
        beta: Brightness offset (default 0.0), replicated for every image
        roi_widths: List of actual image widths (None = use full tensor width)
        roi_heights: List of actual image heights (None = use full tensor height)
        input_layout: Layout hint passed to _resolve_layout (None = let it decide)
        output_layout: Accepted for API symmetry; not read by this function
        backend: RppBackend (None = auto-detect)

    Returns:
        Augmented images tensor (same shape/dtype/device as the prepared input)
    """
    if backend is None:
        backend = get_default_backend()

    # Convert backend to integer
    backend_int = backend.value if hasattr(backend, 'value') else int(backend)

    # Ensure tensor is contiguous and on correct device
    if not images.is_contiguous():
        images = images.contiguous()

    # Move to correct device
    if backend_int == 1:  # HIP
        if not images.is_cuda:
            images = images.cuda()
    else:  # HOST
        if images.is_cuda:
            images = images.cpu()

    batch_size = images.shape[0]
    output = torch.zeros_like(images).contiguous()
    layout = _resolve_layout(images, input_layout)

    # Set ROI dimensions
    if roi_widths is None:
        roi_widths = [images.shape[3] if layout == "NCHW" else images.shape[2]] * batch_size
    if roi_heights is None:
        roi_heights = [images.shape[2] if layout == "NCHW" else images.shape[1]] * batch_size

    alpha_array = [alpha] * batch_size
    beta_array = [beta] * batch_size

    handle = rppCreate(batch_size, backend_int)
    try:
        _brightness(images, output, alpha_array, beta_array, roi_widths, roi_heights, handle, backend_int)
    finally:
        # Always release the handle, even when the native call raises.
        rppDestroy(handle, backend_int)

    return output
|
|
||
|
|
||
def gamma_correction(images, gamma=1.0, roi_widths=None, roi_heights=None, input_layout=None, output_layout=None, backend=None):
    """
    Apply gamma correction.

    Args:
        images: Input PyTorch tensor with the batch on dimension 0. Default ROI
            sizes are read from dims (3, 2) when the resolved layout is "NCHW"
            and from dims (2, 1) otherwise.
        gamma: Gamma value (default 1.0), replicated for every image
        roi_widths: List of actual image widths (None = use full tensor width)
        roi_heights: List of actual image heights (None = use full tensor height)
        input_layout: Layout hint passed to _resolve_layout (None = let it decide)
        output_layout: Accepted for API symmetry; not read by this function
        backend: RppBackend (None = auto-detect)

    Returns:
        Gamma-corrected images
    """
    if backend is None:
        backend = get_default_backend()

    backend_int = backend.value if hasattr(backend, 'value') else int(backend)

    if not images.is_contiguous():
        images = images.contiguous()

    if backend_int == 1:  # HIP
        if not images.is_cuda:
            images = images.cuda()
    else:  # HOST
        if images.is_cuda:
            images = images.cpu()

    batch_size = images.shape[0]
    output = torch.zeros_like(images).contiguous()
    layout = _resolve_layout(images, input_layout)

    # Set ROI dimensions
    if roi_widths is None:
        roi_widths = [images.shape[3] if layout == "NCHW" else images.shape[2]] * batch_size
    if roi_heights is None:
        roi_heights = [images.shape[2] if layout == "NCHW" else images.shape[1]] * batch_size

    gamma_array = [gamma] * batch_size

    handle = rppCreate(batch_size, backend_int)
    try:
        _gamma_correction(images, output, gamma_array, roi_widths, roi_heights, handle, backend_int)
    finally:
        # Always release the handle, even when the native call raises.
        rppDestroy(handle, backend_int)

    return output
|
|
||
| def contrast(images, contrast_factor=1.0, contrast_center=128.0, roi_widths=None, roi_heights=None, input_layout=None, output_layout=None, backend=None): | ||
| """ | ||
| Adjust image contrast. | ||
|
|
||
| Args: | ||
| images: Input tensor (B, C, H, W) | ||
| contrast_factor: Contrast factor | ||
| contrast_center: Center value for contrast | ||
| roi_widths: List of actual image widths (None = use full tensor width) | ||
| roi_heights: List of actual image heights (None = use full tensor height) | ||
| backend: RppBackend (None = auto-detect) | ||
|
|
||
| Returns: | ||
| Contrast-adjusted images | ||
| """ | ||
| if backend is None: | ||
| backend = get_default_backend() | ||
|
|
||
| backend_int = backend.value if hasattr(backend, 'value') else int(backend) | ||
|
|
||
| if not images.is_contiguous(): | ||
| images = images.contiguous() | ||
|
|
||
| if backend_int == 1: # HIP | ||
| if not images.is_cuda: | ||
| images = images.cuda() | ||
| else: # HOST | ||
| if images.is_cuda: | ||
| images = images.cpu() | ||
|
|
||
| batch_size = images.shape[0] | ||
| output = torch.empty_like(images).contiguous() | ||
| layout = _resolve_layout(images, input_layout) | ||
|
|
||
| # Set ROI dimensions | ||
| if roi_widths is None: | ||
| roi_widths = [images.shape[3] if layout == "NCHW" else images.shape[2]] * batch_size | ||
| if roi_heights is None: | ||
| roi_heights = [images.shape[2] if layout == "NCHW" else images.shape[1]] * batch_size | ||
|
|
||
| handle = rppCreate(batch_size, backend_int) | ||
|
|
||
| contrast_factor_array = [contrast_factor] * batch_size | ||
| contrast_center_array = [contrast_center] * batch_size | ||
|
|
||
| _contrast(images, output, contrast_factor_array, contrast_center_array, roi_widths, roi_heights, handle, backend_int) | ||
| rppDestroy(handle, backend_int) | ||
|
|
||
| return output | ||
|
|
||
|
|
||
| def hue(images, hue_shift=0.0, roi_widths=None, roi_heights=None, input_layout=None, output_layout=None, backend=None): | ||
| """ | ||
| Adjust image hue (for RGB images only). | ||
|
|
||
| Args: | ||
| images: Input tensor (B, 3, H, W) - RGB images only | ||
| hue_shift: Hue shift in degrees (0-359) | ||
| roi_widths: List of actual image widths (None = use full tensor width) | ||
| roi_heights: List of actual image heights (None = use full tensor height) | ||
| backend: RppBackend (None = auto-detect) | ||
|
|
||
| Returns: | ||
| Hue-adjusted images | ||
| """ | ||
| if backend is None: | ||
| backend = get_default_backend() | ||
|
|
||
| backend_int = backend.value if hasattr(backend, 'value') else int(backend) | ||
|
|
||
| if not images.is_contiguous(): | ||
| images = images.contiguous() | ||
|
|
||
| if backend_int == 1: # HIP | ||
| if not images.is_cuda: | ||
| images = images.cuda() | ||
| else: # HOST | ||
| if images.is_cuda: | ||
| images = images.cpu() | ||
|
|
||
| batch_size = images.shape[0] | ||
| output = torch.zeros_like(images).contiguous() | ||
| layout = _resolve_layout(images, input_layout) | ||
|
|
||
| # Set ROI dimensions | ||
| if roi_widths is None: | ||
| roi_widths = [images.shape[3] if layout == "NCHW" else images.shape[2]] * batch_size | ||
| if roi_heights is None: | ||
| roi_heights = [images.shape[2] if layout == "NCHW" else images.shape[1]] * batch_size | ||
|
|
||
| handle = rppCreate(batch_size, backend_int) | ||
| hue_shift_array = [hue_shift] * batch_size | ||
|
|
||
| _hue(images, output, hue_shift_array, roi_widths, roi_heights, handle, backend_int) | ||
| rppDestroy(handle, backend_int) | ||
|
|
||
| return output | ||
|
|
||
|
|
||
| # Geometric Augmentations (4) | ||
| def flip(images, horizontal=False, vertical=False, roi_widths=None, roi_heights=None, input_layout=None, output_layout=None, backend=None): | ||
| """ | ||
| Flip images horizontally and/or vertically. | ||
|
|
||
| Args: | ||
| images: Input tensor (B, C, H, W) | ||
| horizontal: Flip horizontally (bool or list) | ||
| vertical: Flip vertically (bool or list) | ||
| roi_widths: List of actual image widths (None = use full tensor width) | ||
| roi_heights: List of actual image heights (None = use full tensor height) | ||
| backend: RppBackend (None = auto-detect) | ||
|
|
||
| Returns: | ||
| Flipped images tensor | ||
| """ | ||
| if backend is None: | ||
| backend = get_default_backend() | ||
|
|
||
| backend_int = backend.value if hasattr(backend, 'value') else int(backend) | ||
|
|
||
| if not images.is_contiguous(): | ||
| images = images.contiguous() | ||
|
|
||
| if backend_int == 1: # HIP | ||
| if not images.is_cuda: | ||
| images = images.cuda() | ||
| else: # HOST | ||
| if images.is_cuda: | ||
| images = images.cpu() | ||
|
|
||
| batch_size = images.shape[0] | ||
| output = torch.zeros_like(images).contiguous() | ||
| layout = _resolve_layout(images, input_layout) | ||
|
|
||
| # Set ROI dimensions | ||
| if roi_widths is None: | ||
| roi_widths = [images.shape[3] if layout == "NCHW" else images.shape[2]] * batch_size | ||
| if roi_heights is None: | ||
| roi_heights = [images.shape[2] if layout == "NCHW" else images.shape[1]] * batch_size | ||
|
|
||
| handle = rppCreate(batch_size, backend_int) | ||
|
|
||
| # Convert bool to list if needed | ||
| if isinstance(horizontal, bool): | ||
| horizontal = [int(horizontal)] * batch_size | ||
| if isinstance(vertical, bool): | ||
| vertical = [int(vertical)] * batch_size | ||
|
|
||
| _flip(images, output, horizontal, vertical, roi_widths, roi_heights, handle, backend_int) | ||
| rppDestroy(handle, backend_int) | ||
|
|
||
| return output | ||
|
|
||
|
|
||
| def resize(images, width, height, roi_widths=None, roi_heights=None, input_layout=None, output_layout=None, backend=None): | ||
| """ | ||
| Resize images. | ||
|
|
||
| Args: | ||
| images: Input tensor (B, C, H, W) | ||
| width: Target width | ||
| height: Target height | ||
| roi_widths: List of actual image widths (None = use full tensor width) | ||
| roi_heights: List of actual image heights (None = use full tensor height) | ||
| backend: RppBackend (None = auto-detect) | ||
|
|
||
| Returns: | ||
| Resized images tensor | ||
| """ | ||
| if backend is None: | ||
| backend = get_default_backend() | ||
|
|
||
| backend_int = backend.value if hasattr(backend, 'value') else int(backend) | ||
|
|
||
| if not images.is_contiguous(): | ||
| images = images.contiguous() | ||
|
|
||
| if backend_int == 1: # HIP | ||
| if not images.is_cuda: | ||
| images = images.cuda() | ||
| else: # HOST | ||
| if images.is_cuda: | ||
| images = images.cpu() | ||
|
|
||
| batch_size = images.shape[0] | ||
| output = torch.empty_like(images).contiguous() | ||
| layout = _resolve_layout(images, input_layout) | ||
|
|
||
| # Set ROI dimensions | ||
| if roi_widths is None: | ||
| roi_widths = [images.shape[3] if layout == "NCHW" else images.shape[2]] * batch_size | ||
| if roi_heights is None: | ||
| roi_heights = [images.shape[2] if layout == "NCHW" else images.shape[1]] * batch_size | ||
|
|
||
| handle = rppCreate(batch_size, backend_int) | ||
|
|
||
| width_array = [width] * batch_size | ||
| height_array = [height] * batch_size | ||
|
|
||
| _resize(images, output, width_array, height_array, roi_widths, roi_heights, handle, backend_int) | ||
| rppDestroy(handle, backend_int) | ||
|
|
||
| return output | ||
|
|
||
|
|
||
| def rotate(images, angle=0.0, roi_widths=None, roi_heights=None, input_layout=None, output_layout=None, backend=None): | ||
| """ | ||
| Rotate images by given angle. | ||
|
|
||
| Args: | ||
| images: Input tensor (B, C, H, W) | ||
| angle: Rotation angle in degrees (positive = counter-clockwise) | ||
| roi_widths: List of actual image widths (None = use full tensor width) | ||
| roi_heights: List of actual image heights (None = use full tensor height) | ||
| backend: RppBackend (None = auto-detect) | ||
|
|
||
| Returns: | ||
| Rotated images tensor | ||
| """ | ||
| if backend is None: | ||
| backend = get_default_backend() | ||
|
|
||
| backend_int = backend.value if hasattr(backend, 'value') else int(backend) | ||
|
|
||
| if not images.is_contiguous(): | ||
| images = images.contiguous() | ||
|
|
||
| if backend_int == 1: # HIP | ||
| if not images.is_cuda: | ||
| images = images.cuda() | ||
| else: # HOST | ||
| if images.is_cuda: | ||
| images = images.cpu() | ||
|
|
||
| batch_size = images.shape[0] | ||
| output = torch.empty_like(images).contiguous() | ||
| layout = _resolve_layout(images, input_layout) | ||
|
|
||
| # Set ROI dimensions | ||
| if roi_widths is None: | ||
| roi_widths = [images.shape[3] if layout == "NCHW" else images.shape[2]] * batch_size | ||
| if roi_heights is None: | ||
| roi_heights = [images.shape[2] if layout == "NCHW" else images.shape[1]] * batch_size | ||
|
|
||
| handle = rppCreate(batch_size, backend_int) | ||
| angle_array = [angle] * batch_size | ||
|
|
||
| _rotate(images, output, angle_array, roi_widths, roi_heights, handle, backend_int) | ||
| rppDestroy(handle, backend_int) | ||
|
|
||
| return output | ||
|
|
||
|
|
||
| def crop(images, x1, y1, crop_width, crop_height, roi_widths=None, roi_heights=None, input_layout=None, output_layout=None, backend=None): | ||
| """ | ||
| Crop images to specified region. | ||
|
|
||
| Args: | ||
| images: Input tensor (B, C, H, W) | ||
| x1: Top-left x coordinate | ||
| y1: Top-left y coordinate | ||
| crop_width: Width of crop region | ||
| crop_height: Height of crop region | ||
| backend: RppBackend (None = auto-detect) | ||
|
|
||
| Returns: | ||
| Cropped images tensor | ||
| """ | ||
| if backend is None: | ||
| backend = get_default_backend() | ||
|
|
||
| backend_int = backend.value if hasattr(backend, 'value') else int(backend) | ||
|
|
||
| if not images.is_contiguous(): | ||
| images = images.contiguous() | ||
|
|
||
| if backend_int == 1: # HIP | ||
| if not images.is_cuda: | ||
| images = images.cuda() | ||
| else: # HOST | ||
| if images.is_cuda: | ||
| images = images.cpu() | ||
|
|
||
| batch_size = images.shape[0] | ||
| output = torch.zeros_like(images).contiguous() | ||
|
|
||
| handle = rppCreate(batch_size, backend_int) | ||
|
|
||
| # Convert scalars to lists | ||
| x1_array = [x1] * batch_size if isinstance(x1, (int, float)) else x1 | ||
| y1_array = [y1] * batch_size if isinstance(y1, (int, float)) else y1 | ||
| width_array = [crop_width] * batch_size if isinstance(crop_width, (int, float)) else crop_width | ||
| height_array = [crop_height] * batch_size if isinstance(crop_height, (int, float)) else crop_height | ||
|
|
||
| _crop(images, output, x1_array, y1_array, width_array, height_array, handle, backend_int) | ||
| rppDestroy(handle, backend_int) | ||
|
|
||
| return output | ||
|
|
||
|
|
||
| # Effects Augmentations (2) | ||
| def vignette(images, intensity=0.5, roi_widths=None, roi_heights=None, input_layout=None, output_layout=None, backend=None): | ||
| """ | ||
| Apply vignette effect to images. | ||
|
|
||
| Args: | ||
| images: Input tensor (B, C, H, W) | ||
| intensity: Vignette intensity (0.0 to 1.0) | ||
| roi_widths: List of actual image widths (None = use full tensor width) | ||
| roi_heights: List of actual image heights (None = use full tensor height) | ||
| backend: RppBackend (None = auto-detect) | ||
|
|
||
| Returns: | ||
| Images with vignette effect | ||
| """ | ||
| if backend is None: | ||
| backend = get_default_backend() | ||
|
|
||
| backend_int = backend.value if hasattr(backend, 'value') else int(backend) | ||
|
|
||
| if not images.is_contiguous(): | ||
| images = images.contiguous() | ||
|
|
||
| if backend_int == 1: # HIP | ||
| if not images.is_cuda: | ||
| images = images.cuda() | ||
| else: # HOST | ||
| if images.is_cuda: | ||
| images = images.cpu() | ||
|
|
||
| batch_size = images.shape[0] | ||
| output = torch.zeros_like(images).contiguous() | ||
| layout = _resolve_layout(images, input_layout) | ||
|
|
||
| # Set ROI dimensions | ||
| if roi_widths is None: | ||
| roi_widths = [images.shape[3] if layout == "NCHW" else images.shape[2]] * batch_size | ||
| if roi_heights is None: | ||
| roi_heights = [images.shape[2] if layout == "NCHW" else images.shape[1]] * batch_size | ||
|
|
||
| handle = rppCreate(batch_size, backend_int) | ||
| intensity_array = [intensity] * batch_size | ||
| _vignette(images, output, intensity_array, roi_widths, roi_heights, handle, backend_int) | ||
| rppDestroy(handle, backend_int) | ||
|
|
||
| return output | ||
|
|
||
|
|
||
| def pixelate(images, pixelation_percentage=50.0, roi_widths=None, roi_heights=None, input_layout='NCHW', output_layout=None, backend=None): | ||
| """ | ||
| Apply pixelate effect to images. | ||
|
|
||
| Args: | ||
| images: Input tensor (B, C, H, W) | ||
| pixelation_percentage: Pixelation level (0-100) | ||
| roi_widths: List of actual image widths (None = use full tensor width) | ||
| roi_heights: List of actual image heights (None = use full tensor height) | ||
| backend: RppBackend (None = auto-detect) | ||
|
|
||
| Returns: | ||
| Pixelated images | ||
| """ | ||
| if backend is None: | ||
| backend = get_default_backend() | ||
|
|
||
| backend_int = backend.value if hasattr(backend, 'value') else int(backend) | ||
|
|
||
| if not images.is_contiguous(): | ||
| images = images.contiguous() | ||
|
|
||
| if backend_int == 1: # HIP | ||
| if not images.is_cuda: | ||
| images = images.cuda() | ||
| else: # HOST | ||
| if images.is_cuda: | ||
| images = images.cpu() | ||
|
|
||
| batch_size = images.shape[0] | ||
| output = torch.empty_like(images).contiguous() | ||
| layout = _resolve_layout(images, input_layout) | ||
|
|
||
| # Set ROI dimensions | ||
| if roi_widths is None: | ||
| roi_widths = [images.shape[3] if layout == "NCHW" else images.shape[2]] * batch_size | ||
| if roi_heights is None: | ||
| roi_heights = [images.shape[2] if layout == "NCHW" else images.shape[1]] * batch_size | ||
|
|
||
| # Create scratch buffer | ||
| if input_layout == 'NCHW': | ||
| scratch_size = batch_size * images.shape[1] * images.shape[2] * images.shape[3] | ||
| else: # NHWC | ||
| scratch_size = batch_size * images.shape[1] * images.shape[2] * images.shape[3] | ||
| scratch = torch.empty(scratch_size, dtype=torch.float32, device=images.device) | ||
|
|
||
| handle = rppCreate(batch_size, backend_int) | ||
| _pixelate(images, output, scratch, pixelation_percentage, roi_widths, roi_heights, handle, backend_int) | ||
| rppDestroy(handle, backend_int) | ||
|
|
||
| return output |
There was a problem hiding this comment.
Missing parameter documentation: All augmentation functions in this file (brightness, gamma_correction, contrast, hue, flip, resize, rotate, crop, vignette, pixelate) have 'input_layout', 'output_layout', 'roi_widths', and 'roi_heights' parameters in their signatures but don't document them in their docstrings. These parameters should be documented to provide complete API documentation for users.
There was a problem hiding this comment.
Updated all the augmentations. Done.
| def print_performance_tests_summary(logFile, functionalityGroupList, numRuns): | ||
| """Read performance logs and print summary""" | ||
| try: | ||
| f = open(logFile, "r") |
There was a problem hiding this comment.
Variable naming inconsistency: The function uses 'f' for the file handle while most Python conventions prefer more descriptive names like 'log_file' or 'file'. This makes the code less readable, especially in a longer function where the variable's purpose might not be immediately clear.
| return output | ||
|
|
||
|
|
||
| def pixelate(images, pixelation_percentage=50.0, roi_widths=None, roi_heights=None, input_layout='NCHW', output_layout=None, backend=None): |
There was a problem hiding this comment.
API inconsistency: The 'pixelate' function has a default value for 'input_layout' ('NCHW') while all other augmentation functions use 'None' as the default. This inconsistency could lead to confusion and unexpected behavior. Either document why pixelate requires a different default, or make it consistent with other functions.
| def pixelate(images, pixelation_percentage=50.0, roi_widths=None, roi_heights=None, input_layout='NCHW', output_layout=None, backend=None): | |
| def pixelate(images, pixelation_percentage=50.0, roi_widths=None, roi_heights=None, input_layout=None, output_layout=None, backend=None): |
| images = images.cpu() | ||
|
|
||
| batch_size = images.shape[0] | ||
| output = torch.empty_like(images).contiguous() |
There was a problem hiding this comment.
Inconsistent tensor initialization: This function uses torch.empty_like while similar functions like brightness, gamma_correction, and flip use torch.zeros_like. This inconsistency could lead to subtle bugs if the underlying RPP operation doesn't fully initialize the output buffer.
| output = torch.empty_like(images).contiguous() | |
| output = torch.zeros_like(images).contiguous() |
| if(PYTHON_VERSION_SUGGESTED) | ||
| if(PYTHON_VERSION_SUGGESTED AND (${PYTHON_VERSION_SUGGESTED} VERSION_LESS "3.8" OR ${PYTHON_VERSION_SUGGESTED} VERSION_GREATER "3.13")) | ||
| message("-- ${Red}NOTE: RPP pybind supported on Python versions - 3.8, 3.9, 3.10, 3.11, 3.12, 3.13. Please provide supported version${ColourReset}") | ||
| set(RPP_PYPACKAGE OFF) | ||
| endif() | ||
| endif() | ||
|
|
There was a problem hiding this comment.
Python version validation has a redundant check: the outer 'if' tests that PYTHON_VERSION_SUGGESTED is truthy, and the inner 'if' tests it again before checking the version range, so the outer check is unnecessary. Simplify to: if(PYTHON_VERSION_SUGGESTED AND (${PYTHON_VERSION_SUGGESTED} VERSION_LESS "3.8" OR ${PYTHON_VERSION_SUGGESTED} VERSION_GREATER "3.13"))
| if(PYTHON_VERSION_SUGGESTED) | |
| if(PYTHON_VERSION_SUGGESTED AND (${PYTHON_VERSION_SUGGESTED} VERSION_LESS "3.8" OR ${PYTHON_VERSION_SUGGESTED} VERSION_GREATER "3.13")) | |
| message("-- ${Red}NOTE: RPP pybind supported on Python versions - 3.8, 3.9, 3.10, 3.11, 3.12, 3.13. Please provide supported version${ColourReset}") | |
| set(RPP_PYPACKAGE OFF) | |
| endif() | |
| endif() | |
| if(PYTHON_VERSION_SUGGESTED AND (${PYTHON_VERSION_SUGGESTED} VERSION_LESS "3.8" OR ${PYTHON_VERSION_SUGGESTED} VERSION_GREATER "3.13")) | |
| message("-- ${Red}NOTE: RPP pybind supported on Python versions - 3.8, 3.9, 3.10, 3.11, 3.12, 3.13. Please provide supported version${ColourReset}") | |
| set(RPP_PYPACKAGE OFF) | |
| endif() |
| images = images.cpu() | ||
|
|
||
| batch_size = images.shape[0] | ||
| output = torch.empty_like(images).contiguous() |
There was a problem hiding this comment.
Inconsistent tensor initialization: Uses torch.empty_like instead of torch.zeros_like. For consistency with other functions (brightness, gamma_correction, flip, hue, vignette) and to avoid potential issues with uninitialized memory, consider using torch.zeros_like.
| output = torch.empty_like(images).contiguous() | |
| output = torch.zeros_like(images).contiguous() |
| def test_suite_parser_and_validator(): | ||
| """Parse and validate command-line arguments. | ||
|
|
||
| Mode is determined entirely by --test_type and --qa_mode: | ||
| test_type=0, qa_mode=0 → UNIT mode (save images) | ||
| test_type=0, qa_mode=1 → QA mode (compare with reference) | ||
| test_type=1 → PERF mode (timing) | ||
| """ | ||
| script_path = os.path.dirname(os.path.realpath(__file__)) | ||
| default_input_path = os.path.join(script_path, "../test_suite/TEST_IMAGES/three_images_mixed_src1") | ||
|
|
||
| case_min = min(augmentationCaseMap.keys()) | ||
| case_max = max(augmentationCaseMap.keys()) | ||
|
|
||
| parser = argparse.ArgumentParser( | ||
| description='RPP Test Suite', | ||
| formatter_class=argparse.RawDescriptionHelpFormatter, | ||
| epilog=""" | ||
| Examples: | ||
| # Unit testing (test_type=0, qa_mode=0) | ||
| python test_suite.py --test_type 0 --backend HOST --bitdepth u8 | ||
|
|
||
| # QA testing (test_type=0, qa_mode=1) | ||
| python test_suite.py --test_type 0 --backend HOST --bitdepth f32 --qa_mode 1 | ||
|
|
||
| # Performance testing (test_type=1) | ||
| python test_suite.py --test_type 1 --backend HIP --num_runs 100 --bitdepth f32 | ||
| """ | ||
| ) | ||
|
|
||
| parser.add_argument("--input_path1", type=str, default=default_input_path, | ||
| help="Path to input images") | ||
| parser.add_argument("--input_path2", type=str, default=default_input_path, | ||
| help="Path to second input folder (for blend operations)") | ||
| parser.add_argument("--case_start", type=int, default=case_min, | ||
| help=f"Start case number [{case_min}-{case_max}]") | ||
| parser.add_argument("--case_end", type=int, default=case_max, | ||
| help=f"End case number [{case_min}-{case_max}]") | ||
| parser.add_argument("--test_type", type=int, default=0, | ||
| help="0 = Unit/QA tests (governed by --qa_mode), 1 = Performance tests") | ||
| parser.add_argument("--case_list", nargs="+", | ||
| help="Specific augmentations to test") | ||
| parser.add_argument("--qa_mode", type=int, default=0, | ||
| help="0 = Unit mode (save images), 1 = QA mode (compare with reference). " | ||
| "Only effective when --test_type 0.") | ||
| parser.add_argument("--num_runs", type=int, default=1, | ||
| help="Number of performance test iterations") | ||
| parser.add_argument("--preserve_output", type=int, default=1, | ||
| help="0 = override previous outputs, 1 = preserve them") | ||
| parser.add_argument("--batch_size", type=int, default=3, | ||
| help="Batch size for testing") | ||
| parser.add_argument("--bitdepth", | ||
| choices=['u8', 'f32', 'f16', 'i8'], | ||
| nargs='+', | ||
| default=None, | ||
| help="Bit depth(s) to test. If omitted, all bitdepths are tested.") | ||
| parser.add_argument('--backend', | ||
| choices=['HOST', 'HIP'], | ||
| help='Backend to use. If omitted, both HOST and HIP are tested.') | ||
|
|
||
| args = parser.parse_args() | ||
|
|
||
| # Validate paths | ||
| if not validate_path(args.input_path1): | ||
| print(f"Warning: input_path1 '{args.input_path1}' not found, falling back to default.") | ||
| args.input_path1 = default_input_path | ||
| if not validate_path(args.input_path2): | ||
| args.input_path2 = default_input_path | ||
|
|
||
| args.default_input_path = default_input_path | ||
|
|
||
| # Validate case range | ||
| args.case_start = max(case_min, min(args.case_start, case_max)) | ||
| args.case_end = max(case_min, min(args.case_end, case_max)) | ||
| if args.case_end < args.case_start: | ||
| args.case_start, args.case_end = args.case_end, args.case_start | ||
|
|
||
| # Process case list | ||
| if args.case_list: | ||
| valid_cases = [] | ||
| for case in args.case_list: | ||
| if case.isdigit() and int(case) in augmentationCaseMap: | ||
| valid_cases.append(int(case)) | ||
| else: | ||
| for num, names in augmentationCaseMap.items(): | ||
| if case.lower() in [n.lower() for n in names]: | ||
| valid_cases.append(num) | ||
| break | ||
| args.case_list = valid_cases if valid_cases else None | ||
|
|
||
| if not args.case_list: | ||
| args.case_list = [ | ||
| k for k in sorted(augmentationCaseMap.keys()) | ||
| if args.case_start <= k <= args.case_end | ||
| ] | ||
|
|
||
| # Validate test_type | ||
| if args.test_type not in (0, 1): | ||
| print(f"Invalid test_type: {args.test_type}. Must be 0 (Unit/QA) or 1 (Performance).") | ||
| sys.exit(1) | ||
|
|
||
| # Set default num_runs for performance tests | ||
| if args.test_type == 1 and "--num_runs" not in sys.argv: | ||
| args.num_runs = 100 | ||
|
|
||
| return args |
There was a problem hiding this comment.
Potential path traversal vulnerability: The function uses os.path.join with user-supplied paths without validation. While validate_path() is called for input_path1 and input_path2, there's no validation for paths constructed dynamically within the config or passed through other parameters. Consider adding path sanitization to prevent potential directory traversal attacks.
There was a problem hiding this comment.
Added a function to check whether the path is valid. Done.
| handle = rppCreate(batch_size, backend_int) | ||
|
|
||
| alpha_array = [alpha] * batch_size | ||
| beta_array = [beta] * batch_size | ||
|
|
||
| _brightness(images, output, alpha_array, beta_array, roi_widths, roi_heights, handle, backend_int) | ||
| rppDestroy(handle, backend_int) |
There was a problem hiding this comment.
Missing error handling for C++ exceptions: The Python wrapper functions call C++ functions through pybind11 but don't wrap these calls in try-except blocks to provide user-friendly error messages. If the underlying C++ function throws an exception (e.g., from hipMalloc failures), the user will see a raw C++ exception instead of a helpful Python error message. Consider adding try-except blocks to catch and re-raise exceptions with more context about which operation failed.
| handle = rppCreate(batch_size, backend_int) | |
| alpha_array = [alpha] * batch_size | |
| beta_array = [beta] * batch_size | |
| _brightness(images, output, alpha_array, beta_array, roi_widths, roi_heights, handle, backend_int) | |
| rppDestroy(handle, backend_int) | |
| handle = None | |
| alpha_array = [alpha] * batch_size | |
| beta_array = [beta] * batch_size | |
| try: | |
| handle = rppCreate(batch_size, backend_int) | |
| _brightness( | |
| images, | |
| output, | |
| alpha_array, | |
| beta_array, | |
| roi_widths, | |
| roi_heights, | |
| handle, | |
| backend_int, | |
| ) | |
| except Exception as e: | |
| raise RuntimeError("RPP brightness operation failed") from e | |
| finally: | |
| if handle is not None: | |
| rppDestroy(handle, backend_int) |
spolifroni-amd
left a comment
There was a problem hiding this comment.
This needs a changelog entry and possibly also documentation. I'll handle documentation, if needed, but the changelog needs to be updated.
|
|
||
| # Simple imports - like rocAL | ||
| import rpp_pybind | ||
| from rpp_pybind.amd.rpp.rpp_types import get_default_backend, HOST, HIP |
There was a problem hiding this comment.
This should be one layer up — use only amd.rpp.<>.
| raise ValueError("Expected 4D tensor (B, C, H, W) or (B, H, W, C)") | ||
|
|
||
| # If last dim looks like channels (1, 3, 4 typical cases) | ||
| if images.shape[-1] in (1, 3, 4): |
There was a problem hiding this comment.
not sure if this is the best way to find the layout type. What if W=4?
| if backend is None: | ||
| backend = get_default_backend() | ||
|
|
||
| backend_int = backend.value if hasattr(backend, 'value') else int(backend) |
There was a problem hiding this comment.
Do not convert, since you have it in types.py. Use HOST or HIP directly
There was a problem hiding this comment.
I do not see the commit for this
spolifroni-amd
left a comment
There was a problem hiding this comment.
Changelog approved.
| raise ValueError("Expected 4D tensor (B, C, H, W) or (B, H, W, C)") | ||
|
|
||
| # Check for typical channel dimensions (1 for grayscale, 3 for RGB) | ||
| # Only use 1 and 3 to avoid ambiguity with width=4 |
There was a problem hiding this comment.
Remove this comment about 4.
There should be a cleaner way to check and set the layout. This does not seem correct.
| try: | ||
| # Create scratch buffer (size is same for both NCHW and NHWC layouts) | ||
| scratch_size = batch_size * images.shape[1] * images.shape[2] * images.shape[3] | ||
| scratch = torch.empty(scratch_size, dtype=torch.float32, device=images.device) |
There was a problem hiding this comment.
Why are you using torch instead of numpy? Is it for the GPU backend? Otherwise, we will have a hard dependency on torch.
| def is_gpu_available(): | ||
| """Check if GPU is available for HIP backend.""" | ||
| try: | ||
| import torch |
There was a problem hiding this comment.
We should not make RPP have a hard dependency on torch. If torch is not installed but a ROCm GPU installation exists, this will fail. We need to explore other options.
One option I have seen and used before is https://github.com/ROCm/hip-python, though I am not sure whether it is up to date.
There was a problem hiding this comment.
Pull request overview
Copilot reviewed 15 out of 16 changed files in this pull request and generated 14 comments.
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
You can also share your feedback on Copilot code review. Take the survey.
| #include <torch/extension.h> | ||
| #include <ATen/Dispatch.h> | ||
| #include <ATen/dlpack.h> | ||
| #include <dlpack/dlpack.h> |
| #define CHECK_RETURN_STATUS(x) do { \ | ||
| int retval = (x); \ | ||
| if (retval != 0) { \ | ||
| fprintf(stderr, "Runtime error: %s returned %d at %s:%d", #x, retval, __FILE__, __LINE__); \ | ||
| exit(-1); \ | ||
| } \ | ||
| } while (0) |
| if(backend == 1) { // HIP backend | ||
| // Allocate GPU memory | ||
| size_t alpha_size = alpha.size() * sizeof(float); | ||
| size_t beta_size = beta.size() * sizeof(float); | ||
| size_t roi_size = batch_size * sizeof(RpptROI); | ||
|
|
||
| CHECK_RETURN_STATUS(hipMalloc(&alpha_gpu_ptr, alpha_size)); | ||
| CHECK_RETURN_STATUS(hipMalloc(&beta_gpu_ptr, beta_size)); | ||
| CHECK_RETURN_STATUS(hipMalloc(&roi_gpu_ptr, roi_size)); | ||
|
|
||
| // Copy CPU data to GPU | ||
| CHECK_RETURN_STATUS(hipMemcpy(alpha_gpu_ptr, alpha_ptr, alpha_size, hipMemcpyHostToDevice)); | ||
| CHECK_RETURN_STATUS(hipMemcpy(beta_gpu_ptr, beta_ptr, beta_size, hipMemcpyHostToDevice)); | ||
| CHECK_RETURN_STATUS(hipMemcpy(roi_gpu_ptr, roi.data(), batch_size * sizeof(RpptROI), hipMemcpyHostToDevice)); | ||
|
|
||
| // Use GPU pointers for the function call | ||
| alpha_ptr = alpha_gpu_ptr; | ||
| beta_ptr = beta_gpu_ptr; | ||
| roi_ptr = roi_gpu_ptr; | ||
| } | ||
|
|
||
| RppStatus status = rppt_brightness(input_data.ptr, &src_desc, | ||
| output_data.ptr, &dst_desc, | ||
| alpha_ptr, | ||
| beta_ptr, | ||
| roi_ptr, RpptRoiType::XYWH, | ||
| rpp_handle, static_cast<RppBackend>(backend)); | ||
|
|
||
| if (status != RPP_SUCCESS) { | ||
| throw std::runtime_error("rppt_brightness failed"); | ||
| } | ||
|
|
||
| if(backend == 1) { | ||
| CHECK_RETURN_STATUS(hipFree(alpha_gpu_ptr)); | ||
| CHECK_RETURN_STATUS(hipFree(beta_gpu_ptr)); | ||
| CHECK_RETURN_STATUS(hipFree(roi_gpu_ptr)); | ||
| } |
| if (tensor.size(3) == 3 || tensor.size(3) == 1) { | ||
| // Last dim is channels -> NHWC (PKD3/PKD1) | ||
| return RpptLayout::NHWC; | ||
| } else if (tensor.size(1) == 3 || tensor.size(1) == 1) { | ||
| // Second dim is channels -> NCHW (PLN3/PLN1) | ||
| return RpptLayout::NCHW; | ||
| } | ||
| } | ||
| // Default to NCHW |
| void setup_tensor_descriptor(RpptDesc& desc, const TensorData& data) { | ||
| desc.dataType = data.dtype; | ||
| desc.layout = data.layout; | ||
| desc.numDims = data.shape.size(); | ||
| desc.offsetInBytes = 0; | ||
|
|
||
| if (desc.layout == RpptLayout::NCHW) { | ||
| desc.n = data.shape[0]; | ||
| desc.c = data.shape[1]; | ||
| desc.h = data.shape[2]; | ||
| desc.w = data.shape[3]; | ||
| desc.strides.nStride = data.strides[0]; | ||
| desc.strides.cStride = data.strides[1]; | ||
| desc.strides.hStride = data.strides[2]; | ||
| desc.strides.wStride = data.strides[3]; | ||
| } | ||
| else if (desc.layout == RpptLayout::NHWC) { | ||
| desc.n = data.shape[0]; | ||
| desc.h = data.shape[1]; | ||
| desc.w = data.shape[2]; | ||
| desc.c = data.shape[3]; | ||
|
|
||
| desc.strides.nStride = data.strides[0]; | ||
| desc.strides.hStride = data.strides[1]; | ||
| desc.strides.wStride = data.strides[2]; | ||
| desc.strides.cStride = data.strides[3]; | ||
| } | ||
| } |
| 'RppBackend', 'RppStatus', 'RpptDataType', 'RpptLayout', | ||
| 'HOST', 'HIP', | ||
| 'NCHW', 'NHWC', | ||
| 'U8', 'F32', 'F16', |
| output = fn.flip(images, | ||
| horizontal=horizontal_flips, | ||
| vertical=vertical_flips, | ||
| backend=types.HIP) |
| def run_performance_tests(self): | ||
| """Run performance tests (test_type=1).""" | ||
| device = 'cuda' if self.backend == rpp_type.HIP else 'cpu' | ||
| num_iterations = self.num_runs if self.num_runs > 1 else 100 |
| for idx, img_path in enumerate(self.test_images): | ||
| image_name = os.path.basename(img_path) | ||
| if self.test_type == 0 and self.qa_mode: | ||
| overall_total += 1 | ||
|
|
| self.DEFAULT_IMAGES_DIR = "../test_suite/TEST_IMAGES/three_images_mixed_src1" | ||
| self.REFERENCE_DIR = "../test_suite/REFERENCE_OUTPUT" | ||
|
|
||
| # QA always uses default images; UNIT/PERF use input_path if valid | ||
| if qa_mode: | ||
| self.TEST_IMAGES_DIR = self.DEFAULT_IMAGES_DIR | ||
| else: |