Skip to content

Commit

Permalink
Detect missing vld1q_f32_x2 and provide replacement if necessary
Browse files Browse the repository at this point in the history
Older versions of GCC (< 9) do not provide the vld1q_f32_x2 intrinsic on
aarch64, so we must detect when vld1q_f32_x2 is not available and provide
our own implementation instead.

Signed-off-by: Yining Karl Li <betajippity@gmail.com>
  • Loading branch information
betajippity committed Mar 16, 2023
1 parent 3f97750 commit 1b9d235
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 0 deletions.
18 changes: 18 additions & 0 deletions cmake/OpenEXRSetup.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -312,3 +312,21 @@ else()
message(STATUS "Imath interface dirs ${IMATH_HEADER_ONLY_INCLUDE_DIRS}")
endif()
endif()

###########################################
# Check if we need to emulate vld1q_f32_x2
###########################################

# On aarch64, probe whether <arm_neon.h> provides the vld1q_f32_x2 intrinsic.
# When it does not, MISSING_ARM_VLD1 is added to the C++ compile flags so the
# sources can supply their own replacement. The result is cached in HAS_VLD1.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
  # Probe with the C++ compiler rather than the C compiler: the definition
  # below only affects C++ sources, and in C++ an undeclared intrinsic is a
  # hard compile error. With older C compilers it is merely an
  # implicit-declaration warning, so a C probe only fails at link time, and
  # the link step is skipped when CMAKE_TRY_COMPILE_TARGET_TYPE is
  # STATIC_LIBRARY (common in cross-compiling toolchains).
  include(CheckCXXSourceCompiles)
  # vld1q_f32_x2 reads eight consecutive floats, so give it a full buffer.
  check_cxx_source_compiles("#include <arm_neon.h>
int main() {
  float a[8] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
  (void)vld1q_f32_x2(a);
  return 0;
}" HAS_VLD1)

  if(NOT HAS_VLD1)
    # Leading space keeps the new flag separate from any existing flags.
    string(APPEND CMAKE_CXX_FLAGS " -DMISSING_ARM_VLD1")
  endif()
endif()
13 changes: 13 additions & 0 deletions src/lib/OpenEXR/ImfSimd.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,17 @@ extern "C" {

}

#if defined (MISSING_ARM_VLD1)
/* Workaround for missing vld1q_f32_x2 in older gcc versions.
 *
 * Loads eight consecutive floats starting at __a into a pair of four-lane
 * vectors using a single two-register LD1 instruction, and returns the pair.
 * Only compiled when the build system defines MISSING_ARM_VLD1.
 */

__extension__ extern __inline float32x4x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_f32_x2 (const float32_t* __a)
{
    float32x4x2_t ret;
    /* The "r" operand only passes the pointer value; it does not tell the
     * compiler that the instruction reads the memory behind it. The "memory"
     * clobber makes that dependency explicit so the load is not moved across
     * earlier stores to the buffer or merged with a previous, stale load. */
    asm ("ld1 {%S0.4s - %T0.4s}, [%1]" : "=w"(ret) : "r"(__a) : "memory");
    return ret;
}
#endif

#endif

0 comments on commit 1b9d235

Please sign in to comment.