From dc0e2412fdcbb365a2ff8d72d41f0e60f5eb773a Mon Sep 17 00:00:00 2001
From: Yining Karl Li
Date: Thu, 16 Mar 2023 09:50:19 -0700
Subject: [PATCH] Detect missing vld1q_f32_x2 and provide replacement if
 necessary

Older versions of GCC (< 9) do not provide the vld1q_f32_x2 intrinsic on
aarch64, so we must detect when vld1q_f32_x2 is not available and
provide our own implementation instead.

The configure-time probe includes <arm_neon.h> and calls the intrinsic on
an 8-float buffer (the amount vld1q_f32_x2 reads). The replacement is a
single inline-asm LD1; it carries a "memory" clobber because it reads
through its pointer argument, which is not otherwise visible to the
compiler as a memory operand.

Signed-off-by: Yining Karl Li
---
 BUILD.bazel                      |  1 +
 cmake/OpenEXRConfigInternal.h.in |  7 +++++++
 cmake/OpenEXRSetup.cmake         | 18 ++++++++++++++++++
 src/lib/OpenEXR/ImfSimd.h        | 14 ++++++++++++++
 4 files changed, 40 insertions(+)

diff --git a/BUILD.bazel b/BUILD.bazel
index f9283d1143..56facf391d 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -72,6 +72,7 @@ expand_template(
         "#cmakedefine OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX 1": "/* #undef OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX */",
         "#cmakedefine OPENEXR_IMF_HAVE_LINUX_PROCFS 1": "/* #undef OPENEXR_IMF_HAVE_LINUX_PROCFS */",
         "#cmakedefine OPENEXR_IMF_HAVE_SYSCONF_NPROCESSORS_ONLN 1": "/* #undef OPENEXR_IMF_HAVE_SYSCONF_NPROCESSORS_ONLN */",
+        "#cmakedefine OPENEXR_MISSING_ARM_VLD1 0": "/* #undef OPENEXR_MISSING_ARM_VLD1 */",
     },
     template = "cmake/OpenEXRConfigInternal.h.in",
 )
diff --git a/cmake/OpenEXRConfigInternal.h.in b/cmake/OpenEXRConfigInternal.h.in
index 30c6f1d586..2e242a6741 100644
--- a/cmake/OpenEXRConfigInternal.h.in
+++ b/cmake/OpenEXRConfigInternal.h.in
@@ -47,6 +47,13 @@
 
 #cmakedefine OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX 1
 
+//
+// Define if we need to shim in our own implementation of vld1q_f32_x2 for
+// older compilers that are missing x2 Neon intrinsics on aarch64
+//
+
+#cmakedefine OPENEXR_MISSING_ARM_VLD1 0
+
 // clang-format on
 
 #endif // INCLUDED_OPENEXR_INTERNAL_CONFIG_H
diff --git a/cmake/OpenEXRSetup.cmake b/cmake/OpenEXRSetup.cmake
index 757f233223..458c1b8987 100644
--- a/cmake/OpenEXRSetup.cmake
+++ b/cmake/OpenEXRSetup.cmake
@@ -312,3 +312,21 @@ else()
     message(STATUS "Imath interface dirs ${IMATH_HEADER_ONLY_INCLUDE_DIRS}")
   endif()
 endif()
+
+###########################################
+# Check if we need to emulate vld1q_f32_x2
+###########################################
+
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
+  include(CheckCSourceCompiles)
+  check_c_source_compiles("#include <arm_neon.h>
+int main() {
+  float a[8] = {0.0f};
+  vld1q_f32_x2(a);
+  return 0;
+}" HAS_VLD1)
+
+  if(NOT HAS_VLD1)
+    set(OPENEXR_MISSING_ARM_VLD1 TRUE)
+  endif()
+endif()
diff --git a/src/lib/OpenEXR/ImfSimd.h b/src/lib/OpenEXR/ImfSimd.h
index 3053a5d4e4..810b1b1bc3 100644
--- a/src/lib/OpenEXR/ImfSimd.h
+++ b/src/lib/OpenEXR/ImfSimd.h
@@ -62,4 +62,18 @@ extern "C" {
 
 }
 
+#include "OpenEXRConfigInternal.h"
+#ifdef OPENEXR_MISSING_ARM_VLD1
+/* Workaround for missing vld1q_f32_x2 in older gcc versions; the "memory" clobber covers the load through __a. */
+
+__extension__ extern __inline float32x4x2_t
+    __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+    vld1q_f32_x2 (const float32_t* __a)
+{
+    float32x4x2_t ret;
+    asm ("ld1 {%S0.4s - %T0.4s}, [%1]" : "=w"(ret) : "r"(__a) : "memory");
+    return ret;
+}
+#endif
+
 #endif