Skip to content

Commit

Permalink
Detect missing vld1q_f32_x2 and provide replacement if necessary (#1358)
Browse files Browse the repository at this point in the history
Older versions of GCC (< 9) do not provide the vld1q_f32_x2 intrinsic on
aarch64, so we must detect when vld1q_f32_x2 is not available and provide
our own implementation instead.

Signed-off-by: Yining Karl Li <betajippity@gmail.com>
  • Loading branch information
betajippity committed Mar 20, 2023
1 parent 673205a commit a2e9799
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 0 deletions.
1 change: 1 addition & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ expand_template(
"#cmakedefine OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX 1": "/* #undef OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX */",
"#cmakedefine OPENEXR_IMF_HAVE_LINUX_PROCFS 1": "/* #undef OPENEXR_IMF_HAVE_LINUX_PROCFS */",
"#cmakedefine OPENEXR_IMF_HAVE_SYSCONF_NPROCESSORS_ONLN 1": "/* #undef OPENEXR_IMF_HAVE_SYSCONF_NPROCESSORS_ONLN */",
"#cmakedefine OPENEXR_MISSING_ARM_VLD1 0": "/* #undef OPENEXR_MISSING_ARM_VLD1 */",
},
template = "cmake/OpenEXRConfigInternal.h.in",
)
Expand Down
7 changes: 7 additions & 0 deletions cmake/OpenEXRConfigInternal.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@

#cmakedefine OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX 1

//
// Define if we need to shim in our own implementation of vld1q_f32_x2 for
// older compilers that are missing x2 Neon intrinsics on aarch64.
//
// Consumers must test this with #ifdef / defined(): when the macro is
// defined its value is 0, so a plain "#if OPENEXR_MISSING_ARM_VLD1" would
// evaluate to false.
//

#cmakedefine OPENEXR_MISSING_ARM_VLD1 0

// clang-format on

#endif // INCLUDED_OPENEXR_INTERNAL_CONFIG_H
18 changes: 18 additions & 0 deletions cmake/OpenEXRSetup.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -312,3 +312,21 @@ else()
message(STATUS "Imath interface dirs ${IMATH_HEADER_ONLY_INCLUDE_DIRS}")
endif()
endif()

###########################################
# Check if we need to emulate vld1q_f32_x2
###########################################

# Probe <arm_neon.h> for vld1q_f32_x2 on aarch64. When the probe does not
# compile, OPENEXR_MISSING_ARM_VLD1 is set so that the replacement
# implementation guarded by that macro is enabled.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
  include(CheckCSourceCompiles)

  # The probe is only compiled and linked, never executed. It still hands the
  # intrinsic a full 8-float buffer (two 4-lane vectors) and consumes the
  # result, so that strict warning flags in the user's C flags are less likely
  # to reject a toolchain that does provide the intrinsic.
  check_c_source_compiles("#include <arm_neon.h>
int main() {
float a[8] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
float32x4x2_t v = vld1q_f32_x2(a);
(void) v;
return 0;
}" HAS_VLD1)

  if(NOT HAS_VLD1)
    set(OPENEXR_MISSING_ARM_VLD1 TRUE)
  endif()
endif()
14 changes: 14 additions & 0 deletions src/lib/OpenEXR/ImfSimd.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,18 @@ extern "C" {

}

#include "OpenEXRConfigInternal.h"
#ifdef OPENEXR_MISSING_ARM_VLD1
/* Workaround for missing vld1q_f32_x2 in older gcc versions.
 *
 * Loads eight consecutive floats starting at __a into a pair of 4-lane
 * vectors with a single LD1 instruction and returns them as a
 * float32x4x2_t. */

__extension__ extern __inline float32x4x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_f32_x2 (const float32_t* __a)
{
    float32x4x2_t ret;
    /* The trailing "m" input is not referenced by the template. It exists
       only to tell the compiler that the instruction reads the 8 floats at
       __a, so stores to that buffer made before the call are neither
       reordered past the load nor discarded as dead. */
    asm ("ld1 {%S0.4s - %T0.4s}, [%1]"
         : "=w"(ret)
         : "r"(__a), "m"(*(const float32_t (*)[8]) __a)
         :);
    return ret;
}
#endif

#endif

0 comments on commit a2e9799

Please sign in to comment.