Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Detect missing vld1q_f32_x2 and provide replacement if necessary #1358

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ expand_template(
"#cmakedefine OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX 1": "/* #undef OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX */",
"#cmakedefine OPENEXR_IMF_HAVE_LINUX_PROCFS 1": "/* #undef OPENEXR_IMF_HAVE_LINUX_PROCFS */",
"#cmakedefine OPENEXR_IMF_HAVE_SYSCONF_NPROCESSORS_ONLN 1": "/* #undef OPENEXR_IMF_HAVE_SYSCONF_NPROCESSORS_ONLN */",
"#cmakedefine OPENEXR_MISSING_ARM_VLD1 0": "/* #undef OPENEXR_MISSING_ARM_VLD1 */",
},
template = "cmake/OpenEXRConfigInternal.h.in",
)
Expand Down
7 changes: 7 additions & 0 deletions cmake/OpenEXRConfigInternal.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@

#cmakedefine OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX 1

//
// Define if we need to shim in our own implementation of vld1q_f32_x2 for
// older compilers that are missing x2 Neon intrinsics on aarch64.
//
// ImfSimd.h tests this macro with #ifdef, so only whether it is defined
// matters; the value it expands to is not examined there.
//
// NOTE: BUILD.bazel matches the #cmakedefine line below verbatim in its
// expand_template substitutions, so keep the two in sync when editing.
//

#cmakedefine OPENEXR_MISSING_ARM_VLD1 0

// clang-format on

#endif // INCLUDED_OPENEXR_INTERNAL_CONFIG_H
18 changes: 18 additions & 0 deletions cmake/OpenEXRSetup.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -312,3 +312,21 @@ else()
message(STATUS "Imath interface dirs ${IMATH_HEADER_ONLY_INCLUDE_DIRS}")
endif()
endif()

###########################################
# Check if we need to emulate vld1q_f32_x2
###########################################

# Probe the C compiler for the vld1q_f32_x2 Neon intrinsic. When the probe
# fails, OPENEXR_MISSING_ARM_VLD1 is set so that the generated config header
# defines the macro and ImfSimd.h supplies its own replacement.
#
# The probe is compiled and linked but never run. It still uses a buffer
# holding the full eight floats that vld1q_f32_x2 reads and consumes the
# result, so that warning-as-error flags (array bounds, unused value) cannot
# make the probe fail on a compiler that does provide the intrinsic.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
  include(CheckCSourceCompiles)
  check_c_source_compiles("#include <arm_neon.h>
int main(void) {
  float a[8] = {0};
  float32x4x2_t v = vld1q_f32_x2(a);
  (void) v;
  return 0;
}" HAS_VLD1)

  if(NOT HAS_VLD1)
    set(OPENEXR_MISSING_ARM_VLD1 TRUE)
  endif()
endif()
14 changes: 14 additions & 0 deletions src/lib/OpenEXR/ImfSimd.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,18 @@ extern "C" {

}

#include "OpenEXRConfigInternal.h"
#ifdef OPENEXR_MISSING_ARM_VLD1
/*
 * Workaround for missing vld1q_f32_x2 in older gcc versions.
 *
 * Loads the eight consecutive floats starting at __a with a single ld1
 * instruction and returns them as a float32x4x2_t. The caller must make
 * sure that eight floats are readable at __a.
 */

__extension__ extern __inline float32x4x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_f32_x2 (const float32_t* __a)
{
    float32x4x2_t ret;
    /*
     * The trailing "m" operand is not referenced by the template. It only
     * tells the compiler that the asm reads the eight floats at __a, so
     * that earlier stores to that memory are not reordered past, or
     * dropped before, the load.
     */
    asm ("ld1 {%S0.4s - %T0.4s}, [%1]"
         : "=w"(ret)
         : "r"(__a), "m"(*(const float32_t (*)[8]) __a));
    return ret;
}
#endif

#endif