Skip to content

Commit

Permalink
Fix ARMv7 build by making recent ZIP NEON optimizations be ARMv8 (aarch64) only (#1366)
Browse files Browse the repository at this point in the history

Should fix #1365. A recent PR (#1348) added NEON-accelerated code paths
for ZIP filtering, but that code uses several instructions that are
ARMv8 (aarch64) only and thus fails to build on 32-bit ARM (armv7)
platforms. Make these optimizations only kick in when building
for 64-bit ARM platforms.

Signed-off-by: Aras Pranckevicius <aras@nesnausk.org>
  • Loading branch information
aras-p committed Mar 20, 2023
1 parent a2e9799 commit f29c01b
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 7 deletions.
4 changes: 4 additions & 0 deletions src/lib/OpenEXR/ImfSimd.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@
# define IMF_HAVE_NEON
#endif

#if defined(__aarch64__)
# define IMF_HAVE_NEON_AARCH64 1
#endif

extern "C" {
#ifdef IMF_HAVE_SSE2
# include <emmintrin.h>
Expand Down
6 changes: 3 additions & 3 deletions src/lib/OpenEXR/ImfZip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ reconstruct_sse41 (char* buf, size_t outSize)

#endif

#ifdef IMF_HAVE_NEON
#ifdef IMF_HAVE_NEON_AARCH64

void
reconstruct_neon (char* buf, size_t outSize)
Expand Down Expand Up @@ -262,7 +262,7 @@ interleave_sse2 (const char* source, size_t outSize, char* out)

#endif

#ifdef IMF_HAVE_NEON
#ifdef IMF_HAVE_NEON_AARCH64

void
interleave_neon (const char* source, size_t outSize, char* out)
Expand Down Expand Up @@ -380,7 +380,7 @@ Zip::initializeFuncs ()
}
#endif

#ifdef IMF_HAVE_NEON
#ifdef IMF_HAVE_NEON_AARCH64
reconstruct = reconstruct_neon;
interleave = interleave_neon;
#endif
Expand Down
8 changes: 4 additions & 4 deletions src/lib/OpenEXRCore/internal_zip.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
# define IMF_HAVE_SSE4_1 1
# include <smmintrin.h>
#endif
#if defined(__ARM_NEON)
# define IMF_HAVE_NEON 1
#if defined(__aarch64__)
# define IMF_HAVE_NEON_AARCH64 1
# include <arm_neon.h>
#endif

Expand Down Expand Up @@ -78,7 +78,7 @@ reconstruct (uint8_t* buf, uint64_t outSize)
prev = d;
}
}
#elif defined(IMF_HAVE_NEON)
#elif defined(IMF_HAVE_NEON_AARCH64)
static void
reconstruct (uint8_t* buf, uint64_t outSize)
{
Expand Down Expand Up @@ -174,7 +174,7 @@ interleave (uint8_t* out, const uint8_t* source, uint64_t outSize)
*(sOut++) = (i % 2 == 0) ? *(t1++) : *(t2++);
}

#elif defined(IMF_HAVE_NEON)
#elif defined(IMF_HAVE_NEON_AARCH64)
static void
interleave (uint8_t* out, const uint8_t* source, uint64_t outSize)
{
Expand Down

0 comments on commit f29c01b

Please sign in to comment.