Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix some ARM/clang-cl feature detection issues #623

Merged
merged 2 commits on Dec 1, 2021
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
18 changes: 12 additions & 6 deletions cli/xsum_arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
#endif

/* makes the next part easier */
-#if defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
+#if (defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)) && !defined(_M_ARM64EC)
# define XSUM_ARCH_X64 1
# define XSUM_ARCH_X86 "x86_64"
#elif defined(__i386__) || defined(_M_IX86) || defined(_M_IX86_FP)
Expand All @@ -102,29 +102,35 @@
# else
# define XSUM_ARCH XSUM_ARCH_X86
# endif
-#elif defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64)
+#elif defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
# define XSUM_ARCH "aarch64 + NEON"
#elif defined(__arm__) || defined(__thumb__) || defined(__thumb2__) || defined(_M_ARM)
/* ARM has a lot of different features that can change xxHash significantly. */
-# if defined(__thumb2__) || (defined(__thumb__) && (__thumb__ == 2 || __ARM_ARCH >= 7))
+# ifdef __ARM_ARCH
+# define XSUM_ARCH_ARM_VER XSUM_EXPAND_AND_QUOTE(__ARM_ARCH)
+# else
+# define XSUM_ARCH_ARM_VER XSUM_EXPAND_AND_QUOTE(_M_ARM)
+# endif
+# if defined(_M_ARM) /* windows arm is always thumb-2 */ \
+|| defined(__thumb2__) || (defined(__thumb__) && (__thumb__ == 2 || __ARM_ARCH >= 7))
# define XSUM_ARCH_THUMB " Thumb-2"
# elif defined(__thumb__)
# define XSUM_ARCH_THUMB " Thumb-1"
# else
# define XSUM_ARCH_THUMB ""
# endif
/* ARMv7 has unaligned by default */
-# if defined(__ARM_FEATURE_UNALIGNED) || __ARM_ARCH >= 7 || defined(_M_ARMV7VE)
+# if defined(__ARM_FEATURE_UNALIGNED) || __ARM_ARCH >= 7 || defined(_M_ARM)
# define XSUM_ARCH_UNALIGNED " + unaligned"
# else
# define XSUM_ARCH_UNALIGNED ""
# endif
-# if defined(__ARM_NEON) || defined(__ARM_NEON__)
+# if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(_M_ARM)
# define XSUM_ARCH_NEON " + NEON"
# else
# define XSUM_ARCH_NEON ""
# endif
-# define XSUM_ARCH "ARMv" XSUM_EXPAND_AND_QUOTE(__ARM_ARCH) XSUM_ARCH_THUMB XSUM_ARCH_NEON XSUM_ARCH_UNALIGNED
+# define XSUM_ARCH "ARMv" XSUM_ARCH_ARM_VER XSUM_ARCH_THUMB XSUM_ARCH_NEON XSUM_ARCH_UNALIGNED
#elif defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
# if defined(__GNUC__) && defined(__POWER9_VECTOR__)
# define XSUM_ARCH "ppc64 + POWER9 vector"
Expand Down
45 changes: 24 additions & 21 deletions xxhash.h
Original file line number Diff line number Diff line change
Expand Up @@ -2708,17 +2708,21 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
# define XXH_unlikely(x) (x)
#endif

-#if defined(__GNUC__)
-# if defined(__AVX2__)
-# include <immintrin.h>
-# elif defined(__SSE2__)
-# include <emmintrin.h>
-# elif defined(__ARM_NEON__) || defined(__ARM_NEON)
+#if defined(__GNUC__) || defined(__clang__)
+# if defined(__ARM_NEON__) || defined(__ARM_NEON) \
+|| defined(__aarch64__) || defined(_M_ARM) \
+|| defined(_M_ARM64) || defined(_M_ARM64EC)
# define inline __inline__ /* circumvent a clang bug */
# include <arm_neon.h>
# undef inline
+# elif defined(__AVX2__)
+# include <immintrin.h>
+# elif defined(__SSE2__)
+# include <emmintrin.h>
# endif
-#elif defined(_MSC_VER)
+#endif
+
+#if defined(_MSC_VER)
# include <intrin.h>
#endif

Expand Down Expand Up @@ -2856,20 +2860,20 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
#endif

#ifndef XXH_VECTOR /* can be defined on command line */
-# if defined(__AVX512F__)
-# define XXH_VECTOR XXH_AVX512
-# elif defined(__AVX2__)
-# define XXH_VECTOR XXH_AVX2
-# elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
-# define XXH_VECTOR XXH_SSE2
-# elif ( \
+# if ( \
defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
-|| defined(_M_ARM64) || defined(_M_ARM_ARMV7VE) /* msvc */ \
+|| defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
) && ( \
defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
|| (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
)
# define XXH_VECTOR XXH_NEON
+# elif defined(__AVX512F__)
+# define XXH_VECTOR XXH_AVX512
+# elif defined(__AVX2__)
+# define XXH_VECTOR XXH_AVX2
+# elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
+# define XXH_VECTOR XXH_SSE2
# elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
|| (defined(__s390x__) && defined(__VEC__)) \
&& defined(__GNUC__) /* TODO: IBM XL */
Expand Down Expand Up @@ -3019,8 +3023,8 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
* }
*/
# if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \
-&& defined(__GNUC__) \
-&& !defined(__aarch64__) && !defined(__arm64__) && !defined(_M_ARM64)
+&& (defined(__GNUC__) || defined(__clang__)) \
+&& (defined(__arm__) || defined(__thumb__) || defined(_M_ARM))
# define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \
do { \
/* Undocumented GCC/Clang operand modifier: %e0 = lower D half, %f0 = upper D half */ \
Expand Down Expand Up @@ -3213,7 +3217,6 @@ XXH_mult32to64(xxh_u64 x, xxh_u64 y)
return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
}
#elif defined(_MSC_VER) && defined(_M_IX86)
-# include <intrin.h>
# define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
#else
/*
Expand Down Expand Up @@ -3253,7 +3256,7 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
* In that case it is best to use the portable one.
* https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
*/
-#if defined(__GNUC__) && !defined(__wasm__) \
+#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \
&& defined(__SIZEOF_INT128__) \
|| (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)

Expand All @@ -3270,7 +3273,7 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
*
* This compiles to single operand MUL on x64.
*/
-#elif defined(_M_X64) || defined(_M_IA64)
+#elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)

#ifndef _MSC_VER
# pragma intrinsic(_umul128)
Expand All @@ -3287,7 +3290,7 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
*
* This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.
*/
-#elif defined(_M_ARM64)
+#elif defined(_M_ARM64) || defined(_M_ARM64EC)

#ifndef _MSC_VER
# pragma intrinsic(__umulh)
Expand Down