From e663d00fbe5cd00aa022a28027d0f8a72c98f11d Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Mon, 27 Feb 2017 11:15:28 +0100 Subject: [PATCH] Support AVX for 32 bit platforms _mm256_extract_epi64 is not available for 32 bit platforms, but it can be replaced by "a very simple workaround". Signed-off-by: Stefan Weil --- arch/dotproductavx.cpp | 13 ++++++------- configure.ac | 10 +++------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/arch/dotproductavx.cpp b/arch/dotproductavx.cpp index 9c84226439..b2025b517d 100644 --- a/arch/dotproductavx.cpp +++ b/arch/dotproductavx.cpp @@ -16,9 +16,8 @@ // limitations under the License. /////////////////////////////////////////////////////////////////////// -#if !defined(__AVX__) || defined(__i386__) +#if !defined(__AVX__) // Implementation for non-avx archs. -// Also used for 32 bit AVX archs because of missing _mm256_extract_epi64. #include "dotproductavx.h" #include @@ -92,13 +91,13 @@ double DotProductAVX(const double* u, const double* v, int n) { // fool the instrinsics into thinking we are extracting the bottom int64. auto cast_sum = _mm256_castpd_si256(sum); *(reinterpret_cast(&result)) = -#ifndef _WIN32 - _mm256_extract_epi64(cast_sum, 0) -#else - // this is a very simple workaround that probably could be activated - // for all other platforms that do not have _mm256_extract_epi64 +#if defined(_WIN32) || defined(__i386__) + // This is a very simple workaround that is activated + // for all platforms that do not have _mm256_extract_epi64. // _mm256_extract_epi64(X, Y) == ((uint64_t*)&X)[Y] ((uint64_t*)&cast_sum)[0] +#else + _mm256_extract_epi64(cast_sum, 0) #endif ; while (offset < n) { diff --git a/configure.ac b/configure.ac index 1c65024d09..490a45de7e 100644 --- a/configure.ac +++ b/configure.ac @@ -119,13 +119,9 @@ esac AM_CONDITIONAL([AVX_OPT], false) AM_CONDITIONAL([SSE41_OPT], false) -# The current implementation for AVX uses 64 bit code. -AC_CHECK_SIZEOF([void *]) -if test "$ac_cv_sizeof_void_p" = "8"; then - AX_CHECK_COMPILE_FLAG([-mavx], [avx=true], [avx=false]) - if $avx; then - AM_CONDITIONAL([AVX_OPT], true) - fi +AX_CHECK_COMPILE_FLAG([-mavx], [avx=true], [avx=false]) +if $avx; then + AM_CONDITIONAL([AVX_OPT], true) fi AX_CHECK_COMPILE_FLAG([-msse4.1], [sse41=true], [sse41=false])