Navigation Menu

Skip to content

Commit

Permalink
Support AVX for 32 bit platforms
Browse files Browse the repository at this point in the history
_mm256_extract_epi64 is not available for 32 bit platforms,
but it can be replaced by "a very simple workaround".

Signed-off-by: Stefan Weil <sw@weilnetz.de>
  • Loading branch information
stweil committed Feb 27, 2017
1 parent 6a7831b commit e663d00
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 14 deletions.
13 changes: 6 additions & 7 deletions arch/dotproductavx.cpp
Expand Up @@ -16,9 +16,8 @@
// limitations under the License.
///////////////////////////////////////////////////////////////////////

#if !defined(__AVX__) || defined(__i386__)
#if !defined(__AVX__)
// Implementation for non-avx archs.
// Also used for 32 bit AVX archs because of missing _mm256_extract_epi64.

#include "dotproductavx.h"
#include <stdio.h>
Expand Down Expand Up @@ -92,13 +91,13 @@ double DotProductAVX(const double* u, const double* v, int n) {
// fool the intrinsics into thinking we are extracting the bottom int64.
auto cast_sum = _mm256_castpd_si256(sum);
*(reinterpret_cast<inT64*>(&result)) =
#ifndef _WIN32
_mm256_extract_epi64(cast_sum, 0)
#else
// this is a very simple workaround that probably could be activated
// for all other platforms that do not have _mm256_extract_epi64
#if defined(_WIN32) || defined(__i386__)
// This is a very simple workaround that is activated
// for all platforms that do not have _mm256_extract_epi64.
// _mm256_extract_epi64(X, Y) == ((uint64_t*)&X)[Y]
((uint64_t*)&cast_sum)[0]
#else
_mm256_extract_epi64(cast_sum, 0)
#endif
;
while (offset < n) {
Expand Down
10 changes: 3 additions & 7 deletions configure.ac
Expand Up @@ -119,13 +119,9 @@ esac
AM_CONDITIONAL([AVX_OPT], false)
AM_CONDITIONAL([SSE41_OPT], false)

# The current implementation for AVX uses 64 bit code.
AC_CHECK_SIZEOF([void *])
if test "$ac_cv_sizeof_void_p" = "8"; then
AX_CHECK_COMPILE_FLAG([-mavx], [avx=true], [avx=false])
if $avx; then
AM_CONDITIONAL([AVX_OPT], true)
fi
AX_CHECK_COMPILE_FLAG([-mavx], [avx=true], [avx=false])
if $avx; then
AM_CONDITIONAL([AVX_OPT], true)
fi

AX_CHECK_COMPILE_FLAG([-msse4.1], [sse41=true], [sse41=false])
Expand Down

0 comments on commit e663d00

Please sign in to comment.