Skip to content

Commit

Permalink
Separate routines to query 128-bit AVX support
Browse files Browse the repository at this point in the history
This also disables 256-bit AVX for current AMD processors
that work better with 128-bit AVX. Note that this is not
detected by the timing routines since the effect is only
apparent when using multiple cores.
  • Loading branch information
Erik Lindahl committed May 7, 2015
1 parent a1cf415 commit cd2b27d
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 12 deletions.
6 changes: 2 additions & 4 deletions dft/conf.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,15 @@ void X(dft_conf_standard)(planner *p)
#endif
#if HAVE_AVX
if (X(have_simd_avx)())
{
X(solvtab_exec)(X(solvtab_dft_avx), p);
if (X(have_simd_avx_128)())
X(solvtab_exec)(X(solvtab_dft_avx_128), p);
}
#endif
#if HAVE_AVX2
if (X(have_simd_avx2)())
{
X(solvtab_exec)(X(solvtab_dft_avx2), p);
if (X(have_simd_avx2_128)())
X(solvtab_exec)(X(solvtab_dft_avx2_128), p);
}
#endif
#if HAVE_AVX512
if (X(have_simd_avx512)())
Expand Down
2 changes: 2 additions & 0 deletions kernel/ifftw.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,9 @@ extern void X(extract_reim)(int sign, R *c, R **r, R **i);

/*
 * Runtime SIMD capability queries. Each takes no arguments and returns an
 * int that callers use directly as a condition before registering the
 * matching solver tables. The *_128 variants are queried separately from the
 * plain AVX/AVX2 variants, so the two answers may differ on a given CPU.
 */
extern int X(have_simd_sse2)(void);
extern int X(have_simd_avx)(void);
extern int X(have_simd_avx_128)(void);
extern int X(have_simd_avx2)(void);
extern int X(have_simd_avx2_128)(void);
extern int X(have_simd_avx512)(void);
extern int X(have_simd_altivec)(void);
extern int X(have_simd_vsx)(void);
Expand Down
6 changes: 2 additions & 4 deletions rdft/conf.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,15 @@ void X(rdft_conf_standard)(planner *p)
#endif
#if HAVE_AVX
if (X(have_simd_avx)())
{
X(solvtab_exec)(X(solvtab_rdft_avx), p);
if (X(have_simd_avx_128)())
X(solvtab_exec)(X(solvtab_rdft_avx_128), p);
}
#endif
#if HAVE_AVX2
if (X(have_simd_avx2)())
{
X(solvtab_exec)(X(solvtab_rdft_avx2), p);
if (X(have_simd_avx2_128)())
X(solvtab_exec)(X(solvtab_rdft_avx2_128), p);
}
#endif
#if HAVE_AVX512
if (X(have_simd_avx512)())
Expand Down
47 changes: 45 additions & 2 deletions simd-support/avx.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

#include "amd64-cpuid.h"

int X(have_simd_avx)(void)
int X(have_simd_avx_128)(void)
{
static int init = 0, res;

Expand All @@ -44,7 +44,7 @@ int X(have_simd_avx)(void)

#include "x86-cpuid.h"

int X(have_simd_avx)(void)
int X(have_simd_avx_128)(void)
{
static int init = 0, res;

Expand All @@ -57,6 +57,49 @@ int X(have_simd_avx)(void)
}
return res;
}

/*
 * X(have_simd_avx) is intentionally NOT defined in this branch: its single
 * definition follows the #endif below. A second definition here would be a
 * duplicate symbol, and the placeholder that used to sit here was
 * pseudo-code rather than valid C.
 */

#endif

/*
 * Report whether the 256-bit AVX code paths should be used.
 *
 * Returns 0 when the CPU vendor string is "AuthenticAMD"; otherwise returns
 * whatever X(have_simd_avx_128)() reports. The answer is computed on the
 * first call and cached in function-local statics for later calls.
 */
int X(have_simd_avx)(void)
{
     /*
      * Vendor string "AuthenticAMD" as returned by cpuid with eax = 0:
      * the twelve characters are spread over EBX, EDX, ECX (in that order),
      * four bytes per register, least-significant byte first.
      */
     enum {
          AMD_VENDOR_EBX = 0x68747541, /* "Auth" */
          AMD_VENDOR_EDX = 0x69746e65, /* "enti" */
          AMD_VENDOR_ECX = 0x444d4163  /* "cAMD" */
     };
     static int init = 0, res;
     int eax, ebx, ecx, edx;

     if (!init) {
          /* Query the vendor identification to check for an AMD CPU. */
          cpuid_all(0, 0, &eax, &ebx, &ecx, &edx);

          if (ebx == AMD_VENDOR_EBX
              && ecx == AMD_VENDOR_ECX
              && edx == AMD_VENDOR_EDX) {
               /*
                * This is an AMD chip. While AMD does support 256-bit AVX, it
                * does so by separately scheduling two 128-bit lanes to both
                * halves of a compute unit (pair of cores). Since 256-bit AVX
                * also requires more permutations on the load, this is a
                * _double_ loss for us.
                *
                * Unfortunately FFTW will often not detect this, since the
                * timing script only runs a single thread; the 256-bit
                * version might then appear faster although it will be (much)
                * slower in actual multi-core use.
                *
                * To work around this, always disable 256-bit AVX here and
                * rely on the 128-bit flavor instead.
                */
               res = 0;
          } else {
               /* For non-AMD, rely on the result from 128-bit AVX. */
               res = X(have_simd_avx_128)();
          }
          init = 1;
     }
     return res;
}

#endif
15 changes: 13 additions & 2 deletions simd-support/avx2.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

#include "amd64-cpuid.h"

int X(have_simd_avx2)(void) /* fixme: still testing for normal AVX */
int X(have_simd_avx2_128)(void)
{
static int init = 0, res;

Expand All @@ -49,7 +49,7 @@ int X(have_simd_avx2)(void) /* fixme: still testing for normal AVX */

#include "x86-cpuid.h"

int X(have_simd_avx2)(void)
int X(have_simd_avx2_128)(void)
{
static int init = 0, res;

Expand All @@ -67,4 +67,15 @@ int X(have_simd_avx2)(void)
}
#endif

/*
 * Report whether the 256-bit AVX2 code paths should be used.
 *
 * At present this is simply the answer given by the 128-bit AVX2 query.
 * No vendor-specific exclusion is applied: should AVX2-capable AMD chips
 * turn out to prefer the 128-bit flavor, that could be added here, but by
 * then AMD may implement 256-bit AVX2 efficiently, so nothing is disabled
 * before that is known.
 */
int X(have_simd_avx2)(void)
{
     const int avx2_128_ok = X(have_simd_avx2_128)();

     return avx2_128_ok;
}
#endif

0 comments on commit cd2b27d

Please sign in to comment.