Skip to content

Commit

Permalink
Only build SSE3 kernel when SSE3 is available
Browse files: browse the repository at this point in the history
  • Loading branch information
jedbrown committed Oct 24, 2009
1 parent 4eb0d61 commit 2f9a58f
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
15 changes: 13 additions & 2 deletions src/jacobi/impls/tensor/inlinetmulthex.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#include <pmmintrin.h>
#include "private/microbench.h"
#if !defined _INLINETMULTHEX_H
#define _INLINETMULTHEX_H

#include "dohptype.h"

/**
* The core computational kernel. Performs a tensor product operation with the matrices A[0..2].
[… unchanged lines collapsed …]
@@ -97,6 +99,12 @@ static dErr TensorMult_Hex_nounroll(dInt D,const dInt P[3],const dInt Q[3],const
dFunctionReturn(0);
}


#if defined __SSE3__
#include <pmmintrin.h>
#include "private/microbench.h"


#define D 1
#define P2 4
#define Q2 4
[… unchanged lines collapsed …]
@@ -382,3 +390,6 @@ static dErr TensorMult_Hex_P4_Q4_D1(dInt D_is_1,const dInt P[3],const dInt Q[3],
PetscLogFlops((Q[0]*P[0]*P[1]*P2 + Q[0]*Q[1]*P[1]*P2 + Q[0]*Q[1]*Q2*P2)*D*2);
dFunctionReturn(0);
}

#endif
#endif
2 changes: 2 additions & 0 deletions src/jacobi/impls/tensor/tensor.c
Original file line number Diff line number Diff line change
[… unchanged lines collapsed …]
@@ -718,9 +718,11 @@ static dErr TensorBasisCreate(TensorBuilder build,const TensorRule rule,dInt P,T
b->multhex[0] = &TensorMult_Hex_nounroll;
b->multhex[1] = &TensorMult_Hex_nounroll;
b->multhex[2] = &TensorMult_Hex_nounroll;
#if defined __SSE3__
if (P == 4 && Q == 4) {
b->multhex[0] = &TensorMult_Hex_P4_Q4_D1;
}
#endif
}
*basis = b;
dFunctionReturn(0);
Expand Down

0 comments on commit 2f9a58f

Please sign in to comment.