Skip to content

Commit

Permalink
Merge branch 'rename_load_simd_complex' into 'master'
Browse files Browse the repository at this point in the history
Rename SIMD<Complex>::Load to LoadFast (same for Store)

See merge request jschoeberl/ngsolve!292
  • Loading branch information
JSchoeberl committed Jan 26, 2018
2 parents 515f931 + 6abd176 commit 842a1e1
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 57 deletions.
72 changes: 36 additions & 36 deletions basiclinalg/ngblas.cpp
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -1111,30 +1111,30 @@ namespace ngbla
for ( ; j+4*WS <= w; j+=4*WS) for ( ; j+4*WS <= w; j+=4*WS)
{ {
SIMD<Complex> val1, val2, val3, val4; SIMD<Complex> val1, val2, val3, val4;
val1.Load(ps+j); val1.LoadFast(ps+j);
val2.Load(ps+j+WS); val2.LoadFast(ps+j+WS);
val3.Load(ps+j+2*WS); val3.LoadFast(ps+j+2*WS);
val4.Load(ps+j+3*WS); val4.LoadFast(ps+j+3*WS);
val1 = val1 * scale; val1 = val1 * scale;
val2 = val2 * scale; val2 = val2 * scale;
val3 = val3 * scale; val3 = val3 * scale;
val4 = val4 * scale; val4 = val4 * scale;
val1.Store(pd+j); val1.StoreFast(pd+j);
val2.Store(pd+j+WS); val2.StoreFast(pd+j+WS);
val3.Store(pd+j+2*WS); val3.StoreFast(pd+j+2*WS);
val4.Store(pd+j+3*WS); val4.StoreFast(pd+j+3*WS);
} }
for ( ; j+WS <= w; j+=WS) for ( ; j+WS <= w; j+=WS)
{ {
SIMD<Complex> val; SIMD<Complex> val;
val.Load(ps+j); val.LoadFast(ps+j);
val = val * scale; val = val * scale;
val.Store(pd+j); val.StoreFast(pd+j);
} }
SIMD<Complex> val; SIMD<Complex> val;
val.Load(ps+j, w-j); val.LoadFast(ps+j, w-j);
val = val * scale; val = val * scale;
val.Store(pd+j, w-j); val.StoreFast(pd+j, w-j);
} }
} }


Expand All @@ -1149,23 +1149,23 @@ namespace ngbla
size_t ninner) size_t ninner)
{ {
SIMD<Complex> sum1, sum2, sum3, sum4; SIMD<Complex> sum1, sum2, sum3, sum4;
sum1.Load (pc); sum1.LoadFast (pc);
sum2.Load (pc+dc); sum2.LoadFast (pc+dc);
sum3.Load (pc+2*dc); sum3.LoadFast (pc+2*dc);
sum4.Load (pc+3*dc); sum4.LoadFast (pc+3*dc);
for (size_t i = 0; i < ninner; i++, pa += da, pb += db) for (size_t i = 0; i < ninner; i++, pa += da, pb += db)
{ {
SIMD<Complex> b1; SIMD<Complex> b1;
b1.Load(pb); b1.LoadFast(pb);
sum1 = sum1 - SIMD<Complex> (pa[0]) * b1; sum1 = sum1 - SIMD<Complex> (pa[0]) * b1;
sum2 = sum2 - SIMD<Complex> (pa[1]) * b1; sum2 = sum2 - SIMD<Complex> (pa[1]) * b1;
sum3 = sum3 - SIMD<Complex> (pa[2]) * b1; sum3 = sum3 - SIMD<Complex> (pa[2]) * b1;
sum4 = sum4 - SIMD<Complex> (pa[3]) * b1; sum4 = sum4 - SIMD<Complex> (pa[3]) * b1;
} }
sum1.Store(pc); sum1.StoreFast(pc);
sum2.Store(pc+dc); sum2.StoreFast(pc+dc);
sum3.Store(pc+2*dc); sum3.StoreFast(pc+2*dc);
sum4.Store(pc+3*dc); sum4.StoreFast(pc+3*dc);
} }


void KernelScal4x4Trans (Complex * pa, size_t da, void KernelScal4x4Trans (Complex * pa, size_t da,
Expand All @@ -1174,23 +1174,23 @@ namespace ngbla
size_t ninner, int mask) size_t ninner, int mask)
{ {
SIMD<Complex> sum1, sum2, sum3, sum4; SIMD<Complex> sum1, sum2, sum3, sum4;
sum1.Load (pc, mask); sum1.LoadFast (pc, mask);
sum2.Load (pc+dc, mask); sum2.LoadFast (pc+dc, mask);
sum3.Load (pc+2*dc, mask); sum3.LoadFast (pc+2*dc, mask);
sum4.Load (pc+3*dc, mask); sum4.LoadFast (pc+3*dc, mask);
for (size_t i = 0; i < ninner; i++, pa += da, pb += db) for (size_t i = 0; i < ninner; i++, pa += da, pb += db)
{ {
SIMD<Complex> b1; SIMD<Complex> b1;
b1.Load(pb, mask); b1.LoadFast(pb, mask);
sum1 = sum1 - SIMD<Complex> (pa[0]) * b1; sum1 = sum1 - SIMD<Complex> (pa[0]) * b1;
sum2 = sum2 - SIMD<Complex> (pa[1]) * b1; sum2 = sum2 - SIMD<Complex> (pa[1]) * b1;
sum3 = sum3 - SIMD<Complex> (pa[2]) * b1; sum3 = sum3 - SIMD<Complex> (pa[2]) * b1;
sum4 = sum4 - SIMD<Complex> (pa[3]) * b1; sum4 = sum4 - SIMD<Complex> (pa[3]) * b1;
} }
sum1.Store(pc, mask); sum1.StoreFast(pc, mask);
sum2.Store(pc+dc, mask); sum2.StoreFast(pc+dc, mask);
sum3.Store(pc+2*dc, mask); sum3.StoreFast(pc+2*dc, mask);
sum4.Store(pc+3*dc, mask); sum4.StoreFast(pc+3*dc, mask);
} }




Expand All @@ -1201,14 +1201,14 @@ namespace ngbla
size_t ninner) size_t ninner)
{ {
SIMD<Complex> sum1; SIMD<Complex> sum1;
sum1.Load (pc); sum1.LoadFast (pc);
for (size_t i = 0; i < ninner; i++, pa += da, pb += db) for (size_t i = 0; i < ninner; i++, pa += da, pb += db)
{ {
SIMD<Complex> b1; SIMD<Complex> b1;
b1.Load(pb); b1.LoadFast(pb);
sum1 = sum1 - SIMD<Complex> (*pa) * b1; sum1 = sum1 - SIMD<Complex> (*pa) * b1;
} }
sum1.Store(pc); sum1.StoreFast(pc);
} }


void KernelScal1x4Trans (Complex * pa, size_t da, void KernelScal1x4Trans (Complex * pa, size_t da,
Expand All @@ -1217,14 +1217,14 @@ namespace ngbla
size_t ninner, int mask) size_t ninner, int mask)
{ {
SIMD<Complex> sum1; SIMD<Complex> sum1;
sum1.Load (pc, mask); sum1.LoadFast (pc, mask);
for (size_t i = 0; i < ninner; i++, pa += da, pb += db) for (size_t i = 0; i < ninner; i++, pa += da, pb += db)
{ {
SIMD<Complex> b1; SIMD<Complex> b1;
b1.Load(pb, mask); b1.LoadFast(pb, mask);
sum1 = sum1 - SIMD<Complex> (*pa) * b1; sum1 = sum1 - SIMD<Complex> (*pa) * b1;
} }
sum1.Store(pc, mask); sum1.StoreFast(pc, mask);
} }


void MySubAtDB_BB ( void MySubAtDB_BB (
Expand Down
15 changes: 9 additions & 6 deletions ngstd/simd_complex.hpp
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -50,29 +50,32 @@ namespace ngstd
SIMD<double> & imag() { return im; } SIMD<double> & imag() { return im; }




void Load (Complex * p) // Numbers in SIMD structure are not necessarily in same order as in memory
// for instance:
// [x0,y0,x1,y1,x2,y2,x3,y3] -> [x0,x2,x1,x3,y0,y2,y1,y3]
void LoadFast (Complex * p)
{ {
SIMD<double> c1((double*)p); SIMD<double> c1((double*)p);
SIMD<double> c2((double*)(p+SIMD<double>::Size()/2)); SIMD<double> c2((double*)(p+SIMD<double>::Size()/2));
tie(re,im) = Unpack(c1,c2); tie(re,im) = Unpack(c1,c2);
} }


void Store (Complex * p) void StoreFast (Complex * p)
{ {
SIMD<double> h1, h2; SIMD<double> h1, h2;
tie(h1,h2) = Unpack(re,im); tie(h1,h2) = Unpack(re,im);
h1.Store((double*)p); h1.Store((double*)p);
h2.Store((double*)(p+SIMD<double>::Size()/2)); h2.Store((double*)(p+SIMD<double>::Size()/2));
} }


void Load (Complex * p, int nr) void LoadFast (Complex * p, int nr)
{ {
SIMD<double> c1((double*)p, Mask128(nr)); SIMD<double> c1((double*)p, Mask128(nr));
SIMD<double> c2((double*)(p+SIMD<double>::Size()/2), Mask128(nr-SIMD<double>::Size()/2)); SIMD<double> c2((double*)(p+SIMD<double>::Size()/2), Mask128(nr-SIMD<double>::Size()/2));
tie(re,im) = Unpack(c1,c2); tie(re,im) = Unpack(c1,c2);
} }


void Store (Complex * p, int nr) void StoreFast (Complex * p, int nr)
{ {
SIMD<double> h1, h2; SIMD<double> h1, h2;
tie(h1,h2) = Unpack(re,im); tie(h1,h2) = Unpack(re,im);
Expand Down Expand Up @@ -140,9 +143,9 @@ namespace ngstd
SIMD<Complex> SIMDComplexWrapper (SIMD<Complex> x, FUNC f) SIMD<Complex> SIMDComplexWrapper (SIMD<Complex> x, FUNC f)
{ {
Complex hx[SIMD<double>::Size()]; Complex hx[SIMD<double>::Size()];
x.Store(hx); x.StoreFast(hx);
for (auto & hxi : hx) hxi = f(hxi); for (auto & hxi : hx) hxi = f(hxi);
x.Load(hx); x.LoadFast(hx);
return x; return x;
} }


Expand Down
18 changes: 3 additions & 15 deletions tests/catch/ngblas.cpp
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -87,27 +87,15 @@ TEST_CASE ("SIMD<Complex>", "[simd]") {
dst[i] = 0.0; dst[i] = 0.0;
} }


/* SECTION ("Mask load/store") {
SECTION ("Mask load") {
for (auto k : Range(N+1)) {
SIMD<Complex> simd;
simd.Load(src,k);
for (auto i : Range(N)) {
CHECK(simd.real()[i] == ( i<k? src[i].real() : 0.0 ));
CHECK(simd.imag()[i] == ( i<k? src[i].imag() : 0.0 ));
}
}
}
SECTION ("Mask store") {
for (auto k : Range(N+1)) { for (auto k : Range(N+1)) {
SIMD<Complex> simd; SIMD<Complex> simd;
simd.Store(dst, k); simd.LoadFast(src,k);
simd.StoreFast(dst,k);
for (auto i : Range(N)) { for (auto i : Range(N)) {
CHECK(dst[i].real() == ( i<k? src[i].real() : 0.0 )); CHECK(dst[i].real() == ( i<k? src[i].real() : 0.0 ));
CHECK(dst[i].imag() == ( i<k? src[i].imag() : 0.0 )); CHECK(dst[i].imag() == ( i<k? src[i].imag() : 0.0 ));
} }
} }
} }
*/
} }

0 comments on commit 842a1e1

Please sign in to comment.