# Bethmark

In [19]:
//% includes: full
//% includes: stdlib.h
//% includes: array
//% namespace: std
//% includes: timer.h
//% cflags: -I/home/superjom/project/jupyter-cpp-kernel/resources

const int size = 100000;
const int repeat = 10000;

array<float, size> a, b;

float c = 0;

// assign random
for (int i = 0; i < size; i++) {
  a[i] = rand() / RAND_MAX - 0.5;
  b[i] = rand() / RAND_MAX - 0.5;
}

// calculate c
{
  OnceTimer t0("loop");

  for (int r = 0; r < repeat; r++) {
    for (int i = 0; i < size; i++) {
      c += a[i] * b[i];
    }
  }
}

{
  c = 0;
  OnceTimer t1("loop unroll");
  
  for (int r = 0; r < repeat; r++) {
    for (int i = 0; i < size; i += 4) {
      c += a[i]*b[i] + a[i+1]*b[i+1] + a[i+2]*b[i+2] + a[i+3]*b[i+3]; 
    }
  }
}

{
  c = 0;
  OnceTimer t2("loop unroll2");
  
  float c0 = 0; float c1 = 0; float c2 = 0; float c3 = 0;
  for (int r = 0; r < repeat; r++) {
    for (int i = 0; i < size; i+=4) {
      c0 += a[i] * b[i];
      c1 += a[i+1] * b[i+1];
      c2 += a[i+2] * b[i+2];
      c3 += a[i+3] * b[i+3];
    }
    c = c0 + c1 + c2 + c3;
  }
}


Timer loop lasts 11
Timer loop unroll lasts 9
Timer loop unroll2 lasts 10


## SIMD

In [30]:
//% includes: mmintrin.h
//% includes: xmmintrin.h
//% includes: timer.h
//% includes: iostream
//% cflags: -I/home/superjom/project/jupyter-cpp-kernel/resources
//% namespace: std

const int size = 100000;
const int repeat = 10000;

float a[size] __attribute__((aligned(16)));
float b[size] __attribute__((aligned(16)));
float c;

// assign random
for (int i = 0; i < size; i++) {
  a[i] = rand() / RAND_MAX - 0.5;
  b[i] = rand() / RAND_MAX - 0.5;
}

{
  c = 0;
  OnceTimer t2("SIMD0");
  __m128 cc;
  for (int t = 0; t < repeat; t++) {
    for (int i = 0; i < size; i+=4) {
      auto* aa = (__m128*) &a[i];
      auto* bb = (__m128*) &b[i];
      _mm_store_ps((float*)&cc, _mm_add_ps(cc, _mm_mul_ps(*aa, *bb)));
    }
  }
  auto* cv = (float*) (&cc);
  c = cv[0] + cv[1] + cv[2] + cv[3];
}
cout << "c:" << c << endl;


Timer SIMD0 lasts 2
c:1.67772e+07
