In [1]:
from gimmik import generate_mm
import numpy as np

In [2]:
# Backend "c": build a 3x3 float64 operator with 2.0 on the diagonal and
# print the kernel source that generate_mm returns for it.

a = np.eye(3) * 2
kernel_src = generate_mm(a, np.float64, "c")
print(kernel_src)


/*
 * Generated kernel for the "c" backend.
 *
 * For every index i in [0, n) it writes
 *     c[i + k*ldc] = 2.0 * b[i + k*ldb]    for k = 0, 1, 2.
 *
 * n        - number of i indices processed
 * b, ldb   - read-only input and the stride applied to k when indexing it
 * c, ldc   - output and the stride applied to k when indexing it
 *
 * Presumably ldb/ldc are leading dimensions of row-major operands -- confirm
 * against the generator's documentation.
 */
void
gimmik_mm(int n,
         const double* restrict b, int ldb,
         double* restrict c, int ldc)
{
    /* Scratch value holding one result before it is stored to c. */
    double dotp;

    /*
     * NOTE(review): dotp is declared outside the loop and no private clause
     * is given on this pragma -- confirm this is safe under omp simd.
     */
    #pragma omp simd
    for (int i = 0; i < n; i++)
    {
        /* k = 0 */
        dotp = 2.0*b[i + 0*ldb];
        c[i + 0*ldc] = dotp;
        /* k = 1 */
        dotp = 2.0*b[i + 1*ldb];
        c[i + 1*ldc] = dotp;
        /* k = 2 */
        dotp = 2.0*b[i + 2*ldb];
        c[i + 2*ldc] = dotp;
    }
}



In [3]:
# Backend "c-omp": build a 3x3 float64 operator with 2.0 on the diagonal and
# print the kernel source that generate_mm returns for it.

a = np.eye(3) * 2
kernel_src = generate_mm(a, np.float64, "c-omp")
print(kernel_src)


/*
 * Generated kernel for the "c-omp" backend.
 *
 * For every index i in [0, ncol) it writes
 *     c[i + k*ldc] = 2.0 * b[i + k*ldb]    for k = 0, 1, 2.
 *
 * ncol     - number of i indices processed
 * b, ldb   - read-only input and the stride applied to k when indexing it
 * c, ldc   - output and the stride applied to k when indexing it
 *
 * Presumably ldb/ldc are leading dimensions of row-major operands -- confirm
 * against the generator's documentation.
 */
void
gimmik_mm(int ncol,
         const double* restrict b, int ldb,
         double* restrict c, int ldc)
{
    /* Scratch value holding one result before it is stored to c. */
    double dotp;

    /* The loop over i is parallelised; dotp is listed as private on the pragma. */
    #pragma omp parallel for simd private(dotp)
    for (int i = 0; i < ncol; i++)
    {
        /* k = 0 */
        dotp = 2.0*b[i + 0*ldb];
        c[i + 0*ldc] = dotp;
        /* k = 1 */
        dotp = 2.0*b[i + 1*ldb];
        c[i + 1*ldc] = dotp;
        /* k = 2 */
        dotp = 2.0*b[i + 2*ldb];
        c[i + 2*ldc] = dotp;
    }
}



In [4]:
# Backend "cuda": build a 3x3 float64 operator with 2.0 on the diagonal and
# print the kernel source that generate_mm returns for it.

a = np.eye(3) * 2
kernel_src = generate_mm(a, np.float64, "cuda")
print(kernel_src)


/*
 * Generated kernel for the "cuda" backend.
 *
 * Each thread handles a single index i and, when i < n, writes
 *     c[i + k*ldc] = 2.0 * b[i + k*ldb]    for k = 0, 1, 2.
 *
 * n        - number of i indices to process
 * b, ldb   - read-only input and the stride applied to k when indexing it
 * c, ldc   - output and the stride applied to k when indexing it
 *
 * Presumably ldb/ldc are leading dimensions of row-major operands -- confirm
 * against the generator's documentation.
 */
__global__ void
gimmik_mm(int n,
         const double* __restrict__ b, int ldb,
         double* __restrict__ c, int ldc)
{
    // Index of this thread along x, computed from its block and thread ids.
    int i = blockDim.x*blockIdx.x + threadIdx.x;
    // Scratch value holding one result before it is stored to c.
    double dotp;

    // Threads whose index is n or greater do nothing.
    if (i < n)
    {
        // k = 0
        dotp = 2.0*b[i + 0*ldb];
        c[i + 0*ldc] = dotp;
        // k = 1
        dotp = 2.0*b[i + 1*ldb];
        c[i + 1*ldc] = dotp;
        // k = 2
        dotp = 2.0*b[i + 2*ldb];
        c[i + 2*ldc] = dotp;
    }
}



In [5]:
# Backend "ispc": build a 3x3 float64 operator with 2.0 on the diagonal and
# print the kernel source that generate_mm returns for it.

a = np.eye(3) * 2
kernel_src = generate_mm(a, np.float64, "ispc")
print(kernel_src)


/*
 * Generated kernel for the "ispc" backend.
 *
 * For every index i covered by the foreach range 0 ... n it writes
 *     c[i + k*ldc] = 2.0 * b[i + k*ldb]    for k = 0, 1, 2.
 *
 * n        - upper bound of the foreach range over i
 * b, ldb   - read-only input and the stride applied to k when indexing it
 * c, ldc   - output and the stride applied to k when indexing it
 *
 * Presumably ldb/ldc are leading dimensions of row-major operands -- confirm
 * against the generator's documentation.
 */
export void
gimmik_mm(uniform int n,
         const uniform double b[], uniform int ldb,
         double uniform c[], uniform int ldc)
{
    // Scratch value holding one result before it is stored to c.
    double dotp;

    foreach (i = 0 ... n)
    {
        // k = 0
        dotp = 2.0*b[i + 0*ldb];
        c[i + 0*ldc] = dotp;
        // k = 1
        dotp = 2.0*b[i + 1*ldb];
        c[i + 1*ldc] = dotp;
        // k = 2
        dotp = 2.0*b[i + 2*ldb];
        c[i + 2*ldc] = dotp;
    }
}



In [6]:
# Backend "opencl": build a 3x3 float64 operator with 2.0 on the diagonal and
# print the kernel source that generate_mm returns for it.

a = np.eye(3) * 2
kernel_src = generate_mm(a, np.float64, "opencl")
print(kernel_src)


/* Request the cl_khr_fp64 extension when __OPENCL_VERSION__ is below 120. */
#if __OPENCL_VERSION__ < 120
# pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif

/*
 * Generated kernel for the "opencl" backend.
 *
 * Each work-item handles a single index i and, when i < n, writes
 *     c[i + k*ldc] = 2.0 * b[i + k*ldb]    for k = 0, 1, 2.
 *
 * n        - number of i indices to process
 * b, ldb   - read-only __global input and the stride applied to k when indexing it
 * c, ldc   - __global output and the stride applied to k when indexing it
 *
 * Presumably ldb/ldc are leading dimensions of row-major operands -- confirm
 * against the generator's documentation.
 */
__kernel void
gimmik_mm(int n,
         __global const double* restrict b, int ldb,
         __global double* restrict c, int ldc)
{
    /* Global id of this work-item in dimension 0. */
    int i = get_global_id(0);
    /* Scratch value holding one result before it is stored to c. */
    double dotp;

    /* Work-items whose id is n or greater do nothing. */
    if (i < n)
    {
        /* k = 0 */
        dotp = 2.0*b[i + 0*ldb];
        c[i + 0*ldc] = dotp;
        /* k = 1 */
        dotp = 2.0*b[i + 1*ldb];
        c[i + 1*ldc] = dotp;
        /* k = 2 */
        dotp = 2.0*b[i + 2*ldb];
        c[i + 2*ldc] = dotp;
    }
}

