In [1]:
import pyopencl as cl
import pyopencl.array as cl_array
import numpy
import numpy.linalg as la

In [2]:
%load_ext pyopencl.ipython_ext

In [3]:
# Host-side inputs for the vector-add demo.
# The element count is named once so both arrays stay the same length, and a
# dedicated seeded generator makes the data identical on every fresh
# Restart & Run All without touching numpy's global random state.
N_ELEMENTS = 50000
SEED = 0
rng = numpy.random.RandomState(SEED)
a = rng.rand(N_ELEMENTS).astype(numpy.float32)
b = rng.rand(N_ELEMENTS).astype(numpy.float32)

In [4]:
# Create an OpenCL context and a command queue on it; the later cells submit
# their host-to-device copies and kernel launches through `queue`.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

In [5]:
# Copy the host inputs `a` and `b` to the device through `queue`.
a_dev = cl_array.to_device(queue, a)
b_dev = cl_array.to_device(queue, b)
# Output array created with empty_like(a_dev); both kernel launches below
# write their result into it.
# NOTE(review): presumably uninitialized until a kernel fills it — confirm.
dest_dev = cl_array.empty_like(a_dev)

### Python 用法

In [6]:
# Build the element-wise addition kernel `sum` from inline OpenCL C source.
# The work-item index is kept as size_t, the type get_global_id() returns,
# instead of being narrowed to a signed 32-bit int.
# The kernel has no bounds check, so it must be launched with exactly one
# work-item per array element.
prg = cl.Program(ctx, """
__kernel void sum(__global const float *a, __global const float *b, __global float *c)
{
  size_t gid = get_global_id(0);
  c[gid] = a[gid] + b[gid];
}
""").build()

In [7]:
# Fetch the `sum` kernel object from the built program once and launch it
# through `knl`.
knl = prg.sum
# Global size is a.shape (one work-item per element); None leaves the
# work-group size unspecified. The call is the cell's last expression, so its
# return value (an Event, per the output below) is what the cell displays.
knl(queue, a.shape, None, a_dev.data, b_dev.data, dest_dev.data)

<pyopencl._cl.Event at 0x1eaf1bafac8>

In [8]:
# Residual check for the `sum` kernel: subtract the device-side array
# addition a_dev + b_dev from the kernel output, copy the difference to the
# host with .get(), and print its norm. 0.0 means the two results agree.
print(
    la.norm(
        (dest_dev - (a_dev + b_dev)).get()
    )
)

0.0


### IPython 用法

In [9]:
%%cl_kernel -o "-cl-fast-relaxed-math"

// Element-wise vector addition: c[i] = a[i] + b[i], one work-item per element.
// The index is size_t, matching the return type of get_global_id(), so it is
// not narrowed to a signed 32-bit int.
// There is no bounds check: the global size must equal the array length.
__kernel void sum_vector(__global const float *a,
                         __global const float *b,
                         __global float *c)
{
  size_t gid = get_global_id(0);
  c[gid] = a[gid] + b[gid];
}

In [10]:
# Clear the output first: dest_dev still holds a + b from the earlier
# prg.sum launch, so without this the residual check that follows would
# print 0.0 even if sum_vector wrote nothing at all.
dest_dev.fill(0)
# Launch the kernel defined in the %%cl_kernel cell with one work-item per
# element (global size = a.shape, work-group size left unspecified). The call
# stays the last expression so the returned Event is still the cell's output.
sum_vector(queue, a.shape, None, a_dev.data, b_dev.data, dest_dev.data)

<pyopencl._cl.Event at 0x1eaf1be3288>

In [11]:
# Residual check for the `sum_vector` kernel: subtract the device-side array
# addition a_dev + b_dev from dest_dev, copy the difference to the host with
# .get(), and print its norm. 0.0 means the two results agree.
print(
    la.norm(
        (dest_dev - (a_dev + b_dev)).get()
    )
)

0.0
