Open
Description
I have some scala code that puts the contents of an iterator into OpenCL device memory. If I do this in parallel on the Intel CPU OpenCL implementation it segfaults most of the time. I have reduced it down to the following code:
import org.jocl.CL._
import org.jocl._
import java.nio.ByteOrder
import java.nio.ByteBuffer
object OpenCL
{
setExceptionsEnabled(true)
val deviceType = CL_DEVICE_TYPE_CPU
val devices = {
val numPlatforms = Array(0)
clGetPlatformIDs(0, null, numPlatforms)
val platforms = new Array[cl_platform_id](numPlatforms(0))
clGetPlatformIDs(platforms.length, platforms, null)
platforms.flatMap(platform => {
try {
val contextProperties = new cl_context_properties
contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform)
val numDevices = Array(0)
clGetDeviceIDs(platform, deviceType, 0, null, numDevices)
val devices = new Array[cl_device_id](numDevices(0))
clGetDeviceIDs(platform, deviceType, numDevices(0), devices, null)
devices.flatMap(device => {
try{
val vendorIdBuffer = new Array[Byte](1024)
clGetDeviceInfo(device, CL_DEVICE_VENDOR, 1024, Pointer.to(vendorIdBuffer), null)
val vendorId = new String(vendorIdBuffer, "UTF-8")
if(!vendorId.matches(".*Intel.*")) {
None
} else {
println(vendorId)
val context = clCreateContext(contextProperties, 1, Array(device), null, null, null)
val queue = clCreateCommandQueue(context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE, null)
Some(new OpenCLSession(context, queue, device))
}
} catch {
case e: CLException => None
}
})
} catch {
case e: CLException => Nil
}
})
}
def main(args: Array[String]) : Unit = println((0 to 30).par.map(x => OpenCL.devices(0).stream((0 to 1024*1024*256).iterator.map(_.toDouble),1024*1024*256)))
}
class OpenCLSession (val context: cl_context, val queue: cl_command_queue, val device: cl_device_id)
{
def stream(it: Iterator[Double], groupSize: Int = 1024*1024*256) : cl_mem = {
var on_host : Option[cl_mem] = None
try {
on_host = Some(clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR, groupSize, null, null))
val rawBuffer = clEnqueueMapBuffer(queue, on_host.get, true, CL_MAP_WRITE, 0, groupSize, 0, null, null, null)
val buffer = rawBuffer.order(ByteOrder.nativeOrder).asDoubleBuffer
var copied = 0
while(copied < groupSize/Sizeof.cl_double && it.hasNext) {
buffer.put(copied, it.next)
copied += 1
}
clEnqueueUnmapMemObject(queue, on_host.get, rawBuffer, 0, null, null)
clRetainMemObject(on_host.get)
on_host.get
} finally {
on_host.foreach(clReleaseMemObject)
}
}
override def finalize = {
clReleaseCommandQueue(queue)
clReleaseContext(context)
}
}
I suspect it is some unfortunate interference between the jvm and the Intel OpenCL implementation. I would be glad for some expert judgment on this.
Metadata
Metadata
Assignees
Labels
No labels