Non deterministic segfault with Intel CPU

I have some Scala code that copies the contents of an iterator into OpenCL device memory. If I do this in parallel on the Intel CPU OpenCL implementation, it segfaults most of the time. I have reduced it to the following code:
```scala
import org.jocl.CL._
import org.jocl._
import java.nio.ByteOrder
import java.nio.ByteBuffer

/**
 * Reproduction driver: discovers every CPU-type OpenCL device whose vendor
 * string mentions "Intel", opens a context and command queue on each, and (in
 * `main`) streams data to the first one from many threads at once.
 */
object OpenCL
{
  // Ask JOCL to report OpenCL errors as CLException; the discovery code below
  // relies on catching them to skip unusable platforms and devices.
  setExceptionsEnabled(true)
  val deviceType = CL_DEVICE_TYPE_CPU

  /** One session per matching device, in platform/device enumeration order. */
  val devices: Array[OpenCLSession] = {
    // Standard two-step query: first ask for the count, then fetch the handles.
    val numPlatforms = Array(0)
    clGetPlatformIDs(0, null, numPlatforms)
    val platforms = new Array[cl_platform_id](numPlatforms(0))
    clGetPlatformIDs(platforms.length, platforms, null)
    platforms.flatMap(platform => sessionsOn(platform))
  }

  /**
   * Opens a session on every matching device of `platform`.
   * A platform whose device query fails contributes no sessions.
   */
  private def sessionsOn(platform: cl_platform_id): List[OpenCLSession] =
    try {
      val contextProperties = new cl_context_properties
      contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform)
      val numDevices = Array(0)
      clGetDeviceIDs(platform, deviceType, 0, null, numDevices)
      val found = new Array[cl_device_id](numDevices(0))
      clGetDeviceIDs(platform, deviceType, numDevices(0), found, null)
      found.toList.flatMap(device => openSession(contextProperties, device))
    } catch {
      case e: CLException => Nil
    }

  /**
   * Reads the vendor string of `device`.
   * The fixed-size buffer is zero-filled by the JVM, so the text is cut at the
   * first NUL byte instead of decoding all 1024 bytes into the string.
   */
  private def vendorOf(device: cl_device_id): String = {
    val raw = new Array[Byte](1024)
    clGetDeviceInfo(device, CL_DEVICE_VENDOR, raw.length, Pointer.to(raw), null)
    new String(raw.takeWhile(_ != 0), "UTF-8")
  }

  /**
   * Creates a context and an out-of-order, profiling-enabled queue on `device`
   * if its vendor string contains "Intel".
   *
   * @return the new session, or None when the vendor does not match or any
   *         OpenCL call fails
   */
  private def openSession(contextProperties: cl_context_properties, device: cl_device_id): Option[OpenCLSession] =
    try {
      val vendorId = vendorOf(device)
      if (!vendorId.contains("Intel")) {
        None
      } else {
        println(vendorId)
        val context = clCreateContext(contextProperties, 1, Array(device), null, null, null)
        try {
          val queue = clCreateCommandQueue(context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE, null)
          Some(new OpenCLSession(context, queue, device))
        } catch {
          case e: CLException =>
            // Do not leak the context when the queue cannot be created.
            clReleaseContext(context)
            throw e
        }
      }
    } catch {
      case e: CLException => None
    }

  /**
   * Runs 31 parallel `stream` calls of 256 MiB each against the first
   * discovered device and prints the resulting buffer handles.
   * Exits with status 1 when no matching device was found.
   */
  def main(args: Array[String]) : Unit =
    devices.headOption match {
      case Some(session) =>
        println((0 to 30).par.map(x => session.stream((0 to 1024*1024*256).iterator.map(_.toDouble),1024*1024*256)))
      case None =>
        Console.err.println("No Intel OpenCL CPU device found")
        sys.exit(1)
    }
}

/**
 * Bundles an OpenCL context, a command queue created on it, and the device
 * both belong to. The queue and context are released when the session is
 * finalized.
 */
class OpenCLSession (val context: cl_context, val queue: cl_command_queue, val device: cl_device_id)
{
  /**
   * Allocates a host-accessible buffer of `groupSize` bytes and fills it with
   * doubles drawn from `it`, in native byte order.
   *
   * At most `groupSize / Sizeof.cl_double` elements are consumed; if the
   * iterator ends earlier, the remaining bytes of the buffer are left as
   * allocated.
   *
   * @param it        source of the values to copy
   * @param groupSize size of the buffer in bytes
   * @return the filled buffer; the caller owns it and must release it
   * @throws CLException if an OpenCL call fails (the buffer is released first)
   */
  def stream(it: Iterator[Double], groupSize: Int = 1024*1024*256) : cl_mem = {
    val onHost = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR, groupSize, null, null)
    try {
      // Blocking map: rawBuffer is valid for host writes as soon as this returns.
      val rawBuffer = clEnqueueMapBuffer(queue, onHost, true, CL_MAP_WRITE, 0, groupSize, 0, null, null, null)
      try {
        val buffer = rawBuffer.order(ByteOrder.nativeOrder).asDoubleBuffer
        val capacity = groupSize / Sizeof.cl_double
        var copied = 0
        while (copied < capacity && it.hasNext) {
          buffer.put(copied, it.next())
          copied += 1
        }
      } finally {
        // Always unmap, even when the iterator throws part-way through.
        unmapAndWait(onHost, rawBuffer)
      }
      onHost
    } catch {
      case e: Throwable =>
        // Cleanup only: give the buffer back and let the failure propagate.
        clReleaseMemObject(onHost)
        throw e
    }
  }

  /**
   * Enqueues the unmap of `mapped` and blocks until it has completed.
   * The wait matters because the unmap command is asynchronous, so without it
   * the buffer could be handed to the caller while the unmap is still pending.
   */
  private def unmapAndWait(mem: cl_mem, mapped: ByteBuffer): Unit = {
    val unmapped = new cl_event
    clEnqueueUnmapMemObject(queue, mem, mapped, 0, null, unmapped)
    try {
      clWaitForEvents(1, Array(unmapped))
    } finally {
      clReleaseEvent(unmapped)
    }
  }

  /** Releases the queue and then the context, even if releasing the queue fails. */
  override def finalize(): Unit = {
    try {
      clReleaseCommandQueue(queue)
    } finally {
      clReleaseContext(context)
    }
  }
}
```
I suspect some unfortunate interaction between the JVM and the Intel OpenCL implementation. I would be glad to get an expert opinion on this.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Non deterministic segfault with Intel CPU #12

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Non deterministic segfault with Intel CPU #12

Description

Metadata

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Issue actions