# Index in non-rejection mode.

The implementation documentation is the same as in the parents, so it is not repeated here.

In [1]:
import numpy
import pycuda.autoinit
import pycuda.driver as cuda
import pycuda.gpuarray as gpuarray
from pycuda.compiler import *

In [2]:
NUMEVENTS = 320    # Number of events to simulate the data for.
AVENUMJETS = 32    # Average number of jets per event (Poisson mean).

# Draw the number of jets in each event.
numjets = numpy.random.poisson(lam=AVENUMJETS, size=NUMEVENTS).astype(numpy.int32)

# Stops array: running total of the jet counts, i.e. one past the last jet of each event.
jets_stops = numjets.cumsum().astype(numpy.int32)

# Starts array: 0 for the first event, the previous event's stop for every later one.
jets_starts = numpy.zeros_like(jets_stops)
jets_starts[1:] = jets_stops[:-1]

In [3]:
# Total number of particles over all events: the last entry of the stops array.
NUMPARTICLES = jets_stops[-1]

# Host-side output buffer with one int32 slot per particle (left uninitialised).
pointer_cuda = numpy.empty(NUMPARTICLES, dtype=numpy.int32)

# Copy the starts, stops and output buffers to the GPU.
gpu_starts = gpuarray.to_gpu(jets_starts)
gpu_stops = gpuarray.to_gpu(jets_stops)
gpu_pointer = gpuarray.to_gpu(pointer_cuda)

# The two sizes, each wrapped in a one-element int32 array so they can be
# handed to the kernel launch as arrays.
arr_numevents = numpy.array([NUMEVENTS], dtype=numpy.int32)
arr_numparticles = numpy.array([NUMPARTICLES], dtype=numpy.int32)

In [4]:
# Threads per block along each axis; must match the block=(32, 32, 1) shape
# used when the kernel is launched.
THREADS_PER_AXIS = 32

# Ceiling division in pure integer arithmetic: the trailing partial block is
# always counted, and the result does not depend on `/` being true division
# or on floating-point rounding.
NUMBLOCKS_x = int((NUMPARTICLES + THREADS_PER_AXIS - 1) // THREADS_PER_AXIS)  # blocks along the particle axis
NUMBLOCKS_y = int((NUMEVENTS + THREADS_PER_AXIS - 1) // THREADS_PER_AXIS)     # blocks along the event axis

### Implementation 
Launch one thread for every pair of particle (`i`) and event (`j`). The total number of particles is known (= `jets_stops[-1]`), as is the total number of events.

Compare `i` with `starts[j]` and `stops[j]`; if `starts[j] <= i < stops[j]`, assign `i - starts[j]` to `pointer[i]`.

In [5]:
# Compile the CUDA kernel `index`.
# Each thread takes a particle index i (x dimension of the launch) and an
# event index j (y dimension). Threads with i >= NUMPARTICLES[0] or
# j >= NUMEVENTS[0] do nothing. If particle i lies inside event j's range
# [starts[j], stops[j]), the thread stores the particle's offset within that
# event, i - starts[j], in pointer[i].
# NUMEVENTS and NUMPARTICLES are int pointers; only element 0 of each is read.
mod = SourceModule('''
__global__ void index(int* pointer,int* starts,int* stops,int* NUMEVENTS,int* NUMPARTICLES)
{
    // i for particles, j for events.
    
    int i = blockIdx.x*blockDim.x + threadIdx.x;
    int j = blockIdx.y*blockDim.y + threadIdx.y;
    
    if (i<NUMPARTICLES[0] && j<NUMEVENTS[0])
    {
        // There will be horrible warp divergence here. But I don't see anyway around it.
        if(starts[j]<=i && i<stops[j])
        {
            pointer[i] = i-starts[j];
        }
    }
}
''')

kernel.cu



In [7]:
# Look up the compiled `index` kernel in the module so it can be called from Python.
func = mod.get_function("index")

In [8]:
# Launch the kernel with 32x32 threads per block: the x axis of the grid spans
# the particles, the y axis spans the events. The two sizes are passed as
# input-only host arrays.
func(
    gpu_pointer,
    gpu_starts,
    gpu_stops,
    cuda.In(arr_numevents),
    cuda.In(arr_numparticles),
    block=(32, 32, 1),
    grid=(NUMBLOCKS_x, NUMBLOCKS_y, 1)
)

In [9]:
# Display the pointer array held in `gpu_pointer` after the kernel launch.
gpu_pointer

array([ 0,  1,  2, ..., 32, 33, 34])

In [10]:
# Host buffer for the sequential (CPU) result: one int32 slot per particle,
# jets_stops[-1] in total. numpy.empty leaves it uninitialised.
pointer_seq = numpy.empty(jets_stops[-1], dtype=numpy.int32)
# Sequential (CPU) evaluation
def index(starts, stops, pointers):
    """Write each element's local offset within its event into `pointers`.

    For every event k, the slice pointers[starts[k]:stops[k]] is overwritten
    with 0, 1, ..., stops[k] - starts[k] - 1. `pointers` is modified in place
    and nothing is returned.
    """
    for event, begin in enumerate(starts):
        end = stops[event]
        pointers[begin:end] = numpy.arange(end - begin)
# Fill pointer_seq on the CPU; this is the reference the GPU result is compared against.
index(jets_starts, jets_stops, pointer_seq)

In [11]:
# Copy the GPU result back to host memory.
host_pointer_data = gpu_pointer.get()

# Element-wise comparison with the sequential result; nothing is printed if
# they are equal. Comparing `.all()` of each array would only compare two
# booleans, which passes for any two arrays that both contain a zero.
assert numpy.array_equal(host_pointer_data, pointer_seq), "GPU and sequential results differ"