### Calculate $\delta R$ matrix from combinations (CUDA Version)

Steps are:
- Take the $\phi$ and $\eta$ data arrays together with their starts and stops arrays.
- Get the combination indices in the same manner as done earlier.
- For each pair, calculate deltaR as $$\delta r = \sqrt{\delta \eta^{2} + \delta \phi^{2}}$$ where $$\delta \eta = \eta_{2} - \eta_{1}$$ and $$\delta \phi = \phi_{2} - \phi_{1}$$
- Get the matrix $$\delta R = \{ \delta r_{ij}\} \quad \forall \: (i,j) \: \in \: \text{combination indices}$$


In [1]:
import numpy
import pycuda.driver as cuda
from pycuda.compiler import *
import pycuda.autoinit

In [2]:
# ---- simulation parameters ----
NUMEVENTS = 100                          # exact number of events
AVENUMJETS = 10                          # average number of jets per event
PHILOW, PHIHIGH = -numpy.pi, numpy.pi    # bounding box of phi (azimuthal angle)
ETALOW, ETAHIGH = -5, 5                  # bounding box of eta (~polar angle)
ERRPHI, ERRETA = 0.01, 0.01              # detector resolution in phi and eta
RECOPROB = 0.95                          # probability that a real jet IS reconstructed
                                         # (a jet is kept when uniform(0, 1) < RECOPROB)
AVENUMFAKES = 1                          # average number of spurious (fake) reconstructions

# ---- generator-level jets ----
# Jets of all events live in one flat array; event i owns the half-open
# range [genstarts[i], genstops[i]).
numgenjets = numpy.random.poisson(AVENUMJETS, NUMEVENTS)
genstops = numpy.cumsum(numgenjets).astype(numpy.int32)
genstarts = numpy.zeros(len(genstops), dtype=numpy.int32)
genstarts[1:] = genstops[:-1]            # each event starts where the previous one stopped
ngen_total = genstops[-1]
genphi = numpy.random.uniform(PHILOW, PHIHIGH, ngen_total).astype(numpy.float32)
geneta = numpy.random.uniform(ETALOW, ETAHIGH, ngen_total).astype(numpy.float32)

# ---- mismeasurement: Gaussian smearing of phi and eta ----
phinoise = numpy.random.normal(0, ERRPHI, ngen_total).astype(numpy.float32)
etanoise = numpy.random.normal(0, ERRETA, ngen_total).astype(numpy.float32)
phiwitherr = genphi + phinoise
etawitherr = geneta + etanoise

# ---- reconstruction inefficiency: True marks a jet that survives ----
recomask = numpy.random.uniform(0, 1, ngen_total) < RECOPROB

# ---- spurious (fake) jets per event, same flat layout as the real ones ----
numfakes = numpy.random.poisson(AVENUMFAKES, NUMEVENTS)
fakestops = numpy.cumsum(numfakes).astype(numpy.int32)
fakestarts = numpy.zeros(len(fakestops), dtype=numpy.int32)
fakestarts[1:] = fakestops[:-1]
nfake_total = fakestops[-1]
fakephi = numpy.random.uniform(PHILOW, PHIHIGH, nfake_total).astype(numpy.float32)
fakeeta = numpy.random.uniform(ETALOW, ETAHIGH, nfake_total).astype(numpy.float32)

# ---- containers for the reconstructed data (filled by the per-event loop) ----
recostarts = numpy.empty_like(genstarts)
recostops = numpy.empty_like(genstops)
nreco_total = recomask.sum() + numfakes.sum()    # surviving real jets + fakes
recophi = numpy.empty(nreco_total, dtype=genphi.dtype)
recoeta = numpy.empty(recophi.shape, dtype=recophi.dtype)

truematches = []
recostart = recostop = 0                 # running offsets into recophi / recoeta

In [3]:
# Build the reconstructed jet collection event by event: surviving (smeared)
# real jets plus that event's fake jets, written in shuffled order.
for i, (genstart, genstop, fakestart, fakestop) in enumerate(
        zip(genstarts, genstops, fakestarts, fakestops)):
    mask = recomask[genstart:genstop]

    # smeared generator-level values of the jets that were reconstructed
    phi = phiwitherr[genstart:genstop][mask]
    eta = etawitherr[genstart:genstop][mask]

    # real jets first, then this event's fakes
    holdphi = numpy.concatenate((phi, fakephi[fakestart:fakestop]))
    holdeta = numpy.concatenate((eta, fakeeta[fakestart:fakestop]))
    recostop += len(holdphi)

    # gen-level and reco-level data are both unordered sets, so scatter the
    # jets into this event's slot in a random order; the slices are views,
    # so the assignments land in recophi / recoeta themselves
    order = numpy.random.permutation(recostop - recostart)
    phislot = recophi[recostart:recostop]
    etaslot = recoeta[recostart:recostop]
    phislot[order] = holdphi
    etaslot[order] = holdeta

    # The permutation could be kept as a "true match" map (not known to the
    # physicist!). That bookkeeping is disabled, so truematches is not filled:
    #     truematch = numpy.ones(genstop - genstart, dtype=numgenjets.dtype) * -1
    #     truematch[mask] = order[:mask.sum()]
    #     truematches.append(truematch)

    # record this event's range and advance the running offset
    recostarts[i], recostops[i] = recostart, recostop
    recostart = recostop

In [5]:
# Number of generator-level and reconstructed jets in each event
gen_len = numpy.subtract(genstops, genstarts).astype(numpy.int32)
reco_len = numpy.subtract(recostops, recostarts).astype(numpy.int32)

# Event i contributes gen_len[i] * reco_len[i] (gen, reco) pairs; pairs_len
# holds the running offsets, so event i's pairs occupy
# deltar[pairs_len[i]:pairs_len[i + 1]].
pairs_len = numpy.concatenate(
    ([0], numpy.cumsum(gen_len * reco_len))
).astype(numpy.int32)

# Flat output buffer with one deltaR value per pair
deltar = numpy.empty(pairs_len[-1], dtype=numpy.float32)

# Number of events, wrapped in a one-element array so it can be handed to the kernel
base_len = numpy.array([NUMEVENTS], dtype=numpy.int32)

In [7]:
# CUDA source: device helpers plus the deltaR kernel. One thread handles one
# (event, jet-from-collection-1, jet-from-collection-2) triple.
mod = SourceModule('''
#include <cmath>        // Not needed, but added to check external includes in PyCUDA
extern "C"{

// Difference in eta between two jets; eta is not periodic, so this is a plain subtraction.
__device__ float deltaeta(float eta1,float eta2)
{
    return eta1-eta2;
}

// Difference in phi between two jets, folded into [-pi, pi].
// phi is an angle, so two jets on opposite sides of the +/-pi seam are close
// together even though their raw difference is close to 2*pi. remainderf()
// subtracts the nearest integer multiple of 2*pi, which yields the signed
// separation with the smallest magnitude.
__device__ float deltaphi(float phi1, float phi2)
{
    const float TWO_PI = 6.283185307179586f;
    return remainderf(phi1 - phi2, TWO_PI);
}

// Fill deltar with hypotf(deltaeta, deltaphi) for every pair of jets taken
// from collection 1 and collection 2 of the same event.
//   deltar             output, flat array of all pairs of all events
//   eta1, phi1         flat eta / phi arrays of collection 1
//   eta2, phi2         flat eta / phi arrays of collection 2
//   length             length[0] is the number of events
//   start1, start2     per-event offset of the first jet in each collection
//   pairs_len          per-event offset of the first pair in deltar
//   lengths1, lengths2 per-event number of jets in each collection
__global__ void deltaR(float* deltar,float* eta1, float* eta2, float* phi1, float* phi2,int* length, int* start1, int* start2, int* pairs_len,int* lengths1, int* lengths2)
{
    // 3 dimensional indices: x -> event, y -> jet in collection 1, z -> jet in collection 2.
    // The indices are generated in the same way as for the combinations solved earlier
    // and are then used to address the eta and phi arrays.
    int i = blockIdx.x*blockDim.x + threadIdx.x;
    int j = blockIdx.y*blockDim.y + threadIdx.y;
    int k = blockIdx.z*blockDim.z + threadIdx.z;

    if (i < length[0]){
        const int n1 = lengths1[i];
        const int n2 = lengths2[i];
        // Threads beyond this event's jet counts have nothing to do.
        if (j < n1 && k < n2){
            const int a = start1[i] + j;    // position of jet j of event i in collection 1
            const int b = start2[i] + k;    // position of jet k of event i in collection 2
            // Pairs of one event are stored row-major: the index of collection 2 varies fastest.
            deltar[pairs_len[i] + j*n2 + k] = hypotf(deltaeta(eta1[a], eta2[b]), deltaphi(phi1[a], phi2[b]));
        }
    }
}
}
''', no_extern_c = True)

kernel.cu



In [8]:
deltar_func = mod.get_function("deltaR")

# Thread layout expected by the kernel: x indexes the event, y the
# generator-level jet and z the reconstructed jet within that event.
block = (1, 8, 8)

# Size the grid from the largest event instead of a fixed 20 x 20, which would
# silently skip pairs of any event with more than 20 * 8 = 160 jets (and
# launches far more idle threads than needed for small events). Ceiling
# division gives the number of 8-wide blocks; CUDA requires every grid
# dimension to be at least 1.
grid = (
    len(numgenjets),
    max(1, -(-int(gen_len.max()) // block[1])),
    max(1, -(-int(reco_len.max()) // block[2])),
)

# deltar is only written by the kernel, so it is passed as an output-only
# buffer (no host-to-device copy of its uninitialised contents).
deltar_func(
    cuda.Out(deltar),
    cuda.In(geneta), cuda.In(recoeta),
    cuda.In(genphi), cuda.In(recophi),
    cuda.In(base_len),
    cuda.In(genstarts), cuda.In(recostarts),
    cuda.In(pairs_len),
    cuda.In(gen_len), cuda.In(reco_len),
    block=block, grid=grid)

In [9]:
# Show the deltaR values of the first six events; event i owns the slice
# of deltar between two consecutive pairs_len offsets.
for i in range(6):
    first, last = pairs_len[i], pairs_len[i + 1]
    print("Event:{} \n {}\n".format(i, deltar[first:last]))

Event:0 
 [  1.49774373e+00   1.04372529e-02   3.30006003e+00   2.24345756e+00
   3.04721904e+00   2.77237439e+00   4.10195732e+00   3.73045182e+00
   8.87853652e-03   1.48098230e+00   4.60296917e+00   2.48604774e+00
   4.41947985e+00   4.24365377e+00   3.03931975e+00   2.96533608e+00
   3.02670097e+00   4.08766842e+00   7.39728069e+00   2.94592309e+00
   7.12524939e+00   6.70722628e+00   1.25433635e-02   9.08910871e-01
   4.76786280e+00   3.28192878e+00   2.25364637e+00   4.09058762e+00
   1.81717634e+00   9.92189527e-01   6.89289904e+00   6.24381208e+00
   4.24983597e+00   2.77840996e+00   1.35851860e+00   4.11793804e+00
   9.00018096e-01   6.12535467e-03   6.70459795e+00   6.15671921e+00
   4.61325169e+00   3.31242228e+00   2.15570605e-03   5.11928558e+00
   4.59630758e-01   1.35314798e+00   7.40296125e+00   6.98049259e+00
   4.44005442e+00   3.07163286e+00   4.41364318e-01   4.76560736e+00
   1.88285410e-02   9.11642492e-01   7.14360332e+00   6.68297100e+00]

Event:1 
 [  8.3944225