# Sequential version of global argmin

Find the minimum-deltaR match per event. In case of a tie, the last pair encountered is picked. In CUDA code, the thread order decides which pair is picked when there are multiple globally best solutions.

It is based on product (combinations), but can easily be extended to pairs.

In [1]:
import numpy

In [2]:
# Data loading. It is same as before.

import numpy
import pycuda.driver as cuda
from pycuda.compiler import *
import pycuda.autoinit

NUMEVENTS = 100      # exact number of events
AVENUMJETS = 10      # average number of jets per event (Poisson mean)
PHILOW = -numpy.pi   # bounding box of phi (azimuthal angle) and eta (~polar angle)
PHIHIGH = numpy.pi
ETALOW = -5
ETAHIGH = 5
ERRPHI = 0.01        # detector resolution (std. dev. of the Gaussian smearing applied to phi / eta)
ERRETA = 0.01
RECOPROB = 0.95      # probability of reconstructing a real jet (a jet is kept when its uniform draw is < RECOPROB)
AVENUMFAKES = 1      # average number of spurious (fake) reconstructions per event (Poisson mean)

# simulate the generator-level jets
# numgenjets[i] is the Poisson-distributed jet count of event i. All jets live in flat
# arrays; event i owns the half-open slice [genstarts[i], genstops[i]).
numgenjets = numpy.random.poisson(AVENUMJETS, NUMEVENTS)
genstops = numpy.cumsum(numgenjets).astype(numpy.int32)
genstarts = numpy.empty_like(genstops).astype(numpy.int32)
genstarts[0] = 0
genstarts[1:] = genstops[:-1]    # each event starts where the previous one stopped
genphi = numpy.random.uniform(PHILOW, PHIHIGH, genstops[-1]).astype(numpy.float32)
geneta = numpy.random.uniform(ETALOW, ETAHIGH, genstops[-1]).astype(numpy.float32)

# simulate mismeasurement (error in reconstructing phi and eta)
# Gaussian noise with standard deviation ERRPHI / ERRETA is added to every generated jet.
phiwitherr = genphi + numpy.random.normal(0, ERRPHI, genstops[-1]).astype(numpy.float32)
etawitherr = geneta + numpy.random.normal(0, ERRETA, genstops[-1]).astype(numpy.float32)

# simulate inefficiency in reconstruction (missing real jets)
# recomask is True for the generated jets that survive reconstruction.
recomask = (numpy.random.uniform(0, 1, genstops[-1]) < RECOPROB)

# simulate spurious (fake) jets per event
# Same start/stop layout as the generator-level jets, with AVENUMFAKES as the Poisson mean.
numfakes = numpy.random.poisson(AVENUMFAKES, NUMEVENTS)
fakestops = numpy.cumsum(numfakes).astype(numpy.int32)
fakestarts = numpy.empty_like(fakestops).astype(numpy.int32)
fakestarts[0] = 0
fakestarts[1:] = fakestops[:-1]
fakephi = numpy.random.uniform(PHILOW, PHIHIGH, fakestops[-1]).astype(numpy.float32)
fakeeta = numpy.random.uniform(ETALOW, ETAHIGH, fakestops[-1]).astype(numpy.float32)

# fill reconstructed data arrays
# Total reco size = surviving real jets + all fake jets; the contents are written by the
# per-event loop that follows (numpy.empty leaves them uninitialized until then).
recostarts = numpy.empty_like(genstarts)
recostops = numpy.empty_like(genstops)
recophi = numpy.empty(recomask.sum() + numfakes.sum(), dtype=genphi.dtype)
recoeta = numpy.empty_like(recophi)

# Build the reconstructed-jet arrays event by event: surviving (smeared) real jets plus
# that event's fake jets, shuffled, written into the event's slice of recophi / recoeta.
truematches = []                 # stays empty: the code that appends to it is disabled below
recostart, recostop = 0, 0       # running [start, stop) of the current event in the reco arrays

for i in range(NUMEVENTS):
    genstart, genstop = genstarts[i], genstops[i]
    fakestart, fakestop = fakestarts[i], fakestops[i]
    mask = recomask[genstart:genstop]    # which of this event's generated jets were reconstructed

    phi = phiwitherr[genstart:genstop][mask]    # generated phi with error and mask
    eta = etawitherr[genstart:genstop][mask]    # generated eta with error and mask

    # concatenate the subset of real jets with some fake jets
    holdphi = numpy.concatenate((phi, fakephi[fakestart:fakestop]))
    holdeta = numpy.concatenate((eta, fakeeta[fakestart:fakestop]))
    recostop += len(holdphi)     # advance the stop by this event's reco multiplicity

    # gen-level and reco-level data are both unordered sets; randomly permute
    # The slice is a view of recophi / recoeta, so assigning through [order] scatters
    # hold* into the event's slice: element n of hold* lands at local position order[n].
    order = numpy.random.permutation(recostop - recostart)
    recophi[recostart:recostop][order] = holdphi
    recoeta[recostart:recostop][order] = holdeta

    # keep that permutation to use as a "true match" map (not known to physicist!)
    # NOTE: the triple-quoted string below is a no-op expression used to disable this
    # bookkeeping, so no true-match map is actually recorded.
    '''
    truematch = numpy.ones(genstop - genstart, dtype=numgenjets.dtype) * -1
    truematch[mask] = order[:mask.sum()]
    truematches.append(truematch)
    '''
    recostarts[i] = recostart
    recostops[i] = recostop
    recostart = recostop         # next event begins where this one ended

# Per-event multiplicities of generator-level and reconstructed jets
gen_counts = numpy.subtract(genstops, genstarts).astype(numpy.int32)
reco_counts = numpy.subtract(recostops, recostarts).astype(numpy.int32)

# Offsets into the flat array of gen x reco pairs: event i owns the slots
# [pairs_offsets[i], pairs_offsets[i+1]); the last entry is the total number of pairs.
pairs_offsets = numpy.concatenate(([0], numpy.cumsum(gen_counts * reco_counts))).astype(numpy.int32)

In [4]:
# Flat buffer with one deltaR slot per (generator jet, reconstructed jet) pair over all events,
# computed as done earlier. Filling it could be fused with the argmin step, but that is purely
# an optimization.

deltaR = numpy.empty(shape=pairs_offsets[-1], dtype=numpy.float64)

# Define essential functions

def deltaeta(eta1, eta2):
    """Return the signed difference ``eta1 - eta2``."""
    separation = eta1 - eta2
    return separation

def deltaPhi(phi1, phi2):
    """Return the azimuthal difference ``phi1 - phi2`` wrapped into [-pi, pi).

    The wrap is computed in closed form rather than by repeatedly adding or
    subtracting 2*pi, so the function also accepts NumPy arrays (element-wise,
    with broadcasting) and cannot loop forever on infinite input (the result
    is then ``nan``).

    Parameters
    ----------
    phi1, phi2 : scalar or numpy array
        Azimuthal angles in radians.

    Returns
    -------
    Scalar or array holding the wrapped difference. A difference already
    inside [-pi, pi) is returned unchanged.
    """
    dphi = phi1 - phi2
    two_pi = 2 * numpy.pi
    # Number of whole turns separating dphi from the target interval:
    # 0 when dphi is already in [-pi, pi), positive above it, negative below it.
    turns = numpy.floor((dphi + numpy.pi) / two_pi)
    return dphi - two_pi * turns

def deltar_from_coords(eta1, eta2, phi1, phi2):
    """Return deltaR between two jets given their (eta, phi) coordinates.

    This was originally also named ``deltar`` and was silently shadowed by the
    two-argument definition below, which made it unreachable. It has a
    distinct name so that both forms can be called.
    """
    return numpy.hypot(deltaeta(eta1, eta2), deltaPhi(phi1, phi2))


def deltar(deta, dphi):
    """Return deltaR = sqrt(deta**2 + dphi**2) from precomputed differences.

    Parameters
    ----------
    deta : difference in eta between the two jets.
    dphi : difference in phi between the two jets (already wrapped by the caller).
    """
    return numpy.hypot(deta, dphi)

# Fill deltaR event by event. Within one event the pairs are stored generator-major:
# for each generator jet j, all reconstructed jets k of that event follow consecutively.
for i in range(NUMEVENTS):
    pairs_i = pairs_offsets[i]    # write cursor, starting at the event's first pair slot
    for j in range(genstarts[i], genstops[i]):
        for k in range(recostarts[i], recostops[i]):
            deta, dphi = deltaeta(geneta[j], recoeta[k]), deltaPhi(genphi[j], recophi[k])
            deltaR[pairs_i] = deltar(deta, dphi)
            pairs_i += 1


### Argmin calculation step

This can be done in steps as below:

1. Iterate over all events, indexed by i.
2. For each event,
    - create a variable for storing the running minimum value, and initialize it.
    - iterate over the generator tracks (index j) and the reconstructed tracks (index k).
    - if the current deltaR <= the running minimum, update the running minimum and set the minimum-index variables to j and k.
    - repeat this for all valid values of j and k.
3. Store the minimum index pair for each event.

In [7]:
# Two arrays store, for each event, the indices of the pair with the smallest deltaR:
# left for the generator track, right for the reconstructed track. The indices are
# global positions in the flat jet arrays.
# They are pre-filled with -1 so that an event without any gen x reco pair carries an
# explicit "no match" marker instead of uninitialized memory.

left = numpy.full(NUMEVENTS, -1, dtype=numpy.int32)
right = numpy.full(NUMEVENTS, -1, dtype=numpy.int32)

for i in range(NUMEVENTS):
    pairs_i = pairs_offsets[i]    # read cursor into deltaR, same layout as when it was filled
    # Seed with +inf rather than deltaR[pairs_i]: for an event with no pairs that slot
    # belongs to the next event, or lies past the end of deltaR for the last event.
    min_val = numpy.inf
    for j in range(genstarts[i], genstops[i]):
        for k in range(recostarts[i], recostops[i]):
            # "<=" (not "<") so that a tie is resolved in favour of the last pair visited
            if deltaR[pairs_i] <= min_val:
                min_val = deltaR[pairs_i]
                left[i] = j
                right[i] = k
            pairs_i += 1

In [9]:
# Print the global-minimum pair of the first events (at most 10).
# Note that the indexes are positions in the cumulative (flat) jet arrays, so they carry
# over from one event to the next: stops are exclusive, so if the jets of event 0 stop
# at 20, the jets of event 1 start at 20. Subtracting genstarts[i] / recostarts[i] would
# give event-local indexes, which is not needed here.

for i in range(min(10, NUMEVENTS)):    # do not run past the end when there are fewer than 10 events
    # int(...) keeps the output as "(6, 4)" on NumPy versions whose scalar repr is "np.int32(6)"
    print("\n Event {}: Minimum index: {}".format(i, (int(left[i]), int(right[i]))))



 Event 0: Minimum index: (6, 4)

 Event 1: Minimum index: (11, 12)

 Event 2: Minimum index: (17, 29)

 Event 3: Minimum index: (29, 38)

 Event 4: Minimum index: (41, 45)

 Event 5: Minimum index: (49, 62)

 Event 6: Minimum index: (58, 67)

 Event 7: Minimum index: (71, 76)

 Event 8: Minimum index: (78, 80)

 Event 9: Minimum index: (89, 102)
