# Sequential gen-reco matching

Matching generated and reconstructed particles based on a minimum-deltaR criterion.

#### Issues:
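Possible one-to-many mapping: a single reconstructed particle can end up matched to several generated particles (for example, generator indices 8 and 9 both map to reconstructed index 3 in the output below). This needs to be eliminated.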

In [24]:
import numpy
import math

In [25]:
# ---- simulation configuration ------------------------------------------
NUMEVENTS = 100                          # exact number of events
AVENUMJETS = 10                          # mean (Poisson) number of jets per event
PHILOW, PHIHIGH = -numpy.pi, numpy.pi    # allowed range of phi (azimuthal angle)
ETALOW, ETAHIGH = -5, 5                  # allowed range of eta (~polar angle)
ERRPHI = 0.01                            # detector resolution in phi
ERRETA = 0.01                            # detector resolution in eta
RECOPROB = 0.95                          # probability that a real jet IS reconstructed
AVENUMFAKES = 1                          # mean (Poisson) number of spurious (fake) reconstructions

# ---- generator-level jets ----------------------------------------------
# All events live in flat arrays; event i owns the half-open index range
# [genstarts[i], genstops[i]).
numgenjets = numpy.random.poisson(lam=AVENUMJETS, size=NUMEVENTS)
genstops = numpy.cumsum(numgenjets)
genstarts = genstops - numgenjets        # each start is the previous stop; the first is 0
genphi = numpy.random.uniform(low=PHILOW, high=PHIHIGH, size=genstops[-1])
geneta = numpy.random.uniform(low=ETALOW, high=ETAHIGH, size=genstops[-1])

# ---- mismeasurement: Gaussian smearing of the true phi and eta ----------
phiwitherr = genphi + numpy.random.normal(loc=0, scale=ERRPHI, size=genstops[-1])
etawitherr = geneta + numpy.random.normal(loc=0, scale=ERRETA, size=genstops[-1])

# ---- inefficiency: True marks a generated jet that gets reconstructed ---
recomask = numpy.random.uniform(low=0, high=1, size=genstops[-1]) < RECOPROB

# ---- spurious (fake) jets, same start/stop layout as the gen jets -------
numfakes = numpy.random.poisson(lam=AVENUMFAKES, size=NUMEVENTS)
fakestops = numpy.cumsum(numfakes)
fakestarts = fakestops - numfakes
fakephi = numpy.random.uniform(low=PHILOW, high=PHIHIGH, size=fakestops[-1])
fakeeta = numpy.random.uniform(low=ETALOW, high=ETAHIGH, size=fakestops[-1])

# ---- containers for the reconstructed data (filled in the next cell) ----
# One slot for every surviving real jet plus every fake jet.
recophi = numpy.empty(int(recomask.sum()) + int(numfakes.sum()), dtype=genphi.dtype)
recoeta = numpy.empty_like(recophi)
recostarts = numpy.empty_like(genstarts)
recostops = numpy.empty_like(genstops)

truematches = []
recostart = 0
recostop = 0

In [26]:
# Build the reconstructed collection of every event: the smeared real jets
# that survived the efficiency mask plus that event's fake jets, written
# into recophi/recoeta in a random order.
for i, (genstart, genstop, fakestart, fakestop) in enumerate(
        zip(genstarts, genstops, fakestarts, fakestops)):
    mask = recomask[genstart:genstop]

    # smeared coordinates of the real jets that were actually reconstructed
    survivingphi = phiwitherr[genstart:genstop][mask]
    survivingeta = etawitherr[genstart:genstop][mask]

    # real jets first, then this event's fakes
    eventphi = numpy.concatenate((survivingphi, fakephi[fakestart:fakestop]))
    eventeta = numpy.concatenate((survivingeta, fakeeta[fakestart:fakestop]))
    recostop += len(eventphi)

    # gen-level and reco-level data are both unordered sets, so scatter the
    # jets into this event's slot range through a random permutation
    order = numpy.random.permutation(recostop - recostart)
    recophi[recostart + order] = eventphi
    recoeta[recostart + order] = eventeta

    # The permutation is the "true match" map (not known to the physicist!).
    # The bookkeeping for it is currently disabled inside the string below.
    '''
    truematch = numpy.ones(genstop - genstart, dtype=numgenjets.dtype) * -1
    truematch[mask] = order[:mask.sum()]
    truematches.append(truematch)
    '''
    recostarts[i], recostops[i] = recostart, recostop
    recostart = recostop

In [27]:
# Per-event jet multiplicities, and the offsets of each event's block of
# (gen, reco) pairs inside one flat array: event i owns the index range
# [pairs_len[i], pairs_len[i + 1]), which holds gen_len[i] * reco_len[i] pairs.

gen_len = numpy.subtract(genstops, genstarts).astype(numpy.int32)
reco_len = numpy.subtract(recostops, recostarts).astype(numpy.int32)

pairs_per_event = gen_len * reco_len
pairs_len = numpy.concatenate(([0], numpy.cumsum(pairs_per_event))).astype(numpy.int32)

In [28]:
def deltaeta(eta1, eta2):
    """Return the signed pseudorapidity difference ``eta1 - eta2``."""
    difference = eta1 - eta2
    return difference

def deltaPhi(phi1, phi2):
    """Return ``phi1 - phi2`` wrapped into the interval [-pi, pi).

    Works on scalars and, element-wise, on numpy arrays (with the usual
    broadcasting). The earlier ``while``-loop version raised on array input
    and never terminated for an infinite difference; this closed form
    returns NaN for non-finite input instead.

    A difference that already lies inside [-pi, pi) is returned unchanged,
    because the number of full turns removed is then exactly zero.
    """
    two_pi = 2 * numpy.pi
    dphi = phi1 - phi2
    # Whole turns to remove so that the result lands in [-pi, pi).
    turns = numpy.floor((dphi + numpy.pi) / two_pi)
    return dphi - two_pi * turns

def deltar(deta, dphi):
    """Return the angular distance ``sqrt(deta**2 + dphi**2)``.

    ``deta`` and ``dphi`` are the already-computed eta and phi differences
    (scalars or numpy arrays). To start from raw coordinates, compose the
    helpers: ``deltar(deltaeta(eta1, eta2), deltaPhi(phi1, phi2))``.

    This function used to be defined twice; the four-argument
    ``(eta1, eta2, phi1, phi2)`` variant was immediately shadowed by this
    two-argument one and could never be called, so only this form is kept.
    """
    return numpy.hypot(deta, dphi)


In [29]:
# Fill the flat deltaR buffer. Event i's block starts at pairs_len[i] and is
# laid out gen-major: all reco jets for the first gen jet, then all reco jets
# for the second gen jet, and so on.
deltaR = numpy.empty(pairs_len[-1])
for i in range(NUMEVENTS):
    gen_indices = range(genstarts[i], genstops[i])
    reco_indices = range(recostarts[i], recostops[i])
    flat = pairs_len[i]                  # running write position in deltaR
    for j in gen_indices:
        eta_j, phi_j = geneta[j], genphi[j]
        for k in reco_indices:
            deltaR[flat] = deltar(deltaeta(eta_j, recoeta[k]),
                                  deltaPhi(phi_j, recophi[k]))
            flat += 1

#### How to determine truematch?

The approach taken here: for each generated particle `particle1[i]`, the reconstructed particle `particle2[j]` that gives the minimum deltaR is taken as its match. ~~Furthermore, this deltaR value must be less than a certain threshold, so that we know that we are not matching noise.~~

In [30]:

# For every generated jet, find the reconstructed jet of the same event with
# the smallest deltaR.
#
# Each event's slice of the flat deltaR array is a gen-major block of
# gen_len[i] * reco_len[i] values, so it can be viewed as a
# (gen_len[i], reco_len[i]) matrix and reduced row by row.
#
# Despite its name, `truematches` holds the *estimated* match: the global
# reco index chosen for each gen jet, or -1 when the event has no reco jets.
# Several gen jets may still pick the same reco jet.

truematches = numpy.full(genstops[-1], -1, dtype=numpy.int32)
for i in range(NUMEVENTS):
    num_reco = reco_len[i]
    if num_reco == 0:
        # Nothing to match against: keep -1 rather than storing
        # recostarts[i], which is not a reco jet of this event.
        continue
    block = deltaR[pairs_len[i]:pairs_len[i + 1]].reshape(gen_len[i], num_reco)
    # Searching the reversed rows keeps the earlier tie rule: when several
    # reco jets share the minimum, the one with the largest index wins.
    nearest = num_reco - 1 - numpy.argmin(block[:, ::-1], axis=1)
    truematches[genstarts[i]:genstops[i]] = recostarts[i] + nearest

In [31]:
# Print the matching for the first few events only (at most 6, and never
# more than NUMEVENTS so a small simulation does not index past the arrays).
# A '-' in the right-hand column means no match was found (stored as -1).

for i in range(min(6, NUMEVENTS)):
    print("\n Event: {}\n".format(i))
    print(" Generator Level index              Reconstructed Level index\n")
    for j in range(genstarts[i], genstops[i]):
        matched = truematches[j] if truematches[j] != -1 else '-'
        print("      {}                  ->                   {}".format(j, matched))


 Event: 0

 Generator Level index              Reconstructed Level index

      0                  ->                   6
      1                  ->                   7
      2                  ->                   4
      3                  ->                   0
      4                  ->                   2
      5                  ->                   5
      6                  ->                   9
      7                  ->                   8
      8                  ->                   3
      9                  ->                   3
      10                  ->                   1
      11                  ->                   10

 Event: 1

 Generator Level index              Reconstructed Level index

      12                  ->                   19
      13                  ->                   20
      14                  ->                   23
      15                  ->                   12
      16                  ->                   21
      17             