# Combinations vectorized

#### Uses the following information:
- parents of pairs
- pairs indices
- the usual `starts`, `stops`, `counts`, etc. of both input collections

#### Basic idea:

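Within one event the pairs are numbered consecutively, so the position of a pair inside its event is `pairs_contents - pairs_indices[event]`. Dividing that position by `counts2[event]` yields `left` as the quotient and `right` as the remainder (modulus). Adding `starts1[event]` to the quotient and `starts2[event]` to the remainder turns them into indices into the two flat collections, which is the result we want.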


In [4]:
import numpy as np
import sys
import numba
from awkward import *

In [5]:
NUMEVENTS = 50
AVENUMJETS = 2


def _random_layout(mean, size):
    """Draw Poisson-distributed per-event counts and derive their jagged layout.

    Parameters
    ----------
    mean : expected number of items per event (Poisson lambda).
    size : number of events.

    Returns
    -------
    (numjets, starts, stops, offsets), all integer arrays. ``offsets`` has
    ``size + 1`` entries and begins at 0; ``starts`` equals ``offsets[:-1]``
    and ``stops`` equals ``offsets[1:]``. Both are copies, so none of the
    returned arrays alias each other.
    """
    # `np.int` was only an alias for the builtin `int`; it was deprecated in
    # NumPy 1.20 and removed in 1.24, so the builtin is used directly.
    numjets = np.random.poisson(mean, size).astype(int)
    # Integer dtype: offsets are positions in a flat array, not measurements.
    offsets = np.zeros(size + 1, dtype=int)
    offsets[1:] = np.cumsum(numjets)
    return numjets, offsets[:-1].copy(), offsets[1:].copy(), offsets


# First collection (drawn first, so the random stream is consumed in the
# same order as before).
numjets1, starts1, stops1, offsets1 = _random_layout(AVENUMJETS, NUMEVENTS)
counts1 = stops1 - starts1

# Second collection.
numjets2, starts2, stops2, offsets2 = _random_layout(AVENUMJETS, NUMEVENTS)
counts2 = stops2 - starts2

In [6]:

@numba.jit()
def vectorized_search(offsets, content):
    """Binary-search every flat position for the bin of `offsets` that holds it.

    For each position ``i`` in ``range(len(content))`` the result holds the
    index ``m`` satisfying ``offsets[m] <= i < offsets[m + 1]``. Only the
    length of `content` is used; its values are never read.

    All positions are searched in lock-step with whole-array operations, the
    way one CUDA thread per position would do it.

    NOTE(review): the loop only exits once every position has found its bin,
    so this presumably relies on `offsets` being non-decreasing and on every
    position being smaller than ``offsets[-1]`` -- confirm against callers.
    """
    size = len(content)
    position = np.arange(size, dtype=int)              # threadIdx.x on CUDA
    lo = np.zeros(size, dtype=int)                     # lowest candidate bin
    hi = np.ones(size, dtype=int) * (len(offsets) - 1)  # highest candidate bin

    middle = (lo + hi) // 2
    while True:
        # The bin is too far left if it already ends at or before the position,
        # and too far right if it begins after the position.
        too_far_left = offsets[middle + 1] <= position
        too_far_right = offsets[middle] > position

        # Nothing left to move: every position sits inside its bin.
        if not np.bitwise_or(too_far_left, too_far_right).any():
            return middle

        # Element-wise "if": narrow only the searches that have not converged.
        lo = np.where(too_far_left, middle + 1, lo)
        hi = np.where(too_far_right, middle - 1, hi)
        middle = (lo + hi) // 2

In [7]:
# NOTE: despite its name, pairs_indices holds the cumulative pair counts
# (offsets), with one more entry than there are events: event i owns the
# pairs in pairs_indices[i]:pairs_indices[i + 1].
# The array is allocated as integer up front, so no float-to-int conversion
# through `np.int` (removed in NumPy 1.24) is needed afterwards.
pairs_indices = np.zeros(NUMEVENTS + 1, dtype=int)
pairs_indices[1:] = np.cumsum(counts1 * counts2)

In [8]:
# NOTE: despite its name, pairs_contents is the flat index of every pair:
# 0, 1, ..., pairs_indices[-1] - 1.
# The builtin `int` replaces `np.int`, which was removed in NumPy 1.24.
pairs_contents = np.arange(pairs_indices[-1], dtype=int)

# For every pair, the event whose slice of pairs_indices contains it.
pairs_parents = vectorized_search(pairs_indices, pairs_contents).astype(int)

In [9]:
# Uninitialised output buffers with one slot per pair, using the same shape
# and dtype as pairs_contents. They hold arbitrary values until assigned.
left = np.empty(pairs_contents.shape, dtype=pairs_contents.dtype)
right = np.empty(pairs_contents.shape, dtype=pairs_contents.dtype)

In [10]:
# Per pair: the event that owns it, its position inside that event's run of
# pairs, and the number of second-collection items in that event.
pair_event = pairs_parents[pairs_contents]
pair_local = pairs_contents - pairs_indices[pair_event]
# Never 0 here: an event with counts2 == 0 owns no pairs at all.
pair_width = counts2[pair_event]

# The quotient selects the item of the first collection and the remainder the
# item of the second; starts1/starts2 shift them to flat indices.
# Integer // and % replace the np.floor(a / b) round-trip through floats and
# the `.astype(np.int)` call (np.int was removed in NumPy 1.24).
left[pairs_contents] = starts1[pair_event] + pair_local // pair_width
right[pairs_contents] = starts2[pair_event] + pair_local % pair_width

In [11]:
# Show the left/right index pairs of the first six events.
for event in range(6):
    first, last = pairs_indices[event], pairs_indices[event + 1]
    report = "Event {}\n Left {}\nRight {}\n\n".format(event, left[first:last], right[first:last])
    print(report)

Event 0
 Left []
Right []


Event 1
 Left []
Right []


Event 2
 Left [3 3 4 4]
Right [0 1 0 1]


Event 3
 Left [5 5 5 6 6 6]
Right [2 3 4 2 3 4]


Event 4
 Left []
Right []


Event 5
 Left [ 9 10]
Right [5 5]




In [13]:
# Flat payloads: one standard-normal sample per item of each collection
# (stops[-1] is the total number of items).
content1 = np.random.standard_normal(size=stops1[-1])
content2 = np.random.standard_normal(size=stops2[-1])

In [14]:
# Wrap the pair indices in a jagged structure with one sub-array per event.
# Event i spans pair_starts[i]:pair_stops[i] of the table rows.
pair_starts = pairs_indices[:-1]
pair_stops = pairs_indices[1:]
# NOTE(review): the first Table argument is passed as the total pair count and
# the two columns show up as fields 'f0' and 'f1' in the printed output --
# confirm against the awkward version in use.
pair_table = Table(pairs_indices[-1], left, right)
arr = JaggedArray(pair_starts, pair_stops, pair_table)

In [22]:
# Display the pairs of event 2 as a plain Python list of records
# (the notebook echoes the value of the cell's last expression).
arr[2].tolist()

[{'f0': 3, 'f1': 0},
 {'f0': 3, 'f1': 1},
 {'f0': 4, 'f1': 0},
 {'f0': 4, 'f1': 1}]