# Vectorization of offsets from parents

This notebook replaces the two potentially unvectorized parts of the original awkward-array implementation so that the whole computation is vectorized.

Here, the data is generated as usual, and the parents algorithm is used to build the parents array from the starts and stops. Finally, from the parents we derive the offsets again (as starts and counts) with both the original and the modified version, and compare the two results to check that they agree.

In [1]:
import numpy
import numba

In [2]:
NUMEVENTS = 320     # Number of events to simulate the data for.
AVENUMJETS = 32     # Average number of jets per event (Poisson mean).

# Number of jets in each event, drawn from a Poisson distribution.
numjets = numpy.random.poisson(lam=AVENUMJETS, size=NUMEVENTS).astype(numpy.int32)

# Stops array: running total of jets up to and including each event.
jets_stops = numpy.cumsum(numjets).astype(numpy.int32)

# Starts array: each event begins where the previous one stopped, i.e. its
# stop minus its own jet count (0 for the first event).
jets_starts = jets_stops - numjets

In [3]:
# Total number of entries over all events: the last stop is the cumulative count.
NUMPARTICLES = jets_stops[-1]

In [4]:
# Output buffer with one slot per entry (length jets_stops[-1]); it is
# allocated uninitialized and filled in by parent() below.
parents = numpy.empty(jets_stops[-1], dtype=numpy.int32)
# Sequential evaluation: a single loop over the events, compiled with numba.
@numba.jit()
def parent(starts, stops, pointers):
    """Write index ``i`` into ``pointers[starts[i]:stops[i]]`` for every ``i``.

    ``pointers`` is modified in place; nothing is returned.
    """
    for i in range(len(starts)):
        pointers[starts[i]:stops[i]] = i
parent(jets_starts, jets_stops, parents)

In [5]:
# Offsets from parents: original awkward array version

def fromparents(parents, content):
    """Derive per-group ``starts`` and ``counts`` from a ``parents`` array.

    ``parents[k]`` is the index of the group that element ``k`` belongs to;
    negative values mark elements that belong to no group. Runs of equal
    consecutive parent values are located, and for every run with a
    non-negative parent the position where the run begins and its length
    are recorded.

    Parameters
    ----------
    parents : numpy.ndarray of integers
        Group index for every element.
    content : sequence
        Only its length is used; it must match ``len(parents)``.

    Returns
    -------
    list
        ``[starts, counts]``, two integer arrays of length
        ``parents.max() + 1`` (zero length when ``parents`` is empty or
        holds no non-negative value). Groups that never occur in
        ``parents`` keep start 0 and count 0.

    Raises
    ------
    ValueError
        If ``parents`` and ``content`` differ in length.
    """
    if len(parents) != len(content):
        raise ValueError("parents array must have the same length as content")

    if len(parents) == 0:
        # No elements means no groups; parents.max() would raise on an
        # empty array, so answer directly.
        return [numpy.zeros(0, dtype=int), numpy.zeros(0, dtype=int)]

    # Positions at which the parent value differs from the previous element,
    # i.e. where a new run begins.
    tmp = numpy.nonzero(parents[1:] != parents[:-1])[0] + 1

    # Run boundaries, including the two ends of the array.
    # (dtype=int replaces the numpy.int alias, which NumPy 1.24 removed.)
    changes = numpy.empty(len(tmp) + 2, dtype=int)
    changes[0] = 0
    changes[-1] = len(parents)
    changes[1:-1] = tmp

    # One slot per group index 0..max; clamp so that an array holding only
    # values below -1 does not request a negative length.
    length = max(int(parents.max()) + 1, 0)
    starts = numpy.zeros(length, dtype=int)
    counts = numpy.zeros(length, dtype=int)

    # Parent value of each run, and which runs belong to a real group.
    where = parents[changes[:-1]]
    real = (where >= 0)

    starts[where[real]] = (changes[:-1])[real]
    counts[where[real]] = (changes[1:] - changes[:-1])[real]

    return [starts, counts]

In [6]:
# Random payload with one value per entry; fromparents only checks its length.
content = numpy.random.randn(jets_stops[-1])
# Reference result from the original implementation.
starts_awk, counts_awk = fromparents(parents, content)

### Parts that might be unvectorized 

1. `numpy.nonzero`
2. `parents.max()`

**Solutions**

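1. `numpy.nonzero()` can be replaced with a combination of `numpy.where` and a boolean mask. (Note that the NumPy documentation describes single-argument `numpy.where(mask)` as shorthand for `numpy.asarray(mask).nonzero()`, so this changes the spelling rather than the underlying operation.)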
2. `numpy.max()` is a sequential evaluation (at least I think so, as the source is all over the place). A simple pairwise reduction that takes about log2(n) vectorized steps can be applied to calculate this instead.

In [8]:
# Vectorized max()
# Based on parallel reduction in bisection search spirit.

def arr_max(arr):
    """Return the largest element of a 1-D array by pairwise reduction.

    On every step the first ``i`` elements of a working copy are replaced by
    the element-wise maximum of themselves and the following ``i`` elements,
    so the block that holds the maximum roughly halves each time. The input
    array is not modified.

    Parameters
    ----------
    arr : numpy.ndarray
        One-dimensional, non-empty array.

    Returns
    -------
    scalar
        The maximum value, as an element of the working copy.

    Raises
    ------
    ValueError
        If ``arr`` is empty.
    """
    if len(arr) == 0:
        raise ValueError("arr_max() requires a non-empty array")

    temp_data = numpy.copy(arr)
    i = len(arr) // 2
    while i >= 1:
        # Fold the block [i, 2*i) onto [0, i). The right-hand side is
        # evaluated into a new array before the assignment takes place.
        temp_data[:i] = numpy.maximum(temp_data[:i], temp_data[i:2 * i])
        # Halve the block, rounding up so that an odd-sized block is still
        # covered completely; a block of one element ends the loop.
        i = (i + 1) // 2 if i > 1 else 0
    # For an odd length the last element is never folded in by the first
    # step, so compare it explicitly.
    return max(temp_data[0], temp_data[-1])



In [9]:
# modified version

def from_parents_modified(parents, content):
    """Derive per-group ``starts`` and ``counts`` from a ``parents`` array.

    Variant of the original routine in which the run boundaries are found
    with ``numpy.where`` on a boolean mask and the largest parent index is
    obtained from ``arr_max`` instead of ``parents.max()``.

    ``parents[k]`` is the index of the group that element ``k`` belongs to;
    negative values mark elements that belong to no group.

    Parameters
    ----------
    parents : numpy.ndarray of integers
        Group index for every element.
    content : sequence
        Only its length is used; it must match ``len(parents)``.

    Returns
    -------
    list
        ``[starts, counts]``, two integer arrays with one entry per group
        index from 0 up to the largest parent value (zero length when
        ``parents`` is empty or holds no non-negative value). Groups that
        never occur in ``parents`` keep start 0 and count 0.

    Raises
    ------
    ValueError
        If ``parents`` and ``content`` differ in length.
    """
    if len(parents) != len(content):
        raise ValueError("parents array must have the same length as content")

    if len(parents) == 0:
        # No elements means no groups; the maximum of an empty array is
        # undefined, so answer directly.
        return [numpy.zeros(0, dtype=int), numpy.zeros(0, dtype=int)]

    # Change 1: boolean mask of positions whose parent differs from the
    # previous element, turned into indices with numpy.where.
    mask = parents[1:] != parents[:-1]
    tmp = numpy.where(mask)[0] + 1

    # Run boundaries, including the two ends of the array.
    # (dtype=int replaces the numpy.int alias, which NumPy 1.24 removed.)
    changes = numpy.empty(len(tmp) + 2, dtype=int)
    changes[0] = 0
    changes[-1] = len(parents)
    changes[1:-1] = tmp

    # Change 2: largest parent index via arr_max; clamp so that an array
    # holding only values below -1 does not request a negative length.
    length = max(int(arr_max(parents)) + 1, 0)
    starts = numpy.zeros(length, dtype=int)
    counts = numpy.zeros(length, dtype=int)

    # Parent value of each run, and which runs belong to a real group.
    where = parents[changes[:-1]]
    real = (where >= 0)

    starts[where[real]] = (changes[:-1])[real]
    counts[where[real]] = (changes[1:] - changes[:-1])[real]

    return [starts, counts]

In [10]:
# Run the modified implementation on the same inputs as the reference run.
starts_mod, counts_mod = from_parents_modified(parents, content)

In [13]:
# Compare the two outputs: both the starts and the counts must agree
# (shape as well as values) for the two versions to be equivalent.
assert numpy.array_equal(starts_mod, starts_awk)
assert numpy.array_equal(counts_mod, counts_awk)