In [1]:
import numpy
import numba

In [2]:
import collections
import pprint


@numba.jit
def assign_bin(val):
    """Return the upper bound of the power-of-two bin that ``val`` falls in.

    Candidate bounds start at 32 and double up to 2**12. The result is the
    first bound strictly greater than ``val``; anything that is not below
    a smaller bound ends up in the top bin, 2**12.
    """
    upper = 32
    ceiling = 2**12
    # Written as ``not (val < upper)`` rather than ``val >= upper`` so that
    # values which compare false both ways (e.g. NaN) keep climbing to the
    # top bin.
    while upper < ceiling and not (val < upper):
        upper *= 2
    return upper
    

@numba.jit(forceobj=True)
def binning(ary, debug=False):
    """Assign every element of ``ary`` to a bin using ``assign_bin``.

    Object mode is requested explicitly: the optional debug branch uses
    ``collections.defaultdict`` and ``pprint``, which cannot be compiled in
    nopython mode. A bare ``@numba.jit`` relied on the automatic fallback
    to object mode, which newer Numba releases no longer perform.
    Loop-lifting is left at its default, so Numba may still extract the
    binning loop and compile it natively.

    Parameters
    ----------
    ary : numpy.ndarray
        Values to bin. It is iterated element by element, so it is assumed
        to be one-dimensional.
    debug : bool, optional
        When true, pretty-print the number of elements that landed in each
        bin before returning.

    Returns
    -------
    numpy.ndarray
        ``int32`` array with ``ary.size`` entries; entry ``i`` is the bin
        assigned to ``ary[i]``.
    """
    bins = numpy.zeros(ary.size, dtype=numpy.int32)
    # Hot loop: touches only the arrays and the jitted helper.
    for i, v in enumerate(ary):
        bins[i] = assign_bin(v)
    if debug:
        # Pure-Python bookkeeping; this is what forces object mode.
        ctr = collections.defaultdict(int)
        for k in bins:
            ctr[k] += 1
        pprint.pprint(ctr)
    return bins
            

In [3]:
# Draw 10000 random integers from [0, 2**14) and bin them with the jitted
# function; the printed count should equal the number of inputs.
ary = numpy.random.randint(low=0, high=2**14, size=10000)
bins = binning(ary)
print(bins.size)

10000


In [4]:
# Same input with debug=True: also pretty-prints how many values fell in each bin.
binning(ary, debug=True)

defaultdict(<class 'int'>,
            {32: 21,
             64: 23,
             128: 43,
             256: 70,
             512: 138,
             1024: 309,
             2048: 631,
             4096: 8765})


array([4096, 4096, 4096, ..., 4096, 4096, 4096], dtype=int32)

In [5]:
# Re-jit the undecorated Python function in object mode with loop-lifting
# disabled, as a comparison point for the timings.
binning_no_lift = numba.jit(forceobj=True, looplift=False)(binning.py_func)

In [6]:
# Time three variants on the same input: the jitted function, the original
# uncompiled Python function (`py_func`), and the object-mode build with
# loop-lifting turned off.
%timeit binning(ary)
%timeit binning.py_func(ary)
%timeit binning_no_lift(ary)

10000 loops, best of 3: 36.6 µs per loop
100 loops, best of 3: 6.04 ms per loop
The slowest run took 29.10 times longer than the fastest. This could mean that an intermediate result is being cached.
100 loops, best of 3: 5.85 ms per loop
