In [1]:
import numpy as np
import numba as nb
from nbgeohash import nb_point_decode, nb_point_encode, nb_vector_decode, nb_vector_encode
from pygeohash import point_decode, point_encode
np_vector_decode = np.vectorize(point_decode)
np_vector_encode = np.vectorize(point_encode)

# Create random geohashes

In [11]:
__base32 = '0123456789bcdefghjkmnpqrstuvwxyz'

# performance imporvements depend on geohash precision so we're gonna fix it to 12 which is a very common precision
geohash = "".join(np.random.choice(list(__base32), size=12))
# performance improvements also depend on the number of geohashes so we're gonna fix it to 100000
geohashes = np.array([
    "".join(arr_hash)
    for arr_hash in np.random.choice(list(__base32), size=(100000, 12))
])

# same thing for encoding
latitude, longitude = np.random.randint(-90, 90,
                                        size=1), np.random.randint(-180,
                                                                   180,
                                                                   size=1)
latitudes, longitudes = np.random.randint(
    -90, 90, size=100000), np.random.randint(-180, 180, size=100000)

# warm up for compilation
nb_point_decode(geohash)
nb_point_encode(latitude, longitude)
nb_vector_decode(geohashes)
nb_vector_encode(latitudes, longitudes)

array(['bpzpgxczbzur', 'p81b0bh2n0p0', 'j040h2081040', ...,
       'n0n2hb1850n2', 'gpbxyzbpvxyp', 'p81040h20810'], dtype='<U12')

# Point-wise performance

In [3]:
%%timeit
point_decode(geohash)

19.5 µs ± 470 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [4]:
%%timeit
nb_point_decode(geohash)

4.75 µs ± 216 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [5]:
%%timeit
point_encode(latitude, longitude)

92.8 µs ± 2.37 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [6]:
%%timeit
nb_point_encode(latitude, longitude)

11.2 µs ± 663 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


- That's a x4 gain in decoding performance and a x9 gain in encoding performance.

- Decoding can become faster if ever I find a fast hashtable in Numba because for now I'm using the `ord` to decode base32 characters while the python implementation uses a python dictionary xhich is extremely fast (compared to anything possible in numba).

# Vector-wise performance

In [7]:
%%timeit
np_vector_decode(geohashes)

1.94 s ± 13.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
%%timeit
nb_vector_decode(geohashes)

193 ms ± 3.85 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [9]:
%%timeit
np_vector_encode(latitudes, longitudes)

2.57 s ± 53.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [10]:
%%timeit
nb_vector_encode(latitudes, longitudes)

443 ms ± 12.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


- That's a x10 gain in decoding performance and x5 gain in encoding performance.
- Note that we are comparing Numba implementation to Python functions with optimized vectorization from Numpy. that's why the encoding performance gain is less than the one from point-wise functions.