In [None]:
import functools
from time import perf_counter

import laspy
import matplotlib.pyplot as plt
import numpy as np
import py4dgeo
from scipy.spatial import KDTree as ScipyKDTree
from sklearn.neighbors import KDTree as SklearnKDTree

In [None]:
def measure(f):
    """Decorator that times each call of ``f``.

    The wrapped callable forwards all positional and keyword arguments to
    ``f`` and returns a tuple ``(elapsed, ret)``: ``elapsed`` is the duration
    of the call in fractional seconds as measured with ``perf_counter`` and
    ``ret`` is whatever ``f`` returned.
    """
    # functools.wraps keeps the original __name__/__doc__ on the wrapper so
    # decorated benchmarks stay identifiable in tracebacks and help().
    @functools.wraps(f)
    def _decorated(*args, **kwargs):
        start = perf_counter()
        ret = f(*args, **kwargs)
        return perf_counter() - start, ret
    return _decorated

In [None]:
def minimum_across_runs(n, func, *args, **kwargs):
    """Call ``func`` ``n`` times and report the fastest measured run.

    ``func`` must return a ``(elapsed, result)`` tuple on every call, which is
    what functions decorated with ``measure`` do.

    Parameters:
        n: Number of repetitions; must be at least 1.
        func: Callable returning ``(elapsed, result)``.
        *args, **kwargs: Forwarded unchanged to every call of ``func``.

    Returns:
        A tuple ``(min_elapsed, result)`` where ``min_elapsed`` is the smallest
        elapsed value over all runs and ``result`` is the result of the *last*
        run (not necessarily the fastest one).

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        # Without this guard the failure would surface as an opaque
        # "min() arg is an empty sequence" error.
        raise ValueError(f"n must be at least 1, got {n}")

    timings = []
    for _ in range(n):
        elapsed, result = func(*args, **kwargs)
        timings.append(elapsed)
    return min(timings), result

In [None]:
def create_random_data(n, seed=None):
    """Create ``n`` uniformly distributed samples within the unit cube.

    Parameters:
        n: Number of points to generate.
        seed: Optional seed passed to ``np.random.default_rng``. Leave as
            ``None`` for fresh entropy on every call; pass an integer to make
            the generated point cloud reproducible.

    Returns:
        A ``float32`` array of shape ``(n, 3)`` with coordinates drawn
        uniformly between 0 and 1 along each axis.
    """
    rng = np.random.default_rng(seed)
    samples = rng.uniform(0.0, 1.0, size=(n, 3))
    # Values are drawn in double precision and then narrowed to single precision.
    return samples.astype(np.float32)

In [None]:
def read_huge_las_dataset(path="ahk_2017_full.las"):
    """Read the large LAS benchmark dataset (described as ~250M points).

    Parameters:
        path: Location of the LAS file. Defaults to ``ahk_2017_full.las`` in
            the current working directory.

    Returns:
        An array with one row per point and the x, y and z components as its
        three columns.
    """
    las = laspy.read(path)
    # NOTE(review): depending on the laspy version, ``.array`` may expose the
    # raw (unscaled) storage of each dimension rather than real-world
    # coordinates -- confirm this is what the benchmark wants.
    columns = (las.x.array, las.y.array, las.z.array)
    return np.stack(columns, axis=1)

In [None]:
def read_small_las_dataset(path='ahk_2017_small.xyz'):
    """Read the small benchmark dataset (described as ~2M points).

    Despite the function name, the file is parsed as space-delimited text
    (one point per line), not as LAS.

    Parameters:
        path: Location of the text file. Defaults to ``ahk_2017_small.xyz``
            in the current working directory.

    Returns:
        A ``float32`` array as produced by ``np.genfromtxt``; for a file with
        several lines of three values each this has shape ``(n, 3)``.
    """
    return np.genfromtxt(path, delimiter=' ', dtype=np.float32)

In [None]:
@measure
def build_pcl_kdtree(data):
    """Wrap ``data`` in a PCL point cloud and build its KDTree search index.

    Because of the ``measure`` decorator, a call yields
    ``(elapsed_seconds, cloud)`` rather than the cloud alone.
    """
    cloud = py4dgeo.PCLPointCloud(data)
    strategy = py4dgeo.SearchStrategy.kdtree
    cloud.build_tree(strategy)
    return cloud

In [None]:
@measure
def build_pcl_bruteforce(data):
    """Wrap ``data`` in a PCL point cloud and set up the brute-force search.

    Because of the ``measure`` decorator, a call yields
    ``(elapsed_seconds, cloud)`` rather than the cloud alone.
    """
    cloud = py4dgeo.PCLPointCloud(data)
    strategy = py4dgeo.SearchStrategy.bruteforce
    cloud.build_tree(strategy)
    return cloud

In [None]:
@measure
def build_pcl_octree(data):
    """Wrap ``data`` in a PCL point cloud and build its octree search index.

    Because of the ``measure`` decorator, a call yields
    ``(elapsed_seconds, cloud)`` rather than the cloud alone.
    """
    cloud = py4dgeo.PCLPointCloud(data)
    strategy = py4dgeo.SearchStrategy.octree
    cloud.build_tree(strategy)
    return cloud

In [None]:
@measure
def radius_mine(tree, point, radius):
    """Run a timed radius search on a py4dgeo point cloud.

    Delegates to ``tree.radius_search(point, radius)``; the ``measure``
    decorator turns the return value into ``(elapsed_seconds, search_result)``.
    """
    found = tree.radius_search(point, radius)
    return found

In [None]:
@measure
def build_scipy(data):
    """Construct a SciPy ``KDTree`` over ``data``.

    The ``measure`` decorator turns the return value into
    ``(elapsed_seconds, tree)``.
    """
    tree = ScipyKDTree(data)
    return tree

In [None]:
@measure
def radius_scipy(tree, point, radius):
    """Run a timed radius search on a SciPy ``KDTree``.

    Delegates to ``tree.query_ball_point(point, radius)``; the ``measure``
    decorator turns the return value into ``(elapsed_seconds, search_result)``.
    """
    found = tree.query_ball_point(point, radius)
    return found

In [None]:
@measure
def build_sklearn(data):
    """Construct a scikit-learn ``KDTree`` over ``data``.

    The ``measure`` decorator turns the return value into
    ``(elapsed_seconds, tree)``.
    """
    tree = SklearnKDTree(data)
    return tree

In [None]:
@measure
def radius_sklearn(tree, point, radius):
    """Run a timed radius search on a scikit-learn ``KDTree``.

    ``point`` gets a leading axis added before it is handed to
    ``tree.query_radius``, so a single point is submitted as a batch of one.
    The ``measure`` decorator turns the return value into
    ``(elapsed_seconds, search_result)``.
    """
    batch_of_one = np.expand_dims(point, axis=0)
    found = tree.query_radius(batch_of_one, radius)
    return found

In [None]:
@measure
def build_nanoflann(data):
    """Wrap ``data`` in a py4dgeo ``NFPointCloud2`` and build its search tree.

    Because of the ``measure`` decorator, a call yields
    ``(elapsed_seconds, cloud)`` rather than the cloud alone.
    """
    cloud = py4dgeo.NFPointCloud2(data)
    cloud.build_tree()
    return cloud

In [None]:
# Registry of the implementations to benchmark. Each entry is a
# (plot label, build function, radius-search function) triple; the benchmark
# loop unpacks the entries in exactly that order.
implementations = [
    ("PCL KDTree", build_pcl_kdtree, radius_mine),
    ("SciPy KDTree", build_scipy, radius_scipy),
    ("NanoFLANN", build_nanoflann, radius_mine),
    # Currently disabled -- uncomment an entry to include it in the run:
    # ("PCL Bruteforce", build_pcl_bruteforce, radius_mine),
    # ("PCL OCtree", build_pcl_octree, radius_mine),
    # ("SKLearn KDTree", build_sklearn, radius_sklearn),
]

In [None]:
# Benchmark every registered implementation over geometrically growing point
# clouds and plot build time per point (left) and radius-query time (right).
N_RUNS = 10  # repetitions per measurement; the minimum over the runs is kept
nsamples = [2**i * 1000 for i in range(10)]  # 1,000 ... 512,000 points

# Generate each cloud exactly once so that all implementations are timed on
# identical inputs instead of on a fresh random draw per implementation.
datasets = {n: create_random_data(n) for n in nsamples}
query_point = np.array([0.5, 0.5, 0.5])  # centre of the unit cube

fig, axs = plt.subplots(1, 2, figsize=(16, 6))
for name, build_func, radius_func in implementations:
    build_times = []
    query_times = []
    for n in nsamples:
        data = datasets[n]
        build_time, cloud = minimum_across_runs(N_RUNS, build_func, data)
        build_times.append(build_time / n)
        # The radius shrinks like n^(-1/3), so the ball volume scales like 1/n
        # and the expected number of returned points stays roughly constant.
        radius = 2 * n ** (-(1 / 3))
        query_time, result = minimum_across_runs(
            N_RUNS, radius_func, cloud, query_point, radius
        )
        query_times.append(query_time)
    axs[0].plot(nsamples, build_times, label=name)
    axs[1].plot(nsamples, query_times, label=name)

# Settings shared by both panels.
for ax in axs:
    ax.set_xscale("log")
    ax.set_xlabel("Point Cloud size")
    ax.legend()

axs[0].set_ylabel("Time/Point [s]")
axs[1].set_ylabel("Query Time [s]")
axs[0].set_title("KDTree build times/point")
axs[1].set_title("KDTree query time (appr. constant return size)")
# Render explicitly so the cell does not echo the repr of the last call.
plt.show()