### Imports and implementations to test

In [10]:
import numpy as np
import time as t

In [76]:
def find_distance_norm(pos1, pos2):
    """Return the Manhattan (L1) distance between two positions via ``np.linalg.norm``.

    ``pos1 - pos2`` must be a valid element-wise subtraction, so at least one
    operand has to be an ndarray (two plain lists/tuples raise ``TypeError``).

    In this notebook's timings this is the slowest candidate for 2-element
    positions; it is kept for comparison only.
    """
    delta = pos1 - pos2
    return np.linalg.norm(delta, ord=1)

In [131]:
def find_distance_bi(pos1, pos2):
    """Return the Manhattan distance between two 2-D positions using built-in ``abs``.

    Only indices 0 and 1 of each argument are read, so any indexable pair
    (ndarray, list, tuple) works. In this notebook's timings it is the fastest
    candidate for every tested container.
    """
    dx = abs(pos1[0] - pos2[0])
    dy = abs(pos1[1] - pos2[1])
    return dx + dy

In [82]:
def find_distance_vec(pos1, pos2):
    """Return the Manhattan distance using array subtraction and the built-in ``sum``.

    ``pos1 - pos2`` must be a valid element-wise subtraction, so at least one
    operand has to be an ndarray; two plain tuples or lists raise ``TypeError``.
    """
    offsets = abs(pos1 - pos2)
    return sum(offsets)

In [87]:
def find_distance_vec_np(pos1, pos2):
    """Return the Manhattan distance using array subtraction, ``np.abs`` and ``np.sum``.

    ``pos1 - pos2`` must be a valid element-wise subtraction, so at least one
    operand has to be an ndarray; two plain tuples or lists raise ``TypeError``.
    """
    offsets = np.abs(pos1 - pos2)
    return np.sum(offsets)

### Preparations

In [75]:
# Shared benchmark configuration used by the timing cells.
n = 10_000_000  # number of positions generated and measured
distances = np.zeros(shape=(n, 1))  # (n, 1) float buffer filled by the for-loop benchmark
target = np.array([0, 0])  # reference point every distance is measured against

In [109]:
# n random positions, one (x, y) row each, with coordinates drawn from {0, 1}.
np_array = np.random.randint(low=0, high=2, size=(n, 2))
np_array[:5]  # preview the first five rows

array([[1, 1],
       [0, 1],
       [0, 0],
       [1, 0],
       [1, 0]])

In [110]:
# Same data as plain nested Python lists (one [x, y] list per row).
bi_list = np_array.tolist()
bi_list[:5]  # preview the first five entries

[[1, 1], [0, 1], [0, 0], [1, 0], [1, 0]]

In [113]:
# Same data again, with each row converted to an (x, y) tuple.
bi_tuples = [(v[0], v[1]) for v in bi_list]
bi_tuples[:5]  # preview the first five entries

[(1, 1), (0, 1), (0, 0), (1, 0), (1, 0)]

### Type checks

In [115]:
type(np_array)

numpy.ndarray

In [149]:
type(bi_list)

list

In [150]:
type(bi_list[0])

list

In [117]:
type(bi_tuples)

list

In [119]:
type(bi_tuples[0])

tuple

### Performance function for for-loop

In [144]:
def performance(f, data_struct, n=n, target=target):
    """Time ``f`` applied to the first ``n`` positions in a plain Python for-loop.

    Parameters
    ----------
    f : callable
        Distance function, called as ``f(data_struct[i], target)``.
    data_struct : indexable
        Container of positions; it is indexed with ``0 .. n - 1``.
    n : int, optional
        Number of positions to process. Defaults to the value the module-level
        ``n`` had when this function was defined.
    target : optional
        Reference position passed as the second argument to ``f``. Defaults to
        the module-level ``target`` captured at definition time.

    Returns
    -------
    float
        Elapsed time in seconds.

    Notes
    -----
    Side effect: each result is written into the module-level ``distances``
    buffer, so that buffer must hold at least ``n`` entries.
    """
    # perf_counter is monotonic and high-resolution; time.time() can jump when
    # the system clock is adjusted, which would corrupt a measurement.
    start_time = t.perf_counter()
    for i in range(n):
        distances[i] = f(data_struct[i], target)
    return t.perf_counter() - start_time

### For-loop tests with ndarray

In [152]:
func = [find_distance_bi, find_distance_vec, find_distance_vec_np, find_distance_norm] # functions to test

In [145]:
# Run the for-loop benchmark for every candidate on the ndarray input and
# print the elapsed seconds next to the function's name.
for f in func:
    res = performance(f, np_array)
    print(f'Total time taken: {res}, tested: {f.__name__}')

Total time taken: 19.80888056755066, tested: find_distance_bi
Total time taken: 56.1346549987793, tested: find_distance_vec
Total time taken: 96.89424157142639, tested: find_distance_vec_np
Total time taken: 153.18575859069824, tested: find_distance_norm


In [147]:
distances[:5]

array([[ 2.],
       [ 1.],
       [ 0.],
       [ 1.],
       [ 1.]])

### For-loop tests with nested built-in lists

In [153]:
func = [find_distance_bi, find_distance_vec, find_distance_vec_np, find_distance_norm] # functions to test

In [148]:
# Run the for-loop benchmark for every candidate on the nested-list input and
# print the elapsed seconds next to the function's name.
for f in func:
    res = performance(f, bi_list)
    print(f'Total time taken: {res}, tested: {f.__name__}')

Total time taken: 17.329957962036133, tested: find_distance_bi
Total time taken: 88.06503868103027, tested: find_distance_vec
Total time taken: 139.55150318145752, tested: find_distance_vec_np
Total time taken: 206.92552304267883, tested: find_distance_norm


In [151]:
distances[:5]

array([[ 2.],
       [ 1.],
       [ 0.],
       [ 1.],
       [ 1.]])

### For-loop tests with list of tuples

In [159]:
func = [find_distance_bi, find_distance_vec, find_distance_vec_np, find_distance_norm] # functions to test

In [162]:
# Run the for-loop benchmark for every candidate on the list-of-tuples input
# and print the elapsed seconds next to the function's name.
for f in func:
    res = performance(f, bi_tuples)
    print(f'Total time taken: {res}, tested: {f.__name__}')

Total time taken: 17.36513113975525, tested: find_distance_bi
Total time taken: 92.77490258216858, tested: find_distance_vec
Total time taken: 136.1372253894806, tested: find_distance_vec_np
Total time taken: 206.48683667182922, tested: find_distance_norm


In [164]:
distances[:5]

array([[ 2.],
       [ 1.],
       [ 0.],
       [ 1.],
       [ 1.]])

### For-loop conclusions:
    1. The built-in distance function (find_distance_bi) is the fastest distance function across all tested data structures
    2. A list of tuples performs as well as a nested list with the fastest distance function (both ~17.3 s)
    3. numpy.ndarray input performs best by far with the vector-algebra functions (find_distance_vec,
       find_distance_vec_np, find_distance_norm), but those functions are still slower than find_distance_bi

### Performance function for numpy.fromiter

In [185]:
# Named wrappers are used instead of lambdas so that the benchmark loop can
# print each function's __name__. "dec" stands for "decorated": each wrapper
# fixes the second argument to the module-level `target`.
def dec_bi(t):
    """Return ``find_distance_bi(t, target)`` for a single position ``t``.

    Note: the parameter ``t`` shadows the ``time`` module alias inside this function.
    """
    return find_distance_bi(t, target)
def dec_vec(t):
    """Return ``find_distance_vec(t, target)`` for a single position ``t``."""
    return find_distance_vec(t, target)
def dec_vec_np(t):
    """Return ``find_distance_vec_np(t, target)`` for a single position ``t``."""
    return find_distance_vec_np(t, target)
def dec_norm(t):
    """Return ``find_distance_norm(t, target)`` for a single position ``t``."""
    return find_distance_norm(t, target)

In [196]:
def performance_fromiter(f, data_struct, n=n, target=target, dtype=np.int32):
    """Time building a distance array with ``np.fromiter``.

    Parameters
    ----------
    f : callable
        Single-argument function applied to every element of ``data_struct``.
    data_struct : iterable
        Positions to measure; must yield at least ``n`` items.
    n : int, optional
        Number of items ``np.fromiter`` reads (passed as ``count``). Defaults to
        the value the module-level ``n`` had when this function was defined.
    target : optional
        Unused here; kept so the signature parallels ``performance``.
    dtype : data-type, optional
        Element type of the array that ``np.fromiter`` builds.

    Returns
    -------
    float
        Elapsed time in seconds.

    Notes
    -----
    The array that is built is discarded. The module-level ``distances`` buffer
    is NOT modified by this function, so inspecting ``distances`` afterwards
    shows results from earlier cells.
    """
    # perf_counter is monotonic and high-resolution; time.time() can jump when
    # the system clock is adjusted.
    start_time = t.perf_counter()
    # Feed a generator rather than a list: materialising a full Python list
    # first defeats the streaming purpose of fromiter and doubles peak memory.
    np.fromiter((f(vec) for vec in data_struct), dtype=dtype, count=n)
    return t.perf_counter() - start_time

### numpy.fromiter tests with np.ndarray

In [187]:
func = [dec_bi, dec_vec, dec_vec_np, dec_norm] # functions to test

In [197]:
# Run the np.fromiter benchmark for every wrapper on the ndarray input and
# print the elapsed seconds next to the wrapper's name.
for f in func:
    res = performance_fromiter(f, np_array)
    print(f'Total time taken: {res}, tested: {f.__name__}')

Total time taken: 14.165681600570679, tested: dec_bi
Total time taken: 44.89225745201111, tested: dec_vec
Total time taken: 87.711678981781, tested: dec_vec_np
Total time taken: 147.02865481376648, tested: dec_norm


In [199]:
distances[:5]

array([[ 2.],
       [ 1.],
       [ 0.],
       [ 1.],
       [ 1.]])

### numpy.fromiter tests with a nested list

In [191]:
func = [dec_bi, dec_vec, dec_vec_np, dec_norm] # functions to test

In [198]:
# Run the np.fromiter benchmark for every wrapper on the nested-list input and
# print the elapsed seconds next to the wrapper's name.
for f in func:
    res = performance_fromiter(f, bi_list)
    print(f'Total time taken: {res}, tested: {f.__name__}')

Total time taken: 10.638958215713501, tested: dec_bi
Total time taken: 77.34232234954834, tested: dec_vec
Total time taken: 126.01267528533936, tested: dec_vec_np
Total time taken: 198.65167474746704, tested: dec_norm


In [193]:
distances[:5]

array([[ 2.],
       [ 1.],
       [ 0.],
       [ 1.],
       [ 1.]])

### numpy.fromiter tests with a list of tuples

In [200]:
func = [dec_bi, dec_vec, dec_vec_np, dec_norm] # functions to test

In [201]:
# Run the np.fromiter benchmark for every wrapper on the list-of-tuples input
# and print the elapsed seconds next to the wrapper's name.
for f in func:
    res = performance_fromiter(f, bi_tuples)
    print(f'Total time taken: {res}, tested: {f.__name__}')

Total time taken: 10.850730895996094, tested: dec_bi
Total time taken: 74.99896883964539, tested: dec_vec
Total time taken: 122.50127458572388, tested: dec_vec_np
Total time taken: 191.18013072013855, tested: dec_norm


In [33]:
distances[:5]

array([ 2.,  0.,  1.,  2.,  2.,  0.,  1.,  0.,  0.,  2.])