In [7]:
from VecSim import *
import numpy as np

# Dimensionality of every vector added to the index and of the query vector
dim = 128
# Number of vectors inserted into the index by the following cells
num_elements = 1000

# Create a brute force index for vectors of 128 floats. Use 'L2' as the distance metric.
# Both the initial capacity and the block size are set to num_elements
# (presumably so the whole data set fits in one pre-allocated block -- TODO confirm against the VecSim docs).
bf_params = BFParams()
bf_params.initialCapacity = num_elements
bf_params.blockSize = num_elements
bf_index = BFIndex(bf_params, VecSimType_FLOAT32, dim, VecSimMetric_L2)


In [8]:
# Add `num_elements` random float32 vectors to the index; vector i is stored under label i.
# A fixed seed keeps the generated data identical across "Restart & Run All".
data_rng = np.random.default_rng(42)
data = data_rng.random((num_elements, dim), dtype=np.float32)

for i, vector in enumerate(data):
    bf_index.add_vector(vector, i)

# (label, vector) pairs mirroring what was inserted.
# NOTE(review): not read by the cells visible here -- presumably kept for later verification.
vectors = list(enumerate(data))

print(f'Index size: {bf_index.index_size()}')

Index size: 1000


In [9]:
# Create a random float32 query vector of shape (1, dim).
# A fixed seed keeps the query (and therefore the returned ranking) reproducible across reruns.
query_rng = np.random.default_rng(7)
query_data = query_rng.random((1, dim), dtype=np.float32)

# Create batch iterator for this query vector
batch_iterator = BFBatchIterator(bf_index, query_data)
# Running count of results pulled from the iterator so far
returned_results_num = 0

In [10]:
# Pull the next batch of results from the iterator, requesting BY_SCORE ordering
batch_size = 100
labels, distances = batch_iterator.get_next_results(batch_size, BY_SCORE)

# Rank window covered by this batch: it starts where the previous batch stopped
batch_len = len(labels[0])
first_rank = returned_results_num
last_rank = first_rank + batch_len

print(f'Results in rank {first_rank}-{last_rank} are: \n')
print(f'labels: {labels}')
print(f'scores: {distances}')

# Advance the running total so the next batch reports the following window
returned_results_num = last_rank

Results in rank 0-100 are: 

labels: [[838 671 170 797 755   9 273 748 398 601 508 815 353 847 921 212 311 684
  456 138 240 813 259 465  80 872 177 461 506 565 862 379 979 922 495 985
  323 218 631  18 701 149 931 338 852 293 142 467 574 129 151 913 377 394
  900 278 578 956 363 726 115 790 821 683  62 807 958 600 322  73  17 516
  190 954 472 704 760 104 298 577 795 277  11 478 560 728 553 653 761 521
  687 175  59 189 337 621 103 849 295 545]]
scores: [[16.183123 16.2841   16.43007  16.557827 17.128029 17.178886 17.238358
  17.356022 17.835823 17.858393 17.907871 18.027369 18.047752 18.0821
  18.21483  18.252707 18.26205  18.30121  18.32915  18.372751 18.388922
  18.412844 18.428331 18.443605 18.455826 18.473446 18.486614 18.496714
  18.501905 18.506962 18.530903 18.619265 18.639282 18.730307 18.735428
  18.735508 18.846548 18.911098 18.963318 18.971342 18.979776 19.023056
  19.023413 19.03611  19.058432 19.159397 19.166912 19.167627 19.178741
  19.190182 19.237139 19.254782 19.2570

In [93]:
import time

# Run batches until the iterator is depleted, timing only the iteration itself.
# perf_counter() is a monotonic, high-resolution clock, which suits sub-millisecond intervals.
batch_size = 15
start = time.perf_counter()
while batch_iterator.has_next():
    labels, distances = batch_iterator.get_next_results(batch_size, BY_ID)
    returned_results_num += len(labels[0])
# Stop the clock before printing so console I/O is not counted as search time
elapsed = time.perf_counter() - start

print(f'Total results returned: {returned_results_num}\n')
print(f'Total search time: {elapsed}')

Total results returned: 1000

Total search time: 0.0002129077911376953
