In [1]:
import tiledb
import tiledb.vector_search as vs
import numpy as np


# Alternative remote (S3) locations, currently disabled:
#db_uri = "s3://tiledb-andrew/sift/sift_base"
#ground_truth_uri = "s3://tiledb-andrew/sift/sift_groundtruth"

# Local arrays; all three live under the same dataset directory.
dataset_dir = "~/work/proj/vector-search/datasets/sift-andrew"
db_uri = f"{dataset_dir}/sift_base"
ground_truth_uri = f"{dataset_dir}/sift_groundtruth"
query_uri = f"{dataset_dir}/sift_query"

# How many query vectors to keep when the query array is loaded and sliced.
n_vectors = 10

In [6]:
# Read the whole query array, then keep only its first n_vectors columns.
# NOTE(review): the saved output reports shape (128, 10), so each column is
# presumably one 128-dimensional query vector — confirm against the dataset.
all_query_vectors = vs.load_as_array(query_uri)
query_vectors = all_query_vectors[:, :n_vectors]

# [ read matrix ~/work/proj/vector-search/datasets/sift-andrew/sift_query ]: 6 ms
(128, 10)
[[ 1. 40. 28. ...  3.  1.  0.]
 [ 3. 25.  4. ...  1. 12. 11.]
 [11. 11.  3. ...  0.  4. 77.]
 ...
 [42.  3.  2. ... 25.  6. 53.]
 [48. 19. 54. ... 51. 17. 33.]
 [11. 13. 47. ... 15. 50.  2.]]


In [7]:
# Sanity-check what was loaded: container type, shape, then the values.
for shown in (type(query_vectors), query_vectors.shape, query_vectors):
    print(shown)

<class 'numpy.ndarray'>
(128, 10)
[[ 1. 40. 28. ...  3.  1.  0.]
 [ 3. 25.  4. ...  1. 12. 11.]
 [11. 11.  3. ...  0.  4. 77.]
 ...
 [42.  3.  2. ... 25.  6. 53.]
 [48. 19. 54. ... 51. 17. 33.]
 [11. 13. 47. ... 15. 50.  2.]]


In [None]:
# Load the database: build a vs.FlatIndex from the array at db_uri.
# NOTE(review): this cell is saved as `In [None]` (no execution count), yet the
# query cell that follows uses `index` — confirm the notebook still works after
# Restart Kernel -> Run All.
index = vs.FlatIndex(db_uri)

In [8]:
# Query the index with every vector in query_vectors, asking for k=10 results
# per query (presumably the 10 nearest neighbours — confirm against the
# vector_search API).
#
# nqueries is read from the array itself instead of repeating the literal 10:
# axis 1 is the axis that was cut down to n_vectors when the queries were
# loaded, so this stays correct if n_vectors is changed in the config cell.
result = index.query(query_vectors, k=10, nqueries=query_vectors.shape[1])

print(result)

# [ Total time vq_query_heap ]: 688 ms
[[932085 413247 669835 970797 340871 187470 402219 906750 376328 178811]
 [934876 413071 408764 125539 748397  67875 982379 618842 325865 177646]
 [561813 706838 408462  48044 748193 220473 652078 807599 323160 181997]
 [708177 880592 408855 191115 175336 460733 880346 569178 376277 181605]
 [706771 249062 551661 889039 716433 896005  59540 107468 323464 821938]
 [695756 400194 861882 546470 112586  27746 511412 207868 724549 807785]
 [435345 942339 406273 123985 107281 943689 595673 476215 176970 716433]
 [701258 880462 406324 546496 416604 669622 703786 762217 695175 358802]
 [455537 987636 551743 207768 371420 947299 982409 762237 803977 803100]
 [872728 941776 861530 831905 368246 515025 596714   5447 181435 866737]]


In [9]:
# Read the reference answers. Per the original note, the ground truth array
# holds the vector IDs of the top neighbors for each vector in query_vectors.
# Only a 10 x 10 window of attribute 'a' is needed here.

# Open-ended starts (None) are kept on purpose so the index handed to TileDB is
# exactly what `[:10, :10]` would produce.
first_ten_square = (slice(None, 10), slice(None, 10))

with tiledb.open(ground_truth_uri) as A:
    ground_truth = A[first_ten_square]['a']

In [10]:
# Print the reference IDs for a side-by-side visual comparison with `result`.
# In the saved outputs the two grids differ only in column 5, where rows 1 and 2
# hold the same two IDs (67875 and 220473) in swapped order.
print(ground_truth)

[[932085 413247 669835 970797 340871 187470 402219 906750 376328 178811]
 [934876 413071 408764 125539 748397 220473 982379 618842 325865 177646]
 [561813 706838 408462  48044 748193  67875 652078 807599 323160 181997]
 [708177 880592 408855 191115 175336 460733 880346 569178 376277 181605]
 [706771 249062 551661 889039 716433 896005  59540 107468 323464 821938]
 [695756 400194 861882 546470 112586  27746 511412 207868 724549 807785]
 [435345 942339 406273 123985 107281 943689 595673 476215 176970 716433]
 [701258 880462 406324 546496 416604 669622 703786 762217 695175 358802]
 [455537 987636 551743 207768 371420 947299 982409 762237 803977 803100]
 [872728 941776 861530 831905 368246 515025 596714   5447 181435 866737]]
