In [1]:
# A simple example showing how to build a Faiss index from scratch, save it to disk, reload it, and search it.

from faisstoolbox.FaissManager import FaissIndexManager
import numpy as np

In [2]:
# Build an empty index.
# The index type is given as a string; see
# https://github.com/facebookresearch/faiss/wiki/Faiss-indexes for the options.
# 'Flat' is used here, meaning the raw vectors are not encoded or compressed.
vec_dim = 128  # dimension of the vectors to be indexed
index_types_str = 'Flat'

fim = FaissIndexManager(
    dim=vec_dim,
    index_types=index_types_str,
)

[2020-08-17 15:43:04,197][pid:22971-tid:4599072192] FaissManager.__init__: INFO: index inited, is_trained=True


In [3]:
# Add some random vectors generated by NumPy.
# Seed the global RNG first so that a fresh "Restart & Run All" generates the
# same vectors every time; later np.random draws in the same run become
# deterministic as well.
np.random.seed(42)
num_vecs = 1000  # number of demo vectors to insert
rand_vecs = np.random.rand(num_vecs, vec_dim)
print(rand_vecs.shape)
for i, vec_arr in enumerate(rand_vecs):
    # np.random.rand returns float64; each row is handed to add_vec as a
    # float32 copy, keyed by its row number rendered as a string.
    fim.add_vec(np.array(vec_arr, dtype=np.float32), str(i))
    if i % 100 == 0:
        # lightweight progress report every 100 vectors
        print(i, 'done')

(1000, 128)
0 done
100 done
200 done
300 done
400 done
500 done
600 done
700 done
800 done
900 done


In [4]:
# Write the index and the vector-key dict to disk.
index_path = './demo_index.idx'      # passed to save() as index_file_path
vec_key_path = './demo_vec_key.dic'  # passed to save() as dict_path
fim.save(
    index_file_path=index_path,
    dict_path=vec_key_path,
)

successfully save index to ./demo_index.idx, dict path: ./demo_vec_key.dic


In [5]:
# Reload the saved index into a new manager and run a top-10 search.
new_fim = FaissIndexManager(
    index_file_path=index_path,
    id_dict_path=vec_key_path,
)

# One random float32 query vector of length vec_dim.
test_vec = np.random.rand(1, vec_dim).astype(np.float32)[0]

# search_vec is called with a batch, so wrap the single query in an outer array.
query_batch = np.array([test_vec])
dist_list, key_list = new_fim.search_vec(query_batch, topk=10)

# For every query row, pair each returned key with its distance.
result_list = [
    list(zip(key_list[row], row_dists))
    for row, row_dists in enumerate(dist_list)
]

print(result_list)

[2020-08-17 15:43:39,947][pid:22971-tid:4599072192] FaissManager.__init__: INFO: loading index from ./demo_index.idx
[2020-08-17 15:43:39,959][pid:22971-tid:4599072192] FaissManager.__init__: INFO: index inited, is_trained=True


[[('901', 1.1244163838718269), ('258', 1.1946260371141235), ('672', 1.206722266930447), ('368', 1.2134917251508728), ('265', 1.2135074880722159), ('386', 1.267817917063354), ('5', 1.3000176035634947), ('965', 1.3137157507980204), ('983', 1.3505976992866306), ('533', 1.3533024311429687)]]
