# Faiss

In [1]:
import numpy as np
import faiss

In [2]:
# Sizes of the synthetic data set used throughout the notebook.
d = 64          # dimensionality of every vector
nb = 100000     # number of database vectors
nq = 10000      # number of query vectors

# Seed NumPy's global RNG so the draws below are the same on every run.
np.random.seed(1234)

# Database matrix: nb x d random samples stored as float32. The first
# component additionally receives a ramp that grows with the row index.
xb = np.random.random(size=(nb, d)).astype(np.float32)
xb[:, 0] += np.arange(nb) / 1000.0

# Query matrix: built the same way, with the same kind of ramp on column 0.
xq = np.random.random(size=(nq, d)).astype(np.float32)
xq[:, 0] += np.arange(nq) / 1000.0

In [3]:
# Inspect the query matrix: print its (rows, columns) shape, then let the
# notebook display the array itself as the cell's result.
print(xq.shape)
xq

(10000, 64)


array([[ 0.81432974,  0.7409969 ,  0.8915324 , ...,  0.72459674,
         0.893881  ,  0.6574571 ],
       [ 0.5844774 ,  0.797842  ,  0.74140453, ...,  0.6768835 ,
         0.05907924,  0.6396156 ],
       [ 0.75040764,  0.02659794,  0.5495097 , ...,  0.69562465,
         0.16268532,  0.76653737],
       ...,
       [10.96773   ,  0.05037309,  0.7342035 , ...,  0.89510185,
         0.6490696 ,  0.86151606],
       [10.831193  ,  0.70606154,  0.1922274 , ...,  0.8026039 ,
         0.6854174 ,  0.60209423],
       [10.078484  ,  0.39106598,  0.01359335, ...,  0.63193923,
         0.12561724,  0.78384215]], dtype=float32)

In [4]:
# Print the (rows, columns) shape of the database matrix.
print(xb.shape)

(100000, 64)


In [5]:
# Create an IndexFlatL2 for d-dimensional vectors and load the database
# vectors into it, printing the index state before and after.
index = faiss.IndexFlatL2(d)   # build the index for vectors of dimension d
print(index.is_trained)        # report whether the index considers itself trained
index.add(xb)                  # add all database vectors to the index
print(index.ntotal)            # number of vectors now held by the index

True
100000


In [6]:
k = 4                          # we want to see 4 nearest neighbors

# Sanity check: query the index with the first five database vectors.
# For every query row, search() looks up the k nearest stored vectors and
# returns two arrays: D with the distance values and I with the matching
# indices.
D, I = index.search(xb[:5], k)
print(I)
print(D)

# These five queries are themselves stored in the index, so the closest hit
# for row i must be vector i. Assert it instead of relying on a visual
# inspection of the printed arrays (done after printing so the output is
# still available if the check fails).
assert I[:, 0].tolist() == list(range(5)), (
    "sanity check failed: a database vector is not its own nearest neighbor"
)

In [6]:
# Actual search: look up the k nearest database vectors for every query in
# xq. D and I are overwritten with the results for the query set.
D, I = index.search(xq, k)     # actual search
print(I[:5])                   # neighbors of the 5 first queries
print(I[-5:])                  # neighbors of the 5 last queries

[[  0 393 363  78]
 [  1 555 277 364]
 [  2 304 101  13]
 [  3 173  18 182]
 [  4 288 370 531]]
[[0.        7.175174  7.2076287 7.251163 ]
 [0.        6.323565  6.684582  6.799944 ]
 [0.        5.7964087 6.3917365 7.2815127]
 [0.        7.277905  7.5279875 7.6628447]
 [0.        6.763804  7.295122  7.368814 ]]
[[ 381  207  210  477]
 [ 526  911  142   72]
 [ 838  527 1290  425]
 [ 196  184  164  359]
 [ 526  377  120  425]]
[[ 9900 10500  9309  9831]
 [11055 10895 10812 11321]
 [11353 11103 10164  9787]
 [10571 10664 10632  9638]
 [ 9628  9554 10036  9582]]


# 参考
[Faiss从入门到实战精通](https://blog.csdn.net/bitcarmanlee/article/details/106447629)