In [1]:
import faiss
import numpy as np

In [2]:
# Problem size: `nb` database vectors, each with `d` components.
d, nb = 128, 10000

# Seed NumPy's global random state so the vectors below are reproducible.
np.random.seed(42)

# Random database matrix of shape (nb, d), converted to float32.
data = np.random.random(size=(nb, d)).astype(np.float32)


In [3]:
# Build a flat L2 index for d-dimensional vectors and report its training flag.
index = faiss.IndexFlatL2(d)
trained_flag = index.is_trained
print("Is the index trained?", trained_flag)

# Insert the database vectors, then report how many the index now holds.
index.add(data)
vector_count = index.ntotal
print("Number of vectors in index:", vector_count)


Is the index trained? True
Number of vectors in index: 10000


In [None]:
# Show the index object's string form (for this faiss object, a SWIG proxy description).
index_description = str(index)
print(index_description)


<faiss.swigfaiss_avx2.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x000002717A345110> >


In [6]:
# Query batch size and the number of neighbours to retrieve per query.
nq = 5
k = 4

# No reseeding here: the draw continues from NumPy's global random state.
query_vectors = np.random.random(size=(nq, d)).astype(np.float32)

# Search the flat index; the returned pair is unpacked into distances and indices.
search_result = index.search(query_vectors, k)
distances, indices = search_result

print("Query Vectors", query_vectors)


Query Vectors [[0.42576364 0.7483603  0.31968448 0.844514   0.8715672  0.90923995
  0.62455064 0.01308648 0.28241047 0.43504998 0.34474972 0.02031084
  0.49643975 0.53565174 0.6669295  0.6868271  0.3204728  0.4439586
  0.12089147 0.43911597 0.552396   0.63258666 0.4372417  0.89686835
  0.11013106 0.5291878  0.6973343  0.9503829  0.52329665 0.81006116
  0.8298973  0.18941016 0.8264291  0.6941227  0.7805921  0.7927966
  0.9987547  0.41745672 0.98577434 0.36349994 0.08268002 0.53514713
  0.66551745 0.33850315 0.9789869  0.7366034  0.8822267  0.6507813
  0.27038968 0.591816   0.35031426 0.6754654  0.85852444 0.0217482
  0.95771056 0.458995   0.4062703  0.25550753 0.10522445 0.07769593
  0.6367766  0.27719513 0.66567814 0.26709223 0.48343158 0.18242414
  0.4818666  0.6784375  0.4832652  0.05837315 0.07890956 0.4888111
  0.6137257  0.91366744 0.74685466 0.7755822  0.30465937 0.95350736
  0.4031108  0.82178813 0.9129646  0.22721702 0.69569737 0.19220258
  0.5919482  0.6604615  0.00987705 0.16

In [7]:
# Print each search result under its heading: neighbour indices first, then distances.
for heading, values in (
    ("Nearest neighbors indices:\n", indices),
    ("Distances:\n", distances),
):
    print(heading, values)

Nearest neighbors indices:
 [[9519 1968 3226 7852]
 [6244 8202 2063 2003]
 [5500 3763 8025 4223]
 [2867 4917 7676 9209]
 [2755 5488 9265 4689]]
Distances:
 [[12.162619  13.387042  13.904687  14.283823 ]
 [14.836632  15.083319  15.092712  15.166149 ]
 [13.105391  13.323711  13.36009   13.372475 ]
 [11.650248  12.50419   12.854918  12.877913 ]
 [15.032995  15.0730295 15.601614  15.612171 ]]


In [8]:
# IVF-Flat index: a flat L2 quantizer combined with `nlist` clusters.
nlist = 100
quantizer = faiss.IndexFlatL2(d)
index_ivf = faiss.IndexIVFFlat(
    quantizer,
    d,
    nlist,
    faiss.METRIC_L2,
)

# Training must come before insertion for IVFFlat; both use the database vectors.
index_ivf.train(data)
index_ivf.add(data)

# Query the IVF index. No nprobe is set, so the search runs with the library default.
# `distances` and `indices` are rebound here, replacing the flat-index results.
ivf_result = index_ivf.search(query_vectors, k)
distances, indices = ivf_result
print("Approximate Nearest Neighbors:\n", indices)


Approximate Nearest Neighbors:
 [[1968 6504 3021 7698]
 [2063 7292 4656 9903]
 [7181 9181 2703  274]
 [8830 1809 5271 1403]
 [2755 8807 1447 3256]]


In [9]:
# Reuse the IVF index built in the previous cell rather than constructing,
# training and filling a second, identical IndexIVFFlat from the same data.
# The recorded outputs of both cells are identical, so rebuilding only costs time.
assert index_ivf.is_trained, "index_ivf must be trained before it is searched"
assert index_ivf.ntotal == len(data), "index_ivf should hold every database vector"

# Search the existing index; `distances` and `indices` are rebound to the IVF results.
distances, indices = index_ivf.search(query_vectors, k)
print("Approximate Nearest Neighbors:\n", indices)


Approximate Nearest Neighbors:
 [[1968 6504 3021 7698]
 [2063 7292 4656 9903]
 [7181 9181 2703  274]
 [8830 1809 5271 1403]
 [2755 8807 1447 3256]]
