# ScaNN vs FAISS


In [None]:
!pip install scann

Collecting scann
  Downloading scann-1.3.4-cp310-cp310-manylinux_2_27_x86_64.whl.metadata (4.8 kB)
Downloading scann-1.3.4-cp310-cp310-manylinux_2_27_x86_64.whl (11.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.7/11.7 MB[0m [31m29.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scann
Successfully installed scann-1.3.4


In [None]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.7 kB)
Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m51.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.8.0.post1


In [None]:
import numpy as np

# Size and dimensionality of the synthetic "image embedding" dataset.
num_images = 1_000_000
vector_dim = 128

# Fixed seed so that a fresh kernel always produces the same dataset.
SEED = 42
rng = np.random.default_rng(SEED)

# Sample uniformly from [0, 1) directly as float32. This avoids materialising
# a float64 array of the same shape (twice the memory) only to down-cast it.
image_vectors = rng.random((num_images, vector_dim), dtype=np.float32)


In [None]:
import faiss
import numpy as np
import time

# Build an IVF-Flat index over `image_vectors` and time one top-k query.
#
# The search settings mirror the ScaNN configuration used in this notebook
# (dot-product similarity, 10 neighbours, 10 of 100 partitions searched) so the
# two timings measure comparable work. FAISS' default nprobe is 1, which
# would scan ten times fewer partitions than ScaNN does.
d = vector_dim
nlist = 100          # number of IVF partitions (coarse centroids)
faiss_nprobe = 10    # partitions visited per query
top_k = 10           # neighbours returned per query

quantizer = faiss.IndexFlatIP(d)
index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_INNER_PRODUCT)

index.train(image_vectors)
index.add(image_vectors)
index.nprobe = faiss_nprobe

# FAISS expects a 2-D batch of queries, hence the leading dimension of 1.
query_vector = np.random.rand(1, vector_dim).astype(np.float32)

# Untimed warm-up call so one-off start-up costs are not attributed to the
# measured search.
index.search(query_vector, top_k)

# perf_counter is monotonic and has the highest available resolution, which
# matters for sub-millisecond measurements; time.time() offers neither.
start_time = time.perf_counter()
D, I = index.search(query_vector, top_k)
faiss_cpu_time = time.perf_counter() - start_time

print(f"FAISS (CPU) search time: {faiss_cpu_time:.6f} seconds")


FAISS (CPU) search time: 0.000981 seconds


In [None]:
import scann
import numpy as np
import time

# Build a ScaNN searcher over the dataset created earlier in the notebook and
# time one query.
#
# `image_vectors` and `vector_dim` are reused instead of being regenerated
# here. Regenerating allocated a second million-vector array, and the
# replacement data differed from what the FAISS index was built on.
searcher = (
    scann.scann_ops_pybind.builder(image_vectors, 10, "dot_product")
    .tree(num_leaves=100, num_leaves_to_search=10, training_sample_size=100000)
    .score_ah(2, anisotropic_quantization_threshold=0.2)
    .reorder(100)
    .build()
)

# ScaNN's single-query `search` takes a 1-D vector.
query_vector = np.random.rand(vector_dim).astype(np.float32)

# Untimed warm-up call so one-off start-up costs are not attributed to the
# measured search.
searcher.search(query_vector)

# perf_counter is monotonic and has the highest available resolution, which
# matters for sub-millisecond measurements; time.time() offers neither.
start_time = time.perf_counter()
neighbors, distances = searcher.search(query_vector)
scann_search_time = time.perf_counter() - start_time

print(f"ScaNN search time: {scann_search_time:.6f} seconds")



ScaNN search time: 0.000856 seconds
