In [None]:
from time import perf_counter
from time import time

import faiss
import hnswlib
import numpy as np
from annoy import AnnoyIndex


In [None]:
# 1000/2D
# Query-latency benchmark: 1000 random points in 2 dimensions.
# Every index is fully constructed BEFORE its timer starts, so the three
# printed numbers all measure the same thing: a single 10-nearest-neighbour
# lookup for the first point. perf_counter() is used because one query is far
# too short to time reliably with the wall clock.
data = np.random.random((1000, 2)).astype("float32")
f = data.shape[1]  # vector dimensionality shared by all three indexes

# ==== ANNOY ====
t = AnnoyIndex(f, "euclidean")
for i, vector in enumerate(data):
    t.add_item(i, vector)
# Build outside the timed region: FAISS and HNSW also finish their index
# construction (add / add_items) before their timers start.
t.build(10)

start = perf_counter()
t.get_nns_by_item(0, 10)
annoy_time = perf_counter() - start

# ==== FAISS ====
index = faiss.IndexFlatL2(f)
index.add(data)

start = perf_counter()
index.search(data[:1], 10)
faiss_time = perf_counter() - start

# ==== HNSW ====
p = hnswlib.Index(space="l2", dim=f)
p.init_index(max_elements=len(data), ef_construction=100, M=16)
p.add_items(data)

start = perf_counter()
p.knn_query(data[:1], k=10)
hnsw_time = perf_counter() - start

# Seconds spent on the single query, per library.
print("ANNOY:", annoy_time)
print("FAISS:", faiss_time)
print("HNSW:", hnsw_time)


In [None]:
# 1000/5D
# Query-latency benchmark: 1000 random points in 5 dimensions.
# Every index is fully constructed BEFORE its timer starts, so the three
# printed numbers all measure the same thing: a single 10-nearest-neighbour
# lookup for the first point. perf_counter() is used because one query is far
# too short to time reliably with the wall clock.
data = np.random.random((1000, 5)).astype("float32")
f = data.shape[1]  # vector dimensionality shared by all three indexes

# ==== ANNOY ====
t = AnnoyIndex(f, "euclidean")
for i, vector in enumerate(data):
    t.add_item(i, vector)
# Build outside the timed region: FAISS and HNSW also finish their index
# construction (add / add_items) before their timers start.
t.build(10)

start = perf_counter()
t.get_nns_by_item(0, 10)
annoy_time = perf_counter() - start

# ==== FAISS ====
index = faiss.IndexFlatL2(f)
index.add(data)

start = perf_counter()
index.search(data[:1], 10)
faiss_time = perf_counter() - start

# ==== HNSW ====
p = hnswlib.Index(space="l2", dim=f)
p.init_index(max_elements=len(data), ef_construction=100, M=16)
p.add_items(data)

start = perf_counter()
p.knn_query(data[:1], k=10)
hnsw_time = perf_counter() - start

# Seconds spent on the single query, per library.
print("ANNOY:", annoy_time)
print("FAISS:", faiss_time)
print("HNSW:", hnsw_time)


In [None]:
# 1000000/2D
# Query-latency benchmark: 1,000,000 random points in 2 dimensions.
# Every index is fully constructed BEFORE its timer starts, so the three
# printed numbers all measure the same thing: a single 10-nearest-neighbour
# lookup for the first point. perf_counter() is used because one query is far
# too short to time reliably with the wall clock.
data = np.random.random((1000000, 2)).astype("float32")
f = data.shape[1]  # vector dimensionality shared by all three indexes

# ==== ANNOY ====
t = AnnoyIndex(f, "euclidean")
for i, vector in enumerate(data):
    t.add_item(i, vector)
# Build outside the timed region: FAISS and HNSW also finish their index
# construction (add / add_items) before their timers start.
t.build(10)

start = perf_counter()
t.get_nns_by_item(0, 10)
annoy_time = perf_counter() - start

# ==== FAISS ====
index = faiss.IndexFlatL2(f)
index.add(data)

start = perf_counter()
index.search(data[:1], 10)
faiss_time = perf_counter() - start

# ==== HNSW ====
p = hnswlib.Index(space="l2", dim=f)
p.init_index(max_elements=len(data), ef_construction=100, M=16)
p.add_items(data)

start = perf_counter()
p.knn_query(data[:1], k=10)
hnsw_time = perf_counter() - start

# Seconds spent on the single query, per library.
print("ANNOY:", annoy_time)
print("FAISS:", faiss_time)
print("HNSW:", hnsw_time)


In [None]:
# 1000000/5D
# Query-latency benchmark: 1,000,000 random points in 5 dimensions.
# Every index is fully constructed BEFORE its timer starts, so the three
# printed numbers all measure the same thing: a single 10-nearest-neighbour
# lookup for the first point. perf_counter() is used because one query is far
# too short to time reliably with the wall clock.
data = np.random.random((1000000, 5)).astype("float32")
f = data.shape[1]  # vector dimensionality shared by all three indexes

# ==== ANNOY ====
t = AnnoyIndex(f, "euclidean")
for i, vector in enumerate(data):
    t.add_item(i, vector)
# Build outside the timed region: FAISS and HNSW also finish their index
# construction (add / add_items) before their timers start.
t.build(10)

start = perf_counter()
t.get_nns_by_item(0, 10)
annoy_time = perf_counter() - start

# ==== FAISS ====
index = faiss.IndexFlatL2(f)
index.add(data)

start = perf_counter()
index.search(data[:1], 10)
faiss_time = perf_counter() - start

# ==== HNSW ====
p = hnswlib.Index(space="l2", dim=f)
p.init_index(max_elements=len(data), ef_construction=100, M=16)
p.add_items(data)

start = perf_counter()
p.knn_query(data[:1], k=10)
hnsw_time = perf_counter() - start

# Seconds spent on the single query, per library.
print("ANNOY:", annoy_time)
print("FAISS:", faiss_time)
print("HNSW:", hnsw_time)


The experiment revealed that FAISS and HNSWLIB consistently outperformed ANNOY in measured execution time, particularly when processing large datasets. While all three libraries performed efficiently on the smaller datasets (1,000 samples), ANNOY’s measured time rose considerably with one million records, requiring several seconds to complete, whereas FAISS and HNSWLIB delivered near-instant responses. Note, however, that in the runs summarized here the ANNOY timer also covered index construction (`t.build(10)`), while the FAISS and HNSWLIB timers covered only the query, so these figures are not a like-for-like comparison of search speed. Additionally, increasing the data dimensionality from 2D to 5D had little effect on the performance of any method. In summary, FAISS and HNSWLIB demonstrated superior scalability and efficiency, making them more suitable for large-scale or real-time nearest neighbor applications, while ANNOY remains a straightforward and reliable choice for smaller datasets.