In [1]:
import numpy as np
from time import time
from annoy import AnnoyIndex
import faiss
import hnswlib

In [2]:
# 1000/2D
data = np.random.random((1000, 2)).astype('float32')

# ==== ANNOY ====
f = data.shape[1]
t = AnnoyIndex(f, 'euclidean')
for i in range(len(data)):
    t.add_item(i, data[i])

start = time()
t.build(10)
t.get_nns_by_item(0, 10) 
annoy_time = time() - start

# ==== FAISS ====
index = faiss.IndexFlatL2(f)
index.add(data)
start = time()
index.search(data[:1], 10)
faiss_time = time() - start

# ==== HNSW ====
p = hnswlib.Index(space='l2', dim=f)
p.init_index(max_elements=len(data), ef_construction=100, M=16)
p.add_items(data)
start = time()
p.knn_query(data[:1], k=10)
hnsw_time = time() - start

print("ANNOY:", annoy_time)
print("FAISS:", faiss_time)
print("HNSW:", hnsw_time)


ANNOY: 0.00856924057006836
FAISS: 0.000110626220703125
HNSW: 0.00012040138244628906


In [3]:
# 1000/5D
data = np.random.random((1000, 5)).astype('float32')

# ==== ANNOY ====
f = data.shape[1]
t = AnnoyIndex(f, 'euclidean')
for i in range(len(data)):
    t.add_item(i, data[i])

start = time()
t.build(10)
t.get_nns_by_item(0, 10) 
annoy_time = time() - start

# ==== FAISS ====
index = faiss.IndexFlatL2(f)
index.add(data)
start = time()
index.search(data[:1], 10)
faiss_time = time() - start

# ==== HNSW ====
p = hnswlib.Index(space='l2', dim=f)
p.init_index(max_elements=len(data), ef_construction=100, M=16)
p.add_items(data)
start = time()
p.knn_query(data[:1], k=10)
hnsw_time = time() - start

print("ANNOY:", annoy_time)
print("FAISS:", faiss_time)
print("HNSW:", hnsw_time)


ANNOY: 0.009158611297607422
FAISS: 9.894371032714844e-05
HNSW: 0.00013113021850585938


In [4]:
# 1000000/2D
data = np.random.random((1000000, 2)).astype('float32')

# ==== ANNOY ====
f = data.shape[1]
t = AnnoyIndex(f, 'euclidean')
for i in range(len(data)):
    t.add_item(i, data[i])

start = time()
t.build(10)
t.get_nns_by_item(0, 10) 
annoy_time = time() - start

# ==== FAISS ====
index = faiss.IndexFlatL2(f)
index.add(data)
start = time()
index.search(data[:1], 10)
faiss_time = time() - start

# ==== HNSW ====
p = hnswlib.Index(space='l2', dim=f)
p.init_index(max_elements=len(data), ef_construction=100, M=16)
p.add_items(data)
start = time()
p.knn_query(data[:1], k=10)
hnsw_time = time() - start

print("ANNOY:", annoy_time)
print("FAISS:", faiss_time)
print("HNSW:", hnsw_time)


ANNOY: 8.061258554458618
FAISS: 0.0031359195709228516
HNSW: 0.00017023086547851562


In [5]:
# 1000000/5D
data = np.random.random((1000000, 5)).astype('float32')

# ==== ANNOY ====
f = data.shape[1]
t = AnnoyIndex(f, 'euclidean')
for i in range(len(data)):
    t.add_item(i, data[i])

start = time()
t.build(10)
t.get_nns_by_item(0, 10) 
annoy_time = time() - start

# ==== FAISS ====
index = faiss.IndexFlatL2(f)
index.add(data)
start = time()
index.search(data[:1], 10)
faiss_time = time() - start

# ==== HNSW ====
p = hnswlib.Index(space='l2', dim=f)
p.init_index(max_elements=len(data), ef_construction=100, M=16)
p.add_items(data)
start = time()
p.knn_query(data[:1], k=10)
hnsw_time = time() - start

print("ANNOY:", annoy_time)
print("FAISS:", faiss_time)
print("HNSW:", hnsw_time)


ANNOY: 5.192508220672607
FAISS: 0.0051534175872802734
HNSW: 0.00016832351684570312


The experiment showed that FAISS and HNSWLIB consistently achieved faster execution times than ANNOY, especially when handling large datasets. Although all three algorithms were efficient on smaller datasets (1,000 samples), ANNOY's speed declined significantly with one million entries, taking several seconds, while FAISS and HNSWLIB responded almost instantly. The change in dimensionality from 2D to 5D had minimal impact on performance for any of the methods. Overall, FAISS and HNSWLIB proved to be more scalable and efficient, making them better suited for large-scale or real-time nearest neighbor tasks, whereas ANNOY remains a simple and effective option for smaller datasets.