In [5]:
import numpy as np 
import time
from annoy import AnnoyIndex
import faiss
import hnswlib

# ===============================
# 1. Build a dataset of 1 million 5-D points
# ===============================
n_data = 1_000_000   # try 100_000 first if RAM is limited
dim = 5

# A seeded generator makes the benchmark (and the neighbor ids it prints)
# reproducible from run to run. Drawing float32 directly also avoids
# materializing a temporary float64 copy of the whole dataset.
seed = 42
rng = np.random.default_rng(seed)
X = rng.random((n_data, dim), dtype=np.float32)

# L2-normalize every row so that inner product equals cosine similarity.
X_norm = X / np.linalg.norm(X, axis=1, keepdims=True)

# Single query point, kept as a (1, dim) batch, plus its normalized form.
query = rng.random((1, dim), dtype=np.float32)
query_norm = query / np.linalg.norm(query, axis=1, keepdims=True)

# Number of nearest neighbors requested from each index.
k = 10

# ===============================
# 2. Annoy (Cosine)
# ===============================
print("=== Annoy (Cosine) ===")
# 'angular' is Annoy's cosine-style metric, so the raw (un-normalized)
# vectors are passed here rather than X_norm.
ann_index = AnnoyIndex(dim, 'angular')

# time.perf_counter() is a monotonic, high-resolution clock meant for
# measuring short intervals; time.time() is wall-clock time and can be
# too coarse (or jump) for sub-millisecond queries.
start = time.perf_counter()
for i, vector in enumerate(X):
    ann_index.add_item(i, vector)
ann_index.build(10)  # 10 trees
build_time = time.perf_counter() - start

start = time.perf_counter()
# With include_distances=True the result is an (ids, distances) pair.
neighbors = ann_index.get_nns_by_vector(query[0], k, include_distances=True)
query_time = time.perf_counter() - start

print("Build time:", build_time, "detik")
print("Query time:", query_time, "detik")
print("Neighbors:", neighbors[0][:5], "...")

# ===============================
# 3. FAISS (Cosine)
# ===============================
print("\n=== FAISS (Cosine) ===")
# Inner product equals cosine similarity when the vectors are normalized,
# which is why this index is fed X_norm / query_norm.
faiss_index = faiss.IndexFlatIP(dim)

# time.perf_counter() is a monotonic, high-resolution clock meant for
# measuring short intervals; time.time() is wall-clock time and can be
# too coarse (or jump) for millisecond-scale measurements.
start = time.perf_counter()
faiss_index.add(X_norm)
build_time = time.perf_counter() - start

start = time.perf_counter()
distances, indices = faiss_index.search(query_norm, k)
query_time = time.perf_counter() - start

print("Build time:", build_time, "detik")
print("Query time:", query_time, "detik")
print("Neighbors:", indices[0][:5], "...")

# ===============================
# 4. HNSW (Cosine)
# ===============================
print("\n=== HNSW (Cosine) ===")
hnsw_index = hnswlib.Index(space='cosine', dim=dim)

# time.perf_counter() is a monotonic, high-resolution clock meant for
# measuring short intervals; time.time() is wall-clock time and can be
# too coarse (or jump) for sub-millisecond queries.
# The build time covers both index allocation and insertion.
start = time.perf_counter()
hnsw_index.init_index(max_elements=n_data, ef_construction=200, M=16)
hnsw_index.add_items(X)
build_time = time.perf_counter() - start

# Query-time search parameter; set outside the timed sections.
hnsw_index.set_ef(50)

start = time.perf_counter()
labels, distances = hnsw_index.knn_query(query, k=k)
query_time = time.perf_counter() - start

print("Build time:", build_time, "detik")
print("Query time:", query_time, "detik")
print("Neighbors:", labels[0][:5], "...")


=== Annoy (Cosine) ===
Build time: 23.280031204223633 detik
Query time: 0.00028061866760253906 detik
Neighbors: [865071, 471165, 576587, 821462, 718965] ...

=== FAISS (Cosine) ===
Build time: 0.008265018463134766 detik
Query time: 0.009873628616333008 detik
Neighbors: [865071 471165 576587 821462 718965] ...

=== HNSW (Cosine) ===
Build time: 82.34957695007324 detik
Query time: 0.0008997917175292969 detik
Neighbors: [865071 471165 576587 821462 718965] ...
