Nama : Aditya Atadewa  
Kelas : TI 3G  
NIM : 2341720174  
Absen : 01  

# Praktikum 4

In [2]:
!pip install -q annoy faiss-cpu hnswlib

In [None]:
import numpy as np
import time
from annoy import AnnoyIndex
import faiss
import hnswlib

# ===============================
# 1. Build a dataset of 1 million 5-D points
# ===============================
np.random.seed(42)   # fixed seed so the generated data and query are the same on every run
n_data = 1_000_000   # try 100_000 first if RAM is limited
dim = 5
X = np.random.random((n_data, dim)).astype(np.float32)

# Query point
query = np.random.random((1, dim)).astype(np.float32)
k = 10

# Timings use time.perf_counter(): it is monotonic and has the highest available
# resolution, which matters because single queries finish in well under a millisecond.

# ===============================
# 2. Annoy
# ===============================
print("=== Annoy ===")
ann_index = AnnoyIndex(dim, 'euclidean')

start = time.perf_counter()
for i in range(n_data):
    ann_index.add_item(i, X[i])
ann_index.build(10)  # 10 trees
build_time = time.perf_counter() - start

start = time.perf_counter()
# With include_distances=True the result is a pair (ids, distances).
neighbors = ann_index.get_nns_by_vector(query[0], k, include_distances=True)
query_time = time.perf_counter() - start

print("Build time:", build_time, "detik")
print("Query time:", query_time, "detik")
print("Neighbors:", neighbors[0][:5], "...")

# ===============================
# 3. FAISS (Flat Index)
# ===============================
print("\n=== FAISS (IndexFlatL2) ===")
faiss_index = faiss.IndexFlatL2(dim)

start = time.perf_counter()
faiss_index.add(X)
build_time = time.perf_counter() - start

start = time.perf_counter()
distances, indices = faiss_index.search(query, k)
query_time = time.perf_counter() - start

print("Build time:", build_time, "detik")
print("Query time:", query_time, "detik")
print("Neighbors:", indices[0][:5], "...")

# ===============================
# 4. HNSW (hnswlib)
# ===============================
print("\n=== HNSW (hnswlib) ===")
hnsw_index = hnswlib.Index(space='l2', dim=dim)

start = time.perf_counter()
hnsw_index.init_index(max_elements=n_data, ef_construction=200, M=16)
hnsw_index.add_items(X)
build_time = time.perf_counter() - start

# Query-time search breadth; set outside the timed sections.
hnsw_index.set_ef(50)

start = time.perf_counter()
labels, distances = hnsw_index.knn_query(query, k=k)
query_time = time.perf_counter() - start

print("Build time:", build_time, "detik")
print("Query time:", query_time, "detik")
print("Neighbors:", labels[0][:5], "...")


=== Annoy ===
Build time: 22.682998657226562 detik
Query time: 0.0003337860107421875 detik
Neighbors: [513923, 8488, 4765, 591652, 54193] ...

=== FAISS (IndexFlatL2) ===
Build time: 0.025473833084106445 detik
Query time: 0.013890504837036133 detik
Neighbors: [513923   8488   4765 591652  54193] ...

=== HNSW (hnswlib) ===
Build time: 206.93902373313904 detik
Query time: 0.0003516674041748047 detik
Neighbors: [513923   8488   4765 591652  54193] ...


In [None]:
import numpy as np
import time
import faiss
import hnswlib
from annoy import AnnoyIndex
import pandas as pd
from tabulate import tabulate

# =======================================================
# Run one set of experiments for a given distance metric
# =======================================================
def benchmark_metric(X, query, k, metric_name):
    """
    Benchmark Annoy, FAISS (flat index) and HNSWlib for a single metric.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array of database vectors; ``X.shape[0]`` is the number of
        points and ``X.shape[1]`` the dimensionality.
    query : numpy.ndarray
        2-D array of query vectors. Annoy is queried with ``query[0]`` only;
        FAISS and HNSWlib receive the whole array.
    k : int
        Number of neighbours requested from each library.
    metric_name : str
        ``"l2"`` or ``"ip"`` (case-insensitive). For ``"ip"`` the vectors are
        L2-normalized first and Annoy uses its ``"angular"`` metric.

    Returns
    -------
    list
        One ``[library, METRIC, build_time_s, query_time_s]`` row per library,
        in the order Annoy, FAISS (Flat), HNSWlib.

    Raises
    ------
    ValueError
        If ``metric_name`` is not one of the supported metrics. Previously any
        other value was silently benchmarked as L2 under the caller's label.
    """
    # Validate before any expensive work so a typo cannot produce a mislabeled row.
    metric = metric_name.lower()
    if metric not in ("l2", "ip"):
        raise ValueError(
            f"Unsupported metric {metric_name!r}; expected 'l2' or 'ip'"
        )
    label = metric.upper()

    results = []
    dim = X.shape[1]
    n_data = X.shape[0]

    print(f"Metrik: {label} | Data: {n_data:,} | Dimensi: {dim}")

    # --- Normalize when benchmarking inner product ---
    if metric == "ip":
        # faiss.normalize_L2 works in place, so normalize private copies and
        # leave the caller's arrays untouched.
        X = np.array(X, dtype=np.float32, order="C")
        query = np.array(query, dtype=np.float32, order="C")
        faiss.normalize_L2(X)
        faiss.normalize_L2(query)
        annoy_metric = "angular"   # on unit vectors this ranks like cosine / inner product
        hnsw_space = "ip"
        faiss_index = faiss.IndexFlatIP(dim)
    else:
        annoy_metric = "euclidean"
        hnsw_space = "l2"
        faiss_index = faiss.IndexFlatL2(dim)

    # perf_counter is monotonic and high-resolution, unlike time.time(); the
    # query timings measured here are far below a millisecond.
    timer = time.perf_counter

    # ======================
    # 1. Annoy
    # ======================
    ann = AnnoyIndex(dim, annoy_metric)
    t0 = timer()
    for i in range(n_data):
        ann.add_item(i, X[i])
    ann.build(10)  # 10 trees
    build_time = timer() - t0

    t0 = timer()
    ann.get_nns_by_vector(query[0], k)
    query_time = timer() - t0
    results.append(["Annoy", label, build_time, query_time])

    # ======================
    # 2. FAISS (Flat)
    # ======================
    t0 = timer()
    faiss_index.add(X)
    build_time = timer() - t0

    t0 = timer()
    faiss_index.search(query, k)
    query_time = timer() - t0
    results.append(["FAISS (Flat)", label, build_time, query_time])

    # ======================
    # 3. HNSWlib
    # ======================
    hnsw = hnswlib.Index(space=hnsw_space, dim=dim)
    t0 = timer()
    hnsw.init_index(max_elements=n_data, ef_construction=200, M=16)
    hnsw.add_items(X)
    build_time = timer() - t0

    hnsw.set_ef(50)  # set outside the timed query section
    t0 = timer()
    hnsw.knn_query(query, k=k)
    query_time = timer() - t0
    results.append(["HNSWlib", label, build_time, query_time])

    return results


# =======================================================
# Main driver: run every experiment and print the comparison
# =======================================================
def run_experiments(n_data=1_000_000, dim=5, k=10):
    """
    Generate random data, benchmark every metric and print a summary table.

    A seeded dataset of ``n_data`` float32 vectors with ``dim`` components and
    a single query vector are created, then ``benchmark_metric`` is called for
    "l2" and "ip" with fresh copies of both arrays. The collected rows are
    printed as a grid table; nothing is returned.
    """
    np.random.seed(42)
    X = np.random.rand(n_data, dim).astype(np.float32)
    query = np.random.rand(1, dim).astype(np.float32)

    # Each metric gets its own copies of the data and the query.
    rows = [
        row
        for metric in ("l2", "ip")
        for row in benchmark_metric(X.copy(), query.copy(), k, metric)
    ]

    # Convert to a DataFrame
    column_names = ["Library", "Metrik Jarak", "Waktu Build (s)", "Waktu Query (s)"]
    df = pd.DataFrame(rows, columns=column_names)

    print("\n=== HASIL PERBANDINGAN LIBRARY ANN ===\n")
    table = tabulate(df.round(6), headers="keys", tablefmt="grid", showindex=False)
    print(table)


# =======================================================
# Run the experiments
# =======================================================
# Benchmarks 1,000,000 random 5-dimensional vectors, asking each library for
# the 10 nearest neighbours; lower n_data for a quicker run.
run_experiments(n_data=1_000_000, dim=5, k=10)


Metrik: L2 | Data: 1,000,000 | Dimensi: 5
Metrik: IP | Data: 1,000,000 | Dimensi: 5

=== HASIL PERBANDINGAN LIBRARY ANN ===

+--------------+----------------+-------------------+-------------------+
| Library      | Metrik Jarak   |   Waktu Build (s) |   Waktu Query (s) |
| Annoy        | L2             |         26.8141   |          0.00012  |
+--------------+----------------+-------------------+-------------------+
| FAISS (Flat) | L2             |          0.015148 |          0.005903 |
+--------------+----------------+-------------------+-------------------+
| HNSWlib      | L2             |        167.166    |          0.000132 |
+--------------+----------------+-------------------+-------------------+
| Annoy        | IP             |         27.3253   |          0.00013  |
+--------------+----------------+-------------------+-------------------+
| FAISS (Flat) | IP             |          0.008173 |          0.006349 |
+--------------+----------------+-------------------+--------