#**Tugas 1 Jobsheet 7**

Lakukan percobaan pada metric distance yang berbeda, 1000 vs 1.000.000 data, 2D vs 5D data, untuk algoritma,

ANNOY

FAISS

HNSW

Catat performansinya dalam bentuk tabel, misal

In [2]:
!pip install annoy faiss-cpu hnswlib


Collecting annoy
  Downloading annoy-1.17.3.tar.gz (647 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/647.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.5/647.5 kB[0m [31m6.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m647.5/647.5 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting hnswlib
  Downloading hnswlib-0.8.0.tar.gz (36 kB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (31.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
import numpy as np
import time
from annoy import AnnoyIndex
import faiss
import hnswlib

# Fungsi bantu untuk catat waktu
def measure_time(func):
    start = time.time()
    func()
    return time.time() - start

# Percobaan
configs = [
    (1000, 2),
    (1000, 5),
    (1_000_000, 2),
    (1_000_000, 5)
]

results = []

for n, dim in configs:
    print(f"\n=== {n} data, {dim}D ===")
    data = np.random.random((n, dim)).astype('float32')
    query = np.random.random((1, dim)).astype('float32')

    # --- ANNOY ---
    annoy_index = AnnoyIndex(dim, 'euclidean')
    for i in range(n):
        annoy_index.add_item(i, data[i])
    t_annoy_build = measure_time(lambda: annoy_index.build(10))
    t_annoy_search = measure_time(lambda: annoy_index.get_nns_by_vector(query[0], 5))

    # --- FAISS ---
    index_faiss = faiss.IndexFlatL2(dim)
    t_faiss_build = measure_time(lambda: index_faiss.add(data))
    t_faiss_search = measure_time(lambda: index_faiss.search(query, 5))

    # --- HNSW ---
    index_hnsw = hnswlib.Index(space='l2', dim=dim)
    index_hnsw.init_index(max_elements=n, ef_construction=200, M=16)
    t_hnsw_build = measure_time(lambda: index_hnsw.add_items(data))
    t_hnsw_search = measure_time(lambda: index_hnsw.knn_query(query, k=5))

    results.append({
        "Data/Dim": f"{n}/{dim}D",
        "Annoy": t_annoy_search,
        "FAISS": t_faiss_search,
        "HNSW": t_hnsw_search
    })

print("\nHasil perbandingan:")
for r in results:
    print(r)



=== 1000 data, 2D ===

=== 1000 data, 5D ===

=== 1000000 data, 2D ===

=== 1000000 data, 5D ===

Hasil perbandingan:
{'Data/Dim': '1000/2D', 'Annoy': 5.459785461425781e-05, 'FAISS': 6.67572021484375e-05, 'HNSW': 7.963180541992188e-05}
{'Data/Dim': '1000/5D', 'Annoy': 6.222724914550781e-05, 'FAISS': 5.14984130859375e-05, 'HNSW': 5.602836608886719e-05}
{'Data/Dim': '1000000/2D', 'Annoy': 7.987022399902344e-05, 'FAISS': 0.0055921077728271484, 'HNSW': 7.152557373046875e-05}
{'Data/Dim': '1000000/5D', 'Annoy': 9.131431579589844e-05, 'FAISS': 0.011671066284179688, 'HNSW': 0.00010395050048828125}


Jumlah Data/Dimensi

ANNOY
FAISS
HNSW

1000/2D Annoy : 'Annoy' : 5.459785461425781e-05, 'FAISS' : 6.67572021484375e-05, 'HNSW' : 7.963180541992188e-05

1000/5D : 'Annoy' : 6.222724914550781e-05, 'FAISS' : 5.14984130859375e-05, 'HNSW' : 5.602836608886719e-05}

1000000/2D : 'Annoy' : 7.987022399902344e-05, 'FAISS': 0.0055921077728271484, 'HNSW': 7.152557373046875e-05}

1000000/5D : 'Annoy' : 9.131431579589844e-05, 'FAISS' : 0.011671066284179688, 'HNSW' : 0.00010395050048828125}

