In [1]:
import hnswlib
import numpy as np
import pandas as pd
import time
from sklearn.neighbors import NearestNeighbors

In [2]:
# ===========================
# 1. Buat data 2D acak
# ===========================
num_elements = 1000
dim = 2
data = np.random.random((num_elements, dim)).astype(np.float32)

# Query point
query = np.array([[0.5, 0.5]], dtype=np.float32)
k = 5  # cari 5 tetangga terdekat

In [3]:
# ===========================
# 2. Exact NN (Brute Force)
# ===========================
nn = NearestNeighbors(n_neighbors=k, algorithm='brute', metric='euclidean')
nn.fit(data)

start = time.time()
distances, indices = nn.kneighbors(query)
end = time.time()

print("=== Exact NN ===")
print("Indices:", indices)
print("Distances:", distances)
print("Waktu:", end - start, "detik")

=== Exact NN ===
Indices: [[ 85 534 438 424 355]]
Distances: [[0.01057618 0.01909875 0.02147917 0.02274323 0.02451442]]
Waktu: 3.5419750213623047 detik


In [4]:
# ===========================
# 3. HNSW
# ===========================
# Inisialisasi index HNSW
p = hnswlib.Index(space='l2', dim=dim)  

# Ukuran maksimum elemen yang bisa ditampung
p.init_index(max_elements=num_elements, ef_construction=100, M=16)

# Tambahkan data
p.add_items(data)

# Set parameter pencarian
p.set_ef(50)   # tradeoff speed vs accuracy

start = time.time()
labels, distances = p.knn_query(query, k=k)
end = time.time()

print("\n=== HNSW ===")
print("Indices:", labels)
print("Distances:", distances)
print("Waktu:", end - start, "detik")


=== HNSW ===
Indices: [[ 85 534 438 424 355]]
Distances: [[0.00011186 0.00036476 0.00046135 0.00051725 0.00060096]]
Waktu: 0.0 detik


## Percobaan Metric Distance Berbeda

In [5]:
num_elements = [1000, 1000000]
dimensions = [2, 5]

results = []

for num_element in num_elements:
    for dim in dimensions:
        # ===========================
        # 1. Buat data 2D acak
        # ===========================
        data = np.random.random((num_element, dim)).astype(np.float32)
        
        # Query point
        query = np.random.rand(1, dim).astype('float32')
        k = 5  # cari 5 tetangga terdekat
        
        # ===========================
        # 2. Exact NN (Brute Force)
        # ===========================
        nn = NearestNeighbors(n_neighbors=k, algorithm='brute', metric='euclidean')
        nn.fit(data)
        
        start = time.time()
        nn_distances, nn_indices = nn.kneighbors(query)
        end = time.time()
        nn_time = end - start
        
        # ===========================
        # 3. HNSW
        # ===========================
        # Inisialisasi index HNSW
        p = hnswlib.Index(space='l2', dim=dim)  
        
        # Ukuran maksimum elemen yang bisa ditampung
        p.init_index(max_elements=num_element, ef_construction=100, M=16)
        
        # Tambahkan data
        p.add_items(data)
        
        # Set parameter pencarian
        p.set_ef(50)   # tradeoff speed vs accuracy
        
        start = time.time()
        hnsw_labels, hnsw_distances = p.knn_query(query, k=k)
        end = time.time()
        hnsw_time = end - start
    
        # --- Simpan hasil ke list ---
        results.append({
            "Metric": 'Exact NN',
            "Jumlah data": f"{num_element:,}",
            "Dimensi": dim,
            "Indices": nn_indices,
            "Distances": nn_distances,
            "Waktu": nn_time,
        })

        results.append({
            "Metric": 'HNSW',
            "Jumlah data": f"{num_element:,}",
            "Dimensi": dim,
            "Indices": hnsw_labels,
            "Distances": hnsw_distances,
            "Waktu": hnsw_time,
        })

# --- Convert ke DataFrame ---
df = pd.DataFrame(results)

In [6]:
# --- Tampilkan tabel rapi ---
print(df.to_string(index=False))

  Metric Jumlah data  Dimensi                                   Indices                                                                                                           Distances    Waktu
Exact NN       1,000        2               [[332, 290, 178, 217, 656]]       [[0.009308825246989727, 0.015814058482646942, 0.02120141126215458, 0.02167128585278988, 0.03912459686398506]] 0.003524
    HNSW       1,000        2               [[332, 290, 178, 217, 656]]                                        [[8.6654225e-05, 0.00025008447, 0.00044949987, 0.00046964464, 0.0015307341]] 0.000000
Exact NN       1,000        5               [[752, 744, 339, 503, 559]]          [[0.18438217043876648, 0.21670539677143097, 0.24761362373828888, 0.2604896128177643, 0.26508262753486633]] 0.001521
    HNSW       1,000        5               [[752, 744, 339, 503, 559]]                                                   [[0.033996783, 0.04696123, 0.061312508, 0.06785484, 0.070268795]] 0.000000
Exact NN   1,00