In [1]:
from hnsw_pipeline import HNSWPipeline

In [2]:
pipeline = HNSWPipeline(
    # Directory that receives the artifacts each step saves
    # (processed data, HNSW index, adjacency matrix).
    output_dir='pipeline_outputs',
    # Input dataset. Alternative dataset kept for reference:
    #   data_path='/home/anas.aitaomar/yfcc/yfcc_10m_old_dist.h5'
    # NOTE(review): absolute, user-specific path; consider a configurable DATA_DIR.
    data_path='/data/anas.aitaomar/sift_1m_old_dist.h5',
    # Index-construction parameters; exact semantics are defined by HNSWPipeline.
    ef_construction=100,
    out_degree=4,
    # Matches the 50000x50000 shape of the adjacency matrix produced later.
    num_nodes=50_000,
    # NOTE(review): no fixed seed is supplied; if HNSWPipeline samples data or
    # builds the index stochastically, reruns may differ. Confirm how None is handled.
    seed=None,
)

In [3]:
# Run the three pipeline stages in order; each one logs its progress and saves
# an artifact under the pipeline's output directory (see the log output below).

# Step 1: load and preprocess the data (saved as processed_data.npy).
# force_reload=True presumably bypasses previously cached data -- TODO confirm
# against HNSWPipeline.step1_load_data.
pipeline.step1_load_data(force_reload=True)
# Step 2: build the HNSW index (saved as hnsw_index.bin).
pipeline.step2_build_index()
# Step 3: create the adjacency matrix (saved as adjacency.npz) and keep the
# returned matrix for the graph analysis that follows.
adj_matrix = pipeline.step3_create_adjacency()

Loading and preprocessing data...
Saved processed data to pipeline_outputs/processed_data.npy
Building HNSW index...
Saved HNSW index to pipeline_outputs/hnsw_index.bin
Creating adjacency matrix...
Saved adjacency matrix to pipeline_outputs/adjacency.npz


In [4]:
# Show the sparse-matrix summary (shape, dtype, number of stored entries, format)
# as a quick sanity check on the adjacency built in the previous step.
adj_matrix

<50000x50000 sparse matrix of type '<class 'numpy.float32'>'
	with 297464 stored elements in Compressed Sparse Row format>

In [5]:
import networkx as nx

# Turn the sparse adjacency into a directed graph: every stored entry A[i, j]
# becomes a single edge i -> j, and its value is kept under the "weight" attribute.
#
# Version note: from_scipy_sparse_array is available from NetworkX 2.7 onwards;
# older releases only offer from_scipy_sparse_matrix, which was removed in 3.0.
#
# parallel_edges only matters for multigraphs built from integer matrices, so it
# has no effect for a DiGraph; it is kept explicit to document the intent.
G = nx.from_scipy_sparse_array(
    adj_matrix,
    parallel_edges=False,
    create_using=nx.DiGraph,
    edge_attribute="weight",
)

In [9]:
# Inspect node 1's adjacency: a mapping from neighbour id to that edge's attributes.
G[1]

AtlasView({127: {'weight': 1.0}, 2465: {'weight': 1.0}, 7842: {'weight': 1.0}, 15131: {'weight': 1.0}, 20154: {'weight': 1.0}, 29376: {'weight': 1.0}, 44001: {'weight': 1.0}})

In [10]:
# Node 2465 is one of node 1's neighbours (see the previous output); inspect its
# own adjacency to follow the graph one hop further.
G[2465]

AtlasView({6032: {'weight': 1.0}, 12289: {'weight': 1.0}, 38933: {'weight': 1.0}, 49215: {'weight': 1.0}})