In [6]:
import struct
import numpy as np


def read_neighbors_from_binary(file_path):
    """
    Read a (nq, k) matrix of neighbor ids from a binary results file.

    The layout read here is two int32 header values (nq, then k) followed by
    nq * k int64 values, which are returned reshaped to (nq, k).

    Args:
        file_path (str): Path to the binary neighbors file

    Returns:
        numpy.ndarray: int64 array of shape (nq, k)

    Raises:
        ValueError: If the header is incomplete or negative, or if the file
            holds fewer than nq * k values
    """
    with open(file_path, 'rb') as f:
        # Read nq and k
        header = np.fromfile(f, dtype=np.int32, count=2)
        if header.size < 2:
            raise ValueError(
                f"{file_path}: truncated header, expected two int32 values (nq, k)")

        # Convert to Python ints so that nq * k cannot wrap around in int32
        nq, k = int(header[0]), int(header[1])
        if nq < 0 or k < 0:
            raise ValueError(f"{file_path}: invalid header nq={nq}, k={k}")

        # Read the neighbors (nns2)
        expected = nq * k
        neighbors = np.fromfile(f, dtype=np.int64, count=expected)

    # np.fromfile returns a short array on a truncated file; report that
    # explicitly instead of letting reshape fail with a generic message
    if neighbors.size != expected:
        raise ValueError(
            f"{file_path}: expected {expected} neighbor ids "
            f"({nq} x {k}) but found {neighbors.size}")

    return neighbors.reshape(nq, k)


def read_qps_from_binary(filepath="/Users/mac/dev/rwalks-reproduce/data/sift50k/all_qps.bin"):
    """
    Load the QPS measurements stored in a binary file.

    The file is read as an 8-byte unsigned count (struct format 'Q')
    followed by that many 4-byte floats (struct format 'f').

    Args:
        filepath (str): Location of the binary QPS file

    Returns:
        list: The QPS values as Python floats; an empty list when the file
            is missing or cannot be read (an error message is printed)
    """
    try:
        with open(filepath, 'rb') as handle:
            # Header: how many QPS entries follow
            header = handle.read(8)
            (count,) = struct.unpack('Q', header)

            # Payload: `count` consecutive floats, 4 bytes each
            payload = handle.read(count * 4)
            values = struct.unpack(str(count) + 'f', payload)

            print(f"Successfully read {count} QPS values from {filepath}")
            return list(values)

    except FileNotFoundError:
        # Missing file is reported separately from other read failures
        print(f"Error: File {filepath} not found")
        return []
    except Exception as err:
        # Any other failure (e.g. a short header or payload) is best-effort
        print(f"Error reading QPS file: {err}")
        return []


def read_distances_from_binary(file_path):
    """
    Read a (nq, k) matrix of distances from a binary results file.

    The layout read here is two int32 header values (nq, then k) followed by
    nq * k float32 values, which are returned reshaped to (nq, k).

    Args:
        file_path (str): Path to the binary distances file

    Returns:
        numpy.ndarray: float32 array of shape (nq, k)

    Raises:
        ValueError: If the header is incomplete or negative, or if the file
            holds fewer than nq * k values
    """
    with open(file_path, 'rb') as f:
        # Read nq and k
        header = np.fromfile(f, dtype=np.int32, count=2)
        if header.size < 2:
            raise ValueError(
                f"{file_path}: truncated header, expected two int32 values (nq, k)")

        # Convert to Python ints so that nq * k cannot wrap around in int32
        nq, k = int(header[0]), int(header[1])
        if nq < 0 or k < 0:
            raise ValueError(f"{file_path}: invalid header nq={nq}, k={k}")

        # Read the distances
        expected = nq * k
        distances = np.fromfile(f, dtype=np.float32, count=expected)

    # np.fromfile returns a short array on a truncated file; report that
    # explicitly instead of letting reshape fail with a generic message
    if distances.size != expected:
        raise ValueError(
            f"{file_path}: expected {expected} distances "
            f"({nq} x {k}) but found {distances.size}")

    return distances.reshape(nq, k)

# Usage


# Directory holding the QPS file, the ground truth and the per-ef result files.
# Kept in one place so the location only has to be changed once.
DATA_DIR = "/Users/mac/dev/rwalks-reproduce/data/sift50k"

all_qps = read_qps_from_binary(f"{DATA_DIR}/all_qps.bin")
efs = [10,  20,  30,  40,  45,  50,  55,  60,  70,   80,   90,
       120, 130, 140, 200, 300, 400, 600, 900, 1200, 1500, 2000]

# Divide all_qps into lists by number of efs (per spec)
num_efs = len(efs)
num_specs = len(all_qps) // num_efs
if len(all_qps) % num_efs != 0:
    # A remainder means the file does not hold a whole number of ef sweeps;
    # the trailing values are not assigned to any spec.
    print(f"Warning: {len(all_qps)} QPS values is not a multiple of "
          f"{num_efs} efs; ignoring the last {len(all_qps) % num_efs}")
qps_per_spec = {}
for spec_idx in range(num_specs):
    start = spec_idx * num_efs
    end = (spec_idx + 1) * num_efs
    qps_per_spec[spec_idx] = all_qps[start:end]

spec_idx = 2
if spec_idx not in qps_per_spec:
    # read_qps_from_binary returns [] on failure, which would otherwise only
    # surface as a bare KeyError after the result files have been loaded.
    raise KeyError(
        f"No QPS values for spec {spec_idx}: read {len(all_qps)} values, "
        f"enough for {num_specs} spec(s) of {num_efs} efs")

# Recall is evaluated on the top-k entries of both the ground truth and the results
k = 10
_neighbors = np.load(f"{DATA_DIR}/gt_{spec_idx}.npy")
for idx, i in enumerate(efs):
    print("ef = ", i)
    acorn_nn = read_neighbors_from_binary(
        f"{DATA_DIR}/01_nn_{i}_{spec_idx}.bin")
    # NOTE: this file name has no underscore between "dist" and the ef value.
    # The distances are loaded but not used in the recall computation below.
    acorn_dist = read_distances_from_binary(
        f"{DATA_DIR}/01_nn_dist{i}_{spec_idx}.bin")

    recalls = {'top10': []}
    # zip stops at the shorter of the two inputs; query_count in the log
    # reports how many queries were actually compared.
    for neighbors, true_neighbors in zip(acorn_nn, _neighbors):
        # Only the first k retrieved ids count towards recall@k, so a result
        # file storing more than k neighbors per query cannot inflate the score.
        recall_at_10 = len(np.intersect1d(
            true_neighbors[:k], neighbors[:k])) / min(k, len(true_neighbors))
        recalls['top10'].append(recall_at_10)

    logs = {
        "spec_idx": spec_idx,
        "ef": i,
        "qps": qps_per_spec[spec_idx][idx],
        # float() keeps the log free of np.float64(...) wrappers
        "recalls": {"top10": float(round(np.mean(recalls['top10']), 3))},
        "query_count": len(recalls['top10'])
    }

    print(logs)

Successfully read 66 QPS values from /Users/mac/dev/rwalks-reproduce/data/sift50k/all_qps.bin
ef =  10
{'spec_idx': 2, 'ef': 10, 'qps': 45761.53515625, 'recalls': {'top10': np.float64(0.362)}, 'query_count': 1667}
ef =  20
{'spec_idx': 2, 'ef': 20, 'qps': 26862.908203125, 'recalls': {'top10': np.float64(0.514)}, 'query_count': 1667}
ef =  30
{'spec_idx': 2, 'ef': 30, 'qps': 18927.271484375, 'recalls': {'top10': np.float64(0.589)}, 'query_count': 1667}
ef =  40
{'spec_idx': 2, 'ef': 40, 'qps': 14689.0302734375, 'recalls': {'top10': np.float64(0.635)}, 'query_count': 1667}
ef =  45
{'spec_idx': 2, 'ef': 45, 'qps': 13278.4326171875, 'recalls': {'top10': np.float64(0.651)}, 'query_count': 1667}
ef =  50
{'spec_idx': 2, 'ef': 50, 'qps': 12665.6455078125, 'recalls': {'top10': np.float64(0.665)}, 'query_count': 1667}
ef =  55
{'spec_idx': 2, 'ef': 55, 'qps': 11516.5693359375, 'recalls': {'top10': np.float64(0.677)}, 'query_count': 1667}
ef =  60
{'spec_idx': 2, 'ef': 60, 'qps': 8688.54296875,

In [5]:
# Number of QPS values stored for spec 2 (each spec receives a slice of num_efs entries).
len(qps_per_spec[2])

22

In [None]:
)