# FlaNN
---
## Download Dataset

In [1]:
%pip install numpy h5py requests pyflann-ibeis

Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import h5py
import os
import requests
import tempfile
import time

from pyflann_ibeis import *

In [3]:
with tempfile.TemporaryDirectory() as tmp:
    response = requests.get("http://ann-benchmarks.com/sift-128-euclidean.hdf5")
    loc = os.path.join(tmp, "sift.hdf5")
    with open(loc, 'wb') as f:
        f.write(response.content)
    
    sift_h5py = h5py.File(loc, "r")

In [4]:
list(sift_h5py.keys())

['distances', 'neighbors', 'test', 'train']

In [5]:
dataset = sift_h5py['train']
queries = sift_h5py['test']
print(dataset.shape)
print(queries.shape)

(1000000, 128)
(10000, 128)


## Build FlaNN Index
---

In [10]:
normalized_dataset = dataset / np.linalg.norm(dataset, axis=1)[:, np.newaxis]
# configure FlaNN as a tree
flann = FLANN()
# use FLANN().build_index() to instead create a TensorFlow-compatible indices
searcher = flann.build_index(normalized_dataset, algorithm='kdtree', trees=4, max_neighbors=10, cores=4, leaf_max_size=2000)

In [7]:
def compute_recall(neighbors, true_neighbors):
    total = 0
    for gt_row, row in zip(true_neighbors, neighbors):
        total += np.intersect1d(gt_row, row).shape[0]
    return total / true_neighbors.size

## FlaNN interface features
---

In [12]:
start = time.time()
neighbors, distances = flann.nn_index(queries)
end = time.time()

# select top 10 from ground truth
print("Recall:", compute_recall(neighbors, sift_h5py['neighbors'][:, :10]))
print("Time:", end - start)

Recall: 3e-05
Time: 0.07482671737670898
