In [36]:
import nmslib
import numpy
import tables

In [37]:
MODEL_PATH = 'models/inceptionv3_4_new_ohne_dpot_2.97270.hdf5'
MODEL_VGG_PATH = 'models/weights.bestVGG16try9.hdf5'
hdf5_PATH = 'models/vgg16_bottleneck_features.hdf5'

In [38]:
hdf5_file = tables.open_file(hdf5_PATH, mode='r')
features = hdf5_file.root.img_features
images = hdf5_file.root.img_paths

In [39]:
# Set index parameters
# These are the most important onese
M = 15
efC = 100

num_threads = 4
index_time_params = {'M': M, 'indexThreadQty': num_threads, 'efConstruction': efC, 'post' : 0}
print('Index-time parameters', index_time_params)

Index-time parameters {'M': 15, 'indexThreadQty': 4, 'efConstruction': 100, 'post': 0}


In [40]:
# Number of neighbors 
K=100

In [41]:
# Space name should correspond to the space name 
# used for brute-force search
space_name='l2'

In [42]:
# Intitialize the library, specify the space, the type of the vector and add data points 
index = nmslib.init(method='hnsw', space=space_name, data_type=nmslib.DataType.DENSE_VECTOR) 
index.addDataPointBatch(features[:])

403885

In [43]:
import time
# Create an index
start = time.time()
index_time_params = {'M': M, 'indexThreadQty': num_threads, 'efConstruction': efC}
index.createIndex(index_time_params, print_progress=True)
end = time.time() 
print('Index-time parameters', index_time_params)
print('Indexing time = %f' % (end-start))
index.saveIndex('dense_index_optim_01.bin')

Index-time parameters {'M': 15, 'indexThreadQty': 4, 'efConstruction': 100}
Indexing time = 924.952077


In [44]:
# Setting query-time parameters
efS = 100
query_time_params = {'efSearch': efS}
print('Setting query-time parameters', query_time_params)
index.setQueryTimeParams(query_time_params)

Setting query-time parameters {'efSearch': 100}


In [45]:
features[10020].shape[0]

4096

In [46]:
# Querying
query_qty = features[10020].shape[0]
start = time.time()
ids, distances = index.knnQuery(features[10020], k = K)
end = time.time() 
print('kNN time total=%f (sec), per query=%f (sec)' % 
      (end-start, float(end-start)/query_qty))

kNN time total=0.036593 (sec), per query=0.000009 (sec)


In [47]:
ids,distances

(array([ 10020, 382102, 260078, 260768,  42850,  20644, 266945, 225894,
        333075,  31842,  74819, 272200, 245624, 393375, 261284,  46502,
        105567,  32922,  31750, 139340, 379701, 371642, 139616, 333275,
        379622, 334176, 266181,  78916, 245752, 198736, 259558, 289129,
        266835, 394142, 169992,  15835, 332957, 376831,  46131, 194820,
        139317, 259874,   2131, 389711, 393776,   7815, 198382, 164818,
        334534, 246002, 245828, 301108,  32223, 207094, 125345, 378841,
        298691, 164553,  32737, 152305, 377553, 339326, 170415,  46017,
        272845, 260299, 388776,   8431,  31318, 116005, 377387,  31154,
         31783, 138736, 377595,  77392,   9123, 379257, 396170, 116403,
        179554, 188922, 260342, 349535, 109557, 332762,  32813, 213818,
        236827, 198062, 142407, 198244, 143987, 377212, 270330, 130618,
         14164,  97985, 189522, 357266], dtype=int32),
 array([   0.    , 2227.8503, 2259.3535, 2341.124 , 2358.0254, 2391.4626,
       