In [1]:
import os
import time
from os import path

import faiss
import numpy as np
import pandas as pd

In [2]:
# Report how many GPUs FAISS can see before any index is moved to the GPU.
ngpus = faiss.get_num_gpus()
print("number of GPUs:", ngpus)

number of GPUs: 2


In [3]:
# Experiment configuration read by the cells below.
# `config` is concatenated directly onto the data root and into file names,
# so the trailing slash is part of the value.
config = "mrl/" # mrl, mrl_e, ff, slimmable
# Dataset name; used as a prefix of the embedding, index and neighbour file names.
dataset = 'imagenetv2' # imagenet1k, imagenetv2, imagenet4m
# 'exactl2' selects a flat L2 index searched on GPU; any other value builds an HNSW index on CPU.
index_type = 'exactl2' # exactl2, hnsw8, hnsw32
# Second argument passed to faiss.IndexHNSWFlat. It is not derived from
# `index_type`, so the two must be kept in sync by hand.
hnsw_max_neighbors = 32 # 8, 32
# Number of neighbours requested per query in index.search.
k = 2048 # shortlist length

In [4]:
# Base directory for this configuration's embeddings, indexes and neighbour lists.
root = '/home/aniketr/Documents/imagenetv2/' + config 

# Default embedding file names for the full 2048-d features.
# ImageNetV2 has no train split of its own here, so its database falls back to
# the ImageNet-1K train embeddings.
# NOTE: the per-nesting loop cell reassigns db_csv and query_csv before they are read.
if dataset == 'imagenetv2':
    db_csv = 'imagenet1k_train_' + config + '2048-X.npy'
else:
    db_csv = dataset + '_train_' + config + '2048-X.npy'
query_csv = dataset + '_val_' + config + '2048-X.npy'

In [5]:
# Embedding sizes to evaluate; one index is built/searched per entry.
nesting_list = [8, 16, 32, 64, 128, 256, 512, 1024, 2048]

# Exact search runs on GPU; GPU inference for HNSW is currently not supported
# by FAISS, so every other index type stays on the CPU.
use_gpu = 1 if index_type == 'exactl2' else 0

In [6]:
# Build (or load from disk) one FAISS index per embedding size, search it with
# the query embeddings and write each query's k nearest-neighbour ids to CSV.
#
# NOTE(review): db_csv and query_csv below name the same '_val_' file, so the
# database is queried with its own vectors; confirm that a '_train_' database
# was not intended.
# NOTE(review): the cached index file name is derived from index_type while the
# HNSW graph degree comes from hnsw_max_neighbors; keep the two in sync (e.g.
# 'hnsw32' with 32) or a mislabelled index will be cached and reused.

index_dir = root+'index_files/'
neighbors_dir = root+"neighbors/"
# Create the output folders up front so write_index / to_csv do not fail when
# the directory tree does not exist yet.
os.makedirs(index_dir, exist_ok=True)
os.makedirs(neighbors_dir, exist_ok=True)

# Label search timings with the device that actually runs the search.
device = "GPU" if use_gpu else "CPU"

for nesting in nesting_list:
    db_csv = dataset+'_val_nesting0_sh0_ff'+str(nesting)+'-X.npy'
    query_csv = dataset+'_val_nesting0_sh0_ff'+str(nesting)+'-X.npy'
    index_file = index_dir+dataset+'_'+str(nesting)+'_'+index_type+'.index'

    # Load or build index
    if path.exists(index_file):
        print("Loading index file: " + index_file)
        cpu_index = faiss.read_index(index_file)

    else:
        print("Generating index file: " + index_file)
        start = time.perf_counter()
        # Keep only the first `nesting` dimensions; FAISS expects contiguous float32.
        xb = np.ascontiguousarray(np.load(root+db_csv)[:,:nesting], dtype=np.float32)
        print("Embedding file load time= ", time.perf_counter() - start)

        faiss.normalize_L2(xb)                    # normalises the rows in place
        d = xb.shape[1]                           # dimension
        nb = xb.shape[0]                          # database size

        print("database: ", xb.shape)

        start = time.perf_counter()
        if index_type == 'exactl2':
            print("Building Exact L2 Index")
            cpu_index = faiss.IndexFlatL2(d)      # build the index
        else:
            print("Building HNSW%d Index" % hnsw_max_neighbors)
            cpu_index = faiss.IndexHNSWFlat(d, hnsw_max_neighbors)
        cpu_index.add(xb)                         # add vectors to the index
        faiss.write_index(cpu_index, index_file)  # cache for later runs
        # The index is always built on the CPU; only the search may run on GPU.
        print("Index build time= ", time.perf_counter() - start)

    if use_gpu:
        # Replicate the CPU index across every visible GPU.
        index = faiss.index_cpu_to_all_gpus(
            cpu_index
        )
    else:
        index = cpu_index

    # Load the queries
    xq = np.ascontiguousarray(np.load(root+query_csv)[:,:nesting], dtype=np.float32)
    faiss.normalize_L2(xq)
    nq = xq.shape[0]
    print("queries: ", xq.shape)

    # A cached index built from different embeddings would otherwise be searched
    # with incompatible queries; fail with an actionable message instead.
    if cpu_index.d != xq.shape[1]:
        raise ValueError(
            "Index dimension %d does not match query dimension %d; "
            "delete the stale index file %s and rerun"
            % (cpu_index.d, xq.shape[1], index_file)
        )

    start = time.perf_counter()
    D, I = index.search(xq, k)   # D: distances, I: database row ids, one row per query
    end = time.perf_counter() - start
    print("%s %d-NN search time= %f\n" % (device, k, end))

    # One CSV row per query holding its k neighbour ids, without header or index column.
    nn_dir = neighbors_dir+index_type+"_"+str(nesting)+"dim-"+str(k)+"-NN_"+dataset+".csv"
    pd.DataFrame(I).to_csv(nn_dir, header=None, index=None)


Loading index file: /home/aniketr/Documents/imagenetv2/index_files/imagenetv2_8_exactl2.index
queries:  (10000, 8)
GPU 2048-NN search time= 0.221017

Loading index file: /home/aniketr/Documents/imagenetv2/index_files/imagenetv2_16_exactl2.index
queries:  (10000, 16)
GPU 2048-NN search time= 1.451043

Loading index file: /home/aniketr/Documents/imagenetv2/index_files/imagenetv2_32_exactl2.index
queries:  (10000, 32)
GPU 2048-NN search time= 0.391398

Loading index file: /home/aniketr/Documents/imagenetv2/index_files/imagenetv2_64_exactl2.index
queries:  (10000, 64)
GPU 2048-NN search time= 0.844761

Loading index file: /home/aniketr/Documents/imagenetv2/index_files/imagenetv2_128_exactl2.index
queries:  (10000, 128)
GPU 2048-NN search time= 0.712209

Loading index file: /home/aniketr/Documents/imagenetv2/index_files/imagenetv2_256_exactl2.index
queries:  (10000, 256)
GPU 2048-NN search time= 0.994553

Loading index file: /home/aniketr/Documents/imagenetv2/index_files/imagenetv2_512_exac