In [1]:
import os
import time
from os import path

import faiss
import numpy as np
import pandas as pd

In [12]:
# Ask faiss how many GPUs it reports and print the count.
ngpus = faiss.get_num_gpus()
print("number of GPUs:", ngpus)

number of GPUs: 2


In [17]:
# Experiment configuration read by the cells below.
# Sub-directory (note the trailing slash) appended to the data root for the
# imagenet1k / imagenet4m / imagenetv2 datasets.
config = "ff/"
# Dataset key; the path-selection cell has branches for 'imagenet1k',
# 'imagenet4m', 'imagenetv2' and 'slimmable'.
dataset = 'imagenetv2' #imagenet1k, imagenetv2
# 'exactl2' builds a faiss.IndexFlatL2 and enables the GPU path; any other
# value builds a faiss.IndexHNSWFlat and stays on CPU.
index_type = 'exactl2' #exactl2, hnsw8, hnsw32
# Second argument passed to faiss.IndexHNSWFlat when an HNSW index is built.
hnsw_max_neighbors = 32
k = 2048 # shortlist length: number of neighbours requested per query in index.search

In [18]:
# Resolve the data root plus the database / query embedding files for `dataset`.
# Despite the `_csv` suffix, db_csv and query_csv are .npy paths relative to `root`.
if dataset == 'imagenet1k':
    # Imagenet1k
    root = '/mnt/disks/retrieval/corrected_fwd_pass/' + config
    db_csv = 'imagenet1k_train_nesting0_sh0_ff2048-X.npy'
    query_csv = 'imagenet1k_val_nesting0_sh0_ff2048-X.npy'
elif dataset == 'imagenet4m':
    root = '/mnt/disks/imagenet4m/corrected_fwd_pass/' + config
    db_csv = 'imagenet4m_train_nesting1_sh0_ff2048-X.npy'
    query_csv = 'imagenet4m_val_nesting1_sh0_ff2048-X.npy'
elif dataset == 'imagenetv2':
    # ImageNetV2 queries are searched against the ImageNet-1k training database.
    root = '/mnt/disks/retrieval/corrected_fwd_pass/' + config
    db_csv = 'imagenet1k_train_nesting1_sh1_ff2048-X.npy'
    query_csv = 'imagenetv2/imagenetv2_val_nesting1_sh1_ff2048-X.npy'
elif dataset == 'slimmable':
    # This root does not include `config`.
    root = '/mnt/disks/retrieval/slimmable_nn_fwd_pass/'
    db_csv = 'slimmable_train/X_train_slimmable.npy'
    query_csv = 'slimmable_val/X_val_V1_slimmable.npy'
else:
    # Fail fast: without this branch an unknown key leaves root/db_csv/query_csv
    # undefined (or silently stale from a previous run of this cell).
    raise ValueError(
        "Unknown dataset %r; expected one of: imagenet1k, imagenet4m, imagenetv2, slimmable"
        % (dataset,)
    )

In [19]:
# Embedding widths to sweep: the powers of two from 8 up to 2048.
# Other sweeps kept for reference: [1536] and [12, 24, 48, 96, 192, 384, 768, 1536]
nesting_list = [2 ** exponent for exponent in range(3, 12)]

# Only the exact-L2 index type takes the GPU path; every other type stays on CPU.
use_gpu = int(index_type == 'exactl2')

In [21]:
# For every embedding width in nesting_list: load (or build and cache) a faiss
# index over the database vectors, search the k nearest neighbours of every
# query vector, and write the neighbour ids to a CSV file under root/neighbors/.
for nesting in nesting_list:
    # NOTE(review): these file names are hard-coded to the imagenet1k database and
    # the imagenetv2 queries and overwrite the db_csv / query_csv chosen from
    # `dataset`; only `root` and the output file name follow `dataset`.
    # Confirm this is intended before running with another dataset.
    db_csv = 'imagenet1k_train_nesting0_sh0_ff'+str(nesting)+'-X.npy'
    query_csv = 'imagenetv2/imagenetv2_val_nesting0_sh0_ff'+str(nesting)+'-X.npy'
    # Key the cached index on index_type so an HNSW run can neither load nor
    # overwrite the exact-L2 cache (the path is unchanged for 'exactl2').
    index_file = root+'index_files/imagenet1k_'+str(nesting)+'_'+index_type+'.index'

    ## Load or build index
    if path.exists(index_file):
        print("Loading index file: " + index_file)
        cpu_index = faiss.read_index(index_file)
    else:
        print("Generating index file: " + index_file)
        start = time.time()
        # Keep the first `nesting` columns as a contiguous float32 array.
        xb = np.ascontiguousarray(np.load(root+db_csv)[:,:nesting], dtype=np.float32)
        print("Database load time= ", time.time() - start)

        faiss.normalize_L2(xb)                 # L2-normalise the rows in place
        d = xb.shape[1]                        # dimension
        nb = xb.shape[0]                       # database size

        print("database: ", xb.shape)

        # Make sure the cache directory exists before the build is timed and written.
        os.makedirs(path.dirname(index_file), exist_ok=True)

        start = time.time()
        if index_type == 'exactl2':
            print("Building Exact L2 Index")
            cpu_index = faiss.IndexFlatL2(d)   # build the index
        else:
            print("Building HNSW%d Index" % hnsw_max_neighbors)
            cpu_index = faiss.IndexHNSWFlat(d, hnsw_max_neighbors)
        cpu_index.add(xb)                      # add vectors to the index
        faiss.write_index(cpu_index, index_file)
        # The index is built and written on the CPU side, so no device label here.
        print("Index build time= ", time.time() - start)

    if use_gpu:
        # Copy the CPU index onto all GPUs visible to faiss.
        index = faiss.index_cpu_to_all_gpus(cpu_index)
    else:
        index = cpu_index

    # Load the queries (same column slice and normalisation as the database)
    xq = np.ascontiguousarray(np.load(root+query_csv)[:,:nesting], dtype=np.float32)
    faiss.normalize_L2(xq)
    nq = xq.shape[0]                           # nb of queries
    print("queries: ", xq.shape)

    ## NN Search on queries:
    start = time.time()
    D, I = index.search(xq, k)                 # D: distances, I: neighbour ids
    end = time.time() - start
    # Report the device the search actually ran on.
    device = "GPU" if use_gpu else "CPU"
    print("%s %d-NN search time= %f\n" % (device, k, end))

    # nn_dir is the output *file* path; create its parent directory if needed.
    nn_dir = root+"neighbors/"+index_type+"_"+str(nesting)+"dim-"+str(k)+"-NN_"+dataset+".csv"
    os.makedirs(path.dirname(nn_dir), exist_ok=True)

    # One row per query, k neighbour ids per row, no header or index column.
    pd.DataFrame(I).to_csv(nn_dir, header=False, index=False)


Loading index file: /mnt/disks/retrieval/corrected_fwd_pass/ff/index_files/imagenet1k_8_exactl2.index
queries:  (10000, 8)
GPU 2048-NN search time= 0.562993

Loading index file: /mnt/disks/retrieval/corrected_fwd_pass/ff/index_files/imagenet1k_16_exactl2.index
queries:  (10000, 16)
GPU 2048-NN search time= 0.583641

Loading index file: /mnt/disks/retrieval/corrected_fwd_pass/ff/index_files/imagenet1k_32_exactl2.index
queries:  (10000, 32)
GPU 2048-NN search time= 0.618286

Loading index file: /mnt/disks/retrieval/corrected_fwd_pass/ff/index_files/imagenet1k_64_exactl2.index
queries:  (10000, 64)
GPU 2048-NN search time= 0.494143

Loading index file: /mnt/disks/retrieval/corrected_fwd_pass/ff/index_files/imagenet1k_128_exactl2.index
queries:  (10000, 128)
GPU 2048-NN search time= 0.538245

Loading index file: /mnt/disks/retrieval/corrected_fwd_pass/ff/index_files/imagenet1k_256_exactl2.index
queries:  (10000, 256)
GPU 2048-NN search time= 0.590097

Loading index file: /mnt/disks/retriev