In [1]:
import numpy as np
import torch
import faiss
import sys
sys.path.append('../')
from utils import load_embeddings

In [9]:
# Notebook-level settings. `root` is a relative directory path; the code that
# consumes it is not part of this cell.
root = "../../../inference_array/resnet50/"
# Model variant; allowed values: mrl, ff
model = 'ff'
# Dataset variant; allowed values: 1K, 4K, V2
dataset = "1K"

### In the cell below, we use the relative contrast equation as defined in Equation (1) of [On the Difficulty of Nearest Neighbor Search](https://www.ee.columbia.edu/ln/dvmm/pubs/files/ICML_RelativeContrast.pdf).<br>
### $C_r = \frac{D_{mean}}{D_{min}}$<br>
<p> where $C_r$ is the relative contrast of a dataset $X$, $D_{mean}$ is the expected distance of a random database sample from a query $q$, and $D_{min}$ is the expected distance to the nearest database sample from a query $q$.</p>

In [None]:
# Compute the relative contrast C_r = D_mean / D_min for each embedding size d:
#   D_mean -- mean Euclidean distance between a query and a database sample,
#             averaged over every (query, database) pair;
#   D_min  -- mean over queries of the distance to the nearest database sample.
# The database is streamed through the GPU in chunks of `batch_size` rows so the
# full (num_queries x num_database) distance matrix is never materialised.
if torch.cuda.device_count() > 0:
    device = torch.device('cuda')
else:
    # RuntimeError is a subclass of Exception, so existing handlers still match.
    raise RuntimeError("Please use a GPU! This will take very very long otherwise.")

# dlist = [8, 16, 32, 64, 128, 256, 512, 1024, 2048]
dlist = [2048]
batch_size = 4196

for d in dlist:
    database, queryset, db_labels, query_labels, xb, xq = load_embeddings(model, dataset, d)

    qy = torch.as_tensor(xq).to(device)
    db = torch.as_tensor(xb)  # stays on the host; moved to the GPU one batch at a time

    num_queries = qy.shape[0]
    # NOTE(review): rows are counted on `db` (xb), the tensor that is actually
    # sliced below. The original used database.shape[0]; presumably both have
    # the same number of rows -- confirm against load_embeddings.
    num_db = db.shape[0]
    if num_queries == 0 or num_db == 0:
        raise ValueError("Relative contrast needs at least one query and one database sample.")

    # Ceil division: the trailing partial batch must be included, otherwise the
    # last (num_db % batch_size) database samples are silently ignored.
    num_batches = (num_db + batch_size - 1) // batch_size

    # Running nearest-neighbour distance per query (same dtype/device as qy).
    final_d_min = qy.new_full((num_queries,), float('inf'))
    # Running sum of distances per query. Accumulated in float64 because the
    # sum spans every database sample and float32 would lose precision.
    final_d_sum = torch.zeros(num_queries, dtype=torch.float64, device=device)

    for i in range(num_batches):
        db_batch = db[i * batch_size:(i + 1) * batch_size, :].to(device)
        distances = torch.cdist(qy, db_batch)  # (num_queries, rows in this batch)

        # A min-reduction is enough here; a full sort is unnecessary work.
        current_d_min = distances.min(dim=1).values
        final_d_min = torch.minimum(current_d_min, final_d_min)

        # Sum instead of per-batch mean so that a smaller last batch is
        # weighted by its true number of samples.
        final_d_sum += distances.sum(dim=1, dtype=torch.float64)

    # Per-query mean distance to the whole database.
    final_d_mean = final_d_sum / num_db

    C_r = (final_d_mean.mean() / final_d_min.double().mean()).item()
    print(f'C_r(d={d})={C_r}')