In [1]:
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
%matplotlib inline
%config InlineBackend.figure_format='retina'
# plt.style.use('seaborn-whitegrid')
import seaborn as sns
import numpy as np
import pandas as pd

Dims = [8,16,32,64,128,256,512,1024,2048]

In [2]:
sns.set_context('talk')
sns.set_style("white")

## IVF Cluster Distribution

In [None]:
import faiss

def get_invlists(index):
    """Read a FAISS index from disk and gather the ids held by each non-empty inverted list.

    Args:
        index: path of the serialized index; handed to ``faiss.read_index``.

    Returns:
        ``(all_ids, bins)`` where ``all_ids`` is a list with one id array per
        non-empty inverted list and ``bins`` holds the matching list lengths.
        Lists whose size is 0 are left out of both.
    """
    loaded = faiss.read_index(index)
    inverted = loaded.invlists
    all_ids = []
    for cell in range(loaded.nlist):
        size = inverted.list_size(cell)
        if size != 0:
            # .copy() gives an array that owns its data instead of wrapping the
            # pointer returned by get_ids (presumably needed once the index is dropped).
            all_ids.append(faiss.rev_swig_ptr(inverted.get_ids(cell), size).copy())
    bins = [len(ids) for ids in all_ids]
    del loaded
    return all_ids, bins

In [None]:
# Cache keyed by model string -> (all_ids, counts_per_cell); the plotting cell below
# fills it so each index file is parsed at most once per kernel session.
index_files = {}

# Values rendered as $d_{TV}$ in the per-dimension annotation boxes, keyed by dimension.
TVDs = {
    8:    0.0007830626151,
    16:   0.0002710860909,
    32:   0.0001552219862,
    64:   0.0003651896396,
    128:  0.0006069480588,
    256:  0.0006804511841,
    512:  0.0007603661853,
    1024: 0.0007473010084,
    2048: 0.0005379413265,
}

# Values rendered as $d_{TV,2048}$ in the same annotation boxes, keyed by dimension.
TVD_2048 = {
    8:    0.001283394139,
    16:   0.001213469176,
    32:   0.0009486557904,
    64:   0.0007768816719,
    128:  0.0006945503814,
    256:  0.0006419974382,
    512:  0.000649082282,
    1024: 0.000586784595,
    2048: 0.0005379413265,
}

In [None]:
arch = 'resnet50' # resnet18, resnet34, resnet50, resnet101, mobilenetv2
root = f"/mnt/disks/imagenet_dir/inference_array/{arch}/"

# 3x3 grid, one panel per dimension: overlaid histograms of points-per-cell for the
# ff{dim} index and the mrl-ivf/{dim} index, each with a dashed normal curve fitted
# to the same counts. get_invlists drops empty cells, so they are not in the histograms.
config = 'ivf'
dims = [8,16,32,64,128,256,512,1024,2048]
hist_edges = list(range(0, 2500, 100))  # shared bin edges, built once instead of per call
plt.figure(figsize=(20,20))
bins_dict = {}
probs = {}
for i, dim in enumerate(dims):

    model_ff = f'ff{dim}/'
    if dim == 2048:
        index_file = root+'index_files/ff-ivf/2048/1K_ivf_nlist2048_d2048.index'
    else:
        index_file = root+'index_files/'+model_ff+f"1K_ivf_{dim}m_nbits8_nlist2048_d{dim}.index"

    print(index_file)
    # index_files caches the parsed lists so re-running the cell does not re-read the index.
    if model_ff not in index_files:
        index_files[model_ff] = get_invlists(index_file)
    all_ids, counts_per_cell_ff = index_files[model_ff]

    df_1 = pd.DataFrame(counts_per_cell_ff)

    model_mrl = f'mrl-{config}/{dim}/'
    index_file = root+'index_files/'+model_mrl+f"1K_ivf_nlist2048_d{dim}.index"
    print(index_file)

    if model_mrl not in index_files:
        index_files[model_mrl] = get_invlists(index_file)
    all_ids, counts_per_cell_mrl = index_files[model_mrl]

    df_2 = pd.DataFrame(counts_per_cell_mrl)

    ax = plt.subplot(3, 3, i + 1)
    n, bins_1, patches = plt.hist(counts_per_cell_ff, edgecolor = "black", label=model_ff.replace("/"," "), alpha=0.5, bins=hist_edges, color='tab:blue')
    n, bins_2, patches = plt.hist(counts_per_cell_mrl, edgecolor = "black", label=model_mrl.replace("/"," "), alpha=0.5, bins=hist_edges, color='tab:orange')

    bins_dict[dim] = [bins_1, bins_2]

    # Same numbers describe() reports as 'mean' and 'std' (sample std, ddof=1), read by
    # name rather than by position: positional lookup on a label-indexed Series is
    # deprecated in pandas 2.x.
    mu_1, sigma_1 = df_1[0].mean(), df_1[0].std()
    mu_2, sigma_2 = df_2[0].mean(), df_2[0].std()

    # Normal pdf evaluated at the histogram bin edges.
    y_1 = ((1 / (np.sqrt(2 * np.pi) * sigma_1)) * np.exp(-0.5 * (1 / sigma_1 * (bins_1 - mu_1))**2))
    y_2 = ((1 / (np.sqrt(2 * np.pi) * sigma_2)) * np.exp(-0.5 * (1 / sigma_2 * (bins_2 - mu_2))**2))
    probs[dim] = [y_1, y_2]

    # The density curves go on a twin y-axis whose ticks are hidden.
    ax2 = ax.twinx()
    ax2.plot(bins_1, y_1, '--', color ='blue')
    ax2.plot(bins_2, y_2, '--', color ='tab:red')
    ax2.get_yaxis().set_visible(False)

    # Empty scatters act as legend proxies for the two histogram colours.
    plt.scatter([], [], marker='s', label=f'MR-{dim}', c='tab:orange')
    plt.scatter([], [], marker='s', label=f'RR-{dim}', c='tab:blue')

    ax.annotate( '$d_{TV}$='+f'{round(TVDs[dim],5)}' + "\n" + '$d_{TV,2048}$='+f'{round(TVD_2048[dim],5)}', xy=(1, 1), xytext=(-15, -15), fontsize=15, xycoords='axes fraction', textcoords='offset pixels', bbox=dict(facecolor='white', alpha=0.8, edgecolor='gray'), horizontalalignment='right', verticalalignment='top')

    ax.set_xlabel('Number of data points per cell')
    ax.set_ylabel('Number of cells')
    mrl_title = model_mrl.replace("/"," ").replace("-ivf",'').upper()  # not used in this cell; kept so the name stays defined

    plt.legend(loc='lower right',frameon=True, bbox_to_anchor=(0.99, 0.53))
    ax.grid()

plt.subplots_adjust(hspace=0.4, wspace=0.3)
plt.savefig("IVF_Cell_dist.pdf", bbox_inches="tight")

## IVF Plots

In [3]:
# Colour cycle shared by the plots below (ten named matplotlib colours).
tableau_20 = ["steelblue"] + [
    f"tab:{name}"
    for name in ("orange", "red", "green", "purple", "brown", "pink", "gray", "olive", "cyan")
]

# Marker shape and relative marker size, both keyed by the value found in the "NC" column.
markers = dict(zip((256, 512, 1024, 2048, 4096, 8192), ('o', '^', 'v', 'h', 'p', 'D')))
sizes = dict(zip((256, 512, 1024, 2048, 4096, 8192), (3, 8, 13, 18, 23, 28)))

In [364]:
# "Compute" is divided by 1e6 on load; the plots below label that axis "MFLOPS/Query".
df = pd.read_csv("./MRL-IVF.csv").assign(Compute=lambda frame: frame["Compute"] / 1000000)
ff_df = pd.read_csv("./FF-IVF.csv").assign(Compute=lambda frame: frame["Compute"] / 1000000)

In [None]:
f, ax = plt.subplots(figsize=(16, 12))

# Rows are consumed in 9 consecutive groups of 6 (labels i..i+5). Each group gets its own
# colour, and the "$D$" legend further down labels the groups 8, 16, ..., 2048.
for i in range(0,54,6):
    row = df.loc[df.loc[i:i+5]['Top-1'].idxmax()]

    ff_group = ff_df.loc[i:i+5]
    if i == 0:
        # First group only: the baseline point is the NC == 1024 row, not the Top-1 maximum.
        # Reduce it to a single row (a Series) so the scalar lookups below, such as
        # markers[row_ff["NC"]], work; a one-row DataFrame would raise on the dict lookup.
        pinned = ff_group[ff_group['NC'] == 1024]
        row_ff = pinned.iloc[0] if len(pinned) else ff_df.loc[ff_group['Top-1'].idxmax()]
    else:
        row_ff = ff_df.loc[ff_group['Top-1'].idxmax()]

    # Connect this group's best point to the next group's best point (not for the last group).
    if i//6 < 8:
        next_row = df.loc[df.loc[i+6:i+11]['Top-1'].idxmax()]
        x_pts = [row["Compute"], next_row["Compute"]]
        y_pts = [row["Top-1"], next_row["Top-1"]]
        ax.plot(x_pts, y_pts, c='k', linewidth=3, alpha=0.3)

        next_row_ff = ff_df.loc[ff_df.loc[i+6:i+11]['Top-1'].idxmax()]
        x_pts = [row_ff["Compute"],next_row_ff["Compute"]]
        y_pts = [row_ff["Top-1"], next_row_ff["Top-1"]]
        ax.plot(x_pts, y_pts, c='k', linewidth=3, alpha=0.3, linestyle='dotted')

    # Small marker: shape encodes NC. Large marker: area encodes NC ('o' for df, 'v' for ff_df).
    ax.scatter(row["Compute"], row["Top-1"], marker=markers[row["NC"]], s=300, c=tableau_20[i//6])
    ax.scatter(row_ff["Compute"], row_ff["Top-1"], marker=markers[row_ff["NC"]], s=300, c=tableau_20[i//6],alpha=0.5)

    ax.scatter(row["Compute"], row["Top-1"], marker='o', s=100*sizes[row['NC']], c=tableau_20[i//6], alpha=0.7)
    ax.scatter(row_ff["Compute"], row_ff["Top-1"], marker='v', s=100*sizes[row_ff['NC']], c=tableau_20[i//6],alpha=0.7)

plt.xscale('log')

fs=40
plt.yticks(fontsize=fs)
plt.xticks([0.1,1,10], [0.1,1,10],fontsize=fs)

plt.xlabel("MFLOPS/Query", fontsize=fs)
plt.ylabel("Top-1 (%)", fontsize=fs)

# Legend 1: one black proxy marker per NC value (256 ... 8192).
for nc, shape in markers.items():
    plt.scatter([], [],  marker=shape, label=f"{nc}", c='k')

# NOTE(review): this legend and the "$D$" legend below are both anchored lower right.
legend1 = plt.legend(loc=4, borderpad=0.1, title="$NC$", prop={'size': 20})
plt.gca().add_artist(legend1)

# Legend 2: one coloured square per group.
foo=[]; lab=[f"{2**(i+3)}" for i in range(9)]
for i in range(9):
    foo.append(plt.scatter([], [],  marker="s", c=tableau_20[i]))

legend2 = plt.legend(foo, lab, loc='lower right', borderpad=0.5, title="$D$", prop={'size':20}, title_fontsize=20,frameon=True)
plt.gca().add_artist(legend2)

# Legend 3: line-style key.
mrl = mlines.Line2D([], [], color='k', label='MRL', marker='o')
ff = mlines.Line2D([], [], color='k', label='Baseline', linestyle='dotted', marker='v')

legend2 = plt.legend(handles=[mrl, ff], loc=1, borderpad=0.2, bbox_to_anchor=(0.85, 0.12), prop={'size':20}, markerscale=2,frameon=True)
# Matplotlib 3.7 renamed Legend.legendHandles to legend_handles and 3.9 removed the old name.
legend_lines = legend2.legend_handles if hasattr(legend2, "legend_handles") else legend2.legendHandles
for legobj in legend_lines:
    legobj.set_linewidth(6)
plt.gca().add_artist(legend2)

plt.grid()

In [None]:
f, ax = plt.subplots(figsize=(16, 12))

# Rows are consumed in 9 consecutive groups of 6 (labels i..i+5); per group, the row with
# the highest recall is plotted. The recall column is 'Recall@100' in df and 'R@100' in ff_df.
for i in range(0,54,6):
    row = df.loc[df.loc[i:i+5]['Recall@100'].idxmax()]
    row_ff = ff_df.loc[ff_df.loc[i:i+5]['R@100'].idxmax()]

    # Connect this group's best point to the next group's best point (not for the last group).
    if i//6 < 8:
        next_row = df.loc[df.loc[i+6:i+11]['Recall@100'].idxmax()]
        x_pts = [row["Compute"], next_row["Compute"]]
        y_pts = [row["Recall@100"], next_row["Recall@100"]]
        ax.plot(x_pts, y_pts, c='k', linewidth=3, alpha=0.3)

        next_row_ff = ff_df.loc[ff_df.loc[i+6:i+11]['R@100'].idxmax()]
        x_pts = [row_ff["Compute"],next_row_ff["Compute"]]
        y_pts = [row_ff["R@100"], next_row_ff["R@100"]]
        ax.plot(x_pts, y_pts, c='k', linewidth=3, alpha=0.3, linestyle='dotted')

    # Marker area encodes NC; colour encodes the group.
    ax.scatter(row["Compute"], row["Recall@100"], marker='o', s=100*sizes[row['NC']], c=tableau_20[i//6], alpha=0.7)
    ax.scatter(row_ff["Compute"], row_ff["R@100"], marker='v', s=100*sizes[row_ff['NC']], c=tableau_20[i//6],alpha=0.7)

plt.xscale('log')

fs=40
plt.yticks(fontsize=fs)
plt.xticks([0.1,1,10],[0.1,1,10],fontsize=fs)

plt.xlabel("MFLOPS/Query", fontsize=fs)
plt.ylabel("Recall@100 (%)", fontsize=fs)

# Colour key: one square per group, labelled 8, 16, ..., 2048.
foo=[]; lab=[f"{2**(i+3)}" for i in range(9)]
for i in range(9):
    foo.append(plt.scatter([], [],  marker="s", c=tableau_20[i]))

legend2 = plt.legend(foo, lab, loc='lower right', borderpad=0.5, title="$D$", prop={'size':20}, title_fontsize=20,frameon=True)
plt.gca().add_artist(legend2)

# Line-style key.
mrl = mlines.Line2D([], [], color='k', label='MRL',marker='o')
ff = mlines.Line2D([], [], color='k', label='Baseline', linestyle='dotted',marker='v')

legend2 = plt.legend(handles=[mrl, ff], loc=1, borderpad=0.2, bbox_to_anchor=(0.84, 0.15), prop={'size':20}, markerscale=2,frameon=True)
# Matplotlib 3.7 renamed Legend.legendHandles to legend_handles and 3.9 removed the old name.
legend_lines = legend2.legend_handles if hasattr(legend2, "legend_handles") else legend2.legendHandles
for legobj in legend_lines:
    legobj.set_linewidth(6)
plt.gca().add_artist(legend2)

plt.grid()

In [None]:
f, ax = plt.subplots(figsize=(20, 12))

# Per group of 6 rows (labels i..i+5), plot the row with the highest Top-1 against its D.
# Marker shape encodes that row's NC.
for i in range(0,54,6):
    row = df.loc[df.loc[i:i+5]['Top-1'].idxmax()]
    row_ff = ff_df.loc[ff_df.loc[i:i+5]['Top-1'].idxmax()]

    # Connect this group's best point to the next group's best point (not for the last group).
    if i//6 < 8:
        next_row = df.loc[df.loc[i+6:i+11]['Top-1'].idxmax()]
        x_pts = [row["D"], next_row["D"]]
        y_pts = [row["Top-1"], next_row["Top-1"]]
        ax.plot(x_pts, y_pts, c='steelblue', linewidth=3, alpha=0.7)

        next_row_ff = ff_df.loc[ff_df.loc[i+6:i+11]['Top-1'].idxmax()]
        x_pts = [row_ff["D"],next_row_ff["D"]]
        y_pts = [row_ff["Top-1"], next_row_ff["Top-1"]]
        ax.plot(x_pts, y_pts, c='tab:orange', linewidth=3, alpha=0.7, linestyle='dotted')

    ax.scatter(row["D"], row["Top-1"], marker=markers[row["NC"]], s=300, c='steelblue')
    ax.scatter(row_ff["D"], row_ff["Top-1"], marker=markers[row_ff["NC"]], s=300, c='tab:orange')

plt.xscale('log')

fs=40
plt.yticks(fontsize=fs)
plt.xticks(Dims, Dims, fontsize=fs,  rotation=45)

plt.xlabel("Original Vector Dimension (D)", fontsize=fs)
plt.ylabel("Top-1 (%)", fontsize=fs)

# Marker key: one black proxy per NC value (256 ... 8192).
for nc, shape in markers.items():
    plt.scatter([], [],  marker=shape, label=f"{nc}", c='k')

legend1 = plt.legend(loc=4, borderpad=0.1, title="Number of Clusters", prop={'size': 20}, markerscale=2, title_fontsize=15,frameon=True)
plt.gca().add_artist(legend1)

# Line key for the two series.
mrl = mlines.Line2D([], [], color='steelblue', label='MRL')
ff = mlines.Line2D([], [], color='tab:orange', label='Baseline', linestyle='dotted')

legend2 = plt.legend(handles=[mrl, ff], loc=1, borderpad=0.2, bbox_to_anchor=(0.82, 0.12), prop={'size':20}, markerscale=2,frameon=True)
# Matplotlib 3.7 renamed Legend.legendHandles to legend_handles and 3.9 removed the old name.
legend_lines = legend2.legend_handles if hasattr(legend2, "legend_handles") else legend2.legendHandles
for legobj in legend_lines:
    legobj.set_linewidth(6)
plt.gca().add_artist(legend2)
plt.grid()

In [None]:
f, ax = plt.subplots(figsize=(20, 12))

# Walk the 9 groups of 6 rows; for each, take the row with the best Top-1 from df and
# from ff_df, join it to the next group's best row, and mark it.
for start in range(0, 54, 6):
    stop = start + 5
    best = df.loc[df.loc[start:stop]['Top-1'].idxmax()]
    best_ff = ff_df.loc[ff_df.loc[start:stop]['Top-1'].idxmax()]

    is_last_group = start // 6 >= 8
    if not is_last_group:
        following = df.loc[df.loc[start + 6:start + 11]['Top-1'].idxmax()]
        ax.plot(
            [best["D"], following["D"]],
            [best["Top-1"], following["Top-1"]],
            c='tab:blue', linewidth=4, alpha=0.7,
        )

        following_ff = ff_df.loc[ff_df.loc[start + 6:start + 11]['Top-1'].idxmax()]
        ax.plot(
            [best_ff["D"], following_ff["D"]],
            [best_ff["Top-1"], following_ff["Top-1"]],
            c='tab:red', linewidth=4, alpha=0.7,
        )

    ax.scatter(best["D"], best["Top-1"], marker='o', s=300, c='tab:blue')
    ax.scatter(best_ff["D"], best_ff["Top-1"], marker='*', s=300, c='tab:red')

ax.set_xscale('log')

fs = 40
plt.yticks(fontsize=fs - 5)
plt.xticks(Dims, Dims, fontsize=fs - 5, rotation=45)

ax.set_xlabel("Representation size", fontsize=fs, labelpad=10)
ax.set_ylabel("Top-1 Accuracy(%)", fontsize=fs, labelpad=10)

# Empty scatters exist only to give the legend one entry per series.
ax.scatter([], [], c='tab:blue', label='MR-IVF')
ax.scatter([], [], c='tab:red', label='RR-IVF', marker='*')
ax.legend(loc=4, borderpad=0.4, prop={'size': 30}, markerscale=2, frameon=True)

ax.grid(linewidth=2, alpha=0.6)

plt.savefig("IVF Top-1 vs Vector Dimensionality.pdf", bbox_inches="tight")

In [346]:
df = pd.read_csv("./IVF-Top-1.csv")

In [None]:
f, ax = plt.subplots(figsize=(20, 12))

plt.scatter(df['d'], df['IVF-MR'], linewidth=4, c=tableau_20[0], marker='o', s=200)
plt.plot(df['d'], df['IVF-MR'], linewidth=4, c=tableau_20[0], marker='o', label='IVF-MR')

plt.scatter(df['d'], df['IVF-RR'], linewidth=4, c=tableau_20[2], marker='^', s=200)
plt.plot(df['d'], df['IVF-RR'], linewidth=4, c=tableau_20[2], marker='^', label='IVF-RR')

plt.scatter(df['d'], df['Exact-MR'], linewidth=4, c=tableau_20[0], marker='o', s=200)
plt.plot(df['d'], df['Exact-MR'], linewidth=4, c=tableau_20[0], marker='o', linestyle='dashed', label='Exact-MR')

plt.scatter(df['d'], df['Exact-RR'], linewidth=4, c=tableau_20[2], marker='^', s=200)
plt.plot(df['d'], df['Exact-RR'], linewidth=4, c=tableau_20[2], marker='^', linestyle='dashed', label='Exact-RR')
    
plt.xscale('log')

fs=40
plt.yticks(fontsize=fs-5)
plt.xticks(Dims, Dims, fontsize=fs-5,  rotation=45)

plt.xlabel("Representation Size", fontsize=fs, labelpad=10)
plt.ylabel("Top-1 Accuracy(%)", fontsize=fs, labelpad=10)


plt.legend(loc=4, borderpad=0.4, prop={'size':30}, markerscale=2,frameon=True)

plt.grid(linewidth=2, alpha=0.6)

plt.savefig("IVF Top-1 vs Vector Dimensionality.pdf", bbox_inches="tight")

In [None]:
f, ax = plt.subplots(figsize=(16, 12))

# Per group of 6 rows (labels i..i+5), plot the row with the highest recall against its D.
# The recall column is 'Recall@100' in df and 'R@100' in ff_df; marker shape encodes NC.
for i in range(0,54,6):
    row = df.loc[df.loc[i:i+5]['Recall@100'].idxmax()]
    row_ff = ff_df.loc[ff_df.loc[i:i+5]['R@100'].idxmax()]

    # Connect this group's best point to the next group's best point (not for the last group).
    if i//6 < 8:
        next_row = df.loc[df.loc[i+6:i+11]['Recall@100'].idxmax()]
        x_pts = [row["D"], next_row["D"]]
        y_pts = [row["Recall@100"], next_row["Recall@100"]]
        ax.plot(x_pts, y_pts, c='steelblue', linewidth=3, alpha=0.7)

        next_row_ff = ff_df.loc[ff_df.loc[i+6:i+11]['R@100'].idxmax()]
        x_pts = [row_ff["D"],next_row_ff["D"]]
        y_pts = [row_ff["R@100"], next_row_ff["R@100"]]
        ax.plot(x_pts, y_pts, c='tab:orange', linewidth=3, alpha=0.7, linestyle='dotted')

    ax.scatter(row["D"], row["Recall@100"], marker=markers[row["NC"]], s=300, c='steelblue')
    ax.scatter(row_ff["D"], row_ff["R@100"], marker=markers[row_ff["NC"]], s=300, c='tab:orange')

plt.xscale('log')

fs=40
plt.yticks(fontsize=fs)
plt.xticks(Dims, Dims, fontsize=fs,  rotation=45)

plt.xlabel("Original Vector Dimension (D)", fontsize=fs)
plt.ylabel("Recall@100 (%)", fontsize=fs)

# Marker key: one black proxy per NC value (256 ... 8192).
for nc, shape in markers.items():
    plt.scatter([], [],  marker=shape, label=f"{nc}", c='k')

legend1 = plt.legend(loc=4, borderpad=0.1, title="Number of Clusters", prop={'size': 20}, markerscale=2, title_fontsize=15,frameon=True)
plt.gca().add_artist(legend1)

# Line key for the two series.
mrl = mlines.Line2D([], [], color='steelblue', label='MRL')
ff = mlines.Line2D([], [], color='tab:orange', label='Baseline', linestyle='dotted')

legend2 = plt.legend(handles=[mrl, ff], loc=1, borderpad=0.2, bbox_to_anchor=(0.82, 0.12), prop={'size':20}, markerscale=2,frameon=True)
# Matplotlib 3.7 renamed Legend.legendHandles to legend_handles and 3.9 removed the old name.
legend_lines = legend2.legend_handles if hasattr(legend2, "legend_handles") else legend2.legendHandles
for legobj in legend_lines:
    legobj.set_linewidth(6)
plt.gca().add_artist(legend2)
plt.grid()

In [None]:
f, ax = plt.subplots(figsize=(20, 12))

# Per group of 6 rows (labels i..i+5), plot the row with the highest recall against its D.
# The recall column is 'Recall@100' in df and 'R@100' in ff_df.
for i in range(0,54,6):
    row = df.loc[df.loc[i:i+5]['Recall@100'].idxmax()]
    row_ff = ff_df.loc[ff_df.loc[i:i+5]['R@100'].idxmax()]

    # Connect this group's best point to the next group's best point (not for the last group).
    if i//6 < 8:
        next_row = df.loc[df.loc[i+6:i+11]['Recall@100'].idxmax()]
        x_pts = [row["D"], next_row["D"]]
        y_pts = [row["Recall@100"], next_row["Recall@100"]]
        ax.plot(x_pts, y_pts, c='steelblue', linewidth=4, alpha=0.8)

        next_row_ff = ff_df.loc[ff_df.loc[i+6:i+11]['R@100'].idxmax()]
        x_pts = [row_ff["D"],next_row_ff["D"]]
        y_pts = [row_ff["R@100"], next_row_ff["R@100"]]
        ax.plot(x_pts, y_pts, c='tab:red', linewidth=4, alpha=0.8)

    ax.scatter(row["D"], row["Recall@100"], marker='o', s=300, c='steelblue')
    ax.scatter(row_ff["D"], row_ff["R@100"], marker='*', s=300, c='tab:red')

plt.xscale('log')

fs=40
plt.yticks(fontsize=fs-5)
plt.xticks(Dims, Dims, fontsize=fs-5,  rotation=45)

plt.xlabel("Representation Size", fontsize=fs, labelpad=10)
plt.ylabel("Recall@100 (%)", fontsize=fs, labelpad=10)

# Legend proxies. The MR proxy uses 'steelblue' so the legend swatch matches the colour
# the MR points and lines are drawn with above.
plt.scatter([],[],c='steelblue', label='MR')
plt.scatter([],[],c='tab:red', label='RR', marker='*')
plt.legend(loc=4, borderpad=0.4, prop={'size':30}, markerscale=2,frameon=True)

plt.grid(linewidth=2, alpha=0.6)

plt.savefig("IVF Recall@100.pdf", bbox_inches="tight")

## More IVF Plots

In [312]:
# Colour cycle for the plots in this section (ten named matplotlib colours).
tableau_20 = ["steelblue"] + [
    f"tab:{name}"
    for name in ("orange", "red", "green", "purple", "brown", "pink", "gray", "olive", "cyan")
]


In [318]:
ivf_df = pd.read_csv("./MRL_FF_IVF.csv")

# Keep only rows with 0.1 < Compute < 10 (both bounds exclusive).
ivf_df = ivf_df[(ivf_df["Compute"] > 0.1) & (ivf_df["Compute"] < 10)]

# One frame per Config value, ordered by Compute; FF additionally keeps only Top-1 > 67.
mrl = ivf_df[ivf_df["Config"] == "MRL"].sort_values(by='Compute')
ff = ivf_df[(ivf_df["Config"] == "FF") & (ivf_df['Top-1'] > 67)].sort_values(by='Compute')

In [None]:
f, ax = plt.subplots(figsize=(16, 12))

# MRL: points plus a degree-1 least-squares fit of Top-1 against log(Compute).
mrl_log_compute = np.log(mrl['Compute'])
ax.scatter(mrl['Compute'], mrl['Top-1'], s=200, c=tableau_20[0], label='MRL', marker='D')
mrl_trend = np.poly1d(np.polyfit(mrl_log_compute, mrl['Top-1'], 1))
ax.plot(mrl['Compute'], mrl_trend(mrl_log_compute), linewidth=10, alpha=0.4, color=tableau_20[0])

# FF (labelled 'RR' in the legend): same treatment.
ff_log_compute = np.log(ff['Compute'])
ax.scatter(ff['Compute'], ff['Top-1'], s=200, c=tableau_20[2], label='RR', marker='*')
ff_trend = np.poly1d(np.polyfit(ff_log_compute, ff['Top-1'], 1))
ax.plot(ff['Compute'], ff_trend(ff_log_compute), linewidth=10, alpha=0.4, color=tableau_20[2])

ax.set_xscale('log')

fs = 40
compute_ticks = [0.1, 0.5, 1, 5, 10]
plt.yticks(fontsize=fs - 5)
plt.xticks(compute_ticks, compute_ticks, fontsize=fs - 5)

ax.set_xlabel("MFLOPS/Query", fontsize=fs, labelpad=10)
ax.set_ylabel("Top-1 Accuracy(%)", fontsize=fs, labelpad=10)

ax.legend(frameon=True, loc=4, borderpad=0.5, prop={'size': 25}, markerscale=1)
ax.grid()

plt.savefig("IVF Top-1 vs Compute.pdf", bbox_inches="tight")

## Adaptive vs Non-adaptive

In [10]:
# Colour cycle for the plots in this section (ten named matplotlib colours).
tableau_20 = ["steelblue"] + [
    f"tab:{name}"
    for name in ("orange", "red", "green", "purple", "brown", "pink", "gray", "olive", "cyan")
]

# Extra CSS colour names.
css_colors = ['gold']

In [11]:
df = pd.read_csv("./AdANNS.csv")

# Split the table by its 'Config' column, one frame per configuration.
ivf_mrl, ivf_ff, ad_anns, mg_ff = (
    df[df['Config'] == name] for name in ('IVF-MRL', 'IVF-FF', 'AdANNS', 'MG-FF')
)

# The MG-FF-SVD rows are additionally restricted to Top-1 > 68.
svd = df[(df['Config'] == 'MG-FF-SVD') & (df['Top-1'] > 68)]

In [None]:
f, ax = plt.subplots(figsize=(20, 12))

ax.set_xlim(0.1, 5)

# (frame, legend label, colour, marker) for each scatter series, in drawing order.
scatter_specs = (
    (ad_anns, 'AdANNS-IVF-C', tableau_20[0], 'D'),
    (ivf_mrl, 'IVF-MR', 'blueviolet', 'o'),
    (mg_ff, 'MG-IVF-RR', tableau_20[3], '*'),
    (ivf_ff, 'IVF-RR', tableau_20[2], 'P'),
    (svd, 'MG-IVF-SVD', 'black', 'x'),
)
for frame, label, colour, shape in scatter_specs:
    ax.scatter(frame['Compute'], frame['Top-1'], label=label, c=colour, s=200, marker=shape)

ax.set_xscale('log')

# Trend lines are drawn across the full x-range of the axes, not just the data range.
xlims = ax.get_xlim()
x_ext = np.linspace(xlims[0], xlims[1], 100)

# (frame, line alpha, colour): degree-1 fit of Top-1 against log(Compute) per series.
trend_specs = (
    (ivf_mrl, 0.3, 'blueviolet'),
    (ivf_ff, 0.3, tableau_20[2]),
    (ad_anns, 0.3, tableau_20[0]),
    (mg_ff, 0.6, tableau_20[3]),
)
for frame, opacity, colour in trend_specs:
    trend = np.poly1d(np.polyfit(np.log(frame['Compute']), frame['Top-1'], 1))
    ax.plot(x_ext, trend(np.log(x_ext)), linewidth=10, alpha=opacity, c=colour)

fs = 40
compute_ticks = [0.1, 0.5, 1, 5]
plt.yticks(fontsize=fs - 5)
plt.xticks(compute_ticks, compute_ticks, fontsize=fs - 5)

ax.set_xlabel("MFLOPS/Query", fontsize=fs, labelpad=10)
ax.set_ylabel("Top-1 Accuracy(%)", fontsize=fs, labelpad=10)

ax.legend(frameon=True, loc=5, borderpad=0.5, prop={'size': 25}, markerscale=1)
ax.grid(linewidth=2, alpha=0.6)

# plt.savefig("AdANNS Top-1 vs Compute.pdf", bbox_inches="tight")

In [None]:
f, ax = plt.subplots(figsize=(20, 12))

ax.set_xlim(0.1, 100)

ax.scatter(ad_anns['Compute'], ad_anns['Top-1'], label='AdANNS-IVF', c=tableau_20[0], s=200, marker='D')
ax.scatter(ivf_ff['Compute'], ivf_ff['Top-1'], label='Rigid-IVF', c=tableau_20[2], s=250, marker='P')

ax.set_xscale('log')

# Trend lines are drawn across the full x-range of the axes.
xlims = ax.get_xlim()
x_ext = np.linspace(xlims[0], xlims[1], 100)

# Degree-1 fit of Top-1 against log(Compute) for each series.
for frame, colour in ((ad_anns, tableau_20[0]), (ivf_ff, tableau_20[2])):
    trend = np.poly1d(np.polyfit(np.log(frame['Compute']), frame['Top-1'], 1))
    ax.plot(x_ext, trend(np.log(x_ext)), linewidth=10, alpha=0.3, c=colour)

fs = 40
compute_ticks = [0.1, 1, 10, 100]
plt.yticks(fontsize=fs - 5)
plt.xticks(compute_ticks, compute_ticks, fontsize=fs - 5)
ax.set_xlabel("MFLOPS/Query", fontsize=fs, labelpad=10)
ax.set_ylabel("Top-1 Accuracy(%)", fontsize=fs + 10, labelpad=15)

ax.legend(frameon=True, loc=4, borderpad=0.5, prop={'size': 30}, markerscale=1)
ax.grid(linewidth=1)

# Vertical dashed arrow with a hand-placed triangular head pointing up, plus its label.
gain_from = (1.285263, 68.74)
gain_to = (1.285263, 70.13)
ax.plot(
    [gain_from[0], gain_to[0]],
    [gain_from[1], gain_to[1] - 0.1],
    linestyle='dashed', color='tab:green', linewidth=4,
)
gain_head = plt.Polygon(
    [(gain_to[0] - 0.09, gain_to[1] - 0.1), (gain_to[0] + 0.09, gain_to[1] - 0.1), (gain_to[0], gain_to[1])],
    color='tab:green',
)
ax.add_patch(gain_head)
ax.text(
    (gain_from[0] + gain_to[0]) / 2 + 0.03,
    (gain_from[1] + gain_to[1]) / 2,
    '~1.5% gain', fontsize=35, color='tab:green', weight='bold',
)

# Horizontal dashed arrow with its head on the left (low-compute) end, plus its label.
compute_from = (82.060224, 70.13)
compute_to = (0.71, 70.13)
ax.plot(
    [compute_from[0], compute_to[0]],
    [compute_from[1], compute_to[1]],
    linestyle='dashed', color='tab:green', linewidth=4,
)
compute_head = plt.Polygon(
    [(compute_to[0], compute_to[1]), (compute_to[0] + 0.13, compute_to[1] - 0.05), (compute_to[0] + 0.13, compute_to[1] + 0.05)],
    color='tab:green',
)
ax.add_patch(compute_head)
ax.text(
    np.log((compute_from[0] + compute_to[0]) / 2),
    (compute_from[1] + compute_to[1]) / 2 - 0.17,
    r'~100$\times$ Compute', fontsize=35, color='tab:green', weight='bold',
)

# plt.savefig("Teaser.pdf", bbox_inches="tight")

In [14]:
# Colour cycle for the next plots; unlike the other palette cells, the second entry
# here is "darkorange" rather than "tab:orange".
tableau_20 = ["steelblue", "darkorange"] + [
    f"tab:{name}"
    for name in ("red", "green", "purple", "brown", "pink", "gray", "olive", "cyan")
]

In [15]:
df = pd.read_csv("./AdANNS_AdInf.csv")

# Keep only rows with 0.1 < Compute < 5 (both bounds exclusive).
df = df[(df['Compute'] < 5) & (df['Compute'] > 0.1)]

# One frame per Config; the AdANNS rows are further restricted to Acc > 69.6.
ad_anns = df[(df['Config'] == 'AdANNS') & (df['Acc'] > 69.6)]
ad_inf = df[df['Config'] == 'AdInf']

In [None]:
f, ax = plt.subplots(figsize=(20, 12))

ax.set_xlim(0.1, 5)

ax.scatter(ad_anns['Compute'], ad_anns['Acc'], s=300, marker='D', c=tableau_20[0], label='AdANNS-IVF-C')
ax.scatter(ad_inf['Compute'], ad_inf['Acc'], s=300, marker='X', c=tableau_20[3], label='AdANNS-IVF-D')

# Trend lines are drawn across the full x-range of the axes.
xlims = ax.get_xlim()
x_ext = np.linspace(xlims[0], xlims[1], 100)

# (frame, line alpha, colour): degree-1 fit of Acc against log(Compute) per series.
for frame, opacity, colour in ((ad_anns, 0.3, tableau_20[0]), (ad_inf, 0.5, tableau_20[3])):
    trend = np.poly1d(np.polyfit(np.log(frame['Compute']), frame['Acc'], 1))
    ax.plot(x_ext, trend(np.log(x_ext)), linewidth=10, alpha=opacity, c=colour)

ax.set_xscale('log')

fs = 40
compute_ticks = [0.1, 0.5, 1, 5]
plt.yticks([69.6, 69.7, 69.8, 69.9, 70.0, 70.1, 70.2], fontsize=fs - 5)
plt.xticks(compute_ticks, compute_ticks, fontsize=fs - 5)
ax.set_xlabel("MFLOPS/Query", fontsize=fs, labelpad=10)
ax.set_ylabel("Top-1 Accuracy(%)", fontsize=fs + 10, labelpad=15)

ax.legend(frameon=True, loc=4, borderpad=0.5, prop={'size': 30}, markerscale=1)
ax.grid(linewidth=2, alpha=0.7)

plt.savefig("AdANNS vs Adaptive Inference.pdf", bbox_inches="tight")

In [150]:
# Colour cycle for the plots in this section (ten named matplotlib colours).
tableau_20 = ["steelblue"] + [
    f"tab:{name}"
    for name in ("orange", "red", "green", "purple", "brown", "pink", "gray", "olive", "cyan")
]


In [155]:
df = pd.read_csv("./NonAD-AD.csv")

# Split by 'Config'; the 'Non-ad' rows are additionally restricted to Top-1 > 68.
ad = df[df['Config'] == 'Ad']
non_ad = df[(df['Config'] == 'Non-ad') & (df['Top-1'] > 68)]

In [None]:
f, ax = plt.subplots(figsize=(20, 12))

# Adaptive: points plus a degree-1 fit of Top-1 against log(Compute).
# The trend lines take their colour from the default colour cycle.
ad_log_compute = np.log(ad['Compute'])
ax.scatter(ad['Compute'], ad['Top-1'], s=200, c=tableau_20[0], label='Adaptive ANNS', marker='D')
ad_trend = np.poly1d(np.polyfit(ad_log_compute, ad['Top-1'], 1))
ax.plot(ad['Compute'], ad_trend(ad_log_compute), linewidth=5, alpha=0.6)

# Non-adaptive: same treatment.
non_ad_log_compute = np.log(non_ad['Compute'])
ax.scatter(non_ad['Compute'], non_ad['Top-1'], s=200, c=tableau_20[1], label='Non-Adaptive ANNS', marker='*')
non_ad_trend = np.poly1d(np.polyfit(non_ad_log_compute, non_ad['Top-1'], 1))
ax.plot(non_ad['Compute'], non_ad_trend(non_ad_log_compute), linewidth=5, alpha=0.6)

ax.set_xscale('log')

fs = 40
compute_ticks = [0.1, 1, 10, 100]
plt.yticks(fontsize=fs)
plt.xticks(compute_ticks, compute_ticks, fontsize=fs)

ax.set_xlabel("MFLOPS/Query", fontsize=fs)
ax.set_ylabel("Top-1 (%)", fontsize=fs)

ax.legend(frameon=True, loc=4, borderpad=0.5, prop={'size': 25}, markerscale=1)
ax.grid(linewidth=3)

## IVF V2 Plots

In [348]:
# Colour cycle for the plots in this section (ten named matplotlib colours).
tableau_20 = ["steelblue"] + [
    f"tab:{name}"
    for name in ("orange", "red", "green", "purple", "brown", "pink", "gray", "olive", "cyan")
]


In [349]:
v2_df = pd.read_csv("V2-IVF.csv")

# One frame per value of the 'Model' column.
mrl, ff, mrl_exact, ff_exact = (
    v2_df[v2_df["Model"] == model] for model in ("MRL", "FF", "MRL-Exact", "FF-Exact")
)

In [None]:
f, ax = plt.subplots(figsize=(20, 12))

# (frame, colour, line marker, legend label, extra scatter kwargs, extra line kwargs).
# The MR series rely on the default scatter marker; the Exact-* curves are dashed.
v2_series = (
    (mrl, tableau_20[0], 'o', 'IVF-MR', {}, {}),
    (ff, tableau_20[2], '^', 'IVF-RR', {'marker': '^'}, {}),
    (mrl_exact, tableau_20[0], 'o', 'Exact-MR', {}, {'linestyle': 'dashed'}),
    (ff_exact, tableau_20[2], '^', 'Exact-RR', {'marker': '^'}, {'linestyle': 'dashed'}),
)
for frame, colour, shape, label, scatter_kwargs, line_kwargs in v2_series:
    ax.scatter(frame['D'], frame['Top-1'], s=200, c=colour, **scatter_kwargs)
    ax.plot(frame['D'], frame['Top-1'], c=colour, linewidth=3, marker=shape, label=label, **line_kwargs)

ax.set_xscale('log')

fs = 40
plt.yticks(fontsize=fs - 5)
plt.xticks(Dims, Dims, fontsize=fs - 5, rotation=45)

ax.set_xlabel("Representation Size", fontsize=fs, labelpad=10)
ax.set_ylabel("Top-1 Accuracy(%)", fontsize=fs, labelpad=10)

ax.legend(frameon=True, loc=4, borderpad=0.4, prop={'size': 30}, markerscale=2)
ax.grid(linewidth=2, alpha=0.6)

plt.savefig("ImageNetV2 Clustering.pdf", bbox_inches="tight")

In [None]:
f, ax = plt.subplots(figsize=(16, 12))

# Recall@100 against D for the MRL and FF rows of the V2 table.
plt.scatter(mrl['D'], mrl['Recall@100'], s=200, c=tableau_20[0])
plt.plot(mrl['D'], mrl['Recall@100'],c=tableau_20[0],linewidth=3,marker='o',label='MRL')

plt.scatter(ff['D'], ff['Recall@100'], s=200, c=tableau_20[1], marker='^')
plt.plot(ff['D'], ff['Recall@100'],c=tableau_20[1],linewidth=3,marker='^',label='Baseline',linestyle='dashed')

plt.xscale('log')

fs=40
plt.yticks(fontsize=fs)
plt.xticks(Dims, Dims, fontsize=fs,  rotation=45)

plt.xlabel("Original Vector Dimension (D)", fontsize=fs)
# The plotted column is Recall@100, so the axis is labelled to match (it previously read "Top-1 (%)").
plt.ylabel("Recall@100 (%)", fontsize=fs)

plt.legend(frameon=True,loc=4, borderpad=0.2, prop={'size':20}, markerscale=2)
plt.grid()

## IVF 4K plots

In [351]:
# Colour cycle for the plots in this section (ten named matplotlib colours).
tableau_20 = ["steelblue"] + [
    f"tab:{name}"
    for name in ("orange", "red", "green", "purple", "brown", "pink", "gray", "olive", "cyan")
]

In [352]:
# Rows with any missing value are dropped right after loading.
df_4k = pd.read_csv("./MRL-4K.csv").dropna()

# One frame per value of the 'Model' column.
mrl_4k, mrl_4k_exact = (df_4k[df_4k['Model'] == model] for model in ('MRL', 'MRL Exact'))

In [None]:
f, ax = plt.subplots(figsize=(20, 12))

# (frame, colour, line marker, legend label, extra scatter kwargs) for the two curves.
series_4k = (
    (mrl_4k, tableau_20[0], 'o', 'IVF-MR', {}),
    (mrl_4k_exact, 'tab:red', '^', 'Exact-MR', {'marker': '^'}),
)
for frame, colour, shape, label, scatter_kwargs in series_4k:
    ax.scatter(frame['D'], frame['Top-1'], s=200, c=colour, **scatter_kwargs)
    ax.plot(frame['D'], frame['Top-1'], c=colour, linewidth=4, marker=shape, label=label)

ax.set_xscale('log')

fs = 40
plt.yticks(fontsize=fs - 5)
plt.xticks(Dims, Dims, fontsize=fs - 5, rotation=45)

ax.set_xlabel("Representation Size", fontsize=fs, labelpad=10)
ax.set_ylabel("Top-1 Accuracy(%)", fontsize=fs, labelpad=10)

ax.legend(frameon=True, loc=4, borderpad=0.4, prop={'size': 30}, markerscale=2)
ax.grid(linewidth=2, alpha=0.6)

plt.savefig("ImageNet-4K Clustering.pdf", bbox_inches="tight")

In [None]:
f, ax = plt.subplots(figsize=(16, 12))

# (frame, colour, line marker, legend label, extra scatter kwargs, extra line kwargs).
recall_series_4k = (
    (mrl_4k, tableau_20[0], 'o', 'MRL IVF', {}, {}),
    (mrl_4k_exact, tableau_20[4], 'x', 'MRL Exact', {'marker': 'x'}, {'linestyle': 'dashed'}),
)
for frame, colour, shape, label, scatter_kwargs, line_kwargs in recall_series_4k:
    ax.scatter(frame['D'], frame['Recall@100'], s=200, c=colour, **scatter_kwargs)
    ax.plot(frame['D'], frame['Recall@100'], c=colour, linewidth=3, marker=shape, label=label, **line_kwargs)

ax.set_xscale('log')

fs = 40
plt.yticks(fontsize=fs)
plt.xticks(Dims, Dims, fontsize=fs, rotation=45)

ax.set_xlabel("Original Vector Dimension (D)", fontsize=fs)
ax.set_ylabel("Recall@100 (%)", fontsize=fs)

ax.legend(frameon=True, loc=4, borderpad=0.2, prop={'size': 20}, markerscale=2)
ax.grid()

In [17]:
# Colour cycle for the plots in this section (ten named matplotlib colours).
tableau_20 = ["steelblue"] + [
    f"tab:{name}"
    for name in ("orange", "red", "green", "purple", "brown", "pink", "gray", "olive", "cyan")
]

In [18]:
df = pd.read_csv('./4k_search.csv')

# One frame per 'Probe' value; the column is compared against strings ('1', ..., 'Exact').
p1, p2, p4, p8, pexact = (
    df[df['Probe'] == probe] for probe in ('1', '2', '4', '8', 'Exact')
)

In [None]:
f, ax = plt.subplots(figsize=(20, 12))

ax.set_xlim(0.1, 3000)

# One scatter series per probe setting, plus the 'Exact' rows.
plt.scatter(p1['Compute'], p1['Acc'], s=200, c='blueviolet', label='IVF 1-Probe', marker='d')
plt.scatter(p2['Compute'], p2['Acc'], s=200, c='gold', label='IVF 2-Probe', marker='d')
plt.scatter(p4['Compute'], p4['Acc'], s=200, c=tableau_20[2], label='IVF 4-Probe', marker='d')
plt.scatter(p8['Compute'], p8['Acc'], s=200, c=tableau_20[0], label='IVF 8-Probe', marker='d')

plt.scatter(pexact['Compute'], pexact['Acc'], s=200, c='k', label='Exact', marker='*')

plt.xscale('log')

fs=40
plt.yticks(fontsize=fs-5)
plt.xticks([0.1,1,10,100,1000], [0.1,1,10,100,1000], fontsize=fs-5, rotation=45)
plt.xlabel("MFLOPS/Query", fontsize=fs, labelpad=10)
plt.ylabel("Top-1 (%)", fontsize=fs+10, labelpad=10)

# Horizontal dashed arrow with a hand-placed triangular head on its left end.
# The endpoints use their own names: binding them to p1/p2 would overwrite the per-probe
# DataFrames plotted above and make this cell fail when it is run a second time.
arrow_tail = (2151.424, 29.33)
arrow_head = (37.810304, 29.33)
plt.plot([arrow_tail[0],arrow_head[0]], [arrow_tail[1], arrow_head[1]], linestyle='dashed', color='tab:green', linewidth=4)
tri_p2 = [(arrow_head[0], arrow_head[1]), (arrow_head[0]+5, arrow_head[1]-0.5), (arrow_head[0]+5, arrow_head[1]+0.5) ]
tri = plt.Polygon(tri_p2, color='tab:green')
plt.gca().add_patch(tri)
# Label position is hand-tuned; left unchanged.
plt.text(np.log((arrow_tail[0]+arrow_head[0])/2) + 90, (arrow_tail[1]+arrow_head[1])/2 + 0.3, r'~100$\times$ Compute', fontsize=35, color='tab:green', weight='bold')


plt.legend(frameon=True,loc=4, borderpad=0.5, prop={'size':30}, markerscale=1)
plt.grid(linewidth=2, alpha=0.6)

plt.savefig("ImageNet-4K Impact of Search Probes.pdf", bbox_inches="tight")

## PQ Plots

In [334]:
from scipy.interpolate import make_interp_spline

# Palette for the PQ section (ten named matplotlib colours).
tableau_20 = [
    "steelblue", "tab:orange", "tab:red", "tab:green", "tab:purple",
    "tab:brown", "tab:pink", "tab:gray", "tab:olive", "tab:cyan",
]

# Marker and line style keyed by the PQ value m (16 / 32 / 64).
_pq_budgets = (16, 32, 64)
pq_markers = dict(zip(_pq_budgets, ('*', 'D', 'o')))
pq_linestyle = dict(zip(_pq_budgets, ('dashed', 'dotted', 'solid')))

In [335]:
# MRL PQ results with rows containing missing values dropped; the FF table is used as read.
pq_df = pd.read_csv("./MRL-PQ.csv").dropna()

ff_pq_df = pd.read_csv("./FF-PQ.csv")

In [None]:
# PQ Top-1 vs. representation size D for m in {16, 32, 64}.
# pq_df curves use palette[0], ff_pq_df curves use palette[2]; marker and line
# style encode m.
f, ax = plt.subplots(figsize=(20, 12))

for idx, i in enumerate((16, 32, 64)):
    per_source = (
        (pq_df[pq_df['m'] == i], tableau_20[0]),
        (ff_pq_df[ff_pq_df['m'] == i], tableau_20[2]),
    )
    for rows, colour in per_source:
        ax.scatter(rows['D'], rows['Top-1'], c=colour, s=200, marker=pq_markers[i])
        ax.plot(rows['D'], rows['Top-1'], c=colour, alpha=0.8, linewidth=4, linestyle=pq_linestyle[i])

    # Empty scatter: only contributes a black legend entry for this m.
    ax.scatter([], [], marker=pq_markers[i], label=f'{i} bytes', c='k')

ax.set_xscale('log')
fs = 40
plt.yticks(fontsize=fs-5)
plt.xticks(Dims[1:], Dims[1:], fontsize=fs-5,  rotation=45)

ax.set_xlabel("Representation Size", fontsize=fs, labelpad=10)
ax.set_ylabel("Top-1 Accuracy (%)", fontsize=fs, labelpad=10)

# First legend: colour swatches for the two embedding families. It is re-added
# as an artist because the second legend call would otherwise replace it.
lab = ['MR', 'RR']
foo = [ax.scatter([], [], marker="s", c=tableau_20[k*2]) for k in range(2)]

legend2 = ax.legend(foo, lab, loc=3, borderpad=0.3, prop={'size':30}, bbox_to_anchor=(0.24, 0.001), title_fontsize=20, frameon=True)
ax.add_artist(legend2)

# Second legend: the labelled marker entries created in the loop.
ax.legend(title="Compute Budget", loc=3, borderpad=0.4, prop={'size': 30}, title_fontsize=30, frameon=True, markerscale=1.5)
ax.grid(linewidth=2, alpha=0.6)

f.savefig("PQ Top-1 vs Vector Byte Size.pdf", bbox_inches="tight")

In [None]:
# Top-1 accuracy vs. m (rows with m >= 8), one colour per D in 8..2048.
# Circles/solid lines come from pq_df ('MRL' in the legend); triangles/dotted
# lines come from ff_pq_df ('Baseline').
f, ax = plt.subplots(figsize=(16, 12))

for idx, i in enumerate([2**(i) for i in range(3,12)]):
    temp_df_D = pq_df[pq_df['D']==i]
    temp_df_D = temp_df_D[temp_df_D['m'] >=8]

    temp_df_D_ff = ff_pq_df[ff_pq_df['D']==i]
    temp_df_D_ff = temp_df_D_ff[temp_df_D_ff['m'] >=8]

    plt.scatter(temp_df_D['m'], temp_df_D['Top-1'], c=tableau_20[idx], s=200, alpha=0.8)
    plt.plot(temp_df_D['m'], temp_df_D['Top-1'], c=tableau_20[idx],alpha=0.6, linewidth=3)

    plt.scatter(temp_df_D_ff['m'], temp_df_D_ff['Top-1'], c=tableau_20[idx], s=200, marker='^',alpha=0.8)
    plt.plot(temp_df_D_ff['m'], temp_df_D_ff['Top-1'], c=tableau_20[idx],alpha=0.6,linestyle='dotted', linewidth=3)

    # Empty scatter: only contributes a coloured legend swatch for this D.
    plt.scatter([],[],marker='s',label=i, c=tableau_20[idx])

plt.xscale('log')
fs=40
plt.yticks(fontsize=fs)
plt.xticks([8,16,32,64,128],[8,16,32,64,128],fontsize=fs,  rotation=45)

plt.xlabel("Sub-vector Size (m)", fontsize=fs)
plt.ylabel("Top-1 Accuracy (%)", fontsize=fs)

mrl = mlines.Line2D([], [], color='k', label='MRL',marker='o')
ff = mlines.Line2D([], [], color='k', label='Baseline', linestyle='dotted',marker='^')

legend2 = plt.legend(handles=[mrl, ff], loc=1, borderpad=0.2, bbox_to_anchor=(0.86, 0.12), prop={'size':20}, markerscale=2,frameon=True)
# Legend.legendHandles was renamed to legend_handles (old name deprecated in
# Matplotlib 3.7 and later removed); use whichever this version provides.
legend_lines = getattr(legend2, 'legend_handles', None)
if legend_lines is None:
    legend_lines = legend2.legendHandles
for legobj in legend_lines:
    legobj.set_linewidth(4)
# Keep this legend on the axes; the next plt.legend call would replace it.
plt.gca().add_artist(legend2)

plt.legend(title="$D$", loc=4, borderpad=0.1, prop={'size': 20}, title_fontsize=15, frameon=True)
plt.grid()

In [None]:
# R@100 vs. D for m in {16, 32, 64}, one colour per m.
# Circles/solid lines come from pq_df ('MRL'); triangles/dotted lines come
# from ff_pq_df ('Baseline').
f, ax = plt.subplots(figsize=(16, 12))
Y = 'R@100'

for idx, i in enumerate([2**(i) for i in range(4,7)]):

    plt.scatter(pq_df[pq_df['m']==i]['D'], pq_df[pq_df['m']==i][Y], c=tableau_20[idx], s=200, alpha=0.8)
    plt.plot(pq_df[pq_df['m']==i]['D'], pq_df[pq_df['m']==i][Y], c=tableau_20[idx],alpha=0.6)

    plt.scatter(ff_pq_df[ff_pq_df['m']==i]['D'], ff_pq_df[ff_pq_df['m']==i][Y], c=tableau_20[idx], s=200, marker='^',alpha=0.8)
    plt.plot(ff_pq_df[ff_pq_df['m']==i]['D'], ff_pq_df[ff_pq_df['m']==i][Y], c=tableau_20[idx],alpha=0.6,linestyle='dotted')

    # Empty scatter: only contributes a coloured legend swatch for this m.
    plt.scatter([],[],marker='s',label=i,c=tableau_20[idx])

plt.xscale('log')
fs=40
plt.yticks(fontsize=fs)
plt.xticks(Dims, Dims, fontsize=fs,  rotation=45)

plt.xlabel("Original Vector Dimension (D)", fontsize=fs)
plt.ylabel("Recall@100 (%)", fontsize=fs)

mrl = mlines.Line2D([], [], color='k', label='MRL',marker='o')
ff = mlines.Line2D([], [], color='k', label='Baseline', linestyle='dotted',marker='^')

legend2 = plt.legend(handles=[mrl, ff], loc=1, borderpad=0.2, bbox_to_anchor=(0.84, 0.15), prop={'size':20}, markerscale=2,frameon=True)
# Legend.legendHandles was renamed to legend_handles (old name deprecated in
# Matplotlib 3.7 and later removed); use whichever this version provides.
legend_lines = getattr(legend2, 'legend_handles', None)
if legend_lines is None:
    legend_lines = legend2.legendHandles
for legobj in legend_lines:
    legobj.set_linewidth(4)
# Keep this legend on the axes; the next plt.legend call would replace it.
plt.gca().add_artist(legend2)

plt.legend(title="$m$", loc=4, borderpad=0.1, prop={'size': 20}, title_fontsize=15, frameon=True)
plt.grid()

In [None]:
# R@100 vs. m (rows with m >= 8), one colour per D in 8..2048.
# Circles/solid lines come from pq_df ('MRL'); triangles/dotted lines come
# from ff_pq_df ('Baseline').
f, ax = plt.subplots(figsize=(16, 12))

for idx, i in enumerate([2**(i) for i in range(3,12)]):
    temp_df_D = pq_df[pq_df['D']==i]
    temp_df_D = temp_df_D[temp_df_D['m'] >=8]

    temp_df_D_ff = ff_pq_df[ff_pq_df['D']==i]
    temp_df_D_ff = temp_df_D_ff[temp_df_D_ff['m'] >=8]

    plt.scatter(temp_df_D['m'], temp_df_D['R@100'], c=tableau_20[idx], s=200, alpha=0.8)
    plt.plot(temp_df_D['m'], temp_df_D['R@100'], c=tableau_20[idx],alpha=0.6, linewidth=3)

    plt.scatter(temp_df_D_ff['m'], temp_df_D_ff['R@100'], c=tableau_20[idx], s=200, marker='^',alpha=0.8)
    plt.plot(temp_df_D_ff['m'], temp_df_D_ff['R@100'], c=tableau_20[idx],alpha=0.6,linestyle='dotted', linewidth=3)

    # Legend swatch for this D. The colour is passed explicitly so the swatch
    # matches the curves instead of following the default colour cycle.
    plt.scatter([],[],marker='s',label=i, c=tableau_20[idx])

plt.xscale('log')
fs=40
plt.yticks(fontsize=fs)
plt.xticks([8,16,32,64,128],[8,16,32,64,128],fontsize=fs,  rotation=45)

plt.xlabel("Sub-vector Size (m)", fontsize=fs)
plt.ylabel("Recall@100 (%)", fontsize=fs)

mrl = mlines.Line2D([], [], color='k', label='MRL',marker='o')
ff = mlines.Line2D([], [], color='k', label='Baseline', linestyle='dotted',marker='^')

legend2 = plt.legend(handles=[mrl, ff], loc=1, borderpad=0.2, bbox_to_anchor=(0.84, 0.15), prop={'size':20}, markerscale=2, frameon=True)
# Legend.legendHandles was renamed to legend_handles (old name deprecated in
# Matplotlib 3.7 and later removed); use whichever this version provides.
legend_lines = getattr(legend2, 'legend_handles', None)
if legend_lines is None:
    legend_lines = legend2.legendHandles
for legobj in legend_lines:
    legobj.set_linewidth(4)
# Keep this legend on the axes; the next plt.legend call would replace it.
plt.gca().add_artist(legend2)

plt.legend(title="$D$", loc=4, borderpad=0.1, prop={'size': 20}, title_fontsize=15, frameon=True)
plt.grid()

## HNSWPQ

In [34]:
# Palette for the HNSWPQ section (ten named matplotlib colours, indexed by
# series position).
tableau_20 = [
    "steelblue", "tab:orange", "tab:red", "tab:green", "tab:purple",
    "tab:brown", "tab:pink", "tab:gray", "tab:olive", "tab:cyan",
]

In [35]:
# HNSWPQ results table, read from the notebook's working directory. The cells
# below use its 'd', 'm', 'MRL Acc' and 'FF Acc' columns.
hn_df = pd.read_csv("HNSWPQ_MRL.csv")
# hn_df

In [None]:
# HNSWPQ 'MRL Acc' vs. m, one dashed curve per d in 8..2048.
f, ax = plt.subplots(figsize=(20, 12))

for idx, i in enumerate([2**(i) for i in range(3,12)]):
    temp_df = hn_df[hn_df['d']==i]

    plt.scatter(temp_df['m'], temp_df['MRL Acc'], c=tableau_20[idx], s=200)
    plt.plot(temp_df['m'], temp_df['MRL Acc'], c=tableau_20[idx], linewidth=3, linestyle='dashed', alpha=0.8)

    # Empty scatter: only contributes a coloured legend swatch for this d.
    plt.scatter([],[],marker='s',label=i, c=tableau_20[idx])

# The axis scale is loop-invariant, so it is set once after all curves are drawn.
plt.xscale('log')

fs=40
plt.yticks(fontsize=fs-5)
plt.xticks([8,16,32,64,128],[8,16,32,64,128],fontsize=fs-5)

plt.xlabel("Number of Bytes for Quantization (m)", fontsize=fs, labelpad=10)
plt.ylabel("Top-1 Accuracy (%)", fontsize=fs, labelpad=10)


plt.legend(title="d", loc=4, borderpad=0.3, prop={'size': 25}, title_fontsize=30, frameon=True)
plt.grid(linewidth=2, alpha=0.6)

plt.savefig("HNSWPQ with MRL.pdf", bbox_inches="tight")

In [None]:
# HNSWPQ 'FF Acc' vs. m, one dashed curve per d in 8..2048.
f, ax = plt.subplots(figsize=(20, 12))

for idx, i in enumerate([2**(i) for i in range(3,12)]):
    temp_df = hn_df[hn_df['d']==i]

    plt.scatter(temp_df['m'], temp_df['FF Acc'], c=tableau_20[idx], s=200)
    plt.plot(temp_df['m'], temp_df['FF Acc'], c=tableau_20[idx], linewidth=3, linestyle='dashed', alpha=0.8)

    # Empty scatter: only contributes a coloured legend swatch for this d.
    plt.scatter([],[],marker='s',label=i, c=tableau_20[idx])

# The axis scale is loop-invariant, so it is set once after all curves are drawn.
plt.xscale('log')

fs=40
plt.yticks(fontsize=fs-5)
plt.xticks([8,16,32,64,128],[8,16,32,64,128],fontsize=fs-5)

plt.xlabel("Number of Bytes for Quantization (m)", fontsize=fs, labelpad=10)
plt.ylabel("Top-1 Accuracy (%)", fontsize=fs, labelpad=10)


plt.legend(title="d", loc=4, borderpad=0.3, prop={'size': 25}, title_fontsize=30, frameon=True)
plt.grid(linewidth=2, alpha=0.6)

plt.savefig("HNSWPQ with FF.pdf", bbox_inches="tight")

In [360]:
# Palette redefinition for the HNSW32 cells (ten named matplotlib colours).
tableau_20 = [
    "steelblue", "tab:orange", "tab:red", "tab:green", "tab:purple",
    "tab:brown", "tab:pink", "tab:gray", "tab:olive", "tab:cyan",
]

In [361]:
# HNSW32 results, one row per representation size 'd' with Top-1 and R@100
# columns for MRL and FF. Note that this rebinds the notebook-wide name `df`.
df = pd.read_csv("./HNSW32.csv")
# Bare expression: renders the whole (small) table as the cell output.
df

Unnamed: 0,d,Top-1 MRL,Top-1 FF,R@100 MRL,R@100 FF
0,8,62.15,58.88,4.71,4.55
1,16,67.81,66.61,5.17,5.08
2,32,69.25,68.57,5.23,5.17
3,64,69.87,69.01,5.24,5.12
4,128,70.27,68.7,5.25,4.98
5,256,70.29,69.34,5.24,4.89
6,512,70.42,69.76,5.24,4.85
7,1024,70.58,70.01,5.24,4.79
8,2048,70.66,70.77,5.24,4.97


In [None]:
# HNSW32 Top-1 accuracy vs. representation size d: 'Top-1 MRL' (blue, legend
# 'MR') against 'Top-1 FF' (red, legend 'RR').
f, ax = plt.subplots(figsize=(20, 12))

# Line widths are passed as numbers; the original string '4' only worked
# because matplotlib coerces the value to float.
plt.scatter(df['d'], df['Top-1 MRL'], s=200, marker='o', c='tab:blue', label='MR')
plt.plot(df['d'], df['Top-1 MRL'], linewidth=4, c='tab:blue')

plt.scatter(df['d'], df['Top-1 FF'], s=300, marker='*', c='tab:red', label='RR')
plt.plot(df['d'], df['Top-1 FF'], linewidth=4, c='tab:red')

plt.xscale('log')

fs=40
plt.yticks(fontsize=fs-5)
plt.xticks(Dims, Dims, fontsize=fs-5,  rotation=45)

plt.xlabel("Representation Size", fontsize=fs, labelpad=10)
plt.ylabel("Top-1 Accuracy(%)", fontsize=fs, labelpad=10)
plt.legend(loc=4, borderpad=0.4, prop={'size':30}, frameon=True)

plt.grid(linewidth=2, alpha=0.6)

plt.savefig("HNSW32 Top-1 vs Vector Dimensionality.pdf", bbox_inches="tight")

In [None]:
# HNSW32 Recall@100 vs. representation size d: 'R@100 MRL' (blue, legend 'MR')
# against 'R@100 FF' (red, legend 'RR').
f, ax = plt.subplots(figsize=(20, 12))

# Line widths are passed as numbers; the original string '4' only worked
# because matplotlib coerces the value to float.
plt.scatter(df['d'], df['R@100 MRL'], s=200, marker='o', c='tab:blue', label='MR')
plt.plot(df['d'], df['R@100 MRL'], linewidth=4, c='tab:blue')

plt.scatter(df['d'], df['R@100 FF'], s=300, marker='*', c='tab:red', label='RR')
plt.plot(df['d'], df['R@100 FF'], linewidth=4, c='tab:red')

plt.xscale('log')

fs=40
plt.yticks(fontsize=fs-5)
plt.xticks(Dims, Dims, fontsize=fs-5,  rotation=45)

plt.xlabel("Representation Size", fontsize=fs, labelpad=10)
plt.ylabel("Recall@100 (%)", fontsize=fs, labelpad=10)
plt.legend(loc=4, borderpad=0.4, prop={'size':30}, frameon=True)

plt.grid(linewidth=2, alpha=0.6)

plt.savefig("HNSW32 Recall@100.pdf", bbox_inches="tight")

In [331]:
# Palette variant for the HNSWPQ-vs-dimension plot. Indices 2 and 3 are both
# "tab:red", and "tab:green" is absent.
# NOTE(review): presumably so legend swatches taken from index 2 match curves
# drawn with index 3 — confirm before "fixing" the duplicate.
tableau_20 = [
    "steelblue", "tab:orange", "tab:red", "tab:red", "tab:purple",
    "tab:brown", "tab:pink", "tab:gray", "tab:olive", "tab:cyan",
]

In [332]:
# HNSWPQ results split by the 'Config' column into FF and MRL subsets for
# PQ16 / PQ32 / PQ64.
df = pd.read_csv("./HNSW_Dim.csv")

ff16, ff32, ff64 = (df[df['Config'] == f'PQ{budget}-FF'] for budget in (16, 32, 64))
mrl16, mrl32, mrl64 = (df[df['Config'] == f'PQ{budget}-MRL'] for budget in (16, 32, 64))

# Per-budget marker, line style and legend label, in 16/32/64 order.
markers = ['*', 'D', 'o']
lsts = ['dashed', 'dotted', 'solid']
Ms = ['16 bytes', '32 bytes', '64 bytes']

In [None]:
# HNSWPQ Top-1 vs. representation size d for the 16/32/64-byte configurations.
# Colour encodes the embedding family (MR vs. RR); marker and line style encode
# the byte budget.
f, ax = plt.subplots(figsize=(20, 12))

# One source for the family colours, shared by the curves and the legend
# swatches. Previously the curves used palette index 3 and the swatches index
# 2, which agreed only while both palette entries were the same colour.
mr_color = tableau_20[0]
rr_color = tableau_20[3]

for idx, data in enumerate([mrl16, mrl32, mrl64, ff16, ff32, ff64]):
    # The first three frames are the MRL configurations, the last three the FF ones.
    color = mr_color if idx < 3 else rr_color
    plt.scatter(data['d'], data['Top-1'], s=200, marker=markers[idx%3], c=color)
    plt.plot(data['d'], data['Top-1'], linewidth=4, c=color, linestyle=lsts[idx%3],alpha=0.8)

    if idx<3:
        # Empty scatter: one black legend entry per byte budget.
        plt.scatter([],[],marker=markers[idx%3],c='k', label=Ms[idx])


plt.xscale('log')

fs=40
plt.yticks(fontsize=fs-5)
plt.xticks(Dims[1:], Dims[1:], fontsize=fs-5,  rotation=45)

plt.xlabel("Representation Size", fontsize=fs, labelpad=10)
plt.ylabel("Top-1 Accuracy(%)", fontsize=fs, labelpad=10)

# First legend: colour swatches for the two families. It is re-added as an
# artist because the second plt.legend call would otherwise replace it.
lab=['MR', 'RR']
foo = [plt.scatter([], [],  marker="s", c=color) for color in (mr_color, rr_color)]

legend2 = plt.legend(foo, lab, loc=3, borderpad=0.3, prop={'size':30}, bbox_to_anchor=(0.24, 0.001), title_fontsize=20,frameon=True)
plt.gca().add_artist(legend2)

plt.legend(title="Compute Budget", loc=3, borderpad=0.4, prop={'size': 30}, title_fontsize=30, frameon=True, markerscale=1.5)
plt.grid(linewidth=2, alpha=0.6)

plt.savefig("HNSWPQ Top-1 vs Embedding Dimensionality.pdf", bbox_inches="tight")

## IVFPQ Plots

In [337]:
# Palette variant for the IVFPQ-vs-dimension plot. Indices 2 and 3 are both
# "tab:red", and "tab:green" is absent.
# NOTE(review): presumably so legend swatches taken from index 2 match curves
# drawn with index 3 — confirm before "fixing" the duplicate.
tableau_20 = [
    "steelblue", "tab:orange", "tab:red", "tab:red", "tab:purple",
    "tab:brown", "tab:pink", "tab:gray", "tab:olive", "tab:cyan",
]

In [338]:
# IVFPQ results split by the 'Config' column into MRL and FF subsets for
# PQ16 / PQ32 / PQ64.
df = pd.read_csv("./IVFPQ_DIm.csv")

mrl16, mrl32, mrl64 = (df[df['Config'] == f'PQ{budget}-MRL'] for budget in (16, 32, 64))
ff16, ff32, ff64 = (df[df['Config'] == f'PQ{budget}-FF'] for budget in (16, 32, 64))

# Per-budget marker, line style and legend label, in 16/32/64 order.
markers = ['*', 'D', 'o']
lsts = ['dashed', 'dotted', 'solid']
Ms = ['16 bytes', '32 bytes', '64 bytes']

In [None]:
# IVFPQ Top-1 vs. representation size d for the 16/32/64-byte configurations.
# Colour encodes the embedding family (MR vs. RR); marker and line style encode
# the byte budget.
f, ax = plt.subplots(figsize=(20, 12))

# One source for the family colours, shared by the curves and the legend
# swatches. Previously the curves used palette index 3 and the swatches index
# 2, which agreed only while both palette entries were the same colour.
mr_color = tableau_20[0]
rr_color = tableau_20[3]

for idx, data in enumerate([mrl16, mrl32, mrl64, ff16, ff32, ff64]):
    # The first three frames are the MRL configurations, the last three the FF ones.
    color = mr_color if idx < 3 else rr_color
    plt.scatter(data['d'], data['Top-1'], s=200, marker=markers[idx%3], c=color)
    plt.plot(data['d'], data['Top-1'], linewidth=4, c=color, linestyle=lsts[idx%3],alpha=0.8)

    if idx<3:
        # Empty scatter: one black legend entry per byte budget.
        plt.scatter([],[],marker=markers[idx%3],c='k', label=Ms[idx])


plt.xscale('log')

fs=40
plt.yticks(fontsize=fs-5)
plt.xticks(Dims[1:], Dims[1:], fontsize=fs-5,  rotation=45)

plt.xlabel("Representation Size", fontsize=fs, labelpad=10)
plt.ylabel("Top-1 Accuracy(%)", fontsize=fs, labelpad=10)

# First legend: colour swatches for the two families. It is re-added as an
# artist because the second plt.legend call would otherwise replace it.
lab=['MR', 'RR']
foo = [plt.scatter([], [],  marker="s", c=color) for color in (mr_color, rr_color)]

legend2 = plt.legend(foo, lab, loc=3, borderpad=0.3, prop={'size':30}, bbox_to_anchor=(0.24, 0.001), title_fontsize=20,frameon=True)
plt.gca().add_artist(legend2)

plt.legend(title="Compute Budget", loc=3, borderpad=0.4, prop={'size': 30}, title_fontsize=30, frameon=True, markerscale=1.5)
plt.grid(linewidth=2, alpha=0.6)

plt.savefig("IVFPQ Top-1 vs Embedding Dimensionality.pdf", bbox_inches="tight")

In [22]:
# Palette for the IVFPQ-vs-m plots (ten named matplotlib colours, indexed by
# series position).
tableau_20 = [
    "steelblue", "tab:orange", "tab:green", "tab:red", "tab:purple",
    "tab:brown", "tab:pink", "tab:gray", "tab:olive", "tab:cyan",
]

In [28]:
# IVFPQ sweep over m; only rows whose 'MRL-Acc' exceeds 66 are kept.
df = pd.read_csv('./IVFPQ_m.csv').loc[lambda frame: frame['MRL-Acc'] > 66]

In [None]:
# IVFPQ 'MRL-Acc' vs. m, one dashed curve per d in 16..2048.
f, ax = plt.subplots(figsize=(20, 12))

for idx, i in enumerate([2**(i) for i in range(4,12)]):
    temp_df = df[df['d']==i]

    plt.scatter(temp_df['m'], temp_df['MRL-Acc'], c=tableau_20[idx], s=200)
    plt.plot(temp_df['m'], temp_df['MRL-Acc'], c=tableau_20[idx], linewidth=3, linestyle='dashed', alpha=0.8)

    # Empty scatter: only contributes a coloured legend swatch for this d.
    plt.scatter([],[],marker='s',label=i, c=tableau_20[idx])

# The axis scale is loop-invariant, so it is set once after all curves are drawn.
plt.xscale('log')

fs=40
plt.yticks(fontsize=fs-5)
plt.xticks([8,16,32,64,128],[8,16,32,64,128],fontsize=fs-5)

plt.xlabel("Number of Bytes for Quantization (m)", fontsize=fs, labelpad=10)
plt.ylabel("Top-1 Accuracy (%)", fontsize=fs, labelpad=10)


plt.legend(title="d", loc=4, borderpad=0.3, prop={'size': 25}, title_fontsize=30, frameon=True)
plt.grid(linewidth=2, alpha=0.6)

plt.savefig("IVFPQ with MRL.pdf", bbox_inches="tight")

In [None]:
# IVFPQ 'FF-Acc' vs. m, one dashed curve per d in 16..2048.
f, ax = plt.subplots(figsize=(20, 12))

for idx, i in enumerate([2**(i) for i in range(4,12)]):
    temp_df = df[df['d']==i]

    plt.scatter(temp_df['m'], temp_df['FF-Acc'], c=tableau_20[idx], s=200)
    plt.plot(temp_df['m'], temp_df['FF-Acc'], c=tableau_20[idx], linewidth=3, linestyle='dashed', alpha=0.8)

    # Empty scatter: only contributes a coloured legend swatch for this d.
    plt.scatter([],[],marker='s',label=i, c=tableau_20[idx])

# The axis scale is loop-invariant, so it is set once after all curves are drawn.
plt.xscale('log')

fs=40
plt.yticks(fontsize=fs-5)
plt.xticks([8,16,32,64,128],[8,16,32,64,128],fontsize=fs-5)

plt.xlabel("Number of Bytes for Quantization (m)", fontsize=fs, labelpad=10)
plt.ylabel("Top-1 Accuracy (%)", fontsize=fs, labelpad=10)


plt.legend(title="d", loc=4, borderpad=0.3, prop={'size': 25}, title_fontsize=30, frameon=True)
plt.grid(linewidth=2, alpha=0.6)

plt.savefig("IVFPQ with FF.pdf", bbox_inches="tight")

## AR IVF

In [328]:
# Palette for the AR IVF section (ten named matplotlib colours).
tableau_20 = [
    "steelblue", "tab:orange", "tab:green", "tab:red", "tab:purple",
    "tab:brown", "tab:pink", "tab:gray", "tab:olive", "tab:cyan",
]

In [329]:
# AR-IVF results: one row per 'Dim' with its 'Acc'. Note that this rebinds the
# notebook-wide name `df`.
df = pd.read_csv("./AR-IVF.csv")
# Bare expression: renders the whole (small) table as the cell output.
df

Unnamed: 0,Dim,Acc
0,32,69.12
1,64,69.57
2,128,69.67
3,256,69.87
4,512,69.91
5,1024,70.01


In [None]:
# Bar chart of 'Acc' per 'Dim' (legend 'AdANNS-IVF-D') with a dashed red
# reference line at 69.94 (legend 'IVF-MR-2048').
f, ax = plt.subplots(figsize=(16, 12))

ax.set_xlim([-10, 100])
ax.set_ylim([69, 70.1])

bar_style = dict(facecolor='tab:blue', width=0.5, edgecolor='k', label='AdANNS-IVF-D')
sns.barplot(ax=ax, data=df, x='Dim', y='Acc', **bar_style)

reference_acc = 69.94
ax.plot(
    [-5, 100],
    [reference_acc, reference_acc],
    color="tab:red",
    linewidth=5,
    linestyle='dashed',
    label='IVF-MR-2048',
)

fs = 40
plt.yticks(fontsize=fs-5)
plt.xticks(fontsize=fs-5)

ax.set_xlabel("Representation Size", fontsize=fs, labelpad=10)
ax.set_ylabel("Top-1 Accuracy(%)", fontsize=fs, labelpad=10)

ax.grid(linewidth=3)
ax.legend(loc=2, borderpad=0.4, prop={'size':25}, frameon=True, title_fontsize=30)

f.savefig("Adaptive Inference vs IVF Top-1.pdf", bbox_inches="tight")

## Relative Contrast

In [340]:
# Palette for the relative-contrast plot (ten named matplotlib colours).
tableau_20 = [
    "steelblue", "tab:orange", "tab:green", "tab:red", "tab:purple",
    "tab:brown", "tab:pink", "tab:gray", "tab:olive", "tab:cyan",
]

In [341]:
# Relative-contrast table; the plot below reads its 'D', 'MRL-Cr' and 'FF-Cr'
# columns. Note that this rebinds the notebook-wide name `df`.
df = pd.read_csv("./RelCon.csv")

In [None]:
# Relative contrast vs. representation size D: 'MRL-Cr' (legend 'MR') against
# 'FF-Cr' (legend 'RR'), on a log x-axis.
f, ax = plt.subplots(figsize=(20, 12))

for column, colour, legend_label in (
    ('MRL-Cr', tableau_20[0], 'MR'),
    ('FF-Cr', tableau_20[3], 'RR'),
):
    ax.scatter(df['D'], df[column], s=200, c=colour, label=legend_label)
    ax.plot(df['D'], df[column], c=colour, linewidth=4)

ax.set_xscale('log')

fs = 40
plt.yticks(fontsize=fs-5)
plt.xticks(Dims, Dims, fontsize=fs-5,  rotation=45)

ax.set_xlabel("Representation Size", fontsize=fs, labelpad=10)
ax.set_ylabel("Relative Contrast", fontsize=fs, labelpad=10)
ax.legend(loc=1, borderpad=0.4, prop={'size':30}, frameon=True)

ax.grid(linewidth=2, alpha=0.6)

f.savefig("Relative Contrast vs. Exact Search Dimensionality.pdf", bbox_inches="tight")

## 1-Recall@1 Plots

In [13]:
# Palette for the 1-Recall@1 clustering plot (ten named matplotlib colours).
tableau_20 = [
    "steelblue", "tab:red", "tab:green", "tab:orange", "tab:purple",
    "tab:brown", "tab:pink", "tab:gray", "tab:olive", "tab:cyan",
]

In [14]:
# 1-Recall@1 clustering results split by the 'Config' column into MRL and FF
# subsets at sizes 64 / 256 / 2048.
df = pd.read_csv('./R11_Cluster.csv')

mrl64, mrl256, mrl2048 = (df[df['Config'] == f'MRL-{size}'] for size in (64, 256, 2048))
ff64, ff256, ff2048 = (df[df['Config'] == f'FF-{size}'] for size in (64, 256, 2048))

# One marker per size, in 64/256/2048 order.
markers = ['*', 'D', 'o']

In [None]:
# 1-Recall@1 vs. number of IVF probes for sizes 64/256/2048.
# Colour and marker encode the size; solid lines are the MRL frames, dashed
# lines the FF frames.
f, ax = plt.subplots(figsize=(16, 16))

Ds = [64,256,2048]
ax.set_xlim(1, 30)
for idx, data in enumerate([mrl64, mrl256, mrl2048, ff64, ff256, ff2048]):
    color = tableau_20[idx%3]
    marker = markers[idx%3]
    # The first three frames keep the default line style; the last three are dashed.
    line_kwargs = {} if idx < 3 else {'linestyle': 'dashed'}

    plt.scatter(data['probes'], data['R1@1'], s=200, marker=marker, c=color)
    plt.plot(data['probes'], data['R1@1'], linewidth=4, c=color, **line_kwargs)

    if idx <3:
        # Empty scatter: one legend entry per size.
        plt.scatter([],[],marker=marker, label=Ds[idx%3], c=color)

plt.xscale('log')

fs=40
plt.yticks(fontsize=fs-5)
plt.xticks([1,2,4,10,20], [1,2,4,10,20], fontsize=fs-5)

plt.xlabel("IVF Search Probes", fontsize=fs, labelpad=10)
plt.ylabel("1-Recall@1 (%)", fontsize=fs, labelpad=10)

mrl = mlines.Line2D([], [], label='MR', c='k')
ff = mlines.Line2D([], [], label='RR', linestyle='dashed', c='k')

legend2 = plt.legend(handles=[mrl, ff], loc=1, borderpad=0.4, bbox_to_anchor=(0.785, 0.14), prop={'size':30}, markerscale=2,frameon=True)
# Legend.legendHandles was renamed to legend_handles (old name deprecated in
# Matplotlib 3.7 and later removed); use whichever this version provides.
legend_lines = getattr(legend2, 'legend_handles', None)
if legend_lines is None:
    legend_lines = legend2.legendHandles
for legobj in legend_lines:
    legobj.set_linewidth(6)
# Keep this legend on the axes; the next plt.legend call would replace it.
plt.gca().add_artist(legend2)

plt.legend(title="d", loc=4, borderpad=0.4, prop={'size': 30}, title_fontsize=30, frameon=True, markerscale=1.5)
plt.grid(linewidth=2, alpha=0.6)

plt.savefig("R50+IN1K_ R1@1 for Clustering.pdf", bbox_inches="tight")

In [131]:
# Palette for the 1-Recall@1 HNSW plot (ten named matplotlib colours).
tableau_20 = [
    "steelblue", "tab:red", "tab:green", "tab:orange", "tab:purple",
    "tab:brown", "tab:pink", "tab:gray", "tab:olive", "tab:cyan",
]

In [132]:
# 1-Recall@1 HNSW results split by the 'Config' column into MRL and FF subsets
# at sizes 64 / 256 / 2048.
df = pd.read_csv('./R11_HNSW.csv')

mrl64, mrl256, mrl2048 = (df[df['Config'] == f'MRL-{size}'] for size in (64, 256, 2048))
ff64, ff256, ff2048 = (df[df['Config'] == f'FF-{size}'] for size in (64, 256, 2048))

# One marker per size, in 64/256/2048 order.
markers = ['*', 'D', 'o']

In [None]:
# 1-Recall@1 vs. HNSW efSearch for sizes 64/256/2048.
# Colour and marker encode the size; solid lines are the MRL frames, dashed
# lines the FF frames.
f, ax = plt.subplots(figsize=(16, 16))

Ds = [64,256,2048]
ax.set_xlim(1, 30)
for idx, data in enumerate([mrl64, mrl256, mrl2048, ff64, ff256, ff2048]):
    color = tableau_20[idx%3]
    marker = markers[idx%3]
    # The first three frames keep the default line style; the last three are dashed.
    line_kwargs = {} if idx < 3 else {'linestyle': 'dashed'}

    plt.scatter(data['efSearch'], data['R1@1'], s=200, marker=marker, c=color)
    plt.plot(data['efSearch'], data['R1@1'], linewidth=4, c=color, **line_kwargs)

    if idx <3:
        # Empty scatter: one legend entry per size.
        plt.scatter([],[],marker=marker, label=Ds[idx%3], c=color)

plt.xscale('log')

fs=40
plt.yticks(fontsize=fs-5)
plt.xticks([1,2,4,10,20], [1,2,4,10,20], fontsize=fs-5)

plt.xlabel("HNSW Search Probe (efSearch)", fontsize=fs, labelpad=10)
plt.ylabel("1-Recall@1 (%)", fontsize=fs, labelpad=10)

mrl = mlines.Line2D([], [], label='MR', c='k')
ff = mlines.Line2D([], [], label='RR', linestyle='dashed', c='k')

legend2 = plt.legend(handles=[mrl, ff], loc=1, borderpad=0.4, bbox_to_anchor=(0.785, 0.14), prop={'size':30}, markerscale=2,frameon=True)
# Legend.legendHandles was renamed to legend_handles (old name deprecated in
# Matplotlib 3.7 and later removed); use whichever this version provides.
legend_lines = getattr(legend2, 'legend_handles', None)
if legend_lines is None:
    legend_lines = legend2.legendHandles
for legobj in legend_lines:
    legobj.set_linewidth(6)
# Keep this legend on the axes; the next plt.legend call would replace it.
plt.gca().add_artist(legend2)

plt.legend(title="d", loc=4, borderpad=0.4, prop={'size': 30}, title_fontsize=30, frameon=True, markerscale=1.5)
plt.grid(linewidth=2, alpha=0.6)

plt.savefig("R50+IN1K_ R1@1 for HNSW.pdf", bbox_inches="tight")

In [354]:
# Palette for the cluster-centroid recall plot (ten named matplotlib colours).
tableau_20 = [
    "steelblue", "tab:red", "tab:green", "tab:orange", "tab:purple",
    "tab:brown", "tab:pink", "tab:gray", "tab:olive", "tab:cyan",
]

# markers = ['o', '*', 'D', ]

In [355]:
# Cluster-centroid recall table; the plot below reads 'Ds' and the
# 'R1@1' ... 'R1@10' columns. Note that this rebinds the notebook-wide name `df`.
df = pd.read_csv('./Cluster_centroid.csv')
# Disabled filter that would keep only rows with R1@1 above 80.
# df = df[df['R1@1']>80]

In [None]:
# 1-Recall@N of cluster centroids vs. representation size, one curve per N
# taken from the R1@N columns; the y-axis is clipped to 80..101.
f, ax = plt.subplots(figsize=(20, 12))

ax.set_ylim(80, 101)

recall_columns = ['R1@1', 'R1@2', 'R1@4', 'R1@5', 'R1@10']
for idx, col in enumerate(recall_columns):
    colour = tableau_20[idx]
    # The legend label reuses the N that follows '@' in the column name.
    curve_label = '1-Recall@' + col.split('@')[1]
    ax.scatter(df['Ds'], df[col], s=200, c=colour, marker='o', label=curve_label)
    ax.plot(df['Ds'], df[col], c=colour, linewidth=4)

ax.set_xscale('log')

fs = 40
plt.yticks(fontsize=fs-5)
plt.xticks(Dims, Dims, fontsize=fs-5,  rotation=45)

ax.set_xlabel("Representation Size", fontsize=fs, labelpad=10)
ax.set_ylabel("1-Recall@N", fontsize=fs, labelpad=10)
ax.legend(loc=4, borderpad=0.4, prop={'size':30}, frameon=True)

ax.grid(linewidth=2, alpha=0.6)

f.savefig("Cluster Centroid Recall-k@N.pdf", bbox_inches="tight")

## Recall Score Plots

In [3]:
# Palette for the recall-score plots (ten named matplotlib colours, one per
# representation size).
tableau_20 = [
    "steelblue", "tab:red", "tab:green", "tab:orange", "tab:purple",
    "tab:brown", "tab:pink", "tab:gray", "tab:olive", "tab:cyan",
]

In [4]:
# Recall-score table; the plots below filter it on 'd' and read 'm' plus the
# 'MRL ...' metric columns. Note that this rebinds the notebook-wide name `df`.
df = pd.read_csv("./Recall_score.csv")

In [None]:
# 'MRL IVF R40' against the 'm' column, one curve per representation size in Dims.
# NOTE(review): the x-axis is labelled as IVF probes while the data column is
# named 'm' — confirm the column meaning.
f, ax = plt.subplots(figsize=(16,16))

ax.set_xlim(1, 15)
for idx, d in enumerate(Dims):
    rows_for_d = df[df['d'] == d]
    colour = tableau_20[idx]
    ax.plot(rows_for_d['m'], rows_for_d['MRL IVF R40'], linewidth=4, marker=None, c=colour)
    # Empty scatter: only contributes a square legend swatch for this size.
    ax.scatter([], [], marker='s', label=d, c=colour)

ax.set_xscale('log')

fs = 40
plt.yticks(fontsize=fs-5)
plt.xticks([1,2,4,10], [1,2,4,10], fontsize=fs-5)

ax.set_xlabel(r"IVF Search Probes ($n_p$)", fontsize=fs, labelpad=10)
ax.set_ylabel("40-Recall@2048 (%)", fontsize=fs, labelpad=10)

ax.legend(title="d", loc=4, borderpad=0.4, prop={'size': 30}, title_fontsize=30, frameon=True, markerscale=1.5)
ax.grid(linewidth=2, alpha=0.6)

f.savefig("MRL Clustering R40@2048.pdf", bbox_inches="tight")

In [None]:
# 'MRL R1@1' against the 'm' column, one curve per representation size in Dims.
# NOTE(review): the x-axis is labelled as IVF probes while the data column is
# named 'm' — confirm the column meaning.
f, ax = plt.subplots(figsize=(16,16))

ax.set_xlim(1, 15)
for idx, d in enumerate(Dims):
    rows_for_d = df[df['d'] == d]
    colour = tableau_20[idx]
    ax.plot(rows_for_d['m'], rows_for_d['MRL R1@1'], linewidth=4, marker=None, c=colour)
    # Empty scatter: only contributes a square legend swatch for this size.
    ax.scatter([], [], marker='s', label=d, c=colour)

ax.set_xscale('log')

fs = 40
plt.yticks(fontsize=fs-5)
plt.xticks([1,2,4,10], [1,2,4,10], fontsize=fs-5)

ax.set_xlabel(r"IVF Search Probes ($n_p$)", fontsize=fs, labelpad=10)
ax.set_ylabel("1-Recall@1 (%)", fontsize=fs, labelpad=10)

ax.legend(title="d", loc=4, borderpad=0.4, prop={'size': 30}, title_fontsize=30, frameon=True, markerscale=1.5)
ax.grid(linewidth=2, alpha=0.6)

f.savefig("MRL Clustering R1@1.pdf", bbox_inches="tight")

In [None]:
# 'MRL HNSW R40' against the 'm' column, one curve per representation size in Dims.
# NOTE(review): the x-axis is labelled as efSearch while the data column is
# named 'm' — confirm the column meaning.
f, ax = plt.subplots(figsize=(16,16))

ax.set_xlim(1, 40)
for idx, d in enumerate(Dims):
    rows_for_d = df[df['d'] == d]
    colour = tableau_20[idx]
    ax.plot(rows_for_d['m'], rows_for_d['MRL HNSW R40'], linewidth=4, marker=None, c=colour)
    # Empty scatter: only contributes a square legend swatch for this size.
    ax.scatter([], [], marker='s', label=d, c=colour)

ax.set_xscale('log')

fs = 40
plt.yticks(fontsize=fs-5)
plt.xticks([1,2,4,10, 20], [1,2,4,10,20], fontsize=fs-5)

ax.set_xlabel("HNSW Search Probes (efSearch)", fontsize=fs, labelpad=10)
ax.set_ylabel("40-Recall@2048 (%)", fontsize=fs, labelpad=10)

ax.legend(title="d", loc=4, borderpad=0.4, prop={'size': 30}, title_fontsize=30, frameon=True, markerscale=1.5)
ax.grid(linewidth=2, alpha=0.6)

f.savefig("MRL HNSW R40@2048.pdf", bbox_inches="tight")

In [None]:
# 'MRL HNSW R1@1' against the 'm' column, one curve per representation size in Dims.
# NOTE(review): the x-axis is labelled as efSearch while the data column is
# named 'm' — confirm the column meaning.
f, ax = plt.subplots(figsize=(16,16))

ax.set_xlim(1, 40)
for idx, d in enumerate(Dims):
    rows_for_d = df[df['d'] == d]
    colour = tableau_20[idx]
    ax.plot(rows_for_d['m'], rows_for_d['MRL HNSW R1@1'], linewidth=4, marker=None, c=colour)
    # Empty scatter: only contributes a square legend swatch for this size.
    ax.scatter([], [], marker='s', label=d, c=colour)

ax.set_xscale('log')

fs = 40
plt.yticks(fontsize=fs-5)
plt.xticks([1,2,4,10, 20], [1,2,4,10,20], fontsize=fs-5)

ax.set_xlabel("HNSW Search Probes (efSearch)", fontsize=fs, labelpad=10)
ax.set_ylabel("1-Recall@1 (%)", fontsize=fs, labelpad=10)

ax.legend(title="d", loc=4, borderpad=0.4, prop={'size': 30}, title_fontsize=30, frameon=True, markerscale=1.5)
ax.grid(linewidth=2, alpha=0.6)

f.savefig("MRL HNSW R1@1.pdf", bbox_inches="tight")

In [21]:
# Palette for the ImageNet-4K recall plots (ten named matplotlib colours).
tableau_20 = [
    "steelblue", "tab:red", "tab:green", "tab:orange", "tab:purple",
    "tab:brown", "tab:pink", "tab:gray", "tab:olive", "tab:cyan",
]

In [30]:
# 4K recall table; the plot below reads 'probes' and the 'MRL-<size>' columns.
# Note that this rebinds the notebook-wide name `df`.
df = pd.read_csv('./4k_recall_40.csv')

In [None]:
# 40-Recall@2048 vs. IVF probes, one curve per MRL-<size> column of df.
f, ax = plt.subplots(figsize=(16,16))

ax.set_ylim(65, 100)
ax.set_xlim(1, 16)

size_columns = ['MRL-8', 'MRL-64', 'MRL-256', 'MRL-2048']
for idx, col in enumerate(size_columns):
    colour = tableau_20[idx]
    # Legend swatch; dropping the 'L' turns e.g. 'MRL-8' into 'MR-8'.
    ax.scatter([], [], c=colour, label=col.replace('L', ''), marker='s')
    ax.plot(df['probes'], df[col], c=colour, linewidth=4, marker=None)

ax.set_xscale('log')

fs = 40
plt.yticks(fontsize=fs-5)
plt.xticks([1,2,4,10], [1,2,4,10], fontsize=fs-5)

ax.set_xlabel(r"IVF Search Probes ($n_p$)", fontsize=fs, labelpad=10)
ax.set_ylabel("40-Recall@2048 (%)", fontsize=fs, labelpad=10)

ax.legend(loc=4, borderpad=0.4, prop={'size': 30}, title_fontsize=30, frameon=True, markerscale=1.5)
ax.grid(linewidth=2, alpha=0.6)

f.savefig("R50+IN4K_ R40@2048 for Clustering.pdf", bbox_inches="tight")

In [None]:
# 1-Recall@1 vs. IVF probes, one curve per MRL-<size> column. The table is
# loaded in this cell and rebinds the notebook-wide name `df`.
df = pd.read_csv('./4k_recall_1.csv')

f, ax = plt.subplots(figsize=(16,16))

ax.set_ylim(70, 100)
ax.set_xlim(1, 16)

size_columns = ['MRL-8', 'MRL-64', 'MRL-256', 'MRL-2048']
for idx, col in enumerate(size_columns):
    colour = tableau_20[idx]
    # Legend swatch; dropping the 'L' turns e.g. 'MRL-8' into 'MR-8'.
    ax.scatter([], [], c=colour, label=col.replace('L', ''), marker='s')
    ax.plot(df['probes'], df[col], c=colour, linewidth=4, marker=None)

ax.set_xscale('log')

fs = 40
plt.yticks(fontsize=fs-5)
plt.xticks([1,2,4,10], [1,2,4,10], fontsize=fs-5)

ax.set_xlabel(r"IVF Search Probes ($n_p$)", fontsize=fs, labelpad=10)
ax.set_ylabel("1-Recall@1 (%)", fontsize=fs, labelpad=10)

ax.legend(loc=4, borderpad=0.4, prop={'size': 30}, title_fontsize=30, frameon=True, markerscale=1.5)
ax.grid(linewidth=2, alpha=0.6)

f.savefig("R50+IN4K_ R1@1 for Clustering.pdf", bbox_inches="tight")

## AR

In [20]:
# Palette for the adaptive-retrieval scatter plot (ten named matplotlib colours).
tableau_20 = [
    "steelblue", "tab:red", "tab:green", "tab:orange", "tab:purple",
    "tab:brown", "tab:pink", "tab:gray", "tab:olive", "tab:cyan",
]

In [21]:
# Adaptive-retrieval results, restricted to rows with 64 < top1 < 71 and then
# split by the 'Config' column.
df = pd.read_csv('./AR.csv')
df = df[(df['top1'] > 64) & (df['top1'] < 71)]

# Variable names do not always mirror the config string (e.g. mgrr <- 'MG-FF').
ad_anns, mgrr, ivf_rr, mr_rr, svd = (
    df[df['Config'] == config_name]
    for config_name in ('AdANNS', 'MG-FF', 'IVF-RR', 'IVF-MR', 'MG-IVF-SVD')
)

In [None]:
# Top-1 accuracy against per-query compute for each retrieval configuration.
# The x-limits are left to autoscale (an explicit (0.1, 200) range was tried and disabled).
f, ax = plt.subplots(figsize=(16, 9))

# (frame, marker size, marker, colour, legend label, alpha), listed in drawing order.
series_specs = [
    (ad_anns, 50, 'D', tableau_20[0], 'AdANNS-IVF-C', 0.8),
    (mr_rr, 100, 'o', 'blueviolet', 'IVF-MR', 1),
    (mgrr, 100, '*', tableau_20[2], 'MG-IVF-RR', 0.8),
    (ivf_rr, 100, 'P', tableau_20[1], 'IVF-RR', 0.8),
    (svd, 100, 'x', 'k', 'MG-IVF-SVD', 0.8),
]
for frame, size, glyph, colour, legend_label, opacity in series_specs:
    ax.scatter(
        frame['compute'],
        frame['top1'],
        s=size,
        marker=glyph,
        c=colour,
        label=legend_label,
        alpha=opacity,
    )

ax.set_xscale('log')

# Base font size: axis labels use it as is, tick labels are 5pt smaller.
fs = 20
compute_ticks = [0.1, 1, 10, 100]
plt.yticks(fontsize=fs - 5)
plt.xticks(compute_ticks, compute_ticks, fontsize=fs - 5)

ax.set_xlabel("MFLOPS/Query", fontsize=fs, labelpad=10)
ax.set_ylabel("Top-1 Accuracy(%)", fontsize=fs, labelpad=10)

ax.legend(frameon=True, loc='lower right', borderpad=0.5, prop={'size': 20}, markerscale=1.5)
ax.grid(linewidth=2, alpha=0.6)

plt.savefig("Adaptive Retrieval.pdf", bbox_inches="tight")

## ResNet Family

In [3]:
# Colour palette for the ResNet-family figure; slot 3 is "blueviolet" here,
# the remaining tail entries are matplotlib "tab:" colours.
tableau_20 = [
    "steelblue",
    "tab:red",
    "tab:green",
    "blueviolet",
    *(f"tab:{shade}" for shade in ("purple", "brown", "pink", "gray", "olive", "cyan")),
]

In [4]:
# ResNet-family results: one group of rows per 'Config' value.
df = pd.read_csv("./ResFam.csv")

# Optional accuracy filter, left disabled:
# df = df[df['Acc']>50]

# '<model>-IVF' rows: the plotting cell draws these as solid lines (legend entry "IVF-MR").
mr_r18 = df[df['Config']=='R18-IVF']
mr_r34 = df[df['Config']=='R34-IVF']
mr_r50 = df[df['Config']=='R50-IVF']
mr_r101 = df[df['Config']=='R101-IVF']

# '<model>-IVF-E' rows: the plotting cell draws these as dashed lines (legend entry "Exact-MR").
mr_r18_e = df[df['Config']=='R18-IVF-E']
mr_r34_e = df[df['Config']=='R34-IVF-E']
mr_r50_e = df[df['Config']=='R50-IVF-E']
mr_r101_e = df[df['Config']=='R101-IVF-E']

# Legend labels and markers, in the same order as the frames above (R18, R34, R50, R101).
# The second label was 'ResNet32', which did not match the 'R34-*' rows it describes.
labels=['ResNet18', 'ResNet34', 'ResNet50', 'ResNet101']
markers = ['o', '^', 'D', '*']

In [None]:
# Top-1 accuracy against representation size for each ResNet model:
# solid line = '<model>-IVF' rows, dashed line = '<model>-IVF-E' rows.
f, ax = plt.subplots(figsize=(20,12))

ax.set_ylim(50,74)
for idx, (d1, d2) in enumerate(zip([mr_r18, mr_r34, mr_r50, mr_r101], [mr_r18_e, mr_r34_e, mr_r50_e, mr_r101_e])):
    plt.scatter(d1['d'], d1['Acc'], s=125, c=tableau_20[idx], marker=markers[idx])
    plt.plot(d1['d'], d1['Acc'], linewidth=4, alpha=0.8, c=tableau_20[idx])

    plt.scatter(d2['d'], d2['Acc'], s=125, c=tableau_20[idx], marker=markers[idx])
    plt.plot(d2['d'], d2['Acc'], linewidth=4, alpha=0.8, c=tableau_20[idx], linestyle='dashed')

    # Empty scatter: contributes only the per-model entry (marker + colour) to the model legend.
    plt.scatter([],[], marker=markers[idx], c=tableau_20[idx], label=labels[idx])

plt.xscale('log')

# Base font size: axis labels use it as is, tick labels are 5pt smaller.
fs=40
plt.yticks(fontsize=fs-5)
plt.xticks(Dims, Dims, fontsize=fs-5,  rotation=45)

plt.xlabel("Representation Size", fontsize=fs, labelpad=10)
plt.ylabel("Top-1 Accuracy (%)", fontsize=fs, labelpad=10)

# Proxy artists for the line-style legend (solid vs. dashed).
mrl = mlines.Line2D([], [], label='IVF-MR', c='k')
ff = mlines.Line2D([], [], label='Exact-MR', linestyle='dashed', c='k')

legend2 = plt.legend(handles=[mrl, ff], loc=1, borderpad=0.4, bbox_to_anchor=(0.795, 0.17), prop={'size':25}, markerscale=1.5,frameon=True)
# `Legend.legendHandles` was deprecated in Matplotlib 3.7 and removed in later releases
# in favour of `legend_handles`; prefer the new attribute and fall back on older installs.
legend2_handles = getattr(legend2, 'legend_handles', None)
if legend2_handles is None:
    legend2_handles = legend2.legendHandles
for legobj in legend2_handles:
    legobj.set_linewidth(6)
# Re-attach the first legend: the second plt.legend() call below would otherwise replace it.
plt.gca().add_artist(legend2)

plt.legend(loc=4, borderpad=0.4, prop={'size':25}, frameon=True, markerscale=1.5)

plt.grid(linewidth=2, alpha=0.6)

plt.savefig("Top-1 for different MRL ResNet models.pdf", bbox_inches="tight")