In [2]:
import matplotlib.pyplot as plt
import numpy as np
import json

In [3]:
# Benchmark datasets as (result-file key, display name) pairs, in plotting order.
# Both lookup containers below are derived from this single listing so the keys
# cannot drift apart.
_dataset_names = (
  ('glove-200-angular', 'GloVe'),
  ('gist-960-euclidean', 'GIST1M'),
  ('sift-128-euclidean', 'SIFT1M'),
  ('fashion-mnist-784-euclidean', 'MNIST'),
  ('deep-image-96-angular', 'DEEP1M'),
  ('dbpedia-openai-1000k-angular', 'DBpedia-OpenAI'),
  ('us-stock-384-euclidean', 'S&P 500'),
  ('ucf-crime-4096-euclidean', 'UCF-Crime'),
)

# Ordered dataset keys.
datasets = [key for key, _label in _dataset_names]
# Dataset key -> title shown above its panel.
dataset_format = {key: label for key, label in _dataset_names}
num_datasets = len(datasets)

# Index whose results are plotted, and the k of the k-NN queries (used in the
# 'Recall@k' axis label).
index = 'hnsw'
knn = 10

# One line colour per plotted curve, looked up by position.
colors = [
  'tomato',
  'deepskyblue',
  'forestgreen',
  'PeachPuff',
  'gold',
  'orchid',
]

In [None]:
import os

from function import pareto_frontier

# Draws one QPS-vs-recall panel per dataset, with one curve per sigma value,
# and saves the whole figure as a PDF.
# Relies on `datasets`, `dataset_format`, `colors`, `knn` and `num_datasets`
# from the configuration cell.

# Sigma values to draw one curve for: 0.0, 0.2, ..., 1.0.
sigma_list = [sigma/100 for sigma in range(0, 101, 20)]

index = 'hnsw'
recall_limit = 0.5   # handed to pareto_frontier and used as the first x tick
recall_gap = 0.1     # spacing between x ticks
output_path = 'figure/factor/sim.pdf'

nrows = 1
# Ceiling division so every dataset gets a panel even if nrows does not divide
# num_datasets evenly; surplus panels are hidden below.
ncols = -(-num_datasets // nrows)

# squeeze=False always yields a 2-D array, so the flat indexing below works for
# any grid shape (including a single panel).
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(18, 1.5), squeeze=False)
axes = axes.ravel()

# Label the y axis once per row, on the left-most panel.
for row in range(nrows):
  axes[row * ncols].set_ylabel('QPS', fontsize=14)

legend_handles = []
for i in range(num_datasets):
  ds = datasets[i]
  ax = axes[i]

  # The us-stock result file is named with empty fields where the other
  # datasets carry the two 'uniform-0.0-1.0' components.
  if ds == 'us-stock-384-euclidean':
    method_path = f"perf/overall/qt.{index}.{ds}...k10.json"
  else:
    method_path = f"perf/overall/qt.{index}.{ds}.uniform-0.0-1.0.uniform-0.0-1.0.k10.json"
  with open(method_path, 'r') as file:
    method = json.load(file)

  for k, sigma in enumerate(sigma_list):
    # Keep only the runs whose last query parameter equals this sigma, then
    # reduce them with the project helper.
    # NOTE(review): pareto_frontier presumably returns the recall/QPS Pareto
    # front above recall_limit -- confirm against function.py.
    sigma_method = pareto_frontier(
      [item for item in method if item['query_parameters'][-1] == sigma],
      recall_limit
    )
    (line,) = ax.plot(
      [item['recall'] for item in sigma_method],
      [item['qps'] for item in sigma_method],
      marker='o', color=colors[k], linewidth=1.2, markersize=3,
    )
    # Every panel uses the same colour per sigma, so the first panel's lines
    # serve as the legend handles for the whole figure.
    if i == 0:
      legend_handles.append(line)

  # Only panels in the bottom row carry the x-axis label.
  if i // ncols == nrows - 1:
    ax.set_xlabel(f'Recall@{knn}', fontsize=14)
  ax.set_xticks(np.arange(recall_limit, 1.001, recall_gap))
  ax.set_yscale("log", base=10)
  ax.grid(True)
  ax.set_title(dataset_format[ds], fontsize=14)

# Hide grid cells that have no dataset (only possible for a ragged grid).
for unused_ax in axes[num_datasets:]:
  unused_ax.set_visible(False)

# One legend entry per sigma, e.g. "$\sigma$=0.2".
leg = [rf'$\sigma$={sigma}' for sigma in sigma_list]

fig.subplots_adjust(wspace=0.3, hspace=0.71)
# Pass handles explicitly instead of relying on the draw order of the artists.
fig.legend(legend_handles, leg, loc='center', bbox_to_anchor=(0.51, 1.15),
           ncol=len(sigma_list), frameon=False, fontsize=14)

# savefig does not create missing directories.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
fig.savefig(output_path, dpi=300, bbox_inches='tight', pad_inches=0)
# plt.show() works with the notebook's inline backend; fig.show() only warns
# there because that backend has no GUI.
plt.show()