In [8]:
from utils import theoretical_similarity, similarity_cutoff
from helpers import norm_hamming_similarity
import torchhd
import torch
import plotly.express as px
import pandas as pd

# Experiment setup

In [9]:
# Sweep grid for the experiment.
# Hypervector dimensionalities: consecutive powers of two, 2**6 .. 2**13.
DIMENSIONS = [1 << exponent for exponent in range(6, 14)]
# Number of vectors per bundle: 10, 20, ..., 190.
BUNDLE_SIZES = [10 * step for step in range(1, 20)]
# Number of vectors held in the item memory.
ITEM_MEMORY_SIZE = 1_000

In [10]:
num_exp = 10  # Number of times to repeat the experiment
recall = 0.95  # Target recall handed to similarity_cutoff()

# Seed the global torch RNG so "Restart Kernel -> Run All" reproduces the same
# item memories and bundle selections (and therefore the same plots).
torch.manual_seed(0)

# metric_averages[i, j] holds (recall, precision, accuracy) for
# BUNDLE_SIZES[i] and DIMENSIONS[j], averaged over the num_exp repetitions.
metric_averages = torch.zeros(len(BUNDLE_SIZES), len(DIMENSIONS), 3)

for i, bundle_size in enumerate(BUNDLE_SIZES):
  for j, dim in enumerate(DIMENSIONS):
    # Generate random memory vectors
    item_memory = torchhd.random(ITEM_MEMORY_SIZE, dim, vsa='BSC')
    # Similarity threshold above which an item is declared "in the bundle"
    cutoff = similarity_cutoff(bundle_size, dim, recall)
    # One row per repetition: recall, precision, accuracy
    metrics = torch.zeros((num_exp, 3))

    for exp in range(num_exp):
      # Select bundle_size distinct random memory vectors to form a bundle
      bundle_idxs = torch.randperm(ITEM_MEMORY_SIZE)[:bundle_size]
      # Boolean membership mask; replaces a Python-level "i not in tensor"
      # scan over every item-memory index.
      in_bundle = torch.zeros(ITEM_MEMORY_SIZE, dtype=torch.bool)
      in_bundle[bundle_idxs] = True
      bundle = torchhd.multiset(item_memory[bundle_idxs])

      # NOTE(review): assumes one similarity per item-memory row (1-D, length
      # ITEM_MEMORY_SIZE) -- confirm against helpers.norm_hamming_similarity.
      similarities_bundle = norm_hamming_similarity(bundle, item_memory)
      similarities_bundle_bundled = similarities_bundle[in_bundle]
      similarities_bundle_other = similarities_bundle[~in_bundle]

      # Confusion matrix. An item is predicted positive iff its similarity is
      # strictly above the cutoff; everything else (including exact ties) is
      # predicted negative, so the four counts always sum to ITEM_MEMORY_SIZE.
      TP = (similarities_bundle_bundled > cutoff).sum().item()
      FP = (similarities_bundle_other > cutoff).sum().item()
      FN = bundle_size - TP
      TN = (ITEM_MEMORY_SIZE - bundle_size) - FP

      metrics[exp, 0] = TP / (TP + FN)
      # Precision is undefined when nothing is predicted positive; report 0.0
      # instead of raising ZeroDivisionError.
      metrics[exp, 1] = TP / (TP + FP) if (TP + FP) > 0 else 0.0
      metrics[exp, 2] = (TP + TN) / ITEM_MEMORY_SIZE

    metric_averages[i, j, :] = metrics.mean(dim=0)

In [11]:
# Recall, precision and accuracy per dimension, averaged over all bundle sizes
# (axis 0 of metric_averages). Convert to NumPy once so pandas receives plain
# float columns instead of torch tensors.
per_dimension = metric_averages.mean(dim=0).numpy()
df = pd.DataFrame({
  'Dimension': DIMENSIONS,
  'Recall': per_dimension[:, 0],
  'Precision': per_dimension[:, 1],
  'Accuracy': per_dimension[:, 2],
})
fig = px.line(
  df,
  x='Dimension',
  y=['Recall', 'Precision', 'Accuracy'],
  title='Recall, Precision, and Accuracy vs Dimension',
)
fig.update_layout(xaxis_title='Dimension', yaxis_title='Metric Value')
# DIMENSIONS are powers of two; enable the next line for evenly spaced points.
# fig.update_xaxes(type='log')
fig.show()

In [12]:
# Recall, precision and accuracy per bundle size, averaged over all dimensions
# (axis 1 of metric_averages). Convert to NumPy once so pandas receives plain
# float columns instead of torch tensors.
per_bundle_size = metric_averages.mean(dim=1).numpy()
df = pd.DataFrame({
  'Bundle Size': BUNDLE_SIZES,
  'Recall': per_bundle_size[:, 0],
  'Precision': per_bundle_size[:, 1],
  'Accuracy': per_bundle_size[:, 2],
})
fig = px.line(
  df,
  x='Bundle Size',
  y=['Recall', 'Precision', 'Accuracy'],
  title='Recall, Precision, and Accuracy vs Bundle Size',
)
fig.update_layout(xaxis_title='Bundle Size', yaxis_title='Metric Value')
fig.show()

In [13]:
# Recall vs dimension, averaged over all bundle sizes (axis 0 of
# metric_averages). Converted to NumPy so pandas gets a plain float column.
recall_per_dimension = metric_averages.mean(dim=0)[:, 0].numpy()
df = pd.DataFrame({'Dimension': DIMENSIONS, 'Recall': recall_per_dimension})
fig = px.line(df, x='Dimension', y='Recall', title='Recall vs Dimension')
fig.update_layout(xaxis_title='Dimension', yaxis_title='Recall')
# DIMENSIONS are powers of two; enable the next line for evenly spaced points.
# fig.update_xaxes(type='log')
fig.show()

In [14]:
# Recall vs bundle size, averaged over all dimensions (axis 1 of
# metric_averages). Converted to NumPy so pandas gets a plain float column.
recall_per_bundle_size = metric_averages.mean(dim=1)[:, 0].numpy()
df = pd.DataFrame({'Bundle Size': BUNDLE_SIZES, 'Recall': recall_per_bundle_size})
fig = px.line(df, x='Bundle Size', y='Recall', title='Recall vs Bundle Size')
fig.update_layout(xaxis_title='Bundle Size', yaxis_title='Recall')
fig.show()