In [19]:
import torchhd
import torch
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm
from functools import partial


In [20]:
import sys
sys.path.append('..')
from shared_code.helpers import similarity_func_partial

In [21]:
# Experiment configuration: VSA model, swept dimensionalities / bundle sizes,
# compute device and repetition counts used by the cells below.
vsa_types = ['FHRR', 'BSC', 'MAP', 'HRR', 'BSBC']
vsa_type = 'BSC'
# DIMENSIONS = [2**i for i in range(5, 13)]
DIMENSIONS = [2**i for i in range(9, 14)]
BUNDLE_SIZES = list(range(2, 200, 2))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Compare the device *type*: a torch.device never compares equal to a plain
# string, so `device == 'cuda'` was always False and the cache was never cleared.
if device.type == 'cuda':
    torch.cuda.empty_cache()

batches = 10  # number of independent repetitions averaged over
item_memory_size = 10_000  # number of random hypervectors in the item memory
# Similarity function with the VSA type pre-bound as its first argument.
similarity_func = partial(similarity_func_partial, vsa_type)

### Evaluating non-normalized capacity of HD vectors

In [22]:
# Key-value retrieval experiment.
# For every repetition, dimensionality and bundle size: draw distinct key/value
# vectors from a fresh item memory, bind each key to its value, bundle all bound
# pairs, unbind with the first key, and record
#   * the (flipped) similarity between the result and the first value vector, and
#   * whether the first value vector is the single best match in the item memory.
# Results are stored with shape (len(DIMENSIONS), len(BUNDLE_SIZES), batches).
sim_unbound_vector_tensor = torch.zeros(len(DIMENSIONS), len(BUNDLE_SIZES), batches)
unbound_vec_retainment_tensor = torch.zeros(len(DIMENSIONS), len(BUNDLE_SIZES), batches)

for batch in tqdm(range(batches)):
    for d_idx, dim in enumerate(DIMENSIONS):
        item_memory = torchhd.random(item_memory_size, dim, vsa=vsa_type, device=device)

        for k_idx, num_pairs in enumerate(BUNDLE_SIZES):
            # Every pair needs one key and one value, so sample twice as many
            # distinct item-memory rows as there are pairs.
            random_indices = torch.randperm(item_memory_size)[:2 * num_pairs]
            experiment_vectors = item_memory[random_indices]
            key_vectors = experiment_vectors[:num_pairs]
            value_vectors = experiment_vectors[num_pairs:]

            # Bind all key/value pairs in one batched call (row i of the keys
            # with row i of the values) instead of a Python-level loop.
            bound_vectors = torchhd.bind(key_vectors, value_vectors)

            bundle_vector = torchhd.multiset(bound_vectors)

            # NOTE(review): the key is negated (`negative()`) and every similarity
            # below is flipped with `1 - ...`; presumably the two cancel for BSC
            # with a [0, 1] similarity -- confirm against similarity_func_partial
            # before switching vsa_type.
            unbound_vector = bundle_vector.bind(key_vectors[0].negative())

            # Flipped similarity between the first value vector and the unbound result.
            sim_unbound_vector_tensor[d_idx, k_idx, batch] = 1 - similarity_func(value_vectors[0], unbound_vector)

            # Best-matching item-memory entry for the unbound result. Only the
            # top-1 match is used, so argmax replaces the former top-5 search.
            similarities = 1 - similarity_func(unbound_vector, item_memory)
            nearest_vector_idx = torch.argmax(similarities).item()

            # random_indices[num_pairs] is the item-memory index of the first
            # value vector; retrieval succeeds when it is the best match.
            unbound_vec_retainment_tensor[d_idx, k_idx, batch] = 1 if random_indices[num_pairs] == nearest_vector_idx else 0

# compute mean and std over batches
sim_unbound_vector_mean = torch.mean(sim_unbound_vector_tensor, dim=2)
sim_unbound_vector_std = torch.std(sim_unbound_vector_tensor, dim=2)

unbound_vec_retainment_mean = torch.mean(unbound_vec_retainment_tensor, dim=2)
unbound_vec_retainment_std = torch.std(unbound_vec_retainment_tensor, dim=2)

100%|██████████| 10/10 [00:26<00:00,  2.67s/it]


### Plotting results

In [27]:
# Plot, per dimensionality, the mean similarity between the key-unbound bundle
# and the first value vector, with a shaded band of +/- one standard deviation
# over the batches.
fig = go.Figure()

num_dims = len(DIMENSIONS)
for d_idx, dim in enumerate(DIMENSIONS):
    # Colour ramp from red (first dimensionality) towards green (last one).
    red = 255 - 255 * d_idx // num_dims
    green = 255 * d_idx // num_dims
    mean = sim_unbound_vector_mean[d_idx]
    std = sim_unbound_vector_std[d_idx]

    # Mean curve. `rgb()` takes exactly three components; the previous
    # four-component `rgb(r, g, 1, 1)` string was malformed and inconsistent
    # with the retrieval-probability plot.
    fig.add_trace(go.Scatter(x=BUNDLE_SIZES, y=mean,
                             name=f'{dim}',
                             line=dict(color=f'rgb({red}, {green}, 1)')))
    # Std band: upper edge left-to-right, then lower edge right-to-left, so that
    # `fill='toself'` closes the polygon. The outline is fully transparent.
    fig.add_trace(go.Scatter(x=BUNDLE_SIZES + BUNDLE_SIZES[::-1],
                             y=np.concatenate([mean + std, (mean - std).flip(0)]),
                             fill='toself',
                             fillcolor=f'rgba({red}, {green}, 1, 0.35)',
                             line=dict(color=f'rgba({red}, {green}, 1, 0)'),
                             showlegend=False))

fig.update_layout(title=dict(
            text=f'Bundled key-value pairs, key HV unbinding similarity to value HV; type {vsa_type} vectors',
            x=0.5,
            y=.965,
          ),
          legend=dict(
            orientation="h",
            yanchor="top",
            y=1.075,
            xanchor="center",
            x=0.5,
            title='Dimensionality (D)'
          ),
          xaxis_title='number of bundled key-value bindings vectors',
          yaxis_title='similarity of key HV unbound bundle to value HV',
          width=1250,
          height=750,
          font=dict(
            size=18,
          ),
          margin=dict(l=115, r=100, t=100, b=80),
)

fig.show()


In [24]:
# Plot, per dimensionality, the fraction of batches in which the first value
# vector was the best item-memory match, with a shaded +/- one std band.
fig = go.Figure()

n_dims = len(DIMENSIONS)
band_x = BUNDLE_SIZES + BUNDLE_SIZES[::-1]  # forward then backward, closes the band polygon

for row, dimensionality in enumerate(DIMENSIONS):
    r_channel = 255 - 255 * row // n_dims
    g_channel = 255 * row // n_dims
    centre = unbound_vec_retainment_mean[row]
    spread = unbound_vec_retainment_std[row]

    # Mean curve for this dimensionality.
    mean_trace = go.Scatter(
        x=BUNDLE_SIZES,
        y=centre,
        name=f'{dimensionality}',
        line=dict(color=f'rgb({r_channel}, {g_channel}, 1)'),
    )
    fig.add_trace(mean_trace)

    # Shaded band: upper edge followed by the reversed lower edge.
    band_y = np.concatenate([centre + spread, (centre - spread).flip(0)])
    band_trace = go.Scatter(
        x=band_x,
        y=band_y,
        fill='toself',
        line=dict(width=0),
        fillcolor=f'rgba({r_channel}, {g_channel}, 1, 0.35)',
        showlegend=False,
    )
    fig.add_trace(band_trace)

title_cfg = dict(
    text=f'Non-normalized first vector retainment; type {vsa_type} vectors; {item_memory_size} items in memory',
    x=0.5,
    y=.965,
)
legend_cfg = dict(
    orientation="h",
    yanchor="top",
    y=1.075,
    xanchor="center",
    x=0.5,
    title='Dimensionality (D)',
)

fig.update_layout(
    title=title_cfg,
    legend=legend_cfg,
    xaxis_title='number of bundled vectors',
    yaxis_title='probability of correct retrieval',
    width=1250,
    height=750,
    font=dict(size=18),
    margin=dict(l=115, r=100, t=100, b=80),
)

fig.show()


### Capacity of unnormalized HD vectors

In [25]:
# NOTE(review): this entire cell is commented out, so nothing below runs.
# If re-enabled it would reassign sim_unbound_vector_tensor,
# unbound_vec_retainment_tensor and the *_mean / *_std results computed by the
# key-value experiment above.
# NOTE(review): before re-enabling, check the bundling loop -- bundle_vector
# starts as first_vector (= to_bundle_vectors[0]) and the first pass of
# `for i in range(prev_k, k)` bundles to_bundle_vectors[0] again, which looks
# like a double count of the first vector -- confirm intent.
# MAX_BUNDLE_SIZE = BUNDLE_SIZES[-1]
# sim_unbound_vector_tensor = torch.zeros(len(DIMENSIONS), len(BUNDLE_SIZES), batches)
# prob_corr_retrieval_tensor = torch.zeros(len(DIMENSIONS), len(BUNDLE_SIZES), batches)
# unbound_vec_retainment_tensor = torch.zeros(len(DIMENSIONS), len(BUNDLE_SIZES), batches)

# for batch in tqdm(range(batches)):
#     sim_unbound_vector_array = torch.zeros(len(DIMENSIONS), len(BUNDLE_SIZES))
#     retrieval_first_vector_array = torch.zeros(len(DIMENSIONS), len(BUNDLE_SIZES))
#     unbound_vec_retainment_array = torch.zeros(len(DIMENSIONS), len(BUNDLE_SIZES))
#     # print(f'iteration: {batch + 1}')

#     for d_idx in range(len(DIMENSIONS)):
#         dim = DIMENSIONS[d_idx]
#         item_memory = torchhd.random(item_memory_size, dim, vsa=vsa_type, device=device)

#         # sample MAX_BUNDLE_SIZE vectors from item memory
#         random_indices = torch.randperm(item_memory_size)[:MAX_BUNDLE_SIZE]
#         to_bundle_vectors = item_memory[random_indices]
#         first_vector = to_bundle_vectors[0]
#         bundle_vector = first_vector

#         for k_idx in range(len(BUNDLE_SIZES)):
#             prev_k = BUNDLE_SIZES[k_idx - 1] if k_idx > 0 else 0
#             k = BUNDLE_SIZES[k_idx]

#             # add vectors to bundle
#             for i in range(prev_k, k):
#                 bundle_vector = torchhd.bundle(bundle_vector, to_bundle_vectors[i])

#             # add similarity of first vector to bundle to array
#             sim_unbound_vector_array[d_idx, k_idx] = similarity_func(first_vector, bundle_vector)

#             # find the k nearest vectors to bundle
#             similarities = similarity_func(bundle_vector, item_memory)
#             k_nearest_vectors_idx = torch.topk(similarities, k, largest=True).indices

#             # check if the first vector is in the k nearest vectors
#             unbound_vec_retainment_array[d_idx, k_idx] = 1 if random_indices[0] in k_nearest_vectors_idx else 0

#             # check if the k similar vectors are correct
#             num_correct = np.intersect1d(k_nearest_vectors_idx.cpu().numpy(), random_indices.cpu().numpy()).shape[0]

#             retrieval_first_vector_array[d_idx, k_idx] = num_correct / k

#     sim_unbound_vector_tensor[:, :, batch] = sim_unbound_vector_array
#     unbound_vec_retainment_tensor[:, :, batch] = unbound_vec_retainment_array
#     prob_corr_retrieval_tensor[:, :, batch] = retrieval_first_vector_array

# # compute mean and std over batches
# sim_unbound_vector_mean = torch.mean(sim_unbound_vector_tensor, dim=2)
# sim_unbound_vector_std = torch.std(sim_unbound_vector_tensor, dim=2)

# prob_corr_retrieval_mean = torch.mean(prob_corr_retrieval_tensor, dim=2)
# prob_corr_retrieval_std = torch.std(prob_corr_retrieval_tensor, dim=2)

# unbound_vec_retainment_mean = torch.mean(unbound_vec_retainment_tensor, dim=2)
# unbound_vec_retainment_std = torch.std(unbound_vec_retainment_tensor, dim=2) # is this relevant?