In [1]:
import numpy as np
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics.pairwise import cosine_distances
from torchvision.datasets import MNIST, FashionMNIST, CIFAR10
from torchvision import transforms
import torch.nn.functional as F
import torch
from tqdm import tqdm
from scipy.io import savemat, loadmat
import torchvision
from torch.utils.data import DataLoader, Subset, TensorDataset

In [2]:
def load_ucr(file):
    data = np.loadtxt(file)
    X = data[:, 1:]
    y = data[:, 0]
    y = np.where(y == 1, 1, 0)  # convert labels to 0/1
    return X, y

# Adjust file paths to your local files
X_train, y_train = load_ucr("../FordA_TRAIN.txt")
X_test, y_test = load_ucr("../FordA_TEST.txt")

print(f"Train shape: {X_train.shape}, Test shape: {X_test.shape}")

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

min_val = X_train_tensor.min()
max_val = X_train_tensor.max()

X_train_tensor = (X_train_tensor - min_val) / (max_val - min_val)
X_test_tensor = (X_test_tensor - min_val) / (max_val - min_val)


train_data = TensorDataset(X_train_tensor, y_train_tensor)
test_data = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32)

print(torch.min(X_train_tensor), torch.max(X_train_tensor))




# 1. Generate bipolar lookup table
def lookup_generate(dim: int, datatype: str, n_keys: int, device: torch.device):
    if datatype != 'bipolar':
        raise ValueError("Only 'bipolar' supported")
    tbl = torch.randint(0, 2, (n_keys, dim), device=device, dtype=torch.int8)
    return tbl * 2 - 1  # map {0,1} → {-1,+1}

# 2. Encode a batch of inputs into hypervectors
@torch.no_grad()
def encode_batch(X: torch.Tensor, position_table: torch.Tensor, grayscale_table: torch.Tensor):
    """
    X:               (N, D_in) float tensor in [-3, +3]
    position_table:  (D_in, dim)
    grayscale_table: (n_keys, dim)
    → returns (N, dim) int tensor
    """
    n_keys = grayscale_table.shape[0]
    X_normalized = ((X + 3) / 6) * (n_keys - 1)  # map [-3,3] → [0, n_keys-1]
    X_indices = X_normalized.round().clamp(0, n_keys - 1).long()  # indices in [0, n_keys-1]
    gray = grayscale_table[X_indices]            # (N, D_in, dim)
    pos  = position_table.unsqueeze(0)           # (1, D_in, dim)
    hv   = (pos * gray).sum(dim=1)               # (N, dim)
    return hv

# 3. Train associative memory by summing all encodings per class
def train_am(X_train, Y_train, position_table, grayscale_table, dim: int):
    H_train = encode_batch(X_train, position_table, grayscale_table).float()  # (N, dim)
    C = int(Y_train.max().item()) + 1
    am = torch.zeros((C, dim), device=X_train.device, dtype=torch.float32)
    am = am.index_add(0, Y_train, H_train)
    return am

# 4. Single-image prediction (returns class and query HV)
@torch.no_grad()
def predict_(am, img, position_table, grayscale_table):
    qhv = encode_batch(img.unsqueeze(0), position_table, grayscale_table).squeeze(0).float()
    sims = F.cosine_similarity(qhv.unsqueeze(0), am, dim=1)  # (C,)
    pred = int(sims.argmax().item())
    return pred, qhv

def predict(am, img, position_table, grayscale_table):
    pred, _ = predict_(am, img, position_table, grayscale_table)
    return pred

# 5. Test on full set
@torch.no_grad()
def test(am, X_test, Y_test, position_table, grayscale_table):
    H_test = encode_batch(X_test, position_table, grayscale_table).float()  # (N_test, dim)
    h_norm = H_test.norm(dim=1, keepdim=True)                              # (N,1)
    a_norm = am.norm(dim=1, keepdim=True).t()                              # (1,C)
    sims   = (H_test @ am.t()) / (h_norm * a_norm)                         # (N,C)
    preds  = sims.argmax(dim=1)                                            # (N,)
    acc    = (preds == Y_test).float().mean().item()
    print(f"Testing accuracy: {acc:.4f}")
    return acc

# 6. Load a saved model (AM + tables)
def loadmodel(fpath: str, device: torch.device = None):
    with open(fpath, 'rb') as f:
        am_np, pos_np, gray_np = pickle.load(f)
    am   = torch.from_numpy(am_np)
    pos  = torch.from_numpy(pos_np)
    gray = torch.from_numpy(gray_np)
    if device is not None:
        am, pos, gray = am.to(device), pos.to(device), gray.to(device)
    return am, pos, gray

# 7. Quantize the AM to a lower bit-width
def quantize(am: torch.Tensor, before_bw: int, after_bw: int) -> torch.Tensor:
    if before_bw <= after_bw:
        return am.clone()
    shift = before_bw - after_bw
    return torch.round(am.float() / (2 ** shift)).to(am.dtype)

# 8. Batched AM training
@torch.no_grad()
def train_am_batched(
    X_train: torch.Tensor,
    Y_train: torch.Tensor,
    position_table: torch.Tensor,
    grayscale_table: torch.Tensor,
    dim: int,
    batch_size: int = 128,
    device: torch.device = None
) -> torch.Tensor:
    N = X_train.shape[0]
    C = int(Y_train.max().item()) + 1
    am = torch.zeros(C, dim, device=device, dtype=torch.float32)
    for i in tqdm(range(0, N, batch_size), desc="Training batches"):
        xb = X_train[i : i + batch_size]
        yb = torch.as_tensor(Y_train[i : i + batch_size], device=device)  # <== ✅ fixed
        hb = encode_batch(xb, position_table, grayscale_table).float()
        am = am.index_add(0, yb, hb)
    return am

# 9. Test on a split (non-batched)
@torch.no_grad()
def test_split(am, X_split, Y_split, position_table, grayscale_table):
    Hs   = encode_batch(X_split, position_table, grayscale_table).float()  # (M, dim)
    sims = F.cosine_similarity(Hs.unsqueeze(1), am.unsqueeze(0), dim=2)   # (M, C)
    preds = sims.argmax(dim=1)                                            # (M,)
    return (preds == Y_split).float().mean().item()

# 10. Test on a split (batched)
@torch.no_grad()
def test_split_batched(
    am: torch.Tensor,
    X: torch.Tensor,
    Y: torch.Tensor,
    position_table: torch.Tensor,
    grayscale_table: torch.Tensor,
    encode_fn,
    batch_size: int = 128,
    device: torch.device = None
) -> float:
    correct, total = 0, 0
    for i in range(0, X.size(0), batch_size):
        xb = X[i : i + batch_size].to(device)
        yb = Y[i : i + batch_size].to(device)
        hb = encode_fn(xb, position_table, grayscale_table).float()
        sims  = F.cosine_similarity(hb.unsqueeze(1), am.unsqueeze(0), dim=2)
        preds = sims.argmax(dim=1)
        correct += (preds == yb).sum().item()
        total   += yb.size(0)
    return correct / total

Train shape: (3601, 500), Test shape: (1320, 500)
tensor(0.) tensor(1.)


In [3]:
mnist_path = '../../'
fmnist_path = '../../'
cifar10_path = '../../'

In [4]:
hyperdims = range(5000, 21000, 1000)
hyperdims

range(5000, 21000, 1000)

In [5]:
np.unique(y_train)

array([0, 1])

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# X_train, Y_train, X_test, Y_test = load_dataset(cifar10_path, device)

n_splits   = 10
split_size = X_test.shape[0] // n_splits
hyperdims = hyperdims
accuracies = np.zeros((len(hyperdims), n_splits))
n_class    = 2
q_bit      = 16
print(split_size)

132


In [7]:
torch.max(X_train_tensor), torch.min(X_train_tensor)

(tensor(1.), tensor(0.))

In [8]:
for i, D in enumerate(hyperdims):
    print(f"\n==> Hyperdimension: {D}")
    
    # Dynamically get input dimension
    input_dim = X_train_tensor.shape[1]  # e.g., 3*206
    n_keys = 64                   # number of quantization bins for [-3,+3]
    
    # a) lookup tables
    position_table  = lookup_generate(D, 'bipolar', input_dim, device=device)
    grayscale_table = lookup_generate(D, 'bipolar', n_keys, device=device)

    # b) train AM
    am = train_am_batched(
        X_train_tensor, y_train_tensor,
        position_table, grayscale_table,
        dim=D,
        batch_size=1,
        device=device
    )
    
    # c) quantize AM
    am_q = quantize(am, before_bw=16, after_bw=q_bit)

    # d) evaluate on splits
    for split_idx in range(n_splits):
        start = split_idx * split_size
        end   = start + split_size

        acc = test_split_batched(
            am_q,
            X_test_tensor[start:end],
            y_test_tensor[start:end],
            position_table,
            grayscale_table,
            encode_batch,  
            batch_size=10,
            device=device
        )
        accuracies[i, split_idx] = acc

    print("Accuracy average for 20 rounds:", accuracies[i].mean())

    # Free GPU memory
    del position_table, grayscale_table, am, am_q
    torch.cuda.empty_cache()



==> Hyperdimension: 5000


Training batches: 100%|██████████| 3601/3601 [00:00<00:00, 4084.80it/s]


Accuracy average for 20 rounds: 0.47727272727272724

==> Hyperdimension: 6000


Training batches: 100%|██████████| 3601/3601 [00:00<00:00, 4548.18it/s]


Accuracy average for 20 rounds: 0.4871212121212121

==> Hyperdimension: 7000


Training batches: 100%|██████████| 3601/3601 [00:00<00:00, 4317.00it/s]


Accuracy average for 20 rounds: 0.4962121212121212

==> Hyperdimension: 8000


Training batches: 100%|██████████| 3601/3601 [00:00<00:00, 4027.67it/s]


Accuracy average for 20 rounds: 0.4840909090909092

==> Hyperdimension: 9000


Training batches: 100%|██████████| 3601/3601 [00:00<00:00, 3795.99it/s]


Accuracy average for 20 rounds: 0.5015151515151516

==> Hyperdimension: 10000


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 3583.32it/s]


Accuracy average for 20 rounds: 0.5007575757575757

==> Hyperdimension: 11000


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 3365.53it/s]


Accuracy average for 20 rounds: 0.48636363636363633

==> Hyperdimension: 12000


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 3217.97it/s]


Accuracy average for 20 rounds: 0.48106060606060613

==> Hyperdimension: 13000


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 3062.78it/s]


Accuracy average for 20 rounds: 0.4901515151515152

==> Hyperdimension: 14000


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 2925.95it/s]


Accuracy average for 20 rounds: 0.49469696969696975

==> Hyperdimension: 15000


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 2745.59it/s]


Accuracy average for 20 rounds: 0.4962121212121212

==> Hyperdimension: 16000


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 2696.22it/s]


Accuracy average for 20 rounds: 0.49318181818181817

==> Hyperdimension: 17000


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 2583.46it/s]


Accuracy average for 20 rounds: 0.49015151515151506

==> Hyperdimension: 18000


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 2477.08it/s]


Accuracy average for 20 rounds: 0.4962121212121212

==> Hyperdimension: 19000


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 2388.55it/s]


Accuracy average for 20 rounds: 0.48636363636363633

==> Hyperdimension: 20000


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 2310.27it/s]


Accuracy average for 20 rounds: 0.4871212121212121


In [9]:
np.mean(accuracies, axis=1)

array([0.47727273, 0.48712121, 0.49621212, 0.48409091, 0.50151515,
       0.50075758, 0.48636364, 0.48106061, 0.49015152, 0.49469697,
       0.49621212, 0.49318182, 0.49015152, 0.49621212, 0.48636364,
       0.48712121])

In [10]:
from scipy.io import savemat
savemat('FordA_VanillaHDC.mat', {'FordA_VanillaHDC': accuracies*100})