In [1]:
import numpy as np
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics.pairwise import cosine_distances
from torchvision.datasets import MNIST, FashionMNIST, CIFAR10
from torchvision import transforms
import torch.nn.functional as F
import torch
from tqdm import tqdm
from scipy.io import savemat, loadmat
import torchvision
from torch.utils.data import DataLoader, Subset, TensorDataset

In [2]:
def load_ucr(file):
    data = np.loadtxt(file)
    X = data[:, 1:]
    y = data[:, 0]
    y = np.where(y == 1, 1, 0)  # convert labels to 0/1
    return X, y

# Adjust file paths to your local files
X_train, y_train = load_ucr("../FordA_TRAIN.txt")
X_test, y_test = load_ucr("../FordA_TEST.txt")

print(f"Train shape: {X_train.shape}, Test shape: {X_test.shape}")

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

min_val = X_train_tensor.min()
max_val = X_train_tensor.max()

X_train_tensor = (X_train_tensor - min_val) / (max_val - min_val)
X_test_tensor = (X_test_tensor - min_val) / (max_val - min_val)


train_data = TensorDataset(X_train_tensor, y_train_tensor)
test_data = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32)

print(torch.min(X_train_tensor), torch.max(X_train_tensor))




# 1. Generate bipolar lookup table
def lookup_generate(dim: int, datatype: str, n_keys: int, device: torch.device):
    if datatype != 'bipolar':
        raise ValueError("Only 'bipolar' supported")
    tbl = torch.randint(0, 2, (n_keys, dim), device=device, dtype=torch.int8)
    return tbl * 2 - 1  # map {0,1} → {-1,+1}

# 2. Encode a batch of inputs into hypervectors
@torch.no_grad()
def encode_batch(X: torch.Tensor, position_table: torch.Tensor, grayscale_table: torch.Tensor):
    """
    X:               (N, D_in) float tensor in [-3, +3]
    position_table:  (D_in, dim)
    grayscale_table: (n_keys, dim)
    → returns (N, dim) int tensor
    """
    n_keys = grayscale_table.shape[0]
    X_normalized = ((X + 3) / 6) * (n_keys - 1)  # map [-3,3] → [0, n_keys-1]
    X_indices = X_normalized.round().clamp(0, n_keys - 1).long()  # indices in [0, n_keys-1]
    gray = grayscale_table[X_indices]            # (N, D_in, dim)
    pos  = position_table.unsqueeze(0)           # (1, D_in, dim)
    hv   = (pos * gray).sum(dim=1)               # (N, dim)
    return hv

# 3. Train associative memory by summing all encodings per class
def train_am(X_train, Y_train, position_table, grayscale_table, dim: int):
    H_train = encode_batch(X_train, position_table, grayscale_table).float()  # (N, dim)
    C = int(Y_train.max().item()) + 1
    am = torch.zeros((C, dim), device=X_train.device, dtype=torch.float32)
    am = am.index_add(0, Y_train, H_train)
    return am

# 4. Single-image prediction (returns class and query HV)
@torch.no_grad()
def predict_(am, img, position_table, grayscale_table):
    qhv = encode_batch(img.unsqueeze(0), position_table, grayscale_table).squeeze(0).float()
    sims = F.cosine_similarity(qhv.unsqueeze(0), am, dim=1)  # (C,)
    pred = int(sims.argmax().item())
    return pred, qhv

def predict(am, img, position_table, grayscale_table):
    pred, _ = predict_(am, img, position_table, grayscale_table)
    return pred

# 5. Test on full set
@torch.no_grad()
def test(am, X_test, Y_test, position_table, grayscale_table):
    H_test = encode_batch(X_test, position_table, grayscale_table).float()  # (N_test, dim)
    h_norm = H_test.norm(dim=1, keepdim=True)                              # (N,1)
    a_norm = am.norm(dim=1, keepdim=True).t()                              # (1,C)
    sims   = (H_test @ am.t()) / (h_norm * a_norm)                         # (N,C)
    preds  = sims.argmax(dim=1)                                            # (N,)
    acc    = (preds == Y_test).float().mean().item()
    print(f"Testing accuracy: {acc:.4f}")
    return acc

# 6. Load a saved model (AM + tables)
def loadmodel(fpath: str, device: torch.device = None):
    with open(fpath, 'rb') as f:
        am_np, pos_np, gray_np = pickle.load(f)
    am   = torch.from_numpy(am_np)
    pos  = torch.from_numpy(pos_np)
    gray = torch.from_numpy(gray_np)
    if device is not None:
        am, pos, gray = am.to(device), pos.to(device), gray.to(device)
    return am, pos, gray

# 7. Quantize the AM to a lower bit-width
def quantize(am: torch.Tensor, before_bw: int, after_bw: int) -> torch.Tensor:
    if before_bw <= after_bw:
        return am.clone()
    shift = before_bw - after_bw
    return torch.round(am.float() / (2 ** shift)).to(am.dtype)

# 8. Batched AM training
@torch.no_grad()
def train_am_batched(
    X_train: torch.Tensor,
    Y_train: torch.Tensor,
    position_table: torch.Tensor,
    grayscale_table: torch.Tensor,
    dim: int,
    batch_size: int = 128,
    device: torch.device = None
) -> torch.Tensor:
    N = X_train.shape[0]
    C = int(Y_train.max().item()) + 1
    am = torch.zeros(C, dim, device=device, dtype=torch.float32)
    for i in tqdm(range(0, N, batch_size), desc="Training batches"):
        xb = X_train[i : i + batch_size]
        yb = torch.as_tensor(Y_train[i : i + batch_size], device=device)  # <== ✅ fixed
        hb = encode_batch(xb, position_table, grayscale_table).float()
        am = am.index_add(0, yb, hb)
    return am

# 9. Test on a split (non-batched)
@torch.no_grad()
def test_split(am, X_split, Y_split, position_table, grayscale_table):
    Hs   = encode_batch(X_split, position_table, grayscale_table).float()  # (M, dim)
    sims = F.cosine_similarity(Hs.unsqueeze(1), am.unsqueeze(0), dim=2)   # (M, C)
    preds = sims.argmax(dim=1)                                            # (M,)
    return (preds == Y_split).float().mean().item()

# 10. Test on a split (batched)
@torch.no_grad()
def test_split_batched(
    am: torch.Tensor,
    X: torch.Tensor,
    Y: torch.Tensor,
    position_table: torch.Tensor,
    grayscale_table: torch.Tensor,
    encode_fn,
    batch_size: int = 128,
    device: torch.device = None
) -> float:
    correct, total = 0, 0
    for i in range(0, X.size(0), batch_size):
        xb = X[i : i + batch_size].to(device)
        yb = Y[i : i + batch_size].to(device)
        hb = encode_fn(xb, position_table, grayscale_table).float()
        sims  = F.cosine_similarity(hb.unsqueeze(1), am.unsqueeze(0), dim=2)
        preds = sims.argmax(dim=1)
        correct += (preds == yb).sum().item()
        total   += yb.size(0)
    return correct / total

@torch.no_grad()
def flip_rows_(tensor: torch.Tensor, perc: float) -> torch.Tensor:
    """
    In-place sign flip of `perc` fraction of elements in *every* row.

    tensor : (B, D)  – any real dtype (+1/-1 HVs work fine)
    perc   : 0‒1     – fraction of dimensions to flip per row

    Returns the same tensor object (for chaining).
    """
    if not 0.0 <= perc <= 1.0:
        raise ValueError("perc must be in [0,1]")

    # trivial cases
    if perc == 0.0 or tensor.numel() == 0:
        return tensor

    B, D = tensor.shape
    k = int(round(D * perc))            # exact #dims to flip / row
    if k == 0:
        return tensor

    device = tensor.device
    # --- vectorised selection of k unique indices per row -------------
    # 1) random scores per entry
    rnd = torch.rand(B, D, device=device)
    # 2) take indices of the largest k scores along each row
    _, idx = rnd.topk(k, dim=1, largest=True, sorted=False)  # (B,k)
    # 3) convert (row, col) pairs → flat indices
    base = torch.arange(B, device=device).unsqueeze(1) * D   # (B,1)
    flat_idx = (idx + base).reshape(-1)                      # (B*k,)
    # 4) flip in-place
    tensor.view(-1)[flat_idx] *= -1
    return tensor
    
# 11. Test on a split (batched)
@torch.no_grad()
def test_split_batched(
    am: torch.Tensor,
    X: torch.LongTensor,
    Y: torch.LongTensor,
    position_table: torch.Tensor,
    grayscale_table: torch.Tensor,
    encode_fn,
    flip_perc=0.0,
    batch_size: int = 128,
    device: torch.device = None
) -> float:
    correct, total = 0, 0
    for i in range(0, X.size(0), batch_size):
        xb = X[i : i + batch_size].to(device)
        yb = Y[i : i + batch_size].to(device)
        hb = encode_fn(xb, position_table, grayscale_table).float()
        flip_rows_(hb, perc=flip_perc)
        sims  = F.cosine_similarity(hb.unsqueeze(1), am.unsqueeze(0), dim=2)
        preds = sims.argmax(dim=1)
        correct += (preds == yb).sum().item()
        total   += yb.size(0)
    return correct / total

Train shape: (3601, 500), Test shape: (1320, 500)
tensor(0.) tensor(1.)


In [3]:
np.unique(y_train)

array([0, 1])

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# X_train, Y_train, X_test, Y_test = load_dataset(cifar10_path, device)

n_splits   = 20
split_size = X_test.shape[0] // n_splits
flip_percs = np.arange(0.0, 0.51, 0.05)
accuracies = np.zeros((len(flip_percs), n_splits))
n_class    = 2
q_bit      = 16
D = 15000
subset_size = 500
print(split_size)

66


In [5]:
torch.max(X_train_tensor), torch.min(X_train_tensor)

(tensor(1.), tensor(0.))

In [6]:
for i, perc in enumerate(flip_percs):
    print(f"\n==> Flip Percentage: {np.round(perc, 2)}")
    
    # Dynamically get input dimension
    input_dim = X_train_tensor.shape[1]  # e.g., 3*206
    n_keys = 64                   # number of quantization bins for [-3,+3]
    
    # a) lookup tables
    position_table  = lookup_generate(D, 'bipolar', input_dim, device=device)
    grayscale_table = lookup_generate(D, 'bipolar', n_keys, device=device)

    # b) train AM
    am = train_am_batched(
        X_train_tensor, y_train_tensor,
        position_table, grayscale_table,
        dim=D,
        batch_size=1,
        device=device
    )
    
    # c) quantize AM
    am_q = quantize(am, before_bw=16, after_bw=q_bit)

    # d) evaluate on splits
    for split_idx in range(n_splits):
        split_indices = np.random.choice(len(X_test_tensor), size=subset_size, replace=False)
        acc = test_split_batched(
            am_q,
            X_test_tensor[split_indices],
            y_test_tensor[split_indices],
            position_table,
            grayscale_table,
            encode_batch,  
            flip_perc=perc,
            batch_size=10,
            device=device
        )
        accuracies[i, split_idx] = acc

    print("Accuracy average for 20 rounds:", accuracies[i].mean())

    # Free GPU memory
    del position_table, grayscale_table, am, am_q
    torch.cuda.empty_cache()



==> Flip Percentage: 0.0


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 2569.98it/s]


Accuracy average for 20 rounds: 0.4835999999999999

==> Flip Percentage: 0.05


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 2833.86it/s]


Accuracy average for 20 rounds: 0.4925

==> Flip Percentage: 0.1


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 2807.64it/s]


Accuracy average for 20 rounds: 0.49379999999999996

==> Flip Percentage: 0.15


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 2804.42it/s]


Accuracy average for 20 rounds: 0.4888

==> Flip Percentage: 0.2


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 2823.70it/s]


Accuracy average for 20 rounds: 0.4972

==> Flip Percentage: 0.25


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 2815.92it/s]


Accuracy average for 20 rounds: 0.49499999999999994

==> Flip Percentage: 0.3


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 2823.67it/s]


Accuracy average for 20 rounds: 0.4901999999999999

==> Flip Percentage: 0.35


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 2804.81it/s]


Accuracy average for 20 rounds: 0.48250000000000004

==> Flip Percentage: 0.4


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 2816.43it/s]


Accuracy average for 20 rounds: 0.5056

==> Flip Percentage: 0.45


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 2813.56it/s]


Accuracy average for 20 rounds: 0.5035999999999999

==> Flip Percentage: 0.5


Training batches: 100%|██████████| 3601/3601 [00:01<00:00, 2842.43it/s]


Accuracy average for 20 rounds: 0.492


In [7]:
np.mean(accuracies, axis=1)

array([0.4836, 0.4925, 0.4938, 0.4888, 0.4972, 0.495 , 0.4902, 0.4825,
       0.5056, 0.5036, 0.492 ])

In [8]:
from scipy.io import savemat
savemat('FordA_VanillaHDC.mat', {'FordA_VanillaHDC': accuracies*100})