In [1]:
import numpy as np
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm
# import torch
# from torchvision.datasets import FashionMNIST, MNIST
# from torchvision.transforms import ToTensor, Lambda
import matplotlib.pyplot as plt
import os
import gzip
import csv
import torch
from torch.utils.data import TensorDataset, DataLoader
from torch import Tensor
from torchvision import datasets, transforms
from scipy.io import loadmat, savemat
from typing import Optional, Tuple

In [2]:
# Length D of every hypervector (base, sample and class vectors all share it).
hyperdims = 15_000


In [3]:
# 1. Device and hyperparameters
# (`hyperdims` is assigned in the previous cell and is intentionally not
#  re-assigned here; the old `hyperdims = hyperdims` line had no effect.)
device     = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_splits   = 20   # number of equal-sized test-set chunks scored separately
batch_size = 10   # rows handed to encode_dataset_batched per chunk

# 2. Load & preprocess MNIST
transform = transforms.Compose([
    transforms.ToTensor(),                           # → [0,1], shape (1,28,28)
    transforms.Lambda(lambda x: (x > 0.5).float()),  # binarize
    transforms.Lambda(lambda x: x.view(-1))         # flatten → (784,)
])

train_ds = datasets.MNIST(root='../../Data', train=True,  download=True, transform=transform)
test_ds  = datasets.MNIST(root='../../Data', train=False, download=True, transform=transform)


def _materialize(dataset, device):
    """
    Walk `dataset` exactly once and return (images, labels) as tensors on `device`.

    Iterating the dataset applies `transform` to every image, so collecting
    images and labels in the same pass avoids transforming each image a second
    time just to read its label.
    """
    images, labels = [], []
    for img, lbl in dataset:
        images.append(img)
        labels.append(lbl)
    return torch.stack(images, dim=0).to(device), torch.tensor(labels, device=device)


X_train, y_train = _materialize(train_ds, device)  # (60000, 784), (60000,)
X_test,  y_test  = _materialize(test_ds,  device)  # (10000, 784), (10000,)

B = X_train.size(1)                   # 784 pixels
C = len(torch.unique(y_train))        # 10 classes
split_size = X_test.size(0) // n_splits  # test samples per split (remainder, if any, is dropped)

# 3. HDC utility functions

def generate_base_HDVs(D, B, device):
    """
    Draw one random binary hypervector per input position.

    D:       length of each hypervector
    B:       number of hypervectors (rows)
    device:  torch device the result is created on
    returns: (B, D) int32 tensor of 0/1; an entry is 1 when its uniform
             [0, 1) sample exceeds 0.5
    """
    uniform_draws = torch.rand(B, D, device=device)
    return torch.gt(uniform_draws, 0.5).to(torch.int32)

@torch.no_grad()
def encode_dataset_batched(X, base_HDVs, batch_size=128):
    """
    Encode X in chunks to avoid OOM.

    For every sample, position j contributes the roll-shifted base vector
    when pixel j is 1 and the unshifted base vector when it is 0; the B
    contributions are averaged and rounded to a single binary hypervector.

    X:          (N, B) floatTensor {0,1}
    base_HDVs:  (B, D) intTensor {0,1}
    batch_size: rows processed per chunk; each chunk materialises a
                (batch_size, B, D) float intermediate, so keep it small
                for large D
    returns:    (N, D) intTensor {0,1}; an empty (0, D) tensor when N == 0
    raises:     ValueError if batch_size is not positive
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    N, _ = X.shape
    D    = base_HDVs.shape[1]

    # Nothing to encode: torch.cat would reject an empty chunk list.
    if N == 0:
        return torch.empty((0, D), dtype=torch.int32, device=X.device)

    # Precompute roll-shifted HDVs once
    perm_HDVs = base_HDVs.roll(shifts=1, dims=1)  # (B, D)

    # Expand for broadcasting
    base = base_HDVs.unsqueeze(0)   # (1, B, D)
    perm = perm_HDVs.unsqueeze(0)   # (1, B, D)

    chunks = []
    for start in range(0, N, batch_size):
        xb     = X[start : start + batch_size]   # (b, B)
        xb_exp = xb.unsqueeze(-1)                # (b, B, 1)

        # When pixel==1 pick perm, else pick base
        weighted = xb_exp * perm + (1 - xb_exp) * base  # (b, B, D)
        H_float  = weighted.mean(dim=1)                 # (b, D)
        chunks.append(torch.round(H_float).int())       # (b, D)

    return torch.cat(chunks, dim=0)  # (N, D)

def encode_class_HDVs(H_train, y_train, C):
    """
    Build one prototype hypervector per class by bundling its training HDVs.

    Each prototype is the element-wise mean of the class's rows, rounded to 0/1.

    H_train: (N, D) encoded training samples
    y_train: (N,)   integer labels, compared against 0 .. C-1
    C:       number of classes
    returns: (C, D) int tensor, row c is the prototype of class c

    NOTE(review): assumes every label in range(C) occurs at least once; the
    mean over an empty selection is NaN, so such a row would not be meaningful.
    """
    def _bundle(label):
        members = H_train[y_train == label]              # (Nc, D)
        return torch.round(members.float().mean(dim=0)).int()

    return torch.stack([_bundle(label) for label in range(C)], dim=0)

@torch.no_grad()
def predict(H_test, class_HDVs):
    """
    Assign each query hypervector to the class prototype it differs from least.

    H_test:     (M, D) query hypervectors
    class_HDVs: (C, D) class prototypes
    returns:    (M,) index of the prototype with the smallest Hamming distance

    Note: the comparison broadcasts to an (M, C, D) boolean tensor.
    """
    queries    = H_test.unsqueeze(1)       # (M, 1, D)
    prototypes = class_HDVs.unsqueeze(0)   # (1, C, D)

    # Count mismatching positions for every (query, prototype) pair.
    hamming = torch.ne(queries, prototypes).sum(dim=2)   # (M, C)
    return torch.argmin(hamming, dim=1)                  # (M,)

@torch.no_grad()
def flip_bits_indexed_unique(
    H: torch.IntTensor,
    perc: float,
    *,
    inplace: bool = True,
    generator: Optional[torch.Generator] = None,
) -> torch.IntTensor:
    """
    Flip *exactly* k = round(perc·D) unique bits in each row of H,
    without allocating an (M×D) mask.

    Parameters
    ----------
    H         (M, D) int tensor of {0,1}
    perc      fraction in [0,1]; values <= 0 leave H untouched. A value above 1
              makes k exceed D, which torch.multinomial cannot sample without
              replacement.
    inplace   modify H if True, otherwise work on a clone
    generator optional torch.Generator for deterministic behaviour

    Returns
    -------
    Tensor with same shape/dtype/device as H, bits flipped. When `inplace`
    is True this is H itself. An H with zero rows is returned unchanged.
    """
    out = H if inplace else H.clone()

    if perc <= 0.0:
        return out

    M, D = H.shape
    k = int(round(perc * D))
    # k == 0: nothing to flip.  M == 0: no rows, and torch.cat below would
    # reject an empty list of index chunks.
    if k == 0 or M == 0:
        return out

    # Row index of every flipped position → shape (M·k,)
    rows = torch.arange(M, device=H.device).repeat_interleave(k)

    # Uniform sampling weights are identical for every row, so build them once.
    uniform_weights = torch.ones(D, device=H.device)

    # One draw of k distinct column indices per row; (M·k,) total.
    # Sampling row by row keeps peak memory at O(D + M·k) instead of O(M·D).
    cols = torch.cat([
        torch.multinomial(
            uniform_weights,
            k,
            replacement=False,
            generator=generator,
        )
        for _ in range(M)
    ])

    # Flip: XOR with 1 toggles 0 ↔ 1
    out[rows, cols] ^= 1
    return out

In [4]:
seed       = 0    # fixed RNG seed so re-running this cell reproduces the same table
n_splits   = 20
# Derive the chunk size right next to n_splits so the two cannot drift apart
# if n_splits is edited in this cell only.
split_size = X_test.size(0) // n_splits
percs = np.arange(0.0, 0.55, 0.05)

# accuracies[i, j] = accuracy at flip percentage percs[i] on test split j
accuracies = np.zeros((len(percs), n_splits), dtype=float)

# Both the base hypervectors and the bit flips draw from torch's default RNG.
torch.manual_seed(seed)

for idx, perc in enumerate(percs):
    print(f"\n==> Flip Percentage: {perc}")

    # A fresh random basis is drawn and the model is rebuilt for every percentage.
    base_HDVs  = generate_base_HDVs(hyperdims, B, device)
    H_train    = encode_dataset_batched(X_train, base_HDVs, batch_size)
    class_HDVs = encode_class_HDVs(H_train, y_train, C)

    for i in tqdm(range(n_splits)):
        s, e = i * split_size, (i + 1) * split_size
        Xs, ys = X_test[s:e], y_test[s:e]

        Hs    = encode_dataset_batched(Xs, base_HDVs, batch_size)
        flip_bits_indexed_unique(Hs, perc)          # modifies Hs in-place
        preds = predict(Hs, class_HDVs)

        accuracies[idx, i] = (preds == ys).float().mean().item()

    print(f"Average accuracy: {accuracies[idx].mean().item():.4f} for flip percentage {perc}")


==> Flip Percentage: 0.0


100%|██████████| 20/20 [00:04<00:00,  4.38it/s]


Average accuracy: 0.8085 for flip percentage 0.0

==> Flip Percentage: 0.05


100%|██████████| 20/20 [00:06<00:00,  3.14it/s]


Average accuracy: 0.8027 for flip percentage 0.05

==> Flip Percentage: 0.1


100%|██████████| 20/20 [00:06<00:00,  3.18it/s]


Average accuracy: 0.8036 for flip percentage 0.1

==> Flip Percentage: 0.15000000000000002


100%|██████████| 20/20 [00:06<00:00,  3.17it/s]


Average accuracy: 0.7952 for flip percentage 0.15000000000000002

==> Flip Percentage: 0.2


100%|██████████| 20/20 [00:06<00:00,  3.15it/s]


Average accuracy: 0.7973 for flip percentage 0.2

==> Flip Percentage: 0.25


100%|██████████| 20/20 [00:06<00:00,  3.15it/s]


Average accuracy: 0.7894 for flip percentage 0.25

==> Flip Percentage: 0.30000000000000004


100%|██████████| 20/20 [00:06<00:00,  2.96it/s]


Average accuracy: 0.7817 for flip percentage 0.30000000000000004

==> Flip Percentage: 0.35000000000000003


100%|██████████| 20/20 [00:06<00:00,  2.96it/s]


Average accuracy: 0.7462 for flip percentage 0.35000000000000003

==> Flip Percentage: 0.4


100%|██████████| 20/20 [00:06<00:00,  2.99it/s]


Average accuracy: 0.6768 for flip percentage 0.4

==> Flip Percentage: 0.45


100%|██████████| 20/20 [00:06<00:00,  2.98it/s]


Average accuracy: 0.4753 for flip percentage 0.45

==> Flip Percentage: 0.5


100%|██████████| 20/20 [00:06<00:00,  2.97it/s]

Average accuracy: 0.0959 for flip percentage 0.5





In [5]:
from scipy.io import savemat, loadmat
# Persist the (flip percentage × split) accuracy grid, scaled to percent,
# as the MATLAB variable 'HoloGN_MNIST'.
savemat(
    'HoloGN_MNIST.mat',
    {'HoloGN_MNIST': accuracies * 100},
)