In [1]:
import numpy as np
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm
# import torch
# from torchvision.datasets import FashionMNIST, MNIST
# from torchvision.transforms import ToTensor, Lambda
import matplotlib.pyplot as plt
import os
import gzip
import csv
import torch
from torch.utils.data import TensorDataset, DataLoader
from torch import Tensor
from torchvision import datasets, transforms
from scipy.io import loadmat, savemat
import random

In [2]:
# Read the stored hyperdimension table and collapse it to one value per row.
# dtype=int makes np.mean accumulate and return integers, so each entry can be
# used directly as a tensor dimension.
nhd_mat = loadmat('../EHDGNet_CIFAR10_nHD.mat')
hyperdims = np.mean(nhd_mat['EHDGNet_CIFAR10_nHD'], axis=1, dtype=int)
hyperdims

array([15000, 17250, 19000, 21250, 23250, 25250, 27000, 29000, 31000,
       33000, 35000])

In [3]:
# 1. Device and hyperparameters
# (`hyperdims` is produced by the cell that loads EHDGNet_CIFAR10_nHD.mat.)
device     = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_splits   = 20   # number of equal, contiguous test-set chunks scored separately
batch_size = 1    # samples encoded per chunk by encode_dataset_batched

# 2. Load & preprocess CIFAR-10
transform = transforms.Compose([
    transforms.ToTensor(),                           # → [0,1], shape (3,32,32)
    transforms.Lambda(lambda x: (x > 0.5).float()),  # binarize every channel value
    transforms.Lambda(lambda x: x.view(-1))          # flatten → (3072,)
])

train_ds = datasets.CIFAR10(root='../../Data', train=True,  download=True, transform=transform)
test_ds  = datasets.CIFAR10(root='../../Data', train=False, download=True, transform=transform)

# Walk each dataset exactly once: every access decodes and transforms the
# image, so collecting images and labels in separate passes doubles that cost.
train_imgs, train_lbls = zip(*train_ds)
X_train = torch.stack(train_imgs, dim=0).to(device)   # (N_train, 3072)
y_train = torch.tensor(train_lbls, device=device)     # (N_train,)

test_imgs, test_lbls = zip(*test_ds)
X_test  = torch.stack(test_imgs, dim=0).to(device)    # (N_test, 3072)
y_test  = torch.tensor(test_lbls, device=device)      # (N_test,)

B = X_train.size(1)                   # number of binary features per image
print(B, 'num features')
C = len(torch.unique(y_train))        # number of distinct class labels
# Floor division: if N_test is not a multiple of n_splits the tail is unused.
split_size = X_test.size(0) // n_splits

# 3. HDC utility functions

def generate_base_HDVs(D, B, device, seed=42):
    """
    Draw one random binary hypervector per input feature.

    D:       hypervector length
    B:       number of features (rows)
    device:  torch device the matrix is created on
    seed:    value passed to torch.manual_seed before sampling; note that this
             reseeds torch's global RNG as a side effect
    returns: (B, D) int32 tensor whose entries are 0 or 1
    """
    torch.manual_seed(seed)
    uniform_draws = torch.rand(B, D, device=device)
    # Threshold the uniform samples at 0.5 to obtain the bits.
    return uniform_draws.gt(0.5).to(torch.int32)

@torch.no_grad()
def encode_dataset_batched(X, base_HDVs, batch_size=128):
    """
    Encode every row of X as a binary hypervector, processing X in chunks.

    For each feature j, a sample contributes the base hypervector of j when
    X[n, j] == 0 and that hypervector rolled by one position when X[n, j] == 1.
    The B contributions are averaged and rounded to the nearest integer
    (torch.round rounds exact halves to the even value, so a 0.5 mean gives 0).

    X:          (N, B) tensor with values in {0,1}
    base_HDVs:  (B, D) intTensor {0,1}
    batch_size: number of rows of X encoded per step; must be >= 1
    returns:    (N, D) int32 tensor {0,1}; shape (0, D) when X has no rows
    raises:     ValueError if batch_size < 1
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    N, B = X.shape
    D    = base_HDVs.shape[1]

    # torch.cat cannot concatenate an empty list, so answer empty input directly.
    if N == 0:
        return torch.empty((0, D), dtype=torch.int32, device=base_HDVs.device)

    # Do the arithmetic in X's float type; fall back to the default float type
    # when X is an integer/bool tensor.
    work_dtype = X.dtype if X.is_floating_point() else torch.get_default_dtype()

    # sum_j [x_j * perm_j + (1 - x_j) * base_j] = x @ (perm - base) + sum_j base_j
    # Writing the bundle as a matrix product avoids building a (b, B, D)
    # intermediate for every chunk. For 0/1 inputs all partial sums are small
    # integers, which floating point represents exactly.
    base     = base_HDVs.to(work_dtype)              # (B, D)
    base_sum = base.sum(dim=0)                       # (D,)
    delta    = base.roll(shifts=1, dims=1) - base    # (B, D), perm - base
    del base                                         # only delta/base_sum are needed below

    chunks = []
    for i in range(0, N, batch_size):
        xb      = X[i : i+batch_size].to(work_dtype)    # (b, B)
        H_float = (xb @ delta + base_sum) / B           # (b, D) mean over features
        chunks.append(torch.round(H_float).int())       # (b, D)

    return torch.cat(chunks, dim=0)  # (N, D)

def encode_class_HDVs(H_train, y_train, C):
    """
    Build one prototype hypervector per class by bundling its training HDVs.

    Each prototype is the element-wise mean of the rows of H_train whose label
    equals the class index, rounded with torch.round and cast to int32.

    H_train: (N, D) encoded training samples
    y_train: (N,) labels; classes are addressed as 0 .. C-1
    C:       number of classes
    returns: (C, D) int32 tensor
    """
    prototypes = [
        torch.round(H_train[y_train == label].float().mean(dim=0)).int()  # (D,)
        for label in range(C)
    ]
    return torch.stack(prototypes, dim=0)

@torch.no_grad()
def predict(H_test, class_HDVs):
    """
    Assign each query hypervector to the class prototype at the smallest
    Hamming distance.

    H_test:     (M, D) query hypervectors
    class_HDVs: (C, D) class prototypes
    returns:    (M,) index of the closest prototype for every query
    """
    # Compare every query with every prototype position by position: (M, C, D).
    mismatches = torch.ne(H_test[:, None, :], class_HDVs[None, :, :])
    # Number of differing positions = Hamming distance: (M, C).
    hamming = mismatches.sum(dim=-1)
    return hamming.argmin(dim=1)

Files already downloaded and verified
Files already downloaded and verified
3072 num features


In [4]:
# Optional toggle (left disabled): integer-divide every hyperdimension by 100,
# presumably to get a much cheaper trial run. Note it rebinds `hyperdims`, so
# enabling it and re-running this cell would shrink the values again.
# hyperdims = hyperdims // 100
print(hyperdims)

[15000 17250 19000 21250 23250 25250 27000 29000 31000 33000 35000]


In [5]:
n_splits   = 20
# Derive the chunk length from the n_splits set in this cell so the slicing
# below always agrees with the shape of `accuracies`, rather than relying on a
# split_size computed earlier from a possibly different n_splits.
# Floor division: leftover samples at the end of X_test are not scored.
split_size = X_test.size(0) // n_splits

# accuracies[k, i] = fraction of correct predictions for hyperdims[k] on test chunk i
accuracies = np.zeros((len(hyperdims), n_splits), dtype=float)
for idx, D in enumerate(hyperdims):
    print(f"\n==> Hyperdimension: {D}")
    # Seed every RNG with the loop index so each hyperdimension is reproducible.
    torch.manual_seed(idx)
    np.random.seed(idx)
    random.seed(idx)

    # Train: encode the training set once and bundle it into class prototypes.
    base_HDVs  = generate_base_HDVs(D, B, device, seed=idx)
    H_train    = encode_dataset_batched(X_train, base_HDVs, batch_size)
    class_HDVs = encode_class_HDVs(H_train, y_train, C)

    # Evaluate: score each contiguous test chunk independently.
    for i in tqdm(range(n_splits)):
        s, e = i * split_size, (i + 1) * split_size
        Xs, ys = X_test[s:e], y_test[s:e]

        Hs    = encode_dataset_batched(Xs, base_HDVs, batch_size)
        preds = predict(Hs, class_HDVs)

        accuracies[idx, i] = (preds == ys).float().mean().item()

    print(f"Average accuracy: {accuracies[idx].mean().item():.4f} for Hyperdim {D}")


==> Hyperdimension: 15000


100%|██████████| 20/20 [00:17<00:00,  1.13it/s]


Average accuracy: 0.2730 for Hyperdim 15000

==> Hyperdimension: 17250


100%|██████████| 20/20 [00:20<00:00,  1.02s/it]


Average accuracy: 0.2740 for Hyperdim 17250

==> Hyperdimension: 19000


100%|██████████| 20/20 [00:22<00:00,  1.11s/it]


Average accuracy: 0.2714 for Hyperdim 19000

==> Hyperdimension: 21250


100%|██████████| 20/20 [00:25<00:00,  1.25s/it]


Average accuracy: 0.2738 for Hyperdim 21250

==> Hyperdimension: 23250


100%|██████████| 20/20 [00:27<00:00,  1.37s/it]


Average accuracy: 0.2709 for Hyperdim 23250

==> Hyperdimension: 25250


100%|██████████| 20/20 [00:29<00:00,  1.48s/it]


Average accuracy: 0.2728 for Hyperdim 25250

==> Hyperdimension: 27000


100%|██████████| 20/20 [00:31<00:00,  1.57s/it]


Average accuracy: 0.2734 for Hyperdim 27000

==> Hyperdimension: 29000


100%|██████████| 20/20 [00:33<00:00,  1.69s/it]


Average accuracy: 0.2760 for Hyperdim 29000

==> Hyperdimension: 31000


100%|██████████| 20/20 [00:36<00:00,  1.81s/it]


Average accuracy: 0.2713 for Hyperdim 31000

==> Hyperdimension: 33000


100%|██████████| 20/20 [00:38<00:00,  1.92s/it]


Average accuracy: 0.2767 for Hyperdim 33000

==> Hyperdimension: 35000


100%|██████████| 20/20 [00:40<00:00,  2.04s/it]

Average accuracy: 0.2752 for Hyperdim 35000





In [8]:
# Persist the (len(hyperdims), n_splits) accuracy table, scaled to percent.
# savemat is already imported in the notebook's first cell, so it is not
# re-imported here.
savemat('HoloGN_CIFAR10.mat', {'HoloGN_CIFAR10': accuracies*100})

In [9]:
# Per-hyperdimension mean and (population, ddof=0) standard deviation of the
# accuracy across test chunks, in percent.
accuracy_pct = accuracies * 100
accuracy_pct.mean(axis=1), accuracy_pct.std(axis=1)

(array([27.30000131, 27.40000106, 27.14000128, 27.38000073, 27.09000126,
        27.28000127, 27.34000131, 27.60000132, 27.13000111, 27.67000124,
        27.52000131]),
 array([1.90840236, 2.03076367, 2.19827214, 2.00189911, 1.94676688,
        1.98937191, 1.78896647, 2.00698792, 2.12252235, 2.01124322,
        2.20671725]))