In [9]:
from aeon.datasets import load_classification
from sklearn.preprocessing import StandardScaler, LabelEncoder
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
import time
from tqdm import tqdm
from scipy.io import loadmat, savemat
from typing import Optional, Tuple

In [10]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
dataset_name = 'WalkingSittingStanding'

# Hypervector dimensionality used when no per-dataset `<name>_nHD.mat` file can be read.
DEFAULT_HYPERDIMS = 15_000

try:
    NHDC = loadmat(f'../{dataset_name}_nHD.mat')[f'{dataset_name}_nHD']
    # NOTE(review): the mean over axis=1 yields one integer per row of NHDC, so
    # `hyperdims` is an array on this path but a plain int on the fallback path --
    # confirm that downstream code accepts both.
    hyperdims = np.mean(NHDC, axis=1, dtype=int)
except Exception as exc:
    # Best-effort load: a missing file, a missing key or an unreadable file falls back
    # to the default. `Exception` (rather than a bare `except:`) lets KeyboardInterrupt
    # and SystemExit propagate, and the reason for the fallback is reported.
    print(f"Could not load hyperdimensions for {dataset_name} ({exc!r}); "
          f"falling back to {DEFAULT_HYPERDIMS}")
    hyperdims = DEFAULT_HYPERDIMS


In [11]:
# Load the predefined train / test partitions of the dataset selected by
# `dataset_name` (set in the configuration cell above).
X_train, y_train, metadata = load_classification(dataset_name, return_metadata=True, split='train')
X_test, y_test = load_classification(dataset_name, split='test')

# When the provided training partition is small (< 200 samples) but the test partition
# is not, reload the whole dataset and use its first quarter for training and the
# remaining three quarters for testing.
if X_train.shape[0] < 200 and X_test.shape[0] >= 200:
    # Integer division: slice bounds must be ints (`* 1/4` produced a float, which
    # makes the slicing below raise TypeError).
    train_size = (X_train.shape[0] + X_test.shape[0]) // 4
    x, y = load_classification(dataset_name)
    X_train, y_train = x[:train_size], y[:train_size]
    X_test, y_test = x[train_size:], y[train_size:]

In [12]:
# A 3-D array is read as (samples, channels, time); anything else counts as one channel.
input_channels = X_train.shape[1] if X_train.ndim == 3 else 1
# The last axis is the length of each series.
seq_length = X_train.shape[-1]

# String / object labels are mapped to integer ids; the encoder is fitted on the
# training labels only and then applied to the test labels.
if isinstance(y_train[0], str) or y_train.dtype == object:
    le = LabelEncoder()
    y_train = le.fit_transform(y_train)
    y_test = le.transform(y_test)

In [13]:
# Flatten every sample to a single row and standardise each feature using statistics
# estimated on the training split only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.reshape(len(X_train), -1))
X_test_scaled = scaler.transform(X_test.reshape(len(X_test), -1))

# Per-feature extrema of the standardised training data.
X_min = X_train_scaled.min(axis=0)
X_max = X_train_scaled.max(axis=0)

# Feature range; constant features (range 0) get a range of 1 so the division is safe.
denom = X_max - X_min
denom = np.where(denom == 0, 1, denom)

# Min-max scale both splits with the training statistics, then clip to [0, 1]
# (test values can fall outside the range seen during training).
X_train_norm = np.clip((X_train_scaled - X_min) / denom, 0, 1)
X_test_norm = np.clip((X_test_scaled - X_min) / denom, 0, 1)

# float32 copies on the compute device.
X_train_tensor = torch.tensor(X_train_norm, dtype=torch.float32, device=device)
X_test_tensor = torch.tensor(X_test_norm, dtype=torch.float32, device=device)

In [14]:
# Shapes of the raw training array, its flattened/standardised version, and the
# normalised tensor that is later passed to the encoder.
X_train.shape, X_train_scaled.shape, X_train_tensor.shape

((7352, 3, 206), (7352, 618), torch.Size([7352, 618]))

In [15]:
# Integer class labels as int64 tensors on the compute device.
y_train_tensor = torch.tensor(y_train, dtype=torch.long, device=device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long, device=device)

# Pair features with labels; only the training loader shuffles its samples.
train_data = TensorDataset(X_train_tensor, y_train_tensor)
test_data = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_data, shuffle=True, batch_size=32)
test_loader = DataLoader(test_data, shuffle=False, batch_size=32)

# Number of distinct labels present in the training split.
num_classes = np.unique(y_train).size

In [16]:
# Sanity check: smallest and largest value of the normalised training tensor.
print(X_train_tensor.min(), X_train_tensor.max())

B = X_train_tensor.size(1)             # number of features per (flattened) sample
C = y_train_tensor.unique().numel()    # number of distinct labels in the training set
n_rounds = 5                           # not referenced by the cells visible here

# 3. HDC utility functions

def generate_base_HDVs(D, B, device, generator: Optional[torch.Generator] = None):
    """Generate a (B × D) random binary matrix of base hypervectors.

    Parameters
    ----------
    D         hypervector dimensionality (number of columns)
    B         number of base hypervectors, one per input feature (number of rows)
    device    device on which the matrix is created
    generator optional torch.Generator for reproducible draws; when given it must
              live on the same device as `device`. None uses the global RNG.

    Returns
    -------
    (B, D) int32 tensor whose entries are 0 or 1, each drawn by thresholding a
    uniform sample at 0.5.
    """
    uniform = torch.rand(B, D, device=device, generator=generator)
    return (uniform > 0.5).int()  # intTensor of 0/1

@torch.no_grad()
def encode_dataset_batched(X, base_HDVs, batch_size=128):
    """
    Encode X into binary hypervectors, processing `batch_size` rows at a time so the
    (b, B, D) intermediate stays small.

    For every feature the encoder blends that feature's base hypervector with its
    copy rolled by one position, weighted by the feature value (value 1 selects the
    rolled copy, value 0 the base copy, values in between interpolate). The blended
    vectors are averaged over the features and rounded to {0,1}.

    X:          (N, B) floatTensor with values in [0,1]
    base_HDVs:  (B, D) intTensor {0,1}
    batch_size: number of rows of X encoded per chunk
    returns:    (N, D) int32 tensor {0,1} on the device of base_HDVs;
                an empty (0, D) tensor when X has no rows
    """
    N, _ = X.shape
    D = base_HDVs.shape[1]
    target_device = base_HDVs.device

    # Nothing to encode: torch.cat would raise on an empty chunk list.
    if N == 0:
        return torch.empty((0, D), dtype=torch.int32, device=target_device)

    # Precompute roll-shifted HDVs once
    perm_HDVs = base_HDVs.roll(shifts=1, dims=1)  # (B, D)

    # Expand for broadcasting
    base = base_HDVs.unsqueeze(0)   # (1, B, D)
    perm = perm_HDVs.unsqueeze(0)   # (1, B, D)

    chunks = []
    for i in range(0, N, batch_size):
        xb = X[i : i + batch_size].to(target_device)    # (b, B)
        xb_exp = xb.unsqueeze(-1)                        # (b, B, 1)

        # Feature value 1 picks perm, 0 picks base, in between interpolates
        weighted = xb_exp * perm + (1 - xb_exp) * base   # (b, B, D)
        H_float = weighted.mean(dim=1)                   # (b, D)
        chunks.append(torch.round(H_float).int())        # (b, D)

    return torch.cat(chunks, dim=0)  # (N, D)

def encode_class_HDVs(H_train, y_train, C):
    """
    Build one prototype hypervector per class by majority vote.

    For each label 0..C-1 the training hypervectors carrying that label are averaged
    element-wise and the mean is rounded back to an integer bit.

    H_train: (N, D) encoded training samples
    y_train: (N,)   integer labels
    C:       number of classes
    returns: (C, D) int tensor, row c being the prototype of class c
    """
    prototypes = [
        torch.round(H_train[y_train == label].float().mean(dim=0)).int()   # (D,)
        for label in range(C)
    ]
    return torch.stack(prototypes, dim=0)    # (C, D)

@torch.no_grad()
def predict(H_test, class_HDVs):
    """
    Assign each query hypervector to the class prototype with the fewest differing
    positions (smallest Hamming distance).

    H_test:     (M, D) query hypervectors
    class_HDVs: (C, D) class prototypes
    returns:    (M,) index of the closest prototype for every query
    """
    queries = H_test.unsqueeze(1)           # (M, 1, D)
    prototypes = class_HDVs.unsqueeze(0)    # (1, C, D)
    hamming = torch.ne(queries, prototypes).sum(dim=2)   # (M, C)
    return torch.argmin(hamming, dim=1)

@torch.no_grad()
def flip_bits_indexed_unique(
    H: torch.IntTensor,
    perc: float,
    *,
    inplace: bool = True,
    generator: Optional[torch.Generator] = None,
) -> torch.IntTensor:
    """
    Flip *exactly* k = round(perc·D) unique bits in each row of H,
    without allocating an (M×D) mask.

    Parameters
    ----------
    H         (M, D) int tensor of {0,1}
    perc      fraction in [0,1); values <= 0 leave H untouched
    inplace   modify H if True, otherwise work on a clone
    generator optional torch.Generator for deterministic behaviour
              (NOTE(review): presumably must be on the same device as H -- confirm)

    Returns
    -------
    Tensor with same shape/dtype/device as H, bits flipped. With inplace=True this
    is H itself. An input with no rows, or a k that rounds to 0, is returned
    unchanged.
    """
    out = H if inplace else H.clone()
    if perc <= 0.0:
        return out

    M, D = H.shape
    k = int(round(perc * D))
    # Nothing to flip; M == 0 would also make torch.cat fail on an empty list below.
    if k == 0 or M == 0:
        return out

    # Row index of every flip → shape (M·k,)
    rows = torch.arange(M, device=H.device).repeat_interleave(k)

    # Uniform sampling weights are identical for every row, so build them once.
    weights = torch.ones(D, device=H.device)

    # Draw k distinct column indices per row; (M·k,) total
    cols = torch.cat([
        torch.multinomial(weights, k, replacement=False, generator=generator)
        for _ in range(M)
    ])

    # Flip: XOR with 1 toggles 0 ↔ 1
    out[rows, cols] ^= 1
    return out

tensor(0., device='cuda:0') tensor(1., device='cuda:0')


In [19]:
# Noise-robustness experiment: for every flip percentage, evaluate the HDC classifier
# on `n_splits` disjoint slices of the test set after flipping that fraction of bits in
# each encoded test hypervector. `accuracies[i, j]` is the accuracy for flip level i on
# slice j. The `device` chosen in the configuration cell is reused so that new tensors
# land on the same device as the data tensors.
SEED = 0                                  # fixed seed so the figures can be reproduced
rng = np.random.default_rng(SEED)
torch.manual_seed(SEED)

batch_size = 10                           # rows encoded per chunk
n_splits = 20
split_size = len(test_data) // n_splits   # leftover samples beyond n_splits * split_size are unused
# linspace gives exactly 11 levels (0.00, 0.05, ..., 0.50); a float-step arange can
# gain or lose its last element through rounding.
percs = np.linspace(0.0, 0.5, 11)
accuracies = np.zeros((len(percs), n_splits), dtype=float)

for idx, perc in enumerate(percs):
    print(f"\n==> Flipping Percentage: {perc}")

    # Shuffle once per flip level so the slices below partition the test set;
    # reshuffling inside the split loop would make the slices overlap.
    indices = rng.permutation(len(test_data)).tolist()

    for split_idx in range(n_splits):
        start_idx = split_idx * split_size
        split_indices = indices[start_idx:start_idx + split_size]

        # A fresh random codebook and fresh class prototypes for every trial.
        base_HDVs  = generate_base_HDVs(hyperdims, B, device)
        H_train    = encode_dataset_batched(X_train_tensor, base_HDVs, batch_size)
        class_HDVs = encode_class_HDVs(H_train, y_train_tensor, C)

        Xs, ys = X_test_tensor[split_indices], y_test_tensor[split_indices]
        Hs     = encode_dataset_batched(Xs, base_HDVs, batch_size)
        flip_bits_indexed_unique(Hs, perc)          # modifies Hs in-place
        preds  = predict(Hs, class_HDVs)

        accuracies[idx, split_idx] = (preds.cpu() == ys.cpu()).float().mean().item()

    print(f"Average accuracy: {accuracies[idx].mean().item():.4f} for flip percentage {perc:.2f}")


==> Flipping Percentage: 0.0
Average accuracy: 0.4956 for Hyperdim 0.0

==> Flipping Percentage: 0.05
Average accuracy: 0.5129 for Hyperdim 0.05

==> Flipping Percentage: 0.1
Average accuracy: 0.5095 for Hyperdim 0.1

==> Flipping Percentage: 0.15000000000000002
Average accuracy: 0.4963 for Hyperdim 0.15000000000000002

==> Flipping Percentage: 0.2
Average accuracy: 0.5020 for Hyperdim 0.2

==> Flipping Percentage: 0.25
Average accuracy: 0.5116 for Hyperdim 0.25

==> Flipping Percentage: 0.30000000000000004
Average accuracy: 0.5065 for Hyperdim 0.30000000000000004

==> Flipping Percentage: 0.35000000000000003
Average accuracy: 0.4952 for Hyperdim 0.35000000000000003

==> Flipping Percentage: 0.4
Average accuracy: 0.5133 for Hyperdim 0.4

==> Flipping Percentage: 0.45
Average accuracy: 0.4786 for Hyperdim 0.45

==> Flipping Percentage: 0.5
Average accuracy: 0.1653 for Hyperdim 0.5


In [20]:
# Mean accuracy over the splits, one value per flip percentage.
accuracies.mean(axis=1)

array([0.49557823, 0.51292517, 0.5095238 , 0.49625851, 0.50204081,
       0.51156462, 0.50646258, 0.49523809, 0.51326531, 0.47857143,
       0.16530612])

In [21]:
# Write the accuracy matrix (rows: flip percentages, columns: splits), scaled to
# percent, to `<dataset_name>_HoloGN.mat` under the variable `<dataset_name>_HoloGN`.
savemat(f'{dataset_name}_HoloGN.mat', {f'{dataset_name}_HoloGN': accuracies*100})

In [22]:
# Standard deviation of the accuracy (in percent) over the splits, per flip percentage.
(accuracies * 100).std(axis=1)

array([4.57997969, 4.17356764, 2.73719713, 2.70809013, 4.4535745 ,
       3.83977278, 4.23452296, 3.38499431, 4.89831306, 4.21700106,
       2.61794305])