In [1]:
from aeon.datasets import load_classification
from sklearn.preprocessing import StandardScaler, LabelEncoder
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
import time
from tqdm import tqdm
from scipy.io import loadmat, savemat

In [2]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
dataset_name = 'Epilepsy'

# Hyperdimensions to sweep. They are preferably derived from a precomputed
# '../<dataset_name>_nHD.mat' file (integer mean of each row of the stored
# matrix); when that file cannot be used, a default grid is swept instead.
try:
    NHDC = loadmat(f'../{dataset_name}_nHD.mat')[f'{dataset_name}_nHD']
    hyperdims = np.mean(NHDC, axis=1, dtype=int)
except (OSError, KeyError, ValueError, NotImplementedError) as err:
    # Only the expected failures (file missing/unreadable, variable absent,
    # unsupported or malformed .mat content) trigger the fallback. Anything
    # else -- KeyboardInterrupt, a NameError from a typo, ... -- propagates
    # instead of being silently swallowed by a bare `except:`.
    print(f"Could not load hyperdimensions from '../{dataset_name}_nHD.mat' "
          f"({err!r}); falling back to the default grid.")
    hyperdims = range(1000, 5500, 500)


In [4]:
dataset_name = 'Epilepsy'
# Load the predefined train and test partitions of the dataset.
X_train, y_train, metadata = load_classification(dataset_name, return_metadata=True, split='train')
X_test, y_test = load_classification(dataset_name, split='test')

# When the predefined training partition is small (< 200 cases) while the test
# partition is large (>= 200 cases), re-partition the data loaded without a
# `split` argument: the first quarter becomes the training set, the rest the test set.
# NOTE(review): the data is sliced in stored order without shuffling -- confirm
# that this keeps every class represented in the training part.
if X_train.shape[0] < 200 and X_test.shape[0] >= 200:
    # Integer division: slice bounds must be ints. The previous `* 1/4` produced
    # a float, which made the slicing below raise TypeError.
    train_size = (X_train.shape[0] + X_test.shape[0]) // 4
    x, y = load_classification(dataset_name)
    X_train, y_train = x[:train_size, :], y[:train_size]
    X_test, y_test = x[train_size:, :], y[train_size:]

In [5]:
# Number of channels: taken from axis 1 when the data is 3-D, otherwise the
# series are treated as single-channel.
input_channels = X_train.shape[1] if X_train.ndim == 3 else 1
# The series length is always the trailing axis.
seq_length = X_train.shape[-1]

# Textual / object-typed class labels are mapped to integer ids. The mapping is
# learned from the training labels and then applied to both partitions.
if y_train.dtype == object or isinstance(y_train[0], str):
    le = LabelEncoder().fit(y_train)
    y_train = le.transform(y_train)
    y_test = le.transform(y_test)

In [6]:
# Flatten every sample to a single feature vector and standardise each feature
# with statistics fitted on the training partition only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.reshape(len(X_train), -1))
X_test_scaled = scaler.transform(X_test.reshape(len(X_test), -1))

# Per-feature min-max rescaling, again using training-partition statistics.
X_min = X_train_scaled.min(axis=0)
X_max = X_train_scaled.max(axis=0)
denom = X_max - X_min
np.putmask(denom, denom == 0, 1)  # constant features: divide by 1 instead of 0

# Rescale, then clip so that test values outside the training range stay in [0, 1].
X_train_norm = np.clip((X_train_scaled - X_min) / denom, 0, 1)
X_test_norm = np.clip((X_test_scaled - X_min) / denom, 0, 1)

# Move the normalised features onto the compute device as float32 tensors.
X_train_tensor = torch.tensor(X_train_norm, dtype=torch.float32, device=device)
X_test_tensor = torch.tensor(X_test_norm, dtype=torch.float32, device=device)

In [7]:
# Inspect shapes: raw array, flattened + scaled array, and the final feature tensor.
X_train.shape, X_train_scaled.shape, X_train_tensor.shape

((137, 3, 206), (137, 618), torch.Size([137, 618]))

In [8]:
# Integer class ids as long tensors, placed on the same device as the features.
y_train_tensor = torch.tensor(y_train, dtype=torch.long, device=device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long, device=device)

# Pair features with labels; only the training loader shuffles its samples.
train_data = TensorDataset(X_train_tensor, y_train_tensor)
test_data = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_data, shuffle=True, batch_size=32)
test_loader = DataLoader(test_data, shuffle=False, batch_size=32)

# Number of distinct labels present in the training partition.
num_classes = np.unique(y_train).size

In [9]:
# Sanity check: show the smallest and largest value of the normalised training features.
print(X_train_tensor.min(), X_train_tensor.max())

B = X_train_tensor.size(1)            # number of flattened input features per sample
C = y_train_tensor.unique().numel()   # number of distinct class labels in the training set
n_rounds = 5                          # not referenced anywhere else in the visible notebook

# HDC utility functions: base hypervector generation, sample encoding, class bundling, prediction

def generate_base_HDVs(D, B, device, generator=None):
    """Generate a (B × D) random binary matrix.

    Each entry is drawn independently and is 1 when a uniform sample in [0, 1)
    exceeds 0.5, else 0.

    Args:
        D: hypervector dimensionality (number of columns).
        B: number of base hypervectors (number of rows).
        device: torch device on which the matrix is created.
        generator: optional ``torch.Generator`` used for sampling, which makes
            the draw reproducible. It must live on ``device``. When ``None``
            (the default) the global torch RNG is used.

    Returns:
        int32 tensor of shape (B, D) containing only 0s and 1s.
    """
    # int() lets callers pass NumPy integer scalars for the sizes.
    uniform = torch.rand(int(B), int(D), generator=generator, device=device)
    return (uniform > 0.5).int()  # intTensor of 0/1

@torch.no_grad()
def encode_dataset_batched(X, base_HDVs, batch_size=128):
    """
    Encode X into hypervectors, processing it in chunks to limit peak memory.

    For every feature, the sample's value acts as a blend weight between the
    feature's base hypervector (weight ``1 - x``) and a copy of it rolled by one
    position (weight ``x``). The blended vectors are averaged over all features
    and rounded to the nearest integer.

    X:          (N, B) floatTensor; the blend is a convex combination when the
                values lie in [0, 1]
    base_HDVs:  (B, D) intTensor {0,1}
    batch_size: number of samples encoded per chunk; each chunk materialises a
                (batch_size, B, D) float tensor
    returns:    (N, D) int32 tensor on the device of ``base_HDVs``

    Raises ValueError if ``batch_size`` is not positive.
    Note: ``torch.round`` rounds halves to the nearest even integer, so an
    average of exactly 0.5 becomes 0.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    N, _ = X.shape  # unpacking also enforces a 2-D input
    D = base_HDVs.shape[1]

    # Nothing to encode: torch.cat on an empty list would raise, so return an
    # empty result of the right width/dtype/device instead.
    if N == 0:
        return torch.empty((0, D), dtype=torch.int32, device=base_HDVs.device)

    # Precompute roll-shifted HDVs once
    perm_HDVs = base_HDVs.roll(shifts=1, dims=1)  # (B, D)

    # Expand for broadcasting against a chunk of samples
    base = base_HDVs.unsqueeze(0)   # (1, B, D)
    perm = perm_HDVs.unsqueeze(0)   # (1, B, D)

    chunks = []
    for i in range(0, N, batch_size):
        xb = X[i:i + batch_size].to(base_HDVs.device)   # (b, B)
        xb_exp = xb.unsqueeze(-1)                       # (b, B, 1)

        # Value 1 selects the rolled vector, value 0 the base vector;
        # values in between blend the two linearly.
        weighted = xb_exp * perm + (1 - xb_exp) * base  # (b, B, D)
        H_float = weighted.mean(dim=1)                  # (b, D)
        chunks.append(torch.round(H_float).int())       # (b, D)

    return torch.cat(chunks, dim=0)  # (N, D)

def encode_class_HDVs(H_train, y_train, C):
    """
    Bundle all train-HDVs per class into one prototype hypervector.

    For each class id ``c`` in ``range(C)`` the hypervectors whose label equals
    ``c`` are averaged element-wise and rounded to the nearest integer
    (``torch.round`` rounds exact halves to the nearest even integer, so a
    0.5 average becomes 0).

    H_train: (N, D) encoded training samples
    y_train: (N,)   integer class ids, expected to cover 0 .. C-1
    C:       number of classes
    returns: (C, D) int32 tensor; row ``c`` is the prototype of class ``c``

    Raises ValueError if some class in ``range(C)`` has no training sample:
    the mean of an empty selection is NaN, and casting NaN to int would
    silently produce a meaningless prototype.
    """
    class_HDVs = []
    for c in range(C):
        subset = H_train[y_train == c]        # (Nc, D)
        if subset.shape[0] == 0:
            raise ValueError(f"no training samples for class {c}; cannot build its class hypervector")
        m = subset.float().mean(dim=0)        # (D,)
        class_HDVs.append(torch.round(m).int())

    if not class_HDVs:
        # C == 0: torch.stack rejects an empty list, so return an empty (0, D) result.
        return torch.empty((0, H_train.shape[1]), dtype=torch.int32, device=H_train.device)
    return torch.stack(class_HDVs, dim=0)    # (C, D)

@torch.no_grad()
def predict(H_test, class_HDVs):
    """
    Assign each encoded sample to the class prototype with the fewest
    differing positions (smallest Hamming distance).

    H_test:     (M, D) encoded samples
    class_HDVs: (C, D) class prototypes
    returns:    (M,) index of the closest prototype for every sample
    """
    # Compare every sample against every prototype position by position.
    mismatches = torch.ne(H_test[:, None, :], class_HDVs[None, :, :])  # (M, C, D)
    # Count the mismatching positions per (sample, prototype) pair.
    hamming = mismatches.sum(dim=-1)                                    # (M, C)
    return torch.argmin(hamming, dim=1)

tensor(0., device='cuda:0') tensor(1., device='cuda:0')


In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = 10   # samples encoded per chunk
n_splits = 20     # repetitions per hyperdimension
split_size = len(test_data) // n_splits  # computed here but not used in the visible code
# One row per hyperdimension, one column per repetition; values are accuracies in percent.
accuracies = np.zeros(shape=(len(hyperdims), n_splits), dtype=float)

for idx, D in enumerate(hyperdims):
    print(f"\n==> Hyperdimension: {D}")
    for split_idx in range(n_splits):
        # Every repetition draws fresh random base hypervectors and rebuilds
        # the class prototypes from the whole training set.
        base_HDVs = generate_base_HDVs(D, B, device)
        H_train = encode_dataset_batched(X_train_tensor, base_HDVs, batch_size)
        class_HDVs = encode_class_HDVs(H_train, y_train_tensor, C)

        # The full test set is evaluated in every repetition; only the random
        # base hypervectors differ between repetitions.
        Xs, ys = X_test_tensor, y_test_tensor
        Hs = encode_dataset_batched(Xs, base_HDVs, batch_size)
        preds = predict(Hs, class_HDVs)

        # Fraction of correct predictions, stored as a percentage.
        accuracies[idx, split_idx] = torch.eq(preds.cpu(), ys.cpu()).float().mean().item() * 100

    print(f"Average accuracy: {accuracies[idx].mean().item():.4f} for Hyperdim {D}")


==> Hyperdimension: 5666
Average accuracy: 48.2609 for Hyperdim 5666

==> Hyperdimension: 6333
Average accuracy: 47.8986 for Hyperdim 6333

==> Hyperdimension: 7333
Average accuracy: 47.7174 for Hyperdim 7333

==> Hyperdimension: 8666
Average accuracy: 48.7319 for Hyperdim 8666

==> Hyperdimension: 9666
Average accuracy: 47.9710 for Hyperdim 9666

==> Hyperdimension: 10666
Average accuracy: 48.4420 for Hyperdim 10666

==> Hyperdimension: 12000
Average accuracy: 48.6594 for Hyperdim 12000

==> Hyperdimension: 13000
Average accuracy: 49.0580 for Hyperdim 13000

==> Hyperdimension: 13666
Average accuracy: 48.6957 for Hyperdim 13666

==> Hyperdimension: 14666
Average accuracy: 48.8406 for Hyperdim 14666

==> Hyperdimension: 16000
Average accuracy: 49.1304 for Hyperdim 16000


In [11]:
# Mean accuracy (percent) over the repetitions, one value per hyperdimension.
accuracies.mean(axis=1)

array([48.26086998, 47.89855123, 47.7173917 , 48.73188436, 47.97101498,
       48.44202936, 48.65942061, 49.05797124, 48.69565248, 48.84057999,
       49.13043499])

In [12]:
# Save the accuracy matrix (rows: hyperdimensions, columns: repetitions) to
# '<dataset_name>_HoloGN.mat' in the current working directory, stored under
# the variable name '<dataset_name>_HoloGN'.
savemat(f'{dataset_name}_HoloGN.mat', {f'{dataset_name}_HoloGN': accuracies})

In [13]:
# Standard deviation of the accuracy (percent) over the repetitions, per hyperdimension.
accuracies.std(axis=1)

array([1.75116567, 1.61871758, 1.62478894, 1.74327665, 0.78045848,
       1.41768064, 1.4360809 , 1.53889533, 1.28814381, 1.49211995,
       1.13654952])