In [1]:
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
import torch
import numpy as np
import time
from utils import HDDataset
from model import BModel
from torchvision import datasets, transforms
import scipy
from encoder_1d import RandomFourierEncoder
import main2_1d as main2 
import importlib
importlib.reload(main2)
import csv
from scipy.io import savemat, loadmat
from torch.utils.data import DataLoader, Subset, TensorDataset
from aeon.datasets import load_classification
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.data import DataLoader, Subset
import matplotlib.pyplot as plt
from tqdm import tqdm


In [2]:
# Use the first CUDA GPU when PyTorch reports one as available; otherwise use the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

In [3]:
dataset_name = 'Epilepsy'
# Load the predefined train and test splits (metadata is requested for the train split only).
X_train, y_train, metadata = load_classification(dataset_name, return_metadata=True, split='train')
X_test, y_test = load_classification(dataset_name, split='test')

# If the train split has fewer than 200 cases while the test split has at least
# 200, reload the dataset without a split argument and re-partition it so the
# first 3/4 of the cases are used for training and the remainder for testing.
if X_train.shape[0] < 200 and X_test.shape[0] >= 200:
    # Integer arithmetic on purpose: `* 3/4` yields a float, and a float slice
    # bound makes the indexing below raise TypeError.
    train_size = (X_train.shape[0] + X_test.shape[0]) * 3 // 4
    x, y = load_classification(dataset_name)
    X_train, y_train = x[:train_size], y[:train_size]
    X_test, y_test = x[train_size:], y[train_size:]

In [4]:
# A 3-D input array carries its channel count on axis 1; anything else is
# treated as single-channel. The sequence length is always the last axis.
input_channels = X_train.shape[1] if X_train.ndim == 3 else 1
seq_length = X_train.shape[-1]

# Non-numeric labels are mapped to integer ids. The encoder is fitted on the
# training labels only and then applied unchanged to the test labels.
labels_need_encoding = y_train.dtype == object or isinstance(y_train[0], str)
if labels_need_encoding:
    le = LabelEncoder()
    y_train = le.fit_transform(y_train)
    y_test = le.transform(y_test)

In [5]:
# Step 1: flatten every case to a single row and standardize each column with
# statistics fitted on the training set only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.reshape(X_train.shape[0], -1))
X_test_scaled = scaler.transform(X_test.reshape(X_test.shape[0], -1))

# Step 2: min-max rescale each column using the training-set range.
X_min = X_train_scaled.min(axis=0)
X_max = X_train_scaled.max(axis=0)
denom = X_max - X_min
# Constant columns have zero range; divide by 1 there to avoid division by zero.
denom = np.where(denom == 0, 1, denom)

# Step 3: clip into [0, 1]; test values can fall outside the training range.
X_train_norm = np.clip((X_train_scaled - X_min) / denom, 0, 1)
X_test_norm = np.clip((X_test_scaled - X_min) / denom, 0, 1)

# Step 4: materialize the features as float32 tensors on the selected device.
X_train_tensor = torch.tensor(X_train_norm, dtype=torch.float32, device=device)
X_test_tensor = torch.tensor(X_test_norm, dtype=torch.float32, device=device)

In [6]:
# Sanity check: display the shape of the flattened, normalized training tensor.
X_train_tensor.shape

torch.Size([137, 618])

In [7]:
# Integer class labels as int64 tensors, placed on the same device as the features.
y_train_tensor = torch.tensor(y_train, dtype=torch.long, device=device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long, device=device)

# Pair features with labels. Only the training loader shuffles; the test
# loader keeps a fixed order.
train_data = TensorDataset(X_train_tensor, y_train_tensor)
test_data = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(dataset=train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=32, shuffle=False)

# Number of distinct labels present in the training set.
num_classes = np.unique(y_train).size

In [8]:
# Show the value range of the normalized test features and the distinct training labels.
X_test_tensor.min(), X_test_tensor.max(), np.unique(y_train)

(tensor(0., device='cuda:0'), tensor(1., device='cuda:0'), array([0, 1, 2, 3]))

In [9]:
# Experiment configuration.
# gamma and gorder are passed to RandomFourierEncoder; lr is passed to train().
gorder = 2
dim = 10000
gamma = 0.3
lr = 0.01
# Take the dataset-dependent sizes from the loaded data rather than hard-coding
# them: the previous literal `classes = 10` did not match the number of labels
# actually present in y_train.
classes = num_classes
channels = input_channels
epochs = 3
# Short aliases for the training / evaluation entry points of main2.
train = main2.train
test = main2.test

In [10]:
# Re-display the training tensor shape before the sweep (same check as an earlier cell).
X_train_tensor.shape

torch.Size([137, 618])

In [14]:
# Number of repetitions per hypervector size. Each repetition builds a new
# encoder and scores it on the FULL test set.
n_splits = 20
# NOTE(review): split_size is not used by the loop below; it only matters if
# evaluation is switched to disjoint test subsets of this size.
split_size = len(test_data) // n_splits

# Hypervector sizes to sweep: the integer row-wise mean of the matrix stored in
# ../<dataset>_nHD.mat when it can be read, otherwise a default grid.
try:
    hyperdims = np.mean(loadmat(f'../{dataset_name}_nHD.mat')[f'{dataset_name}_nHD'],
                        axis=1, dtype=int)
except Exception as err:
    # Still best-effort, but no longer swallows KeyboardInterrupt/SystemExit,
    # and the reason for falling back is reported instead of hidden.
    print(f'Could not load hyperdims from .mat file ({err!r}); using default range')
    hyperdims = range(5000, 15000, 2500)
print(hyperdims)

# One row per hypervector size, one column per repetition.
accuracies1 = np.zeros((len(hyperdims), n_splits))
# Number of scalar features in a single training sample (kept as a 0-d tensor).
input_dim = torch.prod(torch.tensor(list(train_data[0][0].size())))
print(input_dim)

for i, nHDC in enumerate(hyperdims):
    print(f'nHDC: {nHDC} ------------')
    for split_idx in range(n_splits):
        # Build a new encoder and its item memory for this repetition.
        encoder = RandomFourierEncoder(input_dim, gamma, gorder, output_dim=nHDC)
        encoder.build_item_mem()

        # Encode the training set. Labels get loop-local names so the
        # notebook-level y_train / y_test arrays are not overwritten.
        train_hd, train_labels = encoder.encode_data_extract_labels(train_data)
        train_dataset = HDDataset(train_hd.cpu(), train_labels.cpu())
        trainloader = DataLoader(train_dataset, batch_size=10, shuffle=True, pin_memory=False, num_workers=0)

        # Encode the test set with the same encoder.
        # NOTE(review): unlike the training data, these tensors are not moved
        # to the CPU before being wrapped - confirm this is intentional.
        test_hd, test_labels = encoder.encode_data_extract_labels(test_data)
        test_dataset = HDDataset(test_hd, test_labels)
        testloader = DataLoader(test_dataset, batch_size=10, shuffle=False, pin_memory=False, num_workers=0)

        # Store the value returned by train() as this repetition's accuracy.
        accuracies1[i, split_idx] = train(trainloader, testloader, lr, nHDC)

        # Drop references to the encoded data before the next repetition.
        del testloader, test_dataset, test_hd, test_labels
        del trainloader, train_dataset, train_hd, train_labels

    print(f'nHDC: {nHDC} ----------------------------------------- Finished')
    print(f'Average Accuracy for nHDC={nHDC}: {np.mean(accuracies1[i, :])}')

[ 5666  6333  7333  8666  9666 10666 12000 13000 13666 14666 16000]
tensor(618)
nHDC: 5666 ------------
nHDC: 5666 ----------------------------------------- Finished
Average Accuracy for nHDC=5666: 74.56521739130434
nHDC: 6333 ------------
nHDC: 6333 ----------------------------------------- Finished
Average Accuracy for nHDC=6333: 74.71014492753622
nHDC: 7333 ------------
nHDC: 7333 ----------------------------------------- Finished
Average Accuracy for nHDC=7333: 75.07246376811595
nHDC: 8666 ------------
nHDC: 8666 ----------------------------------------- Finished
Average Accuracy for nHDC=8666: 76.41304347826086
nHDC: 9666 ------------
nHDC: 9666 ----------------------------------------- Finished
Average Accuracy for nHDC=9666: 77.06521739130434
nHDC: 10666 ------------
nHDC: 10666 ----------------------------------------- Finished
Average Accuracy for nHDC=10666: 76.05072463768116
nHDC: 12000 ------------
nHDC: 12000 ----------------------------------------- Finished
Average Accur

In [15]:
# Mean accuracy over the repetitions, one value per hypervector size.
accuracies1.mean(axis=1)

array([74.56521739, 74.71014493, 75.07246377, 76.41304348, 77.06521739,
       76.05072464, 76.8115942 , 80.18115942, 77.24637681, 79.85507246,
       78.44202899])

In [16]:
# Persist the full accuracy matrix to a MATLAB file in the working directory,
# stored under a variable named after the dataset.
savemat(
    file_name=f'{dataset_name}_RFFHDC.mat',
    mdict={f'{dataset_name}_RFFHDC': accuracies1},
)