In [3]:
import torch

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using {device} device")

# Number of hypervector dimensions.
DIMENSIONS = 1024
# Mini-batch size; GPUs with enough memory can process several samples at once.
BATCH_SIZE = 12

Using cuda device


# 数据集

下载好 ISOLET 数据集。

通过 `torch.utils.data.Dataset` 构建数据集类，实现必要的方法。

## Programming Note
在Python中，`from typing import ...` 是一种导入语句，用于从 `typing` 模块导入特定的类型注解工具。`typing` 模块提供了用于支持Python类型提示（Type Hints）的工具，这些工具可以帮助开发者为函数参数和返回值指定预期的类型，从而提高代码的可读性和可维护性。

以下是这些常用导入项（`Callable`、`Optional`、`Tuple`、`List`）的简要说明：

1. **`Callable`**：
   - `Callable` 是一个类型注解，用于注解一个可调用对象，即任何可以像函数一样被调用的对象。例如，函数、方法、类，以及定义了 `__call__` 方法的类的实例等。
   - 示例：`def process_data(func: Callable[[int, str], float]) -> None:` 表示 `process_data` 函数接受一个参数 `func`，它是一个接受一个整数和一个字符串并返回一个浮点数的可调用对象。

2. **`Optional`**：
   - `Optional` 是一个类型注解，用于注解一个可能为 `None` 的值。
   - 示例：`def greet(name: Optional[str]) -> None:` 表示 `greet` 函数接受一个参数 `name`，它可以是一个字符串或者 `None`。

3. **`Tuple`**：
   - `Tuple` 是一个泛型类型注解，用于注解一个元组，其元素类型是固定的。
   - 示例：`def get_coordinates() -> Tuple[int, int]:` 表示 `get_coordinates` 函数返回一个包含两个整数的元组。

4. **`List`**：
   - `List` 是一个泛型类型注解，用于注解一个列表，其元素类型是固定的。
   - 示例：`def get_numbers() -> List[int]:` 表示 `get_numbers` 函数返回一个包含整数的列表。

这些类型注解工具使得代码更加清晰，因为它们明确了函数参数和返回值的预期类型。此外，它们还可以被静态类型检查器（如 `mypy`）用来检查类型错误，从而在代码运行之前发现潜在的问题。

以下是如何使用这些类型注解的一个示例：

```python
from typing import Callable, Optional, Tuple, List

# 定义一个函数，接受一个可调用对象作为参数；该可调用对象接受一个整数和一个字符串，并返回一个浮点数
def process_data(func: Callable[[int, str], float]) -> None:
    result = func(10, "hello")
    print(result)

# 定义一个函数，接受一个可能为None的字符串参数
def greet(name: Optional[str]) -> None:
    if name is not None:
        print(f"Hello, {name}!")
    else:
        print("Hello, stranger!")

# 定义一个函数，返回一个整数元组
def get_coordinates() -> Tuple[int, int]:
    return (1, 2)

# 定义一个函数，返回一个整数列表
def get_numbers() -> List[int]:
    return [1, 2, 3]
```

在这个示例中，我们使用了 `Callable`、`Optional`、`Tuple` 和 `List` 来为函数参数和返回值添加类型注解。


In [None]:
import os
import os.path
import zipfile
import pandas as pd
from torch.utils.data import Dataset
from typing import Callable, Optional, Tuple, List

class ISOLET(Dataset):
    """`ISOLET <https://archive.ics.uci.edu/ml/datasets/isolet>`_ dataset.

    The data files must already be downloaded and extracted; this class
    neither downloads nor unpacks anything and does not create directories.

    Args:
        root (str): Parent directory containing an ``isolet`` sub-directory in
            which ``isolet1+2+3+4.data`` and ``isolet5.data`` exist. A leading
            ``~`` is expanded to the user's home directory.
        train (bool, optional): If True, creates the dataset from
            ``isolet1+2+3+4.data``, otherwise from ``isolet5.data``.
        transform (callable, optional): A function/transform that takes in a
            sample tensor and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes
            in the target and transforms it.

    Raises:
        RuntimeError: If ``<root>/isolet`` is not a directory or one of the
            two data files is missing.
    """

    # Human-readable class names; position ``i`` corresponds to target ``i``.
    classes: List[str] = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")

    def __init__(
        self,
        root: str,
        train: bool = True,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
    ) -> None:
        super().__init__()
        # The files live in the "isolet" sub-directory of the given root.
        self.root = os.path.expanduser(os.path.join(root, "isolet"))

        self.train = train
        self.transform = transform
        self.target_transform = target_transform

        # Nothing is downloaded here, so the directory is deliberately NOT
        # created: a wrong path must fail without touching the file system.
        if not self._check_integrity():
            raise RuntimeError("Dataset not found or corrupted.")

        self._load_data()

    def __len__(self) -> int:
        """Return the number of samples in the loaded split."""
        return self.data.size(0)

    def __getitem__(self, index) -> Tuple[torch.FloatTensor, torch.LongTensor]:
        """Fetch the sample and target stored at ``index``.

        Args:
            index (int): Index of the sample.

        Returns:
            Tuple: ``(sample, target)`` where target is the index of the
            target class. ``transform`` / ``target_transform`` are applied
            when they are set.
        """
        sample = self.data[index]
        label = self.targets[index]

        # Compare against None explicitly so that a callable which happens to
        # be falsy (e.g. an object defining ``__len__``) is still applied.
        if self.transform is not None:
            sample = self.transform(sample)

        if self.target_transform is not None:
            label = self.target_transform(label)

        return sample, label

    def _check_integrity(self) -> bool:
        """Check that ``self.root`` is a directory containing both data files.

        Both the training and the test file are required, regardless of which
        split is going to be loaded.

        Returns:
            bool: True if the directory and both files exist, else False.
        """
        if not os.path.isdir(self.root):
            return False

        # TODO: Add more specific checks like an MD5 checksum
        required_files = ("isolet1+2+3+4.data", "isolet5.data")
        return all(
            os.path.isfile(os.path.join(self.root, file_name))
            for file_name in required_files
        )

    def _load_data(self) -> None:
        """Read the selected split into ``self.data`` and ``self.targets``.

        The file is chosen by ``self.train``: ``isolet1+2+3+4.data`` when
        True, ``isolet5.data`` otherwise. It is parsed as header-less CSV;
        every column but the last becomes a float feature, and the last
        column becomes the integer target.
        """
        data_file = "isolet1+2+3+4.data" if self.train else "isolet5.data"
        table = pd.read_csv(os.path.join(self.root, data_file), header=None)
        values = table.values

        self.data = torch.tensor(values[:, :-1], dtype=torch.float)
        # Shift the labels down by one so they can index ``classes``
        # (presumably the files store 1-based labels -- verify against data).
        self.targets = torch.tensor(values[:, -1], dtype=torch.long) - 1


In [13]:

# Training split, served in shuffled mini-batches.
train_ds = ISOLET(root="../../data", train=True)
train_ld = torch.utils.data.DataLoader(
    train_ds,
    shuffle=True,
    batch_size=BATCH_SIZE,
)

# Test split, served in its stored order.
test_ds = ISOLET(root="../../data", train=False)
test_ld = torch.utils.data.DataLoader(
    test_ds,
    shuffle=False,
    batch_size=BATCH_SIZE,
)

# Problem size: number of classes and length of one sample's feature vector.
num_classes = len(train_ds.classes)
num_features = train_ds[0][0].shape[-1]

# Names of the classifiers to train and evaluate, in this order.
classifiers = [
    "Vanilla", "AdaptHD", "OnlineHD", "NeuralHD", "DistHD",
    "CompHD", "SparseHD", "QuantHD", "LeHDC", "IntRVFL",
]


In [None]:
# Per-feature standard deviation and mean, computed over the training split.
std, mean = torch.std_mean(train_ds.data, dim=0, keepdim=False)

# A feature that is constant across the training split has std == 0, which
# would make the division below yield NaN/inf. Divide such features by 1
# instead; every feature with non-zero std is unaffected.
std = torch.where(std == 0, torch.ones_like(std), std)


def transform(sample):
    """Standardise ``sample`` feature-wise using the training ``mean``/``std``."""
    return (sample - mean) / std


# Both splits share the same standardisation function.
train_ds.transform = test_ds.transform = transform

# Extra constructor keyword arguments for each classifier, keyed by name.
# An empty dict means the classifier is built with its defaults.
params = {
    "Vanilla": dict(),
    "AdaptHD": dict(epochs=10),
    "OnlineHD": dict(epochs=10),
    "NeuralHD": dict(epochs=10, regen_freq=5),
    "DistHD": dict(epochs=10, regen_freq=5),
    "CompHD": dict(),
    "SparseHD": dict(epochs=10),
    "QuantHD": dict(epochs=10),
    "LeHDC": dict(epochs=10),
    "IntRVFL": dict(),
}

# Train and evaluate each listed classifier in turn, printing its test accuracy.
# NOTE(review): `torchhd` is not imported in any cell visible here -- confirm
# that `import torchhd` is executed before this cell runs.
for classifier in classifiers:
    print("\n" + classifier)

    # Look the class up by name and build it with its entry from `params`.
    model_cls = getattr(torchhd.classifiers, classifier)
    model: torchhd.classifiers.Classifier = model_cls(
        num_features,
        DIMENSIONS,
        num_classes,
        device=device,
        **params[classifier],
    )

    model.fit(train_ld)
    accuracy = model.accuracy(test_ld)
    print("Testing accuracy of {:.3f}%".format(accuracy * 100))


训练