In [1]:
import torchhd
from torchhd.datasets import AirfoilSelfNoise
from torchhd import embeddings

In [2]:
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import torchmetrics
from tqdm import tqdm

In [3]:
DIMENSIONS = 10_000

In [4]:
class BaselineModel(torch.nn.Module):
    def __init__(self, num_features, lr=0.00001) -> None:
        super(BaselineModel, self).__init__()
        
        self.lr = lr
        self.M = torch.zeros(1, DIMENSIONS)
        self.project = embeddings.Projection(num_features, DIMENSIONS)
        
    def encode(self, x):
        sample_hv = self.project(x)
        return torchhd.hard_quantize(sample_hv)
    
    def model_update(self, x, y):
        update = self.M + self.lr * (y - (F.linear(x, self.M))) * x
        update = update.mean(0)
        # print("update: ", update)
        self.M = update
        
    def forward(self, x):
        return F.linear(self.encode(x), self.M)

In [5]:
class LevelHVModel(torch.nn.Module):
    def __init__(self, num_features, num_levels=100, lr=0.00001) -> None:
        super(LevelHVModel, self).__init__()
        
        self.lr = lr
        self.M = torch.zeros(1, DIMENSIONS)
        self.project = embeddings.Projection(num_features, DIMENSIONS)
        self.num_levels = num_levels
        self.embed = embeddings.Level(num_levels, DIMENSIONS, low=-3, high=3)
        self.memory = self.embed.weight
        
    def encode(self, x):
        sample_hv = self.project(x)
        return torchhd.hard_quantize(sample_hv)
    
    def model_update(self, x, y):
        update = self.M + self.lr * torchhd.bind(x, (self.embed(y)))
        update = update.mean(0)
        # print(update)
        self.M = update
        
    def forward(self, x):
        l = torchhd.bind(self.M, torchhd.inverse(self.encode(x)))
        # print(l)
        l = torchhd.cleanup(l, self.memory)
        # print(l)
        i = (self.memory == l).all(dim=1).nonzero().squeeze()
        return (((i / self.num_levels) * (self.embed.high - self.embed.low)) + self.embed.low).mean(0)

In [6]:
dataset = AirfoilSelfNoise('../data', download=True)

STD_DEVS = dataset.data.std(0)
MEANS = dataset.data.mean(0)
TARGET_STD = dataset.targets.std(0)
TARGET_MEAN = dataset.targets.mean(0)
MINS = dataset.data.min(0).values
MAXS = dataset.data.max(0).values
TARGET_MINS = dataset.targets.min(0).values
TARGET_MAXS = dataset.targets.max(0).values

# def transform(x):
#     x = x - MINS
#     x = x / (MAXS - MINS)
#     return x

# def target_transform(x):
#     x = x - TARGET_MINS
#     x = x / (TARGET_MAXS - TARGET_MINS)
#     return x

def transform(x):
    x = x - MEANS
    x = x / STD_DEVS
    return x


def target_transform(x):
    x = x - TARGET_MEAN
    x = x / TARGET_STD
    return x

dataset.transform = transform
dataset.target_transform = target_transform

train_size = int(0.7 * len(dataset))
test_size = len(dataset) - train_size
train_data, test_data = torch.utils.data.random_split(dataset, [train_size, test_size])

train_dataloader = DataLoader(train_data, batch_size=1, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=1)

Files already downloaded and verified


In [7]:
dataset.data

tensor([[8.0000e+02, 0.0000e+00, 3.0480e-01, 7.1300e+01, 2.6634e-03],
        [1.0000e+03, 0.0000e+00, 3.0480e-01, 7.1300e+01, 2.6634e-03],
        [1.2500e+03, 0.0000e+00, 3.0480e-01, 7.1300e+01, 2.6634e-03],
        ...,
        [4.0000e+03, 1.5600e+01, 1.0160e-01, 3.9600e+01, 5.2849e-02],
        [5.0000e+03, 1.5600e+01, 1.0160e-01, 3.9600e+01, 5.2849e-02],
        [6.3000e+03, 1.5600e+01, 1.0160e-01, 3.9600e+01, 5.2849e-02]])

In [136]:
MINS.values

<function Tensor.values>

In [11]:
item, label = next(iter(train_dataloader))
print(item)
print(label)

tensor([[ 1.6220, -0.4701,  0.9841,  1.3125, -0.5425]])
tensor([-1.7377])


In [138]:
# model = BaselineModel(5)
model = LevelHVModel(5, 10000)

In [139]:
with torch.no_grad():
    for _ in range(1):
        for samples, labels in tqdm(train_dataloader, desc="Iteration {}".format(_ + 1)):
            samples_hv = model.encode(samples)
            model.model_update(samples_hv, labels)

Iteration 1: 100%|██████████| 1052/1052 [00:00<00:00, 1698.85it/s]


In [140]:
old_model = model.M
old_model

MAP([ 0.0006, -0.0004,  0.0007,  ..., -0.0005,  0.0011,  0.0008])

In [None]:

mse = torchmetrics.MeanSquaredError()

with torch.no_grad():
    for samples, labels in tqdm(test_dataloader, desc="Testing"):
        predictions = model(samples)
        predictions = predictions * TARGET_STD + TARGET_MEAN
        # print(predictions)
        labels = labels * TARGET_STD + TARGET_MEAN
        # print(labels)
        mse.update(predictions.view(1).cpu(), labels)

print(f"Testing mean squared error of {(mse.compute().item()):.3f}")

In [None]:
with torch.no_grad():
    for _ in range(20):
        for samples, labels in tqdm(train_dataloader, desc="Iteration {}".format(_ + 1)):
            samples_hv = model.encode(samples)
            model.model_update(samples_hv, labels)

Iteration 1: 100%|██████████| 1052/1052 [00:00<00:00, 2040.11it/s]
Iteration 2: 100%|██████████| 1052/1052 [00:00<00:00, 2219.40it/s]
Iteration 3: 100%|██████████| 1052/1052 [00:00<00:00, 2225.84it/s]
Iteration 4: 100%|██████████| 1052/1052 [00:00<00:00, 2286.93it/s]
Iteration 5: 100%|██████████| 1052/1052 [00:00<00:00, 2196.82it/s]
Iteration 6: 100%|██████████| 1052/1052 [00:00<00:00, 2173.12it/s]
Iteration 7: 100%|██████████| 1052/1052 [00:00<00:00, 2280.57it/s]
Iteration 8: 100%|██████████| 1052/1052 [00:00<00:00, 2268.37it/s]
Iteration 9: 100%|██████████| 1052/1052 [00:00<00:00, 2284.67it/s]
Iteration 10: 100%|██████████| 1052/1052 [00:00<00:00, 2226.34it/s]
Iteration 11: 100%|██████████| 1052/1052 [00:00<00:00, 2279.76it/s]
Iteration 12: 100%|██████████| 1052/1052 [00:00<00:00, 2246.23it/s]
Iteration 13: 100%|██████████| 1052/1052 [00:00<00:00, 2275.20it/s]
Iteration 14: 100%|██████████| 1052/1052 [00:00<00:00, 2257.18it/s]
Iteration 15: 100%|██████████| 1052/1052 [00:00<00:00, 23

In [None]:
mse = torchmetrics.MeanSquaredError()

p = []
l = []
with torch.no_grad():
    for samples, labels in tqdm(test_dataloader, desc="Testing"):
        predictions = model(samples)
        predictions = predictions * TARGET_STD + TARGET_MEAN
        labels = labels * TARGET_STD + TARGET_MEAN
        mse.update(predictions.view(1).cpu(), labels)
        # print(predictions, labels)

print(f"Testing mean squared error of {(mse.compute().item()):.3f}")

Testing: 100%|██████████| 451/451 [00:00<00:00, 1635.19it/s]

Testing mean squared error of 9.943





In [None]:
model.M

MAP([-2.0084e-03,  2.4296e-04, -5.9487e-04,  ...,  6.8179e-04,
     -2.9832e-04,  3.1204e-05])