In [1]:
from DatasetLoader_mine2 import DatasetLoader
import os
import numpy as np

In [2]:
parent = 'multipletimeseriesforecasting/preprocessed-data/'

path = os.path.join(parent, 'SanFranciscoTraffic/traffic_weekly_dataset.tsf')

dl = DatasetLoader('SanFranciscoTraffic', path)

matrix_1_original = dl.dataset_load_and_preprocess("original")
matrix_1_norm = dl.dataset_load_and_preprocess("normalized")
print(matrix_1_norm.shape)

---------------------------------------------------
Dataset name:  SanFranciscoTraffic
Dataset contains missing values?  False
--------------------------
---------------------------------------------------
Dataset name:  SanFranciscoTraffic
Dataset contains missing values?  False
--------------------------
(862, 104)


In [3]:
sample_len = int(matrix_1_norm.shape[1]*(0.15))
print("Sample_len total", sample_len)

np.random.seed(1)
sets = np.random.choice(matrix_1_norm.shape[1]-40, int(sample_len), replace=False)
sets_training, sets_validation, sets_testing = sets[:int(len(sets)*.75)], sets[int(len(sets)*.75):int(len(sets)*.8)], sets[int(len(sets)*.8):]
print(sets_training, sets_validation, sets_testing)

Sample_len total 15
[24 39 52 27 44  2 21 62 41 50 38] [54] [35 26 57]


#### Encoding with just Bind

In [8]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data

# Note: this example requires the torchmetrics library: https://torchmetrics.readthedocs.io
from sklearn.metrics import mean_squared_error
from tqdm import tqdm

from torchhd import functional, embeddings, cos_similarity, bind, multiset, hard_quantize

from scipy.special import softmax

import time

d = 10000
models = 4

# Model based on RegHD application for Single model regression -> No comparing which cluster
class MultiModel_With_NonLinear_Sin(nn.Module):
    def __init__(self, num_classes, size, levels):
        super(MultiModel_With_NonLinear_Sin, self).__init__()

        self.lr = 0.00001 # alpha
        self.M = torch.zeros(models, d).double() # Model initializes in 0
        self.flatten = torch.nn.Flatten()
        self.position = embeddings.Random(size, d)
        self.value = embeddings.Level(levels, d)
        self.cluster = functional.random_hv(models, d)

    def encode(self, x): # encoding a value
        for i in range(len(x)):
            x[i] = float(x[i])
        sample_hv = bind(self.position.weight, self.value(x))
        sample_hv = multiset(sample_hv)
        print(hard_quantize(sample_hv))
        return hard_quantize(sample_hv)

    def model_update(self, x, y): # update # y = no hv
        #x = torch.reshape(x, (1,d))
        confidence = np.transpose(softmax(cos_similarity(x, self.cluster))) # Compare input with cluster
        model_result = F.linear(x, self.M.type(torch.FloatTensor))
        update = self.M + (float(self.lr) * float(y - F.linear(confidence, model_result)) * x) # Model + alpha*(Error)*(x)
        #update = update.mean(0) # Mean by columns
        self.M = update.type(torch.FloatTensor) # New 
        # update cluster center?
        center = [num.item() for num in confidence].index(max(confidence).item())
        self.cluster[center] = self.cluster[center] + (1-max(confidence)) * x
        return center
        

    def forward(self, x):
        enc = torch.reshape(self.encode(x), (1,d))
        confidence = np.transpose(softmax(cos_similarity(self.cluster, enc))) # Compare input with cluster
        model_result = F.linear(enc, self.M).type(torch.FloatTensor)
        res = F.linear(confidence, model_result) # Multiply enc (x) * weights (Model) = Dot product
        return res # Return the resolutions

    def train(self, sets_training, matrix_1_norm, epochs):
        for _ in range(epochs): # Number of iterations for all the samples
            pred = []
            labels_full = []
            for i in sets_training:
                samples = matrix_1_norm[:, i:i+20]
                labels = matrix_1_norm[:, i+20]
                
                for n in range(samples.shape[0]):
                    label = torch.tensor(labels[n])
                    sample = torch.tensor(samples[n, :])
                    samples_hv = self.encode(sample) # Encode the inputs
                    self.model_update(samples_hv, label) # Pass input and label to train
                    predictions_testing = self.forward(sample) # Pass samples from test to model (forward function)
                    pred.append(float(predictions_testing[0]))
                    labels_full.append(float(label.unsqueeze(dim=0)))

            print(f"Training root mean squared error of {(mean_squared_error(labels_full, pred, squared=False)):.3f}")
    
    def test(self, sets_testing, matrix_1_norm):
        pred = []
        labels_full = []
        for i in sets_testing:
            samples = matrix_1_norm[:, i:i+20]
            labels = matrix_1_norm[:, i+20]
            for n in range(samples.shape[0]):
                label = torch.tensor(labels[n])
                sample = torch.tensor(samples[n, :])
                # Pass samples from test to model (forward function)
                predictions = self.forward(sample)
                pred.append(float(predictions[0]))
                labels_full.append(float(label.unsqueeze(dim=0)))

        print(
            f"Testing root mean squared error of testing {(mean_squared_error(labels_full, pred, squared=False)):.3f}")

In [9]:
# Debuging by making sure that diffrent level hypervectors are being chosen - ALL CLEAR

model = MultiModel_With_NonLinear_Sin(2, 20, 6) # 1 class, 5
model.train(sets_training, matrix_1_norm, 1)
model.test(sets_validation, matrix_1_norm)

MAP([-1., -1., -1.,  ..., -1., -1.,  1.])
MAP([-1., -1., -1.,  ..., -1., -1.,  1.])
MAP([ 1., -1.,  1.,  ..., -1., -1., -1.])
MAP([ 1., -1.,  1.,  ..., -1., -1., -1.])
MAP([ 1., -1.,  1.,  ..., -1., -1., -1.])
MAP([ 1., -1.,  1.,  ..., -1., -1., -1.])
MAP([ 1., -1.,  1.,  ..., -1., -1., -1.])
MAP([ 1., -1.,  1.,  ..., -1., -1., -1.])
MAP([ 1., -1.,  1.,  ..., -1.,  1., -1.])
MAP([ 1., -1.,  1.,  ..., -1.,  1., -1.])
MAP([-1., -1.,  1.,  ..., -1., -1.,  1.])
MAP([-1., -1.,  1.,  ..., -1., -1.,  1.])
MAP([ 1., -1.,  1.,  ..., -1., -1., -1.])
MAP([ 1., -1.,  1.,  ..., -1., -1., -1.])
MAP([ 1., -1.,  1.,  ..., -1., -1., -1.])
MAP([ 1., -1.,  1.,  ..., -1., -1., -1.])
MAP([ 1., -1.,  1.,  ..., -1., -1., -1.])
MAP([ 1., -1.,  1.,  ..., -1., -1., -1.])
MAP([ 1., -1.,  1.,  ..., -1., -1.,  1.])
MAP([ 1., -1.,  1.,  ..., -1., -1.,  1.])
MAP([ 1., -1.,  1.,  ..., -1., -1., -1.])
MAP([ 1., -1.,  1.,  ..., -1., -1., -1.])
MAP([ 1., -1.,  1.,  ..., -1.,  1., -1.])
MAP([ 1., -1.,  1.,  ..., -1.,  1.

In [7]:
# Changing number of level hypervectors

for i in range(1,50):
    print(i)
    model = MultiModel_With_NonLinear_Sin(2, 20, i) # 1 class, 5
    model.train(sets_training, matrix_1_norm, 1)
    model.test(sets_validation, matrix_1_norm)
for i in range(50, 100, 10):
    print(i)
    model = MultiModel_With_NonLinear_Sin(2, 20, i) # 1 class, 5
    model.train(sets_training, matrix_1_norm, 1)
    model.test(sets_validation, matrix_1_norm)
for i in range(100, 1100, 100):
    print(i)
    model = MultiModel_With_NonLinear_Sin(2, 20, i) # 1 class, 5
    model.train(sets_training, matrix_1_norm, 1)
    model.test(sets_validation, matrix_1_norm)

1
Training root mean squared error of 0.202
Testing root mean squared error of testing 0.346
2
Training root mean squared error of 0.130
Testing root mean squared error of testing 0.168
3
Training root mean squared error of 0.129
Testing root mean squared error of testing 0.169
4
Training root mean squared error of 0.121
Testing root mean squared error of testing 0.166
5
Training root mean squared error of 0.118
Testing root mean squared error of testing 0.160
6
Training root mean squared error of 0.117
Testing root mean squared error of testing 0.143
7
Training root mean squared error of 0.116
Testing root mean squared error of testing 0.152
8
Training root mean squared error of 0.115
Testing root mean squared error of testing 0.150
9
Training root mean squared error of 0.115
Testing root mean squared error of testing 0.147
10
Training root mean squared error of 0.115
Testing root mean squared error of testing 0.157
11
Training root mean squared error of 0.115
Testing root mean square

#### Encoding with bind and permutation

In [8]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data

# Note: this example requires the torchmetrics library: https://torchmetrics.readthedocs.io
from sklearn.metrics import mean_squared_error
from tqdm import tqdm

from torchhd import functional, embeddings, cos_similarity, bind, multiset, hard_quantize, permute

from scipy.special import softmax

import time

d = 10000
models = 4

# Model based on RegHD application for Single model regression -> No comparing which cluster
class MultiModel_With_NonLinear_Sin(nn.Module):
    def __init__(self, num_classes, size, levels):
        super(MultiModel_With_NonLinear_Sin, self).__init__()

        self.lr = 0.00001 # alpha
        self.M = torch.zeros(models, d).double() # Model initializes in 0
        self.flatten = torch.nn.Flatten()
        self.position = embeddings.Random(size, d)
        self.value = embeddings.Level(levels, d)
        self.cluster = functional.random_hv(models, d)

    def encode(self, x): # encoding a value
        for i in range(len(x)):
            x[i] = float(x[i])
        #x = self.flatten(x)
        sample_hv = bind(self.position.weight, self.value(x))
        for i, hv in enumerate(sample_hv):
            sample_hv[i] = permute(hv, shifts=i)
        sample_hv = multiset(sample_hv)
        return hard_quantize(sample_hv)

    def model_update(self, x, y): # update # y = no hv
        #x = torch.reshape(x, (1,d))
        confidence = np.transpose(softmax(cos_similarity(x, self.cluster))) # Compare input with cluster
        model_result = F.linear(x, self.M.type(torch.FloatTensor))
        update = self.M + (float(self.lr) * float(y - F.linear(confidence, model_result)) * x) # Model + alpha*(Error)*(x)
        #update = update.mean(0) # Mean by columns
        self.M = update.type(torch.FloatTensor) # New 
        # update cluster center?
        center = [num.item() for num in confidence].index(max(confidence).item())
        self.cluster[center] = self.cluster[center] + (1-max(confidence)) * x
        return center
        

    def forward(self, x):
        enc = torch.reshape(self.encode(x), (1,d))
        confidence = np.transpose(softmax(cos_similarity(self.cluster, enc))) # Compare input with cluster
        model_result = F.linear(enc, self.M).type(torch.FloatTensor)
        res = F.linear(confidence, model_result) # Multiply enc (x) * weights (Model) = Dot product
        return res # Return the resolutions

    def train(self, sets_training, matrix_1_norm, epochs):
        for _ in range(epochs): # Number of iterations for all the samples
            pred = []
            labels_full = []
            for i in sets_training:
                samples = matrix_1_norm[:, i:i+20]
                labels = matrix_1_norm[:, i+20]
                
                for n in range(samples.shape[0]):
                    label = torch.tensor(labels[n])
                    sample = torch.tensor(samples[n, :])
                    samples_hv = self.encode(sample) # Encode the inputs
                    self.model_update(samples_hv, label) # Pass input and label to train
                    predictions_testing = self.forward(sample) # Pass samples from test to model (forward function)
                    pred.append(float(predictions_testing[0]))
                    labels_full.append(float(label.unsqueeze(dim=0)))
            print(f"Training root mean squared error of {(mean_squared_error(labels_full, pred, squared=False)):.3f}")
    
    def test(self, sets_testing, matrix_1_norm):
        pred = []
        labels_full = []
        for i in sets_testing:
            samples = matrix_1_norm[:, i:i+20]
            labels = matrix_1_norm[:, i+20]
            for n in range(samples.shape[0]):
                label = torch.tensor(labels[n])
                sample = torch.tensor(samples[n, :])
                # Pass samples from test to model (forward function)
                predictions = self.forward(sample)
                pred.append(float(predictions[0]))
                labels_full.append(float(label.unsqueeze(dim=0)))

        print(
            f"Testing root mean squared error of testing {(mean_squared_error(labels_full, pred, squared=False)):.3f}")

In [9]:
for i in range(1,50):
    print(i)
    model = MultiModel_With_NonLinear_Sin(2, 20, i) # 1 class, 5
    model.train(sets_training, matrix_1_norm, 1)
    model.test(sets_validation, matrix_1_norm)
for i in range(50, 100, 10):
    print(i)
    model = MultiModel_With_NonLinear_Sin(2, 20, i) # 1 class, 5
    model.train(sets_training, matrix_1_norm, 1)
    model.test(sets_validation, matrix_1_norm)
for i in range(100, 1100, 100):
    print(i)
    model = MultiModel_With_NonLinear_Sin(2, 20, i) # 1 class, 5
    model.train(sets_training, matrix_1_norm, 1)
    model.test(sets_validation, matrix_1_norm)

1
Training root mean squared error of 0.202
Testing root mean squared error of testing 0.346
2
Training root mean squared error of 0.129
Testing root mean squared error of testing 0.164
3
Training root mean squared error of 0.130
Testing root mean squared error of testing 0.168
4
Training root mean squared error of 0.121
Testing root mean squared error of testing 0.170
5
Training root mean squared error of 0.117
Testing root mean squared error of testing 0.159
6
Training root mean squared error of 0.116
Testing root mean squared error of testing 0.146
7
Training root mean squared error of 0.114
Testing root mean squared error of testing 0.152
8
Training root mean squared error of 0.114
Testing root mean squared error of testing 0.150
9
Training root mean squared error of 0.114
Testing root mean squared error of testing 0.150
10
Training root mean squared error of 0.114
Testing root mean squared error of testing 0.153
11
Training root mean squared error of 0.114
Testing root mean square

#### Same test but modify r
r = randomness -> r-value to interpolate between level-hypervectors

In [13]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data

# Note: this example requires the torchmetrics library: https://torchmetrics.readthedocs.io
from sklearn.metrics import mean_squared_error
from tqdm import tqdm

from torchhd import functional, embeddings, cos_similarity, bind, multiset, hard_quantize

from scipy.special import softmax

import time

d = 10000
models = 4

# Model based on RegHD application for Single model regression -> No comparing which cluster
class MultiModel_With_NonLinear_Sin(nn.Module):
    def __init__(self, size, levels, r):
        super(MultiModel_With_NonLinear_Sin, self).__init__()

        self.lr = 0.00001 # alpha
        self.M = torch.zeros(models, d).double() # Model initializes in 0
        self.flatten = torch.nn.Flatten()
        self.position = embeddings.Random(size, d)
        self.value = embeddings.Level(levels, d, randomness=r)
        self.cluster = functional.random_hv(models, d)
        self.size = size
        self.r = r

    def encode(self, x): # encoding a value
        for i in range(len(x)):
            x[i] = float(x[i])
        sample_hv = bind(self.position.weight, self.value(x))
        sample_hv = multiset(sample_hv)
        return hard_quantize(sample_hv)

    def model_update(self, x, y): # update # y = no hv
        #x = torch.reshape(x, (1,d))
        confidence = np.transpose(softmax(cos_similarity(x, self.cluster))) # Compare input with cluster
        model_result = F.linear(x, self.M.type(torch.FloatTensor))
        update = self.M + (float(self.lr) * float(y - F.linear(confidence, model_result)) * x) # Model + alpha*(Error)*(x)
        #update = update.mean(0) # Mean by columns
        self.M = update.type(torch.FloatTensor) # New 
        # update cluster center?
        center = [num.item() for num in confidence].index(max(confidence).item())
        self.cluster[center] = self.cluster[center] + (1-max(confidence)) * x
        return center
        

    def forward(self, x):
        enc = torch.reshape(self.encode(x), (1,d))
        confidence = np.transpose(softmax(cos_similarity(self.cluster, enc))) # Compare input with cluster
        model_result = F.linear(enc, self.M).type(torch.FloatTensor)
        res = F.linear(confidence, model_result) # Multiply enc (x) * weights (Model) = Dot product
        return res # Return the resolutions

    def train(self, sets_training, matrix_1_norm, epochs):
        for _ in range(epochs): # Number of iterations for all the samples
            pred = []
            labels_full = []
            for i in sets_training:
                samples = matrix_1_norm[:, i:i+self.size]
                labels = matrix_1_norm[:, i+self.size]
                
                for n in range(samples.shape[0]):
                    label = torch.tensor(labels[n])
                    sample = torch.tensor(samples[n, :])
                    samples_hv = self.encode(sample) # Encode the inputs
                    self.model_update(samples_hv, label) # Pass input and label to train
                    predictions_testing = self.forward(sample) # Pass samples from test to model (forward function)
                    pred.append(float(predictions_testing[0]))
                    labels_full.append(float(label.unsqueeze(dim=0)))

            print(f"Training root mean squared error of {(mean_squared_error(labels_full, pred, squared=False)):.3f}")
    
    def test(self, sets_testing, matrix_1_norm):
        pred = []
        labels_full = []
        for i in sets_testing:
            samples = matrix_1_norm[:, i:i+self.size]
            labels = matrix_1_norm[:, i+self.size]
            for n in range(samples.shape[0]):
                label = torch.tensor(labels[n])
                sample = torch.tensor(samples[n, :])
                # Pass samples from test to model (forward function)
                predictions = self.forward(sample)
                pred.append(float(predictions[0]))
                labels_full.append(float(label.unsqueeze(dim=0)))

        print(
            f"Testing root mean squared error of testing {(mean_squared_error(labels_full, pred, squared=False)):.3f}")

In [15]:
i = 0
while i < 1.1:
    print(i)
    model = MultiModel_With_NonLinear_Sin(size = 20, levels = 6, r = i) # 1 class, 5
    model.train(sets_training, matrix_1_norm, 1)
    model.test(sets_validation, matrix_1_norm)
    i += 0.05

0
Training root mean squared error of 0.116
Testing root mean squared error of testing 0.143
0.05
Training root mean squared error of 0.117
Testing root mean squared error of testing 0.143
0.1
Training root mean squared error of 0.116
Testing root mean squared error of testing 0.146
0.15000000000000002
Training root mean squared error of 0.116
Testing root mean squared error of testing 0.144
0.2
Training root mean squared error of 0.116
Testing root mean squared error of testing 0.143
0.25
Training root mean squared error of 0.116
Testing root mean squared error of testing 0.141
0.3
Training root mean squared error of 0.116
Testing root mean squared error of testing 0.142
0.35
Training root mean squared error of 0.116
Testing root mean squared error of testing 0.143
0.39999999999999997
Training root mean squared error of 0.116
Testing root mean squared error of testing 0.140
0.44999999999999996
Training root mean squared error of 0.116
Testing root mean squared error of testing 0.140
0

In [16]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data

# Note: this example requires the torchmetrics library: https://torchmetrics.readthedocs.io
from sklearn.metrics import mean_squared_error
from tqdm import tqdm

from torchhd import functional, embeddings, cos_similarity, bind, multiset, hard_quantize, permute

from scipy.special import softmax

import time

d = 10000
models = 4

# Model based on RegHD application for Single model regression -> No comparing which cluster
class MultiModel_With_NonLinear_Sin(nn.Module):
    def __init__(self, size, levels, r):
        super(MultiModel_With_NonLinear_Sin, self).__init__()

        self.lr = 0.00001 # alpha
        self.M = torch.zeros(models, d).double() # Model initializes in 0
        self.position = embeddings.Random(size, d)
        self.value = embeddings.Level(levels, d, randomness=r)
        self.cluster = functional.random_hv(models, d)
        self.size = size
        self.r = r

    def encode(self, x): # encoding a value
        for i in range(len(x)):
            x[i] = float(x[i])
        #x = self.flatten(x)
        sample_hv = bind(self.position.weight, self.value(x))
        for i, hv in enumerate(sample_hv):
            sample_hv[i] = permute(hv, shifts=i)
        sample_hv = multiset(sample_hv)
        return hard_quantize(sample_hv)

    def model_update(self, x, y): # update # y = no hv
        #x = torch.reshape(x, (1,d))
        confidence = np.transpose(softmax(cos_similarity(x, self.cluster))) # Compare input with cluster
        model_result = F.linear(x, self.M.type(torch.FloatTensor))
        update = self.M + (float(self.lr) * float(y - F.linear(confidence, model_result)) * x) # Model + alpha*(Error)*(x)
        #update = update.mean(0) # Mean by columns
        self.M = update.type(torch.FloatTensor) # New 
        # update cluster center?
        center = [num.item() for num in confidence].index(max(confidence).item())
        self.cluster[center] = self.cluster[center] + (1-max(confidence)) * x
        return center
        

    def forward(self, x):
        enc = torch.reshape(self.encode(x), (1,d))
        confidence = np.transpose(softmax(cos_similarity(self.cluster, enc))) # Compare input with cluster
        model_result = F.linear(enc, self.M).type(torch.FloatTensor)
        res = F.linear(confidence, model_result) # Multiply enc (x) * weights (Model) = Dot product
        return res # Return the resolutions

    def train(self, sets_training, matrix_1_norm, epochs):
        for _ in range(epochs): # Number of iterations for all the samples
            pred = []
            labels_full = []
            for i in sets_training:
                samples = matrix_1_norm[:, i:i+self.size]
                labels = matrix_1_norm[:, i+self.size]
                
                for n in range(samples.shape[0]):
                    label = torch.tensor(labels[n])
                    sample = torch.tensor(samples[n, :])
                    samples_hv = self.encode(sample) # Encode the inputs
                    self.model_update(samples_hv, label) # Pass input and label to train
                    predictions_testing = self.forward(sample) # Pass samples from test to model (forward function)
                    pred.append(float(predictions_testing[0]))
                    labels_full.append(float(label.unsqueeze(dim=0)))
            print(f"Training root mean squared error of {(mean_squared_error(labels_full, pred, squared=False)):.3f}")
    
    def test(self, sets_testing, matrix_1_norm):
        pred = []
        labels_full = []
        for i in sets_testing:
            samples = matrix_1_norm[:, i:i+self.size]
            labels = matrix_1_norm[:, i+self.size]
            for n in range(samples.shape[0]):
                label = torch.tensor(labels[n])
                sample = torch.tensor(samples[n, :])
                # Pass samples from test to model (forward function)
                predictions = self.forward(sample)
                pred.append(float(predictions[0]))
                labels_full.append(float(label.unsqueeze(dim=0)))

        print(
            f"Testing root mean squared error of testing {(mean_squared_error(labels_full, pred, squared=False)):.3f}")

In [17]:
i = 0
while i < 1.1:
    print(i)
    model = MultiModel_With_NonLinear_Sin(size = 20, levels = 6, r = i) # 1 class, 5
    model.train(sets_training, matrix_1_norm, 1)
    model.test(sets_validation, matrix_1_norm)
    i += 0.05

0
Training root mean squared error of 0.116
Testing root mean squared error of testing 0.149
0.05
Training root mean squared error of 0.116
Testing root mean squared error of testing 0.146
0.1
Training root mean squared error of 0.115
Testing root mean squared error of testing 0.147
0.15000000000000002
Training root mean squared error of 0.116
Testing root mean squared error of testing 0.145
0.2
Training root mean squared error of 0.116
Testing root mean squared error of testing 0.150
0.25
Training root mean squared error of 0.116
Testing root mean squared error of testing 0.148
0.3
Training root mean squared error of 0.116
Testing root mean squared error of testing 0.145
0.35
Training root mean squared error of 0.116
Testing root mean squared error of testing 0.146
0.39999999999999997
Training root mean squared error of 0.116
Testing root mean squared error of testing 0.144
0.44999999999999996
Training root mean squared error of 0.116
Testing root mean squared error of testing 0.143
0

#### Combine this new encoding with spectral clustering

In [23]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data

# Note: this example requires the torchmetrics library: https://torchmetrics.readthedocs.io
from sklearn.metrics import mean_squared_error
from tqdm import tqdm

from torchhd import functional
from torchhd import embeddings
from torchhd import cos_similarity

from scipy.special import softmax

import time
from sklearn.cluster import SpectralClustering

d = 10000
models = 4

# Model based on RegHD application for Single model regression -> No comparing which cluster
class MultiModel_With_NonLinear_Sin(nn.Module):
    def __init__(self, size, levels, r):
        super(MultiModel_With_NonLinear_Sin, self).__init__()

        self.lr = 0.00001 # alpha
        self.M = torch.zeros(models, d).double() # Model initializes in 0
        self.position = embeddings.Random(size, d)
        self.value = embeddings.Level(levels, d, randomness=r)
        self.cluster = functional.random_hv(models, d)
        self.size = size
        self.r = r
        self.cluster_model = None
        self.training_samples = None

    def encode(self, x): # encoding a value
        for i in range(len(x)):
            x[i] = float(x[i])
        sample_hv = bind(self.position.weight, self.value(x))
        sample_hv = multiset(sample_hv)
        return hard_quantize(sample_hv)

    def model_update(self, x, y, cluster): # update # y = no hv
        x = torch.reshape(x, (1,d))
        #confidence = np.transpose(softmax(cos_similarity(self.cluster, x))) # Compare input with cluster
        model_result = F.linear(x.type(torch.FloatTensor), self.M[cluster].type(torch.FloatTensor))
        update = self.M[cluster] + (self.lr * (y - model_result) * x) # Model + alpha*(Error)*(x)
        self.M[cluster] = update[0] # New 
        #center = [num.item() for num in confidence[0]].index(max(confidence[0]).item())
        #self.cluster[center] = self.cluster[center] + (1-max(confidence[0])) * x
        #return center
    
    def forward(self, x):
        enc = torch.reshape(self.encode(x), (1,d))
        #confidence = np.transpose(softmax(cos_similarity(self.cluster, enc))) # Compare input with cluster
        similarity = cos_similarity(self.training_samples, enc)
        #print(similarity.index(max(similarity)))
        i = list(similarity.numpy()).index(max(similarity).numpy()[0])
        #i,j = np.where(similarity.numpy() == max(similarity).numpy())
        model_result = F.linear(enc, self.M[self.cluster_model[i]].type(torch.FloatTensor))
        #res = F.linear(confidence, model_result) # Multiply enc (x) * weights (Model) = Dot product
        return model_result # Return the resolutions
    
    def train(self, sets_training, matrix_1_norm, epochs):
        
        full_samples = {}
        for time, i in enumerate(sets_training):
            samples = matrix_1_norm[:, i:i+self.size]
            for n in range(0, samples.shape[0], 1):
                sample = torch.tensor(samples[n, :])
                full_samples[(samples.shape[0]*time) + n] = sample

        if(self.cluster_model == None):
            encoded = torch.zeros(len(full_samples), 10000)
            for i, samp in (full_samples.items()):
                encoded[i, :] = self.encode(samp)
            self.training_samples = encoded
            affinity_matrix = cos_similarity(encoded, encoded)
            clustering = SpectralClustering(n_clusters=models, assign_labels='discretize', 
                                            random_state=0, affinity='precomputed').fit(affinity_matrix)
            self.cluster_model = clustering.labels_

        for _ in range(epochs): # Number of iterations for all the samples
            pred = []
            labels_full = []
            for time, i in enumerate(sets_training):
                #samples = matrix_1_norm[:, i:i+40]
                labels = matrix_1_norm[:, i+self.size]
                
                for n in range(samples.shape[0]):
                    label = torch.tensor(labels[n])
                    item = (samples.shape[0]*time) + n
                    self.model_update(encoded[item], label, self.cluster_model[item]) # Pass input and label to train
                    #predictions_testing = self(full_samples[item]) # Pass samples from test to model (forward function)
                    #pred.append(predictions_testing[0])
                    #labels_full.append(label.unsqueeze(dim=0))

            #print(f"Training root mean squared error of {(mean_squared_error(labels_full, pred, squared=False)):.3f}")
    
    def test(self, sets_testing, matrix_1_norm):
        pred = []
        labels_full = []
        for i in sets_testing:
            samples = matrix_1_norm[:, i:i+self.size]
            labels = matrix_1_norm[:, i+self.size]
            for n in range(samples.shape[0]):
                label = torch.tensor(labels[n])
                sample = torch.tensor(samples[n, :])
                # Pass samples from test to model (forward function)
                predictions = self.forward(sample)
                pred.append(float(predictions[0]))
                labels_full.append(float(label.unsqueeze(dim=0)))
        print(f"Testing root mean squared error of {(mean_squared_error(labels_full, pred, squared=False)):.3f}")
  
model = MultiModel_With_NonLinear_Sin(size = 20, levels = 6, r = 1.0) # 1 class, 5

In [26]:
model = MultiModel_With_NonLinear_Sin(size = 20, levels = 6, r = 0.25) # 1 class, 5
model.train(sets_training, matrix_1_norm, 1)
model.test(sets_validation, matrix_1_norm)

Testing root mean squared error of 0.139


In [27]:
model.test(sets_testing, matrix_1_norm)

Testing root mean squared error of 0.131


In [4]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data

# Note: this example requires the torchmetrics library: https://torchmetrics.readthedocs.io
from sklearn.metrics import mean_squared_error
from tqdm import tqdm

from torchhd import functional, embeddings, cos_similarity, bind, multiset, hard_quantize, permute

from scipy.special import softmax

import time
from sklearn.cluster import SpectralClustering

d = 10000
models = 4

# Model based on RegHD application for Single model regression -> No comparing which cluster
class MultiModel_With_NonLinear_Sin(nn.Module):
    def __init__(self, size, levels, r):
        super(MultiModel_With_NonLinear_Sin, self).__init__()

        self.lr = 0.00001 # alpha
        self.M = torch.zeros(models, d).double() # Model initializes in 0
        self.position = embeddings.Random(size, d)
        self.value = embeddings.Level(levels, d, randomness=r)
        self.cluster = functional.random_hv(models, d)
        self.size = size
        self.r = r
        self.cluster_model = None
        self.training_samples = None

    def encode(self, x): # encoding a value
        for i in range(len(x)):
            x[i] = float(x[i])
        #x = self.flatten(x)
        sample_hv = bind(self.position.weight, self.value(x))
        for i, hv in enumerate(sample_hv):
            sample_hv[i] = permute(hv, shifts=i)
        sample_hv = multiset(sample_hv)
        return hard_quantize(sample_hv)

    def model_update(self, x, y, cluster): # update # y = no hv
        x = torch.reshape(x, (1,d))
        #confidence = np.transpose(softmax(cos_similarity(self.cluster, x))) # Compare input with cluster
        model_result = F.linear(x.type(torch.FloatTensor), self.M[cluster].type(torch.FloatTensor))
        update = self.M[cluster] + (self.lr * (y - model_result) * x) # Model + alpha*(Error)*(x)
        self.M[cluster] = update[0] # New 
        #center = [num.item() for num in confidence[0]].index(max(confidence[0]).item())
        #self.cluster[center] = self.cluster[center] + (1-max(confidence[0])) * x
        #return center
    
    def forward(self, x):
        enc = torch.reshape(self.encode(x), (1,d))
        #confidence = np.transpose(softmax(cos_similarity(self.cluster, enc))) # Compare input with cluster
        similarity = cos_similarity(self.training_samples, enc)
        #print(similarity.index(max(similarity)))
        i = list(similarity.numpy()).index(max(similarity).numpy()[0])
        #i,j = np.where(similarity.numpy() == max(similarity).numpy())
        model_result = F.linear(enc, self.M[self.cluster_model[i]].type(torch.FloatTensor))
        #res = F.linear(confidence, model_result) # Multiply enc (x) * weights (Model) = Dot product
        return model_result # Return the resolutions
    
    def train(self, sets_training, matrix_1_norm, epochs):
        
        full_samples = {}
        for time, i in enumerate(sets_training):
            samples = matrix_1_norm[:, i:i+self.size]
            for n in range(0, samples.shape[0], 1):
                sample = torch.tensor(samples[n, :])
                full_samples[(samples.shape[0]*time) + n] = sample

        if(self.cluster_model == None):
            encoded = torch.zeros(len(full_samples), 10000)
            for i, samp in (full_samples.items()):
                encoded[i, :] = self.encode(samp)
            self.training_samples = encoded
            affinity_matrix = cos_similarity(encoded, encoded)
            clustering = SpectralClustering(n_clusters=models, assign_labels='discretize', 
                                            random_state=0, affinity='precomputed').fit(affinity_matrix)
            self.cluster_model = clustering.labels_

        for _ in range(epochs): # Number of iterations for all the samples
            pred = []
            labels_full = []
            for time, i in enumerate(sets_training):
                #samples = matrix_1_norm[:, i:i+40]
                labels = matrix_1_norm[:, i+self.size]
                
                for n in range(samples.shape[0]):
                    label = torch.tensor(labels[n])
                    item = (samples.shape[0]*time) + n
                    self.model_update(encoded[item], label, self.cluster_model[item]) # Pass input and label to train
                    #predictions_testing = self(full_samples[item]) # Pass samples from test to model (forward function)
                    #pred.append(predictions_testing[0])
                    #labels_full.append(label.unsqueeze(dim=0))

            #print(f"Training root mean squared error of {(mean_squared_error(labels_full, pred, squared=False)):.3f}")
    
    def test(self, sets_testing, matrix_1_norm):
        pred = []
        labels_full = []
        for i in sets_testing:
            samples = matrix_1_norm[:, i:i+self.size]
            labels = matrix_1_norm[:, i+self.size]
            for n in range(samples.shape[0]):
                label = torch.tensor(labels[n])
                sample = torch.tensor(samples[n, :])
                # Pass samples from test to model (forward function)
                predictions = self.forward(sample)
                pred.append(float(predictions[0]))
                labels_full.append(float(label.unsqueeze(dim=0)))
        print(f"Testing root mean squared error of {(mean_squared_error(labels_full, pred, squared=False)):.3f}")
  
#model = MultiModel_With_NonLinear_Sin(size = 20, levels = 6, r = 1.0) # 1 class, 5

In [29]:
model = MultiModel_With_NonLinear_Sin(size = 20, levels = 6, r = 0.25) # 1 class, 5
model.train(sets_training, matrix_1_norm, 1)
model.test(sets_validation, matrix_1_norm)

Testing root mean squared error of 0.141


In [8]:
model = MultiModel_With_NonLinear_Sin(size = 20, levels = 100, r = 0.25) # 1 class, 5
model.train(sets_training, matrix_1_norm, 1)
model.test(sets_validation, matrix_1_norm)

Testing root mean squared error of 0.133


In [5]:
levels = [1,5,10,20,50,100,200,500,1000]
r = [0, 0.05, 0.1, 0.25, 0.5, 1]
levels_r = []
for l in levels:
    for r_i in r:
        levels_r.append((l,r_i))

for level, r in levels_r:
    model = MultiModel_With_NonLinear_Sin(size = 20, levels = level, r = r) # 1 class, 5
    model.train(sets_training, matrix_1_norm, 1)
    model.test(sets_validation, matrix_1_norm)

Testing root mean squared error of 0.332
Testing root mean squared error of 0.332
Testing root mean squared error of 0.332
Testing root mean squared error of 0.332
Testing root mean squared error of 0.332
Testing root mean squared error of 0.332
Testing root mean squared error of 0.141
Testing root mean squared error of 0.145
Testing root mean squared error of 0.140
Testing root mean squared error of 0.144
Testing root mean squared error of 0.143
Testing root mean squared error of 0.141
Testing root mean squared error of 0.142
Testing root mean squared error of 0.140
Testing root mean squared error of 0.134
Testing root mean squared error of 0.135
Testing root mean squared error of 0.142
Testing root mean squared error of 0.139
Testing root mean squared error of 0.134
Testing root mean squared error of 0.131
Testing root mean squared error of 0.131
Testing root mean squared error of 0.138
Testing root mean squared error of 0.140
Testing root mean squared error of 0.144
Testing root mea

In [4]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data

# Note: this example requires the torchmetrics library: https://torchmetrics.readthedocs.io
from sklearn.metrics import mean_squared_error
from tqdm import tqdm

from torchhd import functional, embeddings, cos_similarity, bind, multiset, hard_quantize, permute

from scipy.special import softmax

import time
from sklearn.cluster import SpectralClustering

d = 10000
models = 4

# Model based on RegHD application for Single model regression -> No comparing which cluster
class MultiModel_With_NonLinear_Sin(nn.Module):
    def __init__(self, size, levels, r):
        super(MultiModel_With_NonLinear_Sin, self).__init__()

        self.lr = 0.00001 # alpha
        self.M = torch.zeros(models, d).double() # Model initializes in 0
        self.position = embeddings.Random(size, d)
        self.value = embeddings.Level(levels, d, randomness=r)
        self.cluster = functional.random_hv(models, d)
        self.size = size
        self.r = r
        self.cluster_model = None
        self.training_samples = None

    def encode(self, x): # encoding a value
        for i in range(len(x)):
            x[i] = float(x[i])
        #x = self.flatten(x)
        sample_hv = self.value(x)
        for i, hv in enumerate(sample_hv):
            sample_hv[i] = permute(hv, shifts=i)
        sample_hv = multiset(sample_hv)
        return hard_quantize(sample_hv)

    def model_update(self, x, y, cluster): # update # y = no hv
        x = torch.reshape(x, (1,d))
        #confidence = np.transpose(softmax(cos_similarity(self.cluster, x))) # Compare input with cluster
        model_result = F.linear(x.type(torch.FloatTensor), self.M[cluster].type(torch.FloatTensor))
        update = self.M[cluster] + (self.lr * (y - model_result) * x) # Model + alpha*(Error)*(x)
        self.M[cluster] = update[0] # New 
        #center = [num.item() for num in confidence[0]].index(max(confidence[0]).item())
        #self.cluster[center] = self.cluster[center] + (1-max(confidence[0])) * x
        #return center
    
    def forward(self, x):
        enc = torch.reshape(self.encode(x), (1,d))
        #confidence = np.transpose(softmax(cos_similarity(self.cluster, enc))) # Compare input with cluster
        similarity = cos_similarity(self.training_samples, enc)
        #print(similarity.index(max(similarity)))
        i = list(similarity.numpy()).index(max(similarity).numpy()[0])
        #i,j = np.where(similarity.numpy() == max(similarity).numpy())
        model_result = F.linear(enc, self.M[self.cluster_model[i]].type(torch.FloatTensor))
        #res = F.linear(confidence, model_result) # Multiply enc (x) * weights (Model) = Dot product
        return model_result # Return the resolutions
    
    def train(self, sets_training, matrix_1_norm, epochs):
        
        full_samples = {}
        for time, i in enumerate(sets_training):
            samples = matrix_1_norm[:, i:i+self.size]
            for n in range(0, samples.shape[0], 1):
                sample = torch.tensor(samples[n, :])
                full_samples[(samples.shape[0]*time) + n] = sample

        if(self.cluster_model == None):
            encoded = torch.zeros(len(full_samples), 10000)
            for i, samp in (full_samples.items()):
                encoded[i, :] = self.encode(samp)
            self.training_samples = encoded
            affinity_matrix = cos_similarity(encoded, encoded)
            clustering = SpectralClustering(n_clusters=models, assign_labels='discretize', 
                                            random_state=0, affinity='precomputed').fit(affinity_matrix)
            self.cluster_model = clustering.labels_

        for _ in range(epochs): # Number of iterations for all the samples
            pred = []
            labels_full = []
            for time, i in enumerate(sets_training):
                #samples = matrix_1_norm[:, i:i+40]
                labels = matrix_1_norm[:, i+self.size]
                
                for n in range(samples.shape[0]):
                    label = torch.tensor(labels[n])
                    item = (samples.shape[0]*time) + n
                    self.model_update(encoded[item], label, self.cluster_model[item]) # Pass input and label to train
                    #predictions_testing = self(full_samples[item]) # Pass samples from test to model (forward function)
                    #pred.append(predictions_testing[0])
                    #labels_full.append(label.unsqueeze(dim=0))

            #print(f"Training root mean squared error of {(mean_squared_error(labels_full, pred, squared=False)):.3f}")
    
    def test(self, sets_testing, matrix_1_norm):
        pred = []
        labels_full = []
        for i in sets_testing:
            samples = matrix_1_norm[:, i:i+self.size]
            labels = matrix_1_norm[:, i+self.size]
            for n in range(samples.shape[0]):
                label = torch.tensor(labels[n])
                sample = torch.tensor(samples[n, :])
                # Pass samples from test to model (forward function)
                predictions = self.forward(sample)
                pred.append(float(predictions[0]))
                labels_full.append(float(label.unsqueeze(dim=0)))
        print(f"Testing root mean squared error of {(mean_squared_error(labels_full, pred, squared=False)):.3f}")
  
#model = MultiModel_With_NonLinear_Sin(size = 20, levels = 6, r = 1.0) # 1 class, 5

In [5]:
model = MultiModel_With_NonLinear_Sin(size = 20, levels = 200, r = 0.25) # 1 class, 5
model.train(sets_training, matrix_1_norm, 1)
model.test(sets_validation, matrix_1_norm)

Testing root mean squared error of 0.133


#### Bundle with season - Linear

In [50]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data

# Note: this example requires the torchmetrics library: https://torchmetrics.readthedocs.io
from sklearn.metrics import mean_squared_error
from tqdm import tqdm

from torchhd import functional, bind
from torchhd import embeddings
from torchhd import cos_similarity

from scipy.special import softmax

import time
from sklearn.cluster import SpectralClustering

d = 10000
models = 4

# Model based on RegHD application for Single model regression -> No comparing which cluster
class MultiModel_With_NonLinear_Sin(nn.Module):
    def __init__(self, num_classes, size):
        super(MultiModel_With_NonLinear_Sin, self).__init__()

        self.lr = 0.00001 # alpha
        self.M = torch.zeros(models, d).double() # Model initializes in 0
        self.project = embeddings.Projection(size, d).double() # 5 features, 10000 dimensions = hypervectors like weights?
        self.project.weight.data.normal_(0, 1) # Normal distributions mean=0.0, std=1.0
        self.bias = nn.parameter.Parameter(torch.empty(d), requires_grad=False)
        self.bias.data.uniform_(0, 2 * math.pi) # bias
        self.cluster_model = None
        self.training_samples = None
        self.time_stamp = functional.level_hv(4, d)

    def encode(self, x, time): # encoding a value
        for i in range(len(x)):
            x[i] = float(x[i])
        enc = self.project(x)
        sample_hv = torch.cos(enc + self.bias) * torch.sin(enc) 
        season = ( time // 13 ) % 4
        return bind(functional.hard_quantize(sample_hv), self.time_stamp[season])

    def model_update(self, x, y, cluster): # update # y = no hv
        x = torch.reshape(x, (1,d))
        model_result = F.linear(x.type(torch.FloatTensor), self.M[cluster].type(torch.FloatTensor))
        update = self.M[cluster] + (self.lr * (y - model_result) * x) # Model + alpha*(Error)*(x)
        self.M[cluster] = update[0] # New 
    
    def forward(self, x, time):
        enc = torch.reshape(self.encode(x, time), (1,d))
        similarity = cos_similarity(self.training_samples, enc)
        i = list(similarity.numpy()).index(max(similarity).numpy()[0])
        model_result = F.linear(enc, self.M[self.cluster_model.labels_[i]])
        return model_result # Return the resolutions
    
    def train(self, sets_training, matrix_1_norm, epochs):
        
        full_samples = {}
        for time, i in enumerate(sets_training):
            samples = matrix_1_norm[:, i:i+20]
            for n in range(0, samples.shape[0], 1):
                sample = torch.tensor(samples[n, :])
                full_samples[(samples.shape[0]*time) + n] = (sample, i)

        if(self.cluster_model == None):
            encoded = torch.zeros(len(full_samples), 10000)
            for i, samp in (full_samples.items()):
                encoded[i, :] = self.encode(samp[0], samp[1])
                print(encoded[i, :])
            self.training_samples = encoded
            affinity_matrix = cos_similarity(encoded, encoded)
            print("Checking")
            for i in range(affinity_matrix.shape[0]):
                line = affinity_matrix[i].numpy()
                for j in range(affinity_matrix.shape[1]):
                    if np.isnan(line[j]) or math. isinf(line[j]):
                        print(i,j)
            print("Done checking")
            clustering = SpectralClustering(n_clusters=models, assign_labels='discretize', 
                                            random_state=0, affinity='precomputed').fit(affinity_matrix.numpy())
            self.cluster_model = clustering

        for _ in range(epochs): # Number of iterations for all the samples
            pred = []
            labels_full = []
            for time, i in enumerate(sets_training):
                labels = matrix_1_norm[:, i+20]
                
                for n in range(samples.shape[0]):
                    label = torch.tensor(labels[n])
                    item = (samples.shape[0]*time) + n
                    self.model_update(encoded[item], label, self.cluster_model.labels_[item]) 
    
    def test(self, sets_testing, matrix_1_norm):
        pred = []
        labels_full = []
        for i in tqdm(sets_testing):
            samples = matrix_1_norm[:, i:i+20]
            labels = matrix_1_norm[:, i+20]
            for n in range(samples.shape[0]):
                label = torch.tensor(labels[n])
                sample = torch.tensor(samples[n, :])
                # Pass samples from test to model (forward function)
                predictions = self.forward(sample, i)
                pred.append(predictions[0])
                labels_full.append(label.unsqueeze(dim=0))
        print(f"Testing root mean squared error of {(mean_squared_error(labels_full, pred, squared=False)):.3f}")

In [51]:
model = MultiModel_With_NonLinear_Sin(2, 20)
for i in range(1,3):
    print(i)
    model.train(sets_training, matrix_1_norm, 1)
    model.test(sets_testing, matrix_1_norm)

1
tensor([ 1.,  1., -1.,  ...,  1., -1.,  1.])
tensor([ 1.,  1., -1.,  ...,  1., -1., -1.])
tensor([ 1.,  1., -1.,  ..., -1., -1., -1.])
tensor([ 1.,  1.,  1.,  ..., -1.,  1., -1.])
tensor([ 1., -1.,  1.,  ..., -1., -1., -1.])
tensor([ 1., -1.,  1.,  ..., -1.,  1.,  1.])
tensor([ 1., -1.,  1.,  ..., -1.,  1.,  1.])
tensor([ 1.,  1., -1.,  ..., -1.,  1., -1.])
tensor([1., 1., 1.,  ..., 1., 1., 1.])
tensor([1., 1., 1.,  ..., 1., 1., 1.])
tensor([ 1., -1., -1.,  ...,  1.,  1., -1.])
tensor([ 1.,  1., -1.,  ...,  1.,  1.,  1.])
tensor([1., 1., 1.,  ..., 1., 1., 1.])
tensor([ 1.,  1., -1.,  ..., -1.,  1.,  1.])
tensor([ 1.,  1., -1.,  ..., -1.,  1., -1.])
tensor([ 1., -1.,  1.,  ..., -1.,  1., -1.])
tensor([ 1., -1.,  1.,  ...,  1., -1.,  1.])
tensor([ 1.,  1., -1.,  ...,  1.,  1.,  1.])
tensor([ 1.,  1.,  1.,  ..., -1.,  1.,  1.])
tensor([ 1.,  1., -1.,  ...,  1.,  1., -1.])
tensor([ 1.,  1.,  1.,  ..., -1.,  1., -1.])
tensor([ 1.,  1.,  1.,  ..., -1.,  1.,  1.])
tensor([ 1., -1.,  1.,  ..

RuntimeError: [enforce fail at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\c10\core\impl\alloc_cpu.cpp:81] data. DefaultCPUAllocator: not enough memory: you tried to allocate 427993344 bytes.

#### Bundle with season - Circular

In [4]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data

# Note: this example requires the torchmetrics library: https://torchmetrics.readthedocs.io
from sklearn.metrics import mean_squared_error
from tqdm import tqdm

from torchhd import functional, bind
from torchhd import embeddings
from torchhd import cos_similarity

from scipy.special import softmax

import time
from sklearn.cluster import SpectralClustering

d = 10000
models = 4

# Model based on RegHD application for Single model regression -> No comparing which cluster
class MultiModel_With_NonLinear_Sin(nn.Module):
    def __init__(self, num_classes, size):
        super(MultiModel_With_NonLinear_Sin, self).__init__()

        self.lr = 0.00001 # alpha
        self.M = torch.zeros(models, d).double() # Model initializes in 0
        self.project = embeddings.Projection(size, d).double() # 5 features, 10000 dimensions = hypervectors like weights?
        self.project.weight.data.normal_(0, 1) # Normal distributions mean=0.0, std=1.0
        self.bias = nn.parameter.Parameter(torch.empty(d), requires_grad=False)
        self.bias.data.uniform_(0, 2 * math.pi) # bias
        self.cluster_model = None
        self.training_samples = None
        self.time_stamp = functional.circular_hv(4, d)

    def encode(self, x, time): # encoding a value
        for i in range(len(x)):
            x[i] = float(x[i])
        enc = self.project(x)
        sample_hv = torch.cos(enc + self.bias) * torch.sin(enc) 
        season = ( time // 13 ) % 4
        return bind(functional.hard_quantize(sample_hv), self.time_stamp[season])

    def model_update(self, x, y, cluster): # update # y = no hv
        x = torch.reshape(x, (1,d))
        model_result = F.linear(x.type(torch.FloatTensor), self.M[cluster].type(torch.FloatTensor))
        update = self.M[cluster] + (self.lr * (y - model_result) * x) # Model + alpha*(Error)*(x)
        self.M[cluster] = update[0] # New 
    
    def forward(self, x, time):
        enc = torch.reshape(self.encode(x, time), (1,d))
        similarity = cos_similarity(self.training_samples, enc)
        i = list(similarity.numpy()).index(max(similarity).numpy()[0])
        model_result = F.linear(enc, self.M[self.cluster_model.labels_[i]])
        return model_result # Return the resolutions
    
    def train(self, sets_training, matrix_1_norm, epochs):
        
        full_samples = {}
        for time, i in enumerate(sets_training):
            samples = matrix_1_norm[:, i:i+20]
            for n in range(0, samples.shape[0], 1):
                sample = torch.tensor(samples[n, :])
                full_samples[(samples.shape[0]*time) + n] = (sample, i)

        if(self.cluster_model == None):
            encoded = torch.zeros(len(full_samples), 10000)
            for i, samp in (full_samples.items()):
                encoded[i, :] = self.encode(samp[0], samp[1])
            self.training_samples = encoded
            affinity_matrix = cos_similarity(encoded, encoded)
            clustering = SpectralClustering(n_clusters=models, assign_labels='discretize', 
                                            random_state=0, affinity='precomputed').fit(affinity_matrix)
            self.cluster_model = clustering

        for _ in range(epochs): # Number of iterations for all the samples
            pred = []
            labels_full = []
            for time, i in enumerate(sets_training):
                #samples = matrix_1_norm[:, i:i+40]
                labels = matrix_1_norm[:, i+20]
                
                for n in range(samples.shape[0]):
                    label = torch.tensor(labels[n])
                    item = (samples.shape[0]*time) + n
                    self.model_update(self.training_samples[item], label, self.cluster_model.labels_[item]) 
    
    def test(self, sets_testing, matrix_1_norm):
        pred = []
        labels_full = []
        for i in tqdm(sets_testing):
            samples = matrix_1_norm[:, i:i+20]
            labels = matrix_1_norm[:, i+20]
            for n in range(samples.shape[0]):
                label = torch.tensor(labels[n])
                sample = torch.tensor(samples[n, :])
                # Pass samples from test to model (forward function)
                predictions = self.forward(sample, i)
                pred.append(predictions[0])
                labels_full.append(label.unsqueeze(dim=0))
        print(f"Testing root mean squared error of {(mean_squared_error(labels_full, pred, squared=False)):.3f}")

In [5]:
model = MultiModel_With_NonLinear_Sin(2, 20)
for i in range(1,3):
    print(i)
    model.train(sets_training, matrix_1_norm, 1)
    model.test(sets_testing, matrix_1_norm)

1


100%|██████████| 3/3 [17:19<00:00, 346.54s/it]
  array = np.asarray(array, order=order, dtype=dtype)
  array = np.asarray(array, order=order, dtype=dtype)
  y_true = check_array(y_true, ensure_2d=False, dtype=dtype)


Testing root mean squared error of 0.162
2


UnboundLocalError: local variable 'encoded' referenced before assignment

#### Bundle with Circular Hv seasons

In [6]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data

# Note: this example requires the torchmetrics library: https://torchmetrics.readthedocs.io
from sklearn.metrics import mean_squared_error
from tqdm import tqdm

from torchhd import functional, bundle
from torchhd import embeddings
from torchhd import cos_similarity

from scipy.special import softmax

import time
from sklearn.cluster import SpectralClustering

d = 10000
models = 4

# Model based on RegHD application for Single model regression -> No comparing which cluster
class MultiModel_With_NonLinear_Sin(nn.Module):
    def __init__(self, num_classes, size):
        super(MultiModel_With_NonLinear_Sin, self).__init__()

        self.lr = 0.00001 # alpha
        self.M = torch.zeros(models, d).double() # Model initializes in 0
        self.project = embeddings.Projection(size, d).double() # 5 features, 10000 dimensions = hypervectors like weights?
        self.project.weight.data.normal_(0, 1) # Normal distributions mean=0.0, std=1.0
        self.bias = nn.parameter.Parameter(torch.empty(d), requires_grad=False)
        self.bias.data.uniform_(0, 2 * math.pi) # bias
        self.cluster_model = None
        self.training_samples = None
        self.time_stamp = functional.circular_hv(4, d)

    def encode(self, x, time): # encoding a value
        for i in range(len(x)):
            x[i] = float(x[i])
        enc = self.project(x)
        sample_hv = torch.cos(enc + self.bias) * torch.sin(enc) 
        season = ( time // 13 ) % 4
        return bundle(functional.hard_quantize(sample_hv), self.time_stamp[season])

    def model_update(self, x, y, cluster): # update # y = no hv
        x = torch.reshape(x, (1,d))
        model_result = F.linear(x.type(torch.FloatTensor), self.M[cluster].type(torch.FloatTensor))
        update = self.M[cluster] + (self.lr * (y - model_result) * x) # Model + alpha*(Error)*(x)
        self.M[cluster] = update[0] # New 
    
    def forward(self, x, time):
        enc = torch.reshape(self.encode(x, time), (1,d))
        similarity = cos_similarity(self.training_samples, enc)
        i = list(similarity.numpy()).index(max(similarity).numpy()[0])
        model_result = F.linear(enc, self.M[self.cluster_model.labels_[i]])
        return model_result # Return the resolutions
    
    def train(self, sets_training, matrix_1_norm, epochs):
        
        full_samples = {}
        for time, i in enumerate(sets_training):
            samples = matrix_1_norm[:, i:i+20]
            for n in range(0, samples.shape[0], 1):
                sample = torch.tensor(samples[n, :])
                full_samples[(samples.shape[0]*time) + n] = (sample, i)

        if(self.cluster_model == None):
            encoded = torch.zeros(len(full_samples), 10000)
            for i, samp in (full_samples.items()):
                encoded[i, :] = self.encode(samp[0], samp[1])
            self.training_samples = encoded
            affinity_matrix = cos_similarity(encoded, encoded)
            clustering = SpectralClustering(n_clusters=models, assign_labels='discretize', 
                                            random_state=0, affinity='precomputed').fit(affinity_matrix)
            self.cluster_model = clustering

        for _ in range(epochs): # Number of iterations for all the samples
            pred = []
            labels_full = []
            for time, i in enumerate(sets_training):
                #samples = matrix_1_norm[:, i:i+40]
                labels = matrix_1_norm[:, i+20]
                
                for n in range(samples.shape[0]):
                    label = torch.tensor(labels[n])
                    item = (samples.shape[0]*time) + n
                    self.model_update(self.training_samples[item], label, self.cluster_model.labels_[item]) 
    
    def test(self, sets_testing, matrix_1_norm):
        pred = []
        labels_full = []
        for i in tqdm(sets_testing):
            samples = matrix_1_norm[:, i:i+20]
            labels = matrix_1_norm[:, i+20]
            for n in range(samples.shape[0]):
                label = torch.tensor(labels[n])
                sample = torch.tensor(samples[n, :])
                # Pass samples from test to model (forward function)
                predictions = self.forward(sample, i)
                pred.append(predictions[0])
                labels_full.append(label.unsqueeze(dim=0))
        print(f"Testing root mean squared error of {(mean_squared_error(labels_full, pred, squared=False)):.3f}")

In [7]:
model = MultiModel_With_NonLinear_Sin(2, 20)
for i in range(1,3):
    print(i)
    model.train(sets_training, matrix_1_norm, 1)
    model.test(sets_testing, matrix_1_norm)

1


100%|██████████| 3/3 [18:18<00:00, 366.15s/it]
  array = np.asarray(array, order=order, dtype=dtype)
  array = np.asarray(array, order=order, dtype=dtype)
  y_true = check_array(y_true, ensure_2d=False, dtype=dtype)


Testing root mean squared error of 0.200
2


  0%|          | 0/3 [01:41<?, ?it/s]


KeyboardInterrupt: 

#### MORE SEASONS

In [16]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data

# Note: this example requires the torchmetrics library: https://torchmetrics.readthedocs.io
from sklearn.metrics import mean_squared_error
from tqdm import tqdm

from torchhd import functional, bind
from torchhd import embeddings
from torchhd import cos_similarity

from scipy.special import softmax

import time
from sklearn.cluster import SpectralClustering

d = 10000
models = 4

# Model based on RegHD application for Single model regression -> No comparing which cluster
class MultiModel_With_NonLinear_Sin(nn.Module):
    def __init__(self, num_classes, size):
        super(MultiModel_With_NonLinear_Sin, self).__init__()

        self.lr = 0.00001 # alpha
        self.M = torch.zeros(models, d).double() # Model initializes in 0
        self.project = embeddings.Projection(size, d).double() # 5 features, 10000 dimensions = hypervectors like weights?
        self.project.weight.data.normal_(0, 1) # Normal distributions mean=0.0, std=1.0
        self.bias = nn.parameter.Parameter(torch.empty(d), requires_grad=False)
        self.bias.data.uniform_(0, 2 * math.pi) # bias
        self.cluster_model = None
        self.training_samples = None
        self.time_stamp = functional.circular_hv(8, d)

    def encode(self, x, time): # encoding a value
        for i in range(len(x)):
            x[i] = float(x[i])
        enc = self.project(x)
        sample_hv = torch.cos(enc + self.bias) * torch.sin(enc) 
        season = ( time // 13 ) % 8
        return bind(functional.hard_quantize(sample_hv), self.time_stamp[season])

    def model_update(self, x, y, cluster): # update # y = no hv
        x = torch.reshape(x, (1,d))
        model_result = F.linear(x.type(torch.FloatTensor), self.M[cluster].type(torch.FloatTensor))
        update = self.M[cluster] + (self.lr * (y - model_result) * x) # Model + alpha*(Error)*(x)
        self.M[cluster] = update[0] # New 
    
    def forward(self, x, time):
        enc = torch.reshape(self.encode(x, time), (1,d))
        similarity = cos_similarity(self.training_samples, enc)
        i = list(similarity.numpy()).index(max(similarity).numpy()[0])
        model_result = F.linear(enc, self.M[self.cluster_model.labels_[i]])
        return model_result # Return the resolutions
    
    def train(self, sets_training, matrix_1_norm, epochs):
        
        full_samples = {}
        for time, i in enumerate(sets_training):
            samples = matrix_1_norm[:, i:i+20]
            for n in range(0, samples.shape[0], 1):
                sample = torch.tensor(samples[n, :])
                full_samples[(samples.shape[0]*time) + n] = (sample, i)

        if(self.cluster_model == None):
            encoded = torch.zeros(len(full_samples), 10000)
            for i, samp in (full_samples.items()):
                encoded[i, :] = self.encode(samp[0], samp[1])
            self.training_samples = encoded
            affinity_matrix = cos_similarity(encoded, encoded)
            clustering = SpectralClustering(n_clusters=models, assign_labels='discretize', 
                                            random_state=0, affinity='precomputed').fit(affinity_matrix)
            self.cluster_model = clustering

        for _ in range(epochs): # Number of iterations for all the samples
            pred = []
            labels_full = []
            for time, i in enumerate(sets_training):
                #samples = matrix_1_norm[:, i:i+40]
                labels = matrix_1_norm[:, i+20]
                
                for n in range(samples.shape[0]):
                    label = torch.tensor(labels[n])
                    item = (samples.shape[0]*time) + n
                    self.model_update(self.training_samples[item], label, self.cluster_model.labels_[item]) 
    
    def test(self, sets_testing, matrix_1_norm):
        pred = []
        labels_full = []
        for i in tqdm(sets_testing):
            samples = matrix_1_norm[:, i:i+20]
            labels = matrix_1_norm[:, i+20]
            for n in range(samples.shape[0]):
                label = torch.tensor(labels[n])
                sample = torch.tensor(samples[n, :])
                # Pass samples from test to model (forward function)
                predictions = self.forward(sample, i)
                pred.append(predictions[0])
                labels_full.append(label.unsqueeze(dim=0))
        print(f"Testing root mean squared error of {(mean_squared_error(labels_full, pred, squared=False)):.3f}")

In [17]:
model = MultiModel_With_NonLinear_Sin(2, 20)
model.train(sets_training, matrix_1_norm, 1)
model.test(sets_testing, matrix_1_norm)

  w = np.where(isolated_node_mask, 1, np.sqrt(w))


ValueError: array must not contain infs or NaNs

#### Tryit with London Smartmeters becauuuuse -> Time series are longer -> Better temporal encoding

##### Normal Spectral Clustering RegHD

In [18]:
parent = 'multipletimeseriesforecasting/preprocessed-data/'

path = os.path.join(parent, 'LondonSmartMeters/London_elec_HalfHourly.csv')

dl = DatasetLoader('LondonSmartMeters', path)

matrix_1_original = dl.dataset_load_and_preprocess("original")
matrix_1_norm = dl.dataset_load_and_preprocess("normalized")
print(matrix_1_norm.shape)

---------------------------------------------------
Dataset name:  LondonSmartMeters
Dataset contains missing values?  False
--------------------------
---------------------------------------------------
Dataset name:  LondonSmartMeters
Dataset contains missing values?  False
--------------------------
(504, 9983)


In [28]:
sample_len = int(matrix_1_norm.shape[1]*(0.01))
print("Sample_len total", sample_len)

np.random.seed(1)
sets = np.random.choice(matrix_1_norm.shape[1]-40, int(sample_len), replace=False)
sets_training, sets_testing = sets[:int(len(sets)*.8)], sets[int(len(sets)*.8):]
print(sets_training, sets_testing)

Sample_len total 99
[7809 1518  895  274 3329 2245 9733 2680 8824 7093  372 1433 6801 4114
 9236 2932 4638 3983 7234  825 5343 2385 5821 8316 7868 6568 2597 2844
 2373 1797 4631 9591 3085 8557 2575 7876 2122 9180 2940 6720 4997 1731
 5387  574 2311  278 2167  260 5471 9200 4171  804 4036 9427 7394 4511
 6375 6748 1967 6327 8052 6229 9620  793 5260 5142 3382 2354 4506 8244
 5569 7793 1163 9197 5358 8850  222 9204  218] [8975 3709 6304  763 8232 1510 8759 9882 2625 5220 4076 4033  408 7624
 6157 5696 6790 9280 3019 7779]


In [29]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data

# Note: this example requires the torchmetrics library: https://torchmetrics.readthedocs.io
from sklearn.metrics import mean_squared_error
from tqdm import tqdm

from torchhd import functional
from torchhd import embeddings
from torchhd import cos_similarity

from scipy.special import softmax

import time
from sklearn.cluster import SpectralClustering

d = 10000
models = 32

# Model based on RegHD application for Single model regression -> No comparing which cluster
class MultiModel_With_NonLinear_Sin(nn.Module):
    def __init__(self, num_classes, size):
        super(MultiModel_With_NonLinear_Sin, self).__init__()

        self.lr = 0.00001 # alpha
        self.M = torch.zeros(models, d).double() # Model initializes in 0
        self.project = embeddings.Projection(size, d).double() # 5 features, 10000 dimensions = hypervectors like weights?
        self.project.weight.data.normal_(0, 1) # Normal distributions mean=0.0, std=1.0
        self.bias = nn.parameter.Parameter(torch.empty(d), requires_grad=False)
        self.bias.data.uniform_(0, 2 * math.pi) # bias
        self.cluster_model = None
        self.training_samples = None

    def encode(self, x): # encoding a value
        for i in range(len(x)):
            x[i] = float(x[i])
        enc = self.project(x)
        sample_hv = torch.cos(enc + self.bias) * torch.sin(enc) 
        return functional.hard_quantize(sample_hv)

    def model_update(self, x, y, cluster): # update # y = no hv
        x = torch.reshape(x, (1,d))
        model_result = F.linear(x.type(torch.FloatTensor), self.M[cluster].type(torch.FloatTensor))
        update = self.M[cluster] + (self.lr * (y - model_result) * x) # Model + alpha*(Error)*(x)
        self.M[cluster] = update[0] # New 
    
    def forward(self, x):
        enc = torch.reshape(self.encode(x), (1,d))
        similarity = cos_similarity(self.training_samples, enc)
        i = list(similarity.numpy()).index(max(similarity).numpy()[0])
        model_result = F.linear(enc, self.M[self.cluster_model.labels_[i]])
        return model_result # Return the resolutions
    
    def train(self, sets_training, matrix_1_norm, epochs):
        
        full_samples = {}
        for time, i in enumerate(sets_training):
            samples = matrix_1_norm[:, i:i+20]
            for n in range(0, samples.shape[0], 1):
                sample = torch.tensor(samples[n, :])
                full_samples[(samples.shape[0]*time) + n] = sample

        if(self.cluster_model == None):
            encoded = torch.zeros(len(full_samples), 10000)
            for i, samp in (full_samples.items()):
                encoded[i, :] = self.encode(samp)
            self.training_samples = encoded
            affinity_matrix = cos_similarity(encoded, encoded)
            clustering = SpectralClustering(n_clusters=models, assign_labels='discretize', 
                                            random_state=0, affinity='precomputed').fit(affinity_matrix)
            self.cluster_model = clustering

        for _ in range(epochs): # Number of iterations for all the samples
            pred = []
            labels_full = []
            for time, i in enumerate(sets_training):
                print(i)
                #samples = matrix_1_norm[:, i:i+40]
                labels = matrix_1_norm[:, i+20]
                
                for n in range(samples.shape[0]):
                    label = torch.tensor(labels[n])
                    item = (samples.shape[0]*time) + n
                    self.model_update(self.training_samples[item], label, self.cluster_model.labels_[item]) 
    
    def test(self, sets_testing, matrix_1_norm):
        pred = []
        labels_full = []
        for i in tqdm(sets_testing):
            samples = matrix_1_norm[:, i:i+20]
            labels = matrix_1_norm[:, i+20]
            for n in range(samples.shape[0]):
                label = torch.tensor(labels[n])
                sample = torch.tensor(samples[n, :])
                # Pass samples from test to model (forward function)
                predictions = self.forward(sample, i)
                pred.append(predictions[0])
                labels_full.append(label.unsqueeze(dim=0))
        print(f"Testing root mean squared error of {(mean_squared_error(labels_full, pred, squared=False)):.3f}")

In [30]:
model = MultiModel_With_NonLinear_Sin(2, 20)
model.train(sets_training, matrix_1_norm, 1)
model.test(sets_testing, matrix_1_norm)

RuntimeError: [enforce fail at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\c10\core\impl\alloc_cpu.cpp:81] data. DefaultCPUAllocator: not enough memory: you tried to allocate 1592640000 bytes.