In [295]:
import sklearn.datasets
import numpy as np
from numpy.linalg import norm
import matplotlib.pyplot as plt
import pandas as pd
import os
import torch
import time

np.random.seed(1)
NUM_FEATURES = 784

In [224]:
dataset = sklearn.datasets.fetch_openml("mnist_784", version=1)

In [225]:
X = dataset.data.to_numpy()
y = dataset.target.to_numpy()

In [336]:
def randomVector(n):
    """
    Randomly generates hypervector of size n
    """
    return (2*np.random.randint(0, 2, n)) - 1

def bind(a, b):
    return np.multiply(a, b)

def randomFlip(vec, idxs):
    """
    Returns a vector with the inx bits flipped.
    """
    assert len(idxs) <= len(vec), "Number of bits to flip greater than vector length"
    
    flipped_vec = np.copy(vec)
    
    for i in idxs:
        flipped_vec[i] *= -1
        
    return flipped_vec
    

def hammingDist(a, b):
    """
    Normalized hamming distance between two vectors.
    Vectors are assumed to be binarized.
    """
    assert len(a) == len(b), "Vectors are not the same length"
    return np.count_nonzero(a!=b) / len(a)

In [379]:
def buildLevelVectors(levels, dim):
    """
    Level hypervectors, level hypervector at i is copied from level hypervector i-1 with D/(2*255) bits flipped
    """
    levelvectors = []
    min_vector = randomVector(dim)
    
    levelvectors.append(min_vector)
    
    window = 0
    indexes = np.arange(0, dim)
    np.random.shuffle(indexes)
    bit_flips = int(dim/(2*255))

    for i in range(1, levels):
        mid_vec = np.copy(levelvectors[i-1])
        mid_vec = randomFlip(mid_vec, indexes[window:window+bit_flips])
        window += bit_flips

        levelvectors.append(mid_vec)
        
    return np.array(levelvectors)

def buildPosVectors(n, dim):
    """
    Array of n random hypervectors to encode position of features.
    """
    posvectors = []
    for i in range(n):
        posvectors.append(randomVector(dim))
    return np.array(posvectors)

def rmrBinarize(x):
    """
    Random majority rule. 0 are randomly set to 1 or -1
    """
    h = [i/i if i != 0 else np.random.choice([-1.0, 1.0]) for i in x]
    return h
def fmrBinarize(x, k):
    """
    Fixed majority rule. 0 are set to k
    """
    h = [i/i if i != 0 else 1.0 for i in x]
    return h

In [383]:
class Encoder:
    def __init__(self, dim, n_features):
        self.D = dim
        self.n_features = n_features
        self.B = buildPosVectors(n_features, dim)
        self.levelvectors = buildLevelVectors(256, dim)
        
        
        self.classvecs = {}
        
    def encodeVector(self, X, binarizetype):
        h = np.zeros(self.D)
        for i, feature in enumerate(X):
            h += np.copy(self.levelvectors[int(feature)]) * self.B[i]       
        
        if binarizetype == 'rmr':
            return rmrBinarize(h)
        else:
            return fmrBinarize(h, 1)
        
    
    def train(self, X, y, binarizetype):
        assert len(X) == len(y), "len(X) len(y) not equal"
        
        for i, obs in enumerate(X):
            target = y[i]
            h = self.encodeVector(obs)
            
            if target not in self.classvecs.keys():
                self.classvecs[target] = h
            else:
                self.classvecs[target] += h
                
    def predict(self, x):
        h = self.encodeVector(x, binarizetype)
        dists = {}
        
        for key, val in self.classvecs.items():
            dists[key] = hammingDist(h, val)
            
        return dists

In [384]:
encoder = Encoder(5120, NUM_FEATURES)

In [385]:
TRAIN_SUBSET = 100

start = time.time()
encoder.train(X[0:TRAIN_SUBSET], y[0:TRAIN_SUBSET], 'rmr')
end = time.time()
print("%f seconds elapsed. Avg %f seconds per observation" % (end-start, (end-start) / TRAIN_SUBSET))

TypeError: encodeVector() missing 1 required positional argument: 'binarizetype'

In [358]:
print(encoder.predict(X[101]))

{'5': 0.9849609375, '0': 0.9865234375, '4': 0.9861328125, '1': 0.9861328125, '9': 0.98671875, '2': 0.983984375, '3': 0.986328125, '6': 0.9865234375, '7': 0.9865234375, '8': 0.986328125}
