In [56]:
import numpy as np
from mnist import MNIST
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import accuracy_score
from PIL import Image
from matplotlib import pyplot as plt
import copy
import pandas as pd

In [57]:
model_30 = np.load("./temp/digit_vectors_seed_30.npy")
model_40 = np.load("./temp/digit_vectors_seed_40.npy")
model_50 = np.load("./temp/digit_vectors_seed_50.npy")

In [58]:
def shuffle(X, y):
    permutation = np.arange(X.shape[0])
    np.random.shuffle(permutation)
    return X[permutation], y[permutation]

def load_dataset():
    mndata = MNIST('./data/')
    X_train, labels_train = map(np.array, mndata.load_training())
    X_test, _ = map(np.array, mndata.load_testing())
    return X_train, labels_train, X_test

In [59]:
X_train, labels_train, _ = load_dataset()
# X_train, labels_train = shuffle(X_train, labels_train)
X_train, X_test, y_train, y_test = train_test_split(X_train, labels_train, test_size=0.33, random_state=42)

In [60]:
D = 10000 # dimensions in random space
IMG_LEN = 28
NUM_SAMPLES = X_train.shape[0]

In [61]:
# print("Generating random projection...")
# proj = np.random.rand(D, IMG_LEN * IMG_LEN)
print("Generating random projection...")
seed = 30
np.random.seed(seed)
proj = np.random.randint(2, size=(D, IMG_LEN * IMG_LEN))
proj[proj==0] = -1
print(proj.shape)
def get_scene(img, proj):
    return np.dot(img, proj.T)

# Transform the image vectors into the hypervectors
def get_scenes(images, proj):
    print(images.shape)
    print(proj.shape)
    return np.dot(images[:NUM_SAMPLES, :], proj.T)

# print("Projecting images to higher dim space...")
# X_train = get_scenes(X_train, proj)

Generating random projection...
(10000, 784)


In [62]:
temp = get_scene(X_train[0].reshape((1, -1)), proj)
temp[temp > 0] = 1
temp[temp <= 0] = -1

In [63]:
temp[0].shape

(10000,)

In [64]:
X_train[0].reshape((1, -1)).shape

(1, 784)

In [65]:
proj.T.shape

(784, 10000)

In [66]:
test_discrepencies = pd.read_excel("./temp/test_discrepencies.xlsx")

In [67]:
test_discrepencies.rename(columns={"Unnamed: 0": "idx"}, inplace=True)

In [68]:
test_discrepencies.head()

Unnamed: 0,idx,model_30,model_40,model_50,y
0,13,4,8,8,8
1,15,7,9,9,7
2,34,8,8,5,5
3,57,0,0,2,2
4,68,7,9,7,7


In [69]:
test_dis_model_30 = test_discrepencies[test_discrepencies["model_30"] != test_discrepencies["y"]]

In [40]:
for row in test_dis_model_30.iterrows():
    idx = row[1]["idx"]
    y_false = row[1]["model_30"]
    y_true = row[1]["y"]
    hv = get_scene(X_train[idx].reshape((1, -1)), proj)
    hv[hv > 0] = 1
    hv[hv <= 0] = -1
    model_30[y_false] -= hv[0]
    model_30[y_true] += hv[0]
    model_30[model_30 > 0] = 1
    model_30[model_30 <= 0] = -1

In [41]:
def classify(images, digit_vectors):
    similarities = cosine_similarity(images, digit_vectors)
    classifications = np.argmax(similarities, axis=1)
    return classifications

In [42]:
# print("Generating random projection...")
# proj = np.random.rand(D, IMG_LEN * IMG_LEN)
print("Generating random projection...")
seed = 50
np.random.seed(seed)
proj = np.random.randint(2, size=(D, IMG_LEN * IMG_LEN))
proj[proj==0] = -1
print(proj.shape)
def get_scene(img, proj):
    return np.dot(proj, img)

# Transform the image vectors into the hypervectors
def get_scenes(images, proj):
    print(images.shape)
    print(proj.shape)
    return np.dot(images[:NUM_SAMPLES, :], proj.T)

print("Projecting images to higher dim space...")
X_train = get_scenes(X_train, proj)

Generating random projection...
(10000, 784)
Projecting images to higher dim space...
(40200, 784)
(10000, 784)


In [43]:
X_train[X_train > 0] = 1
X_train[X_train <= 0] = -1

In [44]:
print("Train accuracy:")
predictions = classify(X_train, model_30)
acc = accuracy_score(y_train[:X_train.shape[0]], predictions)
print(acc)

Train accuracy:
0.09893034825870647


In [45]:
X_train.shape

(40200, 10000)

In [46]:
model_30.shape

(10, 10000)