In [None]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tqdm import tqdm
import warnings

# Silence every warning category for the rest of the session.
# NOTE(review): this is a blanket filter with no category or module
# restriction, so warnings raised in later cells are hidden as well —
# consider narrowing it once the noisy warning has been identified.
warnings.simplefilter("ignore")

In [None]:
# Fetch MNIST as plain NumPy arrays and cast the labels to int.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X, y = mnist.data, mnist.target.astype(int)

# Binarise: a pixel becomes 1 when its intensity is strictly above 128, else 0.
X_bin = (X > 128).astype(np.uint8)

# Hold out 10,000 images for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X_bin, y, test_size=10000, random_state=42)

train_labels, train_counts = np.unique(y_train, return_counts=True)
print("Initial training set class distribution:")
print(dict(zip(train_labels, train_counts)))

# Balance the training set by undersampling every class down to the size of
# the rarest one. The output recorded for this cell (and the 420 encoding
# batches of 128 reported further down) shows the notebook was run on such a
# balanced set, but the code for it was missing from the cell, so a fresh
# "Restart & Run All" did not reproduce the recorded results.
# NOTE(review): which rows were originally kept is unknown; they are drawn
# here with a dedicated seeded generator, so metrics may differ slightly from
# the recorded ones. Using a separate generator leaves the global NumPy
# random state untouched.
samples_per_class = train_counts.min()
balance_rng = np.random.default_rng(42)
balanced_idx = np.sort(np.concatenate([
    balance_rng.choice(np.flatnonzero(y_train == label), size=samples_per_class, replace=False)
    for label in train_labels
]))
X_train, y_train = X_train[balanced_idx], y_train[balanced_idx]

print("\nBalanced training set class distribution:")
print(dict(zip(*np.unique(y_train, return_counts=True))))

Initial training set class distribution:
{np.int64(0): np.int64(5920), np.int64(1): np.int64(6725), np.int64(2): np.int64(6023), np.int64(3): np.int64(6107), np.int64(4): np.int64(5918), np.int64(5): np.int64(5376), np.int64(6): np.int64(5915), np.int64(7): np.int64(6238), np.int64(8): np.int64(5856), np.int64(9): np.int64(5922)}

Balanced training set class distribution:
{np.int64(0): np.int64(5376), np.int64(1): np.int64(5376), np.int64(2): np.int64(5376), np.int64(3): np.int64(5376), np.int64(4): np.int64(5376), np.int64(5): np.int64(5376), np.int64(6): np.int64(5376), np.int64(7): np.int64(5376), np.int64(8): np.int64(5376), np.int64(9): np.int64(5376)}


In [24]:
# --- Hypervector configuration -------------------------------------------
D = 4000            # length of every hypervector
num_pixels = 784    # one random hypervector is drawn per pixel index
num_classes = 10    # one prototype hypervector per class

# Class prototypes start out as all-zero vectors.
class_hvs = np.zeros(shape=(num_classes, D), dtype=np.uint8)

# Seed the global NumPy RNG, then draw the per-pixel hypervectors with each
# bit being 0 or 1 with equal probability.
np.random.seed(42)
pixel_hvs = np.random.choice([0, 1], size=(num_pixels, D), p=[0.5, 0.5]).astype(np.uint8)

# Sanity check: the share of 1-bits should sit close to 0.5.
pixel_mean = pixel_hvs.mean()
print(f"Mean value of pixel hypervectors: {pixel_mean:.4f}")

Mean value of pixel hypervectors: 0.5001


In [25]:
def permutation(x, shift=1):
    """Circularly shift hypervector(s) along their last axis.

    Args:
        x: Array-like holding one hypervector, or a stack of hypervectors
            whose last axis is the hypervector dimension.
        shift: Number of positions to rotate by (default 1). Elements pushed
            past the end wrap around to the start.

    Returns:
        The rotated array produced by ``np.roll``; ``x`` is not modified.
    """
    rotated = np.roll(x, shift=shift, axis=-1)
    return rotated

def bundle(hvs):
    """Combine a stack of hypervectors by element-wise majority vote.

    Args:
        hvs: Stack of hypervectors, one per entry along axis 0.

    Returns:
        A ``uint8`` result holding 1 wherever the mean over axis 0 is
        strictly greater than 0.5 and 0 elsewhere. For 0/1 inputs this means
        strictly more than half of the rows are 1, so an exact tie gives 0.
    """
    ones_fraction = np.mean(hvs, axis=0)
    majority = ones_fraction > 0.5
    return majority.astype(np.uint8)

def encode_image(image, pixel_hvs):
    """Encode one binarised image as a single hypervector (HoloGN-like).

    Every pixel whose value equals 1 selects its row of ``pixel_hvs``; the
    selected rows are all rotated by one position and merged by majority vote.

    Args:
        image: NumPy array of pixel values of any shape. It is flattened, and
            only entries equal to 1 count as active.
        pixel_hvs: 2-D array holding one hypervector per flattened pixel index.

    Returns:
        Hypervector of length ``pixel_hvs.shape[1]`` as returned by
        ``bundle``. An image without any active pixel maps to an all-zero
        ``uint8`` vector of that same length.
    """
    active_pixels = np.where(image.ravel() == 1)[0]
    if active_pixels.size == 0:
        # Size the fallback from pixel_hvs itself rather than from the
        # notebook-level constant D, so the result always matches the width
        # of the hypervectors that were actually passed in.
        return np.zeros(pixel_hvs.shape[1], dtype=np.uint8)
    return bundle(permutation(pixel_hvs[active_pixels], shift=1))

def encode_dataset(X, pixel_hvs, batch_size=128):
    """Encode every image in ``X`` into a hypervector, batch by batch.

    Args:
        X: Sequence/array of images; each item is passed to ``encode_image``.
        pixel_hvs: 2-D array of per-pixel hypervectors, forwarded unchanged
            to ``encode_image``.
        batch_size: Number of images handled per progress-bar step.

    Returns:
        ``uint8`` array of shape ``(len(X), pixel_hvs.shape[1])`` whose row
        ``i`` is the encoding of ``X[i]``.
    """
    # Take the hypervector width from pixel_hvs instead of the notebook-level
    # constant D, so the buffer always matches what encode_image is given.
    hv_dim = pixel_hvs.shape[1]
    X_encoded = np.zeros((len(X), hv_dim), dtype=np.uint8)
    for start in tqdm(range(0, len(X), batch_size), desc="Encoding"):
        # Slicing past the end is harmless, so the final short batch needs
        # no special handling.
        for offset, img in enumerate(X[start:start + batch_size]):
            X_encoded[start + offset] = encode_image(img, pixel_hvs)
    return X_encoded

In [26]:
# Encode the training images, then build one prototype per digit by bundling
# all encoded training images that carry that label.
X_train_encoded = encode_dataset(X_train, pixel_hvs)

for digit in range(num_classes):
    digit_hvs = X_train_encoded[y_train == digit]
    if len(digit_hvs) == 0:
        # No training example for this digit: its row of class_hvs is left
        # as it was.
        continue
    class_hvs[digit] = bundle(digit_hvs)

Encoding: 100%|██████████| 420/420 [00:30<00:00, 13.82it/s]


In [27]:
def predict_batch(X_test, pixel_hvs, class_hvs, batch_size=128):
    """Classify images by their nearest class hypervector (Hamming distance).

    Args:
        X_test: Sequence/array of images; each item is passed to
            ``encode_image``.
        pixel_hvs: Per-pixel hypervectors, forwarded to ``encode_image``.
        class_hvs: Array-like with one prototype hypervector per class; the
            row index is the predicted label.
        batch_size: Number of images encoded and compared per step.

    Returns:
        Integer array of length ``len(X_test)`` holding, for each image, the
        index of the closest class hypervector. On equal distances the
        lowest class index wins (``np.argmin`` returns the first minimum).
    """
    class_hvs = np.asarray(class_hvs)
    y_pred = np.zeros(len(X_test), dtype=int)
    for start in tqdm(range(0, len(X_test), batch_size), desc="Predicting"):
        stop = start + batch_size
        batch_encoded = np.array([encode_image(img, pixel_hvs) for img in X_test[start:stop]])

        # Hamming distances for the whole batch in one broadcasted comparison
        # instead of a Python loop over every (image, class) pair:
        # (n, 1, dim) != (1, classes, dim) -> (n, classes, dim), then count
        # mismatching positions along the last axis. The temporary boolean
        # array grows with batch_size * classes * dim.
        mismatches = batch_encoded[:, None, :] != class_hvs[None, :, :]
        distances = mismatches.sum(axis=2)
        y_pred[start:stop] = np.argmin(distances, axis=1)
    return y_pred

# Predict a label for every image in the test split.
y_pred = predict_batch(X_test, pixel_hvs=pixel_hvs, class_hvs=class_hvs)

Predicting: 100%|██████████| 79/79 [00:07<00:00, 11.18it/s]


In [28]:
# Per-class precision / recall / F1 as computed by scikit-learn.
print("Classification Report:")
report = classification_report(y_test, y_pred)
print(report)

# Overall accuracy: share of test labels that were predicted exactly.
accuracy = (y_pred == y_test).mean()
print(f"\nFinal Accuracy: {accuracy:.4f}")

Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.91      0.89       983
           1       0.85      0.92      0.88      1152
           2       0.82      0.76      0.79       967
           3       0.70      0.80      0.75      1034
           4       0.76      0.78      0.77       906
           5       0.80      0.63      0.70       937
           6       0.85      0.87      0.86       961
           7       0.91      0.83      0.87      1055
           8       0.71      0.73      0.72       969
           9       0.74      0.75      0.75      1036

    accuracy                           0.80     10000
   macro avg       0.80      0.80      0.80     10000
weighted avg       0.80      0.80      0.80     10000


Final Accuracy: 0.8008
