In [None]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import normalize
from sklearn.datasets import make_classification
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

# Step 1: Generate synthetic dataset
# 500 samples with 20 features (15 of them informative) spread over 3 classes.
# random_state=42 pins the generated data so every run starts from the same X, y.
X, y = make_classification(
    n_samples=500, n_features=20, n_informative=15, n_classes=3, random_state=42
)

# Step 2: Hyperdimensional Computing (HDC) Encoding
def encode_to_hypervectors(data, dim=1000, seed=42):
    """Encode each row of ``data`` as a sign-binarized random projection.

    Parameters
    ----------
    data : array of shape (n_samples, n_features)
        Input feature matrix; must expose ``.shape``.
    dim : int, default 1000
        Number of hypervector dimensions (columns of the projection matrix).
    seed : int, default 42
        Seed for the Gaussian projection matrix. The default reproduces the
        matrix the notebook has always used.

    Returns
    -------
    numpy.ndarray of shape (n_samples, dim)
        ``np.sign`` of the projected data: entries are -1 or +1, and 0 only
        where a projection is exactly zero.
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by np.random.randn, but leaves the global NumPy RNG alone so
    # calling this function does not reset randomness elsewhere in the notebook.
    rng = np.random.RandomState(seed)
    projection_matrix = rng.randn(data.shape[1], dim)
    hypervectors = np.dot(data, projection_matrix)
    return np.sign(hypervectors)  # Binarize to create hypervectors

# Encode data to hypervectors
hdc_vectors = encode_to_hypervectors(X)

# Step 3: Perform PCA (Latent Component Analysis)
# random_state is pinned because PCA's default solver selection can fall back
# to a randomized SVD for wide inputs like this one; without a seed the latent
# components (and everything downstream) may vary slightly between runs.
# NOTE(review): PCA is fit on all rows before the train/test split below, so
# the held-out rows influence the latent components.
pca = PCA(n_components=10, random_state=42)
latent_components = pca.fit_transform(hdc_vectors)

# Step 4: Combine HDC and LCM features
combined_features = np.hstack([hdc_vectors, latent_components])

# Normalize combined features for downstream use (each row scaled to unit L2 norm)
normalized_features = normalize(combined_features)

# Step 5: Train/Test split for classification
# Plain positional 80/20 split: the first 80% of rows train, the rest test.
split = int(0.8 * len(normalized_features))
X_train, X_test = normalized_features[:split], normalized_features[split:]
y_train, y_test = y[:split], y[split:]

# Step 6: Classify with SVM
classifier = SVC(kernel="linear", random_state=42)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)

# Step 7: Evaluate performance
accuracy = accuracy_score(y_test, y_pred)
# Similarity of the first test row against the next four test rows
cos_sim = cosine_similarity(X_test[:1], X_test[1:5])  # Example of similarity measurement

# Output results
{
    "Accuracy": accuracy,
    "Cosine Similarities (example)": cos_sim.tolist(),
    "Latent Component Explained Variance Ratio": pca.explained_variance_ratio_.tolist(),
}

{'Accuracy': 0.72,
 'Cosine Similarities (example)': [[0.05469023514409879,
   0.38723056907918124,
   0.13230486553120455,
   0.13452673573850304]],
 'Latent Component Explained Variance Ratio': [0.16977878709720945,
  0.10842187016820352,
  0.05788565452195865,
  0.047540718585688285,
  0.043384314224870636,
  0.04006247995360765,
  0.036778212717841686,
  0.02987207864306066,
  0.025010354804586472,
  0.022676241564176122]}

In [None]:
from sklearn.model_selection import GridSearchCV

# Step 1: Optimize HDC dimensionality and PCA components
hdc_dimensions = [500, 1000, 2000]
pca_components = [5, 10, 15]

# Recompute the 80/20 boundary here so this cell does not depend on a `split`
# variable left behind by an earlier cell.
split = int(0.8 * len(X))

# The labels do not change with the feature encoding, so slice them once.
y_train, y_test = y[:split], y[split:]

# SVM hyperparameters explored by the inner cross-validated grid search.
param_grid = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']}

# Start below any reachable accuracy so the first configuration is always
# recorded and best_params is never None when the summary is built.
best_accuracy = float("-inf")
best_params = None
best_model = None

for hdc_dim in hdc_dimensions:
    # Re-encode data to hypervectors with new dimensionality
    hdc_vectors = encode_to_hypervectors(X, dim=hdc_dim)

    for pca_comp in pca_components:
        # Perform PCA with a different number of components.
        # random_state pins the solver so repeated runs pick the same winner.
        pca = PCA(n_components=pca_comp, random_state=42)
        latent_components = pca.fit_transform(hdc_vectors)

        # Combine HDC and LCM features
        combined_features = np.hstack([hdc_vectors, latent_components])
        normalized_features = normalize(combined_features)

        # Train/Test split for classification
        X_train, X_test = normalized_features[:split], normalized_features[split:]

        # Train SVM classifier with grid search for hyperparameters
        classifier = GridSearchCV(SVC(random_state=42), param_grid, cv=3)
        classifier.fit(X_train, y_train)
        y_pred = classifier.predict(X_test)

        # Evaluate performance
        accuracy = accuracy_score(y_test, y_pred)

        # Store the best result
        # NOTE(review): the winning (hdc_dim, pca_comp) pair is chosen by
        # test-set accuracy, so "Best Accuracy" is an optimistic estimate.
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_params = (hdc_dim, pca_comp, classifier.best_params_)
            best_model = classifier

# Step 2: Output optimized results
{
    "Best Accuracy": best_accuracy,
    "Optimal HDC Dimension": best_params[0],
    "Optimal PCA Components": best_params[1],
    "Optimal SVM Parameters": best_params[2],
}


{'Best Accuracy': 0.88,
 'Optimal HDC Dimension': 500,
 'Optimal PCA Components': 5,
 'Optimal SVM Parameters': {'C': 10, 'kernel': 'rbf'}}

In [None]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Step 1: Load Fashion MNIST dataset (10% subset)
fashion_mnist = fetch_openml('Fashion-MNIST', version=1, as_frame=False)
X_fashion, y_fashion = fashion_mnist['data'], fashion_mnist['target'].astype(int)

# Sample 10% of the data for faster processing
# (stratified, so class proportions in the subset follow the full label array)
sample_size = int(0.1 * len(X_fashion))
X_fashion, _, y_fashion, _ = train_test_split(X_fashion, y_fashion, train_size=sample_size, stratify=y_fashion, random_state=42)

# Step 2: Preprocess Fashion MNIST data
# NOTE(review): the scaler (and the PCA below) are fit on the whole subset
# before the train/test split in Step 4, so test rows influence both.
scaler = StandardScaler()
X_fashion_scaled = scaler.fit_transform(X_fashion)

# Step 3: HDC Encoding and PCA Optimization
optimal_hdc_dim = 500  # From previous optimization
optimal_pca_comp = 5   # From previous optimization

# Encode to hypervectors
hdc_vectors_fashion = encode_to_hypervectors(X_fashion_scaled, dim=optimal_hdc_dim)

# Perform PCA
# random_state is pinned like every other stochastic step in this cell so the
# latent components do not depend on which SVD solver PCA selects.
pca_fashion = PCA(n_components=optimal_pca_comp, random_state=42)
latent_components_fashion = pca_fashion.fit_transform(hdc_vectors_fashion)

# Combine HDC and LCM features
combined_features_fashion = np.hstack([hdc_vectors_fashion, latent_components_fashion])
normalized_features_fashion = normalize(combined_features_fashion)

# Step 4: Train/Test split
X_train_fashion, X_test_fashion, y_train_fashion, y_test_fashion = train_test_split(
    normalized_features_fashion, y_fashion, test_size=0.2, random_state=42, stratify=y_fashion
)

# Step 5: Train and Evaluate SVM with optimal parameters
# C and kernel are hard-coded to the values reported by the earlier search cell.
svm_classifier_fashion = SVC(C=10, kernel='rbf', random_state=42)
svm_classifier_fashion.fit(X_train_fashion, y_train_fashion)
y_pred_fashion = svm_classifier_fashion.predict(X_test_fashion)

# Step 6: Evaluate performance on Fashion MNIST
fashion_accuracy = accuracy_score(y_test_fashion, y_pred_fashion)
fashion_cos_sim = cosine_similarity(X_test_fashion[:1], X_test_fashion[1:5])  # Example similarity

# Output results
{
    "Fashion MNIST Accuracy": fashion_accuracy,
    "Cosine Similarities (example)": fashion_cos_sim.tolist(),
    "Latent Component Explained Variance Ratio": pca_fashion.explained_variance_ratio_.tolist(),
}

{'Fashion MNIST Accuracy': 0.8328571428571429,
 'Cosine Similarities (example)': [[-0.4022284590230415,
   -0.31704810072676737,
   0.13938251078071415,
   -0.15629955804816287]],
 'Latent Component Explained Variance Ratio': [0.1697821374874963,
  0.10693141187758717,
  0.050897886418453504,
  0.02984687155923866,
  0.025504891240072103]}

In [None]:
# Reload Fashion MNIST data (10% subset) and preprocess
from sklearn.datasets import fetch_openml

# Reload the dataset
# NOTE(review): this repeats the download, sampling and scaling of the previous
# Fashion-MNIST cell with identical arguments and seeds. The cell also relies on
# names that are only defined in earlier cells (train_test_split,
# StandardScaler, optimal_hdc_dim, encode_to_hypervectors, normalize, SVC,
# accuracy_score, cosine_similarity), so it cannot run on its own.
fashion_mnist = fetch_openml('Fashion-MNIST', version=1, as_frame=False)
X_fashion, y_fashion = fashion_mnist['data'], fashion_mnist['target'].astype(int)

# Sample 10% of the data for faster processing
# (stratified on the labels, seeded with random_state=42)
sample_size = int(0.1 * len(X_fashion))
X_fashion, _, y_fashion, _ = train_test_split(X_fashion, y_fashion, train_size=sample_size, stratify=y_fashion, random_state=42)

# Preprocess data
# The scaler is fit on the whole subset, i.e. before the train/test split below.
scaler = StandardScaler()
X_fashion_scaled = scaler.fit_transform(X_fashion)

# Pure HDC Encoding and Evaluation
# Step 1: Encode to hypervectors using optimal dimension (500)
pure_hdc_vectors_fashion = encode_to_hypervectors(X_fashion_scaled, dim=optimal_hdc_dim)

# Step 2: Normalize HDC features
# Row-wise normalization of the sign vectors; no PCA columns are appended here,
# which is what distinguishes this baseline from the hybrid pipeline.
pure_hdc_normalized = normalize(pure_hdc_vectors_fashion)

# Step 3: Train/Test split for pure HDC model
# Same test_size, seed and stratification as the hybrid cell's split.
X_train_pure_hdc, X_test_pure_hdc, y_train_pure_hdc, y_test_pure_hdc = train_test_split(
    pure_hdc_normalized, y_fashion, test_size=0.2, random_state=42, stratify=y_fashion
)

# Step 4: Train and Evaluate SVM on pure HDC features
# Same SVM settings as the hybrid model so the two accuracies are comparable.
svm_classifier_pure_hdc = SVC(C=10, kernel='rbf', random_state=42)
svm_classifier_pure_hdc.fit(X_train_pure_hdc, y_train_pure_hdc)
y_pred_pure_hdc = svm_classifier_pure_hdc.predict(X_test_pure_hdc)

# Step 5: Evaluate performance of pure HDC model
pure_hdc_accuracy = accuracy_score(y_test_pure_hdc, y_pred_pure_hdc)
# Similarity of the first test row against the next four test rows
pure_hdc_cos_sim = cosine_similarity(X_test_pure_hdc[:1], X_test_pure_hdc[1:5])  # Example similarity

# Output results for pure HDC
{
    "Pure HDC Accuracy": pure_hdc_accuracy,
    "Cosine Similarities (example)": pure_hdc_cos_sim.tolist(),
}

{'Pure HDC Accuracy': 0.8342857142857143,
 'Cosine Similarities (example)': [[-0.2720000000000001,
   -0.192,
   0.036,
   -0.072]]}

In [None]:
import numpy as np

# Step 1: Define the Schema
# Total length of every structured hypervector.
DIMENSIONS = 100

# The five named fields are each ten dimensions wide and sit back to back
# starting at index 0 (Attributes 0-9, Actions 10-19, ..., Entity Type 40-49).
SCHEMA = {
    field: slice(10 * position, 10 * (position + 1))
    for position, field in enumerate(
        ("Attributes", "Actions", "Context", "Relationships", "Entity Type")
    )
}
# Whatever remains after the named fields (dimensions 50-99) is kept in reserve.
SCHEMA["Reserved"] = slice(50, DIMENSIONS)

# Step 2: Helper Functions
def generate_random_hypervector(dim, rng=None):
    """Generate a random 0/1 hypervector in which each entry is equally likely to be 0 or 1.

    Parameters
    ----------
    dim : int or tuple of ints
        Size (shape) of the vector to draw.
    rng : numpy.random.RandomState or numpy.random.Generator, optional
        Source of randomness. When omitted, the global NumPy RNG is used,
        which matches the previous behaviour; pass a seeded generator to get
        reproducible vectors.

    Returns
    -------
    numpy.ndarray
        Array of 0s and 1s with the requested size.
    """
    source = np.random if rng is None else rng
    return source.choice([0, 1], size=dim)

def encode_to_schema(schema, data):
    """Lay out per-category values into one structured hypervector.

    Starts from an all-zero vector of length ``DIMENSIONS`` and, for every
    category in ``data`` that also appears in ``schema``, writes the category's
    values into the slice the schema assigns to it. Categories missing from the
    schema are ignored; schema slices with no data stay zero.
    """
    encoded = np.zeros(DIMENSIONS)

    # Pair each recognised category's slice with the values to store there.
    placements = (
        (schema[name], payload)
        for name, payload in data.items()
        if name in schema
    )
    for region, payload in placements:
        encoded[region] = payload

    return encoded

def bundle_hypervectors(vectors):
    """Bundle hypervectors: sum them element-wise, then keep only the sign.

    Each output entry is -1, 0 or +1 depending on whether the element-wise sum
    across ``vectors`` is negative, zero or positive.
    """
    return np.sign(np.sum(vectors, axis=0))

# Step 3: Define Example Data
# Each field is filled with a fresh random draw from generate_random_hypervector;
# the trailing labels (Red, Bounce, ...) only name what the field is meant to
# stand for. No seed is set in this cell, so the values differ between runs,
# and the order of the calls below determines which draw lands in which field.
data_1 = {
    "Attributes": generate_random_hypervector(10),  # Red, Large
    "Actions": generate_random_hypervector(10),     # Bounce
    "Context": generate_random_hypervector(10),     # Air
    "Entity Type": generate_random_hypervector(10)  # Ball
}

# Both examples are labelled "Ball", but their "Entity Type" vectors are
# independent random draws and are therefore not guaranteed to match.
data_2 = {
    "Attributes": generate_random_hypervector(10),  # Blue, Small
    "Actions": generate_random_hypervector(10),     # Roll
    "Context": generate_random_hypervector(10),     # Ground
    "Entity Type": generate_random_hypervector(10)  # Ball
}

# Step 4: Encode Data
# "Relationships" and "Reserved" are not supplied, so those slices stay zero.
hypervector_1 = encode_to_schema(SCHEMA, data_1)
hypervector_2 = encode_to_schema(SCHEMA, data_2)

# Step 5: Bundle Data
# With 0/1 inputs the bundled entry is 1 wherever either vector has a 1.
composite_hypervector = bundle_hypervectors([hypervector_1, hypervector_2])

# Step 6: Output Results
{
    "Hypervector 1": hypervector_1.tolist(),
    "Hypervector 2": hypervector_2.tolist(),
    "Composite Hypervector": composite_hypervector.tolist()
}

{'Hypervector 1': [1.0,
  0.0,
  0.0,
  1.0,
  0.0,
  0.0,
  0.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  0.0,
  0.0,
  1.0,
  1.0,
  1.0,
  0.0,
  0.0,
  0.0,
  1.0,
  1.0,
  1.0,
  1.0,
  0.0,
  0.0,
  1.0,
  1.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  0.0,
  0.0,
  0.0,
  1.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0],
 'Hypervector 2': [0.0,
  0.0,
  1.0,
  0.0,
  1.0,
  0.0,
  1.0,
  0.0,
  0.0,
  1.0,
  0.0,
  1.0,
  0.0,
  0.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  0.0,
  0.0,
  0.0,
  1.0,
  1.0,
  1.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,

In [None]:
# Reload the Fashion MNIST dataset
from sklearn.datasets import fetch_openml

# Load Fashion MNIST and preprocess
# No earlier visible cell binds the name `tf`, so import it here; otherwise the
# load below raises NameError on a freshly started kernel.
import tensorflow as tf

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
x_train, y_train = x_train[:6000], y_train[:6000]  # Use 10% of the training dataset
x_test, y_test = x_test[:1000], y_test[:1000]  # Use a small test subset

# Normalize pixel values
# Dividing by 255 and casting to float32 means the images handed to the
# encoder below hold fractional intensities rather than integer levels.
x_train = (x_train / 255.0).astype(np.float32)
x_test = (x_test / 255.0).astype(np.float32)

# Re-encode the dataset into hypervectors
dim = 1000  # Dimensionality of hypervectors

def hdc_encode_image(image, dim=1000):
    """Convert an image into a high-dimensional hypervector.

    Every pixel is mapped to one of 256 intensity levels, each level owns a
    fixed random bipolar hypervector, and the hypervectors of all pixels are
    summed and reduced to their sign.

    Parameters
    ----------
    image : array-like
        Pixel intensities, either raw levels in 0-255 or floats normalized to
        the range [0, 1]. A floating-point image whose maximum is <= 1.0 is
        treated as normalized and rescaled to 0-255; anything else is
        truncated to integer levels as before. (A raw float image containing
        only the values 0 and 1 is therefore read as normalized.)
    dim : int, default 1000
        Hypervector dimensionality.

    Returns
    -------
    numpy.ndarray of shape (dim,)
        Sign of the bundled sum: -1 or +1, and 0 where the sum ties.
    """
    # Private generator: same codebook as np.random.seed(42) followed by
    # np.random.choice, without resetting the global NumPy RNG on every call.
    rng = np.random.RandomState(42)
    # Generate random hypervectors for each pixel intensity value
    pixel_hypervectors = rng.choice([-1, 1], size=(256, dim))  # One row per level 0-255

    pixels = np.asarray(image).ravel()
    is_normalized = (
        np.issubdtype(pixels.dtype, np.floating)
        and pixels.size > 0
        and pixels.max() <= 1.0
    )
    if is_normalized:
        # Truncating a [0, 1] float with int() sends almost every pixel to
        # level 0, which makes all images encode alike; rescale to levels first.
        levels = np.clip(np.rint(pixels * 255.0), 0, 255).astype(np.intp)
    else:
        # Raw levels: truncate toward zero, exactly like int(pixel).
        levels = pixels.astype(np.intp)

    # Map each pixel to its corresponding hypervector and sum them (one
    # vectorized lookup instead of a Python loop over pixels).
    encoded_vector = pixel_hypervectors[levels].sum(axis=0)
    return np.sign(encoded_vector)  # Reduce to -1/1 (0 on ties)

# Encode training and test datasets
# One hdc_encode_image call per image; the result is a 2-D array with one
# encoded row per image.
x_train_encoded = np.array([hdc_encode_image(img, dim) for img in x_train])
x_test_encoded = np.array([hdc_encode_image(img, dim) for img in x_test])

# Normalize the encoded hypervectors
# (row-wise; `normalize` comes from an import in an earlier cell)
x_train_encoded_norm = normalize(x_train_encoded, axis=1)
x_test_encoded_norm = normalize(x_test_encoded, axis=1)

# Classify using the pure HDC classifier
# NOTE(review): hdc_classify is not defined in any cell visible in this
# notebook; confirm where it comes from, otherwise this line raises NameError
# on a fresh kernel.
hdc_predictions = hdc_classify(x_train_encoded_norm, y_train, x_test_encoded_norm)

# Evaluate the accuracy
hdc_accuracy = accuracy_score(y_test, hdc_predictions)

# Output the HDC test accuracy
{
    "HDC Test Accuracy": hdc_accuracy
}

{'HDC Test Accuracy': 0.095}

In [None]:
import numpy as np
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import accuracy_score

# Improved HDC Encoding Function
def hdc_encode_image_v2(image, dim=1000, block_size=4):
    """Encode an image into a high-dimensional hypervector using pixel blocks.

    The image is cut into ``block_size`` x ``block_size`` tiles (edge tiles may
    be smaller), each tile's mean intensity is mapped to one of 256 levels,
    and the random bipolar hypervectors of those levels are summed and reduced
    to their sign.

    Parameters
    ----------
    image : 2-D array-like
        Pixel intensities, either floats normalized to [0, 1] or raw levels in
        0-255. A floating-point image whose maximum is <= 1.0 is treated as
        normalized (mean * 255, as before); any other image is taken to hold
        levels already, so raw 0-255 input no longer indexes past the
        256-entry codebook.
    dim : int, default 1000
        Hypervector dimensionality.
    block_size : int, default 4
        Side length of the square tiles.

    Returns
    -------
    numpy.ndarray of shape (dim,)
        Sign of the bundled sum: -1.0 or +1.0, and 0.0 where the sum ties.
    """
    # Private generator: same codebook as np.random.seed(42) followed by
    # np.random.choice, without resetting the global NumPy RNG on every call.
    rng = np.random.RandomState(42)
    pixel_hypervectors = rng.choice([-1, 1], size=(256, dim))  # 256 unique intensity levels

    image = np.asarray(image)
    rows, cols = image.shape
    is_normalized = (
        np.issubdtype(image.dtype, np.floating)
        and image.size > 0
        and image.max() <= 1.0
    )
    scale = 255 if is_normalized else 1
    encoded_vector = np.zeros(dim)

    # Iterate over image blocks
    # NOTE(review): each block contributes the codebook row for its mean level
    # regardless of (i, j), so the encoding does not depend on where a block
    # sits in the image.
    for i in range(0, rows, block_size):
        for j in range(0, cols, block_size):
            block = image[i:i+block_size, j:j+block_size]
            avg_intensity = int(block.mean() * scale)  # Average pixel intensity level
            # Keep the index inside the 256-row codebook.
            avg_intensity = min(max(avg_intensity, 0), 255)
            encoded_vector += pixel_hypervectors[avg_intensity]

    return np.sign(encoded_vector)  # Reduce to -1/1 (0 on ties)

# Debugging HDC Pipeline Function
def debug_hdc_pipeline(train_hv, train_labels, test_hv, test_labels):
    """Nearest-neighbour HDC classification with an accuracy printout.

    Each test hypervector receives the label of the training hypervector it
    has the highest cosine similarity with. The resulting accuracy against
    ``test_labels`` is printed, and the list of predicted labels is returned.
    """
    def nearest_label(query):
        # Similarity of this one query against every training hypervector.
        scores = cosine_similarity([query], train_hv)[0]
        return train_labels[np.argmax(scores)]

    predictions = [nearest_label(query) for query in test_hv]

    accuracy = accuracy_score(test_labels, predictions)
    print(f"HDC Test Accuracy: {accuracy}")
    return predictions

# Main Pipeline
if __name__ == "__main__":
    # Imported here rather than at the top of the cell, so TensorFlow is only
    # needed when this pipeline actually runs.
    from tensorflow.keras.datasets import fashion_mnist

    # Load Fashion MNIST dataset
    (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

    # Use 10% of the dataset for efficiency
    # (the first 6000 training and first 1000 test items, taken in stored order)
    x_train, y_train = x_train[:6000], y_train[:6000]
    x_test, y_test = x_test[:1000], y_test[:1000]

    # Normalize pixel values
    # Divide by 255 and store as float32; hdc_encode_image_v2 multiplies block
    # means by 255 again to recover an intensity level.
    x_train = (x_train / 255.0).astype(np.float32)
    x_test = (x_test / 255.0).astype(np.float32)

    # Re-encode the dataset into hypervectors
    dim = 1000  # Dimensionality of hypervectors

    print("Encoding training data...")
    x_train_encoded = np.array([hdc_encode_image_v2(img, dim) for img in x_train])

    print("Encoding test data...")
    x_test_encoded = np.array([hdc_encode_image_v2(img, dim) for img in x_test])

    # Normalize the encoded hypervectors (row-wise)
    x_train_encoded_norm = normalize(x_train_encoded, axis=1)
    x_test_encoded_norm = normalize(x_test_encoded, axis=1)

    # Run the HDC classifier
    # debug_hdc_pipeline prints the accuracy itself; its returned predictions
    # are not kept here.
    print("Running HDC classification...")
    debug_hdc_pipeline(x_train_encoded_norm, y_train, x_test_encoded_norm, y_test)

Encoding training data...
Encoding test data...
Running HDC classification...
HDC Test Accuracy: 0.095


In [None]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Define the dimensions for hypervectors
# Passed to build_vocab and encode_paragraph below. Note that this rebinds the
# notebook-level name `dim`, which earlier cells set to 1000.
dim = 10000  # High dimensional space

# Step 1: Generate random hypervectors for words
def generate_random_hypervector(dim, rng=None):
    """Generate a random bipolar hypervector with entries drawn from {-1, +1}.

    NOTE(review): an earlier cell defines a function with the same name that
    returns 0/1 vectors; once this cell runs, this definition replaces it.

    Parameters
    ----------
    dim : int or tuple of ints
        Size (shape) of the vector to draw.
    rng : numpy.random.RandomState or numpy.random.Generator, optional
        Source of randomness. When omitted, the global NumPy RNG is used,
        which matches the previous behaviour; pass a seeded generator to get
        reproducible vectors.

    Returns
    -------
    numpy.ndarray
        Array of -1s and +1s with the requested size.
    """
    source = np.random if rng is None else rng
    return source.choice([-1, 1], size=dim)

# Create a vocabulary with random hypervectors for each unique word
def build_vocab(paragraph, dim):
    """Build a vocabulary of words mapped to random hypervectors.

    The paragraph is lower-cased and split on whitespace; every distinct token
    gets its own hypervector from ``generate_random_hypervector(dim)``.
    Punctuation is not stripped, so "dog" and "dog." count as different words.

    Returns
    -------
    dict
        Maps each distinct word to its hypervector, in order of first
        appearance in the paragraph.
    """
    # dict.fromkeys de-duplicates while keeping first-occurrence order.
    # Iterating a set of strings instead follows hash order, which Python
    # randomizes per process, so even with a seeded RNG the random draws would
    # be handed to different words from one run to the next.
    unique_words = dict.fromkeys(paragraph.lower().split())
    vocab = {word: generate_random_hypervector(dim) for word in unique_words}
    return vocab

# Step 2: Encode the paragraph into a single hypervector
def encode_paragraph(paragraph, vocab, dim):
    """Bundle the hypervectors of a paragraph's words into one hypervector.

    The paragraph is lower-cased and split on whitespace. The vector of every
    token found in ``vocab`` is added to a running sum (repeated words count
    each time they occur, unknown tokens are skipped), and the sign of that
    sum is returned: -1 or +1 per dimension, and 0 where the sum is zero.
    """
    bundle = np.zeros(dim)
    tokens = paragraph.lower().split()
    for word_vector in (vocab[token] for token in tokens if token in vocab):
        bundle += word_vector
    return np.sign(bundle)

# Step 3: Decode the hypervector back to the most similar words
def decode_hypervector(encoded_vector, vocab, top_n=5):
    """Rank vocabulary words by cosine similarity to an encoded hypervector.

    Returns a list of ``(word, similarity)`` pairs, highest similarity first,
    cut to the first ``top_n`` entries.
    """
    scored = {}
    for word, word_vector in vocab.items():
        # cosine_similarity works on 2-D inputs; wrap both vectors as single
        # rows and unwrap the 1x1 result.
        scored[word] = cosine_similarity([encoded_vector], [word_vector])[0][0]

    ranked = sorted(scored.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:top_n]

# Example paragraph
# After lower-casing, "the" occurs twice, so its hypervector is added twice
# when the paragraph is bundled; every other word contributes once.
paragraph = "The quick brown fox jumps over the lazy dog"

# Build vocabulary
# No seed is set in this cell, so the word hypervectors differ between runs.
vocab = build_vocab(paragraph, dim)

# Encode the paragraph
encoded_vector = encode_paragraph(paragraph, vocab, dim)

# Decode the hypervector
# Uses the default top_n=5, i.e. the five most similar vocabulary words.
decoded_words = decode_hypervector(encoded_vector, vocab)

# Output the results
print("Encoded Hypervector:", encoded_vector)
print("Decoded Words:", decoded_words)

Encoded Hypervector: [-1. -1. -1. ... -1.  1. -1.]
Decoded Words: [('the', 0.535199999999999), ('brown', 0.2513999999999994), ('dog', 0.25079999999999936), ('lazy', 0.2443999999999994), ('over', 0.23999999999999944)]
