# In [5]:
import numpy as np

def orthonormal_basis(matrix):
    """Return the Q factor from the QR decomposition of ``matrix``.

    The factorization is delegated to ``np.linalg.qr`` with its default
    settings. Only the Q factor (orthonormal columns) is kept; the
    triangular factor is discarded.
    """
    return np.linalg.qr(matrix)[0]

def canonical_angles(subspace1, subspace2):
    """Return the canonical (principal) angles between two subspaces.

    Each argument is a matrix whose columns span a subspace. Both are
    orthonormalized with ``orthonormal_basis``; the singular values of the
    product of the two bases are the cosines of the canonical angles.

    Args:
        subspace1: 2-D array whose columns span the first subspace.
        subspace2: 2-D array whose columns span the second subspace.

    Returns:
        1-D array of angles in radians, in the order of the singular
        values returned by ``np.linalg.svd`` (descending cosines, hence
        ascending angles).
    """
    basis1 = orthonormal_basis(subspace1)
    basis2 = orthonormal_basis(subspace2)

    # Only the singular values are needed, so skip computing U and Vh.
    cosines = np.linalg.svd(basis1.T @ basis2, compute_uv=False)

    # Round-off can push a cosine marginally above 1, where arccos would
    # return NaN; clamp into arccos's valid domain first. Singular values
    # are non-negative, so in practice only the upper bound ever applies.
    cosines = np.clip(cosines, -1, 1)

    return np.arccos(cosines)

def subspace_similarity(angles):
    """Summarize a set of angles as a ``(distance, rms)`` pair.

    Args:
        angles: sequence or array of angles.

    Returns:
        A tuple ``(subspace_distance, rms)`` where ``subspace_distance`` is
        the sum of the squared angles and ``rms`` is the square root of
        that sum divided by the number of angles.
    """
    squared_total = np.sum(np.square(angles))
    return squared_total, np.sqrt(squared_total / len(angles))

def most_similar_subspace(new_subspace, example_subspaces, metric='distance'):
    """Find the example subspace closest to ``new_subspace``.

    Every example is scored against ``new_subspace`` using the canonical
    angles between the two, and the example with the smallest score wins.

    Args:
        new_subspace: matrix whose columns span the query subspace.
        example_subspaces: iterable of matrices to compare against.
        metric: ``'distance'`` scores by the sum of squared canonical
            angles; any other value scores by their RMS.

    Returns:
        A tuple ``(index, score)`` for the best-matching example. On ties
        the example with the lowest index is returned.

    Raises:
        IndexError: if ``example_subspaces`` is empty.
    """
    use_distance = metric == 'distance'

    scores = []
    for index, example_subspace in enumerate(example_subspaces):
        angles = canonical_angles(new_subspace, example_subspace)
        subspace_distance, rms = subspace_similarity(angles)
        scores.append((index, subspace_distance if use_distance else rms))

    if not scores:
        # Same exception type the original list indexing produced, but
        # with a message that names the actual problem.
        raise IndexError("example_subspaces must contain at least one subspace")

    # A single linear scan is enough; min() keeps the first of equal
    # scores, matching what a stable sort followed by [0] would return.
    return min(scores, key=lambda pair: pair[1])

np.random.seed(42)  # Fixed seed so every run draws the same random data

# Ten reference subspaces, each spanned by the columns of a 10x4 matrix
example_subspaces = [np.random.rand(10, 4) for _ in range(10)]

print(len(example_subspaces))
print(example_subspaces[0].shape)

# Labelled query set: each query is a noisy copy of one reference subspace
n_new_subspaces = 100
perturbation_factor = 0.1
new_subspaces = []
labels = []

for _ in range(n_new_subspaces):
    # The class index is drawn before the noise; keeping this order keeps
    # the random stream (and therefore the results) unchanged.
    true_class = np.random.randint(0, 10)
    noise = np.random.rand(10, 4)

    # The query is the chosen reference matrix plus scaled uniform noise
    new_subspace = example_subspaces[true_class] + perturbation_factor * noise

    new_subspaces.append(new_subspace)
    labels.append(true_class)

# Classify each query as the index of its nearest reference subspace
predictions = [
    most_similar_subspace(candidate, example_subspaces, metric='distance')[0]
    for candidate in new_subspaces
]

# Fraction of queries whose predicted class equals the true class
correct_predictions = sum(pred == true for pred, true in zip(predictions, labels))
accuracy = correct_predictions / n_new_subspaces
print(f"Accuracy: {accuracy * 100:.2f}%")

# Output:
# 10
# (10, 4)
# Accuracy: 100.00%
