In [6]:
import numpy as np
from sklearn.decomposition import PCA
from tqdm.auto import tqdm

In [7]:
# Parameters
num_matrices = 10000
n = 10  # Number of rows
d = 5   # Number of columns
min_val = 0
max_val = 100
tolerance = 1e-10  # Absolute slack allowed for floating-point rounding in the range check
seed = 0  # Fixed seed so a fresh-kernel re-run draws the same matrices

# Generate random integer matrices with entries in [min_val, max_val].
# A locally seeded Generator is used instead of the global np.random state so
# the draws are reproducible and do not depend on what other cells consumed.
rng = np.random.default_rng(seed)
random_matrices = [rng.integers(min_val, max_val + 1, size=(n, d)) for _ in range(num_matrices)]

# PCA and reconstruction
# NOTE(review): n_components equals the number of columns, so no component is
# dropped and the round trip is expected to reproduce the input up to rounding
# error; this check then only measures numerical drift - confirm that is the intent.
reconstructions_outside_range = 0

for matrix in tqdm(random_matrices, desc="Processing matrices"):
    pca = PCA(n_components=d)
    pca.fit(matrix)
    reconstructed_matrix = pca.inverse_transform(pca.transform(matrix))

    # Count the matrix once if any reconstructed entry leaves the range by more than tolerance
    if np.any((reconstructed_matrix < min_val - tolerance) | (reconstructed_matrix > max_val + tolerance)):
        reconstructions_outside_range += 1

print(f"Number of reconstructions with values outside of [{min_val}, {max_val}]: {reconstructions_outside_range}")

Processing matrices: 100%|██████████| 10000/10000 [00:08<00:00, 1148.86it/s]

Number of reconstructions with values outside of [0, 100]: 0





In [8]:
# Experiment configuration
num_matrices = 10_000        # how many random matrices are generated
n, d = 10, 5                 # matrix shape: n rows by d columns
min_val, max_val = 0, 100    # inclusive bounds of the generated integer entries
tolerance = 1e-10            # absolute tolerance for rounding error when comparing row sums

# Function to check if row sums are preserved
def check_row_sums(matrix1, matrix2, tol):
    """Report whether two matrices have matching row sums.

    Both inputs are summed along axis 1 and the per-row totals are compared.

    Args:
        matrix1: First 2-D array-like.
        matrix2: Second 2-D array-like, compared row by row against ``matrix1``.
        tol: Absolute tolerance; a row matches only when the absolute
            difference of its two sums is strictly smaller than this value.

    Returns:
        The result of ``np.all`` over the per-row comparisons: true when every
        row matches, false otherwise.
    """
    per_row_gap = np.sum(matrix1, axis=1) - np.sum(matrix2, axis=1)
    return np.all(np.abs(per_row_gap) < tol)

# Generate random integer matrices with entries in [min_val, max_val].
# A locally seeded Generator is used instead of the global np.random state so
# a fresh-kernel re-run draws the same matrices.
seed = 0
rng = np.random.default_rng(seed)
random_matrices = [rng.integers(min_val, max_val + 1, size=(n, d)) for _ in range(num_matrices)]

# PCA and reconstruction
# NOTE(review): n_components equals the number of columns, so no component is
# dropped and the round trip is expected to reproduce the input up to rounding
# error; this check then only measures numerical drift - confirm that is the intent.
reconstructions_with_different_row_sums = 0

for matrix in tqdm(random_matrices, desc="Processing matrices"):
    pca = PCA(n_components=d)
    pca.fit(matrix)
    reconstructed_matrix = pca.inverse_transform(pca.transform(matrix))

    # Count the matrix once if any row sum moved by tolerance or more
    if not check_row_sums(matrix, reconstructed_matrix, tolerance):
        reconstructions_with_different_row_sums += 1

print(f"Number of reconstructions with different row sums: {reconstructions_with_different_row_sums}")

Processing matrices: 100%|██████████| 10000/10000 [00:07<00:00, 1346.66it/s]

Number of reconstructions with different row sums: 0



