In [99]:
import numpy as np
from sklearn.decomposition import PCA
from tqdm.auto import tqdm

In [100]:
# Parameters
num_matrices = 10000
n = 3  # Number of rows
d = 2   # Number of columns
min_val = 0
max_val = 100
tolerance = 1e-10  # Adjust this value based on your tolerance for rounding errors

# Generate random matrices
random_matrices = [np.random.randint(min_val, max_val+1, size=(n, d)) for _ in range(num_matrices)]

# PCA and reconstruction
reconstructions_outside_range = 0
paragon = None
minimum = 0

for i, matrix in enumerate(tqdm(random_matrices, desc="Processing matrices")):
    pca = PCA(n_components=1)
    A = pca.fit_transform(matrix)
    reconstructed_matrix = pca.inverse_transform(A)
    
    # Check if any value is outside the range
    # if np.any((reconstructed_matrix < min_val - tolerance) | (reconstructed_matrix > max_val + tolerance)):

    # Check if any value is negative
    if np.any(reconstructed_matrix < min_val - tolerance):
        if np.min(reconstructed_matrix) < minimum:
            paragon = matrix
            minimum = np.min(matrix)
        reconstructions_outside_range += 1

print(f"Number of reconstructions with values outside of [{min_val}, {max_val}]: {reconstructions_outside_range}")

Processing matrices: 100%|██████████| 10000/10000 [00:04<00:00, 2259.99it/s]

Number of reconstructions with values outside of [0, 100]: 626





In [101]:
paragon

array([[ 1, 49],
       [69, 90],
       [26, 99]])

In [102]:
A = pca.fit_transform(paragon)
pca.inverse_transform(A)

array([[-3.24941533, 55.20907927],
       [62.16926767, 99.98079859],
       [37.08014766, 82.81012214]])

In [103]:
# Parameters
num_matrices = 10000
n = 3  # Number of rows
d = 2   # Number of columns
min_val = 0
max_val = 100
tolerance = 1e-10  # Adjust this value based on your tolerance for rounding errors

# Function to check if row sums are preserved
def check_row_sums(matrix1, matrix2, tol):
    row_sums1 = np.sum(matrix1, axis=1)
    row_sums2 = np.sum(matrix2, axis=1)
    return np.all(np.abs(row_sums1 - row_sums2) < tol)

# Generate random matrices
random_matrices = [np.random.randint(min_val, max_val+1, size=(n, d)) for _ in range(num_matrices)]

# PCA and reconstruction
reconstructions_with_different_row_sums = 0

for i, matrix in enumerate(tqdm(random_matrices, desc="Processing matrices")):
    pca = PCA(n_components=1)
    A = pca.fit_transform(matrix)
    reconstructed_matrix = pca.inverse_transform(A)
    
    # Check if row sums are preserved
    if not check_row_sums(matrix, reconstructed_matrix, tolerance):
        paragon = matrix
        reconstructions_with_different_row_sums += 1

print(f"Number of reconstructions with different row sums: {reconstructions_with_different_row_sums}")

Processing matrices: 100%|██████████| 10000/10000 [00:04<00:00, 2079.92it/s]

Number of reconstructions with different row sums: 9986





In [104]:
paragon

array([[99, 16],
       [58, 26],
       [29,  3]])

In [105]:
A = pca.fit_transform(paragon)
pca.inverse_transform(A)

array([[98.11469265, 21.17649564],
       [59.94147943, 14.64794264],
       [27.94382792,  9.17556172]])