In [3]:
import numpy as np
import pandas as pd

In [5]:
csv_file = "landauer.csv"  # Replace with your CSV file path
data = pd.read_csv(csv_file)

data.head()

Unnamed: 0,c1,c2,c3,c4,c5,m1,m2,m3,m4
human,1,0,0,1,0,0,0,0,0
interface,1,0,1,0,0,0,0,0,0
computer,1,1,0,0,0,0,0,0,0
user,0,1,1,0,1,0,0,0,0
system,0,1,1,2,0,0,0,0,0


In [6]:
data_numeric = data.select_dtypes(include=[np.number]).to_numpy()

In [7]:
U, S, VT = np.linalg.svd(data_numeric, full_matrices=False)

# Display the results
print("U matrix (left singular vectors):")
print(U)
print("\nSingular values:")
print(S)
print("\nVT matrix (right singular vectors):")
print(VT)

U matrix (left singular vectors):
[[-2.21350778e-01 -1.13179617e-01  2.88958154e-01  4.14750740e-01
   1.06275121e-01  3.40983324e-01 -5.22657771e-01 -6.04501376e-02
   4.06677509e-01]
 [-1.97645401e-01 -7.20877788e-02  1.35039639e-01  5.52239584e-01
  -2.81768939e-01 -4.95878011e-01  7.04234412e-02 -9.94003721e-03
   1.08930266e-01]
 [-2.40470226e-01  4.31519521e-02 -1.64429079e-01  5.94961818e-01
   1.06755285e-01  2.54955130e-01  3.02240236e-01  6.23280150e-02
  -4.92444364e-01]
 [-4.03598863e-01  5.70702584e-02 -3.37803538e-01 -9.91137295e-02
  -3.31733718e-01 -3.84831917e-01 -2.87217529e-03 -3.90504202e-04
  -1.23293479e-02]
 [-6.44481152e-01 -1.67301206e-01  3.61148151e-01 -3.33461601e-01
   1.58954979e-01  2.06522588e-01  1.65828575e-01  3.42720233e-02
  -2.70696289e-01]
 [-2.65037470e-01  1.07159573e-01 -4.25998497e-01 -7.38121922e-02
  -8.03193764e-02  1.69676389e-01 -2.82915727e-01 -1.61465472e-02
   5.38746887e-02]
 [-2.65037470e-01  1.07159573e-01 -4.25998497e-01 -7.3812192

In [12]:
# # Step 3: Center the data (subtract mean from each column)
# mean_centered = data_numeric - np.mean(data_numeric, axis=0)

# Step 4: Compute the covariance matrix
cov_matrix = np.dot(data_numeric.T, data_numeric)

# Step 5: Compute eigenvalues and eigenvectors of the covariance matrix
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

# Step 6: Sort eigenvalues and eigenvectors by descending eigenvalue magnitude
sorted_indices = np.argsort(eigenvalues)[::-1]
sorted_eigenvalues = eigenvalues[sorted_indices]
sorted_eigenvectors = eigenvectors[:, sorted_indices]

# Step 7: Compute the singular values (square root of eigenvalues)
singular_values = np.sqrt(sorted_eigenvalues)

# Step 8: Compute the left singular vectors (U matrix)
U = np.dot(data_numeric, sorted_eigenvectors)
U = U / np.linalg.norm(U, axis=0)  # Normalize each column

# Step 9: VT matrix (right singular vectors is the sorted eigenvectors)
VT = sorted_eigenvectors.T

# Step 10: Verify reconstruction
reconstructed_matrix = np.dot(U, np.dot(np.diag(singular_values), VT))

# Display results
print("Mean-Centered Data:")
print(data_numeric)
print("\nCovariance Matrix:")
print(cov_matrix)
print("\nEigenvalues:")
print(sorted_eigenvalues)
print("\nEigenvectors (VT):")
print(VT)
print("\nSingular Values:")
print(singular_values)
print("\nU Matrix:")
print(U)
print("\nReconstructed Matrix (approximation):")
print(reconstructed_matrix)

Mean-Centered Data:
[[1 0 0 1 0 0 0 0 0]
 [1 0 1 0 0 0 0 0 0]
 [1 1 0 0 0 0 0 0 0]
 [0 1 1 0 1 0 0 0 0]
 [0 1 1 2 0 0 0 0 0]
 [0 1 0 0 1 0 0 0 0]
 [0 1 0 0 1 0 0 0 0]
 [0 0 1 1 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 1]
 [0 0 0 0 0 1 1 1 0]
 [0 0 0 0 0 0 1 1 1]
 [0 0 0 0 0 0 0 1 1]]

Covariance Matrix:
[[3 1 1 1 0 0 0 0 0]
 [1 6 2 2 3 0 0 0 1]
 [1 2 4 3 1 0 0 0 0]
 [1 2 3 6 0 0 0 0 0]
 [0 3 1 0 3 0 0 0 0]
 [0 0 0 0 0 1 1 1 0]
 [0 0 0 0 0 1 2 2 1]
 [0 0 0 0 0 1 2 3 2]
 [0 1 0 0 0 0 1 2 3]]

Eigenvalues:
[11.16150425  6.46024397  5.54105008  2.70448646  2.264518    1.7066338
  0.71555203  0.31375057  0.13226084]

Eigenvectors (VT):
[[ 1.97392802e-01  6.05990269e-01  4.62917508e-01  5.42114417e-01
   2.79469108e-01  3.81521297e-03  1.46314675e-02  2.41368353e-02
   8.19573680e-02]
 [ 5.59135178e-02 -1.65592878e-01  1.27312062e-01  2.31755229e-01
  -1.06774717e-01 -1.92847936e-01 -4.37874883e-01 -6.15121899e-01
  -5.29937072e-01]
 [-1.10269729e-01  4.97326494e-01 -2.07605953e-01 -5.69921445e-01
   5.

In [63]:
R = data_numeric

# Step 2: Define Parameters
num_users, num_items = R.shape
latent_features = 3  # Number of latent features
lambda_reg = 0.1
iterations = 20     # Number of ALS iterations

for latent_features in range(2, 4):
    print(f"\n\nFor {latent_features} latent features:\n")

    # Step 3: Initialize Latent Matrices U and V
    P = np.random.rand(num_users, latent_features)
    Q = np.random.rand(num_items, latent_features)

    # Step 4: ALS Optimization Loop
    for iteration in range(iterations):
        for i in range(num_users):
            # Get indices of non-zero ratings for user i
            non_zero_indices = R[i, :] > 0
            Q_non_zero = Q[non_zero_indices, :]
            R_non_zero = R[i, non_zero_indices]
            
            # Solve for U[i] using normal equation
            P[i, :] = np.linalg.solve(
                Q_non_zero.T @ Q_non_zero + lambda_reg * np.eye(latent_features),
                Q_non_zero.T @ R_non_zero
            )
        
        for j in range(num_items):
            # Get indices of non-zero ratings for item j
            non_zero_indices = R[:, j] > 0
            P_non_zero = P[non_zero_indices, :]
            R_non_zero = R[non_zero_indices, j]
            
            # Solve for V[j] using normal equation
            Q[j, :] = np.linalg.solve(
                P_non_zero.T @ P_non_zero + lambda_reg * np.eye(latent_features),
                P_non_zero.T @ R_non_zero
            )

        
        # Compute reconstruction error (optional)
        #print(P.shape, Q.shape)
        reconstruction = P @ Q.T

        #print(reconstruction.shape)
        error = np.sqrt(np.sum((R[R > 0] - reconstruction[R > 0]) ** 2))
        print(f"Iteration {iteration + 1}, RMSE: {error:.4f}")

    # Step 5: Final Reconstruction
    reconstructed_matrix = P @ Q.T

    # Display Results
    print("\nOriginal Matrix (R):")
    print(R)
    print("\nReconstructed Matrix:")
    print(reconstructed_matrix)
    # print("\nUser Matrix (P):")
    # print(P)
    # print("\nItem Matrix (Q):")
    # print(Q)



For 2 latent features:

Iteration 1, RMSE: 0.3694
Iteration 2, RMSE: 0.2395
Iteration 3, RMSE: 0.2353
Iteration 4, RMSE: 0.2406
Iteration 5, RMSE: 0.2487
Iteration 6, RMSE: 0.2569
Iteration 7, RMSE: 0.2645
Iteration 8, RMSE: 0.2714
Iteration 9, RMSE: 0.2776
Iteration 10, RMSE: 0.2831
Iteration 11, RMSE: 0.2880
Iteration 12, RMSE: 0.2924
Iteration 13, RMSE: 0.2962
Iteration 14, RMSE: 0.2995
Iteration 15, RMSE: 0.3024
Iteration 16, RMSE: 0.3048
Iteration 17, RMSE: 0.3069
Iteration 18, RMSE: 0.3085
Iteration 19, RMSE: 0.3099
Iteration 20, RMSE: 0.3109

Original Matrix (R):
[[1 0 0 1 0 0 0 0 0]
 [1 0 1 0 0 0 0 0 0]
 [1 1 0 0 0 0 0 0 0]
 [0 1 1 0 1 0 0 0 0]
 [0 1 1 2 0 0 0 0 0]
 [0 1 0 0 1 0 0 0 0]
 [0 1 0 0 1 0 0 0 0]
 [0 0 1 1 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 1]
 [0 0 0 0 0 1 1 1 0]
 [0 0 0 0 0 0 1 1 1]
 [0 0 0 0 0 0 0 1 1]]

Reconstructed Matrix:
[[0.88974524 0.72578859 0.49204286 1.04787564 0.73586555 0.65254734
  0.6900739  0.70651858 0.7199787 ]
 [0.96799955 0.49018701 0.94302272 1.2677