In [5]:
import numpy as np
import pandas as pd
from sklearn.decomposition import TruncatedSVD

# 1. Define the user x movie rating matrix.
# Rows are users, columns are movies; np.nan marks a rating that was never given.
R = np.array([
    [5, np.nan, 4, 1, np.nan],
    [5, 3, np.nan, np.nan, np.nan],
    [np.nan, 3, 2, np.nan, 1],
    [np.nan, np.nan, np.nan, 4, 2],
])

users = ["U1", "U2", "U3", "U4"]
movies = ["M1", "M2", "M3", "M4", "M5"]

R_df = pd.DataFrame(R, index=users, columns=movies)

# 2. Pre-processing: replace NaNs with 0.
# SVD cannot handle missing values, so the gaps have to be filled first.
# Caveat: once filled, the decomposition cannot tell "not rated" apart from
# "rated 0" -- the zeros are fitted like real ratings, which pulls the
# reconstructed values for unrated cells toward 0.
R_filled = R_df.fillna(0)

# 3. Apply Truncated SVD.
# k is the number of latent factors ("concepts") to keep; here the top 2.
k = 2
svd = TruncatedSVD(n_components=k, random_state=42)
concept_labels = [f"Concept_{i + 1}" for i in range(k)]

# 'User-Concept' matrix (U * Sigma): one row per user, one column per concept.
U_sigma = svd.fit_transform(R_filled)

# 'Concept-Movie' matrix (V^T): one row per concept, one column per movie.
VT = svd.components_

# 4. Reconstruct the matrix from the k retained concepts (predicted ratings).
# Cells that were originally NaN now hold the low-rank estimate; cells that
# were observed are approximated too, so they will not match R exactly.
R_hat = svd.inverse_transform(U_sigma)
R_hat_df = pd.DataFrame(R_hat, index=users, columns=movies)

print("--- Original Matrix (with 0s) ---")
print(R_filled)
print("\n--- User Latent Factors (Reduced Matrix) ---")
print(pd.DataFrame(U_sigma, index=users, columns=concept_labels))
print("\n--- Predicted Ratings (Reconstructed Matrix) ---")
print(R_hat_df.round(2))

# 5. Explained variance: share of the zero-filled matrix's variance that the
# k retained components capture (summed over components).
print(f"\nTotal Variance Explained by {k} components: {svd.explained_variance_ratio_.sum():.2%}")

--- Original Matrix (with 0s) ---
     M1   M2   M3   M4   M5
U1  5.0  0.0  4.0  1.0  0.0
U2  5.0  3.0  0.0  0.0  0.0
U3  0.0  3.0  2.0  0.0  1.0
U4  0.0  0.0  0.0  4.0  2.0

--- User Latent Factors (Reduced Matrix) ---
    Concept_1  Concept_2
U1   6.002686   0.581377
U2   5.154855  -1.273997
U3   1.827994   0.272683
U4   0.597510   4.316215

--- Predicted Ratings (Reconstructed Matrix) ---
      M1    M2    M3    M4    M5
U1  4.95  1.81  2.59  1.26  0.52
U2  4.55  1.81  1.97 -0.45 -0.31
U3  1.49  0.54  0.80  0.47  0.20
U4 -0.22 -0.44  0.85  3.80  1.89

Total Variance Explained by 2 components: 63.33%
