In [2]:
import pandas as pd
import numpy as np

# Create dataset exactly like the image
data = {
    'Blues Traveler':     [3.5, 2, 5, 3, np.nan, np.nan, 5, 3],
    'Broken Bells':       [2, 3.5, 1, 4, 4, 4.5, 2, np.nan],
    'Deadmau5':           [np.nan, 4, 1, 4.5, 1, 4, np.nan, np.nan],
    'Norah Jones':        [4.5, np.nan, 3, np.nan, 4, 5, 3, 5],
    'Phoenix':            [5, 2, 5, 3, np.nan, 5, 5, 4],
    'Slightly Stoopid':   [1.5, 3.5, 1, 4.5, np.nan, 4.5, 4, 2.5],
    'The Strokes':        [2.5, np.nan, np.nan, 4, 4, 4, 5, 3],
    'Vampire Weekend':    [2, 3, np.nan, 2, 1, 4, np.nan, np.nan]
}

users = ['Angelica', 'Bill', 'Chan', 'Dan', 'Hailey', 'Jordyn', 'Sam', 'Veronica']

ratings = pd.DataFrame(data, index=users)
from IPython.display import display

print("Original Rating Matrix (with NULL values):")
display(ratings)


Original Rating Matrix (with NULL values):


Unnamed: 0,Blues Traveler,Broken Bells,Deadmau5,Norah Jones,Phoenix,Slightly Stoopid,The Strokes,Vampire Weekend
Angelica,3.5,2.0,,4.5,5.0,1.5,2.5,2.0
Bill,2.0,3.5,4.0,,2.0,3.5,,3.0
Chan,5.0,1.0,1.0,3.0,5.0,1.0,,
Dan,3.0,4.0,4.5,,3.0,4.5,4.0,2.0
Hailey,,4.0,1.0,4.0,,,4.0,1.0
Jordyn,,4.5,4.0,5.0,5.0,4.5,4.0,4.0
Sam,5.0,2.0,,3.0,5.0,4.0,5.0,
Veronica,3.0,,,5.0,4.0,2.5,3.0,


In [5]:
from scipy.sparse.linalg import svds

# Fill NULL values with 0
matrix_filled = ratings.fillna(0)
matrix = matrix_filled.values

# Mean Centering
user_mean = np.mean(matrix, axis=1)
matrix_demeaned = matrix - user_mean.reshape(-1,1)

# Apply SVD
U, sigma, Vt = svds(matrix_demeaned, k=3)

sigma = np.diag(sigma)


# User Matrix
user_matrix = pd.DataFrame(U,
                           index=ratings.index,
                           columns=['Feature1','Feature2','Feature3'])

print("\nUser Latent Feature Matrix (U):")
#display(user_matrix.round(3))

# Item Matrix
item_matrix = pd.DataFrame(Vt,
                           index=['Feature1','Feature2','Feature3'],
                           columns=ratings.columns)

print("\nItem Latent Feature Matrix (Vᵀ):")
#display(item_matrix.round(3))


predicted_ratings = np.dot(np.dot(U, sigma), Vt) + user_mean.reshape(-1,1)

pred_df = pd.DataFrame(predicted_ratings,
                       columns=ratings.columns,
                       index=ratings.index)

print("\nPredicted Complete Rating Matrix:")
display(pred_df.round(2))


User Latent Feature Matrix (U):

Item Latent Feature Matrix (Vᵀ):

Predicted Complete Rating Matrix:


Unnamed: 0,Blues Traveler,Broken Bells,Deadmau5,Norah Jones,Phoenix,Slightly Stoopid,The Strokes,Vampire Weekend
Angelica,3.73,1.41,0.63,5.0,4.38,1.73,2.51,1.61
Bill,2.49,2.66,4.27,-0.07,1.57,2.94,0.64,3.5
Chan,4.97,0.05,0.48,2.96,4.7,1.63,0.27,0.93
Dan,3.48,3.68,4.11,0.11,2.29,4.7,4.12,2.51
Hailey,-0.86,2.74,0.86,3.92,0.5,0.97,4.2,1.66
Jordyn,2.12,4.56,3.86,5.19,2.97,3.21,4.58,4.51
Sam,5.2,1.75,0.42,3.12,4.79,3.74,5.16,-0.17
Veronica,3.48,0.89,-0.47,4.54,4.09,1.56,3.14,0.28


In [12]:
# Fill missing with 0
matrix_filled = ratings.fillna(0)
matrix = matrix_filled.values

# NO mean centering
matrix_demeaned = matrix

# Apply SVD with higher k
U, sigma, Vt = svds(matrix_demeaned, k=5)
sigma = np.diag(sigma)

# Reconstruct
predicted_ratings = np.dot(np.dot(U, sigma), Vt)

# Clip to rating range
predicted_ratings = np.clip(predicted_ratings, 1, 5)

# Round like item-based output
predicted_ratings = np.round(predicted_ratings, 2)

pred_df = pd.DataFrame(predicted_ratings,
                       columns=ratings.columns,
                       index=ratings.index)

display(pred_df)

Unnamed: 0,Blues Traveler,Broken Bells,Deadmau5,Norah Jones,Phoenix,Slightly Stoopid,The Strokes,Vampire Weekend
Angelica,3.44,1.93,1.0,4.91,4.81,1.7,2.29,1.19
Bill,1.79,3.45,4.27,1.0,2.45,3.07,1.0,2.82
Chan,5.0,1.04,1.0,2.91,4.85,1.11,1.0,1.0
Dan,3.03,4.23,4.12,1.0,2.87,4.79,3.82,2.08
Hailey,1.0,4.04,1.0,3.85,1.0,1.0,4.08,1.15
Jordyn,1.0,4.39,3.97,5.0,4.75,4.62,3.97,4.18
Sam,5.0,1.7,1.0,3.14,4.9,3.87,5.0,1.0
Veronica,2.77,1.0,1.0,4.51,4.56,2.25,3.13,1.0
