In [1]:
# Basic libraries
import numpy as np
import pandas as pd

# Train-test split
from sklearn.model_selection import train_test_split

# Evaluation metrics
from sklearn.metrics import mean_squared_error

# Sparse matrix + SVD
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds

In [2]:
df1 = pd.read_csv('users_interactions.csv')
df2 = pd.read_csv('products_catalog.csv')

In [3]:
df1.head()

Unnamed: 0,user_id,product_id,rating,timestamp
0,U0001,P0234,4,2025-06-30 00:00:00
1,U0001,P0212,5,2024-11-05 00:00:00
2,U0001,P0425,4,2025-02-08 00:00:00
3,U0001,P0491,4,2024-12-12 00:00:00
4,U0001,P0204,2,2024-07-01 00:00:00


In [9]:
df1 = df1.drop("timestamp", axis=1)
df1

Unnamed: 0,user_id,product_id,rating
0,U0001,P0234,4
1,U0001,P0212,5
2,U0001,P0425,4
3,U0001,P0491,4
4,U0001,P0204,2
...,...,...,...
27497,U1000,P0345,4
27498,U1000,P0412,5
27499,U1000,P0226,4
27500,U1000,P0095,5


In [10]:
from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(
    df1,
    test_size=0.2,
    random_state=42
)

In [11]:
import numpy as np

user_ids = train_df['user_id'].unique()
product_ids = train_df['product_id'].unique()

user_id_map = {u: i for i, u in enumerate(user_ids)}
product_id_map = {p: i for i, p in enumerate(product_ids)}

In [12]:
from scipy.sparse import csr_matrix

rows = train_df['user_id'].map(user_id_map)
cols = train_df['product_id'].map(product_id_map)
ratings = train_df['rating']

sparse_matrix = csr_matrix(
    (ratings, (rows, cols)),
    shape=(len(user_ids), len(product_ids))
)

In [13]:
user_means = np.zeros(sparse_matrix.shape[0])

for i in range(sparse_matrix.shape[0]):
    row_data = sparse_matrix[i].data
    if len(row_data) > 0:
        user_means[i] = row_data.mean()

In [14]:
mean_centered = sparse_matrix.copy().tolil()

for i in range(mean_centered.shape[0]):
    if len(mean_centered.rows[i]) > 0:
        mean_centered.data[i] = [
            val - user_means[i] for val in mean_centered.data[i]
        ]

mean_centered = mean_centered.tocsr()

In [15]:
from scipy.sparse.linalg import svds

k = 50  # latent dimensions

U, sigma, Vt = svds(mean_centered, k=k)
sigma = np.diag(sigma)

reconstructed = np.dot(np.dot(U, sigma), Vt)

predicted_ratings = reconstructed + user_means.reshape(-1, 1)

predicted_ratings = np.clip(predicted_ratings, 1, 5)

In [16]:
from sklearn.metrics import mean_squared_error

y_true = []
y_pred = []

for _, row in test_df.iterrows():
    user = row['user_id']
    product = row['product_id']
    
    if user in user_id_map and product in product_id_map:
        u_idx = user_id_map[user]
        p_idx = product_id_map[product]
        
        y_true.append(row['rating'])
        y_pred.append(predicted_ratings[u_idx, p_idx])

rmse = np.sqrt(mean_squared_error(y_true, y_pred))
print("SVD RMSE:", rmse)

SVD RMSE: 1.1649390862932856


In [17]:
def recommend_svd(user_id, top_n=10):
    if user_id not in user_id_map:
        return []
    
    u_idx = user_id_map[user_id]
    user_preds = predicted_ratings[u_idx].copy()
    
    already_rated = sparse_matrix[u_idx].indices
    user_preds[already_rated] = -np.inf
    
    top_indices = np.argsort(user_preds)[-top_n:][::-1]
    top_products = [product_ids[i] for i in top_indices]
    
    return df2[df2['product_id'].isin(top_products)]

In [19]:
recc = recommend_svd('U0999')
recc

Unnamed: 0,product_id,product_name,brand,category,price,description,image_url
26,P0027,XPS 13 Plus 573,Dell,Electronics,43.16,Experience excellence with the XPS 13 Plus 573...,https://images.unsplash.com/photo-149618113320...
43,P0044,WH-1000XM5 9278,Sony,Electronics,1861.83,"Meet the WH-1000XM5 9278, designed for perfect...",https://images.unsplash.com/photo-150574042092...
189,P0190,High Impact Mascara,Clinique,Beauty,67.57,Experience excellence with the High Impact Mas...,https://images.unsplash.com/photo-159646250227...
258,P0259,Moisturizing Lotion,Clinique,Beauty,49.4,Discover the power of the Moisturizing Lotion....,https://images.unsplash.com/photo-157017261964...
293,P0294,Alpha 7 IV 2876,Sony,Electronics,810.4,Experience excellence with the Alpha 7 IV 2876...,https://images.unsplash.com/photo-151603506937...
339,P0340,Voluminous Mascara,L'Oreal,Beauty,42.06,Discover the power of the Voluminous Mascara. ...,https://images.unsplash.com/photo-159646250227...
375,P0376,Fit Me Foundation,Maybelline,Beauty,57.09,Upgrade your lifestyle with the Fit Me Foundat...,https://images.unsplash.com/photo-159646250227...
411,P0412,Galaxy Buds2 Pro 3242,Samsung,Electronics,2406.47,Discover the power of the Galaxy Buds2 Pro 324...,https://images.unsplash.com/photo-150574042092...
445,P0446,Alpha 7 IV 2398,Sony,Electronics,943.59,Experience excellence with the Alpha 7 IV 2398...,https://images.unsplash.com/photo-151603506937...
488,P0489,Tech Fleece Joggers,Nike,Fashion,35.6,Experience excellence with the Tech Fleece Jog...,https://images.unsplash.com/photo-154227245431...
