In [17]:
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds
from sklearn.metrics import mean_squared_error

In [18]:
# Load the dataset
data = pd.read_table('../data/u.data')

# Create a user-item matrix
ratings_matrix = data.pivot(index='user', columns='event', values='rating').fillna(0)

In [19]:
# Perform SVD
U, sigma, Vt = np.linalg.svd(ratings_matrix)

k = 10
U = U[:, :k]
sigma = np.diag(sigma[:k])
Vt = Vt[:k, :]

In [20]:
# Reconstruct the original matrix
predicted_ratings = np.dot(np.dot(U, sigma), Vt)

# Convert to DataFrame
predicted_ratings_df = pd.DataFrame(predicted_ratings, columns=ratings_matrix.columns, index=ratings_matrix.index)

In [21]:
# Calculate RMSE
def calculate_rmse(actual, predicted):
    # Keep only non-zero elements to match actual ratings
    nonzero_indexes = np.nonzero(actual)
    actual_nonzero = actual[nonzero_indexes]
    predicted_nonzero = predicted[nonzero_indexes]
    
    # Calculate RMSE
    rmse = np.sqrt(mean_squared_error(actual_nonzero, predicted_nonzero))
    return rmse

In [22]:
actual_ratings_matrix = ratings_matrix.values
rmse = calculate_rmse(actual_ratings_matrix, predicted_ratings)

print("RMSE:", rmse)

# Recommendation for a specific user (e.g., user with ID 22)
user_id = 22
user_ratings = predicted_ratings_df.loc[user_id]
top_recommendations = user_ratings.sort_values(ascending=False).head(10)

print("Top recommendations for user", user_id)
print(top_recommendations)

RMSE: 7.606655358368348e-15
Top recommendations for user 22
event
194    5.0
510    5.0
435    5.0
238    5.0
523    5.0
204    5.0
187    5.0
202    5.0
515    5.0
24     5.0
Name: 22, dtype: float64
