In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from surprise import Dataset, Reader, SVD
from surprise.model_selection import cross_validate

# Toy ratings table built inline (a stand-in for MovieLens or any custom
# dataset): one row per observed (user_id, item_id, rating) triple.
SEED = 42  # single knob controlling every source of randomness in this cell

raw_ratings = {
    'user_id': [1, 1, 1, 2, 2, 3, 3, 4, 4, 5],
    'item_id': [101, 102, 103, 101, 104, 102, 105, 103, 105, 101],
    'rating': [5, 4, 3, 5, 2, 3, 4, 5, 4, 3]
}
df = pd.DataFrame(raw_ratings)

# Surprise's FAQ recommends seeding NumPy's RNG for reproducible experiments;
# without it the fold assignment (and thus the RMSE/MAE table) changes on
# every Restart & Run All.
np.random.seed(SEED)

# Wrap the frame in a Surprise Dataset. Column order must be user, item, rating,
# and the Reader declares the valid rating range.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['user_id', 'item_id', 'rating']], reader)

# No separate hold-out split is made here: a pandas-level split would yield
# DataFrames that Surprise never consumes, and cross_validate below already
# performs its own K-fold train/test partitioning of `data`.

# Use Singular Value Decomposition (SVD) for matrix factorization.
# random_state pins the factor initialisation so the fitted model is repeatable.
model = SVD(random_state=SEED)
cross_validate(model, data, cv=5, verbose=True)

# cross_validate fitted `model` on individual folds only; refit on every
# available rating before serving predictions.
trainset = data.build_full_trainset()
model.fit(trainset)

# Make a prediction for a (user, item) pair that has no rating in the table.
user_id = 1
item_id = 104
pred = model.predict(user_id, item_id)
print(f"Predicted rating for User {user_id} on Item {item_id}: {pred.est:.2f}")


Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0298  1.0044  1.4793  0.9732  1.1251  1.1224  0.1856  
MAE (testset)     0.9410  1.0036  1.0847  0.8189  1.1133  0.9923  0.1058  
Fit time          0.00    0.00    0.00    0.00    0.00    0.00    0.00    
Test time         0.00    0.00    0.00    0.00    0.00    0.00    0.00    
Predicted rating for User 1 on Item 104: 3.52
