In [13]:
from surprise import SVD, Dataset, Reader
from surprise.model_selection import cross_validate

# Sample data
data = Dataset.load_builtin('ml-100k')

# Using the SVD algorithm
algo = SVD()

# Run 5-fold cross-validation
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)


Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9321  0.9360  0.9361  0.9374  0.9379  0.9359  0.0020  
MAE (testset)     0.7341  0.7378  0.7382  0.7385  0.7396  0.7376  0.0019  
Fit time          2.49    2.74    2.51    2.51    2.95    2.64    0.18    
Test time         0.70    0.30    1.04    0.27    0.50    0.56    0.29    


{'test_rmse': array([0.93210783, 0.93598997, 0.93609189, 0.93735031, 0.93793598]),
 'test_mae': array([0.73406939, 0.73780937, 0.73816264, 0.73849379, 0.73964776]),
 'fit_time': (2.4919216632843018,
  2.7424747943878174,
  2.508394956588745,
  2.5114548206329346,
  2.9509172439575195),
 'test_time': (0.6988515853881836,
  0.29666900634765625,
  1.0418956279754639,
  0.26525044441223145,
  0.4957089424133301)}

In [14]:
# Step 1: Train-test split
from surprise.model_selection import train_test_split

# Split the data into training and test sets (you can change the test_size if needed)
trainset, testset = train_test_split(data, test_size=0.25)

# Step 2: Train the model using the training set
algo.fit(trainset)

# Step 3: Evaluate the model using the test set
predictions = algo.test(testset)

# Step 4: Calculate RMSE and MAE
from surprise import accuracy

# RMSE (Root Mean Squared Error)
rmse = accuracy.rmse(predictions)

# MAE (Mean Absolute Error)
mae = accuracy.mae(predictions)

print(f"RMSE on test set: {rmse}")
print(f"MAE on test set: {mae}")


RMSE: 0.9417
MAE:  0.7433
RMSE on test set: 0.9416531851755341
MAE on test set: 0.7432654860842849


In [15]:
# Step 5: Making predictions for specific user-movie pairs
# Predict rating for user 1 and movie 10
prediction = algo.predict(1, 10)
print(f"Predicted rating for User 1 and Movie 10: {prediction.est}")


Predicted rating for User 1 and Movie 10: 3.52548


In [16]:
from collections import defaultdict

# Function to get top-N recommendations
def get_top_n(predictions, n=10):
    top_n = defaultdict(list)

    # Map the predictions to each user
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Sort the predictions for each user and retrieve the N highest ones
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

# Get top-10 recommendations for each user
top_n = get_top_n(predictions, n=10)

# Display top-10 recommendations for each user
for uid, user_ratings in top_n.items():
    print(f"User {uid} - Top 10 Recommendations: {user_ratings}")


User 738 - Top 10 Recommendations: [('169', 4.657302805613138), ('408', 4.550627098580284), ('98', 4.441068340164447), ('172', 4.3093884010473085), ('318', 4.276754710225911), ('79', 4.201689800845418), ('181', 4.040043363913503), ('144', 4.015295045220726), ('89', 4.014583965818512), ('258', 4.004633215581034)]
User 790 - Top 10 Recommendations: [('22', 3.878187849276065), ('269', 3.8343619834447953), ('181', 3.8258715735253976), ('69', 3.8017193418423987), ('100', 3.7201914011274035), ('96', 3.630652460186043), ('208', 3.5791157330349894), ('153', 3.5720316794699642), ('436', 3.5437438903998952), ('10', 3.542147861933264)]
User 397 - Top 10 Recommendations: [('199', 4.664320676627067), ('135', 4.421307384463428), ('615', 4.257375415733406), ('498', 4.2481744668813075), ('223', 4.198431473912606), ('286', 4.1737630120124125), ('7', 4.0194919559444395), ('693', 4.018156385030886), ('210', 4.012467521677182), ('492', 3.9463926533983344)]
User 637 - Top 10 Recommendations: [('150', 3.069

In [17]:
# Save the trained model using pickle
import pickle

with open('svd_model.pkl', 'wb') as f:
    pickle.dump(algo, f)

# To load the saved model in the future:
# with open('svd_model.pkl', 'rb') as f:
#     loaded_model = pickle.load(f)



In [18]:
from surprise.model_selection import GridSearchCV

# Define the parameter grid for hyperparameter tuning
param_grid = {
    'n_factors': [50, 100],
    'lr_all': [0.005, 0.01],
    'reg_all': [0.02, 0.1]
}

# Initialize GridSearchCV
grid_search = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)

# Fit the grid search
grid_search.fit(data)

# Print the best parameters and best RMSE score
print(f"Best Parameters: {grid_search.best_params}")
print(f"Best RMSE: {grid_search.best_score}")


Best Parameters: {'rmse': {'n_factors': 100, 'lr_all': 0.01, 'reg_all': 0.1}, 'mae': {'n_factors': 100, 'lr_all': 0.01, 'reg_all': 0.1}}
Best RMSE: {'rmse': 0.9284289140926386, 'mae': 0.7352784834129134}


In [19]:
# Load the dataset again (make sure it's the full dataset)
data = Dataset.load_builtin('ml-100k')

# Re-train the SVD model on the full dataset
trainset = data.build_full_trainset()  # Build a full training set
algo = SVD()  # You can adjust the parameters here if you want to use the best ones from cross-validation

# Train the model
algo.fit(trainset)


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2cac0e2a730>

In [20]:
# Predicting for a specific user-item pair
uid = str(1)  # User ID, replace with your desired user ID
iid = str(50)  # Item ID (movie ID), replace with your desired movie ID

# Predict the rating for user 1 and movie 50
prediction = algo.predict(uid, iid)
print(f"Predicted rating for user {uid} on movie {iid}: {prediction.est}")


Predicted rating for user 1 on movie 50: 5


In [21]:
def get_top_n_recommendations(algo, trainset, n=10):
    # Get a list of all movie IDs
    all_movie_ids = set([str(i) for i in range(1, trainset.n_items+1)])
    
    # Get the list of movies the user has already rated
    rated_movies = set([iid for (uid, iid, _) in trainset.all_ratings() if uid == '1'])
    
    # Predict ratings for all movies the user hasn't rated yet
    predictions = []
    for movie_id in all_movie_ids - rated_movies:
        prediction = algo.predict('1', movie_id)
        predictions.append((movie_id, prediction.est))
    
    # Sort predictions by predicted rating in descending order
    predictions.sort(key=lambda x: x[1], reverse=True)
    
    # Get the top N recommendations
    top_n = predictions[:n]
    return top_n

# Get top-10 movie recommendations for user '1'
top_n_recommendations = get_top_n_recommendations(algo, trainset, n=10)

# Display recommendations
for movie_id, rating in top_n_recommendations:
    print(f"Movie ID: {movie_id}, Predicted Rating: {rating}")


Movie ID: 50, Predicted Rating: 5
Movie ID: 100, Predicted Rating: 5
Movie ID: 169, Predicted Rating: 5
Movie ID: 114, Predicted Rating: 4.985495689205472
Movie ID: 183, Predicted Rating: 4.9466304410819735
Movie ID: 408, Predicted Rating: 4.89489569629663
Movie ID: 318, Predicted Rating: 4.849949207594992
Movie ID: 258, Predicted Rating: 4.840468200694174
Movie ID: 48, Predicted Rating: 4.819336132505978
Movie ID: 172, Predicted Rating: 4.810159520283235


In [22]:
from surprise import accuracy

# Test on some known data (for example, a small set of test data)
testset = trainset.build_testset()  # You can use your own testset here
predictions = algo.test(testset)

# Compute RMSE
rmse = accuracy.rmse(predictions)
print(f"Root Mean Squared Error (RMSE): {rmse}")


RMSE: 0.6754
Root Mean Squared Error (RMSE): 0.6753723358515856


In [23]:
import pickle

# Save the trained model
with open('svd_model.pkl', 'wb') as f:
    pickle.dump(algo, f)

# Later, you can load the model
with open('svd_model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)
