In [5]:

import pandas as pd
from surprise import Dataset, Reader, SVD, accuracy
from surprise.model_selection import train_test_split
df = pd.read_csv('book-recommendation-dataset/Ratings.csv')
df.dropna(inplace=True)  
df.drop_duplicates(inplace=True)  
reader = Reader(rating_scale=(0, 10))
data = Dataset.load_from_df(df[['User-ID', 'ISBN', 'Book-Rating']], reader)
trainset, testset = train_test_split(data, test_size=0.2)
model = SVD()
model.fit(trainset)
predictions = model.test(testset)

# Calculate RMSE
print("RMSE on test set:")
accuracy.rmse(predictions)

def recommend_books(user_id, model, data, num_recommendations=5):
    # Get all ISBNs the user has rated
    user_books = df[df['User-ID'] == user_id]['ISBN'].tolist()
    
    # Get unique ISBNs for books the user has not rated
    all_books = df['ISBN'].unique()
    unrated_books = [isbn for isbn in all_books if isbn not in user_books]

    predictions = [model.predict(user_id, isbn) for isbn in unrated_books]
    recommendations = sorted(predictions, key=lambda x: x.est, reverse=True)
    top_recommendations = [(rec.iid, rec.est) for rec in recommendations[:num_recommendations]]
    return top_recommendations

user_id = '1234'  
recommendations = recommend_books(user_id, model, data)
print("Top recommendations for User", user_id, ":", recommendations)


RMSE on test set:
RMSE: 3.5099
Top recommendations for User 1234 : [('1844262553', 7.3831806101814985), ('8826703132', 6.269483542300053), ('0615116426', 5.991204598560337), ('0060248025', 5.788383458029721), ('0091842050', 5.777927664268642)]
