There are two main types of collaborative filtering:

- User-Based Collaborative Filtering

- Item-Based Collaborative Filtering

this applies the User-Based Collaborative Filtering.


In [1]:
!pip install scikit-surprise



In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv
from surprise.model_selection import train_test_split
from surprise import Reader, Dataset
from surprise import accuracy
from surprise import KNNBasic


In [3]:
book_df = pd.read_csv('../input/book-recommendation-dataset/Books.csv', low_memory=False)
ratings_df = pd.read_csv('../input/book-recommendation-dataset/Ratings.csv').sample(40000)
user_df = pd.read_csv('../input/book-recommendation-dataset/Users.csv')
user_rating_df = ratings_df.merge(user_df, on='User-ID')

In [4]:
#creating a surpise reader
reader = Reader(rating_scale=(1,10))

#load data from df
data = Dataset.load_from_df(user_rating_df[['User-ID', 'ISBN', 'Book-Rating']], reader)

In [5]:
#split train test
trainset, testset = train_test_split(data, test_size=0.2, random_state=23)



In [6]:
#create a knnbasic collaborative model

similarity_option = {
    'name': 'cosine',
    'user_based': False #uses item-based collaborative filtering
}

In [7]:
model = KNNBasic(similarity_option=similarity_option)

model.fit(trainset)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x7d71312770a0>

In [8]:
pred = model.test(testset)

In [9]:
## Evaluate the model's performance
accuracy.rmse(pred)

RMSE: 3.8805


3.8804615268517932

Excellent. We have a good rmse

In [10]:

# Choose a user for whom you want to get recommendations
user_id = 17

# Get a list of all books (ISBNs) in your dataset
all_books = user_rating_df['ISBN'].unique()

# Create a list of tuples containing the book ISBN and the predicted rating for the chosen user
predicted_ratings = [(book, model.predict(user_id, book).est) for book in all_books]

# Sort the list by predicted ratings in descending order
predicted_ratings.sort(key=lambda x: x[1], reverse=True)

# Print the top 5 recommended titles
top_n = 5  # Adjust the number of top recommendations you want to display
print(f"Top {top_n} recommended titles for User {user_id}:")
for i, (book, rating) in enumerate(predicted_ratings[:top_n], 1):
    # Check if the book is present in book_df before accessing its title
    book_info = book_df[book_df['ISBN'] == book]
    if not book_info.empty:
        book_title = book_info['Book-Title'].iloc[0]
        print(f"{i}. {book_title} (ISBN: {book}) - Predicted Rating: {rating:.2f}")
    else:
        print(f"{i}. Book with ISBN {book} not found in book_df.")


Top 5 recommended titles for User 17:
1. Book with ISBN 0380406675 not found in book_df.
2. Cattle Baron (Harlequin Super Romance) (ISBN: 0373709668) - Predicted Rating: 2.85
3. All the Names (ISBN: 0156010593) - Predicted Rating: 2.85
4. The Enneagram : A Journey of Self Discovery (ISBN: 0871932148) - Predicted Rating: 2.85
5. The Vampire Lestat (Vampire Chronicles, Book II) (ISBN: 0345313860) - Predicted Rating: 2.85
