In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from IPython.display import display, Image
warnings.filterwarnings("ignore")

from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

In [3]:

# Directory that holds the three CSV files of the Kaggle "book-recommendation-dataset".
# Change this single constant when running the notebook outside Kaggle.
DATA_DIR = "/kaggle/input/book-recommendation-dataset"

books = pd.read_csv(f"{DATA_DIR}/Books.csv")
users = pd.read_csv(f"{DATA_DIR}/Users.csv")
ratings = pd.read_csv(f"{DATA_DIR}/Ratings.csv")

In [4]:
# Show (rows, columns) of each raw table: books, ratings, users — one per line.
print(*(table.shape for table in (books, ratings, users)), sep="\n")

(271360, 8)
(1149780, 3)
(278858, 3)


In [5]:
# Count the missing values in each column of the books table.
books.isnull().sum()

ISBN                   0
Book-Title             0
Book-Author            2
Year-Of-Publication    0
Publisher              2
Image-URL-S            0
Image-URL-M            0
Image-URL-L            3
dtype: int64

In [6]:
# Attach book metadata to every rating. The default inner join keeps only the
# ratings whose ISBN is present in `books`.
ratings_with_book_titles = pd.merge(ratings, books, on='ISBN')

In [7]:
# Preview the first rows of the merged ratings/books frame.
ratings_with_book_titles.head()

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,276725,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
1,276726,0155061224,5,Rites of Passage,Judith Rae,2001,Heinle,http://images.amazon.com/images/P/0155061224.0...,http://images.amazon.com/images/P/0155061224.0...,http://images.amazon.com/images/P/0155061224.0...
2,276727,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
3,276729,052165615X,3,Help!: Level 1,Philip Prowse,1999,Cambridge University Press,http://images.amazon.com/images/P/052165615X.0...,http://images.amazon.com/images/P/052165615X.0...,http://images.amazon.com/images/P/052165615X.0...
4,276729,0521795028,6,The Amsterdam Connection : Level 4 (Cambridge ...,Sue Leather,2001,Cambridge University Press,http://images.amazon.com/images/P/0521795028.0...,http://images.amazon.com/images/P/0521795028.0...,http://images.amazon.com/images/P/0521795028.0...


In [8]:
# Drop the columns that the later cells do not use. Reassigning (instead of
# inplace=True) together with errors="ignore" lets this cell be re-run without
# raising KeyError once the columns are already gone.
ratings_with_book_titles = ratings_with_book_titles.drop(
    columns=["ISBN", "Image-URL-S", "Image-URL-M"],
    errors="ignore",
)

In [9]:
# Inspect the merged frame after the column drop (Jupyter renders a truncated preview).
ratings_with_book_titles

Unnamed: 0,User-ID,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-L
0,276725,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...
1,276726,5,Rites of Passage,Judith Rae,2001,Heinle,http://images.amazon.com/images/P/0155061224.0...
2,276727,0,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...
3,276729,3,Help!: Level 1,Philip Prowse,1999,Cambridge University Press,http://images.amazon.com/images/P/052165615X.0...
4,276729,6,The Amsterdam Connection : Level 4 (Cambridge ...,Sue Leather,2001,Cambridge University Press,http://images.amazon.com/images/P/0521795028.0...
...,...,...,...,...,...,...,...
1031131,276704,0,Edgar Cayce on the Akashic Records: The Book o...,Kevin J. Todeschi,1998,A.R.E. Press (Association of Research &amp; Enlig,http://images.amazon.com/images/P/0876044011.0...
1031132,276704,9,Get Clark Smart : The Ultimate Guide for the S...,Clark Howard,2000,Longstreet Press,http://images.amazon.com/images/P/1563526298.0...
1031133,276706,0,Eight Weeks to Optimum Health: A Proven Progra...,Andrew Weil,1997,Alfred A. Knopf,http://images.amazon.com/images/P/0679447156.0...
1031134,276709,10,The Sherbrooke Bride (Bride Trilogy (Paperback)),Catherine Coulter,1996,Jove Books,http://images.amazon.com/images/P/0515107662.0...


### Collaborative Filtering

In [10]:
# Keep only "active" users: those with strictly more than `rate_threshold` ratings.
rate_threshold = 180

# Number of ratings recorded for each user
num_ratings_per_user = (
    ratings_with_book_titles
    .groupby('User-ID')['Book-Rating']
    .count()
)

# IDs of the users whose rating count exceeds the threshold
user_ids = num_ratings_per_user.loc[lambda counts: counts > rate_threshold].index

In [11]:
# Restrict the merged ratings to the rows written by the selected users
user_ratings = ratings_with_book_titles.loc[
    ratings_with_book_titles['User-ID'].isin(user_ids)
]

In [12]:
# A title must have at least this many ratings (from the selected users) to be kept.
min_rate_count_thresh = 50

# Select 'Book-Rating' before counting so only that column is aggregated
# (same result as counting every column and then picking one, and consistent
# with how ratings per user are counted above).
rating_counts = user_ratings.groupby('Book-Title')['Book-Rating'].count()
popular_books = rating_counts[rating_counts >= min_rate_count_thresh].index

In [13]:
# Keep only the ratings that belong to the popular titles
final_ratings = user_ratings.loc[user_ratings['Book-Title'].isin(popular_books)]

In [14]:
# Book x user rating matrix: one row per title, one column per user.
# Several ratings for the same (title, user) pair are averaged (pandas' default aggregation);
# pairs without any rating are left as NaN.
pivot_table = final_ratings.pivot_table(
    values='Book-Rating',
    index='Book-Title',
    columns='User-ID',
    aggfunc='mean',
)
pivot_table

User-ID,254,2033,2276,2766,2977,3363,4017,4385,6251,6323,...,274301,274308,274808,275970,276680,277427,277478,277639,278188,278418
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,,,,,,,,,,...,,,,0.0,,,,,,
1st to Die: A Novel,,,,,,,,,,,...,,,,,,,,,,
2nd Chance,,,10.0,,,,,,,,...,,0.0,,,,,,0.0,,
4 Blondes,,,,,,,,,0.0,,...,,,,,,,,,,
A Bend in the Road,0.0,,,7.0,,,,,,,...,,,,,,,,,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,,,,,7.0,,,,,,...,,,,0.0,,,,,,
You Belong To Me,,,,,,,,,,0.0,...,,,,,,,,,,
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,,,,,,0.0,,,0.0,,...,,,,0.0,,,,,,
Zoya,,,,,,,,,,,...,,,,,,,,,,


In [15]:
# Replace missing (title, user) cells with 0 so the matrix can be fed to cosine_similarity.
# Reassign instead of mutating in place, so the cell stays a plain "input -> output" step.
# NOTE(review): after this step an unrated book is indistinguishable from one rated 0.
pivot_table = pivot_table.fillna(0)
pivot_table

User-ID,254,2033,2276,2766,2977,3363,4017,4385,6251,6323,...,274301,274308,274808,275970,276680,277427,277478,277639,278188,278418
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2nd Chance,0.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4 Blondes,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Bend in the Road,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
You Belong To Me,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zoya,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### cosine_similarity 
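`cosine_similarity` takes a matrix as input, where each row represents a data point and each column represents a feature.
In this notebook `pivot_table` has one row per book title and one column per user, so each book is described by the vector of ratings it received.
The function calculates the cosine similarity between every pair of rows (books) in the matrix by measuring the angle between their rating vectors;
a score of 1 means the two vectors point in the same direction (identical rating patterns),
and a score of 0 means the vectors are orthogonal (no overlap in non-zero ratings; because ratings are non-negative, scores never fall below 0).
The output is a square matrix in which element (i, j) is the cosine similarity score between book i and book j.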

Use this matrix to recommend books that are similar to a given book, i.e. books that were rated in a similar way by the same users.
For example, look up a book's row in the similarity matrix, sort the other books by their score, and recommend the titles with the highest cosine similarity.

In [16]:
from sklearn.metrics.pairwise import cosine_similarity 

In [17]:
# Pairwise cosine similarity between the rows of pivot_table. The rows are book titles,
# so entry (i, j) compares book i with book j.
similarity_score = cosine_similarity(pivot_table)

In [18]:
# NOTE(review): this import and the similarity computation repeat the two preceding cells;
# they are kept so this cell can be run on its own.
from sklearn.metrics.pairwise import cosine_similarity 

# Book-to-book cosine similarity (rows of pivot_table are book titles).
similarity_score = cosine_similarity(pivot_table)

def recommend(book_name, n=5, pivot=None, similarity=None, catalog=None):
    """Return the books most similar to ``book_name``.

    Parameters
    ----------
    book_name : str
        Title to look up; it must be one of the row labels of ``pivot``.
    n : int, default 5
        Maximum number of similar books to return. Values <= 0 give an empty list.
    pivot : pandas.DataFrame, optional
        Book x user matrix whose index holds the titles. Defaults to the
        notebook-level ``pivot_table``.
    similarity : array-like, optional
        Square matrix of similarity scores aligned with ``pivot.index``.
        Defaults to the notebook-level ``similarity_score``.
    catalog : pandas.DataFrame, optional
        Table with 'Book-Title' and 'Book-Author' columns used to look up the
        author. Defaults to the notebook-level ``books``.

    Returns
    -------
    list of [title, author, similarity]
        Ordered from most to least similar. The queried book is never included.
        ``author`` is None when the title is missing from ``catalog``.

    Raises
    ------
    IndexError
        If ``book_name`` is not a row label of ``pivot``.
    """
    # Fall back to the notebook globals only when no explicit table was passed in.
    pivot = pivot_table if pivot is None else pivot
    similarity = similarity_score if similarity is None else similarity
    catalog = books if catalog is None else catalog

    positions = np.flatnonzero(pivot.index == book_name)
    if positions.size == 0:
        # Same exception type as the bare `[0][0]` lookup used to raise, but with a clear message.
        raise IndexError(f"{book_name!r} is not among the titles in the similarity matrix")
    index = int(positions[0])

    if n <= 0:
        return []

    # Exclude the queried book by position instead of assuming it sorts first:
    # a book whose rating vector is all zeros has similarity 0 with itself, and
    # ties at the top would otherwise drop the wrong entry.
    candidates = [
        (position, score)
        for position, score in enumerate(similarity[index])
        if position != index
    ]
    # sorted() is stable, so equally similar books keep their matrix order.
    similar_books = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:n]

    data = []
    for position, score in similar_books:
        title = pivot.index[position]
        authors = catalog.loc[catalog['Book-Title'] == title, 'Book-Author'].values
        # Several editions can share a title; the first catalog match is used.
        author = authors[0] if len(authors) else None
        data.append([title, author, float(score)])
    return data

In [19]:
# Show the closest matches for one example title.
recommended_books = recommend("The Alienist")
for title, author, score in recommended_books:
    print(f"Book Title: {title}")
    print(f"Author: {author}")
    print(f"Similarity Score: {score}")
    print()


Book Title: The Poisonwood Bible
Author: Barbara Kingsolver
Similarity Score: 0.2821255010454464

Book Title: The Angel of Darkness
Author: Caleb Carr
Similarity Score: 0.2736767004329991

Book Title: The Cradle Will Fall
Author: Mary Higgins Clark
Similarity Score: 0.27300712186521814

Book Title: Cry Wolf
Author: TAMI HOAG
Similarity Score: 0.262006542413843

Book Title: Secret History
Author: DONNA TARTT
Similarity Score: 0.24811666266253346



## Model Evaluation (SVD Matrix Factorization)

Singular Value Decomposition is used for collaborative filtering based on matrix factorization. It decomposes the user-item rating matrix into two smaller matrices:

- User latent factors: These represent "underlying preferences" or hidden characteristics of users.
- Item latent factors: These represent "intrinsic features" or characteristics of items.

When multiplied together, these two matrices approximate the original rating matrix.

In [29]:
!pip install scikit-surprise



In [31]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

# One seed for both the split and the model so the reported RMSE is reproducible.
RANDOM_STATE = 42

# Define the rating scale
# NOTE(review): the data contains many 0 ratings; presumably 0 marks an implicit
# interaction rather than a real score — confirm against the dataset description
# before treating 0 as the bottom of the scale.
reader = Reader(rating_scale=(0, 10))

# Load the data into Surprise's dataset format (columns must be: user, item, rating)
data = Dataset.load_from_df(final_ratings[['User-ID', 'Book-Title', 'Book-Rating']], reader)

# Split the dataset into training and testing sets
train_set, test_set = train_test_split(data, test_size=0.20, random_state=RANDOM_STATE)

# Define the SVD algorithm; without random_state the factor initialisation
# changes on every run and so does the RMSE.
model = SVD(random_state=RANDOM_STATE)

# Train the algorithm on the training set
model.fit(train_set)

# Make predictions on the test set
predictions = model.test(test_set)

# Evaluate the model (prints the RMSE and returns it as the cell output)
accuracy.rmse(predictions)

RMSE: 3.5095


3.5094750560864885

In [37]:
def recommend_books(user_id, n=10, ratings=None, predictor=None):
    """Return the ``n`` unrated books with the highest predicted rating for a user.

    Parameters
    ----------
    user_id
        Raw user identifier as it appears in the 'User-ID' column.
    n : int, default 10
        Maximum number of recommendations. Values <= 0 give an empty list.
    ratings : pandas.DataFrame, optional
        Table with 'User-ID' and 'Book-Title' columns that defines the candidate
        titles and what the user has already rated. Defaults to the
        notebook-level ``final_ratings``.
    predictor : optional
        Object exposing ``predict(user_id, title)`` whose result has an ``est``
        attribute. Defaults to the notebook-level ``model``.

    Returns
    -------
    list of (title, predicted_rating)
        Sorted from highest to lowest predicted rating; ties keep the order in
        which the titles first appear in ``ratings``.
    """
    # Fall back to the notebook globals only when nothing was passed explicitly.
    ratings = final_ratings if ratings is None else ratings
    predictor = model if predictor is None else predictor

    if n <= 0:
        # A negative n would otherwise slice from the end of the ranking.
        return []

    # A set gives constant-time membership tests; the array used before was
    # rescanned for every candidate title.
    rated_books = set(ratings.loc[ratings['User-ID'] == user_id, 'Book-Title'])
    books_to_predict = [
        title for title in ratings['Book-Title'].unique() if title not in rated_books
    ]

    # Predict a rating for every book the user has not rated yet
    predictions = [
        (title, predictor.predict(user_id, title).est) for title in books_to_predict
    ]

    # Highest predicted rating first (stable sort)
    predictions.sort(key=lambda pair: pair[1], reverse=True)

    return predictions[:n]


In [38]:

user_id = 271705
recommended_books = recommend_books(user_id)
print(f"Top {len(recommended_books)} recommended books for user {user_id}:")
# The second element of each pair is the rating predicted by the SVD model, not a
# similarity. It is deliberately not named `similarity_score`: that would overwrite
# the global book-similarity matrix that recommend() reads.
for i, (title, predicted_rating) in enumerate(recommended_books, start=1):
    print(f"{i}. {title} (Predicted Rating: {predicted_rating})")


Top 10 recommended books for user 271705:
1. Harry Potter and the Order of the Phoenix (Book 5) (Similarity Score: 10)
2. The Tale of the Body Thief (Vampire Chronicles (Paperback)) (Similarity Score: 8.281974669885614)
3. Slaughterhouse Five or the Children's Crusade: A Duty Dance With Death (Similarity Score: 7.85050289008001)
4. The Queen of the Damned (Vampire Chronicles (Paperback)) (Similarity Score: 7.747265252676238)
5. Me Talk Pretty One Day (Similarity Score: 7.728488954547373)
6. Big Cherry Holler: A Big Stone Gap Novel (Ballantine Reader's Circle) (Similarity Score: 7.663302690446539)
7. Year of Wonders (Similarity Score: 7.536996490946592)
8. Chicken Soup for the Soul (Chicken Soup for the Soul) (Similarity Score: 7.53512125057853)
9. Harry Potter and the Goblet of Fire (Book 4) (Similarity Score: 7.316871818881353)
10. Bastard Out of Carolina (Similarity Score: 7.224758532060466)
