<a href="https://colab.research.google.com/github/SisekoC/OpenCV/blob/main/Book_Recommender_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Importing Required Libraries

In [1]:
import warnings
warnings.simplefilter('ignore')

import numpy as np
import pandas as pd

# 2. Data Ingestion

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!kaggle datasets download -d ruchi798/bookcrossing-dataset

Dataset URL: https://www.kaggle.com/datasets/ruchi798/bookcrossing-dataset
License(s): CC0-1.0
Downloading bookcrossing-dataset.zip to /content
 64% 49.0M/76.1M [00:00<00:00, 82.1MB/s]
100% 76.1M/76.1M [00:00<00:00, 105MB/s] 


In [4]:
!unzip bookcrossing-dataset

Archive:  bookcrossing-dataset.zip
  inflating: Book reviews/Book reviews/BX-Book-Ratings.csv  
  inflating: Book reviews/Book reviews/BX-Users.csv  
  inflating: Book reviews/Book reviews/BX_Books.csv  
  inflating: Books Data with Category Language and Summary/Preprocessed_data.csv  


In [11]:
# Parser settings shared by the three files: ';' as delimiter, decoded as latin-1.
bx_csv_options = {'sep': ";", 'encoding': 'latin-1'}

# NOTE(review): judging by the column listings printed in the exploration section,
# `books` receives the rating rows (User-ID, ISBN, Book-Rating) while `ratings`
# receives the book metadata (ISBN, Book-Title, ...). The variable names are kept
# as they are because every later cell refers to them.
books = pd.read_csv('/content/Book reviews/Book reviews/BX-Book-Ratings.csv', **bx_csv_options)
users = pd.read_csv('/content/Book reviews/Book reviews/BX-Users.csv', **bx_csv_options)
ratings = pd.read_csv('/content/Book reviews/Book reviews/BX_Books.csv', **bx_csv_options)

# 3. Data Exploration

In [12]:
# Report the (rows, columns) shape of each loaded frame, one line per frame.
for caption, frame in (("Books Dataset: ", books),
                       ("Users Dataset: ", users),
                       ("Ratings Dataset: ", ratings)):
    print(caption, frame.shape)

Books Dataset:  (1149780, 3)
Users Dataset:  (278858, 3)
Ratings Dataset:  (271379, 8)


In [13]:
# List the column labels of each frame; the leading "\n" keeps a blank line
# between consecutive listings.
for caption, frame in (("Books Dataset: ", books),
                       ("\nUsers Dataset: ", users),
                       ("\nRatings Dataset: ", ratings)):
    print(caption, frame.columns)

Books Dataset:  Index(['User-ID', 'ISBN', 'Book-Rating'], dtype='object')

Users Dataset:  Index(['User-ID', 'Location', 'Age'], dtype='object')

Ratings Dataset:  Index(['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher',
       'Image-URL-S', 'Image-URL-M', 'Image-URL-L'],
      dtype='object')


In [14]:
# NOTE(review): `DataFrame.rename` silently ignores labels that are not present.
# Going by the column listings printed above, `books` has none of the keys below
# and `ratings` has no 'Book-Rating' column, so both calls currently leave the
# frames unchanged. Later cells rely on the original labels ('Book-Title',
# 'Book-Author', ...), so the mappings are deliberately left as written.
book_column_aliases = {
    "Book-Title": 'Title',
    'Book-Author': 'Author',
    "Year-Of-Publication": 'Year',
    "Image-URL-L": "URL",
}
books = books.rename(columns=book_column_aliases)

rating_column_aliases = {"Book-Rating": "Ratings"}
ratings = ratings.rename(columns=rating_column_aliases)

In [15]:
books.head(2)

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5


In [16]:
users.head(2)

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0


In [17]:
ratings.head(2)

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...


In [26]:
# Keep only the ISBN column of `books` (still a DataFrame, one row per original row).
books = books.loc[:, ['ISBN']]

In [27]:
books.head()

Unnamed: 0,ISBN
0,034545104X
1,0155061224
2,0446520802
3,052165615X
4,0521795028


# 4. Data Pre-processing

In [28]:
ratings['Book-Author'].value_counts()

Unnamed: 0_level_0,count
Book-Author,Unnamed: 1_level_1
Agatha Christie,632
William Shakespeare,567
Stephen King,524
Ann M. Martin,423
Carolyn Keene,373
...,...
Emily Pearl Kingsley,1
Jean Cassels,1
Bernice Meyers,1
Mark A. Taylor,1


In [29]:
# Number of rows in `ratings` per author, then a boolean mask (indexed by author)
# that is True where that number exceeds 200.
author_counts = ratings['Book-Author'].value_counts()
x = author_counts.gt(200)

# Shape of the True-only subset, i.e. how many authors pass the threshold.
x.loc[x].shape

(20,)

In [30]:
# Author labels for which the mask `x` is True.
y = x.index[x.to_numpy()]

In [32]:
# Keep only the rows of `ratings` whose author is one of the labels in `y`.
author_is_selected = ratings['Book-Author'].isin(y)
ratings = ratings.loc[author_is_selected]

In [33]:
ratings.shape

(6457, 8)

In [34]:
# Inner join on ISBN: a row of `ratings` is repeated once for every row of `books`
# carrying the same ISBN, and dropped if there is none.
ratings_with_books = pd.merge(ratings, books, how='inner', on='ISBN')

In [35]:
ratings_with_books.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,055321215X,Pride and Prejudice,Jane Austen,1983,Bantam,http://images.amazon.com/images/P/055321215X.0...,http://images.amazon.com/images/P/055321215X.0...,http://images.amazon.com/images/P/055321215X.0...
1,055321215X,Pride and Prejudice,Jane Austen,1983,Bantam,http://images.amazon.com/images/P/055321215X.0...,http://images.amazon.com/images/P/055321215X.0...,http://images.amazon.com/images/P/055321215X.0...
2,055321215X,Pride and Prejudice,Jane Austen,1983,Bantam,http://images.amazon.com/images/P/055321215X.0...,http://images.amazon.com/images/P/055321215X.0...,http://images.amazon.com/images/P/055321215X.0...
3,0671888587,I'll Be Seeing You,Mary Higgins Clark,1994,Pocket,http://images.amazon.com/images/P/0671888587.0...,http://images.amazon.com/images/P/0671888587.0...,http://images.amazon.com/images/P/0671888587.0...
4,0671888587,I'll Be Seeing You,Mary Higgins Clark,1994,Pocket,http://images.amazon.com/images/P/0671888587.0...,http://images.amazon.com/images/P/0671888587.0...,http://images.amazon.com/images/P/0671888587.0...


In [36]:
ratings_with_books.shape

(45236, 8)

In [39]:
# Count the non-null ISBN entries per title; the result has the two columns
# 'Book-Title' and 'ISBN' (the latter holding the count).
rows_by_title = ratings_with_books.groupby('Book-Title')
number_rating = rows_by_title['ISBN'].count().reset_index()

In [40]:
number_rating.head()

Unnamed: 0,Book-Title,ISBN
0,Goosebumps Monster Edition 1: Welcome to Dead...,9
1,Little Comic Shop of Horrors (Give Yourself G...,4
2,"The Secret of the Old Clock (Nancy Drew, Book 1)",3
3,'Salem's Lot,47
4,10 LITTLE INDIANS,1


In [41]:
# NOTE(review): `number_rating` only has the columns 'Book-Title' and 'ISBN', so
# there is no 'Ratings' label to rename and this call leaves the frame unchanged.
# It is kept as-is because the later cells expect the count column to still be
# called 'ISBN' when the frames are merged.
number_rating = number_rating.rename(columns={'Ratings': 'No. of Ratings'})

In [42]:
number_rating.head()

Unnamed: 0,Book-Title,ISBN
0,Goosebumps Monster Edition 1: Welcome to Dead...,9
1,Little Comic Shop of Horrors (Give Yourself G...,4
2,"The Secret of the Old Clock (Nancy Drew, Book 1)",3
3,'Salem's Lot,47
4,10 LITTLE INDIANS,1


In [44]:
# Attach the per-title count to every row. Both inputs have an 'ISBN' column, so
# pandas disambiguates them as 'ISBN_x' (left) and 'ISBN_y' (right, the count).
final_rating = pd.merge(ratings_with_books, number_rating, how='inner', on='Book-Title')

In [45]:
final_rating.head()

Unnamed: 0,ISBN_x,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L,ISBN_y
0,055321215X,Pride and Prejudice,Jane Austen,1983,Bantam,http://images.amazon.com/images/P/055321215X.0...,http://images.amazon.com/images/P/055321215X.0...,http://images.amazon.com/images/P/055321215X.0...,182
1,055321215X,Pride and Prejudice,Jane Austen,1983,Bantam,http://images.amazon.com/images/P/055321215X.0...,http://images.amazon.com/images/P/055321215X.0...,http://images.amazon.com/images/P/055321215X.0...,182
2,055321215X,Pride and Prejudice,Jane Austen,1983,Bantam,http://images.amazon.com/images/P/055321215X.0...,http://images.amazon.com/images/P/055321215X.0...,http://images.amazon.com/images/P/055321215X.0...,182
3,0553213105,Pride and Prejudice,Jane Austen,1981,Bantam,http://images.amazon.com/images/P/0553213105.0...,http://images.amazon.com/images/P/0553213105.0...,http://images.amazon.com/images/P/0553213105.0...,182
4,0553213105,Pride and Prejudice,Jane Austen,1981,Bantam,http://images.amazon.com/images/P/0553213105.0...,http://images.amazon.com/images/P/0553213105.0...,http://images.amazon.com/images/P/0553213105.0...,182


In [46]:
final_rating.shape

(45236, 9)

In [48]:
# Give the merged-in count column ('ISBN_y') a descriptive label.
final_rating = final_rating.rename(columns={'ISBN_y': 'No. of Ratings'})

In [49]:
# Keep only rows belonging to titles whose count is at least 50.
has_enough_ratings = final_rating['No. of Ratings'].ge(50)
final_rating = final_rating.loc[has_enough_ratings]

In [50]:
final_rating.head()

Unnamed: 0,ISBN_x,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L,No. of Ratings
0,055321215X,Pride and Prejudice,Jane Austen,1983,Bantam,http://images.amazon.com/images/P/055321215X.0...,http://images.amazon.com/images/P/055321215X.0...,http://images.amazon.com/images/P/055321215X.0...,182
1,055321215X,Pride and Prejudice,Jane Austen,1983,Bantam,http://images.amazon.com/images/P/055321215X.0...,http://images.amazon.com/images/P/055321215X.0...,http://images.amazon.com/images/P/055321215X.0...,182
2,055321215X,Pride and Prejudice,Jane Austen,1983,Bantam,http://images.amazon.com/images/P/055321215X.0...,http://images.amazon.com/images/P/055321215X.0...,http://images.amazon.com/images/P/055321215X.0...,182
3,0553213105,Pride and Prejudice,Jane Austen,1981,Bantam,http://images.amazon.com/images/P/0553213105.0...,http://images.amazon.com/images/P/0553213105.0...,http://images.amazon.com/images/P/0553213105.0...,182
4,0553213105,Pride and Prejudice,Jane Austen,1981,Bantam,http://images.amazon.com/images/P/0553213105.0...,http://images.amazon.com/images/P/0553213105.0...,http://images.amazon.com/images/P/0553213105.0...,182


In [51]:
final_rating.shape

(20781, 9)

In [54]:
# Title-by-ISBN table. Duplicate (title, ISBN) pairs are aggregated with the
# pandas default (mean); combinations that never occur become NaN.
# NOTE(review): the cell values are the per-title counts from 'No. of Ratings',
# not individual user ratings - confirm this is the intended feature matrix.
book_pivot = pd.pivot_table(
    final_rating,
    values='No. of Ratings',
    index='Book-Title',
    columns='ISBN_x',
)

In [55]:
book_pivot

ISBN_x,0006170056,0060467215,0060806125,0061003263,0061020702,0061020710,0099928205,0140059342,0140238212,0140239316,...,8437610974,8878242357,9504905501,B00001IVC7,B00005BBW4,B00005NCS7,B00005Q8OL,B00007CWQC,B0000E63CJ,B0000T6KHI
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A Cry In The Night,,,,,,,,,,,...,,,,,,,,,,
A Spell for Chameleon (Xanth Novels (Paperback)),,,,,,,,,,,...,,,,,,,,,,
Adventures of Huckleberry Finn,,81.0,81.0,,,,,,,,...,,,,,,,,,,
All Around the Town,,,,,,,,,,,...,,,,,,,,,,
All Through the Night (Holiday Classics),,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Where Are the Children?,,,,,,,,,,,...,,,,,,,,,,
While My Pretty One Sleeps,,,,,,,,,,,...,,,,,,,,,,
"Wizard and Glass (The Dark Tower, Book 4)",,,,,,,,,,,...,,,,,,,,,,
YOU BELONG TO ME,,,,,,,,,,,...,,,,,,,,,,


In [56]:
book_pivot.shape

(174, 510)

In [57]:
# Replace the NaN cells (title/ISBN combinations with no rows) by 0.
book_pivot = book_pivot.fillna(0)

In [58]:
book_pivot

ISBN_x,0006170056,0060467215,0060806125,0061003263,0061020702,0061020710,0099928205,0140059342,0140238212,0140239316,...,8437610974,8878242357,9504905501,B00001IVC7,B00005BBW4,B00005NCS7,B00005Q8OL,B00007CWQC,B0000E63CJ,B0000T6KHI
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A Cry In The Night,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Spell for Chameleon (Xanth Novels (Paperback)),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Adventures of Huckleberry Finn,0.0,81.0,81.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
All Around the Town,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
All Through the Night (Holiday Classics),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Where Are the Children?,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
While My Pretty One Sleeps,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Wizard and Glass (The Dark Tower, Book 4)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
YOU BELONG TO ME,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Training Model

In [59]:
from scipy.sparse import csr_matrix
# Row labels of the pivot table (the book titles), kept for later lookup.
book_names = book_pivot.index

# Compressed-sparse-row copy of the pivot values, used to fit the neighbour model.
book_sparse = csr_matrix(book_pivot.to_numpy())

In [60]:
from sklearn.neighbors import NearestNeighbors
model = NearestNeighbors(algorithm= 'brute')

In [61]:
model.fit(book_sparse)

In [65]:
print(book_pivot.shape)


(174, 510)


In [67]:
# Query the fitted model with the pivot row at position 170; selecting with a
# one-element list keeps the result two-dimensional, shape (1, n_columns).
query_row = book_pivot.iloc[[170]].to_numpy()
distances, suggestions = model.kneighbors(query_row, n_neighbors=6)


In [69]:
distances

array([[  0.        , 197.35754356, 197.35754356, 197.35754356,
        197.87369709, 197.87369709]])

In [70]:
suggestions

array([[170, 154,  71,  29,  92,  88]])

In [72]:
book_pivot.iloc[173,:]

Unnamed: 0_level_0,You Belong To Me
ISBN_x,Unnamed: 1_level_1
0006170056,0.0
0060467215,0.0
0060806125,0.0
0061003263,0.0
0061020702,0.0
...,...
B00005NCS7,0.0
B00005Q8OL,0.0
B00007CWQC,0.0
B0000E63CJ,0.0


In [74]:
# `suggestions` holds one row of neighbour positions per query row; print the
# titles found at those positions, one Index per query.
for neighbour_positions in suggestions:
    print(book_pivot.index[neighbour_positions])

Index(['While My Pretty One Sleeps', 'The Stand: The Complete & Uncut Edition',
       'Less Of A Stranger (Silhouette Single Title)', 'Dangerous',
       'On a Pale Horse (Incarnations of Immortality, Bk. 1)',
       'Night Shield (Intimate Moments, 1027)'],
      dtype='object', name='Book-Title')


In [75]:
import pickle
# Serialise the fitted model and the frames it depends on into the working
# directory. Each file is opened in a `with` block so its handle is flushed and
# closed as soon as the dump finishes, rather than whenever the garbage
# collector gets to it.
artifacts_to_save = {
    'Model.pkl': model,
    'Book_Names.pkl': book_names,
    'Final_Rating.pkl': final_rating,
    'Book_Pivot.pkl': book_pivot,
}
for pickle_path, artifact in artifacts_to_save.items():
    with open(pickle_path, 'wb') as pickle_file:
        pickle.dump(artifact, pickle_file)

# 5. Model Evaluation

In [81]:
def recommend_book(book_name, n_neighbors=6):
    """Print the titles closest to ``book_name`` according to the fitted model.

    The row of ``book_pivot`` labelled ``book_name`` is used as the query for
    ``model.kneighbors``; the titles at the returned positions are printed as a
    single pandas ``Index``.

    Parameters
    ----------
    book_name : str
        Title to look up. It must match a label of ``book_pivot.index`` exactly.
    n_neighbors : int, default 6
        Number of neighbours to request. Values larger than the number of
        samples the model was fitted on are reduced to that number, because
        ``kneighbors`` raises ``ValueError`` otherwise.

    Returns
    -------
    None
        The result is printed; a message is printed instead when the title is
        not present in ``book_pivot.index``.
    """
    # Guard clause: unknown titles are reported, not raised.
    if book_name not in book_pivot.index:
        print(f"Book '{book_name}' not found in the dataset.")
        return

    # Position of the first row whose label equals the requested title.
    row_position = np.where(book_pivot.index == book_name)[0][0]

    # A one-element list keeps the selection 2-D: shape (1, n_columns).
    query = book_pivot.iloc[[row_position]].to_numpy()

    neighbour_count = min(n_neighbors, model.n_samples_fit_)
    # Only the neighbour positions are needed; the distances are discarded.
    _, suggestions = model.kneighbors(query, n_neighbors=neighbour_count)

    # One query row was passed, so `suggestions` has exactly one row.
    print(book_pivot.index[suggestions[0]])

# Example usage
recommend_book("While My Pretty One Sleeps")


Index(['While My Pretty One Sleeps', 'The Stand: The Complete & Uncut Edition',
       'Less Of A Stranger (Silhouette Single Title)', 'Dangerous',
       'On a Pale Horse (Incarnations of Immortality, Bk. 1)',
       'Night Shield (Intimate Moments, 1027)'],
      dtype='object', name='Book-Title')
