Book Recommender System Using Clustering | Collaborative Filtering

In [None]:
# Print the path of the Python interpreter that is executing this code.
import sys
print(sys.executable)


In [None]:
# Print the version of the installed SciPy package.
import scipy
print(scipy.__version__)


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the books table: ';'-separated, Latin-1 encoded. Lines pandas cannot
# parse are skipped (on_bad_lines='skip'), so some rows may be dropped silently.
books = pd.read_csv('bkdata/BX-Books.csv', sep=";", on_bad_lines='skip', encoding='latin-1', low_memory=False)

In [None]:
books.head()

In [None]:
# Rename the hyphenated dataset columns to short snake_case names.
# 'Image-URL-S' and 'Image-URL-M' are not listed here and keep their names.
books.rename(columns={
    "Book-Title": "title",
    "Book-Author": "author",
    "Year-Of-Publication": "year",
    "Publisher": "publisher",
    "Image-URL-L": "img_url"}, inplace=True)

In [None]:
books.shape

In [None]:
books.columns

In [None]:
print(books.columns)


In [None]:
# Keep an explicit selection of columns, in this order.
books = books[['ISBN', 'title', 'author', 'year', 'publisher',
       'Image-URL-S', 'Image-URL-M', 'img_url']]

In [None]:
# Load the users table with the same CSV settings as the books table
# (';' separator, Latin-1 encoding, unparseable lines skipped).
users = pd.read_csv('bkdata/BX-Users.csv', sep=";", on_bad_lines='skip', encoding='latin-1', low_memory=False)

In [None]:
users.head()

In [None]:
# Load the book ratings table with the same CSV settings.
ratings = pd.read_csv('bkdata/BX-Book-Ratings.csv', sep=";", on_bad_lines='skip', encoding='latin-1', low_memory=False)

In [None]:
# Compare the (rows, columns) sizes of the three tables.
print(books.shape)
print(users.shape)
print(ratings.shape)

In [None]:
ratings.head()

In [None]:
# Rename the hyphenated rating columns to snake_case names.
ratings.rename(columns={
    "User-ID": "user_id",
    "Book-Rating": "rating"}, inplace=True)

In [None]:
ratings.head()

In [None]:
# Number of rating rows per user.
ratings['user_id'].value_counts()

In [None]:
# Number of distinct users that have at least one rating row.
ratings['user_id'].unique().shape

In [None]:
# Boolean Series indexed by user_id: True where the user has more than 200 ratings.
x = ratings['user_id'].value_counts() > 200

In [None]:
# How many users pass the > 200 ratings threshold.
x[x].shape

In [None]:
# The user ids that pass the threshold.
y = x[x].index

In [None]:
y

In [None]:
# Keep only the rating rows written by those users.
ratings = ratings[ratings['user_id'].isin(y)]

In [None]:
ratings.head()

In [None]:
ratings.shape

In [None]:
# Attach book metadata to each rating by ISBN. No `how` is given, so pandas
# uses its default inner join: ratings whose ISBN is not in `books` are dropped.
ratings_with_books = ratings.merge(books, on ="ISBN")

In [None]:
ratings_with_books.head()

In [None]:
ratings_with_books.shape

In [None]:
# Count the (non-null) ratings each title received in the merged table.
num_rating = ratings_with_books.groupby('title')['rating'].count().reset_index()

In [None]:
num_rating.head()

In [None]:
# The 'rating' column now holds a count, so name it accordingly.
num_rating.rename(columns={"rating": "num_of_rating"}, inplace=True)

In [None]:
num_rating.head()

In [None]:
ratings_with_books.head()

In [None]:
# Add the per-title rating count to every rating row.
final_rating=ratings_with_books.merge(num_rating, on= 'title')

In [None]:
final_rating.head()

In [None]:
final_rating.shape

In [None]:
# Keep only rows whose title has at least 50 ratings.
final_rating = final_rating[final_rating['num_of_rating']>=50]

In [None]:
# Inspect a random sample of 10 remaining rows.
final_rating.sample(10)

In [None]:
final_rating.shape

In [None]:
# Keep one row per (user, title) pair. Reassign the result instead of using
# inplace=True: final_rating was produced by boolean filtering, and mutating
# such a frame in place can trigger pandas' SettingWithCopyWarning.
final_rating = final_rating.drop_duplicates(['user_id', 'title'])

In [None]:
final_rating.shape

In [None]:
# Build a title x user table of ratings: one row per title, one column per user_id.
# pivot_table applies its default aggregation if a (title, user) pair occurs more than once.
book_pivot = final_rating.pivot_table(columns='user_id', index='title', values = 'rating')

In [None]:
book_pivot

In [None]:
book_pivot.shape

In [None]:
# (title, user) pairs with no rating are NaN after the pivot; store them as 0.
book_pivot.fillna(0, inplace=True)

In [None]:
book_pivot

In [None]:
from scipy.sparse import csr_matrix

In [None]:
# Convert the pivot table to a SciPy compressed-sparse-row matrix for fitting.
book_sparse = csr_matrix(book_pivot)

In [None]:
book_sparse

In [None]:
from sklearn.neighbors import NearestNeighbors
# Brute-force nearest-neighbour search; the distance metric is left at the
# scikit-learn default.
model = NearestNeighbors(algorithm = 'brute')

In [None]:
# Fit on the sparse title x user matrix: each title (row) is one sample.
model.fit(book_sparse)

In [None]:
# Sample query: the 6 nearest titles to the title at row position 237.
# reshape(1, -1) turns the single row into a 2-D array with one sample.
distance, suggestion = model.kneighbors(book_pivot.iloc[237,:].values.reshape(1,-1), n_neighbors=6)

In [None]:
distance

In [None]:
suggestion

In [None]:
# Each row of `suggestion` holds the neighbour positions for one query;
# look up and print the matching titles from the pivot table's index.
for neighbour_rows in suggestion:
    print(book_pivot.index[neighbour_rows])

In [None]:
book_pivot.index

In [None]:
# Keep the titles (the pivot table's index) under their own name so they can
# be saved alongside the model.
books_name = book_pivot.index

In [None]:
import pickle
import os

# Make sure the output directory exists (no error if it is already there).
os.makedirs('artifacts', exist_ok=True)

# Persist the fitted model and the data needed to query it. Each file is
# opened in a `with` block so it is flushed and closed even if pickling fails;
# the bare open(...) calls used previously never closed their handles.
# NOTE(review): the '.pk1' extension looks like a typo for '.pkl'; it is kept
# unchanged because other code may load the artifacts by these exact paths.
for artifact, artifact_path in (
    (model, 'artifacts/model.pk1'),
    (books_name, 'artifacts/books_name.pk1'),
    (final_rating, 'artifacts/final_rating.pk1'),
    (book_pivot, 'artifacts/book_pivot.pk1'),
):
    with open(artifact_path, 'wb') as artifact_file:
        pickle.dump(artifact, artifact_file)

In [None]:
# Check that pickling was successful by loading the model back.
# The `with` block closes the file handle, which the bare open(...) call did not.
# NOTE: pickle.load executes code embedded in the file; only load trusted artifacts.
with open('artifacts/model.pk1', 'rb') as model_file:
    model = pickle.load(model_file)
print("Model loaded successfully!")


In [None]:
def recommend_book(book_name, n_neighbors=6):
    """Print the titles whose rating vectors are nearest to ``book_name``.

    The title is looked up in the global ``book_pivot`` index, its row is
    passed to the global fitted ``model``, and each neighbouring title is
    printed on its own line. Because the queried title is itself part of the
    fitted data, it is typically the first title printed.

    Args:
        book_name: Title to look up; must match an entry of
            ``book_pivot.index`` exactly.
        n_neighbors: Number of neighbours to request from the model
            (default 6, the value previously hard-coded).

    Raises:
        IndexError: If ``book_name`` is not present in ``book_pivot.index``.
    """
    matches = np.where(book_pivot.index == book_name)[0]
    if matches.size == 0:
        # Same exception type as the former bare `[0][0]` lookup, but with a
        # message that names the missing title.
        raise IndexError(f"Book {book_name!r} not found in book_pivot index")
    book_id = matches[0]

    # kneighbors expects a 2-D array, so reshape the row to a single sample.
    query = book_pivot.iloc[book_id, :].values.reshape(1, -1)
    _, suggestion = model.kneighbors(query, n_neighbors=n_neighbors)

    # One row of neighbour positions per query row (here exactly one).
    for neighbour_rows in suggestion:
        for title in book_pivot.index[neighbour_rows]:
            print(title)

In [None]:
# Example query: print the nearest titles for one book. The title must match
# an entry of book_pivot.index exactly, otherwise recommend_book raises.
book_name = 'A Bend in the Road'
recommend_book(book_name)