# Book recommendation system with clustering | collaborative-filtering based

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

## Importing data

In [3]:
# Load the book catalogue (semicolon-separated, latin-1); unparsable rows are skipped.
books = pd.read_csv(
    'BX-Books.csv',
    sep=';',
    on_bad_lines='skip',
    low_memory=False,
    encoding='latin-1',
)

# Keep only the columns used later and give them short lowercase names.
books = books[
    ['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher', 'Image-URL-L']
].rename(
    columns={
        'Book-Title': 'title',
        'Book-Author': 'author',
        'Year-Of-Publication': 'publication',
        'Publisher': 'publisher',
        'Image-URL-L': 'img_url',
    }
)

In [4]:
# Load the user table (semicolon-separated, latin-1); unparsable rows are skipped.
users = pd.read_csv(
    'BX-Users.csv',
    sep=';',
    on_bad_lines='skip',
    low_memory=False,
    encoding='latin-1',
)

In [5]:
# Load the ratings table and normalise the two column names used downstream.
ratings = pd.read_csv(
    'BX-Book-Ratings.csv',
    sep=';',
    on_bad_lines='skip',
    low_memory=False,
    encoding='latin-1',
).rename(columns={'User-ID': 'user_id', 'Book-Rating': 'rating'})

### Keeping only users with more than 200 ratings

In [6]:
# Number of ratings submitted by each user.
ratings_per_user = ratings['user_id'].value_counts()

# Ids of the users who rated more than 200 books.
active_users = ratings_per_user[ratings_per_user > 200].index

# Drop every rating that comes from a less active user.
ratings = ratings[ratings['user_id'].isin(active_users)]

In [7]:
# Attach book metadata to every rating (default inner join on ISBN).
rating_with_books = pd.merge(ratings, books, on='ISBN')

# One row per title with the number of ratings that title received.
num_rating = (
    rating_with_books.groupby('title')['rating']
    .count()
    .rename('num_of_rating')
    .reset_index()
)

In [8]:
# Add the per-title rating count to every (user, book) rating row.
final_rating = rating_with_books.merge(num_rating, on='title')

# Keep titles with at least 50 ratings and one row per (user, title) pair.
# drop_duplicates is chained rather than called with inplace=True: mutating the
# result of a boolean-mask selection in place triggers SettingWithCopyWarning.
final_rating = (
    final_rating[final_rating['num_of_rating'] >= 50]
    .drop_duplicates(['user_id', 'title'])
)

In [9]:
# Title x user rating matrix: one row per title, one column per user id.
# (title, user) pairs without a rating are filled with 0.
book_pivot = final_rating.pivot_table(
    index='title',
    columns='user_id',
    values='rating',
).fillna(0)

# Sparse copy of the matrix, used to fit the neighbour model.
book_sparse = csr_matrix(book_pivot)

## Making model

In [10]:
# Brute-force nearest-neighbour index over the title rows of the rating matrix.
# fit() returns the fitted estimator itself, so it can be chained.
model = NearestNeighbors(algorithm='brute').fit(book_sparse)

In [11]:
# Sanity check: query the model with the row at position 237 of the pivot table.
# Selecting with a list keeps the row two-dimensional, shape (1, n_users).
distance, suggestion = model.kneighbors(book_pivot.iloc[[237]].values, n_neighbors=6)

# Titles in pivot-row order, so neighbour indices can be mapped back to names.
books_name = book_pivot.index

## Exporting model

In [12]:
import os
import pickle

# Make sure the output directory exists so the dumps below cannot fail on a
# fresh checkout.
os.makedirs('artifacts', exist_ok=True)

# Objects to persist, keyed by destination path.
artifacts = {
    'artifacts/model.pkl': model,
    'artifacts/books_name.pkl': books_name,
    'artifacts/final_rating.pkl': final_rating,
    'artifacts/book_pivot.pkl': book_pivot,
}

# Write each pickle inside a context manager so the file handle is flushed and
# closed even if serialisation raises.
for artifact_path, artifact in artifacts.items():
    with open(artifact_path, 'wb') as artifact_file:
        pickle.dump(artifact, artifact_file)

## Testing model

In [14]:
def recommend_book(book_name, n_neighbors=6):
    """Print the titles whose rating rows are closest to ``book_name``.

    The row of ``book_pivot`` for ``book_name`` is used as the query for
    ``model.kneighbors``; the titles at the returned row positions are
    printed as a pandas ``Index``. Because the query row is itself taken
    from ``book_pivot``, the queried title normally appears in the output.

    Args:
        book_name: Title to look up; must match an entry of
            ``book_pivot.index`` exactly.
        n_neighbors: Number of neighbours to request from the model
            (default 6, the value that was previously hard-coded).

    Raises:
        IndexError: If ``book_name`` is not present in ``book_pivot.index``.
    """
    matches = np.where(book_pivot.index == book_name)[0]
    if matches.size == 0:
        # Same exception type as the bare ``[0][0]`` lookup used to raise,
        # but with a message that names the missing title.
        raise IndexError(f"{book_name!r} is not a title in book_pivot")
    book_id = matches[0]

    # kneighbors expects a 2-D array: a single query row of shape (1, n_columns).
    query = book_pivot.iloc[book_id, :].values.reshape(1, -1)
    _, suggestion = model.kneighbors(query, n_neighbors=n_neighbors)

    # One array of neighbour positions per query row (a single row here).
    for neighbor_ids in suggestion:
        print(book_pivot.index[neighbor_ids])

# Smoke test: print the nearest neighbours of one known title.
recommend_book('A Bend in the Road')

Index(['A Bend in the Road', 'Exclusive', 'The Cradle Will Fall',
       'No Safe Place', 'Family Album', 'Last Man Standing'],
      dtype='object', name='title')
