# **Book Recommender System** 

In [1]:
import numpy as np
import pandas as pd 

In [2]:
# Read the three raw tables (books, users, ratings) from CSV files in the
# working directory, in that order.
books, users, ratings = (
    pd.read_csv(csv_name)
    for csv_name in ('Books.csv', 'Users.csv', 'Ratings.csv')
)

In [3]:
# Preview the first few rows of the books table to inspect its columns.
books.head()

In [4]:
# Preview the first few rows of the users table to inspect its columns.
users.head()

In [5]:
# Preview the first few rows of the ratings table to inspect its columns.
ratings.head()

In [6]:
# Report the (rows, columns) shape of each raw table, one per line.
print(books.shape, users.shape, ratings.shape, sep='\n')

In [7]:
# Missing-value count for every column of the books table.
books.isna().sum()

In [8]:
# Missing-value count for every column of the users table.
users.isna().sum()

In [9]:
# Missing-value count for every column of the ratings table.
ratings.isna().sum()

In [10]:
# Number of rows in `books` that `duplicated()` flags as repeats of an earlier row.
books.duplicated().sum()

In [11]:
# Number of rows in `users` that `duplicated()` flags as repeats of an earlier row.
users.duplicated().sum()

In [12]:
# Number of rows in `ratings` that `duplicated()` flags as repeats of an earlier row.
ratings.duplicated().sum()

## **Popularity Based Recommendation System**

In [13]:
# Attach book metadata to each rating. The join is an inner join on ISBN, so
# ratings whose ISBN does not appear in `books` are dropped.
ratings_with_name = ratings.merge(right=books, how='inner', on='ISBN')

In [14]:
# Preview the merged ratings + book metadata table.
ratings_with_name.head()

In [15]:
# Number of ratings recorded for each title.
# Select 'Book-Rating' *before* aggregating so pandas counts one column rather
# than every column of the merged frame, and name the result column directly
# instead of renaming in place afterwards.
num_ratings_df = (
    ratings_with_name
    .groupby('Book-Title')['Book-Rating']
    .count()
    .reset_index(name='num_ratings')
)
num_ratings_df.head()

In [16]:
# Mean rating for each title.
# Select 'Book-Rating' *before* aggregating so pandas averages one column
# rather than every numeric column of the merged frame, and name the result
# column directly instead of renaming in place afterwards.
avg_ratings_df = (
    ratings_with_name
    .groupby('Book-Title')['Book-Rating']
    .mean()
    .reset_index(name='avg_rating')
)
avg_ratings_df.head()

In [17]:
# Combine the per-title rating count and mean rating into a single table.
popular_df = num_ratings_df.merge(
    right=avg_ratings_df,
    how='inner',
    on='Book-Title',
)
popular_df.head()

In [18]:
# Keep only titles with at least 250 ratings, then take the 50 with the
# highest mean rating.
has_enough_ratings = popular_df['num_ratings'] >= 250
popular_df = (
    popular_df[has_enough_ratings]
    .sort_values(by='avg_rating', ascending=False)
    .head(50)
)
popular_df.head()

In [19]:
# Attach display metadata (author, year, cover URL) to the popular titles.
# The left side is first projected to its three aggregate columns so this cell
# can be re-run safely: without the projection, a second run merges a frame
# that already contains 'Book-Author' etc., pandas suffixes the clashing
# columns with _x/_y, and the final column selection raises KeyError.
# A title can map to several rows of `books`; drop_duplicates keeps the first.
popular_df = (
    popular_df[['Book-Title', 'num_ratings', 'avg_rating']]
    .merge(books, on='Book-Title')
    .drop_duplicates('Book-Title')
    [['Book-Title', 'Book-Author', 'Year-Of-Publication', 'Image-URL-M', 'num_ratings', 'avg_rating']]
)

In [20]:
# Show the final popularity table (at most 50 titles after the head(50) cut above).
popular_df

## **Collaborative Filtering Based Recommendation System**

In [22]:
# Users who have rated more than 200 books.
# 'Book-Rating' is selected before counting so only that column is aggregated.
# `x` is a boolean Series indexed by User-ID; x[x] keeps only the True entries.
x = ratings_with_name.groupby('User-ID')['Book-Rating'].count() > 200
bibliophile_users = x[x].index

In [23]:
# Restrict the ratings to those made by the heavy raters selected above.
rated_by_bibliophile = ratings_with_name['User-ID'].isin(bibliophile_users)
filter_rating = ratings_with_name[rated_by_bibliophile]

In [24]:
# Preview the ratings that remain after the user filter.
filter_rating.head()

In [25]:
# Titles with at least 50 ratings among the filtered users.
# 'Book-Rating' is selected before counting so only that column is aggregated.
# `y` is a boolean Series indexed by Book-Title; y[y] keeps only the True entries.
y = filter_rating.groupby('Book-Title')['Book-Rating'].count() >= 50
famous_books = y[y].index

In [26]:
# Keep only ratings of the frequently rated titles selected above.
is_famous_title = filter_rating['Book-Title'].isin(famous_books)
final_ratings = filter_rating[is_famous_title]

In [27]:
# Preview the ratings that survive both the user filter and the title filter.
final_ratings.head()

In [28]:
# Book x user rating matrix: one row per title, one column per user.
# Several ratings landing in the same cell are averaged.
pt = final_ratings.pivot_table(
    values='Book-Rating',
    index='Book-Title',
    columns='User-ID',
    aggfunc='mean',
)

In [29]:
# Treat every (book, user) pair without a rating as 0 so the matrix is dense.
pt = pt.fillna(0)

In [30]:
# Preview the filled book x user rating matrix.
pt.head()

In [31]:
from sklearn.metrics.pairwise import cosine_similarity

In [32]:
# Pairwise cosine similarity between the rows of `pt` (one row per book title).
# Row/column i of the result corresponds to pt.index[i]; recommend() relies on
# that alignment when it looks a title up by position.
similarity_scores = cosine_similarity(pt)

In [33]:
# Sanity check: should be square, with one row and one column per title in `pt`.
similarity_scores.shape

In [52]:
def recommend(book_name, top_n=10):
    """Return the books most similar to ``book_name``.

    Similarity is read from the precomputed ``similarity_scores`` matrix,
    whose rows and columns follow the order of ``pt.index``. Metadata for each
    recommendation is taken from the first row of ``books`` with that title.

    Parameters
    ----------
    book_name : str
        Title to look up; must match an entry of ``pt.index`` exactly.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    list of list
        One ``[title, author, image_url_m, year_of_publication]`` entry per
        recommendation, ordered from most to least similar.

    Raises
    ------
    IndexError
        If ``book_name`` is not present in ``pt.index``.
    """
    matches = np.where(pt.index == book_name)[0]
    if len(matches) == 0:
        # Same exception type as the bare [0][0] lookup, but with a usable message.
        raise IndexError(f"{book_name!r} is not a title in the similarity matrix (pt.index)")
    index = matches[0]

    # Exclude the queried book by position rather than assuming it sorts first:
    # if another book ties with (or beats) the self-similarity score, slicing
    # [1:] would return the query itself and drop a genuine neighbour.
    # sorted() is stable, so ties keep their pt.index order.
    similar_items = sorted(
        (
            (position, score)
            for position, score in enumerate(similarity_scores[index])
            if position != index
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )[:max(top_n, 0)]

    data = []
    for position, _score in similar_items:
        # A title may appear under several ISBNs; take the first matching row.
        temp_df = books[books['Book-Title'] == pt.index[position]]
        data.append([
            temp_df['Book-Title'].values[0],
            temp_df['Book-Author'].values[0],
            temp_df['Image-URL-M'].values[0],
            temp_df['Year-Of-Publication'].values[0],
        ])

    return data


In [53]:
# Smoke-test the recommender; the title must be a row of `pt` or the lookup raises IndexError.
recommend('1984')

In [54]:
# Second smoke test with a longer title; it must match an entry of pt.index exactly.
recommend('Harry Potter and the Chamber of Secrets (Book 2)')

In [55]:
import pickle

# Persist the artefacts produced above. Each file is written inside a `with`
# block so its handle is flushed and closed deterministically; the previous
# bare open(...) calls left four handles open.
artifacts = {
    'popular.pkl': popular_df,
    'books.pkl': books,
    'pt.pkl': pt,
    'similarity_scores.pkl': similarity_scores,
}
for pickle_path, artifact in artifacts.items():
    with open(pickle_path, 'wb') as pickle_file:
        pickle.dump(artifact, pickle_file)