## Import Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics.pairwise import cosine_similarity

import warnings
warnings.filterwarnings('ignore')

## Load Data

In [None]:
# Load the three CSV files of the Kaggle book-recommendation dataset into
# DataFrames: books, ratings and users.
books = pd.read_csv("/kaggle/input/book-recommendation-dataset/Books.csv")
ratings = pd.read_csv("/kaggle/input/book-recommendation-dataset/Ratings.csv")
users = pd.read_csv("/kaggle/input/book-recommendation-dataset/Users.csv")

In [None]:
books

In [None]:
users

In [None]:
ratings

In [None]:
# Report (rows, columns) of each table, in the order books, users, ratings.
for frame in (books, users, ratings):
    print(frame.shape)

## Data Preprocessing

In [None]:
books.info()

In [None]:
books.isna().sum()

In [None]:
users.info()

In [None]:
users.isna().sum()

In [None]:
ratings.isna().sum()

In [None]:
# Duplicates
books.duplicated().sum()

In [None]:
# Duplicates
users.duplicated().sum()

In [None]:
# Duplicates
ratings.duplicated().sum()

## EDA

In [None]:
books.head(3)

In [None]:
books['Year-Of-Publication'].unique()

## Popularity Based Recommendation System

In [None]:
books.head(3)

In [None]:
ratings.head()

In [None]:
# Attach book details to every rating by joining on ISBN (default inner join,
# so ratings whose ISBN has no match in `books` are dropped).
ratings_with_name = pd.merge(ratings, books, on='ISBN')

In [None]:
# Number of ratings per book title.
# Select 'Book-Rating' before counting so the other columns are not tallied
# and then thrown away; reset_index(name=...) names the count column directly.
num_rating_df = (
    ratings_with_name.groupby('Book-Title')['Book-Rating']
    .count()
    .reset_index(name='num_rating')
)
num_rating_df

In [None]:
# Mean rating per book title.
# Average only the 'Book-Rating' column instead of every numeric column.
avg_rating_df = (
    ratings_with_name.groupby('Book-Title')['Book-Rating']
    .mean()
    .reset_index(name='avg_rating')
)
avg_rating_df

In [None]:
# One row per title carrying both its rating count and its average rating.
popularity_df = pd.merge(num_rating_df, avg_rating_df, on='Book-Title')
popularity_df

In [None]:
# Top 50 books: keep titles with at least 250 ratings, order them by average
# rating (highest first) and take the first 50.
has_enough_ratings = popularity_df['num_rating'] >= 250
ranked_by_average = popularity_df[has_enough_ratings].sort_values('avg_rating', ascending=False)
popularity_df = ranked_by_average.head(50)
popularity_df

In [None]:
# Add author and cover image to the popular titles. A title can match several
# rows of `books`, so only the first match per title is kept.
display_columns = ['Book-Title', 'Book-Author', 'Image-URL-M', 'num_rating', 'avg_rating']
popular_with_details = popularity_df.merge(books, on='Book-Title')
popular_df = popular_with_details.drop_duplicates('Book-Title')[display_columns]
popular_df

## Collaborative Filtering Recommender System

In [None]:
# Users with more than 200 ratings.
# Count only the 'Book-Rating' column rather than every column of the frame.
x = ratings_with_name.groupby('User-ID')['Book-Rating'].count() > 200
padhe_likhe_users = x[x].index

In [None]:
# Keep only the ratings made by the users selected above.
from_selected_user = ratings_with_name['User-ID'].isin(padhe_likhe_users)
filtered_rating = ratings_with_name[from_selected_user]
filtered_rating

In [None]:
# Titles with at least 50 ratings among the selected users.
# Count only the 'Book-Rating' column rather than every column of the frame.
y = filtered_rating.groupby('Book-Title')['Book-Rating'].count() >= 50
famous_books = y[y].index

In [None]:
# Restrict the filtered ratings to the frequently rated titles.
is_famous_book = filtered_rating['Book-Title'].isin(famous_books)
final_ratings = filtered_rating[is_famous_book]

In [None]:
# Book x user rating matrix: one row per title, one column per user.
# Repeated (title, user) pairs are averaged (pivot_table's default aggregation).
pt = pd.pivot_table(
    final_ratings,
    index='Book-Title',
    columns='User-ID',
    values='Book-Rating',
)

In [None]:
# A missing entry means the user did not rate that title; store it as 0.
pt = pt.fillna(0)

In [None]:
pt

In [None]:
# Each book (a row of pt) is a vector of ratings: book1 -> [...........] (810 numbers)

In [None]:
cosine_similarity(pt).shape

In [None]:
# Pairwise cosine similarity between the rows of pt (one row per book title).
# The result is a square array whose row/column order follows pt.index.
similarity_scores = cosine_similarity(pt)
similarity_scores

In [None]:
similarity_scores[0]   # similarity of book1 with all books

In [None]:
np.where(pt.index=='1984')[0][0]   # Index Position

In [None]:
np.where(pt.index=='Zoya')[0][0]   # Index Position

In [None]:
# Rank every book by its similarity to the book at row 704 and show entries
# 1..5 of that ranking (entry 0 is skipped).
# NOTE(review): 704 is a hard-coded row position, presumably taken from one of
# the index lookups above -- confirm it still matches the intended title.
sorted(list(enumerate(similarity_scores[704])),key=lambda x: x[1], reverse=True)[1:6] # 1 to 6 books

### Recommend Function

In [None]:
def recommend(book_name, top_n=5):
    """Return the books most similar to ``book_name``.

    Looks up the row of ``book_name`` in the global pivot table ``pt``, ranks
    every other title by its score in the global ``similarity_scores`` matrix
    and collects display details from the global ``books`` table.

    Args:
        book_name: Title exactly as it appears in ``pt.index``.
        top_n: Maximum number of recommendations to return. Defaults to 5,
            the number returned before this parameter existed.

    Returns:
        A list of ``[title, author, image_url]`` lists, best match first. An
        entry is an empty list when its title has no row in ``books``.

    Raises:
        IndexError: If ``book_name`` is not present in ``pt.index``.
    """
    # Fetch the row position of the requested title.
    matches = np.where(pt.index == book_name)[0]
    if matches.size == 0:
        raise IndexError(f"book {book_name!r} not found in pt.index")
    index = matches[0]

    # Rank all titles by similarity, then remove the queried title by position.
    # Dropping the first ranked entry is not enough: when another title ties
    # for the top score (or the queried row is all zeros) the queried title is
    # not guaranteed to be ranked first and would be recommended to itself.
    ranked = sorted(
        enumerate(similarity_scores[index]),
        key=lambda pair: pair[1],
        reverse=True,
    )
    similar_items = [pair for pair in ranked if pair[0] != index][:top_n]

    data = []
    for position, _score in similar_items:
        # A title may appear several times in `books`; keep its first row.
        details = books[books['Book-Title'] == pt.index[position]].drop_duplicates('Book-Title')
        if details.empty:
            data.append([])
            continue
        first = details.iloc[0]
        data.append([first['Book-Title'], first['Book-Author'], first['Image-URL-M']])
    return data

In [None]:
#books[books['Book-Title']=='1984']

In [None]:
recommend('1984')

In [None]:
pt.index[100]

In [None]:
recommend('Zoya')

In [None]:
recommend('Message in a Bottle')

In [None]:
recommend('The Notebook')

In [None]:
popular_df['Image-URL-M'][0]

In [None]:
import pickle 

# Serialize the popular-books table to popular_df.pkl in the working directory.
with open("popular_df.pkl",'wb') as file:
    pickle.dump(popular_df,file)

In [None]:
list(popular_df['Book-Title'].values)

In [None]:
list(popular_df['Book-Author'].values)

In [None]:
popular_df

In [None]:
pt

In [None]:
books

In [None]:
similarity_scores

In [None]:
similarity_scores[0][0]

In [None]:
# Serialize the pivot table, the books table and the similarity matrix, each
# to its own pickle file in the working directory.
artifacts = (
    ('pt.pkl', pt),
    ('books.pkl', books),
    ('similarity_scores.pkl', similarity_scores),
)
for filename, artifact in artifacts:
    with open(filename, 'wb') as file:
        pickle.dump(artifact, file)