In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the three source tables from CSV files in the working directory.
books, users, ratings = (
    pd.read_csv(csv_name)
    for csv_name in ('Books.csv', 'Users.csv', 'Ratings.csv')
)

In [None]:
# Preview the first rows of the books table.
books.head()

In [None]:
# Preview the first rows of the users table.
users.head()

In [None]:
# Preview the first rows of the ratings table.
ratings.head()

In [None]:
# Summary statistics for the ratings table.
ratings.describe()

In [None]:
# Print the (rows, columns) shape of the books table.
print(books.shape)

In [None]:
# Print the (rows, columns) shape of the users table.
print(users.shape)

In [None]:
# Print the (rows, columns) shape of the ratings table.
print(ratings.shape)

In [None]:
# Count missing values in each column of books.
books.isnull().sum()

In [None]:
# Count missing values in each column of users.
users.isnull().sum()

In [None]:
# Count missing values in each column of ratings.
ratings.isnull().sum()

In [None]:
# Count rows of books that are exact duplicates of an earlier row.
books.duplicated().sum()

In [None]:
# Count rows of ratings that are exact duplicates of an earlier row.
ratings.duplicated().sum()

In [None]:
# Count rows of users that are exact duplicates of an earlier row.
users.duplicated().sum()

# Popularity-Based Recommendation System

In [78]:
# Attach book metadata to every rating by joining the two tables on ISBN.
ratings_with_books = pd.merge(ratings, books, on='ISBN')

In [None]:
# Preview the first rows of the merged ratings-and-books table.
ratings_with_books.head()

In [None]:
# Number of non-null ratings per title.
# Select 'Book-Rating' before aggregating so only that column is counted
# (the original counted every merged column and then discarded all but one),
# and name the result directly instead of renaming in place.
num_of_rating_df = (
    ratings_with_books
    .groupby('Book-Title')['Book-Rating']
    .count()
    .reset_index(name='num_ratings')
)
num_of_rating_df

In [None]:
# Mean rating per title.
# Select 'Book-Rating' *before* calling mean(): the merged frame also holds
# string columns (ISBN, author, image URLs), and on pandas >= 2.0 a
# whole-frame groupby mean raises TypeError on those instead of silently
# dropping them.
avg_rating_df = (
    ratings_with_books
    .groupby('Book-Title')['Book-Rating']
    .mean()
    .reset_index(name='avg_ratings')
)
avg_rating_df

In [None]:
# Combine the per-title rating counts and mean ratings into one table.
popularity_df = pd.merge(num_of_rating_df, avg_rating_df, on='Book-Title')
popularity_df

In [None]:
# Summary statistics for the per-title rating counts and mean ratings.
popularity_df.describe()


In [None]:
# Scatter plot of rating count (x) against mean rating (y), one point per title.
sns.set()
plt.scatter(popularity_df['num_ratings'], popularity_df['avg_ratings'], s=50, c='blue')
plt.title('Popularity')
plt.xlabel('num_ratings')
plt.ylabel('avg_ratings')
# plt.show must be called; the bare attribute only echoes the function object
# as the cell output and never explicitly renders the figure.
plt.show()

In [85]:
# Keep only titles with at least 250 ratings, best average rating first.
popularity_df = (
    popularity_df
    .query('num_ratings >= 250')
    .sort_values(by='avg_ratings', ascending=False)
)

In [None]:
# Display popularity_df as it currently stands.
popularity_df

In [87]:
# Take the first 50 rows of popularity_df, attach book metadata, keep one row
# per title, and retain only the columns needed for display.
popular_df = (
    popularity_df
    .head(50)
    .merge(books, on='Book-Title')
    .drop_duplicates(subset='Book-Title')
    [['Book-Title', 'Book-Author', 'Image-URL-M', 'num_ratings', 'avg_ratings']]
)

In [None]:
# Display the popular-books table.
popular_df

# Collaborative Filtering-Based Recommender System

In [89]:
# Per-user non-null counts of every ratings column, with User-ID restored as
# an ordinary column.
ratings_by_usersid_df = (
    ratings
    .groupby(by='User-ID')
    .count()
    .reset_index(drop=False)
)

In [None]:
# Column-wise maximum over the users whose 'Book-Rating' count is at most 2000.
ratings_by_usersid_df[ratings_by_usersid_df['Book-Rating']<=2000].max()

In [91]:
# Drop users whose 'Book-Rating' count exceeds 2000.
ratings_by_usersid_df = ratings_by_usersid_df.loc[
    ratings_by_usersid_df['Book-Rating'].le(2000)
]

In [None]:
# Preview the last rows of the per-user counts table.
ratings_by_usersid_df.tail()

In [None]:
# Scatter plot of each user's ID (x) against their 'Book-Rating' count (y).
sns.set()
plt.scatter(ratings_by_usersid_df['User-ID'], ratings_by_usersid_df['Book-Rating'], s=50, c='blue')
# Title and y-label previously misspelled "Number" as "Numerber".
plt.title('Number of Ratings given by each User')
plt.xlabel('User ID')
plt.ylabel('Number of Ratings')
# plt.show must be called; the bare attribute only echoes the function object.
plt.show()

In [112]:
# Boolean Series indexed by User-ID: True where the user has more than 200
# ratings in the merged table. Select 'Book-Rating' first so only that column
# is counted instead of every merged column.
temp_df = ratings_with_books.groupby('User-ID')['Book-Rating'].count() > 200
# IDs of the users that passed the threshold (name kept as-is; later cells
# reference it).
expirenced_users = temp_df[temp_df].index

In [113]:
# Keep only the ratings made by users listed in expirenced_users.
filtered_rating = ratings_with_books.loc[
    ratings_with_books['User-ID'].isin(expirenced_users)
]

In [None]:
# Display the ratings that remain after the user filter.
filtered_rating

In [115]:
# Boolean Series indexed by Book-Title: True where the title has at least 50
# ratings among the retained users. Select 'Book-Rating' first so only that
# column is counted instead of every merged column.
temp_df = filtered_rating.groupby('Book-Title')['Book-Rating'].count() >= 50
# Titles that passed the threshold.
famous_books = temp_df[temp_df].index

In [None]:
# Display the index of titles that met the 50-rating threshold.
famous_books

In [117]:
# Restrict the retained ratings to the titles listed in famous_books.
final_ratings = filtered_rating.loc[
    filtered_rating['Book-Title'].isin(famous_books)
]

In [None]:
# Display the ratings that feed the title-by-user matrix.
final_ratings

In [119]:
# Title-by-user rating matrix: one row per Book-Title, one column per User-ID.
pivot_df = pd.pivot_table(
    final_ratings,
    index='Book-Title',
    columns='User-ID',
    values='Book-Rating',
)

In [121]:
# Replace missing title/user cells with 0.
pivot_df = pivot_df.fillna(0)

In [None]:
# Display the filled title-by-user matrix.
pivot_df

In [123]:
from sklearn.metrics.pairwise import cosine_similarity

In [124]:
# Pairwise cosine similarity between the rows of pivot_df (one row per title).
similarity_scores = cosine_similarity(pivot_df)

In [None]:
# Show the dimensions of the similarity matrix.
similarity_scores.shape

In [126]:
def recommend_book(book_name, top_n=5):
    """Return the books most similar to ``book_name``.

    Similarity is read from the module-level ``similarity_scores`` matrix,
    whose rows are addressed by position in ``pivot_df.index``; display
    details are looked up in ``books``.

    Args:
        book_name: Title to look up in ``pivot_df.index``.
        top_n: Maximum number of recommendations to return. Defaults to 5,
            the value that was previously hard-coded.

    Returns:
        A list ordered from most to least similar. Each entry is
        ``[title, author, image_url]`` taken from the first row of ``books``
        with that title, or an empty list if ``books`` has no such row.

    Raises:
        IndexError: If ``book_name`` is not present in ``pivot_df.index``.
    """
    positions = np.where(pivot_df.index == book_name)[0]
    if positions.size == 0:
        # Same exception type the bare [0][0] lookup used to raise, but with
        # a message that names the missing title.
        raise IndexError(f"{book_name!r} is not in the similarity matrix")
    book_index = positions[0]

    scores = np.asarray(similarity_scores[book_index])
    # Stable descending order: tied scores keep their original index order,
    # matching what sorted(..., reverse=True) produced.
    ranked = np.argsort(-scores, kind='stable')
    # Drop the queried book by position rather than assuming it sorts first;
    # with tied scores the old [1:6] slice could return the query itself.
    neighbours = [idx for idx in ranked if idx != book_index][:max(top_n, 0)]

    detail_columns = ['Book-Title', 'Book-Author', 'Image-URL-M']
    suggestions = []
    for idx in neighbours:
        title = pivot_df.index[idx]
        # First matching row only, equivalent to drop_duplicates on the title.
        details = books.loc[books['Book-Title'] == title, detail_columns].head(1)
        suggestions.append(details.to_numpy().ravel().tolist())
    return suggestions

In [None]:
# Example: recommendations for 'Year of Wonders'.
recommend_book('Year of Wonders')

In [None]:
# Example: recommendations for '1984'.
recommend_book('1984')