In [None]:
import numpy as np
import pandas as pd
import difflib
import pickle
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Load the three raw CSV tables (books, users, ratings) in one pass.
books, users, ratings = (
    pd.read_csv(csv_path)
    for csv_path in ("/content/Books.csv", "/content/Users.csv", "/content/Ratings.csv")
)

In [None]:
# One row per distinct title (first occurrence kept), renumbered from 0.
unique_books = (
    books
    .drop_duplicates(subset='Book-Title')
    .reset_index(drop=True)
)
unique_books.shape

(242135, 8)

In [None]:
# Inner-join every rating to its book record on ISBN so ratings carry the title.
ratings_with_name = pd.merge(ratings, books, on='ISBN')

# Popularity Based Recommendation


In [None]:
# Number of (non-null) ratings recorded for each title.
num_rating_df = (
    ratings_with_name
    .groupby('Book-Title')['Book-Rating']
    .count()
    .reset_index()
    .rename(columns={'Book-Rating': 'Num_Rating'})
)

In [None]:
# Mean rating per title; the mean is passed through to_numeric (coercing anything
# non-numeric to NaN) before being exposed as the 'Avg_Rating' column.
avg_rating_df = (
    ratings_with_name
    .groupby('Book-Title')['Book-Rating']
    .mean()
    .pipe(pd.to_numeric, errors='coerce')
    .rename('Avg_Rating')
    .reset_index()
)

In [None]:
# Combine count and mean per title, keep titles with at least 250 ratings,
# and retain the 50 with the highest average rating.
popular_df = (
    num_rating_df
    .merge(avg_rating_df, on='Book-Title')
    .loc[lambda stats: stats['Num_Rating'] >= 250]
    .sort_values('Avg_Rating', ascending=False)
    .head(50)
)

In [None]:
# Attach author, publisher and cover-image URL to each popular title.
#
# Only the three statistic columns are taken from popular_df before merging, so
# running this cell again can never produce suffixed duplicates such as
# 'Image-URL-M_x' / 'Image-URL-M_y'. The previous suffix clean-up renamed
# 'Image-URL-M_y' to itself, which left the suffixed column name in place on a
# re-run; with the column selection below that clean-up is no longer needed.
popular_df = (
    popular_df[['Book-Title', 'Num_Rating', 'Avg_Rating']]
    .merge(books[['Book-Title', 'Book-Author', 'Publisher', 'Image-URL-M']], on='Book-Title', how='left')
    # books may hold several rows for one title; keep the first matching row only
    .drop_duplicates('Book-Title')
)

In [None]:
# Preview the first five popular titles with their metadata.
popular_df.head(n=5)

Unnamed: 0,Book-Title,Num_Rating,Avg_Rating,Book-Author,Publisher,Image-URL-M
0,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804,J. K. Rowling,Scholastic,http://images.amazon.com/images/P/0439136350.0...
3,Harry Potter and the Goblet of Fire (Book 4),387,5.824289,J. K. Rowling,Scholastic,http://images.amazon.com/images/P/0439139597.0...
5,Harry Potter and the Sorcerer's Stone (Book 1),278,5.73741,J. K. Rowling,Scholastic,http://images.amazon.com/images/P/0590353403.0...
9,Harry Potter and the Order of the Phoenix (Boo...,347,5.501441,J. K. Rowling,Scholastic,http://images.amazon.com/images/P/043935806X.0...
13,Harry Potter and the Chamber of Secrets (Book 2),556,5.183453,J. K. Rowling,Scholastic,http://images.amazon.com/images/P/0439064872.0...


# **Collaborative Filtering Based Recommendation System**

In [None]:
# Boolean mask per user: True when the user has at least 200 ratings.
user = ratings_with_name.groupby('User-ID')['Book-Rating'].count().ge(200)
# IDs of the users that pass the threshold.
rated_users = user.loc[user].index

In [None]:
# Ratings made by the heavy raters selected above.
relevant_ratings = ratings_with_name.loc[ratings_with_name['User-ID'].isin(rated_users)]

In [None]:
# Boolean mask per title: True when the title has at least 50 ratings among
# the selected users.
relevant_books = relevant_ratings.groupby('Book-Title')['Book-Rating'].count().ge(50)
# Titles that pass the threshold.
reco_books = relevant_books.loc[relevant_books].index

In [None]:
# Ratings restricted to both the selected users and the selected titles.
best_books = relevant_ratings.loc[relevant_ratings['Book-Title'].isin(reco_books)]

In [None]:
# Title x user matrix of ratings (one row per title, one column per user).
collab_df = pd.pivot_table(
    best_books,
    values='Book-Rating',
    index='Book-Title',
    columns='User-ID',
)

In [None]:
# Title/user pairs without a rating become 0 so every row is a dense vector.
collab_df = collab_df.fillna(0)
collab_df

In [None]:
# Pairwise cosine similarity between the title rows of collab_df; entry [i, j]
# compares title i with title j.
sim_scores = cosine_similarity(X=collab_df)
np.shape(sim_scores)

(707, 707)

In [None]:
def recommend(book_name):
  """Return up to ten books most similar to the title closest to ``book_name``.

  The query is fuzzy-matched with ``difflib.get_close_matches`` against the
  titles that index ``collab_df``. Only those titles have a row in
  ``sim_scores``, so matching against them (rather than every title in
  ``books``) guarantees the matched title can be looked up.

  Args:
    book_name: Free-text book title; it does not have to match exactly.

  Returns:
    A list of ``[title, author, publisher, image_url]`` lists ordered from the
    most to the least similar book. The metadata comes from the first row in
    ``books`` carrying that title. An empty list is returned when no title in
    ``collab_df`` is close enough to ``book_name``.
  """
  candidate_titles = collab_df.index.tolist()
  close_matches = difflib.get_close_matches(book_name, candidate_titles, n=1)
  if not close_matches:
    # Nothing resembles the query: report "no recommendations" instead of raising.
    return []
  close_match = close_matches[0]

  index = np.where(collab_df.index == close_match)[0][0]
  scores = sim_scores[index]
  # Rank every other title by similarity. The queried title is excluded by
  # position rather than by assuming it sorts first, which would fail when
  # another title ties with it (or when its own similarity is not the maximum).
  similar_choices = sorted(
      (position for position in range(len(scores)) if position != index),
      key=lambda position: scores[position],
      reverse=True,
  )[:10]

  data = []
  for position in similar_choices:
    # A title may appear on several rows of books; keep the first one.
    first_row = books[books['Book-Title'] == collab_df.index[position]].drop_duplicates('Book-Title')
    item = []
    for column in ('Book-Title', 'Book-Author', 'Publisher', 'Image-URL-M'):
      item.extend(first_row[column].tolist())
    data.append(item)

  return data

In [None]:
# Example query: the lower-case title is resolved by fuzzy matching.
recommend(book_name='the da vinci code')

[['Angels &amp; Demons',
  'Dan Brown',
  'Pocket Star',
  'http://images.amazon.com/images/P/0671027360.01.MZZZZZZZ.jpg'],
 ['Touching Evil',
  'Kay Hooper',
  'Bantam Books',
  'http://images.amazon.com/images/P/0553583441.01.MZZZZZZZ.jpg'],
 ['Saving Faith',
  'David Baldacci',
  'Warner Vision',
  'http://images.amazon.com/images/P/0446608890.01.MZZZZZZZ.jpg'],
 ["The Sweet Potato Queens' Book of Love",
  'JILL CONNER BROWNE',
  'Three Rivers Press',
  'http://images.amazon.com/images/P/0609804138.01.MZZZZZZZ.jpg'],
 ['Middlesex: A Novel',
  'Jeffrey Eugenides',
  'Picador',
  'http://images.amazon.com/images/P/0312422156.01.MZZZZZZZ.jpg'],
 ['The Blue Nowhere : A Novel',
  'Jeffery Deaver',
  'Pocket',
  'http://images.amazon.com/images/P/0671042262.01.MZZZZZZZ.jpg'],
 ['The Lovely Bones: A Novel',
  'Alice Sebold',
  'Little, Brown',
  'http://images.amazon.com/images/P/0316666343.01.MZZZZZZZ.jpg'],
 ['Timeline',
  'MICHAEL CRICHTON',
  'Ballantine Books',
  'http://images.amazon

In [None]:
# Persist the artifacts produced above. Each file is opened in a `with` block so
# the handle is flushed and closed as soon as its object has been written
# (the bare open(...) calls used before were never closed).
for artifact, pickle_path in (
    (popular_df, 'popular.pkl'),
    (collab_df, 'collab_df.pkl'),
    (books, 'books.pkl'),
    (sim_scores, 'sim_scores.pkl'),
):
    with open(pickle_path, 'wb') as pickle_file:
        pickle.dump(artifact, pickle_file)