In [48]:
import numpy as np
import pandas as pd

from sklearn.metrics.pairwise import cosine_similarity

import warnings
warnings.filterwarnings('ignore')

In [49]:
# Load the three source tables (books, users, ratings) from CSV files in the
# current working directory, in that order.
books_data, users_data, ratings_data = (
  pd.read_csv(csv_name) for csv_name in ('Books.csv', 'Users.csv', 'Ratings.csv')
)

In [50]:
# Attach the book metadata columns to every rating row, matching on ISBN.
# No `how` is given, so pandas' default join type applies.
ratings_with_name = pd.merge(ratings_data, books_data, on='ISBN')

In [51]:
# Per-user count of non-null 'Book-Rating' values; True where a user has
# more than 200 of them.
temp_mask = ratings_with_name.groupby('User-ID')['Book-Rating'].count() > 200
# Index of the User-IDs that passed the threshold.
literate_users = temp_mask.index[temp_mask.values]

In [52]:
# Keep only the rating rows that belong to the users selected above.
by_literate_user = ratings_with_name['User-ID'].isin(literate_users)
filtered_rating = ratings_with_name[by_literate_user]

In [53]:
# Per-title count of non-null 'Book-Rating' values among the retained rows;
# True where a title has at least 50 of them.
temp_mask = filtered_rating.groupby('Book-Title')['Book-Rating'].count() >= 50
# Index of the titles that passed the threshold.
famous_books = temp_mask.index[temp_mask.values]

In [54]:
# Keep only the rating rows whose title is in the frequently-rated set.
about_famous_book = filtered_rating['Book-Title'].isin(famous_books)
final_ratings = filtered_rating[about_famous_book]

In [55]:
# Book x user matrix: one row per 'Book-Title', one column per 'User-ID',
# cells hold 'Book-Rating' (pivot_table aggregates duplicate pairs with its
# default aggfunc). Missing (book, user) pairs are filled with 0.
item_user_interaction_matrix = final_ratings.pivot_table(
  index='Book-Title',
  columns='User-ID',
  values='Book-Rating',
).fillna(0)

In [56]:
# Collect title, author and medium cover-image URL for every book in the
# interaction matrix (in matrix row order) and export them to 'famous.txt'
# as CSV without header or index column.
#
# A title can appear on several rows of books_data; the first such row
# supplies the author and image. The de-duplicated lookup table is built once
# instead of rescanning books_data for every title.
first_row_per_title = books_data.drop_duplicates('Book-Title').set_index('Book-Title')
famous_meta = first_row_per_title.loc[
  item_user_interaction_matrix.index, ['Book-Author', 'Image-URL-M']
]

famous_books_dict = {
  'title': famous_meta.index.tolist(),
  'author': famous_meta['Book-Author'].tolist(),
  'image': famous_meta['Image-URL-M'].tolist(),
}

pd.DataFrame(famous_books_dict).to_csv('famous.txt', index=False, header=False)

In [57]:
# Pairwise cosine similarity between the rows of the interaction matrix
# (one row per book title), giving a square book-by-book array.
similarity_matrix = cosine_similarity(X=item_user_interaction_matrix)
# Persist as comma-separated text; fmt='%s' writes each value via str().
np.savetxt(fname='similarity.txt', X=similarity_matrix, fmt='%s', delimiter=',')

In [58]:
def recommend_books(book_name, top_k=5):
  """Return the `top_k` books most similar to `book_name`.

  Reads the module-level `item_user_interaction_matrix` (row labels are book
  titles), `similarity_matrix` (row i holds the similarity of book i to every
  book) and `books_data` (title / author / image lookup).

  Args:
    book_name: Title exactly as it appears in the interaction-matrix index.
    top_k: Maximum number of recommendations; values <= 0 yield an empty list.

  Returns:
    A list of `[title, author, image_url]` lists, most similar first. Books
    with equal similarity keep their matrix row order. A title with no row in
    `books_data` contributes an empty list.

  Raises:
    IndexError: if `book_name` is not a row of the interaction matrix.
  """
  titles = item_user_interaction_matrix.index

  # Position of the query book in the matrix.
  matches = np.where(titles == book_name)[0]
  if len(matches) == 0:
    raise IndexError(f"{book_name!r} is not in the item-user interaction matrix")
  index = matches[0]

  # Exclude the query book by position rather than assuming it sorts first:
  # a tie at the top (or an all-zero rating vector) can put another book ahead
  # of it, which would make the book recommend itself.
  candidates = [
    (position, score)
    for position, score in enumerate(similarity_matrix[index])
    if position != index
  ]
  # list.sort is stable, also with reverse=True, so ties keep row order.
  candidates.sort(key=lambda pair: pair[1], reverse=True)
  similar_items = candidates[:max(top_k, 0)]

  data = []
  for position, _ in similar_items:
    # First books_data row carrying this title supplies author and image.
    first_match = books_data.loc[
      books_data['Book-Title'] == titles[position],
      ['Book-Title', 'Book-Author', 'Image-URL-M'],
    ].head(1)
    data.append(first_match.to_numpy().ravel().tolist())

  return data

In [59]:
# Demo: the ten books most similar to a sample title.
recommend_books(book_name="The Hitchhiker's Guide to the Galaxy", top_k=10)

[['The Hours : A Novel',
  'Michael Cunningham',
  'http://images.amazon.com/images/P/0312243022.01.MZZZZZZZ.jpg'],
 ['Pet Sematary',
  'Stephen King',
  'http://images.amazon.com/images/P/0451162072.01.MZZZZZZZ.jpg'],
 ['Good Omens',
  'Neil Gaiman',
  'http://images.amazon.com/images/P/0441003257.01.MZZZZZZZ.jpg'],
 ['I Capture the Castle',
  'Dodie Smith',
  'http://images.amazon.com/images/P/031231616X.01.MZZZZZZZ.jpg'],
 ['Zen and the Art of Motorcycle Maintenance: An Inquiry into Values',
  'ROBERT PIRSIG',
  'http://images.amazon.com/images/P/0553277472.01.MZZZZZZZ.jpg'],
 ['The Catcher in the Rye',
  'J.D. Salinger',
  'http://images.amazon.com/images/P/0316769487.01.MZZZZZZZ.jpg'],
 ['Cujo',
  'Stephen King',
  'http://images.amazon.com/images/P/8878242357.01.MZZZZZZZ.jpg'],
 ['The Giver (21st Century Reference)',
  'LOIS LOWRY',
  'http://images.amazon.com/images/P/0440219078.01.MZZZZZZZ.jpg'],
 ['The Fellowship of the Ring (The Lord of the Rings, Part 1)',
  'J.R.R. TOLKIEN'