In [1]:
import numpy as np
import pandas as pd
import os
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [2]:
books = pd.read_csv("books.csv")
books.shape

(11127, 12)

In [3]:
books = books[(books[['average_rating','ratings_count','text_reviews_count']] != '0').all(axis=1)]
books.drop(books.index[books['authors'] == 'NOT A BOOK'], inplace = True)

In [4]:
content_data = books[['title','authors','average_rating']]
content_data = content_data.astype(str)

In [5]:
content_data['content'] = content_data['title'] + ' ' + content_data['authors'] + ' ' + content_data['average_rating']

In [6]:
content_data.shape

(11097, 4)

In [7]:
content_data = content_data.reset_index()
indices = pd.Series(content_data.index, index=content_data['title'])
indices

title
Harry Potter and the Half-Blood Prince (Harry Potter  #6)           0
Harry Potter and the Order of the Phoenix (Harry Potter  #5)        1
Harry Potter and the Chamber of Secrets (Harry Potter  #2)          2
Harry Potter and the Prisoner of Azkaban (Harry Potter  #3)         3
Harry Potter Boxed Set  Books 1-5 (Harry Potter  #1-5)              4
                                                                ...  
Expelled from Eden: A William T. Vollmann Reader                11092
You Bright and Risen Angels                                     11093
The Ice-Shirt (Seven Dreams #1)                                 11094
Poor People                                                     11095
Las aventuras de Tom Sawyer                                     11096
Length: 11097, dtype: int64

In [8]:
count = CountVectorizer(stop_words='english')
count_matrix = count.fit_transform(content_data['content'])

cosine_sim_content = cosine_similarity(count_matrix, count_matrix)

In [9]:
count_matrix.shape

(11097, 17918)

In [10]:
cosine_sim_content.shape

(11097, 11097)

In [11]:
select_books = books['title'].unique()

In [15]:
def get_recommendations(title, cosine_sim=cosine_sim_content):
    idx = indices[title]

    # Get the pairwsie similarity scores of all books with that book
    sim_scores = list(enumerate(cosine_sim_content[idx]))

    # Sort the books based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most similar books
    sim_scores = sim_scores[1:5]

    # Get the book indices
    book_indices = [i[0] for i in sim_scores]

    # Return the top 10 most similar books
    return list(content_data['title'].iloc[book_indices])#,sim_scores

In [16]:
def book_shows(book):
    for book in book:
        print(book)

In [17]:
books4 = get_recommendations(input("Enter the book:"), cosine_sim_content)
book_shows(books4)

Enter the book:The Hobbit
The Hobbit: Or There and Back Again
The Hobbit
The Hobbit  or  There and Back Again
The Annotated Hobbit
