In [1]:
import pandas as pd

# Read only the four columns used downstream, and only the first 1000 rows,
# to keep the working set small.
books_rating_df = pd.read_csv(
    'Books_rating.csv',
    nrows=1000,
    usecols=['Title', 'review/summary', 'review/score', 'review/text'],
)

# Quick preview of the first rows to sanity-check the load.
print(books_rating_df.head())

                            Title  review/score  \
0  Its Only Art If Its Well Hung!           4.0   
1        Dr. Seuss: American Icon           5.0   
2        Dr. Seuss: American Icon           5.0   
3        Dr. Seuss: American Icon           4.0   
4        Dr. Seuss: American Icon           4.0   

                                    review/summary  \
0           Nice collection of Julie Strain images   
1                                Really Enjoyed It   
2  Essential for every personal and Public Library   
3  Phlip Nel gives silly Seuss a serious treatment   
4                           Good academic overview   

                                         review/text  
0  This is only for Julie Strain fans. It's a col...  
1  I don't care much for Dr. Seuss but after read...  
2  If people become the books they read and if "t...  
3  Theodore Seuss Geisel (1904-1991), aka &quot;D...  
4  Philip Nel - Dr. Seuss: American IconThis is b...  


In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Vectorizer that drops English stop words and caps the vocabulary at 5000 terms.
vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')

# Missing reviews become empty strings so every row still gets a TF-IDF vector;
# the resulting matrix has one row per review and one column per vocabulary term.
review_texts = books_rating_df['review/text'].fillna('')
tfidf_matrix = vectorizer.fit_transform(review_texts)

print(tfidf_matrix.shape)

(1000, 5000)


In [3]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between every pair of review vectors.
# With a single argument, cosine_similarity compares the matrix against itself,
# so cosine_sim[i, j] is the similarity between review i and review j.
cosine_sim = cosine_similarity(tfidf_matrix)

print(cosine_sim.shape)

(1000, 1000)


In [4]:
def recommend_books(title, cosine_sim=cosine_sim, top_n=5):
    """Recommend books whose reviews are most similar to a review of ``title``.

    The first row of ``books_rating_df`` whose 'Title' equals ``title`` is used
    as the query; all rows are then ranked by their similarity to that row.
    Because the dataset holds several reviews per book, rows that belong to the
    queried title (or to a title already picked) are skipped, so the result is
    a list of *other*, distinct books.

    Args:
        title: Exact title to look up in ``books_rating_df['Title']``.
        cosine_sim: Square similarity matrix where ``cosine_sim[i][j]`` is the
            similarity between the i-th and j-th rows (by position) of
            ``books_rating_df``.
        top_n: Maximum number of distinct titles to return (default 5).

    Returns:
        A pandas Series of up to ``top_n`` distinct titles, most similar first.
        It never contains ``title`` itself and may be shorter than ``top_n``
        when fewer distinct titles exist.

    Raises:
        IndexError: If ``title`` does not occur in ``books_rating_df['Title']``.
    """
    titles = books_rating_df['Title']

    # Use row positions rather than index labels: cosine_sim is positional, so
    # a label would point at the wrong row if the frame were ever re-indexed.
    match_positions = (titles == title).to_numpy().nonzero()[0]
    if len(match_positions) == 0:
        raise IndexError(f"Title {title!r} not found in books_rating_df['Title']")
    query_pos = match_positions[0]

    # Rank every row by similarity to the query row, best first.
    ranked = sorted(
        enumerate(cosine_sim[query_pos]),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Seeding the seen-set with the queried title excludes the query row and
    # every other review of the same book, without relying on the query row
    # sorting into first place (ties at similarity 1.0 would break that).
    seen_titles = {title}
    picked_positions = []
    for pos, _score in ranked:
        if len(picked_positions) >= top_n:
            break
        candidate = titles.iloc[pos]
        # Skip rows with a missing title and titles that were already picked.
        if pd.isna(candidate) or candidate in seen_titles:
            continue
        seen_titles.add(candidate)
        picked_positions.append(pos)

    return titles.iloc[picked_positions]

In [None]:

# Example: recommend books similar to a title that appears in the loaded data.
# The lookup raises IndexError for a title that is not in books_rating_df, so
# use one shown in the preview above rather than a made-up placeholder.
example_title = 'Dr. Seuss: American Icon'
recommended_books = recommend_books(example_title)
print(recommended_books)