In [4]:
# Import necessary libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer



In [5]:
# Load the Goodreads books dataset from a CSV file located relative to the working directory
# NOTE(review): the preview output further down shows 'Unnamed: 0' / 'Unnamed: 0.1' columns,
# which look like previously saved index columns — confirm and consider dropping them.
books = pd.read_csv("goodreads_data.csv")

In [14]:
# Preview the first rows to check which columns were loaded
books.head()

Unnamed: 0.1,Unnamed: 0,Book,Author,Description,Genres,Avg_Rating,Num_Ratings,URL
0,0,To Kill a Mockingbird,Harper Lee,The unforgettable novel of a childhood in a sl...,"['Classics', 'Fiction', 'Historical Fiction', ...",4.27,5691311,https://www.goodreads.com/book/show/2657.To_Ki...
1,1,Harry Potter and the Philosopher’s Stone (Harr...,J.K. Rowling,Harry Potter thinks he is an ordinary boy - un...,"['Fantasy', 'Fiction', 'Young Adult', 'Magic',...",4.47,9278135,https://www.goodreads.com/book/show/72193.Harr...
2,2,Pride and Prejudice,Jane Austen,"Since its immediate success in 1813, Pride and...","['Classics', 'Fiction', 'Romance', 'Historical...",4.28,3944155,https://www.goodreads.com/book/show/1885.Pride...
3,3,The Diary of a Young Girl,Anne Frank,Discovered in the attic in which she spent the...,"['Classics', 'Nonfiction', 'History', 'Biograp...",4.18,3488438,https://www.goodreads.com/book/show/48855.The_...
4,4,Animal Farm,George Orwell,Librarian's note: There is an Alternate Cover ...,"['Classics', 'Fiction', 'Dystopia', 'Fantasy',...",3.98,3575172,https://www.goodreads.com/book/show/170448.Ani...


In [7]:
# Preprocess the descriptions and build the pairwise similarity matrix.
# NOTE: only missing descriptions are handled here; no duplicate rows are removed.


# Fill NaN values in the 'Description' column with an empty string
books['Description'] = books['Description'].fillna('')

# Create a TF-IDF Vectorizer object that ignores English stop words
tfidf = TfidfVectorizer(stop_words='english')
# Learn the vocabulary from every description and encode each description as one row
tfidf_matrix = tfidf.fit_transform(books['Description'])

# Compute the cosine similarity matrix: entry (i, j) compares description i with description j,
# so row i lines up with the i-th row (by position) of `books`.
# NOTE(review): the result has one row and one column per book, so its size grows
# quadratically with the number of books.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)



In [16]:
# Function to get book recommendations based on book title
def get_recommendations(book_title, cosine_sim=None, data=None, top_n=10):
    """Return the titles whose descriptions are most similar to ``book_title``.

    Parameters
    ----------
    book_title : str
        Exact title to look up in ``data['Book']``. If several rows share the
        title, the first matching row is used.
    cosine_sim : 2-D array-like, optional
        Pairwise similarity matrix in which row/column ``i`` corresponds to the
        ``i``-th row (by position) of ``data``. When omitted, the notebook-level
        ``cosine_sim`` is looked up at call time.
    data : pandas.DataFrame, optional
        Frame with a ``'Book'`` column. When omitted, the notebook-level
        ``books`` is looked up at call time.
    top_n : int, optional
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    pandas.Series or str
        Up to ``top_n`` titles ordered from most to least similar, never
        containing the queried row itself; or the string
        ``"Book title not found in the dataset"`` when no row has that title.
    """
    # Resolve the defaults when the function is called rather than when it is
    # defined, so re-running the preprocessing cell cannot leave this function
    # working on a stale matrix. ``globals()`` is needed because the parameter
    # names shadow the notebook-level variables.
    if cosine_sim is None:
        cosine_sim = globals()["cosine_sim"]
    if data is None:
        data = globals()["books"]

    # Locate the book by *position*: the similarity matrix is positional, while
    # ``data.index`` holds labels that need not be 0..n-1 (e.g. after filtering).
    matches = (data['Book'] == book_title).to_numpy().nonzero()[0]
    if len(matches) == 0:
        return "Book title not found in the dataset"
    pos = int(matches[0])  # first match if the title occurs more than once

    # Drop the queried row explicitly instead of assuming it sorts first; with
    # tied scores (identical or empty descriptions) it may not be at the top.
    scores = [(i, score) for i, score in enumerate(cosine_sim[pos]) if i != pos]

    # Stable sort: equally similar books keep their original dataset order.
    scores.sort(key=lambda pair: pair[1], reverse=True)

    # A negative top_n is treated as "no recommendations".
    best_positions = [i for i, _ in scores[:max(top_n, 0)]]

    return data['Book'].iloc[best_positions]


In [17]:

# Example usage: Get recommendations for a specific book title
book_title = "To Kill a Mockingbird"
# The result is a Series of titles, or a "not found" message string if the title is absent,
# so print() is used because it handles both cases.
recommended_books = get_recommendations(book_title)
print(recommended_books)

1432                                    Go Set a Watchman
439                    Harper Lee's To Kill a Mockingbird
6938        Return to the Castle (Castle of Mysteries #2)
5598    The Chronicles of Prydain (The Chronicles of P...
4671      Discovery of a Hidden Castle (A Gypsy Curse #1)
7026       Castle of Mysteries Three: The Complete Series
8948                                           Dreamscape
5638                                          Mockingbird
9025    Amazing Grief A Healing Guide for Parents of Y...
451     The 5 Love Languages: The Secret to Love that ...
Name: Book, dtype: object
