## Content-Based Book Recommendation System

# Kaggle link - https://www.kaggle.com/datasets/elvinrustam/books-dataset

# Step 1: Import Required Libraries

In [1]:
import pandas as pd
import numpy as np
import nltk
import re
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from tqdm import tqdm

# Fetch the NLTK resources this notebook relies on (nltk.download reports
# "already up-to-date" when a package is present locally).
for resource_name in ("stopwords", "punkt"):
    nltk.download(resource_name)

# English stop words as a set, so the membership test in preprocess() is O(1).
english_stopword_list = stopwords.words('english')
stop_words = set(english_stopword_list)

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/sirishag/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /Users/sirishag/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


# Step 2: Load Dataset

In [2]:
# Read the cleaned Kaggle export, then narrow it to the two columns the
# recommender uses, drop rows missing either one, and keep the first 500.
df = pd.read_csv("/Users/sirishag/Downloads/Cleaned_BooksDataset.csv")
wanted_columns = ["Title", "Description"]
df = df.loc[:, wanted_columns].dropna().iloc[:500]

# Preview the first rows of the working frame
df.head()

Unnamed: 0,Title,Description
0,Goat Brothers,"History , General"
1,The Missing Person,"Fiction , General"
2,Don't Eat Your Heart Out Cookbook,"Cooking , Reference"
3,When Your Corporate Umbrella Begins to Leak: A...,When Your Corporate Umbrella Begins to Leak: A...
4,Amy Spangler's Breastfeeding : A Parent's Guide,Amy Spangler's Breastfeeding : A Parent's Guide


# Step 3: Text Preprocessing

In [3]:
def preprocess(text):
    """Return ``text`` reduced to lowercase alphabetic, non-stop-word tokens.

    Every character outside ``a-z``/``A-Z`` is replaced by a space, the result
    is lowercased and split on whitespace, tokens found in the module-level
    ``stop_words`` set are dropped, and the survivors are joined with single
    spaces.
    """
    letters_only = re.sub('[^a-zA-Z]', ' ', text)
    kept_tokens = []
    for token in letters_only.lower().split():
        if token in stop_words:
            continue
        kept_tokens.append(token)
    return ' '.join(kept_tokens)

# Store the cleaned form of every description next to the raw text.
df['processed_desc'] = df['Description'].map(preprocess)

# Step 4: TF-IDF Vectorization

In [4]:
# Learn a TF-IDF vocabulary (capped at 5000 features) from the cleaned
# descriptions and vectorize them; row order of the matrix follows df.
cleaned_descriptions = df['processed_desc']
tfidf = TfidfVectorizer(max_features=5000)
tfidf_matrix = tfidf.fit_transform(cleaned_descriptions)

# Recommendation function
def recommend_books(query, n=5):
    """Return up to ``n`` books whose descriptions best match ``query``.

    The query is cleaned with ``preprocess``, projected into the fitted
    ``tfidf`` space, and compared against every row of ``tfidf_matrix`` using
    cosine similarity.

    Args:
        query: Free-text description of what the reader is looking for.
        n: Maximum number of recommendations. Zero or a negative value
            yields an empty list.

    Returns:
        A list of dicts ordered from best to worst match. Each dict has a
        ``'Title'`` key (read from ``df``) and a ``'similarity'`` key holding
        the cosine similarity formatted as a percentage string such as
        ``"22.53%"``. No minimum score is enforced, so ``"0.00%"`` entries
        can appear when few descriptions share vocabulary with the query.
    """
    # Guard first: with n == 0 the slice ``[-0:]`` below would select *every*
    # row, and a negative n would return almost the whole catalogue.
    if n <= 0:
        return []

    processed_query = preprocess(query)
    query_vec = tfidf.transform([processed_query])
    similarity_scores = cosine_similarity(query_vec, tfidf_matrix).flatten()

    # argsort is ascending: take the last n positions, then reverse them so
    # the strongest match comes first.
    top_indices = similarity_scores.argsort()[-n:][::-1]

    # Positional lookup on the Title column; rows of tfidf_matrix line up
    # with df by position, not by index label.
    titles = df['Title']
    return [
        {
            'Title': titles.iloc[idx],
            'similarity': f"{similarity_scores[idx]*100:.2f}%",
        }
        for idx in top_indices
    ]

# Step 5: Example Usage

In [5]:
# Query 1: sci-fi action with humour
query = "I love thrilling action novels set in space with a comedic twist"
recommendations = recommend_books(query)

print("Top Recommendations:")
for position, book in enumerate(recommendations, start=1):
    title, score = book['Title'], book['similarity']
    print(f"{position}. {title} ({score} match)")

Top Recommendations:
1. Future Space: Beyond Earth (22.53% match)
2. The National Air and Space Museum (20.74% match)
3. Love Invents Us (18.21% match)
4. Love, Love, and Love (14.64% match)
5. Why Can't I Fall in Love? A 12-Step Program (13.82% match)


In [6]:
# Query 2: detective / crime fiction
query = "A thrilling mystery novel with detectives and crime-solving"
recommendations = recommend_books(query)

print("Top Recommendations:")
for position, book in enumerate(recommendations, start=1):
    title, score = book['Title'], book['similarity']
    print(f"{position}. {title} ({score} match)")

Top Recommendations:
1. A delicately personal matter: A J.D. Mulroy mystery novel (32.37% match)
2. Ride a Pale Horse (24.11% match)
3. Collected Stories of Reynolds Price (12.32% match)
4. The 27-Ingredient Chili Con Carne Murders (11.01% match)
5. An Atlas of the Difficult World: Poems 1988-1991 (10.34% match)


In [8]:
# Query 3: sports romantic comedy
query = "A good sports Rom-Com"
recommendations = recommend_books(query)

print("Top Recommendations:")
for position, book in enumerate(recommendations, start=1):
    title, score = book['Title'], book['similarity']
    print(f"{position}. {title} ({score} match)")

Top Recommendations:
1. Links Lore (29.30% match)
2. Smoky Mountains Trout Fishing Guide (25.50% match)
3. A Thing or Two About Soccer (24.87% match)
4. Hacking Exposed: Network Security Secrets & Solutions, Third Edition (Hacking Exposed) (10.06% match)
5. Betrayal : How the Clinton Administration Undermined American Security (8.70% match)
