In [2]:
import pandas as pd

# Load the dataset
df = pd.read_csv("books.csv")

# Function to assign genre based on title keywords
def assign_genre(title):
    title_lower = str(title).lower()

    # Fiction genres
    if any(word in title_lower for word in ["love", "romance", "heart"]):
        return "Romance"
    elif any(word in title_lower for word in ["murder", "crime", "detective", "thriller"]):
        return "Thriller"
    elif any(word in title_lower for word in ["magic", "dragon", "fantasy", "wizard"]):
        return "Fantasy"
    elif any(word in title_lower for word in ["space", "alien", "galaxy", "planet"]):
        return "Science Fiction"
    elif any(word in title_lower for word in ["history", "war", "ancient", "kingdom"]):
        return "Historical"
    elif any(word in title_lower for word in ["self", "mind", "life", "success", "habit"]):
        return "Self-Help"
    elif any(word in title_lower for word in ["cook", "recipe", "food", "kitchen"]):
        return "Cooking"
    elif any(word in title_lower for word in ["business", "money", "finance", "economy"]):
        return "Business"
    elif any(word in title_lower for word in ["art", "design", "drawing", "painting"]):
        return "Art"
    elif any(word in title_lower for word in ["poem", "poetry", "verse", "lyric"]):
        return "Poetry"
    else:
        return "General"

# Apply genre assignment
df["genre"] = df["title"].apply(assign_genre)

# Save the updated file
df.to_csv("book_with_genres.csv", index=False)

print("✅ Genre column added and saved as 'book_with_genres.csv'")


✅ Genre column added and saved as 'book_with_genres.csv'


In [6]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load your enriched dataset
df = pd.read_csv("book_with_genres.csv")

# Filter out rows with missing ratings or genres
df = df.dropna(subset=["average_rating", "genre"])
df["average_rating"] = pd.to_numeric(df["average_rating"], errors="coerce")

# STRATEGY A - Top Rated

def recommend_top_rated(genre, min_rating=4.0, top_n=5):
    genre_books = df[(df["genre"].str.lower() == genre.lower()) &
                     (df["average_rating"] >= min_rating)]
    return genre_books.sort_values(by="average_rating", ascending=False).head(top_n)[
        ["title", "authors", "average_rating"]  # changed 'author' → 'authors'
    ]

# STRATEGY B - Similar Books

def recommend_similar_books(fav_book_title, genre, top_n=5):
    genre_books = df[df["genre"].str.lower() == genre.lower()].copy()
    genre_books["text"] = genre_books["title"] + " " + genre_books["authors"]

    tfidf = TfidfVectorizer(stop_words="english")
    tfidf_matrix = tfidf.fit_transform(genre_books["text"])

    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

    # Find closest matching title
    match = genre_books[genre_books["title"].str.lower().str.contains(fav_book_title.lower())]
    if match.empty:
        return pd.DataFrame({"Error": ["Book not found in dataset"]})

    idx = match.index[0]  # take the first match
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1: top_n+1]
    book_indices = [i[0] for i in sim_scores]

    return genre_books.iloc[book_indices][["title", "authors", "average_rating"]]

# Example
print("📚A - Top Rated Fantasy Books:")
print(recommend_top_rated("Fantasy", 4.0, 5))

print("\n📚B - Books Similar to 'Pride and Prejudice':")
print(recommend_similar_books("Pride and Prejudice", "Romance", 5))


📚A - Top Rated Fantasy Books:
                                                  title         authors  \
4482  It's a Magical World: A Calvin and Hobbes Coll...  Bill Watterson   
9711                     Magic Binds (Kate Daniels, #9)   Ilona Andrews   
6117                    Magic Breaks (Kate Daniels, #7)   Ilona Andrews   
4920                     Magic Rises (Kate Daniels, #6)   Ilona Andrews   
2000                        Harry Potter: Film Wizardry    Brian Sibley   

      average_rating  
4482            4.75  
9711            4.57  
6117            4.52  
4920            4.49  
2000            4.48  

📚B - Books Similar to 'Pride and Prejudice':
                                        title          authors  average_rating
4724                         My Beloved World  Sonia Sotomayor            4.03
2189                 Cry, the Beloved Country       Alan Paton            3.87
9                         Pride and Prejudice      Jane Austen            4.24
21                 