In [2]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load cleaned book data
books = pd.read_csv("cleaned_books.csv")

# Vectorize content using TF-IDF.
# Empty CSV cells are read back as NaN, which TfidfVectorizer rejects, so
# missing content is treated as an empty document. Rows are neither dropped
# nor reordered: row i of `tfidf_matrix` still describes row i of `books`.
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(books["content"].fillna("").astype(str))

# Recommendation function
def recommend_books(liked_titles, top_n=5):
    """Recommend books whose TF-IDF content is most similar to the liked titles.

    The liked books' TF-IDF rows are averaged into a single profile vector and
    every book is ranked by cosine similarity to that profile.

    Parameters
    ----------
    liked_titles : str or iterable of str
        Title(s) to build the profile from. They are matched exactly against
        ``books["title"]``. A single string is treated as one title.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    list of dict
        Up to ``top_n`` records with the keys ``title``, ``author``,
        ``edition``, ``imprint`` and ``date``, most similar first. Empty when
        no liked title is found in ``books`` or when ``top_n`` is not positive.
    """
    # A bare string would otherwise be rejected by ``Series.isin``.
    if isinstance(liked_titles, str):
        liked_titles = [liked_titles]

    # A negative ``top_n`` would slice from the end of the ranking.
    if top_n <= 0:
        return []

    # Work with row *positions* (not index labels) so the rows taken from
    # ``tfidf_matrix`` and the rows excluded from the results always agree.
    liked_mask = books["title"].isin(liked_titles).to_numpy()
    liked_positions = np.flatnonzero(liked_mask)

    # Test for "no match" by size: checking the truthiness of the positions
    # themselves would wrongly report no match when the only hit is row 0.
    if liked_positions.size == 0:
        return []

    # The mean of sparse rows may come back as ``np.matrix``; convert it to a
    # plain 2-D ndarray before handing it to ``cosine_similarity``.
    user_profile = np.asarray(tfidf_matrix[liked_positions].mean(axis=0)).reshape(1, -1)
    similarity_scores = cosine_similarity(user_profile, tfidf_matrix)[0]
    ranked_positions = similarity_scores.argsort()[::-1]

    # Skip the books the user already likes, then keep the best ``top_n``.
    recommendations = [
        pos for pos in ranked_positions if not liked_mask[pos]
    ][:top_n]

    return books.iloc[recommendations][["title", "author", "edition", "imprint", "date"]].to_dict("records")



In [3]:
# ======================
# Main user interaction
# ======================
if __name__ == "__main__":
    # Interactive flow: show a few sample titles, let the user search for a
    # title by substring, pick one of the matches, then print recommendations.
    print("\n🔎 Sample Book Titles:")
    # Cap the sample size so datasets with fewer than 10 titles do not raise.
    sample_titles = books['title'].dropna()
    print(sample_titles.sample(min(10, len(sample_titles))).tolist())

    user_input = input("\n📘 Enter the title of a book you like (partial OK): ").strip().lower()

    if not user_input:
        # An empty search string would match every title in the dataset.
        print("⚠️ No matching book found in dataset.")
    else:
        # regex=False: the user's text is matched literally, so characters such
        # as "+", "(" or "[" cannot raise a regex error.
        # na=False: rows with a missing title count as "no match" instead of
        # producing an NA mask that cannot be used for boolean indexing.
        title_matches = books['title'].str.contains(user_input, case=False, regex=False, na=False)
        matched_titles = books.loc[title_matches, 'title'].unique()

        if len(matched_titles) == 0:
            print("⚠️ No matching book found in dataset.")
        else:
            print(f"\n✅ Found {len(matched_titles)} match(es):")
            for i, title in enumerate(matched_titles):
                print(f"{i+1}. {title}")

            # Ask user which match to use
            choice = input(f"\nEnter the number of the book to base recommendations on (1–{len(matched_titles)}): ").strip()
            # isdecimal() only accepts characters int() can parse; isdigit()
            # also accepts e.g. superscript digits, which make int() raise.
            if not choice.isdecimal() or not (1 <= int(choice) <= len(matched_titles)):
                print("❌ Invalid choice.")
            else:
                selected_title = matched_titles[int(choice) - 1]
                print(f"\n📖 Using: {selected_title.title()}")

                recommendations = recommend_books([selected_title])

                if recommendations:
                    print("\n📚 Recommended Books:")
                    for book in recommendations:
                        # Title/author may be missing (NaN) in the data, and
                        # NaN has no .title() method.
                        title_text = book['title'].title() if isinstance(book['title'], str) else "Unknown Title"
                        author_text = book['author'].title() if isinstance(book['author'], str) else "Unknown Author"
                        print(f"- {title_text} by {author_text} ({book['edition']}, {book['imprint']}, {book['date']})")
                else:
                    print("⚠️ No recommendations found for that book.")



🔎 Sample Book Titles:
['philosophical investigations /', 'the everything reading music book:', 'management essentials:', 'rise of the american nation /', 'human resources management /', 'short model essays /', 'adolescence and youth:', 'error correction techniques for the foreign language classroom /', 'the worldly philosophers:', 'managed futures:']

✅ Found 1616 match(es):
1. systems analysis and design /
2. programming logic and design: comprehensive
3. electrical design guide for commercial buildings /
4. practical research: planning and design
5. designing and conducting mixed methods research /
6. qualitative research design:
7. qualitative research design :
8. research design and methods :
9. design of experiments:
10. the presentation design book:
11. experimental designs /
12. the design and understanding of survey questions /
13. designing sensible surveys /
14. designing and conducting survey research:
15. design and analysis of experiments /
16. research design and statist