# Book recommendation system

## Imports & set up

In [183]:
!pip install -q transformers torch sentence-transformers gradio requests beautifulsoup4 langchain langchain-community


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [184]:
!pip install -q bitsandbytes accelerate


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [185]:
!pip install -q plotly

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [186]:
import os
import json
import re
import requests
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
from bs4 import BeautifulSoup
import plotly.graph_objects as go
import plotly.express as px
from sklearn.decomposition import PCA
from collections import Counter

## The Model Class

In [199]:
import os

class OpenSourceLLM:
    def __init__(self):
        print("Real LLM loaded ‚Äî ready to recommend like a human")
        self.api_key = "your_key_here
        self.api_url = "https://api.groq.com/openai/v1/chat/completions"
        self.model = "llama-3.3-70b-versatile"  

    def generate(self, prompt, max_tokens=200):
        if not self.api_key:
            return "Sweet romance with amazing chemistry and all the feels!"
    
        import requests
        payload = {
            "model": self.model,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": max_tokens,
            "temperature": 0.8
        }
    
        try:
            response = requests.post(
                self.api_url,
                headers={
                    "Authorization": f"Bearer {self.api_key.strip()}",  # Strip any spaces
                    "Content-Type": "application/json"
                },
                json=payload,
                timeout=30
            )
            print(f"Groq status: {response.status_code}")  # Debug line
            if response.status_code == 200:
                return response.json()["choices"][0]["message"]["content"].strip()
            else:
                print(f"Groq error details: {response.text}")  # Shows exact error
                return "Sweet romance with amazing chemistry and all the feels!"
        except Exception as e:
            print(f"Request failed: {e}")
            return "Sweet romance with amazing chemistry and all the feels!"

In [200]:
llm = OpenSourceLLM()

Real LLM loaded ‚Äî ready to recommend like a human


## Book Search Engine

In [201]:

class BookSearchEngine:
    """Search books from multiple sources on the web"""
    
    def __init__(self):
        self.google_books_url = "https://www.googleapis.com/books/v1/volumes"
        self.openlibrary_url = "https://openlibrary.org/search.json"
    
    def search_google_books(self, query, max_results=10):
        """Search Google Books API"""
        try:
            params = {
                'q': query,
                'maxResults': max_results,
                'printType': 'books',
                'orderBy': 'relevance'
            }
            response = requests.get(self.google_books_url, params=params, timeout=10)
            data = response.json()
            
            books = []
            for item in data.get('items', []):
                vol_info = item.get('volumeInfo', {})
                books.append({
                    'title': vol_info.get('title', 'Unknown'),
                    'authors': ', '.join(vol_info.get('authors', ['Unknown'])),
                    'description': vol_info.get('description', 'No description available'),
                    'categories': ', '.join(vol_info.get('categories', ['General'])),
                    'published': vol_info.get('publishedDate', 'N/A'),
                    'rating': vol_info.get('averageRating', 'N/A'),
                    'thumbnail': vol_info.get('imageLinks', {}).get('thumbnail', ''),
                    'source': 'Google Books'
                })
            return books
        except Exception as e:
            print(f"Google Books error: {e}")
            return []
    
    def search_openlibrary(self, query, max_results=10):
        """Search Open Library API"""
        try:
            params = {
                'q': query,
                'limit': max_results,
                'fields': 'title,author_name,first_publish_year,subject,ratings_average'
            }
            response = requests.get(self.openlibrary_url, params=params, timeout=10)
            data = response.json()
            
            books = []
            for doc in data.get('docs', []):
                books.append({
                    'title': doc.get('title', 'Unknown'),
                    'authors': ', '.join(doc.get('author_name', ['Unknown'])),
                    'description': ', '.join(doc.get('subject', ['No description'])[:3]),
                    'categories': ', '.join(doc.get('subject', ['General'])[:2]),
                    'published': doc.get('first_publish_year', 'N/A'),
                    'rating': doc.get('ratings_average', 'N/A'),
                    'thumbnail': '',
                    'source': 'Open Library'
                })
            return books
        except Exception as e:
            print(f"Open Library error: {e}")
            return []
    
    def search_books(self, query, max_results=15):
        """Search books from multiple sources"""
        print(f"Searching for: {query}")
        
        # Search both APIs
        google_results = self.search_google_books(query, max_results=max_results//2)
        openlibrary_results = self.search_openlibrary(query, max_results=max_results//2)
        
        # Combine results
        all_books = google_results + openlibrary_results
        
        # Remove duplicates based on title similarity
        unique_books = []
        seen_titles = set()
        for book in all_books:
            title_lower = book['title'].lower()
            if title_lower not in seen_titles:
                seen_titles.add(title_lower)
                unique_books.append(book)
        
        print(f"Found {len(unique_books)} unique books")
        return unique_books


In [202]:
search_engine = BookSearchEngine()


### Testing the search 

In [203]:
test_results = search_engine.search_books("science fiction space", max_results=5)
print(f"\nTest search found {len(test_results)} books:")
for book in test_results[:3]:
    print(f"- {book['title']} by {book['authors']}")

Searching for: science fiction space
Found 4 unique books

Test search found 4 books:
- Science Fiction Literature through History by Gary Westfahl
- Science Fiction and Space Futures by Eugene Morlock Emme
- 2001 by Arthur C. Clarke


## Book Embeddings

In [204]:

print("Loading embedding model...")
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
print("Embedding model ready!")


Loading embedding model...
Embedding model ready!


In [205]:

def create_book_embeddings(books):
    """Create embeddings for book search results"""
    if not books:
        return np.array([])
    
    # Create rich text for embedding
    texts = []
    for book in books:
        text = f"{book['title']}. {book['description']} {book['categories']}"
        texts.append(text)
    
    embeddings = embedding_model.encode(texts)
    return embeddings

def semantic_rerank(books, query, top_k=5):
    """Rerank books using semantic similarity"""
    if not books:
        return []
    
    book_embeddings = create_book_embeddings(books)
    query_embedding = embedding_model.encode([query])
    
    similarities = cosine_similarity(query_embedding, book_embeddings)[0]
    top_indices = np.argsort(similarities)[-top_k:][::-1]
    
    ranked_books = [books[i] for i in top_indices]
    return ranked_books

## LLM Part 

In [206]:
class AIBookRecommender:
    def __init__(self, llm, search_engine, embedding_model):
        self.llm = llm
        self.search_engine = search_engine
        self.embedding_model = embedding_model
        self.reading_list = []
        self.last_search_results = []
        self.last_embeddings = None

    def create_book_embeddings(self, books):
        """Create embeddings for book search results"""
        if not books:
            return np.array([])
        
        texts = []
        for book in books:
            text = f"{book['title']}. {book['description']} {book['categories']}"
            texts.append(text)
        
        embeddings = self.embedding_model.encode(texts)
        return embeddings

    def semantic_rerank(self, books, query, top_k=20):
        """Rerank books using semantic similarity"""
        if not books:
            return []
        
        book_embeddings = self.create_book_embeddings(books)
        query_embedding = self.embedding_model.encode([query])
        
        similarities = cosine_similarity(query_embedding, book_embeddings)[0]
        top_indices = np.argsort(similarities)[-top_k:][::-1]
        
        ranked_books = [books[i] for i in top_indices]
        return ranked_books

    def extract_intent(self, user_message):
        """Extract book title and tropes using LLM + fallbacks, then build smart search query"""
        lower = user_message.lower()

        # If no "like" or genre keywords ‚Üí just search for the message + "books"
        if not any(phrase in lower for phrase in ["like", "similar", "recommend", "loved", "read", "want", "something"]):
            return user_message + " books", None

        # LLM prompt to extract structured intent
        intent_prompt = f"""
User message: "{user_message}"

Extract:
1. The exact book title they want something similar to (if mentioned). Return "None" if not clear.
2. List of key tropes, genres, or vibes they mentioned or implied (e.g. fake dating, enemies to lovers, dark academia, cozy fantasy, slow burn, found family).

Format exactly as:
BOOK: <title or None>
TROPES: trope1, trope2, trope3

Only respond with those two lines.
"""

        response = self.llm.generate(intent_prompt, max_tokens=100).strip()

        # Parse LLM response
        book_title = None
        tropes = []
        for line in response.split('\n'):
            if line.startswith("BOOK:"):
                title = line[5:].strip()
                if title.lower() != "none":
                    book_title = title
            elif line.startswith("TROPES:"):
                trope_str = line[7:].strip()
                if trope_str:
                    tropes = [t.strip() for t in trope_str.split(',') if t.strip()]

        # Fallback: regex for book title
        if not book_title:
            import re
            match = re.search(r"(?:like|similar to|loved|read|something like)\s+([a-zA-Z0-9\s,'\"\-\.&!]+?)(?:\s+by|\?|$|,)", lower)
            if match:
                book_title = match.group(1).strip().title()

        # Fallback: keyword trope detection
        if not tropes:
            common_checks = {
                'fake dating': ['fake dat', 'fake relat', 'pretend', 'fake boyfriend', 'fake girlfriend'],
                'friends to lovers': ['friend', 'best friend', 'childhood friend', 'friends to lovers'],
                'enemies to lovers': ['enem', 'hate to love', 'rival'],
                'grumpy sunshine': ['grumpy', 'sunshine'],
                'slow burn': ['slow burn', 'slowburn'],
                'romcom': ['romcom', 'romantic comedy', 'funny romance'],
                'young adult': ['ya', 'young adult', 'teen'],
                'new adult': ['new adult', 'college'],
                'dark academia': ['dark academia', 'academic'],
                'cozy fantasy': ['cozy', 'wholesome fantasy'],
                'closed door': ['clean', 'closed door', 'no spice', 'sweet'],
            }
            for trope, keywords in common_checks.items():
                if any(k in lower for k in keywords):
                    tropes.append(trope)

        # Build powerful search query
        parts = []
        if book_title:
            parts.append(f'"{book_title}"')
            parts.append("similar books")
            parts.append("read alikes")
            parts.append("goodreads recommendations")
            parts.append("booktok recommendations")
        else:
            parts.append("books")

        if tropes:
            parts.extend(tropes)

        if "book" not in " ".join(parts).lower():
            parts.append("book")

        query = " ".join(parts)
        return query, book_title

    def chat(self, user_message, history=[]):
        user_message_lower = user_message.lower().strip()
    
        # === COMMAND: Add book to reading list ===
        if user_message_lower.startswith("add "):
            title_to_add = user_message[4:].strip()
            added = False
            for book in self.last_search_results:  # fallback to last results if any
                if title_to_add.lower() in book['title'].lower():
                    if book not in self.reading_list:
                        self.reading_list.append(book)
                    return f"‚úÖ Added **{book['title']}** by {book['authors']} to your reading list!"
                    added = True
            if not added:
                return "‚ùå Couldn't find that book in recent recs. Try copying the exact title."
    
        # === COMMAND: Show reading list ===
        if user_message_lower == "reading list":
            if not self.reading_list:
                return "üìö Your reading list is empty. Start adding some bangers."
            response = "üìö **Your Reading List:**\n\n"
            for i, book in enumerate(self.reading_list, 1):
                response += f"**{i}. {book['title']}** by {book['authors']}\n"
                response += f"üìñ {book['description'][:160]}...\n\n"
            return response
    
        # === EVERYTHING ELSE: Pure LLM mode ‚Äî no search, no filtering, just raw recs ===
        prompt = f"""
    You are a brutally honest, no-bullshit book recommender who actually listens.
    User says: "{user_message}"
    
    Give exactly 5 book recommendations that genuinely match what they're asking for ‚Äî nothing safe, nothing generic.
    For each book:
    - Title
    - Author
    - One raw, real sentence explaining why it fits (vibes, themes, energy ‚Äî be direct)
    
    No disclaimers. No "it depends on your taste". No moral lectures. Just cold, hard truth recs.
    """
    
        raw_response = self.llm.generate(prompt, max_tokens=500)
    
        # Fallback if LLM fails
        if not raw_response or "error" in raw_response.lower():
            raw_response = "LLM's being a little bitch right now, but trust ‚Äî I'd have given you fire recs."
    
        final_response = f"üìö **Straight-up recs for you:**\n\n{raw_response}\n\nüí¨ Commands: `add [title]` to save ¬∑ `reading list` to view saved"
    
        return final_response
# Create the recommender
recommender = AIBookRecommender(llm, search_engine, embedding_model)
print("AI-Powered Book Recommender Ready! üöÄ")

AI-Powered Book Recommender Ready! üöÄ


## Gradio interface

In [209]:
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("#  AI Book Recommender \n###")
    chatbot = gr.Chatbot(height=500, type="messages")  
    msg = gr.Textbox(placeholder="Tell me what you like...", show_label=False)
    
    with gr.Row():
        submit = gr.Button("Send", variant="primary")
        clear = gr.Button("Clear")
    
    gr.Markdown("""
    ### Try:
    - "I want a book like the love hypothesis"
    - "fake dating grumpy sunshine"
    - "dark academia enemies to lovers"
    - "add [book title]" to save
    - "reading list" to view saved books
    """)

    def respond(message, history):
        response = recommender.chat(message, history)
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": response})
        return "", history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    submit.click(respond, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: [], None, chatbot) 
    


## Application Launch

In [210]:
demo.queue()
demo.launch(share=True, debug=True)


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://61eab0d761212545b2.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Groq status: 200
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://61eab0d761212545b2.gradio.live


