# In [None]:
import pandas as pd
import numpy as np

from langchain.schema import Document
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter

import gradio as gr


# Load dataset
books = pd.read_csv("books_with_emotions.csv")

# Create large thumbnail with fallback: every thumbnail URL gets the
# "&fife=w800" suffix (presumably a request for a wider image -- confirm
# against the image host), and rows without a thumbnail point at a local
# placeholder file instead.
books["large_thumbnail"] = np.where(
    books["thumbnail"].isna(),
    "cover-not-found.jpg",
    books["thumbnail"].astype(str) + "&fife=w800",
)

# --- Build documents with metadata ---
# One Document per book that has a tagged description. "row" stores the
# DataFrame index *label* of the book so search hits can be mapped back to
# `books`; rows whose description is missing are skipped.
documents = [
    Document(
        page_content=description,
        metadata={
            "isbn13": str(isbn13),
            "row": idx,
        },
    )
    for idx, description, isbn13 in zip(
        books.index, books["tagged_description"], books["isbn13"]
    )
    if not pd.isna(description)
]

# Split into chunks (handles long descriptions)
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100
)
documents = text_splitter.split_documents(documents)

# Give every chunk a deterministic id ("book-<row>-chunk-<n>"). The store is
# persisted on disk, so without stable ids each run of this script would add
# a fresh copy of every chunk under a new random id, and those duplicates
# would then crowd the top-k search results. With stable ids a re-run
# overwrites the existing entries instead.
# NOTE(review): entries written by earlier runs with random ids are not
# removed by this; delete "books_chroma_db" once to clear them.
chunk_ids = []
chunks_per_row = {}
for chunk in documents:
    row_label = chunk.metadata["row"]
    ordinal = chunks_per_row.get(row_label, 0)
    chunks_per_row[row_label] = ordinal + 1
    chunk_ids.append(f"book-{row_label}-chunk-{ordinal}")

# --- Create Chroma vector store with Hugging Face embeddings ---
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db_books = Chroma.from_documents(
    documents=documents,
    embedding=embedding,
    ids=chunk_ids,
    persist_directory="books_chroma_db"
)


# --- Retrieval + Filtering ---
# Maps the tone labels shown in the UI to the emotion-score column of `books`
# used for re-ranking.
_TONE_TO_EMOTION = {
    "Happy": "joy",
    "Surprising": "surprise",
    "Angry": "anger",
    "Suspenseful": "fear",
    "Sad": "sadness",
}


def retrieve_semantic_recommendations(
    query: str,
    category: str = None,
    tone: str = None,
    initial_top_k: int = 50,
    final_top_k: int = 16,
) -> pd.DataFrame:
    """Return the books that best match *query*, optionally filtered and re-ranked.

    Runs a similarity search against the module-level ``db_books`` vector
    store, maps the hits back to rows of the module-level ``books`` frame,
    then applies the optional category filter and tone re-ranking.

    Args:
        query: Free-text description to search for.
        category: Value of ``simple_categories`` to keep. ``None``, an empty
            string, or ``"All"`` disables the filter.
        tone: One of the keys of ``_TONE_TO_EMOTION``. ``None``, an empty
            string, ``"All"``, or an unknown label leaves the order unchanged.
        initial_top_k: Number of chunks requested from the vector store
            before filtering.
        final_top_k: Maximum number of books returned.

    Returns:
        A DataFrame with at most ``final_top_k`` rows of ``books``.
    """
    recs = db_books.similarity_search(query, k=initial_top_k)

    # "row" holds the index *label* captured when the documents were built, so
    # look the rows up by label (.loc) rather than by position (.iloc); the two
    # only coincide while `books` keeps its default RangeIndex. Several chunks
    # can come from the same book, so collapse repeats while keeping the
    # similarity order of the first hit.
    row_labels = list(
        dict.fromkeys(rec.metadata["row"] for rec in recs if "row" in rec.metadata)
    )
    book_recs = books.loc[row_labels].drop_duplicates(subset="isbn13")

    # Apply category filter
    if category and category != "All":
        book_recs = book_recs[book_recs["simple_categories"] == category]

    # Apply tone-based re-ranking. A stable sort keeps books with equal scores
    # in their similarity order instead of shuffling them arbitrarily.
    emotion_column = _TONE_TO_EMOTION.get(tone) if tone and tone != "All" else None
    if emotion_column is not None:
        book_recs = book_recs.sort_values(
            by=emotion_column, ascending=False, kind="stable"
        )

    return book_recs.head(final_top_k)


# --- Book Formatter ---
def _text_or_empty(value) -> str:
    """Return ``str(value)``, or ``""`` when *value* is missing (None/NaN)."""
    return "" if pd.isna(value) else str(value)


def _truncate_words(text: str, limit: int = 30) -> str:
    """Return the first *limit* words of *text*, adding "..." only if words were cut."""
    words = text.split()
    if len(words) <= limit:
        return " ".join(words)
    return " ".join(words[:limit]) + "..."


def _format_authors(authors_raw: str) -> str:
    """Turn a ";"-separated author string into a readable English list."""
    names = [name.strip() for name in authors_raw.split(";") if name.strip()]
    if not names:
        return "Unknown Author"
    if len(names) == 1:
        return names[0]
    if len(names) == 2:
        return f"{names[0]} and {names[1]}"
    return f"{', '.join(names[:-1])}, and {names[-1]}"


def recommend_books(query: str, category: str, tone: str):
    """Build gallery entries for the books recommended for *query*.

    Args:
        query: Free-text description of the desired book.
        category: Category filter passed to
            ``retrieve_semantic_recommendations``.
        tone: Tone label passed to ``retrieve_semantic_recommendations``.

    Returns:
        A list of ``(image, caption)`` tuples, one per recommended book. The
        image is the book's ``large_thumbnail`` value and the caption is
        Markdown with the title, the authors, and a description preview of at
        most 30 words.
    """
    recommendations = retrieve_semantic_recommendations(query, category, tone)
    results = []

    for _, row in recommendations.iterrows():
        # Missing cells arrive as NaN; str(NaN) would render as the literal
        # text "nan", so map them to "" before formatting.
        description = _text_or_empty(row.get("description", ""))
        authors_str = _format_authors(_text_or_empty(row.get("authors", "")))

        caption = (
            f"**{row['title']}** by *{authors_str}*\n\n{_truncate_words(description)}"
        )
        results.append((row["large_thumbnail"], caption))

    return results


# --- Gradio UI ---
# Dropdown choices: "All" followed by the sorted distinct non-null values of
# the "simple_categories" column.
categories = ["All"] + sorted(books["simple_categories"].dropna().unique())
# Tone labels offered to the user; apart from "All" these are the keys of the
# tone mapping used in retrieve_semantic_recommendations.
tones = ["All", "Happy", "Surprising", "Angry", "Suspenseful", "Sad"]

# Components are created inside the Blocks / Row contexts below, so the
# statement order here is what defines the page layout.
with gr.Blocks(theme=gr.themes.Glass()) as dashboard:
    gr.Markdown("# 📚 Semantic Book Recommender")

    # Single row holding the query box, both filters, and the submit button.
    with gr.Row():
        user_query = gr.Textbox(
            label="Please enter a description of a book:",
            placeholder="e.g., A story about forgiveness"
        )
        category_dropdown = gr.Dropdown(
            choices=categories, label="Select a category:", value="All"
        )
        tone_dropdown = gr.Dropdown(
            choices=tones, label="Select an emotional tone:", value="All"
        )
        submit_button = gr.Button("Find recommendations")

    gr.Markdown("## Recommendations")
    # Gallery that receives the (image, caption) tuples returned by
    # recommend_books.
    output = gr.Gallery(
        label="Recommended books",
        columns=4,
        object_fit="cover",
        height="auto"
    )

    # Clicking the button calls recommend_books(query, category, tone) with
    # the current widget values and sends the result to the gallery.
    submit_button.click(
        fn=recommend_books,
        inputs=[user_query, category_dropdown, tone_dropdown],
        outputs=output
    )


# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    dashboard.launch()
