In [None]:
!pip install -U langchain-community langchain-chroma langchain-text-splitters

In [2]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_chroma import Chroma
import pandas as pd

In [3]:
import gradio as gr

In [4]:
# Book table used by every later cell; read from the notebook's working directory.
books = pd.read_csv(filepath_or_buffer="books_with_emotions.csv")

In [5]:
import numpy as np
# Build the gallery image column in one pass: rows that have a thumbnail URL get
# the "&fife=w800" suffix appended, rows without one fall back to a local placeholder file.
books["large_thumbnail"] = np.where(
    books["thumbnail"].notna(),
    books["thumbnail"] + "&fife=w800",
    "cover-not-found.jpg",
)

In [23]:
# ------------------------------------------------------------------
# Embedding models and their persisted Chroma vector stores
# ------------------------------------------------------------------

# MiniLM: the embedding model and the store persisted under ./chroma_db_minilm
# (presumably built with this same model -- confirm before swapping either side).
embedding_model1 = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
db_books_minilm = Chroma(
    embedding_function=embedding_model1,
    persist_directory="./chroma_db_minilm",
)

# MPNet: same pairing, persisted under ./chroma_db_mpnet.
embedding_model2 = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)
db_books_mpnet = Chroma(
    embedding_function=embedding_model2,
    persist_directory="./chroma_db_mpnet",
)

# --------------------------
# UI options
# --------------------------

# Drop missing categories before sorting: NaN is a float and cannot be ordered
# against strings, so sorted() would raise TypeError on a column with gaps.
categories = ["All"] + sorted(books["simple_categories"].dropna().unique())
# Tone labels shown in the UI; "All" means no tone-based re-ordering.
tones = ["All", "Happy", "Surprising", "Angry", "Suspenseful", "Sad"]
# Labels the recommendation function maps to a vector store.
embedding_choices = ["MiniLM-L6-v2", "MPNET-base-v2"]  # You can add more later

# --------------------------
# Recommendation function
# --------------------------

def retrieve_semantic_recommendations(query: str, category: str, tone: str, embedding_choice: str, top_k: int) -> pd.DataFrame:
    """Return up to ``top_k`` rows of ``books`` that best match ``query``.

    The selected vector store is asked for ``top_k * 3`` candidates so that the
    category filter still has material to work with. Candidates are kept in
    similarity order, optionally filtered by category, cut to ``top_k`` and
    finally re-ordered by the emotion column matching ``tone``.

    Args:
        query: Free-text description of the wanted book. A blank query yields
            an empty result.
        category: Value of ``simple_categories`` to keep, or ``"All"``.
        tone: One of the UI tone labels; unknown labels (including ``"All"``)
            leave the similarity order untouched.
        embedding_choice: ``"MiniLM-L6-v2"`` or ``"MPNET-base-v2"``; anything
            else falls back to the MiniLM store.
        top_k: Maximum number of rows to return. Coerced to ``int`` because UI
            sliders may deliver a float.

    Returns:
        A DataFrame with the same columns as ``books``.
    """
    # Column of ``books`` that each tone label sorts by (highest score first).
    tone_columns = {
        "Happy": "joy",
        "Surprising": "surprise",
        "Angry": "anger",
        "Suspenseful": "fear",
        "Sad": "sadness",
    }

    top_k = int(top_k)
    if top_k <= 0 or not query or not query.strip():
        # Nothing meaningful to search for: return an empty frame with the right columns.
        return books.iloc[0:0]

    # Select DB based on embedding (MiniLM is the fallback for unknown labels).
    stores = {"MiniLM-L6-v2": db_books_minilm, "MPNET-base-v2": db_books_mpnet}
    db_books = stores.get(embedding_choice, db_books_minilm)

    # Over-fetch so that category filtering can still fill top_k slots.
    recs = db_books.similarity_search(query, k=top_k * 3)

    # Each document starts with the book's ISBN-13 (optionally wrapped in quotes).
    # Remember the position of the first occurrence so the ranking can be restored.
    rank_by_isbn = {}
    for rec in recs:
        try:
            isbn = int(rec.page_content.strip('"').split()[0])
        except (IndexError, ValueError):
            # Empty or non-numeric leading token: skip this hit instead of failing the request.
            continue
        rank_by_isbn.setdefault(isbn, len(rank_by_isbn))

    # isin() yields rows in dataset order; re-order them by similarity rank so
    # that the head() below keeps the best matches rather than arbitrary ones.
    book_recs = books[books["isbn13"].isin(list(rank_by_isbn))]
    order = book_recs["isbn13"].map(rank_by_isbn).to_numpy().argsort(kind="stable")
    book_recs = book_recs.iloc[order]

    # Filter category
    if category != "All":
        book_recs = book_recs[book_recs["simple_categories"] == category]
    book_recs = book_recs.head(top_k)

    # Re-order by tone; returns a new frame instead of mutating a slice of ``books``.
    sort_column = tone_columns.get(tone)
    if sort_column is not None:
        book_recs = book_recs.sort_values(by=sort_column, ascending=False, kind="stable")

    return book_recs

# --------------------------
# Format for gallery
# --------------------------

def recommend_books(query: str, category: str, tone: str, embedding_choice: str, top_k: int):
    """Turn recommendations into ``(image, caption)`` pairs for the gallery.

    All arguments are forwarded unchanged to
    ``retrieve_semantic_recommendations``. For each returned row the caption is
    ``"<title> by <authors>: <description>"`` where the description is cut to
    its first 30 words (with ``"..."`` appended only when something was cut)
    and the ``;``-separated author list is rendered as natural language.
    Missing authors or descriptions are omitted from the caption.

    Returns:
        A list of ``(large_thumbnail, caption)`` tuples, one per recommendation.
    """
    max_words = 30
    recommendations = retrieve_semantic_recommendations(query, category, tone, embedding_choice, top_k)
    results = []

    for _, row in recommendations.iterrows():
        # Missing CSV cells arrive as NaN (a float) which has no .split(); treat them as empty text.
        description = row["description"] if isinstance(row["description"], str) else ""
        words = description.split()
        truncated_description = " ".join(words[:max_words])
        if len(words) > max_words:
            truncated_description += "..."

        raw_authors = row["authors"] if isinstance(row["authors"], str) else ""
        # Ignore empty entries such as a trailing ";" and trim stray spaces around names.
        names = [name.strip() for name in raw_authors.split(";") if name.strip()]
        if len(names) > 2:
            authors_str = f"{', '.join(names[:-1])}, and {names[-1]}"
        elif len(names) == 2:
            authors_str = f"{names[0]} and {names[1]}"
        elif names:
            authors_str = names[0]
        else:
            authors_str = ""

        caption = f"{row['title']}"
        if authors_str:
            caption += f" by {authors_str}"
        if truncated_description:
            caption += f": {truncated_description}"
        results.append((row["large_thumbnail"], caption))

    return results

# --------------------------
# Build Gradio App
# --------------------------

# Custom CSS handed to gr.Blocks (padding, button size, scrollable gallery).
_DASHBOARD_CSS = """
    .gr-block { padding: 12px !important; }
    .gr-button { font-size: 18px !important; padding: 10px 22px !important; }
    .gr-gallery { max-height: 700px; overflow-y: auto; }
"""

with gr.Blocks(css=_DASHBOARD_CSS, theme=gr.themes.Soft()) as dashboard:

    # Page header
    gr.Markdown("""
        # 📚✨ Semantic Book Recommender
        Discover books based on **meaning**, **category**, and **emotional tone** — powered by LLM + embeddings!
    """)

    with gr.Row():
        # Left column: every input control plus the two action buttons.
        with gr.Column(scale=2):
            user_query = gr.Textbox(
                lines=2,
                label="🔍 Describe the kind of book you're looking for:",
                placeholder="e.g., A story about forgiveness and redemption",
            )
            category_dropdown = gr.Dropdown(
                label="📂 Select category:",
                choices=categories,
                value="All",
            )
            tone_dropdown = gr.Dropdown(
                label="🎭 Select emotional tone:",
                choices=tones,
                value="All",
            )
            embedding_dropdown = gr.Dropdown(
                label="🧠 Select Embedding Model:",
                choices=embedding_choices,
                value="MiniLM-L6-v2",
            )
            top_k_slider = gr.Slider(
                label="📈 Number of Recommendations (Top-K)",
                minimum=5,
                maximum=50,
                step=1,
                value=16,
            )
            submit_button = gr.Button("🚀 Find Recommendations")
            clear_button = gr.Button("🗑️ Clear")

        # Right column: gallery that receives the (image, caption) pairs.
        with gr.Column(scale=3):
            gr.Markdown("### 📖 Top Recommendations")
            output = gr.Gallery(
                label="",
                columns=4,
                height="auto",
                object_fit="contain",
            )

    def _reset_controls():
        """Return the initial value of each input, followed by None to empty the gallery."""
        return ("", "All", "All", "MiniLM-L6-v2", 16, None)

    # Button wiring: submit runs the recommender, clear restores the defaults.
    submit_button.click(
        fn=recommend_books,
        outputs=output,
        inputs=[
            user_query,
            category_dropdown,
            tone_dropdown,
            embedding_dropdown,
            top_k_slider,
        ],
    )

    clear_button.click(
        fn=_reset_controls,
        inputs=[],
        outputs=[
            user_query,
            category_dropdown,
            tone_dropdown,
            embedding_dropdown,
            top_k_slider,
            output,
        ],
    )


# --------------------------
# Launch app
# --------------------------

# Start the Gradio app only when this code runs as the main module; the captured
# output below shows that this branch did run in the Colab kernel.
if __name__ == "__main__":
    # No arguments are passed, so sharing/debug behaviour is left to Gradio's defaults.
    dashboard.launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://2d1a939bda0843c2b2.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
