In [3]:
import pandas as pd
import numpy as np
!pip install langchain_community
!pip install langchain_text_splitter
!pip install langchain_google_genai
!pip install chromadb
!pip install gradio
import gradio as gr
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma
from dotenv import load_dotenv
import os

# Read settings from a .env file into the process environment
load_dotenv()

# The Google API key is mandatory: stop right away when it is missing or empty
api_key = os.environ.get("GOOGLE_API_KEY")
if api_key is None or api_key == "":
    raise ValueError("GOOGLE_API_KEY not found in .env")

# Build the embedding client; any failure is reported and then propagated
try:
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
    print("Embeddings initialized with API key!")
except Exception as init_error:
    print(f"Error initializing embeddings: {init_error}")
    raise

# Load the books dataset and derive a "large_thumbnail" column for the gallery.
try:
    books = pd.read_csv("/content/books_with_emotions.csv")

    # Start every row on the local placeholder image, then overwrite only the
    # rows that actually have a cover URL. The "&fife=w800" size suffix must
    # be appended to real URLs only; appending it to the placeholder path
    # would produce a file name that does not exist.
    has_cover = books["thumbnail"].notna()
    books["large_thumbnail"] = "/content/image-not-found.jpg"
    books.loc[has_cover, "large_thumbnail"] = (
        books.loc[has_cover, "thumbnail"].astype(str) + "&fife=w800"
    )
    print(f"Loaded {len(books)} books successfully")
except Exception as e:
    # Report the problem in the notebook output, then abort the cell.
    print(f"Error loading books: {e}")
    raise

# Read the tagged descriptions file and split it on newline separators
try:
    raw_documents = TextLoader("/content/tagged_description.txt").load()
    text_splitter = CharacterTextSplitter(
        chunk_overlap=0,
        chunk_size=0,
        separator="\n",
    )
    documents = text_splitter.split_documents(raw_documents)
    print(f"Loaded {len(documents)} documents successfully")
except Exception as load_error:
    print(f"Error loading documents: {load_error}")
    raise

# Embed the split documents into a Chroma vector store used for similarity search
try:
    db_books = Chroma.from_documents(documents=documents, embedding=embeddings)
    print("Vector database initialized successfully")
except Exception as db_error:
    print(f"Error initializing vector database: {db_error}")
    raise

def retrieve_semantic_recommendations(
    query: str,
    category: str = None,
    tone: str = None,
    initial_top_k: int = 50,
    final_top_k: int = 16,
) -> pd.DataFrame:
    """Return the books that best match *query*, optionally filtered and re-ranked.

    Args:
        query: Free-text description passed to the vector store similarity search.
        category: Value of the ``simple_categories`` column to keep. ``None``,
            an empty string, or ``"All"`` disables the filter.
        tone: One of ``"Happy"``, ``"Surprising"``, ``"Angry"``,
            ``"Suspenseful"``, ``"Sad"``. Any other value (including ``None``
            and ``"All"``) leaves the results in similarity order.
        initial_top_k: Number of candidates requested from the vector store.
        final_top_k: Maximum number of rows returned.

    Returns:
        A slice of the module-level ``books`` DataFrame with at most
        ``final_top_k`` rows. An empty DataFrame is returned when anything
        goes wrong (the error is printed), so the UI can show a friendly
        message instead of crashing.
    """
    # UI tone label -> lower-cased name of the emotion score column to sort by.
    tone_columns = {
        "Happy": "joy",
        "Surprising": "surprise",
        "Angry": "anger",
        "Suspenseful": "fear",
        "Sad": "sadness",
    }

    try:
        # Candidates come back ordered from most to least similar.
        recs = db_books.similarity_search_with_score(query, k=initial_top_k)

        # Each document is expected to start with the ISBN (possibly quoted).
        # Remember the position of every ISBN so that the similarity ranking
        # can be restored after filtering the books table.
        isbn_rank = {}
        for rec, _ in recs:
            try:
                isbn = int(rec.page_content.strip('"').split()[0])
            except (ValueError, IndexError) as e:
                print(f"Error parsing ISBN: {e} - Content: {rec.page_content[:50]}")
                continue
            isbn_rank.setdefault(isbn, len(isbn_rank))

        # Filter by ISBN. isin() keeps the dataset's row order, so reorder the
        # matches by similarity rank; otherwise head() below would not pick
        # the best matches when no tone is selected.
        book_recs = books[books["isbn13"].isin(list(isbn_rank))]
        similarity_rank = book_recs["isbn13"].map(isbn_rank).to_numpy()
        book_recs = book_recs.iloc[similarity_rank.argsort(kind="stable")]

        # Filter by category if specified
        if category and category != "All":
            book_recs = book_recs[book_recs["simple_categories"] == category]

        # Sort by emotional tone if specified. The column is resolved
        # case-insensitively so that a differently-cased header (e.g.
        # "Surprise" vs "surprise") does not raise and wipe out the results.
        score_column = tone_columns.get(tone)
        if score_column is not None:
            columns_by_lower = {str(col).lower(): col for col in book_recs.columns}
            actual_column = columns_by_lower.get(score_column)
            if actual_column is not None:
                book_recs = book_recs.sort_values(by=actual_column, ascending=False)
            else:
                print(f"No '{score_column}' score column found; keeping similarity order")

        # Return top k results
        return book_recs.head(final_top_k)

    except Exception as e:
        # UI boundary: report the failure and degrade to "no results".
        print(f"Error in retrieve_semantic_recommendations: {e}")
        return pd.DataFrame()  # Return empty DataFrame on error

def _as_text(value) -> str:
    """Return *value* stripped when it is a string, otherwise an empty string.

    Missing cells arrive as ``None`` or float NaN; both are mapped to ``""`` so
    callers can safely use string methods and ``or`` fallbacks.
    """
    return value.strip() if isinstance(value, str) else ""


def _format_authors(authors: str) -> str:
    """Turn a ``;``-separated author string into a readable list."""
    names = [name.strip() for name in authors.split(";") if name.strip()]
    if not names:
        return "Unknown Author"
    if len(names) == 1:
        return names[0]
    if len(names) == 2:
        return f"{names[0]} and {names[1]}"
    return f"{', '.join(names[:-1])}, and {names[-1]}"


def _truncate_description(description: str, max_words: int = 30) -> str:
    """Return the first *max_words* words, adding "..." only if text was cut."""
    words = description.split()
    if not words:
        return "No description available"
    preview = " ".join(words[:max_words])
    return preview + "..." if len(words) > max_words else preview


def recommend_books(query: str, category: str, tone: str):
    """Build the gallery items and HTML details for a user query.

    Args:
        query: Free-text book description typed by the user.
        category: Selected category label (``"All"`` for no filter).
        tone: Selected emotional tone label (``"All"`` for no re-ranking).

    Returns:
        A ``(results, details_html)`` tuple. ``results`` is a list of
        ``(thumbnail, caption)`` pairs for the gallery; ``details_html`` is an
        HTML string listing each title, its authors and a short description.
        On empty input, no matches, or an unexpected error, ``results`` is
        ``[]`` and ``details_html`` carries the message to display.
    """
    import html  # local import: only needed to escape dataset text below

    try:
        # A missing or whitespace-only query cannot be searched.
        if not _as_text(query):
            return [], "Please enter a book description"

        recommendations = retrieve_semantic_recommendations(query, category, tone)

        if recommendations.empty:
            return [], "No recommendations found. Try a different query or filters."

        results = []
        titles = []

        for _, row in recommendations.iterrows():
            # Process description (NaN cells are treated as missing text)
            description = _as_text(row.get("description")) or _as_text(row.get("descriptions"))
            truncated_description = _truncate_description(description)

            # Process authors
            authors_str = _format_authors(_as_text(row.get("authors")))

            # Create caption and result entry
            title = _as_text(row.get("title")) or "Untitled"
            caption = f"{title} by {authors_str}"

            # The details pane renders raw HTML, so dataset text is escaped to
            # keep characters such as "<" or "&" from breaking the markup.
            titles.append(
                f"<b>{html.escape(title)}</b> by {html.escape(authors_str)}"
                f"<br><small>{html.escape(truncated_description)}</small>"
            )

            # Handle thumbnail
            thumbnail = _as_text(row.get("large_thumbnail")) or "/content/image-not-found.jpg"
            results.append((thumbnail, caption))

        return results, "<br><br>".join(titles)

    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing.
        print(f"Error in recommend_books: {e}")
        return [], f"An error occurred: {str(e)}"

# Set up the Gradio interface.
# Dropdown choices: "All" followed by the distinct non-null values of the
# "simple_categories" column in alphabetical order, and the fixed tone labels.
# "All" is the default for both dropdowns; retrieve_semantic_recommendations
# skips the category filter for "All" and only re-sorts for the other tones.
categories = ["All"] + sorted(books["simple_categories"].dropna().unique().tolist())
tones = ["All", "Happy", "Surprising", "Angry", "Suspenseful", "Sad"]

# Components are created inside nested Row/Column contexts; the nesting and
# creation order below define the page layout.
with gr.Blocks(theme=gr.themes.Glass()) as dashboard:
    gr.Markdown("# Semantic Book Recommender")

    # Top row: query box, the two filter dropdowns, and the submit button.
    with gr.Row():
        with gr.Column(scale=1):
            user_query = gr.Textbox(
                label="Please enter a description of a book:",
                placeholder="e.g., A story about forgiveness",
                lines=3
            )

            with gr.Row():
                category_dropdown = gr.Dropdown(
                    choices=categories,
                    label="Select a category:",
                    value="All"
                )

                tone_dropdown = gr.Dropdown(
                    choices=tones,
                    label="Select an emotional tone:",
                    value="All"
                )

            submit_button = gr.Button("Find recommendations", variant="primary")

    # Bottom row: cover gallery (scale=2) next to the HTML details pane (scale=1).
    with gr.Row():
        with gr.Column(scale=2):
            # 4 x 4 grid matches the default final_top_k of 16 recommendations.
            output_gallery = gr.Gallery(
                label="Recommended books",
                columns=4,
                rows=4,
                object_fit="contain",
                height="600px"
            )

        with gr.Column(scale=1):
            output_text = gr.HTML(label="Book Details")

    # Clicking the button calls recommend_books(query, category, tone); its two
    # return values fill the gallery and the details pane, in that order.
    submit_button.click(
        fn=recommend_books,
        inputs=[user_query, category_dropdown, tone_dropdown],
        outputs=[output_gallery, output_text]
    )

# Launch the app only when this code runs as the main module.
if __name__ == "__main__":
    dashboard.launch(debug=False)

[31mERROR: Could not find a version that satisfies the requirement langchain_text_splitter (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for langchain_text_splitter[0m[31m




Embeddings initialized with API key!
Loaded 5197 books successfully


[1;30;43mStreaming output truncated to the last 5000 lines.[0m


Loaded 5197 documents successfully
Vector database initialized successfully
It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://a3150834d9e1950a30.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
