In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_chroma import Chroma

In [None]:
import pandas as pd
# Load the cleaned book catalogue into `doc`; the recommender cell below
# filters this frame by its "isbn13" column.
BOOKS_CSV_PATH = "dataset/book_cleaned.csv"
doc = pd.read_csv(BOOKS_CSV_PATH)
doc.head()

In [None]:
# Load the tagged descriptions file as LangChain documents.
raw_documents = TextLoader("Tagged_description.txt").load()

# Split the loaded text into one document per line so that each vector-store
# entry is a single record. The retrieval cell reads the text before the first
# ":" of every entry as an integer ISBN.
# NOTE(review): assumes Tagged_description.txt holds one "<isbn13>: <description>"
# record per line -- confirm against the file.
# chunk_size=1 keeps lines from being merged into larger chunks; the splitter
# logs a "chunk longer than specified" warning per line, which is expected here.
text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)

In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# Embedding model used to vectorise every entry of `documents`.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedding = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# --- Build the vector database ---
db_books = Chroma.from_documents(documents=documents, embedding=embedding)

print("✅ Tạo db_books thành công!")


In [None]:
def retrieve_semantic_recommendations(query: str, top_k: int = 10, initial_top_k: int = 50) -> pd.DataFrame:
    """Return the rows of ``doc`` that best match ``query``, most similar first.

    The query is run against the ``db_books`` vector store. Every hit is
    expected to start with an integer ISBN followed by ``":"``; that ISBN is
    used to look the book up in the ``isbn13`` column of ``doc``.

    Args:
        query: Free-text description of the kind of book wanted.
        top_k: Maximum number of rows to return.
        initial_top_k: Number of candidates requested from the vector store.
            At least ``top_k`` candidates are always requested.

    Returns:
        Up to ``top_k`` rows of ``doc``, ordered by similarity rank (best
        match first). Empty when no hit maps to a row of ``doc``.
    """
    # Ask for at least top_k candidates so a large top_k is not silently capped.
    recs = db_books.similarity_search(query, k=max(initial_top_k, top_k))

    # ISBN -> position in the similarity ranking (0 = best match). The first
    # occurrence wins if the same ISBN shows up more than once.
    rank_by_isbn = {}
    for rec in recs:
        # Strip surrounding whitespace/quotes before reading the "<isbn>:" prefix.
        isbn_text = rec.page_content.strip().strip('"').split(":")[0]
        try:
            isbn = int(isbn_text)
        except ValueError:
            # The entry does not start with a numeric ISBN, so it cannot be
            # matched to a row of `doc`; skip it instead of failing the request.
            continue
        rank_by_isbn.setdefault(isbn, len(rank_by_isbn))

    matches = doc[doc["isbn13"].isin(list(rank_by_isbn))]
    # Boolean filtering keeps the frame's own row order, so restore the
    # similarity order before truncating to top_k.
    ranked = matches.sort_values(
        "isbn13",
        key=lambda isbns: isbns.map(rank_by_isbn),
        kind="stable",
    )
    return ranked.head(top_k)

In [None]:
import gradio as gr

def recommend_books(query):
    """Build the text summary and cover-image list shown for a query.

    Args:
        query: Free-text request typed by the user.

    Returns:
        A ``(text, image_urls)`` tuple. ``text`` is either a user-facing
        message (empty query / no match) or one formatted section per
        recommended book; ``image_urls`` holds the ``thumbnail`` value of each
        recommended row, in the same order as the sections (empty when there is
        nothing to show).
    """
    # Treat None, "" and whitespace-only input as "no query" so that a blank
    # request never reaches the vector store.
    if not query or not query.strip():
        return "Vui lòng nhập yêu cầu về sách.", []

    recommendations_df = retrieve_semantic_recommendations(query)
    if recommendations_df.empty:
        return "Không tìm thấy sách nào phù hợp với yêu cầu của bạn.", []

    # Collect the pieces and join once instead of growing a string in the loop.
    sections = ["Các sách được gợi ý:\n\n"]
    image_urls = []
    for _, row in recommendations_df.iterrows():
        # A missing description is read by pandas as a float NaN, which cannot
        # be sliced; fall back to an empty description in that case.
        description = row["description"]
        if not isinstance(description, str):
            description = ""

        sections.append(
            f"Tiêu đề: {row['title']}\n"
            f"Tác giả: {row['authors']}\n"
            f"Thể loại: {row['categories']}\n"
            f"Đánh giá trung bình: {row['average_rating']}\n"
            f"Số trang: {row['num_pages']}\n"
            f"Mô tả: {description[:200]}...\n\n"  # Limit description length
        )
        # One thumbnail per row, so list positions line up with the sections.
        image_urls.append(row["thumbnail"])

    return "".join(sections), image_urls

def on_book_select(evt: gr.SelectData):
    """
    Placeholder handler for a selection made in the book-cover gallery.

    For now it only logs the index carried by the select event and returns
    nothing.
    """
    # In the next step, this function will be updated to display similar books.
    selected_index = evt.index
    print(f"Book selected: {selected_index}")
    return None

# Create the Gradio interface using gr.Blocks.
# Components are rendered in the order they are created inside the `with`
# block, so the statement order below is also the on-page layout.
with gr.Blocks(theme=gr.themes.Soft(), title="Hệ thống gợi ý sách") as demo:
    # Page heading and a short usage hint.
    gr.Markdown("## Hệ thống gợi ý sách")
    gr.Markdown("Nhập mô tả về loại sách bạn muốn để nhận gợi ý.")
    # Query input and the button that triggers a recommendation run.
    txt_input = gr.Textbox(lines=2, placeholder="Nhập yêu cầu của bạn về sách...")
    btn_recommend = gr.Button("Gợi ý sách")
    # Outputs: formatted book details and the matching cover thumbnails.
    txt_output = gr.Textbox(label="Thông tin sách")
    gallery_output = gr.Gallery(label="Hình ảnh bìa sách", object_fit="contain", interactive=True)

    # recommend_books returns (text, image_urls); the two values are routed to
    # txt_output and gallery_output in that order.
    btn_recommend.click(
        fn=recommend_books,
        inputs=txt_input,
        outputs=[txt_output, gallery_output]
    )

    # Add the event listener to the gallery output.
    # on_book_select receives the selection through its gr.SelectData-annotated
    # parameter rather than through `inputs`.
    gallery_output.select(
        fn=on_book_select,
        inputs=None, # No inputs needed for the select event data
        outputs=None # No output for this placeholder function yet
    )


# Launch the Gradio app.
# - debug=True: per the Gradio docs this keeps the call blocking so errors are
#   shown in the cell output; cells after this one do not run until it stops.
# - share=True: asks Gradio for a public share link.
#   NOTE(review): anyone holding that link can use the app while it is running;
#   confirm this exposure is intended.
# - height=800: taller embedded view than the default.
demo.launch(debug=True, share=True, height=800) # Increase height and enable sharing