<a href="https://colab.research.google.com/github/AbhinavKumar0000/Machine_learning_practice/blob/main/Book_recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [85]:
import pandas as pd
import numpy as np
from dotenv import load_dotenv

from langchain_community.document_loaders import TextLoader
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_chroma import Chroma

import gradio as gr


In [86]:
# Load the book catalogue (one row per book, including the emotion scores).
books = pd.read_csv("books_with_emotions.csv")

# Build the cover URL shown in the gallery. The "&fife=w800" suffix presumably asks
# the image host for a larger rendition -- TODO confirm. A missing thumbnail stays NaN
# after the string concatenation, so those rows fall back to a local placeholder image.
thumbnail_urls = books["thumbnail"] + "&fife=w800"
books["large_thumbnail"] = np.where(
    thumbnail_urls.isna(), "cover-not-found.jpg", thumbnail_urls
)


In [87]:
# Preview the first rows of the loaded catalogue to sanity-check its columns.
books.head()

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,...,surprise_x,neutral_x,anger_y,disgust_y,fear_y,joy_y,sadness_y,surprise_y,neutral_y,large_thumbnail
0,9780002005883,2005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,...,0.967157,0.729603,0.064134,0.273592,0.928168,0.932797,0.646217,0.967157,0.729603,http://books.google.com/books/content?id=KQZCP...
1,9780002261982,2261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,...,0.11169,0.252545,0.612619,0.348285,0.942528,0.704421,0.88794,0.11169,0.252545,http://books.google.com/books/content?id=gA5GP...
2,9780006178736,6178731,Rage of angels,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,...,0.11169,0.078765,0.064134,0.104007,0.972321,0.767236,0.549477,0.11169,0.078765,http://books.google.com/books/content?id=FKo2T...
3,9780006280897,6280897,The Four Loves,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,...,0.11169,0.078765,0.351483,0.150723,0.360707,0.251881,0.732685,0.11169,0.078765,http://books.google.com/books/content?id=XhQ5X...
4,9780006280934,6280935,The Problem of Pain,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=Kk-uV...,"""In The Problem of Pain, C.S. Lewis, one of th...",2002.0,4.09,176.0,...,0.475881,0.078765,0.081412,0.184495,0.095043,0.040564,0.88439,0.475881,0.078765,http://books.google.com/books/content?id=Kk-uV...


In [88]:
raw_documents = TextLoader("tagged_description.txt", encoding="utf-8").load()

# Index exactly ONE document per book.
# The retrieval step reads the first whitespace-separated token of each hit as the
# book's ISBN, so every indexed document must hold a single "<isbn> <description>" line.
# Letting the splitter merge lines up to a large chunk_size (with overlap) packs several
# books into one chunk: only the first ISBN in the chunk is ever recovered, and the
# chunk's embedding mixes unrelated descriptions.
book_lines = []
book_metadatas = []
for raw_doc in raw_documents:
    for line in raw_doc.page_content.split("\n"):
        if line.strip():  # skip blank lines
            book_lines.append(line)
            book_metadatas.append(raw_doc.metadata)

# Each text passed to create_documents() is a single line that no longer contains the
# separator, so the splitter can neither merge it with a neighbour nor cut it apart;
# it is used here only to wrap the lines as Document objects carrying the source metadata.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=2000,
    chunk_overlap=0
)
documents = text_splitter.create_documents(book_lines, metadatas=book_metadatas)

embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

# Create the Chroma vector database (one embedded entry per book)
db_books = Chroma.from_documents(documents, embeddings)

  return forward_call(*args, **kwargs)


In [89]:
# UI tone label -> base name of the emotion-score column used to rank the results.
_TONE_EMOTION_COLUMNS = {
    "Happy": "joy",
    "Surprising": "surprise",
    "Angry": "anger",
    "Suspenseful": "fear",
    "Sad": "sadness",
}


def retrieve_semantic_recommendations(
        query: str,
        category: str = None,
        tone: str = None,
        initial_top_k: int = 50,
        final_top_k: int = 16,
) -> pd.DataFrame:
    """Return up to ``final_top_k`` rows of ``books`` that best match ``query``.

    Steps:
      1. Fetch ``initial_top_k`` nearest documents from the ``db_books`` vector store.
      2. Read the leading token of each document as the book's ``isbn13`` and select
         the matching rows of ``books``, ordered by similarity rank (best first).
      3. Optionally keep only rows whose ``simple_categories`` equals ``category``.
      4. Truncate to ``final_top_k`` rows.
      5. Optionally re-order those rows by an emotion score, highest first.

    Args:
        query: Free-text description of the desired book.
        category: Category to filter on. ``None`` or ``"All"`` disables the filter.
        tone: One of "Happy", "Surprising", "Angry", "Suspenseful", "Sad" to sort by
            the corresponding emotion score; any other value keeps similarity order.
        initial_top_k: Number of candidates requested from the vector store.
        final_top_k: Maximum number of rows returned.

    Returns:
        A new DataFrame (never a view that is sorted in place) with the selected rows.

    Raises:
        ValueError: If a document's leading token is not an integer ISBN.
        KeyError: If a tone is requested but no matching score column exists.
    """
    recs = db_books.similarity_search(query, k=initial_top_k)

    # Map each ISBN to the position of its first (best) hit; documents with no
    # content are skipped instead of raising IndexError.
    rank_by_isbn = {}
    for rec in recs:
        tokens = rec.page_content.strip('"').split()
        if tokens:
            rank_by_isbn.setdefault(int(tokens[0]), len(rank_by_isbn))

    # isin() returns rows in catalogue order, which would discard the similarity
    # ranking before truncation, so re-order the matches by their search rank.
    book_recs = books[books["isbn13"].isin(list(rank_by_isbn))]
    search_rank = book_recs["isbn13"].map(rank_by_isbn).to_numpy()
    book_recs = book_recs.iloc[search_rank.argsort(kind="stable")].head(initial_top_k)

    # None (the default) and "All" both mean "no category filter".
    if category is not None and category != "All":
        book_recs = book_recs[book_recs["simple_categories"] == category]
    book_recs = book_recs.head(final_top_k)

    emotion = _TONE_EMOTION_COLUMNS.get(tone)
    if emotion is not None:
        # Prefer the plain column; fall back to merge-suffixed variants
        # ("joy_x" / "joy_y") in case the CSV only carries those.
        candidates = (emotion, f"{emotion}_x", f"{emotion}_y")
        sort_column = next((col for col in candidates if col in book_recs.columns), None)
        if sort_column is None:
            raise KeyError(f"No emotion score column found for tone {tone!r}: tried {candidates}")
        book_recs = book_recs.sort_values(by=sort_column, ascending=False)

    return book_recs

In [90]:
def recommend_books(
        query: str,
        category: str,
        tone: str
):
    """Build gallery items for the books matching ``query``.

    Delegates the search to ``retrieve_semantic_recommendations`` and converts each
    resulting row into an ``(image, caption)`` tuple, where the caption is
    ``"<title> by <authors>: <first 30 words of the description>..."``.

    Args:
        query: Free-text description of the desired book.
        category: Category filter, forwarded unchanged.
        tone: Emotional tone, forwarded unchanged.

    Returns:
        list[tuple[str, str]]: ``(large_thumbnail, caption)`` per recommended book.
    """
    recommendations = retrieve_semantic_recommendations(query, category, tone)
    results = []

    for _, row in recommendations.iterrows():
        # A missing CSV cell is read as float NaN, which has no .split(); treat any
        # non-string value as "no text" instead of crashing the whole search.
        description = row["description"]
        if not isinstance(description, str):
            description = ""
        # The ellipsis is appended unconditionally, even for short descriptions.
        truncated_description = " ".join(description.split()[:30]) + "..."

        authors = row["authors"]
        if not isinstance(authors, str):
            authors_str = "Unknown author"
        else:
            # Authors are ";"-separated; render them as an English list.
            authors_split = authors.split(";")
            if len(authors_split) == 2:
                authors_str = f"{authors_split[0]} and {authors_split[1]}"
            elif len(authors_split) > 2:
                authors_str = f"{', '.join(authors_split[:-1])}, and {authors_split[-1]}"
            else:
                authors_str = authors

        caption = f"{row['title']} by {authors_str}: {truncated_description}"
        results.append((row["large_thumbnail"], caption))
    return results


In [106]:
# Dropdown choices; "All" is the sentinel for "no category filter" / "no tone sort".
# dropna() keeps sorted() from raising TypeError if a missing category (float NaN)
# is mixed in with the category strings.
categories = ["All"] + sorted(books["simple_categories"].dropna().unique())
tones = ["All"] + ["Happy", "Surprising", "Angry", "Suspenseful", "Sad"]

# Custom CSS for the dashboard (dark theme, purple accents); passed to gr.Blocks(css=...) below
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');

* {
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
}

.gradio-container {
    background: black !important;
    min-height: 100vh;
}

.main-container {
    background: black !important;
    border-radius: 12px !important;
    box-shadow: 0 1px 3px rgba(0, 0, 0, 0.12), 0 1px 2px rgba(0, 0, 0, 0.24) !important;
    margin: 20px auto !important;
    max-width: 1400px !important;
    overflow: hidden !important;
}

.header {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    color: black !important;
    padding: 32px !important;
    text-align: center !important;
}

.title {
    font-size: 2.25rem !important;
    font-weight: 700 !important;
    margin: 0 !important;
    letter-spacing: -0.025em !important;
}

.subtitle {
    font-size: 1.125rem !important;
    font-weight: 400 !important;
    opacity: 0.9 !important;
    margin-top: 8px !important;
}

.content {
    padding: 32px !important;
}

.input-section {
    background: #07050f !important;
    border-radius: 8px !important;
    padding: 24px !important;
    margin-bottom: 32px !important;
    border: 1px solid #e2e8f0 !important;
}

.gradio-textbox textarea,
.gradio-textbox input,
.gradio-dropdown select {
    background: white !important;
    border: 1px solid #d1d5db !important;
    border-radius: 6px !important;
    padding: 12px 16px !important;
    font-size: 14px !important;
    line-height: 1.5 !important;
    color: #374151 !important;
    transition: border-color 0.2s ease !important;
    width: 100% !important;
    box-sizing: border-box !important;
}

.gradio-textbox textarea {
    resize: vertical !important;
    min-height: 80px !important;
    max-height: 120px !important;
}

.gradio-textbox textarea:focus,
.gradio-textbox input:focus,
.gradio-dropdown select:focus {
    border-color: #667eea !important;
    outline: none !important;
    box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
}

.gradio-textbox textarea::placeholder,
.gradio-textbox input::placeholder {
    color: #9ca3af !important;
}

label {
    font-weight: 600 !important;
    color: white !important;
    font-size: 14px !important;
    margin-bottom: 6px !important;
    display: block !important;
}

.gradio-button {
    background: linear-gradient(135deg, #8B5CF6 0%, #A855F7 100%) !important;
    border: none !important;
    border-radius: 6px !important;
    padding: 12px 24px !important;
    font-size: 14px !important;
    font-weight: 600 !important;
    color: white !important;
    cursor: pointer !important;
    transition: all 0.2s ease !important;
    width: auto !important;
    margin-top: 16px !important;
    box-shadow: 0 4px 6px rgba(139, 92, 246, 0.25) !important;
}

.gradio-button:hover {
    background: linear-gradient(135deg, #7C3AED 0%, #9333EA 100%) !important;
    transform: translateY(-1px) !important;
    box-shadow: 0 6px 12px rgba(139, 92, 246, 0.35) !important;
}

.recommendations-section {
    border-top: 1px solid black!important;
    padding-top: 32px !important;
}

.section-title {
    font-size: 1.5rem !important;
    font-weight: 600 !important;
    color: white !important;
    margin-bottom: 20px !important;
    text-align: center !important;
}

.gradio-gallery {
    border: 1px solid #e5e7eb !important;
    border-radius: 8px !important;
    background: white !important;
    padding: 16px !important;
}

.gradio-gallery .gallery-item {
    border-radius: 6px !important;
    overflow: hidden !important;
    box-shadow: 0 1px 2px rgba(0, 0, 0, 0.1) !important;
}

.book-description-section {
    background: #1a1a2e !important;
    border-radius: 8px !important;
    padding: 24px !important;
    margin-top: 20px !important;
    border: 1px solid #374151 !important;
}

.description-title {
    font-size: 1.25rem !important;
    font-weight: 600 !important;
    color: #8B5CF6 !important;
    margin-bottom: 12px !important;
    text-align: center !important;
}

.description-text {
    background: #0f172a !important;
    border: 1px solid #374151 !important;
    border-radius: 6px !important;
    padding: 16px !important;
    color: #e2e8f0 !important;
    font-size: 14px !important;
    line-height: 1.6 !important;
    max-height: 200px !important;
    overflow-y: auto !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
}

.description-text::-webkit-scrollbar {
    width: 8px !important;
}

.description-text::-webkit-scrollbar-track {
    background: #1e293b !important;
    border-radius: 4px !important;
}

.description-text::-webkit-scrollbar-thumb {
    background: #8B5CF6 !important;
    border-radius: 4px !important;
}

.description-text::-webkit-scrollbar-thumb:hover {
    background: #A855F7 !important;
}

.developer-section {
    background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%) !important;
    border-top: 1px solid #374151 !important;
    padding: 40px 32px !important;
    text-align: center !important;
    margin-top: 32px !important;
    position: relative !important;
    overflow: hidden !important;
}

.developer-section::before {
    content: '' !important;
    position: absolute !important;
    top: 0 !important;
    left: 0 !important;
    right: 0 !important;
    bottom: 0 !important;
    background: radial-gradient(circle at 50% 50%, rgba(139, 92, 246, 0.1) 0%, transparent 50%) !important;
    pointer-events: none !important;
}

.developer-title {
    font-size: 12px !important;
    font-weight: 700 !important;
    color: #8B5CF6 !important;
    margin-bottom: 16px !important;
    text-transform: uppercase !important;
    letter-spacing: 0.1em !important;
    position: relative !important;
    z-index: 1 !important;
}

.developer-name {
    font-size: 24px !important;
    font-weight: 700 !important;
    color: #ffffff !important;
    margin-bottom: 8px !important;
    position: relative !important;
    z-index: 1 !important;
    text-shadow: 0 2px 4px rgba(0, 0, 0, 0.3) !important;
}

.developer-role {
    font-size: 16px !important;
    color: #cbd5e1 !important;
    margin-bottom: 24px !important;
    position: relative !important;
    z-index: 1 !important;
    font-weight: 400 !important;
}

.developer-links {
    display: flex !important;
    justify-content: center !important;
    gap: 16px !important;
    position: relative !important;
    z-index: 1 !important;
}

.developer-links a {
    display: inline-flex !important;
    align-items: center !important;
    gap: 8px !important;
    color: #ffffff !important;
    text-decoration: none !important;
    font-weight: 600 !important;
    font-size: 14px !important;
    padding: 12px 20px !important;
    border-radius: 8px !important;
    border: 2px solid rgba(139, 92, 246, 0.3) !important;
    background: rgba(139, 92, 246, 0.1) !important;
    backdrop-filter: blur(10px) !important;
    transition: all 0.3s ease !important;
    position: relative !important;
    overflow: hidden !important;
}

.developer-links a::before {
    content: '' !important;
    position: absolute !important;
    top: 0 !important;
    left: -100% !important;
    width: 100% !important;
    height: 100% !important;
    background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.1), transparent) !important;
    transition: left 0.5s !important;
}

.developer-links a:hover::before {
    left: 100% !important;
}

.developer-links a:hover {
    background: linear-gradient(135deg, #8B5CF6 0%, #A855F7 100%) !important;
    border-color: rgba(139, 92, 246, 0.8) !important;
    transform: translateY(-2px) !important;
    box-shadow: 0 8px 25px rgba(139, 92, 246, 0.3) !important;
    color: #ffffff !important;
}

.developer-links a svg {
    width: 18px !important;
    height: 18px !important;
    filter: drop-shadow(0 1px 2px rgba(0, 0, 0, 0.2)) !important;
}

/* Responsive design */
@media (max-width: 768px) {
    .main-container {
        margin: 10px !important;
    }

    .header {
        padding: 24px !important;
    }

    .title {
        font-size: 1.875rem !important;
    }

    .content {
        padding: 20px !important;
    }

    .input-section {
        padding: 20px !important;
    }

    .book-description-section {
        padding: 20px !important;
    }

    .developer-links {
        flex-direction: column !important;
        align-items: center !important;
    }

    .developer-links a {
        width: 200px !important;
        justify-content: center !important;
    }
}

/* Text overflow handling */
.gradio-textbox {
    width: 100% !important;
}

.gradio-textbox textarea,
.gradio-textbox input {
    word-wrap: break-word !important;
    white-space: pre-wrap !important;
    overflow-wrap: break-word !important;
}

/* Gallery improvements for better readability */
.gradio-gallery .gallery-item img {
    object-fit: contain !important;
    max-width: 100% !important;
    height: auto !important;
}

/* Loading state */
.gradio-button.loading {
    opacity: 0.7 !important;
    cursor: not-allowed !important;
}
"""

# Dashboard layout and event wiring. Everything that creates components or registers
# listeners lives inside this `with` block, because Gradio attaches them to the Blocks
# context that is open at the time.
with gr.Blocks(css=custom_css, title="Book Recommender - Abhinav Kumar", theme=gr.themes.Default()) as dashboard:
    with gr.Column(elem_classes="main-container"):
        # Header
        gr.HTML('''
        <div class="header">
            <h1 class="title">Semantic Book Recommender</h1>
            <p class="subtitle">Discover your next favorite book with AI-powered recommendations</p>
        </div>
        ''')

        # Main content
        with gr.Column(elem_classes="content"):
            # Input section
            with gr.Column(elem_classes="input-section"):
                user_query = gr.Textbox(
                    label="Describe the book you're looking for",
                    placeholder="Example: A mystery novel with strong female characters set in Victorian London, or a heartwarming story about family and second chances...",
                    lines=3,
                    max_lines=5,
                    show_label=True
                )

                with gr.Row():
                    with gr.Column(scale=1):
                        category_dropdown = gr.Dropdown(
                            choices=categories,
                            label="Category Filter",
                            value="All",
                            interactive=True
                        )

                    with gr.Column(scale=1):
                        tone_dropdown = gr.Dropdown(
                            choices=tones,
                            label="Emotional Tone",
                            value="All",
                            interactive=True
                        )

                submit_button = gr.Button("Find Recommendations", variant="primary")

            # Recommendations section
            with gr.Column(elem_classes="recommendations-section"):
                gr.HTML('<h2 class="section-title">Your Book Recommendations</h2>')

                output = gr.Gallery(
                    label="",
                    columns=4,
                    rows=3,
                    height=600,
                    show_label=False,
                    interactive=False,
                    container=True
                )

                # Book Description Section
                with gr.Column(elem_classes="book-description-section"):
                    gr.HTML('<h3 class="description-title">Book Description</h3>')
                    book_description = gr.Textbox(
                        label="",
                        placeholder="Click on a book above to see its description here...",
                        lines=8,
                        max_lines=12,
                        show_label=False,
                        interactive=False,
                        elem_classes="description-text"
                    )

        # Developer section
        gr.HTML('''
        <div class="developer-section">
            <div class="developer-title">Developed By</div>
            <div class="developer-name">Abhinav Kumar</div>
            <div class="developer-role">AI & Machine Learning Engineer</div>
            <div class="developer-links">
                <a href="https://www.linkedin.com/in/abhinav-kumar-9193632b3/" target="_blank">
                    <svg width="16" height="16" viewBox="0 0 24 24" fill="currentColor">
                        <path d="M20.447 20.452h-3.554v-5.569c0-1.328-.027-3.037-1.852-3.037-1.853 0-2.136 1.445-2.136 2.939v5.667H9.351V9h3.414v1.561h.046c.477-.9 1.637-1.85 3.37-1.85 3.601 0 4.267 2.37 4.267 5.455v6.286zM5.337 7.433c-1.144 0-2.063-.926-2.063-2.065 0-1.138.92-2.063 2.063-2.063 1.14 0 2.064.925 2.064 2.063 0 1.139-.925 2.065-2.064 2.065zm1.782 13.019H3.555V9h3.564v11.452zM22.225 0H1.771C.792 0 0 .774 0 1.729v20.542C0 23.227.792 24 1.771 24h20.451C23.2 24 24 23.227 24 22.271V1.729C24 .774 23.2 0 22.222 0h.003z"/>
                    </svg>
                    LinkedIn
                </a>
                <a href="https://github.com/AbhinavKumar0000" target="_blank">
                    <svg width="16" height="16" viewBox="0 0 24 24" fill="currentColor">
                        <path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z"/>
                    </svg>
                    GitHub
                </a>
            </div>
        </div>
        ''')

    # Event handlers (still inside the Blocks context, where listeners must be registered)
    def search_and_store(query, category, tone):
        """Run the search via `recommend_books_with_storage`.

        That function is defined further down in this cell, so referencing it directly
        here would raise NameError on a fresh kernel. Looking the name up at call time
        (when the user clicks the button) avoids depending on leftover kernel state.
        """
        return recommend_books_with_storage(query, category, tone)

    submit_button.click(
        fn=search_and_store,
        inputs=[user_query, category_dropdown, tone_dropdown],
        outputs=output
    )

    # Gallery selection handler for showing descriptions
    def on_gallery_select(evt: gr.SelectData):
        """Return the title, authors and full description of the clicked gallery item.

        The `gr.SelectData` type hint is what makes Gradio pass the selection event;
        `evt.index` is the position of the clicked item, which matches the row order of
        `current_recommendations` (the rows the gallery was built from).
        """
        if current_recommendations.empty:
            return "No recommendations available."

        try:
            idx = evt.index
            if 0 <= idx < len(current_recommendations):
                book = current_recommendations.iloc[idx]
                return f"{book['title']}\n {book['authors']}\n\n{book['description']}"
            return "Book not found."
        except Exception:
            # Best effort: show a message in the UI rather than failing the event,
            # without swallowing KeyboardInterrupt/SystemExit like a bare `except:`.
            return "Error loading description."

    output.select(on_gallery_select, outputs=book_description)

# Module-level cache of the most recent search results.
# `recommend_books_with_storage` overwrites it on every search, and the gallery
# selection handlers index into it by position to find the clicked book.
# It starts as an empty frame so the handlers can detect that no search has run yet.
# NOTE(review): as a single global it is shared by everyone using the running app.
current_recommendations = pd.DataFrame()

def recommend_books_with_storage(query: str, category: str, tone: str):
    """Search for books, remember the result rows globally, and build gallery items.

    The matching rows are stored in the module-level ``current_recommendations`` so
    that a later gallery click can be resolved to a book by its position.

    Args:
        query: Free-text description of the desired book.
        category: Category filter, forwarded to ``retrieve_semantic_recommendations``.
        tone: Emotional tone, forwarded to ``retrieve_semantic_recommendations``.

    Returns:
        list[tuple[str, str]]: ``(large_thumbnail, caption)`` per recommended book, in
        the same order as the rows of ``current_recommendations``. The caption is
        ``"<title> by <authors>: <first 30 words of the description>..."``.
    """
    global current_recommendations
    current_recommendations = retrieve_semantic_recommendations(query, category, tone)

    results = []
    for _, row in current_recommendations.iterrows():
        # A missing CSV cell is read as float NaN, which has no .split(); treat any
        # non-string value as "no text" instead of crashing the whole search.
        description = row["description"]
        if not isinstance(description, str):
            description = ""
        # The ellipsis is appended unconditionally, even for short descriptions.
        truncated_description = " ".join(description.split()[:30]) + "..."

        authors = row["authors"]
        if not isinstance(authors, str):
            authors_str = "Unknown author"
        else:
            # Authors are ";"-separated; render them as an English list.
            authors_split = authors.split(";")
            if len(authors_split) == 2:
                authors_str = f"{authors_split[0]} and {authors_split[1]}"
            elif len(authors_split) > 2:
                authors_str = f"{', '.join(authors_split[:-1])}, and {authors_split[-1]}"
            else:
                authors_str = authors

        caption = f"{row['title']} by {authors_str}: {truncated_description}"
        results.append((row["large_thumbnail"], caption))

    return results

def get_book_description(evt):
    """Return a formatted title / authors / description text for a gallery selection.

    Looks up the selected book by position in the module-level
    ``current_recommendations`` frame.

    Args:
        evt: Selection event; its ``index`` attribute is used as the row position.
            An object without ``index`` falls back to position 0.
            NOTE(review): to register this function directly as a Gradio ``.select``
            handler, ``evt`` must be type-hinted as ``gr.SelectData`` so Gradio
            passes the event in.

    Returns:
        str: ``"<title>\\nBy: <authors>\\n\\nDescription:\\n<description>"`` on success,
        otherwise a human-readable message (no results yet, index out of range, or
        an unexpected error).
    """
    if current_recommendations.empty:
        return "No recommendations available. Please search for books first."

    try:
        if hasattr(evt, 'index'):
            selected_index = evt.index
        else:
            # Fallback for event objects that carry no index
            selected_index = 0

        # Make sure the index is within bounds
        if 0 <= selected_index < len(current_recommendations):
            selected_book = current_recommendations.iloc[selected_index]

            title = selected_book["title"]
            authors = selected_book["authors"]
            description = selected_book["description"]

            # A missing CSV cell is read as float NaN, which has no .split().
            if not isinstance(authors, str):
                authors_str = "Unknown author"
            else:
                # Authors are ";"-separated; render them as an English list.
                authors_split = authors.split(";")
                if len(authors_split) == 2:
                    authors_str = f"{authors_split[0]} and {authors_split[1]}"
                elif len(authors_split) > 2:
                    authors_str = f"{', '.join(authors_split[:-1])}, and {authors_split[-1]}"
                else:
                    authors_str = authors

            return f"{title}\nBy: {authors_str}\n\nDescription:\n{description}"

        return "Selected book not found. Please try selecting another book."

    except Exception as e:
        return f"Error loading book description: {str(e)}"

# Start the dashboard only when this code runs as the main module.
if __name__ == "__main__":
    # No public share link, open a browser tab, and surface handler errors in the UI.
    launch_options = {"share": False, "inbrowser": True, "show_error": True}
    dashboard.launch(**launch_options)



Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.
* To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>