# Code Review Summary

The current notebook loads book summaries from an Excel file, splits them into chunks, generates embeddings using OpenAI, and stores them in a Chroma vector database. It provides a simple retriever interface for semantic search over the book summaries. The code is modular, uses environment variables for API keys, and includes basic error checking for required columns. Overall, it is well-structured for its purpose and ready for further extension or experimentation.

In [None]:
import os
import uuid
import pandas as pd
import chromadb
from dotenv import load_dotenv

from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
import gradio as gr
from openai import OpenAI
from utils import get_summary_by_title

In [3]:
load_dotenv(override=True)

True

In [4]:
EXCEL_PATH = "book_summaries.xlsx"          # <-- put your file path here
SHEET_NAME = "SHEET1"                     # or a sheet string name
PERSIST_DIR = "./chroma_books"     # persist on disk
COLLECTION_NAME = "books"

EMBED_MODEL = "text-embedding-3-small"  # fast & cheap; or "text-embedding-3-large"

TOP_K=5

In [5]:
openai_api_key = os.getenv('OPENAI_API_KEY')
groq_api_key = os.getenv('GROQ_API_KEY')

if openai_api_key:
    print(f"OpenAI API Key exists and begins {openai_api_key[:8]}")
else:
    print("OpenAI API Key not set")

if groq_api_key:
    print(f"Groq API Key exists and begins {groq_api_key[:8]}")
else:
    print("Groq API Key not set")

OpenAI API Key exists and begins sk-svcac
Groq API Key exists and begins gsk_kblI


In [None]:
# Load Excel file with error handling and use SHEET_NAME
try:
    df = pd.read_excel(EXCEL_PATH, sheet_name=SHEET_NAME)
except Exception as e:
    raise RuntimeError(f"Failed to load Excel file: {e}")

# Normalize columns to lowercase for validation and use
df.columns = [c.lower() for c in df.columns]
expected_cols = {"title", "resume"}
missing = expected_cols - set(df.columns)
if missing:
    raise ValueError(f"Missing expected columns: {missing}. Found: {list(df.columns)}")

In [10]:
df.head(6)

Unnamed: 0,title,resume
0,1984,A dystopian story about a totalitarian society...
1,To Kill a Mockingbird,A profound story set in the racially divided A...
2,The Great Gatsby,"A tale of wealth, love, and illusion in the Ja..."
3,Fahrenheit 451,"In a future where books are banned, fireman Gu..."
4,Romeo and Juliet,A tragic love story between two young people f...
5,Crime and Punishment,"Rodion Raskolnikov, a poor student in St. Pete..."


In [11]:
# Drop rows with missing title or resume
df = df.dropna(subset=["title", "resume"]).reset_index(drop=True)

In [12]:
# Function to prepare documents from dataframe
def prepare_documents(df, splitter):
    documents = []
    for _, row in df.iterrows():
        title = str(row["title"])
        resume = str(row["resume"])
        text = f"{title} — {resume}"
        chunks = splitter.split_text(text)
        for i, chunk in enumerate(chunks):
            documents.append({
                "page_content": chunk,
                "metadata": {"title": title, "chunk": i}
            })
    return documents

splitter = RecursiveCharacterTextSplitter(
    chunk_size=700,
    chunk_overlap=120,
    length_function=len,
)

documents = prepare_documents(df, splitter)
print(f"Prepared {len(documents)} chunks from {len(df)} books.")

Prepared 15 chunks from 15 books.


In [13]:
embeddings = OpenAIEmbeddings(
    model=EMBED_MODEL,
    api_key=openai_api_key
)

In [8]:
import shutil

if os.path.exists(PERSIST_DIR):
    print(f"Removing existing Chroma DB at {PERSIST_DIR} to avoid settings conflict...")
    shutil.rmtree(PERSIST_DIR)

In [15]:
# Function to initialize or load Chroma vectorstore safely
def get_or_create_vectorstore(documents, embeddings, persist_dir, collection_name):
    if os.path.exists(persist_dir) and os.listdir(persist_dir):
        print(f"Loading existing Chroma DB from {persist_dir}")
        return Chroma(
            collection_name=collection_name,
            embedding_function=embeddings,
            persist_directory=persist_dir,
        )
    else:
        print(f"Creating new Chroma DB at {persist_dir}")
        vectorstore = Chroma(
            collection_name=collection_name,
            embedding_function=embeddings,
            persist_directory=persist_dir,
        )
        texts = [doc["page_content"] for doc in documents]
        metadatas = [doc["metadata"] for doc in documents]
        ids = [f"{m['title']}_{m['chunk']}" for m in metadatas]
        vectorstore.add_texts(texts=texts, metadatas=metadatas, ids=ids)
        print(f"Stored {len(documents)} chunks in Chroma at {persist_dir}.")
        return vectorstore

vectorstore = get_or_create_vectorstore(documents, embeddings, PERSIST_DIR, COLLECTION_NAME)

Loading existing Chroma DB from ./chroma_books


In [16]:
retriever = vectorstore.as_retriever(search_kwargs={"k": TOP_K})

def search(query: str):
    results = retriever.get_relevant_documents(query)
    for i, doc in enumerate(results, start=1):
        print(f"[{i}] {doc.metadata['title']} (chunk {doc.metadata['chunk']})")
        print("    " + doc.page_content[:200] + "...\n")

In [17]:
example_queries = [
    "friendship and adventure",
]

for q in example_queries:
    print("\n" + "="*80)
    print(f"Query: {q}")
    search(q)


Query: friendship and adventure


  results = retriever.get_relevant_documents(query)


[1] The Hobbit (chunk 0)
    The Hobbit — Bilbo Baggins, a simple hobbit, is thrust into an adventure with Gandalf and thirteen dwarves to reclaim their homeland from the dragon Smaug. Along the journey, he discovers courage, cun...

[2] The Alchemist (chunk 0)
    The Alchemist — Santiago, a shepherd, follows his dream of finding treasure in Egypt, learning lessons of destiny, faith, and self-discovery along the way. Themes: dreams, personal legend, and transfo...

[3] Harry Potter and the Philosopher’s Stone (chunk 0)
    Harry Potter and the Philosopher’s Stone — Harry discovers he is a wizard and begins his studies at Hogwarts. Alongside friends Ron and Hermione, he faces challenges and uncovers a dark secret. Themes...

[4] The Shadow of the Wind (chunk 0)
    The Shadow of the Wind — In post–Civil War Barcelona, a boy discovers a mysterious book that leads him into a dark labyrinth of secrets, betrayal, and hidden histories. Themes: memory, literature, and...

[5] Life of Pi (chu

In [None]:
llm_client = OpenAI(api_key=openai_api_key)

def chatbot_response(user_message, history=None):
    results = retriever.get_relevant_documents(user_message)
    if not results:
        return "Sorry, I couldn't find a matching book recommendation."
    doc = results[0]
    title = doc.metadata.get('title', 'Unknown Title')
    summary = doc.page_content[:700].replace('\n', ' ')
    # Compose a prompt for the LLM
    prompt = (
        f"You are a helpful book recommendation assistant. "
        f"A user asked: \"{user_message}\". "
        f"Here is a relevant book summary:\n\n"
        f"Title: {title}\nSummary: {summary}\n\n"
        f"Based on this, reply conversationally with a recommendation. "
        f"Only recommend the book with the exact title provided above."
    )
    try:
        response = llm_client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful book recommendation assistant."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=256,
            temperature=0.7,
        )
        llm_reply = response.choices[0].message.content.strip()
    except Exception as e:
        return f"LLM error: {e}"
    # After LLM reply, append the full summary using the tool
    full_summary = get_summary_by_title(title)
    return f"{llm_reply}\n\n**Full summary for '{title}':**\n{full_summary}"

chatbot = gr.ChatInterface(
    fn=chatbot_response,
    title="Book Recommendation Chatbot",
    description="Ask for a book by theme, genre, or keywords. The bot will recommend a matching book from the database.",
    examples=[
        ["I want a story about friendship and adventure"],
        ["Suggest a mystery novel"],
        ["Looking for a book about overcoming adversity"],
    ]
)

chatbot.launch()

  self.chatbot = Chatbot(


* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.
* To create a public link, set `share=True` in `launch()`.


