In [2]:
import os

In [20]:
from langchain.document_loaders.csv_loader import CSVLoader
import time
# Import the Chroma class, which is used to create and interact with a Chroma vector database.
from langchain_chroma import Chroma

# Import the GoogleGenerativeAIEmbeddings class to create numerical vector representations (embeddings) of text using Google's models.
from langchain_google_genai import GoogleGenerativeAIEmbeddings


REVIEWS_CSV_PATH = "reviews.csv"

# Directory on disk where the Chroma vector database is persisted.
REVIEWS_CHROMA_PATH = "chroma_data"

# Number of documents embedded per batch.
batch_size = 20
# Seconds to wait between batches to respect the per-minute API rate limit.
batch_pause_seconds = 30

# Read the key in this cell instead of relying on a later one: on a fresh
# kernel (Restart & Run All) `api_key` would otherwise be undefined here.
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError("GOOGLE_API_KEY environment variable is not set.")

# CSVLoader produces one Document per CSV row; the page content is built from
# the row's columns. `source_column` does NOT pick the content column: it only
# selects which column's value is stored in each Document's metadata["source"].
loader = CSVLoader(
    file_path=REVIEWS_CSV_PATH,
    source_column="review",
)

# Load every row of the CSV into a list of Document objects.
reviews = loader.load()

# Embedding function, defined once and reused for every batch.
embedding_function = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-001",
    google_api_key=api_key,
)

# Total number of batches (ceiling division; 0 when there are no reviews).
num_batches = (len(reviews) - 1) // batch_size + 1

# Open (or create) the persisted store up front so every batch, including the
# first, goes through the same `add_documents` call.
reviews_vector_db = Chroma(
    persist_directory=REVIEWS_CHROMA_PATH,
    embedding_function=embedding_function,
)

# Embed and store the documents in batches to avoid hitting the API rate limit.
for i in range(0, len(reviews), batch_size):
    batch_docs = reviews[i:i + batch_size]
    current_batch_num = i // batch_size + 1

    # Stable, position-based ids: re-running this cell (or resuming after an
    # interruption) overwrites the same entries instead of appending duplicates
    # under freshly generated ids.
    # NOTE: a store built earlier with auto-generated ids should be deleted
    # before re-running, otherwise those old entries remain alongside these.
    batch_ids = [f"review-{i + offset}" for offset in range(len(batch_docs))]

    print(f"Processing batch {current_batch_num}/{num_batches}...")
    reviews_vector_db.add_documents(documents=batch_docs, ids=batch_ids)

    if current_batch_num < num_batches:
        # Pause only when another batch follows; waiting after the last one
        # just delays completion.
        print(f"Batch {current_batch_num} processed. Waiting for {batch_pause_seconds} seconds...")
        time.sleep(batch_pause_seconds)
    else:
        print(f"Batch {current_batch_num} processed.")

print("Vector database created successfully and saved to the specified directory.")

Processing batch 1/51...
Batch 1 processed. Waiting for 30 seconds...
Processing batch 2/51...
Batch 2 processed. Waiting for 30 seconds...
Processing batch 3/51...
Batch 3 processed. Waiting for 30 seconds...
Processing batch 4/51...
Batch 4 processed. Waiting for 30 seconds...
Processing batch 5/51...
Batch 5 processed. Waiting for 30 seconds...
Processing batch 6/51...
Batch 6 processed. Waiting for 30 seconds...
Processing batch 7/51...
Batch 7 processed. Waiting for 30 seconds...
Processing batch 8/51...
Batch 8 processed. Waiting for 30 seconds...
Processing batch 9/51...
Batch 9 processed. Waiting for 30 seconds...
Processing batch 10/51...
Batch 10 processed. Waiting for 30 seconds...
Processing batch 11/51...
Batch 11 processed. Waiting for 30 seconds...
Processing batch 12/51...
Batch 12 processed. Waiting for 30 seconds...
Processing batch 13/51...
Batch 13 processed. Waiting for 30 seconds...
Processing batch 14/51...
Batch 14 processed. Waiting for 30 seconds...
Processing

In [3]:
from langchain_core.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser 

In [4]:
# Instruction text for the model. `{context}` and `{query}` are the two
# placeholders filled in when the prompt is formatted.
review_template_str = (
    "Your job is to use patient reviews to answer questions about their experience at a hospital.\n"
    "Use the following context to answer questions.\n"
    "Be as detailed as possible, but don't make up any information that's not from the context.\n"
    "If you don't know an answer, say you don't know.\n"
    "\n"
    "context: {context}, question: {query}\n"
)

# Prompt template that expects exactly the two variables named above.
prompt = PromptTemplate(
    input_variables=["context", "query"],
    template=review_template_str,
)

In [6]:
# Google API key taken from the environment; None when the variable is unset.
api_key = os.environ.get("GOOGLE_API_KEY")

In [7]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Chat model used as the generation step of the chain.
chat_model = ChatGoogleGenerativeAI(
    google_api_key=api_key,
    model="gemini-2.5-flash",
)

In [8]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_chroma import Chroma

In [9]:
# Directory holding the persisted Chroma data.
REVIEWS_CHROMA_PATH = "chroma_data"

# Embedding model handed to the store as its embedding function.
embedding_function = GoogleGenerativeAIEmbeddings(
    google_api_key=api_key,
    model="models/gemini-embedding-001",
)

# Open the Chroma store backed by REVIEWS_CHROMA_PATH.
reviews_vector_db = Chroma(
    embedding_function=embedding_function,
    persist_directory=REVIEWS_CHROMA_PATH,
)

In [10]:
def _format_docs(docs):
    """Join the text of the retrieved documents into one prompt-ready string.

    Without this step the retriever's list of Document objects is interpolated
    into the prompt as a Python repr (metadata, quoting and escape sequences
    included), which adds noise and wastes tokens.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Retrieve the 10 most similar reviews for each incoming question.
reviews_retriever = reviews_vector_db.as_retriever(search_type="similarity", search_kwargs={"k": 10})

# question -> {context: formatted reviews, query: question} -> prompt -> model -> plain string
chain = (
    {"context": reviews_retriever | _format_docs, "query": RunnablePassthrough()}
    | prompt
    | chat_model
    | StrOutputParser()
)

In [20]:
# Ask a question that the review data should be able to answer.
question = "Has anyone complained about communication with the hospital staff?"
answer = chain.invoke(question)
print(answer)

Yes, several patients have complained about communication with the hospital staff. Here are the details from the reviews:

*   **Makayla Reynolds (review_id: 707)** encountered issues with the nursing staff's communication, noting a lack of coordination that led to confusion about her medication schedule and treatment plan at Brown-Golden.
*   **Dennis Fitzgerald (review_id: 43)** stated that the hospital staff lacked proper communication among themselves, causing confusion about his treatment plan at Shea LLC.
*   **Jessica Mays (review_id: 773)** experienced communication issues during her stay, finding the medical staff disorganized, which led to confusion about her treatment plan at Rush, Owens and Johnson.
*   **Terri Smith (review_id: 73)** had a frustrating experience due to unclear communication between the medical staff and herself, leading to misunderstandings about her treatment plan at Little-Spencer.
*   **Jacob Smith (review_id: 1001)** was disappointed with the lack of c

In [21]:
# Ask something unrelated to the reviews to see how the chain responds.
question = "What is the capital of France?"
print(
    chain.invoke(question)
)

I don't know.


In [22]:
# Sample questions to run through the chain one after another.
questions = [
    "Has anyone complained about communication with the hospital staff?",
    "What do patients say about the cleanliness of the hospital?",
    "Are there any positive comments about the doctors?",
    "Did anyone mention waiting times?",
    "How do patients feel about the food served?",
]

# Visual divider printed after each question/answer pair.
separator = "-" * 40

for query_text in questions:
    print("Q:", query_text)
    reply = chain.invoke(query_text)
    print("A:", reply)
    print(separator)

Q: Has anyone complained about communication with the hospital staff?
A: Yes, several patients have complained about communication with the hospital staff.

Here are the details from the reviews:

*   **Makayla Reynolds (review_id: 707)** encountered issues with the **nursing staff's communication**, noting a lack of coordination that led to confusion about her medication schedule and treatment plan.
*   **Dennis Fitzgerald (review_id: 43)** stated that the **hospital staff lacked proper communication among themselves**, which caused confusion about his treatment plan.
*   **Jessica Mays (review_id: 773)** reported communication issues during her stay, observing that the **medical staff seemed disorganized**, leading to confusion about her treatment plan.
*   **Terri Smith (review_id: 73)** had a frustrating experience due to **unclear communication between the medical staff and her**, resulting in misunderstandings about her treatment plan.
*   **Jacob Smith (review_id: 1001)** was di

In [11]:
def respond_to_user_question(question: str, history: list) -> str:
    """Answer a chat message by running it through the module-level ``chain``.

    Args:
        question: The user's message, forwarded unchanged to ``chain.invoke``.
        history: Earlier conversation turns; accepted but not used here.

    Returns:
        The value produced by ``chain.invoke`` for the question.
    """
    answer = chain.invoke(question)
    return answer

In [None]:
import gradio as gr
# Build the chat UI around the question-answering callback.
interface = gr.ChatInterface(
    title="Review Helper Bot",
    fn=respond_to_user_question,
)

# Start the Gradio app with debug mode enabled.
interface.launch(debug=True)

  self.chatbot = Chatbot(


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.
