In [1]:
import json
import google.generativeai as genai
import chromadb
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from typing import List
from dotenv import load_dotenv
import os


  from .autonotebook import tqdm as notebook_tqdm


In [2]:

class HospitalRAGProcessor:
    """Turn raw hospital records into an embedded, persisted Chroma collection.

    Pipeline: raw dicts -> LangChain ``Document`` objects -> character chunks ->
    Chroma vector store written under ``<db_path>/<collection_name>``.
    """

    def __init__(self, api_key: str, db_path: str):
        """Store the configuration and build the document-side embedding client.

        Args:
            api_key: Google Generative AI API key passed to the embedding client.
            db_path: Root directory under which collections are persisted.
        """
        self.api_key = api_key
        self.db_path = db_path
        self.embeddings = GoogleGenerativeAIEmbeddings(
            model="models/embedding-001",
            google_api_key=self.api_key,
            task_type="retrieval_document"
        )

    @staticmethod
    def _field(record: dict, key: str):
        """Return ``record[key]``, mapping a missing or ``None`` value to ``""``."""
        value = record.get(key)
        return "" if value is None else value

    def prepare_documents(self, data: List[dict]) -> List[Document]:
        """Convert raw records into LangChain Document objects.

        Non-dict entries and records without (truthy) ``content`` are reported
        with ``print`` and skipped.

        Args:
            data: Records with ``id``, ``title``, ``category``, ``content`` and an
                optional ``metadata`` dict holding ``keywords``.

        Returns:
            One ``Document`` per usable record, in input order.
        """
        documents = []

        for doc in data:
            if not isinstance(doc, dict):
                print(f"Invalid document format: {doc}")
                continue

            content = doc.get("content")
            if not content:
                print(f"Document {doc.get('id')} has no content. Skipping...")
                continue

            # "metadata" may be absent, null, or malformed; treat anything that
            # is not a dict as "no extra metadata" instead of crashing on .get().
            raw_metadata = doc.get("metadata")
            if not isinstance(raw_metadata, dict):
                raw_metadata = {}

            # Flatten the keyword list into one string; str() guards against
            # non-string entries (numbers, null) that would break str.join.
            keywords = raw_metadata.get("keywords", [])
            if isinstance(keywords, list):
                keywords = ", ".join(str(keyword) for keyword in keywords)
            elif keywords is None:
                keywords = ""

            documents.append(
                Document(
                    page_content=content,
                    metadata={
                        # Explicit nulls are normalised to "" so that every
                        # metadata value stays a plain scalar.
                        "title": self._field(doc, "title"),
                        "category": self._field(doc, "category"),
                        "keywords": keywords,
                        "id": self._field(doc, "id"),
                    },
                )
            )

        return documents

    def split_documents(self, documents: List[Document]) -> List[Document]:
        """Split documents into chunks of at most 1000 characters (100 overlap)."""
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=100,
            length_function=len,
        )
        return text_splitter.split_documents(documents)

    def create_vector_store(self, documents: List[Document], collection_name: str):
        """Embed ``documents`` into a Chroma collection and persist it.

        Args:
            documents: Chunks to embed and store.
            collection_name: Collection name; also used as the sub-directory of
                ``db_path`` the collection is written to.

        Returns:
            The created Chroma vector store.
        """
        vector_store = Chroma.from_documents(
            documents=documents,
            embedding=self.embeddings,
            persist_directory=os.path.join(self.db_path, collection_name),
            collection_name=collection_name
        )
        # Explicit persist() is only needed by older Chroma wrappers; newer ones
        # persist automatically and may not expose the method at all, so call
        # it only when it exists.
        persist = getattr(vector_store, "persist", None)
        if callable(persist):
            persist()
        return vector_store

    def process_and_store_documents(self, data: List[dict], collection_name: str):
        """Run the full pipeline: prepare, split, embed and persist.

        Args:
            data: Raw records (see ``prepare_documents``).
            collection_name: Target Chroma collection name.

        Returns:
            The created Chroma vector store.
        """
        documents = self.prepare_documents(data)
        split_docs = self.split_documents(documents)
        return self.create_vector_store(split_docs, collection_name)



In [3]:
def create_database():
    """Build the hospital vector store from ``data_set.json``.

    Loads environment variables from ``.env``, reads the JSON data set, and
    embeds/persists it under ``./database/hospital_documents_langchain``.

    Returns:
        The created Chroma vector store.

    Raises:
        RuntimeError: If the ``API_KEY`` environment variable is not set.
        ValueError: If the JSON root is not a list of records.
    """
    load_dotenv()

    # Fail fast with a clear message instead of passing None to the client.
    api_key = os.getenv('API_KEY')
    if not api_key:
        raise RuntimeError(
            "API_KEY is not set; define it in the environment or in a .env file."
        )

    # JSON is UTF-8 by specification; do not depend on the platform's default
    # encoding (e.g. cp1252 on Windows).
    with open('data_set.json', 'r', encoding='utf-8') as file:
        data = json.load(file)

    if not isinstance(data, list):
        raise ValueError(
            f"data_set.json must contain a JSON array of documents, got {type(data).__name__}"
        )

    processor = HospitalRAGProcessor(
        api_key=api_key,
        db_path="./database"
    )

    vector_store = processor.process_and_store_documents(
        data=data,
        collection_name="hospital_documents_langchain"
    )

    print("Vector store created successfully!")
    return vector_store



In [4]:
# Build and persist the vector store; the returned store object is shown as the cell output.
create_database()

Vector store created successfully!


  vector_store.persist()


<langchain_community.vectorstores.chroma.Chroma at 0x275f2dda870>

In [5]:
import os
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory,RunnableLambda
from langchain_google_genai import GoogleGenerativeAI,GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [6]:
# Read the Gemini API key from the environment (.env is supported) instead of
# embedding it in the notebook. A key that has ever been committed or shared
# inside a notebook must be treated as leaked and rotated.
load_dotenv()
api_key = os.getenv('API_KEY')
if not api_key:
    raise RuntimeError(
        "API_KEY is not set; define it in the environment or in a .env file."
    )

db_path = "./database"
collection_name = "hospital_documents_langchain"

# Chat model used both for question rewriting and for answering.
llm = GoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=api_key,
    temperature=0.3
)

# This embedder only ever embeds user questions (the documents were embedded
# when the collection was built), so use the query task type rather than
# "retrieval_document".
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=api_key,
    task_type="retrieval_query"
)

# Re-open the collection persisted under <db_path>/<collection_name>.
retriever = Chroma(
    persist_directory=os.path.join(db_path, collection_name),
    embedding_function=embeddings,
    collection_name=collection_name
).as_retriever()

In [7]:
### Contextualize question ###
# System instruction for the rewriting step: turn a follow-up question into a
# standalone one so it can be used as a retrieval query on its own.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, formulate a standalone question "
    "which can be understood without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Message layout: system instruction, prior turns, then the new user input.
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

# Retriever that first rewrites the question using the chat history.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)


In [8]:
### Answer question ###
# System prompt for the answering step. `{context}` is filled with the retrieved
# documents; the conversation history is supplied separately through the
# `chat_history` placeholder below, so the prompt itself carries no history
# section.
qa_system_prompt = """You are a friendly customer service agent working for Horizon Hospitals Lanka PLC. 
            Your goal is to assist with any questions using the most relevant and up-to-date information provided in the context below. 
            When responding, ensure you:

            - Keep your tone warm, professional, and helpful, just as a caring hospital representative would.
            - Provide detailed and accurate answers, incorporating only relevant data from the context.
            - If the information doesn't directly address the question, acknowledge that politely and offer a general response if appropriate.
            - Avoid making up answers if the data does not apply. It's better to admit that the information is not available than to provide inaccurate information and mention to contact hospital via phone.
            
            Context: {context}

            Based on the context, craft a thoughtful, precise, and helpful response:
            """

# Message layout: system instruction (with context), prior turns, new user input.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# Chain that stuffs the retrieved documents into `{context}` and asks the LLM.
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

# Full RAG chain: history-aware retrieval followed by answer generation.
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)



In [9]:
### Statefully manage chat history ###
# Module-level dict holding one chat history object per session id.
# It is a plain dict, so its contents are lost when the kernel restarts.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Histories are kept in the module-level ``store`` dict, so repeated calls
    with the same id return the same object.
    """
    try:
        return store[session_id]
    except KeyError:
        # First time this session is seen: register an empty history.
        history = ChatMessageHistory()
        store[session_id] = history
        return history


# Wrap the RAG chain so each call loads and updates the per-session history:
# the user text is read from "input", prior turns are injected as
# "chat_history", and the "answer" field is recorded as the reply.
conversational_rag_chain = RunnableWithMessageHistory(
    runnable=rag_chain,
    get_session_history=get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [10]:
# Ask one question in session "abc123" and display only the generated answer.
conversational_rag_chain.invoke(
    {"input": "What is the name of the hospital?"},
    # The session id selects (and on first use creates) the "abc123" key in `store`.
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

"Hello!  The name of our hospital is Horizon Hospitals Lanka PLC. We're here to help in any way we can.  😊 \n"