<a href="https://colab.research.google.com/github/Asritha0606/GenAIcohort_May2025/blob/main/GenAIcohort_May2025_RAGAssignment_Asritha.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Install Required Packages

In [None]:
!pip install -q langchain faiss-cpu gradio sentence-transformers groq langchain-groq langchain-community text-generation

In [None]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferWindowMemory
from langchain_groq import ChatGroq
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain, create_history_aware_retriever
from langchain.prompts import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ConversationBufferWindowMemory
import gradio as gr
import re

RAG + LLM + Memory + Guardrails + Cache

Load and split document

In [None]:
# Load the knowledge-base text file from the Colab filesystem. The file must
# already exist at this path (e.g. uploaded to /content) before this cell runs.
# `docs` is consumed by the text splitter in the next cell.
loader = TextLoader("/content/AYUSHMAN BHARAT.txt")
docs = loader.load()

In [None]:
# Break the loaded document(s) into overlapping chunks for embedding.
# chunk_size=1000 / chunk_overlap=100 are measured by the splitter's length
# function (presumably characters, the library default — confirm if changed).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = text_splitter.split_documents(docs)

Create embeddings and vector store

In [None]:
# Embed every chunk with the MiniLM sentence-transformer model and index the
# vectors in an in-memory FAISS store.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(chunks, embeddings)
# Retriever view over the index; search_kwargs k=3 requests 3 chunks per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

In [None]:
# Read the Groq API key from the Colab secrets store (secret name: 'groq_api')
# and export it as GROQ_API_KEY so the key is never hard-coded in the notebook.
# NOTE(review): ChatGroq is expected to pick the key up from this environment
# variable — confirm against the langchain-groq version in use.
import os
from google.colab import userdata
groq_api_key = userdata.get('groq_api')
os.environ["GROQ_API_KEY"] = groq_api_key

In [None]:
# Chat model used both for query rewriting and for answer generation.
# temperature=0 is chosen to keep answers as repeatable as the backend allows.
llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0)

Memory for 3-turn history

In [None]:
# Conversation memory shared by the whole notebook (one global instance).
# k=3 is the window size (the "3-turn history" above); return_messages=True
# asks for message objects rather than a single formatted string.
memory = ConversationBufferWindowMemory(k=3, return_messages=True)

RAG QA Chain

In [None]:

# --- Retrieval side ---------------------------------------------------------
# Prompt that shows the model the prior turns plus the latest user input and
# asks it to produce a search query for the vector store.
question_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant for answering questions about Ayushman Bharat."),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
        ("human", "Given the above conversation, generate a search query for relevant information:"),
    ]
)

# Retriever wrapper that uses the LLM and question_prompt to rewrite the query
# from the conversation before searching the FAISS retriever.
retriever_with_context = create_history_aware_retriever(llm, retriever, question_prompt)

# --- Generation side --------------------------------------------------------
# Answer prompt: the retrieved chunks are injected through {context}, followed
# by the conversation so far and the current question.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are an expert assistant answering public queries about Ayushman Bharat using the following context:\n\n{context}"),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

# Chain that fills qa_prompt with the retrieved documents and calls the LLM.
document_chain = create_stuff_documents_chain(llm, qa_prompt)

# End-to-end RAG pipeline: retrieve with history awareness, then answer.
# Invoked with {"input": ..., "chat_history": ...}; the reply is under "answer".
qa_chain = create_retrieval_chain(retriever_with_context, document_chain)


Domain guardrails

In [None]:
# Terms that mark a question as being about Ayushman Bharat / healthcare.
# All entries are lowercase because queries are lowercased before matching.
domain_keywords = [
    "ayushman bharat",
    "ayushman",
    "bharat",
    "health",
    "healthcare",
    "hospitals",
    "nhm",
    "pmjay",
    "hwc",
    "insurance",
]


def is_in_domain(query: str) -> bool:
    """Tell whether a query mentions at least one domain keyword.

    Matching is a case-insensitive substring test, so a keyword embedded in a
    longer word (e.g. "health" inside "unhealthy") also counts.

    Args:
        query: The raw user question.

    Returns:
        True if any entry of ``domain_keywords`` occurs in the lowercased
        query, otherwise False (including for an empty string).
    """
    lowered = query.lower()
    for term in domain_keywords:
        if term in lowered:
            return True
    return False


In-memory cache

In [None]:
# Maps a normalized (stripped, lowercased) question to the answer produced for
# it, so repeated questions skip the LLM call. Lives for the kernel session.
query_cache = {}

def chat_interface(query, chat_history):
    """Answer one user turn and return the updated chat transcript.

    Pipeline: domain guardrail -> cache lookup -> history-aware RAG chain.
    Every answered turn (fresh or cached) is recorded in the global ``memory``
    so that later turns actually see the recent conversation; previously the
    memory was only read and therefore always empty.

    Args:
        query: The user's message. ``None`` or whitespace-only input is ignored.
        chat_history: Transcript so far as a list of ``[user, bot]`` pairs;
            ``None`` is treated as an empty transcript. It is not mutated.

    Returns:
        A new list: ``chat_history`` plus one ``[query, reply]`` pair, or an
        unchanged copy of ``chat_history`` when the query is blank.

    Note:
        Cached answers are keyed by the question text only, so a cached reply
        does not reflect the conversation that preceded the repeat question.
    """
    print("Incoming query:", query)
    print("Chat history:", chat_history)

    chat_history = list(chat_history or [])
    query = (query or "").strip()
    if not query:
        # Nothing to answer; avoid adding an empty row to the transcript.
        return chat_history

    if not is_in_domain(query):
        return chat_history + [[query, "❌ Sorry, I can only answer questions about Ayushman Bharat and healthcare topics."]]

    # Normalize so "What is PMJAY?" and " what is pmjay? " share one entry.
    cache_key = query.lower()

    if cache_key in query_cache:
        answer = query_cache[cache_key]
        # Keep the conversation memory in step with what the user has seen.
        memory.save_context({"input": query}, {"output": answer})
        return chat_history + [[query, f"✅ Cached: {answer}"]]

    # load_memory_variables applies the k-turn window; reading
    # memory.chat_memory.messages directly would return the full buffer.
    recent_messages = memory.load_memory_variables({})[memory.memory_key]

    result = qa_chain.invoke({
        "input": query,
        "chat_history": recent_messages
    })

    answer = result["answer"]
    query_cache[cache_key] = answer
    # Record the turn so follow-up questions can be rephrased with context.
    memory.save_context({"input": query}, {"output": answer})
    return chat_history + [[query, answer]]


Launch Gradio Chat UI

In [None]:
# Chat UI. The transcript is kept in `state` and mirrored into the Chatbot
# widget after every turn.
with gr.Blocks() as demo:
    gr.Markdown("### 🤖 Ayushman Bharat FAQ Chatbot")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask about Ayushman Bharat...")
    clear = gr.Button("Clear")

    # Transcript as a list of [user, bot] pairs.
    state = gr.State([])

    def user_interaction(user_message, chat_history):
        """Run one chat turn and return the updated transcript."""
        return chat_interface(user_message, chat_history)

    def _submit_turn(user_message, chat_history):
        """Submit handler: return the new transcript for the widget AND the state."""
        updated_history = user_interaction(user_message, chat_history)
        return updated_history, updated_history

    def _reset_conversation():
        """Clear the visible chat, the stored transcript and the LLM memory."""
        memory.clear()
        return [], []

    # `state` must also be an output; otherwise it stays [] forever and each
    # turn replaces the transcript instead of extending it.
    msg.submit(_submit_turn, [msg, state], [chatbot, state])
    msg.submit(lambda: "", None, msg)
    # Reset everything, not just the widget, so a cleared chat starts fresh.
    clear.click(_reset_conversation, None, [chatbot, state])



In [None]:
# Start the Gradio app. NOTE(review): debug=True is expected to keep this cell
# running and surface server-side errors/prints in its output — interrupt the
# cell to stop the app.
demo.launch(debug=True)