In [1]:
import os
import gradio as gr
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
from langchain.retrievers import ParentDocumentRetriever

  from .autonotebook import tqdm as notebook_tqdm


In [None]:


#os.environ['groq_api_key'] = "<your-groq-api-key>"  # placeholder only: never commit a real key, and revoke any key that was ever saved here

In [3]:
# ----------------------------
# Load documents
# ----------------------------
# Directory holding the source PDFs. Set the RAG_DOCUMENT_DIR environment
# variable to point the notebook at another machine's copy of the documents;
# the original location is kept as the fallback so existing setups still work.
document_path = os.environ.get(
    "RAG_DOCUMENT_DIR",
    "C:/Users/USER/Downloads/LUX TECH ACADEMY/RAG CHATBOT/document",
)

# Fail fast so a wrong path is reported here rather than surfacing later as a
# chatbot with an empty knowledge base.
if not os.path.isdir(document_path):
    raise FileNotFoundError(f"Document directory not found: {document_path!r}")

loader = PyPDFDirectoryLoader(document_path)
documents = loader.load()

# Nothing to index means every later answer would be produced without context.
if not documents:
    raise ValueError(f"No PDF content could be loaded from {document_path!r}")

In [4]:
# ----------------------------
# Define splitters
# ----------------------------
# Parent splitter: yields the large chunks (chunk_size 2000, overlap 200).
parent_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=2000)

In [5]:

# Child splitter: yields the small chunks used for embeddings/retrieval
# (chunk_size 600, overlap 300).
child_splitter = RecursiveCharacterTextSplitter(chunk_overlap=300, chunk_size=600)

In [6]:
# ----------------------------
# Embeddings + Vector Store
# ----------------------------
# Embedding model handed to the vector store below.
embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Chroma collection that uses the embedding model above; no persistence
# location is passed here.
vector_store = Chroma(embedding_function=embeddings_model, collection_name="sample_collection")

  embeddings_model = HuggingFaceEmbeddings(


In [None]:
# ----------------------------
# ParentDocumentRetriever
# ----------------------------
from langchain.storage import InMemoryByteStore

# In-memory store passed to the retriever as its byte store (meant for the
# parent docs); its contents do not outlive the kernel session.
docstore = InMemoryByteStore()

# Wire together the vector store, the in-memory store and both splitters.
retriever = ParentDocumentRetriever(
    parent_splitter=parent_splitter,
    child_splitter=child_splitter,
    byte_store=docstore,
    vectorstore=vector_store,
)

In [9]:

# Hand the loaded documents to the retriever, which was configured with the
# parent and child splitters (documents are automatically split into
# parent + child chunks).
# NOTE(review): re-running this cell may add the same documents again — confirm
# before executing it more than once per kernel session.
retriever.add_documents(documents)


In [None]:

# ----------------------------
# LLM
# ----------------------------
# No api_key argument is passed, so the credentials must reach ChatGroq some
# other way (presumably an environment variable — confirm for your setup).
LLM = ChatGroq(temperature=0.5, model="llama-3.1-8b-instant")


In [11]:
# ----------------------------
# Prompt Template
# ----------------------------
# Prompt with three placeholders: the user's question, the conversation
# history, and the retrieved knowledge text.
rag_prompt_template = PromptTemplate(
    template="""
    You are a contextual assistant. Answer the question using only the knowledge provided.
    Do not say "according to documents" or similar.

    Question: {question}

    Conversation history: {history}

    Knowledge:
    {knowledge}
    """,
    input_variables=["question", "history", "knowledge"],
)

In [12]:
# ----------------------------
# Ask function
# ----------------------------
def _format_history(history):
    """Render chat history as plain ``role: content`` lines for the prompt.

    Dict turns (the ``{"role": ..., "content": ...}`` shape built by the chatbot
    function) are rendered as ``role: content``; any other item falls back to
    ``str(item)``. An empty history is rendered as ``(none)``.
    """
    lines = []
    for turn in history:
        if isinstance(turn, dict):
            lines.append(f"{turn.get('role', 'unknown')}: {turn.get('content', '')}")
        else:
            lines.append(str(turn))
    return "\n".join(lines) if lines else "(none)"


def ask_question(question, history=None):
    """Answer ``question`` from the retrieved documents and the chat history.

    Args:
        question: The user's question; also used as the retrieval query.
        history: Optional sequence of earlier conversation turns. ``None`` or an
            empty sequence means there is no prior conversation.

    Returns:
        The model's complete answer as a single string.
    """
    history = history or []

    # `invoke` is the supported retriever entry point; `get_relevant_documents`
    # is deprecated and emits a warning on every call.
    docs = retriever.invoke(question)
    knowledge = "\n\n".join(doc.page_content for doc in docs)

    # Pass the history as a readable transcript instead of the raw Python repr
    # of a list of dicts.
    rag_prompt = rag_prompt_template.format(
        question=question,
        history=_format_history(history),
        knowledge=knowledge,
    )

    # The response is streamed from the model; the pieces are joined into one string.
    return "".join(chunk.content for chunk in LLM.stream(rag_prompt))


In [13]:
# Sample query 1: ask about the Senate's functions and print the answer.
print(ask_question("What are the functions of the Senate as established in the Constitution?"))

  docs = retriever.get_relevant_documents(question)


The functions of the Senate, as established in the Constitution, are:

1. To represent the counties and protect their interests.
2. To participate in the law-making function of Parliament by considering, debating, and approving Bills concerning counties.
3. To determine the allocation of national revenue among counties and exercise oversight over national revenue allocated to county governments.
4. To participate in the oversight of State officers by considering and determining any resolution to remove the President or Deputy President from office.
5. To perform any function or power that is required to be performed or exercised by both Houses (National Assembly and Senate) jointly, or one after the other.


In [14]:
# Sample query 2: ask about mobile money and financial inclusion (2009 survey).
print(ask_question("What can you say about Mobile money as a driver of financial inclusion in Kenya according to the 2009 survey analysis?"))

Mobile money, specifically M-PESA, has been a significant driver of financial inclusion in Kenya. It has been responsible for the lion's share of the growth in access to formal financial services since 2006. As of 2009, M-PESA had 5.3 million registered customers, which equals 25 percent of the adult population. Close to half of its customers are in the formally included category exclusively on the basis of being registered M-PESA users, accounting for 85 percent of the increase in the formally included category over the period between the two surveys.


In [15]:
# Sample query 3: ask about judicial independence and print the answer.
print(ask_question("How does the Constitution ensure judicial independence in Kenya?"))

The Constitution ensures judicial independence in Kenya by:

- Establishing the Judicial Service Commission (Article 173) to manage the administration of the judiciary and protect its independence.
- Providing for the tenure of office of the Chief Justice and other judges (Article 167), which ensures that they serve for a fixed term and cannot be removed arbitrarily.
- Prohibiting the removal of judges from office except on specified grounds and through a fair and transparent process (Article 168).
- Ensuring that the judiciary has its own fund (Article 173) to manage its affairs and maintain its independence.
- Establishing the Supreme Court, Court of Appeal, and High Court as independent institutions (Article 163-165) with their own powers and jurisdiction.
- Providing for the appointment of judges through a transparent and independent process (Article 166).
- Protecting the independence of the judiciary from interference by other branches of government, including the Executive and L

In [16]:
# Sample query 4: ask about commercial banks and MFIs (2009 survey).
print(ask_question("What were the contributions of commercial banks and MFIs according to the 2009 survey?"))

According to the 2009 survey, the contributions of commercial banks and MFIs were:

Commercial banks:
- The number of deposit accounts increased by 3.9 million, up from 2.5 million at the end of 2005, to 6.4 million accounts at the end of 2008, an impressive growth of 152 percent.
- Deposits increased 71 percent, from Ksh. 560 billion equivalent to 36 percent of GDP to Ksh. 864 billion, equivalent to 41 percent of GDP.
- The branch network expanded by 60 percent, with banks opening 312 new branches, expanding the network from 534 to 887 branches.
- The ATM infrastructure expanded fourfold, from 323 units to 1,325 units.
- Staff employed doubled from 12,600 to 25,400.

MFIs:
- The customer base grew by 117 percent.
- The proportion of urban residents using MFIs was higher than that of rural residents in 2006, but this is now more even at 3.5% and 3.4% reflecting a significant expansion of MFI outreach to rural areas.
- The pattern of factors influencing MFI use were prioritized differen

In [None]:
# ----------------------------
# Chatbot function
# ----------------------------
def chatbot_fn(question, history):
    """Answer one chat turn and return the updated conversation.

    Args:
        question: The user's new message.
        history: Earlier turns as ``{"role": ..., "content": ...}`` dicts;
            ``None`` is treated as an empty conversation.

    Returns:
        A ``(history, history)`` pair holding the updated list of turns twice,
        one value for each of the two outputs the UI callback fills.
    """
    # Work on a copy: the caller's list is left untouched.
    prior_turns = list(history or [])

    # Only the earlier turns are passed as history. The current question is
    # already sent separately, so including it here would put it in the prompt twice.
    answer = ask_question(question, prior_turns)

    updated_history = prior_turns + [
        {"role": "user", "content": question},
        {"role": "assistant", "content": answer},
    ]
    return updated_history, updated_history

# ----------------------------
# Gradio UI
# ----------------------------
with gr.Blocks() as demo:
    gr.Markdown("## Constitution of Kenya 2010 ChatBot (ParentDocument Retriever)")
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(label="Ask a question")
    state = gr.State([])

    def respond(message, history):
        """Handle a textbox submission.

        Returns the values for the chatbot display, the stored conversation
        state, and the textbox (an empty string, so the input is cleared).
        """
        # Ignore blank submissions instead of running retrieval and the LLM on them.
        if not message or not message.strip():
            return history, history, ""
        updated_chat, updated_state = chatbot_fn(message, history)
        return updated_chat, updated_state, ""

    # The third output resets the textbox after each submission.
    msg.submit(respond, [msg, state], [chatbot, state, msg])

demo.launch()
