In [42]:
# %pip install langchain langchain-groq langchain-community pypdf chromadb sentence-transformers gradio

In [43]:
import pypdf
from sentence_transformers import SentenceTransformer
from langchain.retrievers import ParentDocumentRetriever
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.storage import InMemoryStore
from langchain_groq import ChatGroq
import gradio as gr

In [44]:
# Path to the Constitution PDF inside the notebook's /content directory.
doc_path = "/content/The_Constitution_of_Kenya_2010.pdf"

# PyPDFLoader opens the PDF; load() returns the list of Document objects
# extracted from it.
loader = PyPDFLoader(doc_path)
docs = loader.load()

# Sanity check: how many Document objects were created.
print(len(docs))

193


In [45]:
# Initializing the parent and child text splitters.

# Parent chunks: big sections used for retrieval and coarse context.
parent_splitter = RecursiveCharacterTextSplitter(chunk_overlap=100, chunk_size=1500)

# Child chunks: smaller pieces cut from the sections, used for detailed
# processing such as embedding.
child_splitter = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=700)

In [51]:
# Initializing the embedding model.

# Load the BGE model from Hugging Face via sentence-transformers.
bge_model = SentenceTransformer("BAAI/bge-base-en-v1.5")


class BGEEmbeddings:
  """Adapter that exposes `bge_model` through `embed_documents` / `embed_query`.

  Both methods request normalized embeddings and return plain Python lists.
  """

  def embed_documents(self, text):
    """Embed the given texts (used when building the vector database).

    Encoding is done with a batch size of 8.
    """
    vectors = bge_model.encode(text, batch_size=8, normalize_embeddings=True)
    return vectors.tolist()

  def embed_query(self, query):
    """Embed one user query so it can be searched against stored vectors."""
    vectors = bge_model.encode([query], normalize_embeddings=True)
    return vectors.tolist()[0]

In [52]:
# Creating the vector store.

constitution_vector_store = Chroma(
    persist_directory="./chroma",          # directory on disk where the database is saved
    embedding_function=BGEEmbeddings(),    # custom embedding model converting text to vectors
    collection_name="Kenya_Constitution",  # Chroma collection that groups the text vectors
)
# NOTE(review): this store is persisted to disk while the docstore created for
# the retriever is in-memory only -- confirm that re-running the notebook after
# a kernel restart does not leave stale or duplicated chunks in ./chroma.

In [53]:
# Creating the in-memory store (its contents live only in this kernel session).

store = InMemoryStore()

In [54]:
# Creating the parent-document retriever.

retriever = ParentDocumentRetriever(
    parent_splitter=parent_splitter,        # defines the big chunks
    child_splitter=child_splitter,          # defines the small chunks
    docstore=store,                         # holds the retrievable full parent documents
    vectorstore=constitution_vector_store,  # holds the child chunks with their embeddings
)

In [55]:
# Index the loaded documents through the retriever.

retriever.add_documents(documents=docs)

In [56]:
# Read the Groq API key from Colab's secret store.
# The value is bound to a name on purpose: leaving `userdata.get(...)` as the
# last bare expression of the cell makes the notebook display the key in clear
# text and save it in the cell output.
# SECURITY: an earlier run of this cell did exactly that, so the key that was
# printed must be revoked and a new one stored under the same secret name.
from google.colab import userdata

groq_api_key = userdata.get('alice_in_wonderland')

In [57]:
# Plain retriever built directly on the vector store (the child chunks).
child_retriever = constitution_vector_store.as_retriever()

In [58]:
# Initializing the LLM: a Groq-hosted chat model; the API key is read from
# Colab's secret store at construction time.

llm = ChatGroq(
    temperature=0.7,
    model_name="llama-3.3-70b-versatile",
    groq_api_key=userdata.get('alice_in_wonderland'),
)

In [59]:
# Prompt text for the QA chain.
# `{context}` and `{question}` are the two placeholders substituted when the
# prompt is formatted. The wording instructs the model to answer only from the
# supplied context, to say so when a question is out of context, and not to
# make up an answer.
template="""
Please answer the following questions from the context given according to the context given.

If the question is out of context, just say the question is out of context kindly give me another question.

Dont try to make up an answer

{context}

Question: {question}
"""

In [60]:
# Wrap the template string in a PromptTemplate.
# Despite its name, `qa_chain` holds the prompt object, not the chain itself.
qa_chain = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)

In [61]:
# Retrieval QA chain: `retriever` supplies the context, `llm` answers using the
# custom prompt, and the source documents are returned alongside the answer.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": qa_chain},
)

In [64]:
# Smoke-test the chain with a sample question.
question = "What are the three branches of government created by the Constitution of Kenya?"

# Use .invoke(): calling the chain object directly, chain({...}), is deprecated
# in LangChain and emits a deprecation warning on every call.
answer = chain.invoke({"query": question})
print(answer["result"])

  answer= chain({"query":question})


The three branches of government created by the Constitution of Kenya are:

1. The Legislative (Parliament and the legislative assemblies in the county governments)
2. The Executive (the national executive and the executive structures in the county governments)
3. The Judiciary (the Judiciary and independent tribunals)

These are mentioned in Article 1, Section 3 of the Constitution, which states that sovereign power is delegated to these State organs.


In [62]:
# Direct call to the LLM, bypassing retrieval.
# .invoke() replaces the deprecated .predict(); it returns a message object,
# so the generated text is read from its `.content` attribute.
response = llm.invoke("Write a short poem about constituion of kenya.").content
print(response)

In Kenya's heart, a document stands tall,
The Constitution, a guiding light for all.
Adopted in 2010, a new dawn did rise,
With principles of justice, equality in its eyes.

It safeguards rights, of every single soul,
Freedom of speech, and expression, made whole.
The Bill of Rights, a cornerstone so strong,
Protects the weak, and keeps the powerful in check, all day long.

Devolution's promise, of power to the ground,
Empowering counties, with resources unbound.
The three arms of government, in balance so fine,
The Legislature, Executive, and Judiciary, in harmony divine.

This Constitution, a foundation so grand,
A blueprint for progress, in this eastern land.
May it guide Kenya, towards a brighter day,
Where all citizens thrive, in a just and equal way.


In [65]:
# use gradio as user interface for chatbot

def qa_bot(message, history):
  """Answer one chat message with the retrieval QA chain.

  Args:
    message: The user's question text.
    history: Earlier chat turns passed in by the chat interface; not used.

  Returns:
    The chain's answer text (its "result" field), or a short prompt asking
    for a question when the message is empty or only whitespace.
  """
  # Blank input: skip retrieval and the LLM call entirely.
  if not message or not message.strip():
    return "Please enter a question about the Constitution of Kenya."
  # .invoke() is the supported entry point; chain({...}) is deprecated.
  result = chain.invoke({"query": message})
  return result["result"]

# Chat UI: each user message is passed to `qa_bot` and its return value is
# shown as the reply. (Title typo "Constitutiion" corrected.)
demo = gr.ChatInterface(
    fn=qa_bot,
    title="Constitution Of Kenya [RAG]",
    description="Ask question Based on context. Out-of-context questions will not be answered.",
    examples=[
        "What are the three branches of government created by the Constitution of Kenya?",
        "Can the constitution of Kenya be altered? If so, how?",
        "(Out of context Question) :Is it constitutionally wrong to eat ice cream for breakfast?",
    ],
)

# share=True gives a public link: anyone who has the URL can query the bot
# (and therefore spend the Groq API quota) until the link expires.
demo.launch(share=True)

  self.chatbot = Chatbot(


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://255704ed6b3f7a1c4a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


