In [1]:
# PDF processing
from langchain_community.document_loaders import PyPDFLoader
# Splitting
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Embedding & Chroma DB
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
# from sentence_transformers import SentenceTransformer
from langchain.retrievers import MultiQueryRetriever
from langchain.prompts import PromptTemplate
import chromadb
from langchain_chroma import Chroma # Import Chroma class for vectorstore initialization.
from langchain_groq import ChatGroq
from langchain.schema import HumanMessage
# Groq for LLM
import groq

import os

In [2]:
# API Key
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

In [3]:
# Load the pdf file.
loader = PyPDFLoader("constitution_of_kenya.pdf")
pdf = loader.load()

In [4]:
# Initializing the splitters.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(pdf)

In [5]:
# Initialize the embedding model.
model = SentenceTransformerEmbeddings(model_name="multi-qa-MiniLM-L6-cos-v1", show_progress=True)

  model = SentenceTransformerEmbeddings(model_name="multi-qa-MiniLM-L6-cos-v1", show_progress=True)
  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Prepare texts for embedding
texts = [chunk.page_content for chunk in chunks]
metadatas = [chunk.metadata for chunk in chunks]

In [7]:
# Generate embeddings
embeddings_vectors = model.embed_documents(texts)

Batches:   0%|          | 0/16 [00:00<?, ?it/s]

Batches: 100%|██████████| 16/16 [00:26<00:00,  1.68s/it]


In [8]:
# Setting up Chroma
client = chromadb.PersistentClient(path = "chroma_store")
collection = client.get_or_create_collection(name = "Kenya_constitution")

In [9]:
# Initialize ChromaDB with updated settings
vectore_store = Chroma(
    collection_name="kenya_constitution",
    embedding_function=model,
    persist_directory="./chroma_store"
)

In [10]:
# Initialize the retriever
question = "What is the structure of the Kenyan government?"
llm = ChatGroq(
    model="llama3-70b-8192",
    temperature=0,
    groq_api_key=GROQ_API_KEY
)

retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=vectore_store.as_retriever(), llm=llm
)

In [11]:
def get_response(question):
    # Retrieve relevant documents
    docs = retriever_from_llm.get_relevant_documents(question)

    # Format the retrieved documents (using a simple prompt template here)
    prompt_template = """You are a constitutional advisor specialized in the Kenyan Constitution.
    Use only the following excerpts from the Kenyan Constitution to answer the question.
    If the specific information isn't found in these excerpts, state that the information
isn't available in the provided constitutional sections rather than speculating.

CONSTITUTIONAL EXCERPTS:
{context}

Question: {question}
Answer based strictly on the Kenyan Constitution:"""


    # Moved the prompt formatting inside the function
    prompt = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    ).format(context="\n\n".join([doc.page_content for doc in docs]), question=question)

    # Generate a response using the LLM
    response = llm([HumanMessage(content=prompt)])
    print(response)
    return response # Added return statement to return the response

In [12]:
# Example usage
query = input("You:")
response = get_response(query)
print(response)

  docs = retriever_from_llm.get_relevant_documents(question)
Batches: 100%|██████████| 1/1 [00:00<00:00, 83.50it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 85.82it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 101.59it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 98.51it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 14.15it/s]
  response = llm([HumanMessage(content=prompt)])


content='According to Article 137 of the Kenyan Constitution:\n\n"A person qualifies for nomination as a presidential candidate if the person—\n\n(a) is a Kenyan citizen by birth;\n\n(b) is qualified to stand for election as a member of Parliament;\n\n(c) is nominated by a political party, or is an independent candidate;\n\n(d) meets the educational, moral and ethical requirements prescribed by this Constitution or by an Act of Parliament;\n\n(e) is nominated by not fewer than two thousand voters from each of a majority of the counties."\n\nTherefore, the qualifications for becoming a President in Kenya are:\n\n1. Being a Kenyan citizen by birth\n2. Being qualified to stand for election as a member of Parliament\n3. Being nominated by a political party or being an independent candidate\n4. Meeting the educational, moral, and ethical requirements prescribed by the Constitution or an Act of Parliament\n5. Being nominated by not fewer than two thousand voters from each of a majority of th