In [1]:
# import libraries
import os
import textwrap
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
from langchain_groq import ChatGroq

  from .autonotebook import tqdm as notebook_tqdm


## 1. SETUP AND CONFIGURATION

In [2]:
# Load environment variables from .env file
load_dotenv()

# Load Groq API token from environment
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise EnvironmentError("Missing GROQ_API_KEY. Please set it in your .env file.")

# Define the path to your data file (relative path is safer)
DATA_FILE_PATH = "The_Constitution_of_Kenya_2010.pdf"
if not os.path.exists(DATA_FILE_PATH):
    raise FileNotFoundError(f"PDF not found: {DATA_FILE_PATH}. Please place it in the project folder.")

## 2. LOAD DATA

In [3]:
# Load the text from the file.
print("Loading the document...")
loader = PyPDFLoader(DATA_FILE_PATH)
documents = loader.load()
print(f"PDF Document loaded. Total characters: {len(documents[0].page_content)}")

Loading the document...
PDF Document loaded. Total characters: 157


## 3. SPLIT TEXT INTO CHUNKS

In [4]:
# Split the document into smaller, manageable chunks.
print("Splitting the document into smaller chunks...")
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
    length_function=len
)
texts = text_splitter.split_documents(documents)
print(f"Document split into {len(texts)} chunks.")

Splitting the document into smaller chunks...
Document split into 875 chunks.


## 4. CREATE EMBEDDINGS AND VECTOR STORE
Embeddings are numerical representations of the text, allowing the model to understand their semantic meaning. We'll store these in a vector store (FAISS).

In [5]:
print("Creating embeddings and building the FAISS vector store...")
embeddings = HuggingFaceEmbeddings()
db = FAISS.from_documents(texts, embeddings)
print("Vector store created successfully.")

Creating embeddings and building the FAISS vector store...
Vector store created successfully.


## 5. SET UP THE RAG CHAIN

In [6]:
# Setup ChatGroq LLM
print("Setting up ChatGroq...")
llm = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0,
    max_tokens=512,
    api_key= GROQ_API_KEY
)

Setting up ChatGroq...


In [7]:
# Custom prompt template
prompt_template = """Use the following pieces of context to answer the question.
If you don't know the answer, say "I don't know" and do not make up an answer.

{context}

Question: {question}
Answer:"""

PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

# Create RetrievalQA Chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 3}),
    chain_type_kwargs={"prompt": PROMPT}
)


## 6. ASK QUESTIONS

In [None]:
print("✅ RAG model with ChatGroq is ready! Ask me a question about the Kenyan Constitution.")
print("Type 'exit' or 'quit' to end the session.")

while True:
    query = input("\nYour question: ")
    if query.lower() in ['exit', 'quit']:
        print("Goodbye!")
        break

    print("Thinking...")
    try:
        response = qa_chain.invoke({"query": query})
        answer = response.get('result', str(response))
        print("\nAnswer:")
        print(textwrap.fill(answer, width=100))
    except Exception as e:
        print(f"Error processing query: {e}")
        try:
            print("\nFalling back to simple document retrieval...")
            docs = db.similarity_search(query, k=2)
            print("Relevant text snippets:")
            for i, doc in enumerate(docs, 1):
                print(f"\n{i}. {textwrap.fill(doc.page_content[:300], width=100)}...")
        except Exception as e2:
            print(f"Retrieval also failed: {e2}")
# the sample questions are:
#1. what does the constitution say about freedom of speech?
#2. how many articles are there in the constitution?
#3. who is harry potter?

✅ RAG model with ChatGroq is ready! Ask me a question about the Kenyan Constitution.
Type 'exit' or 'quit' to end the session.
Thinking...

Answer:
According to the given context, the constitution says that every person has the right to freedom of
expression, which includes:  - Freedom to seek, receive or impart information or ideas - Freedom of
artistic creativity - Academic freedom and freedom of scientific research  However, this right does
not extend to:  - Propaganda for war - Incitement to violence - Hate speech - Advocacy of hatred
that constitutes ethnic incitement, vilification of others, or incitement to cause harm, or is based
on any ground of discrimination specified or contemplated in Article 27 (4).  Additionally, any
limitation on this right must be clear and specific, and the nature and extent of the limitation
must be demonstrated by the State or a person seeking to justify it. The limitation must not
derogate from the core or essential content of the right.
Thinking..