In [3]:
! pip install langchain langchain-community huggingface-hub faiss-cpu pypdf langchain-huggingface langchain-groq

Collecting langchain-groq
  Downloading langchain_groq-0.3.8-py3-none-any.whl.metadata (2.6 kB)
Collecting groq<1,>=0.30.0 (from langchain-groq)
  Downloading groq-0.31.1-py3-none-any.whl.metadata (16 kB)
Downloading langchain_groq-0.3.8-py3-none-any.whl (16 kB)
Downloading groq-0.31.1-py3-none-any.whl (134 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.9/134.9 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq, langchain-groq
Successfully installed groq-0.31.1 langchain-groq-0.3.8


In [4]:
# import libraries
import os
import textwrap
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from google.colab import userdata
from langchain_groq import ChatGroq

## 1. SETUP AND CONFIGURATION

In [5]:
# Load Groq API token
os.environ["GROQ_API_KEY"] = userdata.get("GROQ_API_KEY")

# Define the path to your data file.
DATA_FILE_PATH = "/content/The_Constitution_of_Kenya_2010.pdf"
if not os.path.exists(DATA_FILE_PATH):
    raise FileNotFoundError(f"Error: The file '{DATA_FILE_PATH}' was not found. Please upload it.")


## 2. LOAD DATA

In [6]:
# Load the text from the file.
print("Loading the document...")
loader = PyPDFLoader(DATA_FILE_PATH)
documents = loader.load()
print(f"PDF Document loaded. Total characters: {len(documents[0].page_content)}")

Loading the document...
PDF Document loaded. Total characters: 157


## 3. SPLIT TEXT INTO CHUNKS

In [7]:
# Split the document into smaller, manageable chunks.
print("Splitting the document into smaller chunks...")
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
    length_function=len
)
texts = text_splitter.split_documents(documents)
print(f"Document split into {len(texts)} chunks.")

Splitting the document into smaller chunks...
Document split into 875 chunks.


## 4. CREATE EMBEDDINGS AND VECTOR STORE
Embeddings are numerical representations of the text, allowing the model to understand their semantic meaning. We'll store these in a vector store (FAISS).

In [8]:
print("Creating embeddings and building the FAISS vector store...")
embeddings = HuggingFaceEmbeddings()
db = FAISS.from_documents(texts, embeddings)
print("Vector store created successfully.")

Creating embeddings and building the FAISS vector store...


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Vector store created successfully.


## 5. SET UP THE RAG CHAIN

In [12]:
# Setup ChatGroq LLM
print("Setting up ChatGroq...")
llm = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0,
    max_tokens=512,
    api_key=os.environ["GROQ_API_KEY"]
)

Setting up ChatGroq...


In [14]:
# Custom prompt template
prompt_template = """Use the following pieces of context to answer the question.
If you don't know the answer, say "I don't know" and do not make up an answer.

{context}

Question: {question}
Answer:"""

PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

# Create RetrievalQA Chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 3}),
    chain_type_kwargs={"prompt": PROMPT}
)


## 6. ASK QUESTIONS

In [15]:
print("✅ RAG model with ChatGroq is ready! Ask me a question about the Kenyan Constitution.")
print("Type 'exit' or 'quit' to end the session.")

while True:
    query = input("\nYour question: ")
    if query.lower() in ['exit', 'quit']:
        print("Goodbye!")
        break

    print("Thinking...")
    try:
        response = qa_chain.invoke({"query": query})
        answer = response.get('result', str(response))
        print("\nAnswer:")
        print(textwrap.fill(answer, width=100))
    except Exception as e:
        print(f"Error processing query: {e}")
        try:
            print("\nFalling back to simple document retrieval...")
            docs = db.similarity_search(query, k=2)
            print("Relevant text snippets:")
            for i, doc in enumerate(docs, 1):
                print(f"\n{i}. {textwrap.fill(doc.page_content[:300], width=100)}...")
        except Exception as e2:
            print(f"Retrieval also failed: {e2}")

✅ RAG model with ChatGroq is ready! Ask me a question about the Kenyan Constitution.
Type 'exit' or 'quit' to end the session.

Your question: what does the constitution say about freedom of speech?
Thinking...

Answer:
According to the given context, the constitution says that every person has the right to freedom of
expression, which includes:  - Freedom to seek, receive or impart information or ideas - Freedom of
artistic creativity - Academic freedom and freedom of scientific research  However, this right does
not extend to:  - Propaganda for war - Incitement to violence - Hate speech - Advocacy of hatred
that constitutes ethnic incitement, vilification of others, or incitement to cause harm, or is based
on any ground of discrimination specified or contemplated in Article 27 (4).  Additionally, any
limitation on this right must be clear and specific, and the nature and extent of the limitation
must be demonstrated by the State or a person seeking to justify it. The limitation must 