# Simple RAG Implementation with Constitution of Kenya

This notebook implements a simple Retrieval-Augmented Generation (RAG) system using the Constitution of Kenya 2010 PDF.

In [3]:
# Install required packages
!pip install pymupdf sentence-transformers faiss-cpu openai tiktoken



In [4]:
# Import libraries
import os
import re
from typing import List

import faiss
import fitz  # PyMuPDF
import numpy as np
from sentence_transformers import SentenceTransformer

print("Libraries imported successfully!")

  from .autonotebook import tqdm as notebook_tqdm


  from .autonotebook import tqdm as notebook_tqdm


Libraries imported successfully!


In [5]:
# Load PDF and extract text
def load_pdf(pdf_path):
    """Extract the text of every page of a PDF into one string.

    Within each page, every run of whitespace (newlines included) is collapsed
    to a single space. Pages are separated by two newlines and the final
    result has leading/trailing whitespace stripped.

    Args:
        pdf_path: Location of the PDF, passed straight to ``fitz.open``.

    Returns:
        The cleaned text of all pages as a single string ("" for a document
        with no pages).
    """
    pages = []

    # Using the document as a context manager guarantees it is closed even
    # when text extraction fails part-way through.
    with fitz.open(pdf_path) as doc:
        for page in doc:
            # Clean up text: collapse whitespace runs to a single space
            pages.append(re.sub(r'\s+', ' ', page.get_text()))

    # Joining once avoids rebuilding an ever-growing string page by page.
    return "\n\n".join(pages).strip()

# Load the Constitution PDF.
# The location can be overridden through the CONSTITUTION_PDF environment variable so
# the notebook also runs on other machines; the original path remains the default.
pdf_path = os.environ.get(
    "CONSTITUTION_PDF",
    "/Users/pikachu/Downloads/llms-and-a-bit-more/The_Constitution_of_Kenya_2010.pdf",
)
constitution_text = load_pdf(pdf_path)

# Quick sanity check: total size and the opening of the extracted text
print(f"Document loaded: {len(constitution_text)} characters")
print(f"First 300 characters: {constitution_text[:300]}...")

Document loaded: 318999 characters
First 300 characters: LAWS OF KENYA THE CONSTITUTION OF KENYA, 2010 Published by the National Council for Law Reporting with the Authority of the Attorney-General www.kenyalaw.org 

Constitution of Kenya, 2010 THE CONSTITUTION OF KENYA, 2010 ARRANGEMENT OF ARTICLES PREAMBLE CHAPTER ONE—SOVEREIGNTY OF THE PEOPLE AND SUPRE...


In [6]:
# Split text into chunks
def split_text(text: str, chunk_size: int = 1000, overlap: int = 200) -> List[str]:
    """Split text into overlapping chunks of whitespace-separated words.

    Sizes are measured in words, not characters. Consecutive chunks share
    ``overlap`` words, so each new chunk starts ``chunk_size - overlap`` words
    after the previous one.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk; must be positive.
        overlap: Number of words shared between neighbouring chunks; must
            satisfy ``0 <= overlap < chunk_size``.

    Returns:
        The chunks in document order, each a single-space-joined string.
        Empty or whitespace-only text yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    # overlap >= chunk_size would make the stride zero or negative (no progress);
    # a negative overlap would make the stride exceed chunk_size and drop words.
    if not 0 <= overlap < chunk_size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_size, got overlap={overlap}, chunk_size={chunk_size}"
        )

    words = text.split()
    stride = chunk_size - overlap
    chunks = []

    for start in range(0, len(words), stride):
        chunks.append(' '.join(words[start:start + chunk_size]))

        # This chunk already reaches the end of the text; another iteration
        # would only emit a tail that is fully contained in it.
        if start + chunk_size >= len(words):
            break

    return chunks

# Create chunks using split_text's default chunk_size and overlap
chunks = split_text(constitution_text)
print(f"Created {len(chunks)} chunks")
# Preview the first 200 characters of the first chunk (raises IndexError if no chunks were produced)
print(f"Sample chunk: {chunks[0][:200]}...")

Created 64 chunks
Sample chunk: LAWS OF KENYA THE CONSTITUTION OF KENYA, 2010 Published by the National Council for Law Reporting with the Authority of the Attorney-General www.kenyalaw.org Constitution of Kenya, 2010 THE CONSTITUTI...


In [7]:
# Create embeddings
print("Loading embedding model...")
# `model` is kept at module level because query_constitution reuses it to embed questions
model = SentenceTransformer('all-MiniLM-L6-v2')

print("Generating embeddings...")
# Encode every chunk; the result is indexed as a 2-D array below (row i corresponds to chunks[i])
embeddings = model.encode(chunks, show_progress_bar=True)

# Report the array shape as a sanity check
print(f"Embeddings shape: {embeddings.shape}")
print("Embeddings created successfully!")

Loading embedding model...
Generating embeddings...
Generating embeddings...


Loading embedding model...
Generating embeddings...
Generating embeddings...


Batches: 100%|██████████| 2/2 [00:00<00:00,  3.78it/s]

Loading embedding model...
Generating embeddings...
Generating embeddings...


Batches: 100%|██████████| 2/2 [00:00<00:00,  3.78it/s]

Embeddings shape: (64, 384)
Embeddings created successfully!





In [8]:
# Create vector database
print("Setting up vector database...")
# Vector length is taken from the second axis of the embeddings array
dimension = embeddings.shape[1]
index = faiss.IndexFlatIP(dimension)  # Inner product for similarity

# Normalize embeddings for cosine similarity:
# each row is divided by its own L2 norm, so the inner product of two rows equals their cosine.
# NOTE(review): a row with zero norm would divide by zero here — presumably never produced by the model; confirm.
normalized_embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
# Vectors are cast to float32 before being added to the index
index.add(normalized_embeddings.astype('float32'))

print(f"Vector database created with {index.ntotal} vectors")
print("Ready for similarity search!")

Setting up vector database...
Vector database created with 64 vectors
Ready for similarity search!


In [9]:
# Simple RAG query function
def query_constitution(question, k=3):
    """Query the Constitution and return relevant chunks

    The question is embedded with the module-level ``model``, scaled to unit
    length and searched against the module-level FAISS ``index``; hits are
    mapped back to their text through the module-level ``chunks`` list.

    Args:
        question: The natural-language question to search for.
        k: Maximum number of chunks to return; must be at least 1.

    Returns:
        A list of dicts in the order returned by the index search, each with
        keys ``'chunk'`` (the chunk text), ``'score'`` (similarity as a float)
        and ``'rank'`` (1-based position). Holds fewer than ``k`` entries when
        the index contains fewer vectors, and is empty for an empty index.

    Raises:
        ValueError: If ``k`` is less than 1.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    # Never request more neighbours than the index holds: FAISS pads missing
    # results with label -1, which would silently select chunks[-1].
    k = min(k, index.ntotal)
    if k == 0:
        return []

    # Create embedding for the question
    question_embedding = model.encode([question])

    # Normalize question embedding (single row, so the array norm is the vector norm)
    question_embedding = question_embedding / np.linalg.norm(question_embedding)

    # Search for similar chunks
    scores, indices = index.search(question_embedding.astype('float32'), k)

    # Collect results
    results = []
    for score, idx in zip(scores[0], indices[0]):
        if idx < 0:
            # Defensive: -1 marks "no result" and must not be used as a list index
            continue
        results.append({
            'chunk': chunks[int(idx)],
            'score': float(score),
            'rank': len(results) + 1
        })

    return results

# Confirmation for the reader; the function itself is called as query_constitution(question, k=3)
print("RAG query function ready!")
print("Use: query_constitution on citizenship to search the Constitution")

RAG query function ready!
Use: query_constitution on citizenship to search the Constitution


In [10]:
# Test the RAG system with sample questions
# Only the first entry is queried in this cell; the rest are available for further manual checks.
sample_questions = [
    "What are the fundamental rights in Kenya?",
    "How is the President elected?",
    "What are the functions of county governments?",
    "What is the role of Parliament?"
]

# Function to display results nicely
def display_results(question, results):
    """Print a question followed by its ranked retrieval hits.

    Args:
        question: The question text shown in the banner.
        results: Iterable of dicts with 'rank', 'score' and 'chunk' keys, as
            produced by query_constitution. Only the first 300 characters of
            each chunk are shown, always followed by '...'.
    """
    banner_rule = '=' * 60
    hit_rule = "-" * 40

    # Banner: blank line, rule, question, rule
    print("", banner_rule, f"QUESTION: {question}", banner_rule, sep="\n")

    for hit in results:
        rank = hit['rank']
        score = hit['score']
        preview = hit['chunk'][:300]
        print(f"\nRank {rank} (Score: {score:.3f})", f"Text: {preview}...", hit_rule, sep="\n")

# Test with first question: retrieve with the default k and print the ranked hits
question = sample_questions[0]
results = query_constitution(question)
display_results(question, results)


QUESTION: What are the fundamental rights in Kenya?

Rank 1 (Score: 0.719)
Text: our ethnic, cultural and religious diversity, and determined to live in peace and unity as one indivisible sovereign nation: RESPECTFUL of the environment, which is our heritage, and determined to sustain it for the benefit of future generations: COMMITTED to nurturing and protecting the well-being ...
----------------------------------------

Rank 2 (Score: 0.696)
Text: of Kenya in order to safeguard the well-being of the people of Kenya; (b) establishes a free and democratic system of Government that ensures good governance, constitutionalism, the rule of law, human rights and gender equity; (c) recognizes and demarcates divisions of responsibility among the vario...
----------------------------------------

Rank 3 (Score: 0.696)
Text: LAWS OF KENYA THE CONSTITUTION OF KENYA, 2010 Published by the National Council for Law Reporting with the Authority of the Attorney-General www.kenyalaw.org Constitution