# Implementation: Tiny RAG

**Goal**: Build a minimal Retrieval-Augmented Generation (RAG) pipeline from scratch using only Python lists.

In [None]:
def retrieve(query, database, top_k=1):
    """Return the ``top_k`` documents from ``database`` most similar to ``query``.

    Similarity is the Jaccard index over lowercased, whitespace-split word
    sets: ``len(Q & D) / len(Q | D)``. Punctuation is not stripped, so
    ``"acme?"`` and ``"acme"`` count as different words.

    Args:
        query: The search string.
        database: An iterable of document strings.
        top_k: Maximum number of documents to return. A value of zero or
            less yields an empty list; ``None`` returns every document.

    Returns:
        A list of documents ordered by descending score. Documents with
        equal scores keep their original relative order.
    """
    # A negative top_k would otherwise be read as a slice offset and return
    # "all but the last |top_k|" results, which is never what a caller wants.
    if top_k is not None and top_k <= 0:
        return []

    query_words = set(query.lower().split())

    scores = []
    for doc in database:
        doc_words = set(doc.lower().split())
        union = query_words | doc_words
        # Both sets are empty when the query and the document contain no
        # words; score that as 0.0 instead of dividing by zero.
        score = len(query_words & doc_words) / len(union) if union else 0.0
        scores.append((score, doc))

    # Stable sort: ties stay in database order even with reverse=True.
    scores.sort(key=lambda pair: pair[0], reverse=True)
    return [doc for _, doc in scores[:top_k]]

def generate(query, context):
    """Produce a canned answer string that embeds ``context`` and ``query``.

    This is a stand-in for a real LLM call: it performs no inference and
    simply interpolates both arguments into a fixed sentence template.
    """
    response = f"Based on '{context}', the answer is related to the query '{query}'."
    return response

# Step 1 - knowledge base: each string is one retrievable document.
db = [
    "The CEO of Acme Corp is Jane Doe.",
    "The company revenue was $1M last year.",
    "Apples are red.",
]

# Step 2 - the user's question.
user_q = "Who is CEO of Acme?"

# Step 3 - RAG pipeline: retrieve matching documents, join them into a
# single newline-separated context string, then hand both to the generator.
retrieved_docs = retrieve(query=user_q, database=db)
context_str = "\n".join(retrieved_docs)
answer = generate(query=user_q, context=context_str)

# Show both the retrieved documents and the final generated answer.
print(f"Retrieval: {retrieved_docs}")
print(f"LLM Answer: {answer}")

## Conclusion
Even simple keyword search can ground an LLM.