## Prompt Augmentation and Generation

In [1]:
# Setup cell: bring the typing helpers into scope for the function annotations
# used in later cells. Nothing is installed here; only the standard-library
# `typing` module is imported.

from typing import List, Dict


# Confirmation message so the reader can see this cell has been executed.
print("Setup complete!")

Setup complete!


In [2]:
# Basic RAG template
# Filled in with str.format(); it expects two placeholders:
#   {context}  - the text the model is allowed to draw on
#   {question} - the user's question
# The fallback sentence is wrapped in a complete pair of double quotes so the
# model sees exactly where the sentence it should repeat begins and ends.
BASIC_RAG_TEMPLATE = """
You are a helpful assistant that answers questions based on provided context.
Use ONLY the information from the context below to answer the question.
If the answer cannot be found in the context, say "I don't have enough information to answer this question."


context:
{context}

Question: {question}

Answer:
"""


# Example usage: fill the basic template with a small hand-written context
# and question, then display the resulting prompt.
sample_question = "What does RAG stand for?"
sample_context = """
[1] RAG stands for Retrieval-Augmented Generation. It's a technique that combines information retrieval with text generation.
[2] The main benefit of RAG is that it allows LLMs to access external knowledge without retraining.
"""

prompt = BASIC_RAG_TEMPLATE.format(context=sample_context, question=sample_question)

# Heading, 50-character divider and the prompt, one per line.
print("Generated Prompt:", "=" * 50, prompt, sep="\n")


Generated Prompt:

You are a helpful assistant that answers questions based on provided context.
Use ONLY the information from the context below to answer the question.
If the answer cannot be found in the context, say "I don't have enough information to answer this question.


context:

[1] RAG stands for Retrieval-Augmented Generation. It's a technique that combines information retrieval with text generation.
[2] The main benefit of RAG is that it allows LLMs to access external knowledge without retraining.


Question: What does RAG stand for?

Answer:



In [4]:
# Context Injection Strategies
# Strategy 1: Simple Concatenation
# Simulated retrieved chunks (normally these would come from Module 4's
# retrieval system). Every chunk is a dict with three keys:
#   "content" - the chunk text
#   "source"  - the name of the document it was taken from
#   "score"   - a numeric score (shown as "Relevance" by the metadata formatter)
retrieved_chunks = [
    {
        "content": "Python is a high-level programming language known for its simple syntax and readability.",
        "source": "python_basics.pdf",
        "score": 0.89,
    },
    {
        "content": "Python supports multiple programming paradigms including procedural, object-oriented, and functional programming.",
        "source": "python_guide.pdf",
        "score": 0.85,
    },
    {
        "content": "Python was created by Guido van Rossum and first released in 1991.",
        "source": "python_history.pdf",
        "score": 0.72,
    },
]

# Simple Concatenation
def format_context_simple(chunks: List[Dict]) -> str:
    """Join the "content" text of every chunk into one string.

    Args:
        chunks: Dicts that each provide a "content" entry.

    Returns:
        The chunk texts in their original order, separated by a blank line.
        An empty input produces an empty string.
    """
    texts = []
    for item in chunks:
        texts.append(item["content"])
    return "\n\n".join(texts)


# Build the plain concatenated context and show it under a heading and a
# 50-character divider.
context_simple = format_context_simple(retrieved_chunks)
print("Simple Concatenation:", "=" * 50, context_simple, sep="\n")

Simple Concatenation:
Python is a high-level programming language known for its simple syntax and readability.

Python supports multiple programming paradigms including procedural, object-oriented, and functional programming.

Python was created by Guido van Rossum and first released in 1991.


In [5]:
def format_context_numbered(chunks: List[Dict]) -> str:
    """Prefix each chunk's text with a 1-based "[n]" label usable as a citation.

    Args:
        chunks: Dicts that each provide a "content" entry.

    Returns:
        The labelled chunk texts in their original order, separated by a
        blank line. An empty input produces an empty string.
    """
    return "\n\n".join(
        f"[{index}] {entry['content']}"
        for index, entry in enumerate(chunks, start=1)
    )

# Build the numbered context and show it under a heading and a 50-character
# divider.
context_numbered = format_context_numbered(retrieved_chunks)
print("Numbered Context:", "=" * 50, context_numbered, sep="\n")

Numbered Context:
[1] Python is a high-level programming language known for its simple syntax and readability.

[2] Python supports multiple programming paradigms including procedural, object-oriented, and functional programming.

[3] Python was created by Guido van Rossum and first released in 1991.


In [9]:
def format_context_with_metadata(chunks: List[Dict]) -> str:
    """Render each chunk as a numbered header line followed by its text.

    The header has the form "[n] (Source: <source>, Relevance: <score>)",
    where n counts from 1 and the score is formatted with two decimals.

    Args:
        chunks: Dicts that each provide "source", "score" and "content".

    Returns:
        The rendered entries in their original order, separated by a blank
        line. An empty input produces an empty string.
    """
    entries = []
    for position, item in enumerate(chunks, start=1):
        header = f"[{position}] (Source: {item['source']}, Relevance: {item['score']:.2f})"
        entries.append(f"{header}\n{item['content']}")
    return "\n\n".join(entries)

# Build the metadata-annotated context and show it under a heading and a
# 50-character divider.
context_with_metadata = format_context_with_metadata(retrieved_chunks)
print("Context with Metadata:", "=" * 50, context_with_metadata, sep="\n")

Context with Metadata:
[1] (Source: python_basics.pdf, Relevance: 0.89)
Python is a high-level programming language known for its simple syntax and readability.

[2] (Source: python_guide.pdf, Relevance: 0.85)
Python supports multiple programming paradigms including procedural, object-oriented, and functional programming.

[3] (Source: python_history.pdf, Relevance: 0.72)
Python was created by Guido van Rossum and first released in 1991.


In [10]:
# Summary of the trade-offs between the three context-formatting strategies.
# The pro / con / recommendation markers are real emoji characters; the earlier
# text held their UTF-8 bytes decoded with the wrong character set, which
# rendered them as unreadable sequences.
# The lines are kept in a list so the full text can be inspected as data;
# empty strings produce the blank line before each numbered section.
comparison_lines = [
    "Strategy Comparison:",
    "=" * 60,
    "",
    "1. Simple Concatenation",
    "   ✅ Pros: Clean, minimal tokens",
    "   ❌ Cons: No citations possible, hard to track sources",
    "",
    "2. Numbered Context",
    "   ✅ Pros: Enables citations, clear separation",
    "   ❌ Cons: Slightly more tokens",
    "   👍 Recommended: Good balance",
    "",
    "3. Context with Metadata",
    "   ✅ Pros: Full traceability, source attribution",
    "   ❌ Cons: Most tokens, may confuse some LLMs",
    "   👍 Best for: Production systems requiring full audit trail",
]
print("\n".join(comparison_lines))

Strategy Comparison:

1. Simple Concatenation
   ✅ Pros: Clean, minimal tokens
   ❌ Cons: No citations possible, hard to track sources

2. Numbered Context
   ✅ Pros: Enables citations, clear separation
   ❌ Cons: Slightly more tokens
   👍 Recommended: Good balance

3. Context with Metadata
   ✅ Pros: Full traceability, source attribution
   ❌ Cons: Most tokens, may confuse some LLMs
   👍 Best for: Production systems requiring full audit trail


In [11]:
# Question-answering prompt that instructs the model to cite its sources.
# Placeholders: {context} and {question}.
# The bracketed markers named in the guidelines ([1], [2], ...) have the same
# form as the "[n]" labels that format_context_numbered puts in front of each
# chunk. The square brackets are literal text in the template; only the
# curly-brace fields are placeholders.
QA_WITH_CITATIONS_TEMPLATE = """
You are a helpful AI assistant. Answer the user's question based on the context provided below.

Important guidelines:
- Use ONLY information from the context
- Cite your sources using [1], [2], etc.
- If multiple sources support a claim, cite all: [1][2]
- If the answer is not in the context, say "I don't have enough information to answer this question."
- Be concise and accurate

Context:
{context}

Question: {question}

Answer (remember to cite sources):
"""

# Display the raw template. The placeholders appear unfilled in the output
# because the template is printed as-is rather than formatted.
print(QA_WITH_CITATIONS_TEMPLATE)


You are a helpful AI assistant. Answer the user's question based on the context provided below.

Important guidelines:
- Use ONLY information from the context
- Cite your sources using [1], [2], etc.
- If multiple sources support a claim, cite all: [1][2]
- If the answer is not in the context, say "I don't have enough information to answer this question."
- Be concise and accurate

Context:
{context}

Question: {question}

Answer (remember to cite sources):



In [14]:
# Prompt for compare-and-contrast questions over the supplied context.
# Placeholders: {context} and {question}.
# As in the citation template, the guidelines ask for bracketed source markers
# ([1], [2], ...) and restrict the model to the supplied context; in addition
# they ask for conflicting information to be presented from both sides.
COMPARISON_TEMPLATE = """
You are a helpful AI assistant. Compare and contrast the information provided in the context.

Guidelines:
- Identify similarities and differences
- Organize your comparison clearly
- Cite sources using [1], [2], etc.
- If information conflicts, present both sides
- Use ONLY information from the context

Context:
{context}

Question: {question}

Comparative Analysis:
"""

# Display the raw template with its placeholders unfilled. The f-string puts
# one extra newline in front, so the output starts with an additional blank line.
print(f'\n{COMPARISON_TEMPLATE}')



You are a helpful AI assistant. Compare and contrast the information provided in the context.

Guidelines:
- Identify similarities and differences
- Organize your comparison clearly
- Cite sources using [1], [2], etc.
- If information conflicts, present both sides
- Use ONLY information from the context

Context:
{context}

Question: {question}

Comparative Analysis:

