**Setup and Installation**

In [None]:
!pip install langchain langchain-openai tiktoken openai

import os
import re
import json
from typing import List, Dict, Any, Optional

# Read the API key from the environment instead of hardcoding it here. Assigning a
# placeholder string would overwrite a real key and make every
# `os.environ.get("OPENAI_API_KEY")` guard in the notebook truthy, so the
# "skipping LLM evaluation" branches could never run.
if not os.environ.get("OPENAI_API_KEY"):
    print("OPENAI_API_KEY is not set - LLM evaluation cells will be skipped.")

import tiktoken
from langchain.prompts import PromptTemplate
from langchain_core.documents import Document
from langchain_openai import OpenAI, ChatOpenAI

**Basic Utility Functions**

In [2]:
def count_tokens(text: str, model: str = "gpt-3.5-turbo") -> int:
    """Count the number of tokens in a text string.

    Args:
        text: The string to tokenize.
        model: Model name used to select the tiktoken encoding.

    Returns:
        The number of tokens produced by encoding ``text``.
    """
    try:
        encoder = tiktoken.encoding_for_model(model)
    except KeyError:
        # tiktoken raises KeyError when it cannot map a model name to an encoding
        # (e.g. a new or custom-named model). Fall back to cl100k_base so the
        # notebook still reports a usable estimate instead of crashing.
        encoder = tiktoken.get_encoding("cl100k_base")
    return len(encoder.encode(text))

def print_separator():
    """Emit a 50-character rule of '=' with a blank line before and after it."""
    rule = "=" * 50
    print(f"\n{rule}\n")

# Sample documents for testing: four short passages about the Eiffel Tower and
# Tokyo Tower. Each Document carries "source" and "page" metadata.
sample_docs = [
    Document(page_content="The Eiffel Tower is 330 meters (1,083 ft) tall, about the same height as an 81-story building, and was the tallest man-made structure in the world from its completion in 1889 until 1930.",
             metadata={"source": "travel_guide", "page": 25}),
    Document(page_content="The Tower was built by Gustave Eiffel for the 1889 World's Fair. Initially criticized by some of France's leading artists and intellectuals for its design, it has since become a global cultural icon of France.",
             metadata={"source": "history_book", "page": 42}),
    Document(page_content="The Eiffel Tower is made of wrought iron and weighs approximately 10,100 tonnes. It has three levels for visitors, with restaurants on the first and second levels.",
             metadata={"source": "engineering_text", "page": 89}),
    Document(page_content="Tokyo Tower was completed in 1958 and is modeled after the Eiffel Tower, although it is painted white and orange to comply with air safety regulations. At 333 meters, it is slightly taller than the Eiffel Tower.",
             metadata={"source": "global_landmarks", "page": 118}),
]

# Contradictory documents for testing: four passages that make differing claims
# about coffee consumption and heart disease, used by the contradiction-handling
# and citation demos.
contradictory_docs = [
    Document(page_content="Studies suggest drinking coffee can increase the risk of heart disease due to its caffeine content raising blood pressure temporarily.",
             metadata={"source": "health_study_2005", "page": 45}),
    Document(page_content="Recent research from 2022 indicates that moderate coffee consumption (3-4 cups daily) may actually decrease heart disease risk by improving arterial function.",
             metadata={"source": "medical_journal_2022", "page": 112}),
    Document(page_content="A meta-analysis found no conclusive evidence linking coffee consumption to heart disease in healthy individuals.",
             metadata={"source": "nutrition_science", "page": 78}),
    Document(page_content="Excessive coffee intake (over 6 cups daily) has been associated with increased heart palpitations and potential strain on the cardiovascular system.",
             metadata={"source": "cardiology_review", "page": 23}),
]

**Section 1: Strategies for Reducing Hallucinations**

In [None]:
print("Section 1: Strategies for Reducing Hallucinations")

# Explicit Constraints and Boundaries: restrict the answer to the supplied context
# and give the model an exact fallback phrase to use instead of guessing.
# Placeholders: {context}, {question}.
anti_hallucination_template = """
Answer the question based EXCLUSIVELY on the provided context.
If the context doesn't contain enough information to answer completely,
say "I don't have enough information" rather than guessing.

CONTEXT:
{context}

QUESTION:
{question}

ANSWER:
"""

# Knowledge Boundary Acknowledgment: tell the model its knowledge is limited to the
# context and ask it to open with what it knows from that context.
# Placeholders: {context}, {question}.
boundary_template = """
Use ONLY the facts present in the context below to answer the question.
Your knowledge is limited to this context - do not introduce external information.

CONTEXT:
{context}

QUESTION:
{question}

Begin your response with a statement of what you know based on the context,
then provide your answer based strictly on that information.

ANSWER:
"""

# Source Attribution Requirements: require a citation for every factual statement.
# Placeholders: {context}, {question}.
# NOTE(review): the context strings built in this notebook label entries
# "[Document N]", while instruction 1 asks for "[Doc X]" - confirm which citation
# label is intended.
attribution_template = """
Answer based solely on the provided documents.

DOCUMENTS:
{context}

QUESTION:
{question}

INSTRUCTIONS:
1. Every factual statement must cite its source document using [Doc X] notation
2. If information from multiple documents is used, cite each source
3. If the question cannot be answered from the documents, state this clearly

ANSWER:
"""

# Shared inputs for the hallucination demos: one numbered "[Document N]" line per
# sample document, plus one question the context answers and one it does not.
context = "\n".join(
    f"[Document {position}] {document.page_content}"
    for position, document in enumerate(sample_docs, start=1)
)
answerable_question = "What is the height of the Eiffel Tower?"
unanswerable_question = "When was the Eiffel Tower renovated last?"

def format_and_analyze_prompt(template, context, question):
    """Fill ``template`` with ``context`` and ``question``, print the token count, and return the filled prompt."""
    prompt = template.format(context=context, question=question)
    print(f"Tokens: {count_tokens(prompt)}")
    return prompt

# Preview each Section 1 template: the helper prints the token count first,
# then the filled prompt is echoed.
print("\nExplicit Constraints Template with answerable question:")
formatted_anti_hallucination = format_and_analyze_prompt(
    template=anti_hallucination_template,
    context=context,
    question=answerable_question,
)
print(formatted_anti_hallucination)

print("\nBoundary Template with unanswerable question:")
formatted_boundary = format_and_analyze_prompt(
    template=boundary_template,
    context=context,
    question=unanswerable_question,
)
print(formatted_boundary)

print("\nAttribution Template with answerable question:")
formatted_attribution = format_and_analyze_prompt(
    template=attribution_template,
    context=context,
    question=answerable_question,
)
print(formatted_attribution)

# Optional live comparison: runs only when OPENAI_API_KEY is set in the environment.
# Both prompts are sent with the unanswerable question so the unconstrained control
# can be compared with the anti-hallucination template.
if os.environ.get("OPENAI_API_KEY"):
    try:
        # temperature=0 is passed to the chat model for both calls
        llm = ChatOpenAI(temperature=0)

        print("\nTesting anti-hallucination techniques with LLM:")

        print("\nStandard template (control):")
        # Control prompt with no grounding constraints. The literal is written inside
        # the indented try block, so every line of this prompt carries the block's
        # leading spaces.
        standard_template = """
        Answer the question based on the provided context.

        CONTEXT:
        {context}

        QUESTION:
        {question}
        """

        standard_response = llm.invoke(standard_template.format(
            context=context,
            question=unanswerable_question
        ))
        print(f"Response: {standard_response.content}")

        print("\nAnti-hallucination template:")
        anti_hall_response = llm.invoke(anti_hallucination_template.format(
            context=context,
            question=unanswerable_question
        ))
        print(f"Response: {anti_hall_response.content}")
    except Exception as e:
        # Best-effort demo: report any failure and let the notebook continue
        print(f"Error testing with LLM: {e}")
else:
    print("\nOpenAI API key not set - skipping LLM evaluation")

print_separator()

**Section 2: Techniques to Improve Relevance and Coherence**

In [None]:
print("Section 2: Techniques to Improve Relevance and Coherence")

# Contextual Priming: name the domain and what matters in it before showing the
# context. Placeholders: {domain}, {key_considerations}, {context}, {question}.
priming_template = """
You are answering a question about {domain}.
When working with {domain} information, it's important to consider {key_considerations}.

CONTEXT:
{context}

QUESTION:
{question}

Using the context provided, give a well-structured answer addressing the question.

ANSWER:
"""

# Response Structuring: prescribe a three-part answer layout (direct answer,
# supporting evidence, nuances/limitations). Placeholders: {context}, {question}.
structured_response_template = """
Answer the question using the provided context.

CONTEXT:
{context}

QUESTION:
{question}

Please structure your answer as follows:
1. Direct answer to the question (1-2 sentences)
2. Supporting evidence from the context
3. Any important nuances or limitations to consider

ANSWER:
"""

# Query-Focused Reasoning: list an explicit four-step reasoning process to follow
# before answering. Placeholders: {context}, {question}.
reasoning_template = """
Answer the question using the provided information.

CONTEXT:
{context}

QUESTION:
{question}

REASONING PROCESS:
1. First, identify what the question is specifically asking for
2. Then, locate the relevant information in the context
3. Consider whether the context fully answers the question
4. Formulate your answer based on this analysis

ANSWER:
"""

# Exercise the relevance and coherence templates against the Eiffel Tower context:
# each prompt is printed in full, followed by its token count.
print("\nContextual Priming Template:")
domain = "historical architecture"
key_considerations = "historical context, architectural significance, and cultural impact"
priming_fields = {
    "domain": domain,
    "key_considerations": key_considerations,
    "context": context,
    "question": "Why is the Eiffel Tower significant?",
}
formatted_priming = priming_template.format(**priming_fields)
print(formatted_priming)
priming_token_count = count_tokens(formatted_priming)
print(f"Tokens: {priming_token_count}")

print("\nStructured Response Template:")
structured_fields = {
    "context": context,
    "question": "How tall is the Eiffel Tower compared to Tokyo Tower?",
}
formatted_structured = structured_response_template.format(**structured_fields)
print(formatted_structured)
structured_token_count = count_tokens(formatted_structured)
print(f"Tokens: {structured_token_count}")

print("\nReasoning Template:")
reasoning_fields = {
    "context": context,
    "question": "What materials were used to build the Eiffel Tower and why are they significant?",
}
formatted_reasoning = reasoning_template.format(**reasoning_fields)
print(formatted_reasoning)
reasoning_token_count = count_tokens(formatted_reasoning)
print(f"Tokens: {reasoning_token_count}")

print_separator()

**Section 3: Methods for Handling Contradictory Information**

In [None]:
print("Section 3: Methods for Handling Contradictory Information")

# Contradiction Acknowledgment: warn that the context may conflict and ask the model
# to name the contradiction, present both sides with sources, and weigh support.
# Placeholders: {context}, {question}.
contradiction_template = """
Answer based on the following information, which may contain contradictions.

CONTEXT:
{context}

QUESTION:
{question}

INSTRUCTIONS:
- If you notice contradictory information, explicitly identify the contradiction
- Present both perspectives with their respective sources
- If possible, explain potential reasons for the contradiction
- Indicate which answer has stronger support, if applicable

ANSWER:
"""

# Information Quality Assessment: ask the model to rate source reliability and
# state a confidence level. Placeholders: {context}, {question}.
quality_assessment_template = """
Analyze the following information and answer the question.

CONTEXT:
{context}

QUESTION:
{question}

When answering:
1. Assess the reliability of each piece of information (recency, source credibility)
2. Prioritize information from more authoritative sources
3. Consider the consistency across multiple sources
4. Indicate confidence level in your final answer

ANSWER:
"""

# Multi-Perspective Synthesis: ask for a balanced answer covering consensus and
# disagreement. Placeholders: {context}, {question}.
multi_perspective_template = """
The following sources may present different perspectives on the question.

SOURCES:
{context}

QUESTION:
{question}

Present a balanced answer that:
- Synthesizes the different perspectives
- Acknowledges areas of consensus and disagreement
- Avoids favoring one viewpoint without justification
- Helps the reader understand the full picture

ANSWER:
"""

# Build the coffee / heart-disease context (one "[Document N]" line per passage)
# and preview each contradiction-handling template with it.
contradiction_context = "\n".join(
    f"[Document {position}] {document.page_content}"
    for position, document in enumerate(contradictory_docs, start=1)
)
contradiction_question = "Does coffee increase the risk of heart disease?"

print("\nContradiction Template:")
formatted_contradiction = format_and_analyze_prompt(
    template=contradiction_template,
    context=contradiction_context,
    question=contradiction_question,
)
print(formatted_contradiction)

print("\nQuality Assessment Template:")
formatted_quality = format_and_analyze_prompt(
    template=quality_assessment_template,
    context=contradiction_context,
    question=contradiction_question,
)
print(formatted_quality)

print("\nMulti-Perspective Template:")
formatted_multi = format_and_analyze_prompt(
    template=multi_perspective_template,
    context=contradiction_context,
    question=contradiction_question,
)
print(formatted_multi)

# Optional live check of the contradiction template; skipped when no API key is set
if not os.environ.get("OPENAI_API_KEY"):
    print("\nOpenAI API key not set - skipping LLM evaluation")
else:
    try:
        llm = ChatOpenAI(temperature=0)

        print("\nTesting contradiction handling with LLM:")
        contradiction_prompt = contradiction_template.format(
            context=contradiction_context,
            question=contradiction_question,
        )
        contradiction_response = llm.invoke(contradiction_prompt)
        print(f"Response: {contradiction_response.content}")
    except Exception as err:
        # Best-effort demo: report the failure and keep the notebook running
        print(f"Error testing with LLM: {err}")

print_separator()

**Section 4: Approaches for Source Attribution and Citation**

In [None]:
print("Section 4: Approaches for Source Attribution and Citation")

# NOTE(review): all three templates below ask for "[Source X]" citations, while the
# context strings built in this notebook label entries "[Document N]" - confirm the
# intended citation label.

# Inline Citation Format: require a citation immediately after each piece of
# sourced information. Placeholders: {context}, {question}.
inline_citation_template = """
Answer the question using the provided sources.

SOURCES:
{context}

QUESTION:
{question}

Cite your sources using the format [Source X] immediately after each piece
of information that comes from that source. Every factual statement should
have a citation.

ANSWER:
"""

# Evidence Grading: cite each key point and grade its evidence strength.
# Placeholders: {context}, {question}.
evidence_grading_template = """
Answer the question based on the provided information.

INFORMATION:
{context}

QUESTION:
{question}

For each key point in your answer:
1. Cite the relevant source(s) [Source X]
2. Indicate evidence strength (Strong, Moderate, Limited)
3. Note if critical information is missing

ANSWER:
"""

# Source Qualification: enrich citations with recency, source type, and
# cross-source agreement. Placeholders: {context}, {question}.
source_qualification_template = """
Answer using the following information sources.

SOURCES:
{context}

QUESTION:
{question}

When citing sources, include relevant qualifiers:
- Recency: Note publication date when relevant [Source X, 2022]
- Type: Identify source type [Source X, Research Paper]
- Agreement: Mention if multiple sources confirm the information [Sources X, Y]

ANSWER:
"""

# Preview the citation templates: inline citations use the Eiffel Tower context,
# the other two use the coffee context.
print("\nInline Citation Template:")
formatted_inline = format_and_analyze_prompt(
    template=inline_citation_template,
    context=context,
    question="What is the design and history of the Eiffel Tower?",
)
print(formatted_inline)

print("\nEvidence Grading Template:")
formatted_evidence = format_and_analyze_prompt(
    template=evidence_grading_template,
    context=contradiction_context,
    question="What are the health effects of coffee consumption?",
)
print(formatted_evidence)

print("\nSource Qualification Template:")
formatted_qualification = format_and_analyze_prompt(
    template=source_qualification_template,
    context=contradiction_context,
    question="How does coffee affect heart health?",
)
print(formatted_qualification)

print_separator()

**Section 5: Combining Techniques for Complex RAG Scenarios**

In [None]:
print("Section 5: Combining Techniques for Complex RAG Scenarios")

# Comprehensive RAG template combining multiple techniques from the earlier sections:
# a sufficiency check, contradiction acknowledgment, source reliability, structured
# output, citations, and confidence levels. Placeholders: {context}, {question}.
# NOTE(review): citations are requested as "[Source X]" while the context strings
# built in this notebook label entries "[Document N]" - confirm the intended label.
comprehensive_template = """
You are a knowledgeable assistant answering questions based solely on the provided information.

SOURCES:
{context}

QUESTION:
{question}

APPROACH:
1. First, identify if the sources contain sufficient information to answer the question
2. If you notice contradictions, explicitly acknowledge them
3. Consider the reliability and recency of each source
4. Organize your response in a clear, structured manner

RESPONSE REQUIREMENTS:
- Cite sources for every factual claim using [Source X] notation
- Indicate confidence level for claims (High, Medium, Low)
- Clearly state when information is missing or incomplete
- Present a balanced view when multiple perspectives exist
- Begin with a direct answer, followed by supporting details

ANSWER:
"""

# Preview the comprehensive template, then optionally send the same prompt to the LLM.
# The question is named once so the preview and the live call share it.
comprehensive_question = "What is the relationship between coffee consumption and heart health?"

print("\nComprehensive RAG Template:")
formatted_comprehensive = format_and_analyze_prompt(
    template=comprehensive_template,
    context=contradiction_context,
    question=comprehensive_question,
)
print(formatted_comprehensive)

# Live check is skipped when no API key is set
if not os.environ.get("OPENAI_API_KEY"):
    print("\nOpenAI API key not set - skipping LLM evaluation")
else:
    try:
        llm = ChatOpenAI(temperature=0)

        print("\nTesting comprehensive template with LLM:")
        comprehensive_prompt = comprehensive_template.format(
            context=contradiction_context,
            question=comprehensive_question,
        )
        comprehensive_response = llm.invoke(comprehensive_prompt)
        print(f"Response: {comprehensive_response.content}")
    except Exception as err:
        # Best-effort demo: report the failure and keep the notebook running
        print(f"Error testing with LLM: {err}")

print_separator()

**Section 6: Evaluating Prompt Engineering Techniques**

In [None]:
print("Section 6: Evaluating Prompt Engineering Techniques")

def evaluate_template(template_name, template, context, question, expected_elements=None):
    """
    Evaluate a template against specific criteria.

    All scores are keyword heuristics computed from the raw template text; no LLM
    is called and no model response is inspected.

    Args:
        template_name: Name of the template
        template: The template text; formatted with ``context`` and ``question``
        context: Context to use in formatting
        question: Question to use in formatting
        expected_elements: List of elements that should be in a good response.
            Not used by the current heuristics; kept for interface compatibility.
            Defaults to None rather than a shared mutable list.

    Returns:
        Dictionary with evaluation metrics: "name", "tokens" (token count of the
        formatted prompt), "clarity" (0-4), "constraints" (0-3), "guidance" (0-3)
        and "overall" (the three scores summed and divided by 3)
    """
    formatted = template.format(context=context, question=question)
    token_count = count_tokens(formatted)

    # Lower-case once; the constraint and guidance checks are case-insensitive
    template_lower = template.lower()

    # Clarity: one point per section marker present (markers are case-sensitive)
    clarity_score = 0
    if "CONTEXT:" in template or "SOURCES:" in template:
        clarity_score += 1
    if "QUESTION:" in template:
        clarity_score += 1
    if "INSTRUCTIONS:" in template or "APPROACH:" in template:
        clarity_score += 1
    if "ANSWER:" in template:
        clarity_score += 1

    # Constraints: half a point per restrictive phrase found, capped at 3
    constraint_phrases = [
        "only", "exclusively", "solely", "strictly",
        "do not", "avoid", "must", "should", "limit"
    ]
    constraint_score = sum(
        0.5 for phrase in constraint_phrases if phrase in template_lower
    )
    constraint_score = min(constraint_score, 3)  # Cap at 3

    # Guidance: one point each for process wording, a numbered list, and structure wording
    guidance_score = 0
    if "steps" in template_lower or "process" in template_lower:
        guidance_score += 1
    if "1." in template and "2." in template:
        guidance_score += 1
    if "structure" in template_lower:
        guidance_score += 1

    # Overall score
    overall_score = (clarity_score + constraint_score + guidance_score) / 3

    return {
        "name": template_name,
        "tokens": token_count,
        "clarity": clarity_score,
        "constraints": constraint_score,
        "guidance": guidance_score,
        "overall": overall_score
    }

# Unconstrained baseline used as the comparison point in the score table
baseline_template = """
    Answer the question based on the context.

    CONTEXT:
    {context}

    QUESTION:
    {question}

    ANSWER:
    """

# Candidate templates keyed by the name shown in the table
templates_to_evaluate = {
    "Standard": baseline_template,
    "Anti-Hallucination": anti_hallucination_template,
    "Attribution": attribution_template,
    "Reasoning": reasoning_template,
    "Contradiction": contradiction_template,
    "Comprehensive": comprehensive_template,
}

# Score every template against the coffee context and question
evaluation_results = []
for name, template in templates_to_evaluate.items():
    evaluation_results.append(
        evaluate_template(name, template, contradiction_context, contradiction_question)
    )

# Render the scores as a fixed-width, pipe-separated table
print("Template Evaluation Results:")
column_titles = [
    ("Template", 20),
    ("Tokens", 7),
    ("Clarity", 7),
    ("Constraints", 11),
    ("Guidance", 8),
    ("Overall", 7),
]
print(" | ".join(title.ljust(width) for title, width in column_titles))
print("-" * 70)
for result in evaluation_results:
    cells = [
        f"{result['name']:<20}",
        f"{result['tokens']:<7}",
        f"{result['clarity']:<7.1f}",
        f"{result['constraints']:<11.1f}",
        f"{result['guidance']:<8.1f}",
        f"{result['overall']:<7.1f}",
    ]
    print(" | ".join(cells))

print("\nNote: This is a simplified evaluation. For real-world use, templates should be evaluated")
print("based on actual LLM outputs and compared against ground truth answers.")

print_separator()

**Section 7: Real-World Examples and Case Studies**

In [None]:
print("Section 7: Real-World Examples and Case Studies")

# Financial analysis example: four illustrative passages about an Apple earnings
# report. Each Document carries "source" and "date" metadata, which the context
# builder for this case study includes in every document label.
financial_docs = [
    Document(page_content="Apple Inc. reported Q1 2023 revenue of $97.3 billion, beating analyst expectations of $93.9 billion. The company's services segment grew by 17% year-over-year, reaching an all-time high.",
             metadata={"source": "earnings_report", "date": "2023-04-28"}),
    Document(page_content="Apple's gross margin for Q1 2023 was 43.3%, down slightly from 43.7% in the previous quarter but up from 42.5% year-over-year.",
             metadata={"source": "financial_analysis", "date": "2023-04-29"}),
    Document(page_content="The company announced a $90 billion share repurchase program, maintaining its position as the largest dividend payer in the world.",
             metadata={"source": "investor_call_transcript", "date": "2023-04-28"}),
    Document(page_content="Analysts at Morgan Stanley maintained their 'overweight' rating on Apple stock following the earnings report, with a price target of $180.",
             metadata={"source": "analyst_report", "date": "2023-05-01"}),
]

# Domain-specific prompt: an analyst persona, a four-step analysis framework, and
# citation/date/confidence requirements. Placeholders: {context}, {question}.
financial_template = """
You are a financial analyst providing insights based on the latest earnings information.

SOURCES:
{context}

QUESTION:
{question}

ANALYSIS FRAMEWORK:
1. Begin with the key metrics directly addressing the question
2. Provide context and comparisons (quarter-over-quarter, year-over-year)
3. Note any divergence from analyst expectations
4. Include relevant forward-looking statements

When presenting financial information:
- Cite the specific source for each data point [Source X]
- Note the date of each source for temporal context
- Distinguish between reported figures and projections/estimates
- Indicate confidence level when interpreting significance

ANALYSIS:
"""

# Label each financial document with its position, source, and date, then preview the prompt
financial_context = "\n".join(
    f"[Document {position}, {document.metadata.get('source')}, {document.metadata.get('date')}] {document.page_content}"
    for position, document in enumerate(financial_docs, start=1)
)
financial_question = "How did Apple perform in Q1 2023 and what are the key takeaways for investors?"

print("Financial Analysis Case Study:")
formatted_financial = format_and_analyze_prompt(
    template=financial_template,
    context=financial_context,
    question=financial_question,
)
print(formatted_financial)

# Medical information example: four illustrative passages about statin therapy.
# Each Document carries "source", "date", and "type" metadata, which the context
# builder for this case study includes in every document label.
medical_docs = [
    Document(page_content="A 2021 meta-analysis of 15 clinical trials found that statin therapy reduced major cardiovascular events by 24% for each 1 mmol/L reduction in LDL cholesterol.",
             metadata={"source": "journal_cardiology", "date": "2021-03-15", "type": "meta-analysis"}),
    Document(page_content="Common side effects of statins include muscle pain (reported in 5-10% of patients), liver enzyme elevations (0.5-3%), and slightly increased risk of type 2 diabetes.",
             metadata={"source": "medical_guidelines", "date": "2022-01-10", "type": "clinical guidelines"}),
    Document(page_content="For patients with LDL cholesterol above 190 mg/dL, statin therapy is recommended regardless of calculated 10-year ASCVD risk.",
             metadata={"source": "treatment_handbook", "date": "2022-07-22", "type": "clinical recommendation"}),
    Document(page_content="Some studies suggest that CoQ10 supplementation may reduce statin-related muscle pain, though evidence remains inconclusive and larger trials are needed.",
             metadata={"source": "research_review", "date": "2020-11-05", "type": "literature review"}),
]

# Domain-specific prompt: restricts the answer to the supplied literature, asks for
# citations with type and date, and requests a disclaimer.
# Placeholders: {context}, {question}.
medical_template = """
You are a medical information provider answering questions based strictly on the provided medical literature.

MEDICAL SOURCES:
{context}

PATIENT QUESTION:
{question}

RESPONSE GUIDELINES:
1. Present information accurately based ONLY on the provided sources
2. Cite each source with its type and date [Source X, Type, Date]
3. Clearly distinguish between established medical consensus and areas of ongoing research
4. Use precise medical terminology but explain it clearly
5. Include standard medical disclaimer about consulting healthcare providers

INFORMATION RESPONSE:
"""

# Label each medical document with its position, source, type, and date, then preview the prompt
medical_context = "\n".join(
    f"[Document {position}, {document.metadata.get('source')}, {document.metadata.get('type')}, {document.metadata.get('date')}] {document.page_content}"
    for position, document in enumerate(medical_docs, start=1)
)
medical_question = "What are the benefits and risks of statin medications, and are there ways to reduce side effects?"

print("\nMedical Information Case Study:")
formatted_medical = format_and_analyze_prompt(
    template=medical_template,
    context=medical_context,
    question=medical_question,
)
print(formatted_medical)

print_separator()

**Section 8: Practical Techniques for Common RAG Challenges**

In [None]:
print("Section 8: Practical Techniques for Common RAG Challenges")

# Handling long documents: direct the model to scan for relevant sections and cite
# the parts it draws from. Placeholders: {context}, {question}.
long_document_template = """
Answer the question using the provided lengthy document extracts.

DOCUMENT EXTRACTS:
{context}

QUESTION:
{question}

APPROACH:
1. Scan the extracts to locate the most relevant sections
2. Focus on passages that directly address the question
3. Look for headings, topic sentences, and key terms related to the question
4. Synthesize information from multiple relevant sections if needed

When answering:
- Cite the specific parts of the document you're drawing from
- Maintain the original meaning without oversimplification
- If information appears to be missing or incomplete, acknowledge this

ANSWER:
"""

# Handling multiple languages: synthesize across source languages and answer in a
# chosen language. Placeholders: {context}, {question}, {response_language}.
multilingual_template = """
Answer the question based on documents in multiple languages.

MULTILINGUAL SOURCES:
{context}

QUESTION:
{question}

INSTRUCTIONS:
- Identify information relevant to the question regardless of source language
- Synthesize information across language barriers
- Maintain the accuracy of facts and concepts during translation
- Note any cultural or linguistic nuances that affect interpretation
- Provide your answer in {response_language}

ANSWER:
"""

# Handling ambiguous queries: enumerate the plausible interpretations and answer
# each one from the context. Placeholders: {context}, {question}.
ambiguous_query_template = """
The following question may have multiple interpretations. Answer based on the provided context.

CONTEXT:
{context}

AMBIGUOUS QUESTION:
{question}

APPROACH:
1. Identify the possible interpretations of the question
2. For each plausible interpretation:
   a. Note the interpretation
   b. Provide the relevant answer based on the context
   c. Cite supporting information from the context
3. If the context doesn't address certain interpretations, acknowledge this

ANSWER:
"""

# Practical examples: show each template with placeholder context, then report the
# token cost of the template itself by formatting it with an empty context.
print("Long Document Handling Template:")
long_doc_question = "What are the environmental impacts of renewable energy technologies?"
long_doc_preview = long_document_template.format(
    context="[Long document extracts would be here...]",
    question=long_doc_question,
)
print(long_doc_preview)
long_doc_skeleton = long_document_template.format(context='', question=long_doc_question)
print(f"Tokens (without actual content): {count_tokens(long_doc_skeleton)}")

print("\nMultilingual Template:")
multilingual_question = "What are the key differences between Western and Eastern approaches to sustainable development?"
multilingual_preview = multilingual_template.format(
    context="[Multilingual sources would be here...]",
    question=multilingual_question,
    response_language="English",
)
print(multilingual_preview)
multilingual_skeleton = multilingual_template.format(
    context='',
    question=multilingual_question,
    response_language='English',
)
print(f"Tokens (without actual content): {count_tokens(multilingual_skeleton)}")

print("\nAmbiguous Query Template:")
ambiguous_question = "Why did the company change its policy?"
ambiguous_preview = ambiguous_query_template.format(
    context="[Context would be here...]",
    question=ambiguous_question,
)
print(ambiguous_preview)
ambiguous_skeleton = ambiguous_query_template.format(context='', question=ambiguous_question)
print(f"Tokens (without actual content): {count_tokens(ambiguous_skeleton)}")

print_separator()

**Section 9: Putting It All Together - A Prompt Engineering Framework**

In [None]:
# Announce the section in the cell output before the framework class is defined
print("Section 9: Putting It All Together - A Prompt Engineering Framework")

class RAGPromptEngineer:
    """Framework for systematically designing and optimizing RAG prompts.

    A prompt is produced either by formatting one of the named templates held
    in ``self.templates`` or by assembling a custom template whose instruction
    list is chosen from keyword heuristics applied to the query and context.
    """

    def __init__(self):
        # Named templates selectable via generate_prompt(template_type=...).
        # Every entry except "standard" refers to a module-level template
        # string defined elsewhere in the notebook.
        self.templates = {
            "standard": """
                Answer the question based on the context.

                CONTEXT:
                {context}

                QUESTION:
                {question}

                ANSWER:
            """,
            "anti_hallucination": anti_hallucination_template,
            "attribution": attribution_template,
            "reasoning": reasoning_template,
            "contradiction": contradiction_template,
            "comprehensive": comprehensive_template
        }

    @staticmethod
    def _has_whole_word(text, words):
        """Return True if any of `words` occurs in `text` as a standalone word.

        Used for short function words ("and", "or", "how", ...) where plain
        substring tests fire inside unrelated words ("history", "show").
        Matching is case-sensitive; callers pass lower-cased text.
        """
        pattern = r"\b(?:" + "|".join(re.escape(word) for word in words) + r")\b"
        return re.search(pattern, text) is not None

    def analyze_query(self, query):
        """Analyze the query to determine appropriate template features.

        Args:
            query: The user's question as a string.

        Returns:
            Dict of boolean flags with the keys ``needs_reasoning``,
            ``may_have_contradictions``, ``needs_attribution``, ``is_complex``
            and ``is_sensitive``.
        """
        features = {
            "needs_reasoning": False,
            "may_have_contradictions": False,
            "needs_attribution": False,
            "is_complex": False,
            "is_sensitive": False
        }

        lowered = query.lower()

        # Simple heuristics for query analysis.
        # Short function words are matched as whole words so that "how" does
        # not fire on "show"; longer terms are stems matched anywhere so that
        # inflected or prefixed forms ("explains", "unexplained") still count.
        reasoning_stems = ["explain", "reason", "analyze"]
        if self._has_whole_word(lowered, ["why", "how"]) or any(stem in lowered for stem in reasoning_stems):
            features["needs_reasoning"] = True
            features["is_complex"] = True

        comparison_stems = ["compare", "contrast", "difference", "versus"]
        if self._has_whole_word(lowered, ["vs"]) or any(stem in lowered for stem in comparison_stems):
            features["may_have_contradictions"] = True
            features["needs_attribution"] = True

        sensitive_domains = ["medical", "health", "legal", "financial", "political"]
        if any(domain in lowered for domain in sensitive_domains):
            features["needs_attribution"] = True
            features["is_sensitive"] = True

        # Check if the query is complex: long, or joined by a conjunction.
        # The conjunctions must be whole words -- a substring test for "or"
        # matches "for", "history", "more", ... and would flag nearly every
        # query as complex.
        if len(query.split()) > 15 or self._has_whole_word(lowered, ["and", "or"]):
            features["is_complex"] = True

        return features

    def analyze_context(self, context):
        """Analyze the context to determine appropriate template features.

        Args:
            context: The retrieved context as a single string.

        Returns:
            Dict of boolean flags with the keys ``has_multiple_sources``,
            ``has_potential_contradictions``, ``has_technical_content``,
            ``has_numerical_data`` and ``has_temporal_aspects``.
        """
        features = {
            "has_multiple_sources": False,
            "has_potential_contradictions": False,
            "has_technical_content": False,
            "has_numerical_data": False,
            "has_temporal_aspects": False
        }

        lowered = context.lower()

        # Check for multiple sources: more than one "[Document" marker.
        if context.count("[Document") > 1:
            features["has_multiple_sources"] = True

        # Check for potential contradictions (very simplistic approach)
        if "however" in lowered or "contrary" in lowered or "whereas" in lowered:
            features["has_potential_contradictions"] = True

        # Check for technical content
        technical_terms = ["algorithm", "methodology", "procedure", "technical", "mechanism"]
        if any(term in lowered for term in technical_terms):
            features["has_technical_content"] = True

        # Check for numerical data: percentages, dollar amounts, or
        # "<number> million/billion".
        if re.search(r'\d+(?:\.\d+)?%|\$\d+|\d+\s(?:million|billion)', context):
            features["has_numerical_data"] = True

        # Check for temporal aspects. "era" is matched as a whole word because
        # as a substring it occurs inside "general", "several", "operate", ...
        time_stems = ["year", "month", "century", "period", "decade"]
        if any(stem in lowered for stem in time_stems) or self._has_whole_word(lowered, ["era", "eras"]):
            features["has_temporal_aspects"] = True

        return features

    def select_template_components(self, query_features, context_features):
        """Select appropriate template components based on features.

        Args:
            query_features: Flag dict as returned by analyze_query.
            context_features: Flag dict as returned by analyze_context.

        Returns:
            Dict of boolean flags, one per optional instruction that
            build_custom_prompt can emit.
        """
        components = {
            "anti_hallucination": False,
            "source_attribution": False,
            "reasoning_process": False,
            "contradiction_handling": False,
            "evidence_grading": False,
            "response_structuring": False
        }

        # Anti-hallucination is enabled unconditionally.
        components["anti_hallucination"] = True

        # Source attribution
        if query_features["needs_attribution"] or context_features["has_multiple_sources"]:
            components["source_attribution"] = True

        # Reasoning process
        if query_features["needs_reasoning"] or query_features["is_complex"]:
            components["reasoning_process"] = True

        # Contradiction handling
        if query_features["may_have_contradictions"] or context_features["has_potential_contradictions"]:
            components["contradiction_handling"] = True

        # Evidence grading
        if context_features["has_multiple_sources"] or query_features["is_sensitive"]:
            components["evidence_grading"] = True

        # Response structuring
        if query_features["is_complex"] or context_features["has_technical_content"] or context_features["has_numerical_data"]:
            components["response_structuring"] = True

        return components

    def build_custom_prompt(self, query, context):
        """Build a custom prompt template based on query and context analysis.

        Args:
            query: The user's question; only analysed, not inserted.
            context: The retrieved context; only analysed, not inserted.

        Returns:
            An unformatted template string that still contains the
            ``{context}`` and ``{question}`` placeholders.
        """
        query_features = self.analyze_query(query)
        context_features = self.analyze_context(context)
        components = self.select_template_components(query_features, context_features)

        # Instruction lines in the order they appear in the prompt, keyed by
        # the component flag that switches each one on.
        instruction_lines = [
            ("anti_hallucination",
             "- Answer based EXCLUSIVELY on the provided sources. If the sources don't contain sufficient information, say so clearly.\n"),
            ("source_attribution",
             "- Cite sources for each piece of information using [Source X] notation.\n"),
            ("reasoning_process",
             "- Explain your reasoning step by step before providing the final answer.\n"),
            ("contradiction_handling",
             "- If you find contradictory information in the sources, acknowledge the contradictions and present multiple perspectives.\n"),
            ("evidence_grading",
             "- Indicate the strength of evidence (Strong, Moderate, Limited) for key claims.\n"),
            ("response_structuring",
             "- Structure your response with: (1) Direct answer, (2) Supporting evidence, (3) Nuance or limitations.\n"),
        ]

        # Base template: role line, context section, question section, then
        # the header for the instruction list.
        prompt = (
            "You are an assistant answering questions based on provided information.\n\n"
            "SOURCES:\n{context}\n\n"
            "QUESTION:\n{question}\n\n"
            "INSTRUCTIONS:\n"
        )
        prompt += "".join(line for key, line in instruction_lines if components[key])

        # Add answer section
        prompt += "\nANSWER:\n"

        return prompt

    def generate_prompt(self, query, context, template_type="auto"):
        """Generate a formatted prompt based on query, context, and template type.

        Args:
            query: The user's question.
            context: The retrieved context to insert into the template.
            template_type: ``"auto"`` to build a custom template from the
                query/context analysis, or a key of ``self.templates``.
                Unknown keys fall back to the ``"standard"`` template.

        Returns:
            Dict with ``"prompt"`` (the formatted text), ``"tokens"`` (result
            of count_tokens on that text) and ``"template_type"`` -- the name
            of the template actually used (``"custom"`` for the auto path).
        """
        if template_type == "auto":
            template = self.build_custom_prompt(query, context)
            used_type = "custom"
        elif template_type in self.templates:
            template = self.templates[template_type]
            used_type = template_type
        else:
            # Report the fallback rather than echoing a name that was not used.
            template = self.templates["standard"]
            used_type = "standard"

        formatted_prompt = template.format(context=context, question=query)
        tokens = count_tokens(formatted_prompt)

        return {
            "prompt": formatted_prompt,
            "tokens": tokens,
            "template_type": used_type
        }

def _report_prompt(title, result):
    """Print a heading, then the prompt, token count and template type of a generate_prompt result."""
    print(title)
    print(result["prompt"])
    print(f"Tokens: {result['tokens']}")
    print(f"Template Type: {result['template_type']}")


# Test the RAG Prompt Engineer
prompt_engineer = RAGPromptEngineer()

# Financial query example (template chosen automatically)
financial_query = "What were Apple's Q1 2023 financial results and how did they compare to analyst expectations?"
result_financial = prompt_engineer.generate_prompt(financial_query, financial_context)
_report_prompt("Automatically Generated Prompt for Financial Query:", result_financial)

# Medical query example (template chosen automatically)
medical_query = "What are the evidence-based benefits and risks of statins?"
result_medical = prompt_engineer.generate_prompt(medical_query, medical_context)
_report_prompt("\nAutomatically Generated Prompt for Medical Query:", result_medical)

# Try with specific template
result_contradiction = prompt_engineer.generate_prompt(
    query="Does coffee increase the risk of heart disease?",
    context=contradiction_context,
    template_type="contradiction",
)
_report_prompt("\nUsing Specific Contradiction Template:", result_contradiction)

print_separator()



**Section 10: Testing with LLMs (If API Key Available)**

In [None]:
print("Section 10: Testing with LLMs (If API Key Available)")

# The setup cell assigns the literal placeholder "your-api-key" to
# OPENAI_API_KEY, so a merely non-empty value does not mean a usable key is
# configured. Treat the placeholder (and blank values) as "not set" so a fresh
# run prints the setup guidance instead of firing requests that cannot succeed.
openai_api_key = os.environ.get("OPENAI_API_KEY", "")
has_usable_api_key = bool(openai_api_key.strip()) and openai_api_key != "your-api-key"


def _run_template_test(llm, template_name, question, context):
    """Format one entry of templates_to_evaluate, send it to the LLM, print and return the reply.

    Args:
        llm: Chat model exposing ``invoke``.
        template_name: Key into ``templates_to_evaluate``; also used in the printed heading.
        question: Question inserted into the template.
        context: Context inserted into the template.

    Returns:
        The object returned by ``llm.invoke``.
    """
    print(f"\n{template_name} Template Response:")
    response = llm.invoke(
        templates_to_evaluate[template_name].format(
            context=context,
            question=question
        )
    )
    print(response.content)
    return response


if has_usable_api_key:
    # Best-effort demo: any failure (auth, network, missing template) is
    # reported and the notebook continues.
    try:
        llm = ChatOpenAI(temperature=0)

        print("Testing various templates with OpenAI API:")

        test_question = "Does coffee increase the risk of heart disease?"

        # Test with standard template
        standard_result = _run_template_test(llm, "Standard", test_question, contradiction_context)

        # Test with anti-hallucination template
        anti_hallucination_result = _run_template_test(llm, "Anti-Hallucination", test_question, contradiction_context)

        # Test with comprehensive template
        comprehensive_result = _run_template_test(llm, "Comprehensive", test_question, contradiction_context)

        # Test with auto-generated template
        print("\nAuto-Generated Template Response:")
        auto_template = prompt_engineer.generate_prompt(
            test_question,
            contradiction_context
        )["prompt"]

        auto_result = llm.invoke(auto_template)
        print(auto_result.content)

    except Exception as e:
        print(f"Error testing with LLM: {e}")
else:
    print("OpenAI API key not set - skipping LLM testing")
    print("\nTo test with an LLM:")
    print("1. Get an API key from OpenAI")
    print("2. Set it as an environment variable: export OPENAI_API_KEY='your-api-key'")
    print("3. Or add it to this notebook: os.environ['OPENAI_API_KEY'] = 'your-api-key'")
    print("4. Re-run this section to see how different prompting techniques affect the responses")

print_separator()

print("Notebook completed!")

# Key Takeaways:
# 1. Effective prompt engineering can significantly reduce hallucinations in RAG systems
# 2. Different types of queries and contexts benefit from specialized prompt templates
# 3. Combining multiple techniques creates robust prompts for complex scenarios
# 4. Systematic analysis of queries and contexts can guide template selection
# 5. Prompt engineering should be evaluated based on output quality, not just intuition