# Adaptive Retrieval 
本笔记实现一个自适应检索系统：它根据查询类型动态选择最合适的检索策略，从而显著提升 RAG 系统针对各类问题给出准确且相关回答的能力。
不同的问题需要不同的检索策略。这个系统：
- 分类查询类型（事实、分析、意见或上下文）
- 选择适当的检索策略
- 执行专门的检索技术
- 生成定制响应


导入相关的库

In [None]:
import pymupdf
import os
import numpy as np
import json
import openai
from tqdm import tqdm
import re
from datetime import datetime

从pdf提取文本

In [None]:
def extract_text_from_pdf(pdf_path):
    """
    Extract the plain text of every page of a PDF file.

    Args:
        pdf_path (str): Path to the PDF file.

    Returns:
        str: Text of all pages, concatenated in page order.
    """
    # The context manager closes the document even when extraction fails;
    # previously the handle was left open.
    with pymupdf.open(pdf_path) as pdf_document:
        # Collect the per-page text and join once instead of growing a
        # string with repeated `+=`.
        page_texts = [page.get_text("text") for page in pdf_document]

    return "".join(page_texts)

# Path of the sample PDF used as the knowledge source
pdf_path = "data/AI_Information.pdf"


# Run the extraction once as a sanity check
extracted_text = extract_text_from_pdf(pdf_path)

# Show only the first 500 characters of the extracted text
print(extracted_text[:500])

切块

In [None]:
def chunk_text(text, n, overlap):
    """
    Split text into fixed-size character chunks that overlap.

    Args:
        text (str): Text to split.
        n (int): Chunk size in characters; must be positive.
        overlap (int): Number of characters shared by consecutive chunks;
            must be smaller than `n`.

    Returns:
        List[str]: Chunks in order. Trailing chunks may be shorter than `n`.

    Raises:
        ValueError: If `n` is not positive or `overlap` is not smaller
            than `n`.
    """
    if n <= 0:
        raise ValueError(f"chunk size n must be positive, got {n}")
    if overlap >= n:
        # A zero stride makes range() fail with an unrelated message and a
        # negative stride silently produces no chunks at all.
        raise ValueError(
            f"overlap ({overlap}) must be smaller than chunk size n ({n})"
        )

    step = n - overlap
    return [text[start:start + n] for start in range(0, len(text), step)]

配置client

In [None]:
# OpenAI-compatible client pointed at the DashScope (Bailian) endpoint
client = openai.OpenAI(
    api_key=os.getenv("DASHSCOPE_API_KEY"),  # If the environment variable is not set, put your API key here instead
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"  # base_url of the Bailian service
)

简易向量库

In [None]:
class SimpleVectorStore:
    """
    Minimal in-memory vector store with cosine-similarity search.
    """
    def __init__(self):
        # Three parallel lists: index i in each one refers to the same item.
        self.vectors = []
        self.texts = []
        self.metadata = []

    def add_item(self, text, embedding, metadata=None):
        """
        Add one item to the store.

        Args:
            text (str): Text content.
            embedding (List[float]): Embedding vector of the text.
            metadata (Dict, optional): Metadata attached to the text.
                An empty dict is stored when omitted.
        """
        self.vectors.append(np.array(embedding))
        self.texts.append(text)
        self.metadata.append(metadata or {})

    def similarity_search(self, query_embedding, k=5):
        """
        Return the stored items most similar to a query embedding.

        Args:
            query_embedding (List[float]): Embedding vector of the query.
            k (int, optional): Maximum number of results to return.

        Returns:
            List[Dict]: Up to `k` dicts with keys "text", "metadata" and
            "similarity" (cosine similarity), sorted by similarity in
            descending order. Empty when the store is empty.
        """
        if not self.vectors:
            return []

        query_vector = np.array(query_embedding)
        # The query norm does not depend on the stored vector, so compute
        # it once instead of once per comparison.
        query_norm = np.linalg.norm(query_vector)

        scored = []
        for idx, vector in enumerate(self.vectors):
            denominator = query_norm * np.linalg.norm(vector)
            if denominator == 0:
                # Cosine similarity is undefined for a zero vector; score it
                # 0.0 rather than letting NaN into the ranking.
                similarity = 0.0
            else:
                similarity = float(np.dot(query_vector, vector) / denominator)
            scored.append((idx, similarity))

        # list.sort is stable, so ties keep insertion order.
        scored.sort(key=lambda pair: pair[1], reverse=True)

        # max(k, 0) keeps a negative k returning no results, as before.
        return [
            {
                "text": self.texts[idx],
                "metadata": self.metadata[idx],
                "similarity": score
            }
            for idx, score in scored[:max(k, 0)]
        ]

向量化

In [None]:
def create_embeddings_in_batches(text_chunks, model="text-embedding-v3", batch_size_limit=10):
    """
    Embed a list of texts through the embeddings API, in batches.

    Args:
        text_chunks (List[str] | str): Texts to embed. A single string is
            treated as a one-element list; any other iterable is turned
            into a list.
        model (str): Embedding model to use.
        batch_size_limit (int): Maximum number of texts per API request;
            must be positive.

    Returns:
        List[List[float]]: One embedding per input text, in input order.
        Empty when `text_chunks` is empty.

    Raises:
        ValueError: If `batch_size_limit` is not positive.
        Exception: Any error raised by the API call is re-raised after the
            failing batch has been reported.
    """
    if not text_chunks:
        return []

    if isinstance(text_chunks, str):
        text_chunks = [text_chunks]
    elif not isinstance(text_chunks, list):
        # A tuple or other iterable of strings used to be wrapped as one
        # nested item; convert it to a list so it can be sliced into batches.
        text_chunks = list(text_chunks)

    if batch_size_limit <= 0:
        # A negative step would make range() empty and silently return no
        # embeddings.
        raise ValueError(
            f"batch_size_limit must be positive, got {batch_size_limit}"
        )

    all_embeddings = []
    for start in range(0, len(text_chunks), batch_size_limit):
        batch = text_chunks[start:start + batch_size_limit]
        try:
            response = client.embeddings.create(
                input=batch,
                model=model,
                encoding_format="float"
            )
        except Exception as e:
            print(f"Error processing batch starting with chunk: '{batch[0][:50]}...'")
            print(f"API Error: {e}")
            # Bare raise keeps the original traceback.
            raise

        # Collect this batch's embeddings in the order they were returned.
        all_embeddings.extend(item.embedding for item in response.data)

    return all_embeddings

def create_embeddings(text, model="text-embedding-v3"):
    """
    Embed a single piece of text.

    Args:
        text (str): Text to embed.
        model (str): Embedding model to use.

    Returns:
        List[float]: Embedding of the first item in the API response.
    """
    embedding_response = client.embeddings.create(model=model, input=text)
    first_item = embedding_response.data[0]
    return first_item.embedding

文本处理流程

In [None]:
def process_document(pdf_path, chunk_size=1000, chunk_overlap=200):
    """
    Build an in-memory vector store from a PDF document.

    Steps:
      1. Extract the text of the PDF.
      2. Split it into overlapping chunks.
      3. Embed the chunks.
      4. Store every chunk with its embedding and metadata.

    Args:
        pdf_path (str): Path to the PDF file.
        chunk_size (int): Chunk size in characters.
        chunk_overlap (int): Overlap between consecutive chunks in characters.

    Returns:
        Tuple[List[str], SimpleVectorStore]: The text chunks and the
        populated vector store.
    """
    print("Extracting text from PDF...")
    document_text = extract_text_from_pdf(pdf_path)

    print("Chunking text...")
    chunks = chunk_text(document_text, chunk_size, chunk_overlap)
    print(f"Created {len(chunks)} text chunks")

    print("Creating embeddings for chunks...")
    vectors = create_embeddings_in_batches(chunks)

    store = SimpleVectorStore()
    for position, (piece, vector) in enumerate(zip(chunks, vectors)):
        # Every chunk starts with the same score and feedback count.
        item_metadata = {
            "index": position,
            "source": pdf_path,
            "relevance_score": 1.0,
            "feedback_count": 0
        }
        store.add_item(text=piece, embedding=vector, metadata=item_metadata)

    print(f"Added {len(chunks)} chunks to the vector store")
    return chunks, store

请求分类

In [None]:
def classify_query(query, model="qwen-turbo"):
    """
    Classify a query as Factual, Analytical, Opinion, or Contextual.

    Args:
        query (str): The user query.
        model (str): Chat model used for the classification.

    Returns:
        str: One of "Factual", "Analytical", "Opinion", "Contextual".
        "Factual" is returned when no category can be recognised in the
        model's reply.
    """
    system_prompt = """你是一位问题分类专家。
    请将给定的问题精确地归入以下类别之一：
    - 事实型 (Factual)：寻求具体、可验证信息的问题。
    - 分析型 (Analytical)：需要全面分析或解释的问题。
    - 观点型 (Opinion)：关于主观事务或寻求不同观点的问题。
    - 情景型 (Contextual)：依赖于用户特定情景的问题。

    请只返回类别名称，不要包含任何解释或其他文字。
    """

    user_prompt = f"Classify this query: {query}"

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        temperature=0
    )

    # Treat a missing reply as empty text instead of crashing on None.
    category = (response.choices[0].message.content or "").strip()

    # (English label, Chinese label used in the prompt), in priority order.
    labels = (
        ("Factual", "事实"),
        ("Analytical", "分析"),
        ("Opinion", "观点"),
        ("Contextual", "情景"),
    )

    # First pass: exact English label, the original matching rule.
    for english, _ in labels:
        if english in category:
            return english

    # Second pass: the prompt names each category in Chinese with the English
    # name in parentheses, so the model may answer in Chinese only or in a
    # different letter case. Without this pass such replies were all
    # classified as "Factual".
    lowered = category.lower()
    for english, chinese in labels:
        if english.lower() in lowered or chinese in category:
            return english

    # Nothing recognisable: fall back to the default category.
    return "Factual"

## 专门的检索策略

Factual策略 - 注重精准

In [None]:
def factual_retrieval_strategy(query, vector_store, k=4):
    """
    Retrieval strategy for factual queries, focused on precision.

    The query is rewritten by the chat model to be more specific, `2 * k`
    candidates are retrieved by vector similarity, and each candidate is
    re-scored by the chat model for relevance.

    Args:
        query (str): The user query.
        vector_store (SimpleVectorStore): Store to search.
        k (int): Number of documents to return.

    Returns:
        List[Dict]: Up to `k` dicts with keys "text", "metadata",
        "similarity" and "relevance_score", sorted by "relevance_score"
        in descending order.
    """
    print(f"Executing Factual retrieval strategy for: '{query}'")

    system_prompt = """你是优化搜索查询的专家。
        你的任务是重新构造给定的事实性查询，使其在信息检索时更精确、更具体。
        请重点关注关键实体及其之间的关系。
        仅提供优化后的查询，不作任何解释。
    """

    user_prompt = f"优化下面的请求: {query}"

    response = client.chat.completions.create(
        model="qwen-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        temperature=0
    )

    enhanced_query = response.choices[0].message.content.strip()
    print(f"Enhanced query: {enhanced_query}")

    query_embedding = create_embeddings(enhanced_query)

    # Over-fetch so the re-ranking step has candidates to choose from.
    initial_results = vector_store.similarity_search(query_embedding, k=k*2)

    # The original called `score_document_relevance`, which is not defined in
    # this file; the private scorer below takes its place.
    ranked_results = [
        {
            "text": doc["text"],
            "metadata": doc["metadata"],
            "similarity": doc["similarity"],
            "relevance_score": _score_document_relevance(enhanced_query, doc["text"])
        }
        for doc in initial_results
    ]

    ranked_results.sort(key=lambda x: x["relevance_score"], reverse=True)

    return ranked_results[:k]


def _score_document_relevance(query, document, model="qwen-turbo"):
    """
    Ask the chat model to rate, from 0 to 10, how well a document answers a query.

    Args:
        query (str): The (possibly rewritten) query.
        document (str): Document text; only the first 1500 characters are sent.
        model (str): Chat model used for scoring.

    Returns:
        float: Score clamped to [0, 10]; 5.0 when the reply contains no number.
    """
    system_prompt = """
    你是一位评估文档相关性的专家。
    根据文档在多大程度上解决了查询，对其进行评分，评分范围为 0 到 10，其中：
    0 = 完全不相关
    10 = 完美地解决了查询
    仅返回一个 0 到 10 之间的数字分数，不要返回其他任何内容。
    """

    # Truncate long documents to keep the prompt small.
    doc_preview = document[:1500] + "..." if len(document) > 1500 else document

    user_prompt = f"""
    Query: {query}

    Document: {doc_preview}

    Relevance score (0-10):
    """

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        temperature=0
    )

    score_text = (response.choices[0].message.content or "").strip()

    # Take the first number in the reply; fall back to a neutral score.
    match = re.search(r"\d+(\.\d+)?", score_text)
    if match is None:
        return 5.0
    return min(10.0, max(0.0, float(match.group(0))))

Analytical - 全面覆盖

In [None]:
def analytical_retrieval_strategy(query, vector_store, k=4):
    """
    Retrieval strategy for analytical queries, focused on broad coverage.

    The chat model breaks the query into sub-questions, the top two chunks
    are retrieved for each one, and duplicates are dropped by text. If fewer
    than `k` distinct chunks remain, results for the original query are used
    to fill the gap.

    Args:
        query (str): The user query.
        vector_store (SimpleVectorStore): Store to search.
        k (int): Number of documents to return.

    Returns:
        List[Dict]: Up to `k` distinct search results.
    """
    print(f"Executing Analytical retrieval strategy for: '{query}'")

    system_prompt = """
    你是分解复杂问题的专家。
    生成子问题，以探究主要分析性查询的不同方面。
    这些子问题应涵盖主题的广度，并有助于检索全面的信息。
    返回一个包含恰好 3 个子问题的列表，每行一个。
    """

    user_prompt = f"为下面分析型query生成子查询: {query}"

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]
    response = client.chat.completions.create(
        model="qwen-turbo",
        messages=messages,
        temperature=0.3
    )

    # One sub-question per non-empty line of the reply.
    raw_lines = response.choices[0].message.content.strip().split('\n')
    sub_queries = [line.strip() for line in raw_lines if line.strip()]
    print(f"Generated sub-queries: {sub_queries}")

    # Retrieve the top two chunks for every sub-question.
    candidates = []
    for sub_query in sub_queries:
        embedding = create_embeddings(sub_query)
        candidates.extend(vector_store.similarity_search(embedding, k=2))

    # Keep the first occurrence of each distinct chunk text.
    seen_texts = set()
    diverse_results = []
    for candidate in candidates:
        text = candidate["text"]
        if text in seen_texts:
            continue
        seen_texts.add(text)
        diverse_results.append(candidate)

    # Top up from the original query when the sub-questions found too little.
    if len(diverse_results) < k:
        fallback = vector_store.similarity_search(create_embeddings(query), k=k)
        for candidate in fallback:
            if len(diverse_results) < k and candidate["text"] not in seen_texts:
                seen_texts.add(candidate["text"])
                diverse_results.append(candidate)

    return diverse_results[:k]

Opinion - 多元化视角

In [None]:
def opinion_retrieval_strategy(query, vector_store, k=4):
    """
    Retrieval strategy for opinion queries, focused on diverse viewpoints.

    The chat model lists viewpoints on the topic, the top two chunks are
    retrieved for each "query + viewpoint" combination, one distinct chunk
    is picked per viewpoint, and any remaining slots are filled with the
    most similar chunks not yet selected.

    Args:
        query (str): The user query.
        vector_store (SimpleVectorStore): Store to search.
        k (int): Number of documents to return.

    Returns:
        List[Dict]: Up to `k` search results with distinct text, each
        carrying an extra "viewpoint" key.
    """
    print(f"Executing Opinion retrieval strategy for: '{query}'")

    system_prompt = """你是识别某个主题不同观点的专家。
        对于给定的关于意见或观点的查询，请识别人们可能对该主题持有的不同观点。
        返回一个包含恰好 3 个不同观点角度的列表，每行一个。
    """

    user_prompt = f"提出关于下面查询的不同角度观点: {query}"

    # Use the same chat model as the other strategies. The previous id
    # ("meta-llama/Llama-3.2-3B-Instruct") was a leftover that does not match
    # the DashScope client configured for this notebook.
    response = client.chat.completions.create(
        model="qwen-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        temperature=0.3
    )

    # One viewpoint per non-empty line of the reply.
    viewpoints = response.choices[0].message.content.strip().split('\n')
    viewpoints = [v.strip() for v in viewpoints if v.strip()]
    print(f"Identified viewpoints: {viewpoints}")

    all_results = []
    for viewpoint in viewpoints:
        combined_query = f"{query} {viewpoint}"
        viewpoint_embedding = create_embeddings(combined_query)
        results = vector_store.similarity_search(viewpoint_embedding, k=2)

        # Remember which viewpoint produced each result.
        for result in results:
            result["viewpoint"] = viewpoint

        all_results.extend(results)

    selected_results = []
    seen_texts = set()

    # Pick, for every viewpoint, its best chunk that is not already selected,
    # so the same chunk cannot take several slots.
    for viewpoint in viewpoints:
        for candidate in all_results:
            if candidate.get("viewpoint") == viewpoint and candidate["text"] not in seen_texts:
                seen_texts.add(candidate["text"])
                selected_results.append(candidate)
                break

    # Fill the remaining slots with the most similar unseen chunks.
    if len(selected_results) < k:
        leftovers = sorted(
            (r for r in all_results if r["text"] not in seen_texts),
            key=lambda x: x["similarity"],
            reverse=True
        )
        for candidate in leftovers:
            if len(selected_results) >= k:
                break
            if candidate["text"] in seen_texts:
                continue
            seen_texts.add(candidate["text"])
            selected_results.append(candidate)

    return selected_results[:k]

Contextual - 用户上下文集成

In [None]:
def contextual_retrieval_strategy(query, vector_store, k=4, user_context=None):
    """
    Retrieval strategy for contextual queries, which folds in user context.

    When no context is supplied, the chat model infers the implied context.
    The query is then reformulated to include that context, `2 * k`
    candidates are retrieved by vector similarity, and each candidate is
    re-scored for relevance given the context.

    Args:
        query (str): The user query.
        vector_store (SimpleVectorStore): Store to search.
        k (int): Number of documents to return.
        user_context (str, optional): Explicit user context. Inferred from
            the query when empty or None.

    Returns:
        List[Dict]: Up to `k` dicts with keys "text", "metadata",
        "similarity" and "context_relevance", sorted by "context_relevance"
        in descending order.
    """
    print(f"Executing Contextual retrieval strategy for: '{query}'")

    # Use one chat model for every call in this strategy. The previous
    # "meta-llama/Llama-3.2-3B-Instruct" id was a leftover that does not
    # match the DashScope client configured for this notebook.
    chat_model = "qwen-turbo"

    if not user_context:
        system_prompt = """You are an expert at understanding implied context in questions.
For the given query, infer what contextual information might be relevant or implied 
but not explicitly stated. Focus on what background would help answering this query.

Return a brief description of the implied context."""

        user_prompt = f"Infer the implied context in this query: {query}"

        response = client.chat.completions.create(
            model=chat_model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.1
        )

        user_context = response.choices[0].message.content.strip()
        print(f"Inferred context: {user_context}")

    # This step rewrites the query. It used to reuse the context-inference
    # instructions, which asked for a description of the implied context
    # instead of a reformulated query.
    system_prompt = """
    你是一位结合上下文重新表述问题的专家。
    给定一个查询和一些上下文信息，请创建一个融入该上下文的、更具体的查询，以便检索到更相关的信息。
    仅返回重新表述后的查询，不作任何解释。"""

    user_prompt = f"""
    Query: {query}
    Context: {user_context}

    重新格式化查询以合并此上下文:"""

    response = client.chat.completions.create(
        model=chat_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        temperature=0
    )

    contextualized_query = response.choices[0].message.content.strip()
    print(f"Contextualized query: {contextualized_query}")

    query_embedding = create_embeddings(contextualized_query)
    # Over-fetch so the re-ranking step has candidates to choose from.
    initial_results = vector_store.similarity_search(query_embedding, k=k*2)

    ranked_results = []
    for doc in initial_results:
        # Score against the original query plus the context, not the
        # rewritten query.
        context_relevance = score_document_context_relevance(
            query, user_context, doc["text"], model=chat_model
        )
        ranked_results.append({
            "text": doc["text"],
            "metadata": doc["metadata"],
            "similarity": doc["similarity"],
            "context_relevance": context_relevance
        })

    ranked_results.sort(key=lambda x: x["context_relevance"], reverse=True)
    return ranked_results[:k]

In [None]:
def score_document_context_relevance(query, context, document, model="qwen-turbo"):
    """
    Ask the chat model to rate how relevant a document is to a query, given a context.

    Args:
        query (str): The user query.
        context (str): Context to take into account when scoring.
        document (str): Document text; only the first 1500 characters are sent.
        model (str): Chat model used for scoring. The default matches the
            model used by the other chat calls in this file; the previous
            default ("meta-llama/Llama-3.2-3B-Instruct") was a leftover that
            does not match the configured DashScope client.

    Returns:
        float: Score clamped to [0, 10]; 5.0 when the reply contains no number.
    """
    # The stray "Y" that used to open this prompt has been removed.
    system_prompt = """
    你是一位在考虑上下文的情况下评估文档相关性的专家。
    根据文档在多大程度上解决了查询（当考虑到所提供的上下文时），对其进行评分，评分范围为 0 到 10，其中：
    0 = 完全不相关
    10 = 在给定上下文中完美地解决了查询
    仅返回一个 0 到 10 之间的数字分数，不要返回其他任何内容。
    """

    # Truncate long documents to keep the prompt small.
    doc_preview = document[:1500] + "..." if len(document) > 1500 else document

    user_prompt = f"""
    Query: {query}
    Context: {context}

    Document: {doc_preview}

    Relevance score considering context (0-10):
    """

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        temperature=0
    )

    score_text = response.choices[0].message.content.strip()

    # Take the first number in the reply and clamp it to the valid range.
    match = re.search(r'(\d+(\.\d+)?)', score_text)
    if match:
        score = float(match.group(1))
        return min(10, max(0, score))

    # No number in the reply: fall back to a neutral score.
    return 5.0

选择策略

In [None]:
def adaptive_retrieval(query, vector_store, k=4, user_context=None):
    """
    Classify the query and run the matching retrieval strategy.

    Args:
        query (str): The user query.
        vector_store (SimpleVectorStore): Store to search.
        k (int): Number of documents to return.
        user_context (str, optional): Context forwarded to the contextual
            strategy only.

    Returns:
        List[Dict]: Documents returned by the selected strategy.
    """
    query_type = classify_query(query)
    print(f"Query classified as: {query_type}")

    if query_type == "Contextual":
        # The only strategy that takes the user context.
        return contextual_retrieval_strategy(query, vector_store, k, user_context)
    if query_type == "Opinion":
        return opinion_retrieval_strategy(query, vector_store, k)
    if query_type == "Analytical":
        return analytical_retrieval_strategy(query, vector_store, k)

    # "Factual" and any unrecognised type use the factual strategy.
    return factual_retrieval_strategy(query, vector_store, k)

响应生成

In [None]:
def generate_response(query, results, query_type, model="qwen-turbo"):
    """
    Generate an answer from the retrieved documents, with a prompt tailored to the query type.

    Args:
        query (str): The user query.
        results (List[Dict]): Retrieved documents; the "text" of each one is
            used as context.
        query_type (str): "Factual", "Analytical", "Opinion" or "Contextual".
            Any other value selects a generic prompt.
        model (str): Chat model used for generation. The default matches the
            model used by the other chat calls in this file; the previous
            default ("meta-llama/Llama-3.2-3B-Instruct") was a leftover that
            does not match the configured DashScope client.

    Returns:
        str: The generated answer.
    """
    # Separate the documents with a visible divider inside the prompt.
    context = "\n\n---\n\n".join([r["text"] for r in results])

    if query_type == "Factual":
        system_prompt = """你是一位提供事实信息的乐于助人的助手。
        根据提供的上下文回答问题。注重准确性和精确性。
        如果上下文中不包含所需信息，请承认其局限性。"""

    elif query_type == "Analytical":
        system_prompt = """你是一位提供分析见解的乐于助人的助手。
        根据提供的上下文，对该主题进行全面分析。
        在你的解释中涵盖不同的方面和观点。
        如果上下文存在不足，请在提供尽可能最佳分析的同时承认这些不足。"""

    elif query_type == "Opinion":
        system_prompt = """你是一位乐于助人的助手，能够讨论包含多种观点的主题。
        根据提供的上下文，针对该主题提出不同的观点。
        确保公正地呈现各种意见，不带偏见。
        承认上下文中观点有限的地方。"""

    elif query_type == "Contextual":
        system_prompt = """你是一位乐于助人的助手，提供与上下文相关的信息。
        同时考虑查询及其上下文来回答问题。
        将查询上下文与所提供文档中的信息联系起来。
        如果上下文未能完全解决具体情况，请承认其局限性。"""

    else:
        # Generic prompt for any other query type (e.g. "General").
        system_prompt = """你是一位乐于助人的助手。请根据提供的上下文回答问题。如果你无法从上下文中找到答案，请承认其局限性。"""

    user_prompt = f"""
    Context:
    {context}

    Question: {query}

    请根据上下文提供有用的答复。
    """

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        temperature=0.2
    )

    return response.choices[0].message.content

实现Adaptive Retrieval 的 RAG流程

In [None]:
def rag_with_adaptive_retrieval(pdf_path, query, k=4, user_context=None):
    """
    Run the full RAG pipeline with adaptive retrieval for one query.

    Args:
        pdf_path (str): Path to the PDF used as the knowledge source.
        query (str): The user query.
        k (int): Number of documents to retrieve.
        user_context (str, optional): Context forwarded to the retrieval step.

    Returns:
        Dict: Keys "query", "query_type", "retrieved_documents" and "response".
    """
    print("\n=== RAG WITH ADAPTIVE RETRIEVAL ===")
    print(f"Query: {query}")

    # Only the vector store is needed here; the chunk list is ignored.
    _chunks, vector_store = process_document(pdf_path)

    # The type is needed for response generation; adaptive_retrieval
    # classifies the query again internally.
    query_type = classify_query(query)
    print(f"Query classified as: {query_type}")

    retrieved_docs = adaptive_retrieval(query, vector_store, k, user_context)
    response = generate_response(query, retrieved_docs, query_type)

    print("\n=== RESPONSE ===")
    print(response)

    return {
        "query": query,
        "query_type": query_type,
        "retrieved_documents": retrieved_docs,
        "response": response
    }

评估框架

In [None]:
def evaluate_adaptive_vs_standard(pdf_path, test_queries, reference_answers=None):
    """
    Run standard and adaptive retrieval on each test query and compare them.

    The document is processed once. For every query, one answer is produced
    from plain similarity search with a generic prompt and one from the
    adaptive pipeline. When reference answers are given, the two answers are
    also compared against them.

    Args:
        pdf_path (str): Path to the PDF used as the knowledge source.
        test_queries (List[str]): Queries to evaluate.
        reference_answers (List[str], optional): Reference answers, matched
            to the queries by position.

    Returns:
        Dict: "results" holds one entry per query; "comparison" holds the
        comparison text, or a notice when no reference answers were given.
    """
    print("=== EVALUATING ADAPTIVE VS. STANDARD RETRIEVAL ===")

    _chunks, vector_store = process_document(pdf_path)

    results = []
    for query_number, query in enumerate(test_queries, start=1):
        print(f"\n\nQuery {query_number}: {query}")

        # Baseline: plain similarity search plus a generic prompt.
        print("\n--- Standard Retrieval ---")
        standard_docs = vector_store.similarity_search(create_embeddings(query), k=4)
        standard_response = generate_response(query, standard_docs, "General")

        # Adaptive: strategy and prompt depend on the query type.
        print("\n--- Adaptive Retrieval ---")
        query_type = classify_query(query)
        adaptive_docs = adaptive_retrieval(query, vector_store, k=4)
        adaptive_response = generate_response(query, adaptive_docs, query_type)

        entry = {
            "query": query,
            "query_type": query_type,
            "standard_retrieval": {
                "documents": standard_docs,
                "response": standard_response
            },
            "adaptive_retrieval": {
                "documents": adaptive_docs,
                "response": adaptive_response
            }
        }

        # Attach the reference answer at the same position, if there is one.
        if reference_answers and query_number <= len(reference_answers):
            entry["reference_answer"] = reference_answers[query_number - 1]

        results.append(entry)

        # Short preview of both answers for a quick comparison.
        print("\n--- Responses ---")
        print(f"Standard: {standard_response[:200]}...")
        print(f"Adaptive: {adaptive_response[:200]}...")

    if reference_answers:
        comparison = compare_responses(results)
        print("\n=== EVALUATION RESULTS ===")
        print(comparison)
    else:
        comparison = "No reference answers provided for evaluation"

    return {
        "results": results,
        "comparison": comparison
    }

比较结果

In [None]:
def compare_responses(results, model="qwen-turbo"):
    """
    Compare standard and adaptive responses against reference answers.

    Args:
        results (List[Dict]): Per-query results holding both responses.
            Entries without a "reference_answer" key are skipped.
        model (str): Chat model used to write the comparison. The default
            matches the model used by the other chat calls in this file; the
            previously hard-coded "meta-llama/Llama-3.2-3B-Instruct" was a
            leftover that does not match the configured DashScope client.

    Returns:
        str: Markdown text with one section per compared query. Only the
        header is returned when nothing can be compared.
    """
    # System prompt that tells the model how to compare the responses.
    comparison_prompt = """You are an expert evaluator of information retrieval systems.
    Compare the standard retrieval and adaptive retrieval responses for each query.
    Consider factors like accuracy, relevance, comprehensiveness, and alignment with the reference answer.
    Provide a detailed analysis of the strengths and weaknesses of each approach."""

    # Collect the pieces and join once instead of repeated string `+=`.
    sections = ["# Evaluation of Standard vs. Adaptive Retrieval\n\n"]

    for i, result in enumerate(results):
        # Nothing to compare against without a reference answer.
        if "reference_answer" not in result:
            continue

        standard_response = result['standard_retrieval']['response']
        adaptive_response = result['adaptive_retrieval']['response']

        sections.append(f"## Query {i+1}: {result['query']}\n")
        sections.append(f"*Query Type: {result['query_type']}*\n\n")
        sections.append(f"**Reference Answer:**\n{result['reference_answer']}\n\n")
        sections.append(f"**Standard Retrieval Response:**\n{standard_response}\n\n")
        sections.append(f"**Adaptive Retrieval Response:**\n{adaptive_response}\n\n")

        user_prompt = f"""
        Reference Answer: {result['reference_answer']}
        
        Standard Retrieval Response: {standard_response}
        
        Adaptive Retrieval Response: {adaptive_response}
        
        Provide a detailed comparison of the two responses.
        """

        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": comparison_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.2
        )

        sections.append(f"**Comparison Analysis:**\n{response.choices[0].message.content}\n\n")

    return "".join(sections)

In [None]:
# Evaluation inputs. Only the factual query is enabled; the others are
# commented out. Reference answers are matched to queries by position, so
# enable or disable both lists together.
pdf_path = "data/AI_Information.pdf"
test_queries = [
    "What is Explainable AI (XAI)?",                                              # Factual query - seeking definition/specific information
    # "How do AI ethics and governance frameworks address potential societal impacts?",  # Analytical query - requiring comprehensive analysis
    # "Is AI development moving too fast for proper regulation?",                   # Opinion query - seeking diverse perspectives
    # "How might explainable AI help in healthcare decisions?",                     # Contextual query - benefits from context-awareness
]

# One reference answer per enabled query, in the same order as test_queries
reference_answers = [
    "Explainable AI (XAI) aims to make AI systems transparent and understandable by providing clear explanations of how decisions are made. This helps users trust and effectively manage AI technologies.",
    # "AI ethics and governance frameworks address potential societal impacts by establishing guidelines and principles to ensure AI systems are developed and used responsibly. These frameworks focus on fairness, accountability, transparency, and the protection of human rights to mitigate risks and promote beneficial output.5.",
    # "Opinions on whether AI development is moving too fast for proper regulation vary. Some argue that rapid advancements outpace regulatory efforts, leading to potential risks and ethical concerns. Others believe that innovation should continue at its current pace, with regulations evolving alongside to address emerging challenges.",
    # "Explainable AI can significantly aid healthcare decisions by providing transparent and understandable insights into AI-driven recommendations. This transparency helps healthcare professionals trust AI systems, make informed decisions, and improve patient output by understanding the rationale behind AI suggestions."
]

In [None]:
# Run standard and adaptive retrieval on every test query and compare them
evaluation_results = evaluate_adaptive_vs_standard(
    pdf_path=pdf_path,                  # Source document for knowledge extraction
    test_queries=test_queries,          # List of test queries to evaluate
    reference_answers=reference_answers  # Optional ground truth for comparison
)

# The results will show a detailed comparison between standard retrieval and 
# adaptive retrieval performance across different query types, highlighting
# where adaptive strategies provide improved outcomes
print(evaluation_results["comparison"])