In [None]:
# nb31_researcher_with_rag.ipynb
# Stage 4: Multi-Agent Orchestrator - Researcher with RAG Integration
"""
Goals:
- Integrate Stage 2 RAG components into Researcher agent role
- Implement knowledge retrieval with citation tracking
- Create research synthesis with source attribution
- Enable multi-source evidence gathering and summarization
- Build foundation for 4-role orchestrator collaboration

Prerequisites:
- Completed nb10-nb19 (RAG basics)
- Completed nb30 (orchestrator skeleton)
- Basic understanding of agent roles and blackboard patterns
"""

# Cell1:  Shared Cache Bootstrap
import os, pathlib, torch
import sys
from datetime import datetime

# Shared cache configuration (複製到每本 notebook)
AI_CACHE_ROOT = os.getenv("AI_CACHE_ROOT", "../ai_warehouse/cache")

for k, v in {
    "HF_HOME": f"{AI_CACHE_ROOT}/hf",
    "TRANSFORMERS_CACHE": f"{AI_CACHE_ROOT}/hf/transformers",
    "HF_DATASETS_CACHE": f"{AI_CACHE_ROOT}/hf/datasets",
    "HUGGINGFACE_HUB_CACHE": f"{AI_CACHE_ROOT}/hf/hub",
    "TORCH_HOME": f"{AI_CACHE_ROOT}/torch",
}.items():
    os.environ[k] = v
    pathlib.Path(v).mkdir(parents=True, exist_ok=True)
print("[Cache]", AI_CACHE_ROOT, "| GPU:", torch.cuda.is_available())

[Cache] ../ai_warehouse/cache | GPU: True


In [None]:
# ================================
# Cell 2: Dependencies and Imports
# ================================

import json
import time
from typing import List, Dict, Any, Tuple, Optional
from dataclasses import dataclass, asdict
from pathlib import Path

# Core ML components
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer

# RAG components
import faiss
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Utility
import re
from datetime import datetime
import logging

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

print("✓ Dependencies loaded")

In [None]:
# ================================
# Cell 3: RAG Components Integration
# ================================


class RAGRetriever:
    """Lightweight RAG retriever for Researcher agent"""

    def __init__(
        self,
        embedding_model: str = "BAAI/bge-m3",
        index_path: str = "indices/general.faiss",
        chunks_path: str = "indices/chunks.jsonl",
        device: str = "auto",
    ):

        self.embedding_model = embedding_model
        self.index_path = Path(index_path)
        self.chunks_path = Path(chunks_path)

        # Load embedding model
        print(f"Loading embedding model: {embedding_model}")
        self.embedder = SentenceTransformer(embedding_model, device=device)

        # Load or create index
        self.index = None
        self.chunks = []
        self._load_index()

    def _load_index(self):
        """Load FAISS index and chunks metadata"""
        try:
            if self.index_path.exists() and self.chunks_path.exists():
                # Load existing index
                self.index = faiss.read_index(str(self.index_path))

                # Load chunks metadata
                with open(self.chunks_path, "r", encoding="utf-8") as f:
                    self.chunks = [json.loads(line) for line in f if line.strip()]

                print(
                    f"✓ Loaded index with {self.index.ntotal} vectors and {len(self.chunks)} chunks"
                )
            else:
                print("⚠ No existing index found - will create sample data")
                self._create_sample_index()

        except Exception as e:
            logger.error(f"Error loading index: {e}")
            self._create_sample_index()

    def _create_sample_index(self):
        """Create sample index for demonstration"""
        # Sample Chinese documents for RAG demonstration
        sample_docs = [
            {
                "text": "RAG（檢索增強生成）是一種結合資訊檢索與生成式AI的技術架構。它能夠讓語言模型在生成回答時，先從外部知識庫中檢索相關資訊，再基於這些資訊生成更準確、更有根據的回答。",
                "meta": {
                    "source_id": "rag_intro",
                    "title": "RAG技術介紹",
                    "section": "基本概念",
                },
            },
            {
                "text": "多代理系統（Multi-Agent System）是由多個自主智能體組成的系統，每個智能體都有自己的目標和行為模式。在AI應用中，不同角色的代理可以協作完成複雜任務，如研究、規劃、寫作和審核。",
                "meta": {
                    "source_id": "mas_intro",
                    "title": "多代理系統",
                    "section": "系統架構",
                },
            },
            {
                "text": "向量檢索使用向量相似度來找到語義相關的文檔片段。常用的方法包括餘弦相似度、歐氏距離和內積。FAISS是Facebook開發的高效向量檢索庫，支持大規模向量索引和快速相似度搜索。",
                "meta": {
                    "source_id": "vector_search",
                    "title": "向量檢索技術",
                    "section": "檢索算法",
                },
            },
            {
                "text": "中文文本處理面臨獨特挑戰，包括分詞、繁簡轉換、語義分段等。BGE-M3是針對中文優化的多語言嵌入模型，在中文語義理解任務上表現優秀。",
                "meta": {
                    "source_id": "zh_nlp",
                    "title": "中文NLP處理",
                    "section": "技術挑戰",
                },
            },
            {
                "text": "引用標註（Citation）在RAG系統中至關重要，它確保生成的內容有據可依。常見格式包括括號標註[1][2]和腳註形式，同時需要提供完整的來源資訊以便驗證。",
                "meta": {
                    "source_id": "citation",
                    "title": "引用標註系統",
                    "section": "實現方法",
                },
            },
        ]

        # Create chunks
        self.chunks = []
        texts = []
        for i, doc in enumerate(sample_docs):
            chunk = {"id": i, "text": doc["text"], "meta": doc["meta"]}
            self.chunks.append(chunk)
            texts.append(doc["text"])

        # Create embeddings
        print("Creating sample embeddings...")
        embeddings = self.embedder.encode(texts, normalize_embeddings=True)
        embeddings = embeddings.astype(np.float32)

        # Create FAISS index
        dim = embeddings.shape[1]
        self.index = faiss.IndexFlatIP(dim)  # Inner product for normalized vectors
        self.index.add(embeddings)

        # Save index and chunks
        self.index_path.parent.mkdir(parents=True, exist_ok=True)
        faiss.write_index(self.index, str(self.index_path))

        with open(self.chunks_path, "w", encoding="utf-8") as f:
            for chunk in self.chunks:
                f.write(json.dumps(chunk, ensure_ascii=False) + "\n")

        print(f"✓ Created sample index with {len(self.chunks)} chunks")

    def retrieve(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
        """Retrieve relevant chunks for query"""
        if not self.index or not self.chunks:
            return []

        # Encode query
        query_embedding = self.embedder.encode([query], normalize_embeddings=True)
        query_embedding = query_embedding.astype(np.float32)

        # Search
        scores, indices = self.index.search(
            query_embedding, min(top_k, len(self.chunks))
        )

        # Format results
        results = []
        for score, idx in zip(scores[0], indices[0]):
            if idx >= 0 and idx < len(self.chunks):  # Valid index
                chunk = self.chunks[idx].copy()
                chunk["score"] = float(score)
                results.append(chunk)

        return results


# Test RAG retriever
print("Setting up RAG retriever...")
rag_retriever = RAGRetriever()

In [None]:
# ================================
# Cell 4: Researcher Agent Class
# ================================


@dataclass
class ResearchTask:
    """Research task specification"""

    query: str
    domain: str = "general"
    max_sources: int = 5
    depth: str = "moderate"  # surface, moderate, deep
    citation_style: str = "brackets"  # brackets, footnotes
    language: str = "zh"


@dataclass
class ResearchResult:
    """Research result with citations"""

    summary: str
    key_findings: List[str]
    sources: List[Dict[str, Any]]
    confidence: float
    timestamp: str
    task: ResearchTask


class ResearcherAgent:
    """Researcher agent with RAG integration"""

    def __init__(
        self,
        llm_model: str = "Qwen/Qwen2.5-7B-Instruct",
        rag_retriever: RAGRetriever = None,
        max_new_tokens: int = 1024,
        temperature: float = 0.3,
    ):

        self.model_name = llm_model
        self.rag_retriever = rag_retriever
        self.max_new_tokens = max_new_tokens
        self.temperature = temperature

        # Load LLM
        print(f"Loading LLM: {llm_model}")
        self.tokenizer = AutoTokenizer.from_pretrained(llm_model, use_fast=True)
        self.model = AutoModelForCausalLM.from_pretrained(
            llm_model,
            device_map="auto",
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            trust_remote_code=True,
        )

        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        print(f"✓ Researcher agent initialized")

    def _build_research_prompt(
        self, task: ResearchTask, retrieved_chunks: List[Dict]
    ) -> str:
        """Build research prompt with retrieved context"""

        # Build context from retrieved chunks
        context_parts = []
        for i, chunk in enumerate(retrieved_chunks, 1):
            source_info = chunk["meta"]
            source_desc = f"{source_info.get('title', 'Unknown')} - {source_info.get('section', 'N/A')}"
            context_parts.append(f"[來源{i}] {source_desc}\n{chunk['text']}")

        context = "\n\n".join(context_parts)

        # Build citations list
        citations = []
        for i, chunk in enumerate(retrieved_chunks, 1):
            meta = chunk["meta"]
            citation = (
                f"[{i}] {meta.get('source_id', 'N/A')} - {meta.get('title', 'Unknown')}"
            )
            citations.append(citation)

        citations_text = "\n".join(citations)

        # Research prompt template
        system_prompt = """你是一位專業的研究助理，擅長從多個來源整合資訊並產生有據可依的研究摘要。

任務要求：
1. 基於提供的來源資料回答研究問題
2. 在回答中使用 [1], [2] 等標註引用來源
3. 提供關鍵發現列表
4. 確保內容準確且有根據
5. 使用繁體中文回答

回答格式：
## 研究摘要
[基於來源資料的綜合分析]

## 關鍵發現
- [發現1] [引用]
- [發現2] [引用]
- [發現3] [引用]

不要透露內部思考過程，直接提供研究結果。"""

        user_prompt = f"""研究問題：{task.query}

參考資料：
{context}

請基於以上資料進行研究分析，並在回答中適當引用來源。

來源列表：
{citations_text}"""

        return system_prompt, user_prompt

    def _generate_response(self, system_prompt: str, user_prompt: str) -> str:
        """Generate LLM response"""
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]

        # Apply chat template
        formatted_prompt = self.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        # Tokenize
        inputs = self.tokenizer(
            formatted_prompt,
            return_tensors="pt",
            truncation=True,
            max_length=3072,  # Leave room for generation
        ).to(self.model.device)

        # Generate
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=self.max_new_tokens,
                temperature=self.temperature,
                do_sample=self.temperature > 0,
                pad_token_id=self.tokenizer.eos_token_id,
                eos_token_id=self.tokenizer.eos_token_id,
            )

        # Decode response
        response = self.tokenizer.decode(
            outputs[0][inputs["input_ids"].shape[1] :], skip_special_tokens=True
        ).strip()

        return response

    def _parse_research_response(self, response: str) -> Tuple[str, List[str]]:
        """Parse research response to extract summary and key findings"""
        lines = response.split("\n")

        summary_lines = []
        findings = []
        current_section = "summary"

        for line in lines:
            line = line.strip()
            if not line:
                continue

            if "## 關鍵發現" in line or "關鍵發現" in line:
                current_section = "findings"
                continue
            elif "##" in line and current_section == "findings":
                break

            if current_section == "summary":
                if not line.startswith("#"):
                    summary_lines.append(line)
            elif current_section == "findings":
                if line.startswith("-") or line.startswith("•"):
                    findings.append(line.lstrip("- •").strip())

        summary = " ".join(summary_lines).strip()

        return summary, findings

    def research(self, task: ResearchTask) -> ResearchResult:
        """Conduct research on given task"""
        start_time = time.time()

        logger.info(f"Starting research on: {task.query}")

        # Step 1: Retrieve relevant information
        if self.rag_retriever:
            retrieved_chunks = self.rag_retriever.retrieve(
                task.query, top_k=task.max_sources
            )
            logger.info(f"Retrieved {len(retrieved_chunks)} relevant chunks")
        else:
            retrieved_chunks = []
            logger.warning("No RAG retriever available")

        # Step 2: Generate research response
        if retrieved_chunks:
            system_prompt, user_prompt = self._build_research_prompt(
                task, retrieved_chunks
            )
            response = self._generate_response(system_prompt, user_prompt)
        else:
            # Fallback: direct response without RAG
            response = self._generate_fallback_response(task)

        # Step 3: Parse response
        summary, key_findings = self._parse_research_response(response)

        # Step 4: Build result
        sources = []
        for chunk in retrieved_chunks:
            source = {
                "id": chunk.get("id"),
                "text": (
                    chunk["text"][:200] + "..."
                    if len(chunk["text"]) > 200
                    else chunk["text"]
                ),
                "meta": chunk["meta"],
                "score": chunk.get("score", 0.0),
            }
            sources.append(source)

        # Calculate confidence based on retrieval scores and content length
        if sources:
            avg_score = np.mean([s["score"] for s in sources])
            confidence = min(0.95, max(0.3, avg_score))
        else:
            confidence = 0.2  # Low confidence without sources

        elapsed_time = time.time() - start_time
        logger.info(
            f"Research completed in {elapsed_time:.2f}s with confidence {confidence:.2f}"
        )

        result = ResearchResult(
            summary=summary,
            key_findings=key_findings,
            sources=sources,
            confidence=confidence,
            timestamp=datetime.now().isoformat(),
            task=task,
        )

        return result

    def _generate_fallback_response(self, task: ResearchTask) -> str:
        """Generate response without RAG (fallback)"""
        system_prompt = """你是一位研究助理。基於你的知識回答問題，並明確說明這是基於一般知識而非特定資料來源的回答。"""

        user_prompt = f"""研究問題：{task.query}

請提供一個結構化的回答，包含：
## 研究摘要
## 關鍵發現

注意：此回答基於一般知識，未使用特定資料來源。"""

        return self._generate_response(system_prompt, user_prompt)


# Initialize researcher agent
print("Initializing Researcher Agent...")
researcher = ResearcherAgent(rag_retriever=rag_retriever)

In [None]:
# ================================
# Cell 5: Smoke Test - Basic Research
# ================================


def test_researcher_basic():
    """Smoke test for researcher agent"""
    print("=== Smoke Test: Basic Research ===")

    # Define research task
    task = ResearchTask(
        query="什麼是RAG技術？它有哪些主要優勢？",
        domain="ai",
        max_sources=3,
        depth="moderate",
    )

    print(f"Research Task: {task.query}")
    print("Running research...")

    # Conduct research
    result = researcher.research(task)

    # Display results
    print("\n📊 Research Results:")
    print(f"Confidence: {result.confidence:.2f}")
    print(f"Sources used: {len(result.sources)}")

    print(f"\n📝 Summary:")
    print(result.summary)

    print(f"\n🔍 Key Findings:")
    for i, finding in enumerate(result.key_findings, 1):
        print(f"{i}. {finding}")

    print(f"\n📚 Sources:")
    for i, source in enumerate(result.sources, 1):
        meta = source["meta"]
        print(f"[{i}] {meta.get('title', 'Unknown')} (Score: {source['score']:.3f})")
        print(f"    {source['text']}")

    # Validation
    assert result.summary, "Summary should not be empty"
    assert result.key_findings, "Should have key findings"
    assert result.confidence > 0, "Confidence should be positive"

    print("\n✅ Smoke test passed!")
    return result


# Run smoke test
smoke_result = test_researcher_basic()

In [None]:
# ================================
# Cell 6: Advanced Research Features
# ================================


class AdvancedResearcher(ResearcherAgent):
    """Enhanced researcher with advanced features"""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.research_history = []

    def multi_query_research(
        self, main_query: str, sub_queries: List[str]
    ) -> ResearchResult:
        """Research with multiple related queries"""
        print(f"Conducting multi-query research on: {main_query}")

        all_chunks = []

        # Retrieve for main query
        main_chunks = self.rag_retriever.retrieve(main_query, top_k=3)
        all_chunks.extend(main_chunks)

        # Retrieve for sub-queries
        for sub_query in sub_queries:
            sub_chunks = self.rag_retriever.retrieve(sub_query, top_k=2)
            all_chunks.extend(sub_chunks)

        # Deduplicate by chunk ID
        seen_ids = set()
        unique_chunks = []
        for chunk in all_chunks:
            chunk_id = chunk.get("id")
            if chunk_id not in seen_ids:
                unique_chunks.append(chunk)
                seen_ids.add(chunk_id)

        # Sort by score and take top results
        unique_chunks.sort(key=lambda x: x.get("score", 0), reverse=True)
        top_chunks = unique_chunks[:5]

        # Create comprehensive task
        task = ResearchTask(query=main_query, max_sources=len(top_chunks), depth="deep")

        # Generate response with all context
        system_prompt, user_prompt = self._build_research_prompt(task, top_chunks)

        # Enhanced prompt for multi-query research
        enhanced_prompt = (
            user_prompt
            + f"""

相關子問題：
{chr(10).join(f"- {sq}" for sq in sub_queries)}

請提供一個綜合性的研究分析，涵蓋主要問題和相關子問題。"""
        )

        response = self._generate_response(system_prompt, enhanced_prompt)
        summary, key_findings = self._parse_research_response(response)

        # Build enhanced result
        sources = []
        for chunk in top_chunks:
            source = {
                "id": chunk.get("id"),
                "text": (
                    chunk["text"][:300] + "..."
                    if len(chunk["text"]) > 300
                    else chunk["text"]
                ),
                "meta": chunk["meta"],
                "score": chunk.get("score", 0.0),
            }
            sources.append(source)

        result = ResearchResult(
            summary=summary,
            key_findings=key_findings,
            sources=sources,
            confidence=min(0.9, np.mean([s["score"] for s in sources]) + 0.1),
            timestamp=datetime.now().isoformat(),
            task=task,
        )

        self.research_history.append(result)
        return result

    def get_research_summary(self) -> Dict[str, Any]:
        """Get summary of research history"""
        if not self.research_history:
            return {"total_research": 0, "avg_confidence": 0, "top_topics": []}

        total = len(self.research_history)
        avg_confidence = np.mean([r.confidence for r in self.research_history])

        # Extract topics from queries
        topics = [r.task.query for r in self.research_history]

        return {
            "total_research": total,
            "avg_confidence": avg_confidence,
            "recent_topics": topics[-5:],  # Last 5 topics
            "best_confidence": max(r.confidence for r in self.research_history),
        }


# Test advanced researcher
advanced_researcher = AdvancedResearcher(rag_retriever=rag_retriever)


def test_multi_query_research():
    """Test multi-query research capability"""
    print("\n=== Testing Multi-Query Research ===")

    main_query = "多代理系統在AI中的應用"
    sub_queries = ["代理協作的基本原理", "多代理系統的優勢", "實際應用案例"]

    result = advanced_researcher.multi_query_research(main_query, sub_queries)

    print(f"Main Query: {main_query}")
    print(f"Sub-queries: {sub_queries}")
    print(f"\nResult Confidence: {result.confidence:.2f}")
    print(f"Sources: {len(result.sources)}")
    print(f"\nSummary:\n{result.summary}")

    return result


multi_result = test_multi_query_research()

In [None]:
# ================================
# Cell 7: Integration with Blackboard
# ================================


class Blackboard:
    """Simple blackboard for agent communication"""

    def __init__(self):
        self.data = {}
        self.history = []

    def write(self, key: str, value: Any, agent: str = "unknown"):
        """Write data to blackboard"""
        self.data[key] = value
        self.history.append(
            {
                "action": "write",
                "key": key,
                "agent": agent,
                "timestamp": datetime.now().isoformat(),
            }
        )

    def read(self, key: str) -> Any:
        """Read data from blackboard"""
        return self.data.get(key)

    def get_all(self) -> Dict[str, Any]:
        """Get all data"""
        return self.data.copy()


def integrate_researcher_with_blackboard():
    """Demonstrate researcher integration with blackboard"""
    print("\n=== Researcher + Blackboard Integration ===")

    # Create blackboard
    blackboard = Blackboard()

    # Simulate research workflow
    blackboard.write(
        "research_request",
        {"topic": "向量檢索技術的發展", "urgency": "medium", "deadline": "2024-12-31"},
        agent="planner",
    )

    # Researcher reads request
    request = blackboard.read("research_request")
    print(f"Research request: {request}")

    # Conduct research
    task = ResearchTask(query=request["topic"], depth="moderate", max_sources=4)

    result = researcher.research(task)

    # Write results to blackboard
    research_output = {
        "summary": result.summary,
        "key_findings": result.key_findings,
        "source_count": len(result.sources),
        "confidence": result.confidence,
        "completed_at": result.timestamp,
    }

    blackboard.write("research_results", research_output, agent="researcher")

    print(f"\nResearch completed and written to blackboard:")
    print(f"Confidence: {result.confidence:.2f}")
    print(f"Key findings: {len(result.key_findings)}")

    # Other agents can now read the results
    planner_view = blackboard.read("research_results")
    print(f"\nPlanner can access: {list(planner_view.keys())}")

    return blackboard, result


bb, bb_result = integrate_researcher_with_blackboard()

In [None]:
# ================================
# Cell 8: Performance Metrics and Monitoring
# ================================


class ResearchMetrics:
    """Track researcher performance metrics"""

    def __init__(self):
        self.metrics = {
            "total_queries": 0,
            "avg_response_time": 0,
            "avg_confidence": 0,
            "source_utilization": 0,
            "citation_accuracy": 0,
        }
        self.query_times = []
        self.confidences = []
        self.source_counts = []

    def record_research(self, result: ResearchResult, response_time: float):
        """Record metrics for a research session"""
        self.metrics["total_queries"] += 1
        self.query_times.append(response_time)
        self.confidences.append(result.confidence)
        self.source_counts.append(len(result.sources))

        # Update averages
        self.metrics["avg_response_time"] = np.mean(self.query_times)
        self.metrics["avg_confidence"] = np.mean(self.confidences)
        self.metrics["source_utilization"] = np.mean(self.source_counts)

    def get_summary(self) -> Dict[str, Any]:
        """Get metrics summary"""
        if self.metrics["total_queries"] == 0:
            return {"status": "no_data"}

        return {
            "total_queries": self.metrics["total_queries"],
            "avg_response_time": f"{self.metrics['avg_response_time']:.2f}s",
            "avg_confidence": f"{self.metrics['avg_confidence']:.2f}",
            "avg_sources_per_query": f"{self.metrics['source_utilization']:.1f}",
            "performance_score": min(
                1.0,
                self.metrics["avg_confidence"]
                * (1 / max(1, self.metrics["avg_response_time"] / 5)),
            ),
        }


# Test metrics tracking
metrics = ResearchMetrics()


def benchmark_researcher():
    """Benchmark researcher performance"""
    print("\n=== Researcher Benchmark ===")

    test_queries = [
        "RAG技術的核心組件有哪些？",
        "向量檢索如何提升搜索準確性？",
        "中文NLP處理面臨什麼挑戰？",
        "引用標註在學術寫作中的重要性",
        "多代理系統的協作機制",
    ]

    results = []

    for query in test_queries:
        print(f"\nTesting: {query}")

        start_time = time.time()
        task = ResearchTask(query=query, max_sources=3)
        result = researcher.research(task)
        end_time = time.time()

        response_time = end_time - start_time
        metrics.record_research(result, response_time)

        results.append(
            {
                "query": query,
                "confidence": result.confidence,
                "sources": len(result.sources),
                "time": response_time,
            }
        )

        print(
            f"  Confidence: {result.confidence:.2f}, Sources: {len(result.sources)}, Time: {response_time:.2f}s"
        )

    # Display summary
    summary = metrics.get_summary()
    print(f"\n📊 Benchmark Summary:")
    for key, value in summary.items():
        print(f"  {key}: {value}")

    return results, summary


benchmark_results, benchmark_summary = benchmark_researcher()

In [None]:
# ================================
# Cell 9: Error Handling and Robustness
# ================================


class RobustResearcher(ResearcherAgent):
    """Researcher with enhanced error handling"""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.error_count = 0
        self.max_retries = 3

    def safe_research(self, task: ResearchTask) -> Optional[ResearchResult]:
        """Research with error handling and retries"""
        for attempt in range(self.max_retries):
            try:
                print(f"Research attempt {attempt + 1}/{self.max_retries}")
                return self.research(task)

            except torch.cuda.OutOfMemoryError:
                print("⚠ CUDA OOM - clearing cache and retrying...")
                torch.cuda.empty_cache()
                self.error_count += 1

            except Exception as e:
                print(f"⚠ Research error: {str(e)[:100]}")
                self.error_count += 1

                if attempt == self.max_retries - 1:
                    # Return fallback result
                    return self._create_fallback_result(task, str(e))

                time.sleep(1)  # Brief pause before retry

        return None

    def _create_fallback_result(
        self, task: ResearchTask, error_msg: str
    ) -> ResearchResult:
        """Create fallback result when research fails"""
        return ResearchResult(
            summary=f"無法完成對「{task.query}」的研究分析。錯誤：{error_msg[:50]}...",
            key_findings=["研究過程中遇到技術問題", "建議稍後重試或調整查詢"],
            sources=[],
            confidence=0.1,
            timestamp=datetime.now().isoformat(),
            task=task,
        )

    def get_error_stats(self) -> Dict[str, Any]:
        """Get error statistics"""
        return {
            "total_errors": self.error_count,
            "error_rate": self.error_count / max(1, metrics.metrics["total_queries"]),
            "status": "healthy" if self.error_count < 3 else "needs_attention",
        }


# Test robust researcher
robust_researcher = RobustResearcher(rag_retriever=rag_retriever)


def test_error_handling():
    """Test error handling capabilities"""
    print("\n=== Testing Error Handling ===")

    # Test with normal query
    normal_task = ResearchTask(query="什麼是機器學習？")
    result = robust_researcher.safe_research(normal_task)

    print(f"Normal query result: {result.confidence:.2f}")

    # Test with potentially problematic query (very long)
    long_query = "很長的查詢 " * 100  # Artificially long query
    long_task = ResearchTask(query=long_query)

    try:
        result = robust_researcher.safe_research(long_task)
        print(f"Long query handled: {result.confidence:.2f}")
    except Exception as e:
        print(f"Long query failed: {e}")

    # Get error stats
    error_stats = robust_researcher.get_error_stats()
    print(f"Error stats: {error_stats}")

    return error_stats


error_stats = test_error_handling()

In [None]:
# ================================
# Cell 10: Export and Integration Utils
# ================================


def save_research_result(result: ResearchResult, output_dir: str = "outs"):
    """Save research result to file"""
    output_path = Path(output_dir)
    output_path.mkdir(exist_ok=True)

    # Create filename from timestamp and query
    safe_query = re.sub(r"[^\w\s-]", "", result.task.query)[:50]
    safe_query = re.sub(r"[-\s]+", "-", safe_query)
    filename = f"research_{safe_query}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"

    filepath = output_path / filename

    # Convert to serializable format
    data = {
        "task": asdict(result.task),
        "result": {
            "summary": result.summary,
            "key_findings": result.key_findings,
            "sources": result.sources,
            "confidence": result.confidence,
            "timestamp": result.timestamp,
        },
        "metadata": {
            "model": researcher.model_name,
            "embedding_model": rag_retriever.embedding_model,
            "total_sources": len(result.sources),
        },
    }

    with open(filepath, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    print(f"Research result saved to: {filepath}")
    return str(filepath)


def load_research_result(filepath: str) -> ResearchResult:
    """Load research result from file"""
    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)

    task = ResearchTask(**data["task"])
    result_data = data["result"]

    result = ResearchResult(
        summary=result_data["summary"],
        key_findings=result_data["key_findings"],
        sources=result_data["sources"],
        confidence=result_data["confidence"],
        timestamp=result_data["timestamp"],
        task=task,
    )

    return result


def create_research_report(results: List[ResearchResult]) -> str:
    """Create formatted research report from multiple results"""
    report_lines = [
        "# 研究報告",
        f"生成時間：{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"總研究數量：{len(results)}",
        "",
    ]

    for i, result in enumerate(results, 1):
        report_lines.extend(
            [
                f"## 研究 {i}: {result.task.query}",
                f"**可信度：** {result.confidence:.2f}",
                f"**來源數量：** {len(result.sources)}",
                "",
                "### 摘要",
                result.summary,
                "",
                "### 關鍵發現",
            ]
        )

        for j, finding in enumerate(result.key_findings, 1):
            report_lines.append(f"{j}. {finding}")

        report_lines.extend(
            [
                "",
                "### 參考來源",
            ]
        )

        for j, source in enumerate(result.sources, 1):
            meta = source["meta"]
            title = meta.get("title", "Unknown")
            score = source.get("score", 0)
            report_lines.append(f"{j}. {title} (相關度: {score:.3f})")

        report_lines.append("\n---\n")

    return "\n".join(report_lines)


# Test export functionality
def test_export_functionality():
    """Test research result export and reporting"""
    print("\n=== Testing Export Functionality ===")

    # Save a research result
    filepath = save_research_result(smoke_result)

    # Load it back
    loaded_result = load_research_result(filepath)
    print(f"Loaded result confidence: {loaded_result.confidence:.2f}")

    # Create a report with multiple results
    sample_results = (
        [smoke_result, multi_result] if "multi_result" in locals() else [smoke_result]
    )
    report = create_research_report(sample_results)

    # Save report
    report_path = (
        Path("outs") / f"research_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
    )
    with open(report_path, "w", encoding="utf-8") as f:
        f.write(report)

    print(f"Report saved to: {report_path}")
    print(f"Report length: {len(report)} characters")

    return report_path


report_path = test_export_functionality()

In [None]:
# ================================
# Cell 11: Final Integration Test
# ================================


def comprehensive_integration_test():
    """Comprehensive test of all researcher features"""
    print("\n" + "=" * 50)
    print("🧪 COMPREHENSIVE RESEARCHER INTEGRATION TEST")
    print("=" * 50)

    test_scenarios = [
        {
            "name": "Basic RAG Research",
            "query": "解釋向量檢索的工作原理",
            "expected_sources": 2,
        },
        {
            "name": "Multi-domain Query",
            "query": "RAG技術在多代理系統中的應用",
            "expected_sources": 3,
        },
        {
            "name": "Technical Deep Dive",
            "query": "FAISS索引的優化策略",
            "expected_sources": 2,
        },
    ]

    all_results = []
    total_start = time.time()

    for i, scenario in enumerate(test_scenarios, 1):
        print(f"\n📋 Test {i}: {scenario['name']}")
        print(f"Query: {scenario['query']}")

        start = time.time()

        # Create task
        task = ResearchTask(
            query=scenario["query"],
            max_sources=scenario["expected_sources"],
            depth="moderate",
        )

        # Execute research
        result = robust_researcher.safe_research(task)

        if result:
            elapsed = time.time() - start
            all_results.append(result)

            print(f"✅ Completed in {elapsed:.2f}s")
            print(f"   Confidence: {result.confidence:.2f}")
            print(f"   Sources: {len(result.sources)}")
            print(f"   Findings: {len(result.key_findings)}")

            # Validate result quality
            if result.confidence > 0.3 and len(result.sources) > 0:
                print("   ✅ Quality check passed")
            else:
                print("   ⚠ Quality check warning")
        else:
            print("   ❌ Research failed")

    total_time = time.time() - total_start

    # Generate summary report
    if all_results:
        print(f"\n📊 INTEGRATION TEST SUMMARY")
        print(f"Total time: {total_time:.2f}s")
        print(
            f"Success rate: {len(all_results)}/{len(test_scenarios)} ({len(all_results)/len(test_scenarios)*100:.1f}%)"
        )
        print(f"Average confidence: {np.mean([r.confidence for r in all_results]):.2f}")
        print(
            f"Average sources per query: {np.mean([len(r.sources) for r in all_results]):.1f}"
        )

        # Save comprehensive report
        final_report = create_research_report(all_results)
        final_report_path = (
            Path("outs")
            / f"integration_test_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
        )
        with open(final_report_path, "w", encoding="utf-8") as f:
            f.write(final_report)

        print(f"Final report saved: {final_report_path}")

        return all_results, final_report_path
    else:
        print("❌ All tests failed")
        return [], None


integration_results, final_report_path = comprehensive_integration_test()

In [None]:
# ================================
# Cell 12: Summary and Next Steps
# ================================

print("\n" + "=" * 60)
print("📋 NB31 SUMMARY: RESEARCHER WITH RAG INTEGRATION")
print("=" * 60)

print(
    """
## 🎯 Goals Completed:
✅ Integrated Stage 2 RAG components into Researcher agent role
✅ Implemented knowledge retrieval with citation tracking
✅ Created research synthesis with source attribution
✅ Enabled multi-source evidence gathering and summarization
✅ Built foundation for 4-role orchestrator collaboration

## 🔧 Core Components Built:
• RAGRetriever: 向量檢索與FAISS索引整合
• ResearcherAgent: 具備RAG能力的研究助理
• AdvancedResearcher: 多查詢與歷史追蹤
• RobustResearcher: 錯誤處理與重試機制
• Blackboard Integration: 與多代理黑板系統整合
• Research Metrics: 效能追蹤與基準測試

## 🏗 Key Parameters (Low-VRAM Optimized):
• Embedding Model: BAAI/bge-m3 (multilingual, efficient)
• LLM: Qwen2.5-7B-Instruct (device_map="auto", fp16)
• Max Context: 3072 tokens (留空間給生成)
• Top-K Retrieval: 3-5 chunks (平衡品質與速度)
• Temperature: 0.3 (研究任務需要一致性)
• Batch Processing: 支援批次嵌入以提升效率

## 🧪 Smoke Test Results:"""
)

if "smoke_result" in locals():
    print(f"• Basic Research: ✅ (Confidence: {smoke_result.confidence:.2f})")
if "multi_result" in locals():
    print(f"• Multi-Query: ✅ (Confidence: {multi_result.confidence:.2f})")
if "error_stats" in locals():
    print(f"• Error Handling: ✅ (Status: {error_stats.get('status', 'unknown')})")
if "benchmark_summary" in locals():
    print(f"• Performance: ✅ (Avg: {benchmark_summary.get('avg_confidence', 'N/A')})")

print(
    f"""
## ⚡ Performance Metrics:"""
)
if "metrics" in locals():
    summary = metrics.get_summary()
    if summary.get("status") != "no_data":
        for key, value in summary.items():
            print(f"• {key}: {value}")

print(
    f"""
## 🎮 When to Use This:
• 需要基於特定知識庫的研究分析
• 多代理協作中的資訊收集角色
• 學術寫作的文獻調研階段
• 技術文檔的背景資料整理
• 需要引用來源的內容生成

## ⚠ Pitfalls & Solutions:
• 索引為空 → 檢查data/目錄與chunks.jsonl
• CUDA OOM → 降低batch_size或使用CPU
• 引用格式錯亂 → 確保chunks包含正確meta資料
• 檢索不相關 → 調整embedding模型或查詢改寫
• 生成過長 → 限制max_new_tokens與context長度

## 🚀 Next Steps (nb32):
• Planner Agent: 基於研究結果生成大綱
• Writer-Researcher協作: 共享黑板通信
• 結構化輸出: JSON schema驗證
• 多輪對話: 追問與深入研究
• 領域專精: 技術/法律/教育等專業模式
"""
)

print("\n" + "=" * 60)
print("🎉 NB31 COMPLETED - Ready for nb32_planner_outline.ipynb")
print("=" * 60)