In [1]:
# nb32_planner_outline.ipynb
# Goals: Implement Planner agent for structured outline generation
# Prerequisites: LLMAdapter, basic message formatting, style dictionary concepts

# Cell 1: Shared Cache Bootstrap
import os, pathlib, torch
import sys
from datetime import datetime

# Shared cache configuration (copy this cell into every notebook)
AI_CACHE_ROOT = os.getenv("AI_CACHE_ROOT", "../ai_warehouse/cache")

# Each (environment variable, sub-directory) pair is exported and its
# directory is created under the shared cache root.
for env_name, subdir in (
    ("HF_HOME", "hf"),
    ("TRANSFORMERS_CACHE", "hf/transformers"),
    ("HF_DATASETS_CACHE", "hf/datasets"),
    ("HUGGINGFACE_HUB_CACHE", "hf/hub"),
    ("TORCH_HOME", "torch"),
):
    cache_dir = f"{AI_CACHE_ROOT}/{subdir}"
    os.environ[env_name] = cache_dir
    pathlib.Path(cache_dir).mkdir(parents=True, exist_ok=True)

print("[Cache]", AI_CACHE_ROOT, "| GPU:", torch.cuda.is_available())

[Cache] ../ai_warehouse/cache | GPU: True


In [None]:
# Cell 2: Import Dependencies
from transformers import AutoTokenizer, AutoModelForCausalLM
import json
import re
from typing import Dict, List, Any, Optional
from dataclasses import dataclass
import yaml

In [None]:
# Cell 3: LLMAdapter (Minimal)
class LLMAdapter:
    """Minimal wrapper around a Hugging Face causal LM for message-based generation.

    The constructor loads the tokenizer and model for ``model_id``;
    ``generate`` flattens a list of role/content messages into a single
    prompt and returns only the newly generated text.
    """

    def __init__(self, model_id="Qwen/Qwen2.5-7B-Instruct"):
        """Load tokenizer and model for ``model_id`` (fp16, ``device_map="auto"``)."""
        self.tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id, device_map="auto", torch_dtype=torch.float16
        )
        # Reuse EOS as the padding token when the tokenizer defines none.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def generate(self, messages, max_new_tokens=512, temperature=0.7):
        """Generate a reply for ``messages`` and return it as stripped text.

        Args:
            messages: Iterable of dicts with ``"role"`` and ``"content"`` keys.
            max_new_tokens: Maximum number of tokens to generate.
            temperature: Sampling temperature. A value that is falsy or not
                positive switches to greedy decoding instead of being passed
                to the sampler.

        Returns:
            The decoded continuation only (the prompt is not included).
        """
        # Simple message formatting: one "ROLE: content" line per message.
        # NOTE(review): instruct checkpoints usually ship a chat template;
        # consider tokenizer.apply_chat_template if output quality matters.
        prompt = "\n".join(
            f"{msg['role'].upper()}: {msg['content']}" for msg in messages
        )

        inputs = self.tokenizer(
            prompt, return_tensors="pt", truncation=True, max_length=2048
        )
        inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
        # Number of prompt tokens actually fed to the model (after truncation).
        prompt_token_count = inputs["input_ids"].shape[-1]

        generation_kwargs = {
            "max_new_tokens": max_new_tokens,
            "pad_token_id": self.tokenizer.eos_token_id,
        }
        if temperature and temperature > 0:
            generation_kwargs.update(do_sample=True, temperature=temperature)
        else:
            generation_kwargs["do_sample"] = False

        with torch.no_grad():
            outputs = self.model.generate(**inputs, **generation_kwargs)

        # The output sequence starts with the prompt tokens. Slice them off by
        # token count rather than by len(prompt): the decoded text is not
        # guaranteed to reproduce the prompt character-for-character
        # (truncation, whitespace/Unicode normalisation), which would
        # otherwise cut into or leak prompt text.
        new_tokens = outputs[0][prompt_token_count:]
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

In [None]:
# Cell 4: Outline Data Structures
@dataclass
class OutlineNode:
    """A single heading in a document outline, with optional nested headings.

    Attributes:
        title: Heading text.
        level: Heading depth (1=chapter, 2=section, 3=subsection).
        description: Optional one-line summary rendered under the heading.
        key_points: Bullet points for this heading; ``None`` becomes ``[]``.
        children: Nested ``OutlineNode`` objects; ``None`` becomes ``[]``.
    """

    title: str
    level: int  # 1=chapter, 2=section, 3=subsection
    description: str = ""
    # None (not a list literal) is the default so that instances never share
    # one mutable list; __post_init__ replaces it with a fresh list.
    key_points: Optional[List[str]] = None
    children: Optional[List["OutlineNode"]] = None

    def __post_init__(self):
        """Replace ``None`` list fields with fresh empty lists."""
        if self.key_points is None:
            self.key_points = []
        if self.children is None:
            self.children = []

    def to_dict(self):
        """Return a JSON-serialisable snapshot of this node and its subtree.

        ``key_points`` is copied so that editing the snapshot cannot mutate
        the live node.
        """
        return {
            "title": self.title,
            "level": self.level,
            "description": self.description,
            "key_points": list(self.key_points),
            "children": [child.to_dict() for child in self.children],
        }

    def to_markdown(self, indent=""):
        """Render this node and its subtree as a list of Markdown lines.

        Args:
            indent: Prefix prepended to every emitted line; it is passed
                unchanged to child nodes.

        Returns:
            A list of strings (not a joined string): the heading with
            ``level`` leading ``#`` characters, the description if non-empty,
            one ``- `` bullet per key point, then the children's lines.
        """
        lines = []
        prefix = "#" * self.level
        lines.append(f"{indent}{prefix} {self.title}")

        if self.description:
            lines.append(f"{indent}{self.description}")

        lines.extend(f"{indent}- {point}" for point in self.key_points)

        for child in self.children:
            lines.extend(child.to_markdown(indent))

        return lines

In [None]:
# Cell 5: Document Type Templates
# Chapter skeleton per supported document type. Every entry is a level-1
# chapter given as (title, description); the comprehension expands each pair
# into the {"title", "level", "description"} dict shape stored under
# "structure".
OUTLINE_TEMPLATES = {
    doc_type: {
        "structure": [
            {"title": title, "level": 1, "description": description}
            for title, description in chapters
        ]
    }
    for doc_type, chapters in (
        (
            "research_report",
            (
                ("摘要", "核心發現與結論概要"),
                ("背景與動機", "研究問題與重要性"),
                ("主要發現", "核心研究結果分析"),
                ("深入討論", "發現的意涵與影響"),
                ("結論與建議", "行動建議與未來方向"),
            ),
        ),
        (
            "technical_guide",
            (
                ("概述", "技術概念與目標"),
                ("準備工作", "環境設定與前置需求"),
                ("實作步驟", "詳細操作流程"),
                ("最佳實務", "經驗分享與注意事項"),
                ("疑難排解", "常見問題與解決方案"),
            ),
        ),
        (
            "business_proposal",
            (
                ("執行摘要", "提案核心重點"),
                ("市場分析", "機會與競爭環境"),
                ("解決方案", "產品或服務說明"),
                ("實施計畫", "時程與資源配置"),
                ("財務預測", "成本效益分析"),
            ),
        ),
    )
}

In [None]:
# Cell 6: Style Dictionary Integration
class StyleDictionary:
    """Style configuration (tone, format, terminology, structure rules).

    The configuration is read from a YAML file when ``config_path`` points to
    an existing file; otherwise a built-in Traditional-Chinese default is used.
    """

    def __init__(self, config_path=None):
        """Load the style configuration.

        Args:
            config_path: Optional path to a YAML file. When it is falsy or the
                file does not exist, the built-in default configuration is used.
        """
        if config_path and os.path.exists(config_path):
            with open(config_path, "r", encoding="utf-8") as f:
                # An empty YAML document loads as None; normalise to a dict so
                # the .get() lookups below keep working.
                self.config = yaml.safe_load(f) or {}
        else:
            # Default Chinese style configuration
            self.config = {
                "tone": "professional",
                "format": {
                    "use_numbered_chapters": True,
                    "max_sections_per_chapter": 5,
                    "prefer_bullet_points": True,
                },
                "terminology": {
                    "RAG": "檢索增強生成",
                    "LLM": "大型語言模型",
                    "embedding": "嵌入向量",
                    "chunk": "文本片段",
                },
                "structure_rules": [
                    "每章應有明確的學習目標",
                    "使用繁體中文撰寫",
                    "技術術語保持中英對照",
                    "章節安排要循序漸進",
                ],
            }

    def apply_terminology(self, text):
        """Annotate configured English terms as ``"<Chinese> (<English>)"``.

        Differences from a plain ``\\bterm\\b`` substitution:

        * Boundaries are ASCII-only. Python's ``\\b`` is Unicode-aware and
          counts CJK characters as word characters, so it never matches a
          term that touches Chinese text (e.g. ``中文RAG系統``).
        * Occurrences that are already annotated are skipped, so calling the
          method twice does not nest annotations.
        * The replacement is inserted literally; backslashes in a configured
          term are not interpreted as regex escapes.

        Args:
            text: Input string.

        Returns:
            The text with every stand-alone term occurrence annotated.
        """
        terminology = self.config.get("terminology") or {}
        for en_term, zh_term in terminology.items():
            annotated = f"{zh_term} ({en_term})"
            pattern = (
                # Not already preceded by "<zh_term> (" (i.e. already annotated).
                r"(?<!" + re.escape(f"{zh_term} (") + r")"
                # Not part of a longer ASCII identifier such as "chunk_size".
                r"(?<![A-Za-z0-9_])" + re.escape(en_term) + r"(?![A-Za-z0-9_])"
            )
            # A callable replacement bypasses re.sub's escape processing.
            text = re.sub(pattern, lambda _match, rep=annotated: rep, text)
        return text

    def get_structure_guidelines(self):
        """Return the configured structure rules (empty list when absent)."""
        return self.config.get("structure_rules", [])

In [None]:
# Cell 7: Main Planner Agent
class PlannerAgent:
    """Plans a structured document outline from research data using an LLM.

    The agent builds a planning prompt from a document-type template and the
    style rules, asks the LLM for a JSON outline, converts it into
    ``OutlineNode`` objects, and falls back to the bare template when the
    reply cannot be used.
    """

    # Maximum number of research-data characters embedded in the prompt.
    MAX_RESEARCH_CHARS = 2000

    def __init__(self, llm_adapter, style_config=None):
        """Store the LLM adapter and build the style dictionary.

        Args:
            llm_adapter: Object exposing
                ``generate(messages, max_new_tokens=..., temperature=...)``.
            style_config: Optional path forwarded to ``StyleDictionary``.
        """
        self.llm = llm_adapter
        self.style = StyleDictionary(style_config)

    def create_outline(
        self, research_data, document_type="research_report", target_audience="general"
    ):
        """Generate structured outline based on research data.

        Args:
            research_data: Source text; only the first ``MAX_RESEARCH_CHARS``
                characters are placed in the prompt.
            document_type: Key into ``OUTLINE_TEMPLATES``; unknown keys use the
                ``"research_report"`` template.
            target_audience: Free-text audience description for the prompt.

        Returns:
            A list of chapter ``OutlineNode`` objects. When the LLM reply holds
            no usable JSON outline (no JSON object, or an empty outline), the
            template-based fallback outline is returned instead.
        """
        # Get template structure
        template = OUTLINE_TEMPLATES.get(
            document_type, OUTLINE_TEMPLATES["research_report"]
        )

        # Prepare planning prompt
        style_rules = "\n".join(self.style.get_structure_guidelines())
        # Truncate to avoid the token limit. This is done outside the f-string:
        # a "# comment" written inside the template is literal prompt text.
        research_excerpt = research_data[: self.MAX_RESEARCH_CHARS]

        planning_prompt = f"""
基於以下研究資料，為「{document_type}」類型文件規劃詳細大綱。

研究資料：
{research_excerpt}

目標讀者：{target_audience}

基本結構範本：
{json.dumps(template, ensure_ascii=False, indent=2)}

風格要求：
{style_rules}

請生成包含以下要素的結構化大綱：
1. 主要章節（第一層）
2. 各章節下的小節（第二層）
3. 每個小節的核心要點（3-5點）
4. 簡短的內容描述

輸出格式要求：JSON格式，結構如下：
{{
  "outline": [
    {{
      "title": "章節標題",
      "level": 1,
      "description": "章節描述",
      "key_points": ["要點1", "要點2"],
      "sections": [
        {{
          "title": "小節標題",
          "level": 2,
          "description": "小節描述",
          "key_points": ["細節要點1", "細節要點2"]
        }}
      ]
    }}
  ]
}}
"""

        messages = [
            {
                "role": "system",
                "content": "你是一位專業的文件結構規劃專家，擅長創建邏輯清晰、內容完整的文件大綱。",
            },
            {"role": "user", "content": planning_prompt},
        ]

        response = self.llm.generate(messages, max_new_tokens=1024, temperature=0.5)

        # Parse JSON response; any unusable reply degrades to the template.
        outline_data = self._extract_json_object(response)
        if outline_data is not None:
            outline = self._parse_outline_data(outline_data)
            if outline:
                return outline
        return self._create_fallback_outline(template, research_data)

    @staticmethod
    def _extract_json_object(text):
        """Return the first JSON object in ``text`` with an ``"outline"`` key, or None.

        Each ``{`` is tried as a start position and decoded with
        ``raw_decode``, which stops at the end of the first complete JSON
        value. Surrounding chatter or trailing braces therefore do not break
        parsing.
        """
        if not isinstance(text, str):
            return None
        decoder = json.JSONDecoder()
        start = text.find("{")
        while start != -1:
            try:
                candidate, _end = decoder.raw_decode(text[start:])
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict) and "outline" in candidate:
                return candidate
            start = text.find("{", start + 1)
        return None

    @staticmethod
    def _build_node(item, default_level):
        """Build one ``OutlineNode`` from an LLM-produced dict, or None if not a dict.

        Missing or malformed fields get defaults instead of raising: an absent
        title becomes ``""`` (so ``validate_outline`` can report it), and a
        missing, non-integer or non-positive level becomes ``default_level``.
        """
        if not isinstance(item, dict):
            return None
        try:
            level = int(item.get("level", default_level))
        except (TypeError, ValueError):
            level = default_level
        if level < 1:
            level = default_level
        key_points = item.get("key_points")
        if not isinstance(key_points, list):
            key_points = []
        return OutlineNode(
            title=str(item.get("title") or ""),
            level=level,
            description=str(item.get("description") or ""),
            key_points=list(key_points),
        )

    def _parse_outline_data(self, data):
        """Parse JSON outline data into OutlineNode objects.

        Args:
            data: Decoded JSON; expected to be a dict with an ``"outline"``
                list whose items may carry a ``"sections"`` list.

        Returns:
            A list of chapter nodes (possibly empty). Entries that are not
            dicts are skipped.
        """
        chapters = data.get("outline") if isinstance(data, dict) else None
        if not isinstance(chapters, list):
            return []

        outline_nodes = []
        for chapter_data in chapters:
            chapter = self._build_node(chapter_data, default_level=1)
            if chapter is None:
                continue

            # Add sections
            sections = chapter_data.get("sections")
            if isinstance(sections, list):
                for section_data in sections:
                    section = self._build_node(section_data, default_level=2)
                    if section is not None:
                        chapter.children.append(section)

            outline_nodes.append(chapter)

        return outline_nodes

    def _create_fallback_outline(self, template, research_data):
        """Create basic outline when JSON parsing fails.

        Args:
            template: Template dict with a ``"structure"`` list.
            research_data: Unused; kept for interface compatibility.

        Returns:
            One node per template entry with a single placeholder key point.
        """
        return [
            OutlineNode(
                title=item["title"],
                level=item["level"],
                description=item["description"],
                key_points=["待規劃內容要點"],
            )
            for item in template["structure"]
        ]

    def validate_outline(self, outline):
        """Validate outline structure and completeness.

        Returns:
            A list of human-readable issue strings; empty when no issue is found.
        """
        issues = []

        if not outline:
            issues.append("大綱為空")
            return issues

        # Check minimum chapters
        if len(outline) < 3:
            issues.append("章節數量過少（建議至少3章）")

        # Check maximum chapters
        if len(outline) > 10:
            issues.append("章節數量過多（建議不超過10章）")

        # Check each chapter
        for i, chapter in enumerate(outline):
            if not chapter.title.strip():
                issues.append(f"第{i+1}章缺少標題")

            if len(chapter.children) == 0:
                issues.append(f"第{i+1}章「{chapter.title}」缺少小節")

            if len(chapter.children) > 7:
                issues.append(f"第{i+1}章「{chapter.title}」小節過多（建議不超過7個）")

            # Check sections
            for j, section in enumerate(chapter.children):
                if not section.key_points:
                    issues.append(f"第{i+1}章第{j+1}小節「{section.title}」缺少要點")

        return issues

    def refine_outline(self, outline, feedback):
        """Refine outline based on feedback.

        Returns:
            The LLM's free-text suggestions; the outline itself is not modified.
        """
        refinement_prompt = f"""
請根據以下回饋意見優化大綱結構：

回饋意見：
{feedback}

目前大綱：
{self._outline_to_text(outline)}

請提供改進建議，輸出格式：
1. 具體修改建議
2. 調整後的大綱結構（重點章節）
"""

        messages = [
            {
                "role": "system",
                "content": "你是文件結構優化專家，能根據回饋改善大綱品質。",
            },
            {"role": "user", "content": refinement_prompt},
        ]

        response = self.llm.generate(messages, max_new_tokens=512, temperature=0.3)
        return response

    def _outline_to_text(self, outline):
        """Convert outline to readable text format (Markdown lines joined by newlines)."""
        lines = []
        for chapter in outline:
            lines.extend(chapter.to_markdown())
            lines.append("")  # Empty line between chapters
        return "\n".join(lines)


In [None]:
# Cell 9: Blackboard Integration
class PlannerBlackboard:
    """Shared state management for planner.

    Holds the current research data, every outline version produced so far,
    and a log of feedback entries, each stamped with the local wall-clock time.
    """

    def __init__(self):
        """Start with empty research data, history, feedback log and metadata."""
        self.research_data = ""
        self.outline_history = []
        self.feedback_log = []
        self.current_outline = None
        self.metadata = {}

    @staticmethod
    def _timestamp():
        """Return the current local time as an ISO-8601 string (second precision).

        Uses ``datetime`` rather than a CUDA event: ``Event.record()`` returns
        ``None`` (so the stored value was the string "None") and needs a GPU.
        """
        return datetime.now().isoformat(timespec="seconds")

    def update_research(self, data):
        """Replace the stored research data and flag it as updated."""
        self.research_data = data
        self.metadata["research_updated"] = True

    def add_outline_version(self, outline, version_note=""):
        """Append ``outline`` to the history and make it the current outline."""
        self.outline_history.append(
            {
                "outline": outline,
                "timestamp": self._timestamp(),
                "note": version_note,
            }
        )
        self.current_outline = outline

    def add_feedback(self, feedback, source="manual"):
        """Record a feedback entry together with its source and timestamp."""
        self.feedback_log.append(
            {
                "feedback": feedback,
                "source": source,
                "timestamp": self._timestamp(),
            }
        )

    def get_status(self):
        """Return summary counters describing the blackboard's current state."""
        return {
            "has_research": bool(self.research_data),
            "outline_versions": len(self.outline_history),
            "feedback_count": len(self.feedback_log),
            "current_outline_chapters": (
                len(self.current_outline) if self.current_outline else 0
            ),
        }

In [None]:
# Cell 10: Smoke Test - End-to-End Outline Generation
print("=== nb32 Planner Agent 煙霧測試 ===\n")

# OutlineExporter is defined in "Cell 8", which sits AFTER this cell in the
# notebook. On a fresh Restart & Run All it does not exist yet, so look it up
# defensively and skip the export steps with a warning instead of failing
# with NameError after the model has already been loaded.
exporter = globals().get("OutlineExporter")

# Initialize components
llm = LLMAdapter()
planner = PlannerAgent(llm)
blackboard = PlannerBlackboard()

# Mock research data
research_data = """
研究主題：中文RAG系統實作指南

主要發現：
1. 中文文本分段策略需要考慮標點符號密度
2. BGE-M3嵌入模型在中文檢索任務表現優異
3. FAISS索引配置對檢索速度影響顯著
4. 重排序模型能有效提升檢索精度
5. 引用格式標準化有助提升可信度

技術細節：
- chunk_size建議800-1000 tokens
- 混合檢索(BM25+向量)比單一方法效果好
- 多域索引路由能處理不同主題文檔
- Style Dictionary確保輸出格式一致性

實驗結果：
- Recall@5達到0.85以上
- 平均檢索延遲<100ms
- 引用準確率>90%
"""

print("1. 更新研究資料到共享狀態...")
blackboard.update_research(research_data)

print("2. 生成技術指南大綱...")
outline = planner.create_outline(
    research_data, document_type="technical_guide", target_audience="AI開發者"
)

print("3. 驗證大綱結構...")
issues = planner.validate_outline(outline)
print(f"   發現問題數量: {len(issues)}")
if issues:
    for issue in issues[:3]:  # Show first 3 issues
        print(f"   - {issue}")

print("4. 儲存大綱版本...")
blackboard.add_outline_version(outline, "初始版本")

print("5. 輸出大綱摘要...")
if outline:
    print(f"   章節數量: {len(outline)}")
    for i, chapter in enumerate(outline[:3]):  # Show first 3 chapters
        print(f"   {i+1}. {chapter.title}")
        if chapter.children:
            print(f"      小節數量: {len(chapter.children)}")

print("6. 匯出為Markdown格式...")
if exporter is not None:
    markdown_content = exporter.to_markdown(outline)
    print(f"   Markdown長度: {len(markdown_content)} 字元")
else:
    print("   ⚠️ OutlineExporter 尚未定義（請先執行 Cell 8），略過匯出步驟")

print("7. 共享狀態檢查...")
status = blackboard.get_status()
for key, value in status.items():
    print(f"   {key}: {value}")

# Save to outputs for inspection (only possible once the exporter exists)
if exporter is not None:
    os.makedirs("outs", exist_ok=True)
    exporter.save_outline(outline, "outs/sample_outline.md", "markdown")
    exporter.save_outline(outline, "outs/sample_outline.json", "json")

print("\n✅ Planner Agent 核心功能測試完成！")
print("🔥 Smoke test 通過 - 能夠生成結構化大綱並整合到多代理工作流程")
if exporter is None:
    print("⚠️ 匯出步驟已略過：請執行 Cell 8 後重新執行本儲存格以產生輸出檔案")

# Key parameters and configurations
print("\n=== 關鍵參數說明 ===")
print("• LLM溫度: 0.5 (平衡創意與一致性)")
print("• 最大新tokens: 1024 (支援詳細大綱)")
print("• 章節數量限制: 3-10章")
print("• 每章小節限制: 最多7個")
print("• 支援文件類型: research_report, technical_guide, business_proposal")
print("• 輸出格式: Markdown, JSON")

print("\n=== 何時使用此模組 ===")
print("✓ 需要將研究資料轉化為結構化文件大綱")
print("✓ 多代理協作中的規劃階段")
print("✓ 確保文件邏輯結構完整性")
print("✓ 標準化不同類型文件的組織方式")
print("✓ 整合風格指南到文件規劃流程")

In [None]:
# Cell 8: Outline Export and Utilities
class OutlineExporter:
    """Stateless helpers that render an outline and write it to disk.

    An outline is an iterable of nodes exposing ``to_markdown()`` (a list of
    lines) and ``to_dict()``.
    """

    @staticmethod
    def to_markdown(outline):
        """Return the outline as Markdown text, with a blank line after each chapter."""
        return "\n".join(
            line for node in outline for line in (*node.to_markdown(), "")
        )

    @staticmethod
    def to_json(outline):
        """Return the outline as an indented, non-ASCII-escaped JSON array string."""
        payload = [node.to_dict() for node in outline]
        return json.dumps(payload, ensure_ascii=False, indent=2)

    @staticmethod
    def save_outline(outline, filepath, format="markdown"):
        """Render the outline as ``format`` and write it to ``filepath`` (UTF-8).

        The parent directory is created first. ``format`` must be
        ``"markdown"`` or ``"json"``; anything else raises ``ValueError``.
        """
        target = pathlib.Path(filepath)
        target.parent.mkdir(parents=True, exist_ok=True)

        renderers = (
            ("markdown", OutlineExporter.to_markdown),
            ("json", OutlineExporter.to_json),
        )
        render = next((fn for name, fn in renderers if format == name), None)
        if render is None:
            raise ValueError(f"Unsupported format: {format}")

        target.write_text(render(outline), encoding="utf-8")

        print(f"Outline saved to: {filepath}")