In [None]:
# Writer Composition Agent Implementation
# Stage 4 - Multi-Agent Orchestrator
# File: notebooks/4_agents_orch/nb33_writer_composition.ipynb

# Cell1:  Shared Cache Bootstrap
import os, pathlib, torch
import sys
from datetime import datetime

# Shared cache configuration (copy this cell into every notebook).
# AI_CACHE_ROOT can be overridden through the environment; the fallback is a
# relative path, so it is resolved against the current working directory.
AI_CACHE_ROOT = os.getenv("AI_CACHE_ROOT", "../ai_warehouse/cache")

# Point each cache-related environment variable at a sub-directory of
# AI_CACHE_ROOT and make sure that directory exists.
for k, v in {
    "HF_HOME": f"{AI_CACHE_ROOT}/hf",
    "TRANSFORMERS_CACHE": f"{AI_CACHE_ROOT}/hf/transformers",
    "HF_DATASETS_CACHE": f"{AI_CACHE_ROOT}/hf/datasets",
    "HUGGINGFACE_HUB_CACHE": f"{AI_CACHE_ROOT}/hf/hub",
    "TORCH_HOME": f"{AI_CACHE_ROOT}/torch",
}.items():
    os.environ[k] = v
    pathlib.Path(v).mkdir(parents=True, exist_ok=True)
# Report the cache root and whether torch can see a CUDA device.
print("[Cache]", AI_CACHE_ROOT, "| GPU:", torch.cuda.is_available())

In [None]:
# Cell 2: Dependencies and Imports
import json
import yaml
import time
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass, field
from pathlib import Path

# LLM Adapter (from previous notebooks)
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch.nn.functional as F


class LLMAdapter:
    """Thin wrapper around a Hugging Face causal LM for chat-style generation.

    The adapter loads a tokenizer/model pair once and exposes a single
    ``generate`` method that turns a list of chat messages into a plain-text
    prompt, runs the model and returns only the newly generated text.
    """

    def __init__(self, model_id: str, device_map="auto", **kwargs):
        """Load the tokenizer and the model for ``model_id``.

        Args:
            model_id: Identifier handed to ``from_pretrained``.
            device_map: Forwarded to ``AutoModelForCausalLM.from_pretrained``.
            **kwargs: Extra ``from_pretrained`` arguments. They override the
                defaults below (``torch_dtype=torch.float16``,
                ``load_in_4bit=True``) instead of colliding with them.
        """
        self.model_id = model_id
        self.tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
        if self.tokenizer.pad_token is None:
            # Tokenizers without a pad token reuse EOS for padding.
            self.tokenizer.pad_token = self.tokenizer.eos_token

        load_kwargs = {"device_map": device_map, "torch_dtype": torch.float16}
        if "quantization_config" not in kwargs:
            # Low-VRAM default. Skipped when the caller supplies an explicit
            # quantization config so the two settings are not both passed.
            # NOTE(review): newer transformers releases appear to prefer
            # quantization_config=BitsAndBytesConfig(load_in_4bit=True) --
            # confirm against the installed version.
            load_kwargs["load_in_4bit"] = True
        load_kwargs.update(kwargs)  # caller-provided values win

        self.model = AutoModelForCausalLM.from_pretrained(model_id, **load_kwargs)

    @staticmethod
    def _build_prompt(messages: List[Dict]) -> str:
        """Flatten chat messages into the "System/User/Assistant" text prompt."""
        labels = {"system": "System", "user": "User", "assistant": "Assistant"}
        lines = []
        for msg in messages:
            role = msg.get("role", "user")
            if role in labels:  # messages with any other role are skipped
                lines.append(f"{labels[role]}: {msg.get('content', '')}\n")
        lines.append("Assistant: ")  # cue for the model's reply
        return "".join(lines)

    def generate(
        self, messages: List[Dict], max_new_tokens=512, temperature=0.7, **kwargs
    ):
        """Generate a reply for ``messages`` and return it as stripped text.

        Args:
            messages: Chat messages as dicts with ``role`` and ``content``.
            max_new_tokens: Upper bound on generated tokens.
            temperature: Sampling temperature. ``None`` or a value <= 0
                switches to greedy decoding instead of being passed to a
                sampling run.
            **kwargs: Extra arguments for ``model.generate``. They override the
                adapter's defaults (e.g. ``do_sample=False``) rather than
                raising a duplicate-keyword error.

        Returns:
            The decoded continuation (prompt tokens removed), stripped.
        """
        prompt = self._build_prompt(messages)

        # NOTE(review): the prompt is truncated to 3072 tokens; depending on
        # the tokenizer's truncation side this may cut the trailing
        # "Assistant: " cue on very long prompts -- confirm.
        inputs = self.tokenizer(
            prompt, return_tensors="pt", truncation=True, max_length=3072
        )
        inputs = {k: v.to(self.model.device) for k, v in inputs.items()}

        gen_kwargs = {
            "max_new_tokens": max_new_tokens,
            "pad_token_id": self.tokenizer.eos_token_id,
            "do_sample": temperature is not None and temperature > 0,
        }
        if gen_kwargs["do_sample"]:
            gen_kwargs["temperature"] = temperature
        gen_kwargs.update(kwargs)
        if not gen_kwargs.get("do_sample"):
            # Temperature is meaningless for greedy decoding; do not send it.
            gen_kwargs.pop("temperature", None)

        with torch.no_grad():
            outputs = self.model.generate(**inputs, **gen_kwargs)

        # Drop the prompt tokens so only the continuation is decoded.
        prompt_length = inputs["input_ids"].shape[1]
        response = self.tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        )
        return response.strip()

In [None]:
# Cell 3: Writer Agent Core Class
@dataclass
class WritingSection:
    """A single section of a document: its outline plus the generated text.

    Attributes:
        title: Section heading.
        outline_points: Bullet points the section has to cover.
        target_length: Desired length of the section (defaults to 300).
        context: Optional background material for this section.
        citations: Citation strings attached to the section.
        content: Generated text; empty until the section has been written.
    """

    # Required fields
    title: str
    outline_points: List[str]

    # Optional fields
    target_length: int = 300
    context: str = ""
    citations: List[str] = field(default_factory=list)
    content: str = ""


@dataclass
class WritingProject:
    """A complete writing project: a title plus its ordered sections.

    Attributes:
        title: Document title.
        sections: Sections in writing order. Defaults to an empty list so a
            project can be created first and filled afterwards, which is how
            the blackboard integration in this notebook builds it.
        research_context: Shared background text for the whole project.
        style_guide: Style settings associated with the project.
        references: Reference entries listed at the end of the document.
    """

    title: str
    # Forward reference to the WritingSection dataclass.
    sections: List["WritingSection"] = field(default_factory=list)
    research_context: str = ""
    style_guide: Dict = field(default_factory=dict)
    references: List[str] = field(default_factory=list)


class WriterAgent:
    """Writer agent that composes document sections with an LLM.

    The agent turns a style dictionary into prompt instructions, writes
    sections one at a time (feeding the two most recent sections back in for
    coherence) and assembles the result into a Markdown document.
    """

    # Seconds to wait between consecutive sections in write_project.
    # Set to 0 to disable the pause.
    SECTION_PAUSE_SECONDS = 1

    def __init__(self, llm_adapter: "LLMAdapter", style_config: Dict = None):
        """Store the LLM adapter and the style dictionary.

        Args:
            llm_adapter: Object exposing
                ``generate(messages, max_new_tokens=..., temperature=...)``.
            style_config: Style dictionary; ``None`` is treated as ``{}``.
        """
        self.llm = llm_adapter
        self.style_config = style_config or {}
        # One record per section written through write_section.
        self.writing_history = []

    def load_style_dictionary(self, style_path: str) -> Dict:
        """Load a style dictionary from a YAML file.

        Falls back to the built-in default style when the file does not exist
        or contains no document (``yaml.safe_load`` returns ``None`` then,
        which would otherwise break the ``Dict`` contract of this method).
        """
        try:
            with open(style_path, "r", encoding="utf-8") as f:
                loaded = yaml.safe_load(f)
        except FileNotFoundError:
            return self._get_default_style()
        if loaded is None:
            return self._get_default_style()
        return loaded

    def _get_default_style(self) -> Dict:
        """Return the default Chinese writing style dictionary."""
        return {
            "tone": "formal-neutral",
            "format": {
                "bullets": True,
                "numbered_steps": True,
                "citations": "brackets",
            },
            "glossary": [
                {"src": "RAG", "tgt": "檢索增強生成"},
                {"src": "LLM", "tgt": "大型語言模型"},
                {"src": "Agent", "tgt": "智能代理"},
            ],
            "avoid_phrases": ["以下是", "作為一個AI"],
            "style_rules": [
                "使用繁體中文撰寫",
                "保持段落簡潔，每段不超過150字",
                "適當使用列點和編號",
                "引用格式使用 [1], [2] 方式",
            ],
        }

    def _build_style_prompt(self, style_dict: Dict) -> str:
        """Convert a style dictionary into prompt instructions.

        Missing keys and keys whose value is ``None`` (for example an empty
        YAML entry) are treated as empty lists.
        """
        tone = style_dict.get("tone", "neutral")
        rules = "\n".join(style_dict.get("style_rules") or [])
        glossary = "".join(
            f"- {term['src']} → {term['tgt']}\n"
            for term in style_dict.get("glossary") or []
        )
        avoid = ", ".join(style_dict.get("avoid_phrases") or [])

        return f"""
寫作風格指南：
語調：{tone}
專業術語對照：
{glossary}
避免用詞：{avoid}
寫作規則：
{rules}
"""

    def write_section(
        self,
        section: "WritingSection",
        project_context: str = "",
        previous_sections: List[str] = None,
    ) -> str:
        """Write one section from its outline and store the text on it.

        Args:
            section: Section to write; its ``content`` attribute is set to
                the generated text.
            project_context: Background text for the whole project.
            previous_sections: Texts of already written sections; only the
                last two are included in the prompt.

        Returns:
            The generated section text.
        """
        previous_sections = previous_sections or []
        style_prompt = self._build_style_prompt(self.style_config)

        # Only the two most recent sections are passed back for coherence.
        context_summary = ""
        if previous_sections:
            context_summary = f"\n前文摘要：\n{' '.join(previous_sections[-2:])}"

        outline_block = "\n".join(f"- {point}" for point in section.outline_points)
        reference_block = f"參考資料：{section.context}" if section.context else ""

        system_prompt = f"""你是專業的中文寫作助手。請根據大綱要點撰寫內容。

{style_prompt}

要求：
1. 內容必須基於提供的大綱要點
2. 保持與前文的邏輯連貫性
3. 目標長度約 {section.target_length} 字
4. 如有引用資料，使用 [1], [2] 格式標註
5. 直接輸出內容，不要包含「以下是」等引導語
"""

        user_prompt = f"""
專案背景：{project_context}

章節標題：{section.title}

大綱要點：
{outline_block}

{reference_block}

{context_summary}

請撰寫此章節的內容：
"""

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]

        # Token budget: twice the target length, capped at 800 and never
        # below 1 so a zero target length does not request zero new tokens.
        token_budget = max(1, min(800, section.target_length * 2))
        content = self.llm.generate(
            messages,
            max_new_tokens=token_budget,
            temperature=0.7,
        )

        section.content = content
        self.writing_history.append(
            {
                "title": section.title,
                "target_length": section.target_length,
                "length": len(content),
                "timestamp": time.time(),
            }
        )
        return content

    def write_project(self, project: "WritingProject") -> str:
        """Write every section of ``project`` and return the full Markdown text.

        The document consists of the project title, one ``##`` block per
        section and, when the project has references, a numbered reference
        list at the end.
        """
        parts = [f"# {project.title}\n\n"]
        written_sections = []
        total = len(project.sections)

        for index, section in enumerate(project.sections, 1):
            print(f"Writing section {index}/{total}: {section.title}")

            content = self.write_section(
                section, project.research_context, written_sections
            )
            parts.append(f"## {section.title}\n\n{content}\n\n")
            written_sections.append(content)

            # Pause between sections only; nothing follows the last one.
            if index < total and self.SECTION_PAUSE_SECONDS > 0:
                time.sleep(self.SECTION_PAUSE_SECONDS)

        if project.references:
            parts.append("## 參考資料\n\n")
            parts.extend(
                f"[{number}] {ref}\n"
                for number, ref in enumerate(project.references, 1)
            )

        return "".join(parts)

In [None]:
# Cell 4: Style Dictionary Integration
def create_style_config():
    """Build the sample style dictionary used throughout this notebook.

    Returns:
        A new dict with the keys ``tone``, ``format``, ``glossary``,
        ``avoid_phrases`` and ``style_rules``.
    """
    # (source term, target term) pairs, expanded into glossary entries below.
    term_pairs = [
        ("RAG", "檢索增強生成"),
        ("Retrieval", "檢索"),
        ("Embedding", "嵌入向量"),
        ("Chunk", "文本片段"),
        ("LLM", "大型語言模型"),
        ("Agent", "智能代理"),
        ("Orchestrator", "協調器"),
    ]

    config = {"tone": "professional-friendly"}
    config["format"] = {
        "bullets": True,
        "numbered_steps": True,
        "citations": "brackets",
        "max_paragraph_length": 150,
    }
    config["glossary"] = [{"src": src, "tgt": tgt} for src, tgt in term_pairs]
    config["avoid_phrases"] = ["以下是", "作為一個AI助手", "讓我來為您", "總的來說"]
    config["style_rules"] = [
        "使用繁體中文撰寫正文",
        "技術術語保持中英文對照一致性",
        "每段落不超過150字，保持簡潔",
        "使用條列式重點整理",
        "引用格式統一使用 [數字] 方式",
        "保持客觀專業語調，避免過度主觀表達",
    ]
    return config


def save_style_config(config: Dict, path: str):
    """Write ``config`` to ``path`` as block-style YAML.

    Parent directories of ``path`` are created when missing; the file is
    written as UTF-8 with non-ASCII characters kept unescaped.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w", encoding="utf-8") as stream:
        yaml.dump(config, stream, allow_unicode=True, default_flow_style=False)


# Create the sample style dictionary and persist it as YAML.
style_config = create_style_config()
# Relative path: resolved against the current working directory.
style_path = "configs/styles/zh_general.yaml"
save_style_config(style_config, style_path)
print(f"Style config saved to: {style_path}")

In [None]:
# Cell 5: Sectional Writing Strategy
class SectionalWriter:
    """Splits an outline into sections of bounded length for a writer agent."""

    def __init__(self, writer_agent: WriterAgent, max_section_length=400):
        self.max_section_length = max_section_length
        self.writer = writer_agent

    def split_long_outline(
        self, title: str, outline_points: List[str], target_total_length: int = 1000
    ) -> List[WritingSection]:
        """Turn an outline into one or more ``WritingSection`` objects.

        An outline with at most three points, or a total target that already
        fits into ``max_section_length``, becomes a single section. Anything
        else is cut into consecutive chunks whose titles carry a part number.
        """
        cap = self.max_section_length
        point_count = len(outline_points)

        # Small case: everything goes into one section.
        if point_count <= 3 or target_total_length <= cap:
            only_section = WritingSection(
                title=title,
                outline_points=outline_points,
                target_length=min(target_total_length, cap),
            )
            return [only_section]

        # Large case: chunks of at least two points, roughly three chunks.
        chunk_size = max(2, point_count // 3)
        chunk_count = -(-point_count // chunk_size)  # ceiling division
        per_chunk_length = min(target_total_length // chunk_count, cap)

        return [
            WritingSection(
                title=f"{title} - 第{part_number}部分",
                outline_points=outline_points[start : start + chunk_size],
                target_length=per_chunk_length,
            )
            for part_number, start in enumerate(
                range(0, point_count, chunk_size), 1
            )
        ]

    def estimate_writing_time(self, sections: List[WritingSection]) -> int:
        """Estimate the total writing time in seconds (never below 30)."""
        length_budget = 0
        for sec in sections:
            length_budget += sec.target_length
        # Half a second per target character plus five seconds per section.
        estimate = length_budget // 2 + 5 * len(sections)
        return estimate if estimate > 30 else 30

In [None]:
# Cell 6: Content Cohesion and Formatting
class ContentFormatter:
    """Static helpers for post-processing generated content."""

    @staticmethod
    def ensure_cohesion(sections: List[str]) -> List[str]:
        """Prefix later sections with a connective when they lack one.

        Every section after the first gets "此外，" prepended unless it is
        empty, already has a connective within its first ten characters, or
        opens with a topic marker. Leading whitespace is ignored for both
        checks.

        Args:
            sections: Section texts in document order.

        Returns:
            The input list itself when it has at most one element, otherwise
            a new list with the adjusted sections.
        """
        if len(sections) <= 1:
            return sections

        connectives = ("此外", "另外", "接下來", "然而", "因此")
        topic_openers = ("在", "對於", "關於")

        cohesive_sections = [sections[0]]
        for section in sections[1:]:
            head = section.strip()
            has_connective = any(word in head[:10] for word in connectives)
            # Empty sections are left alone: a bare connective reads as noise.
            if head and not has_connective and not head.startswith(topic_openers):
                section = f"此外，{section}"
            cohesive_sections.append(section)

        return cohesive_sections

    @staticmethod
    def format_citations(content: str, references: List[str]) -> str:
        """Renumber ``[n]`` citations to 1..k in ascending order of ``n``.

        The replacement is done in a single pass, so a freshly written number
        can never be picked up and renumbered again (the case for inputs such
        as ``"[0] ... [1]"``). Citations are compared by integer value, so
        ``[01]`` and ``[1]`` count as the same citation.

        Args:
            content: Text containing bracketed numeric citations.
            references: Currently unused; kept for interface compatibility.
                NOTE(review): renumbering does not reorder any reference
                list, so callers must keep the two aligned themselves.

        Returns:
            ``content`` with citations renumbered, or unchanged when it
            contains none.
        """
        import re

        pattern = re.compile(r"\[(\d+)\]")
        cited_numbers = sorted({int(num) for num in pattern.findall(content)})
        if not cited_numbers:
            return content

        renumbered = {old: new for new, old in enumerate(cited_numbers, 1)}
        return pattern.sub(
            lambda match: f"[{renumbered[int(match.group(1))]}]", content
        )

    @staticmethod
    def add_word_count(content: str) -> str:
        """Append a footer with character statistics to ``content``.

        Counts characters in the U+4E00..U+9FFF range as Chinese characters;
        the total excludes spaces and newlines only.
        """
        chinese_chars = sum(1 for c in content if "\u4e00" <= c <= "\u9fff")
        total_chars = len(content.replace(" ", "").replace("\n", ""))

        return f"{content}\n\n---\n*字數統計：約 {chinese_chars} 中文字，總計 {total_chars} 字符*"

In [None]:
# Cell 7: Blackboard Integration
class Blackboard(dict):
    """Dictionary-backed shared state with a log of every tracked write."""

    def __init__(self):
        super().__init__()
        # Chronological list of update records (see update_state).
        self.history = []

    def update_state(self, key: str, value, agent: str = "unknown"):
        """Store ``value`` under ``key`` and log which agent wrote it."""
        self[key] = value
        record = dict(
            timestamp=time.time(),
            agent=agent,
            key=key,
            action="update",
        )
        self.history.append(record)

    def get_state(self, key: str, default=None):
        """Return the value stored under ``key``, or ``default`` if absent."""
        return self.get(key, default)


def integrate_with_blackboard(
    blackboard: Blackboard, writer: WriterAgent
) -> WritingProject:
    """Create a writing project from the state stored on the blackboard.

    Reads ``research_summary``, ``outline``, ``style_preferences`` and
    ``references`` from the blackboard. Every entry of ``outline["sections"]``
    becomes a ``WritingSection`` that shares the research summary as context.

    Args:
        blackboard: Shared state holding the planner/researcher output.
        writer: Currently unused; kept so existing call sites keep working.

    Returns:
        The assembled ``WritingProject``.

    Raises:
        ValueError: If the blackboard holds no (or an empty) ``outline``.
    """
    research_data = blackboard.get_state("research_summary", "")

    outline_data = blackboard.get_state("outline", {})
    if not outline_data:
        raise ValueError("No outline found in blackboard. Run planner first.")

    # Style preferences travel with the project as its style guide.
    style_prefs = blackboard.get_state("style_preferences", {}) or {}

    # Build the sections first: WritingProject is constructed with all of its
    # fields in one go, including the section list.
    sections = [
        WritingSection(
            title=section_data.get("title", "未命名章節"),
            outline_points=section_data.get("points", []),
            target_length=section_data.get("target_length", 300),
            context=research_data,  # share the research context
        )
        for section_data in outline_data.get("sections", [])
    ]

    return WritingProject(
        title=outline_data.get("title", "未命名專案"),
        sections=sections,
        research_context=research_data,
        style_guide=style_prefs,
        references=blackboard.get_state("references", []),
    )

In [None]:
# Cell 8: Complete Writing Workflow Demo
def demo_complete_writing_workflow():
    """Run the end-to-end demo: seed a blackboard, write, format, publish.

    Returns:
        A ``(final_content, blackboard)`` tuple.
    """
    # Model and writer setup
    print("Initializing LLM...")
    llm = LLMAdapter("Qwen/Qwen2.5-7B-Instruct")
    writer = WriterAgent(llm, create_style_config())

    # Sample outputs of the upstream agents
    research_summary = (
        "RAG（檢索增強生成）是結合資訊檢索與語言生成的技術，能讓大型語言模型基於外部知識庫提供更準確的回答。"
        "主要組成包括文檔分割、向量嵌入、相似度檢索和答案生成等步驟。"
    )
    overview_section = {
        "title": "RAG 系統概述",
        "points": [
            "RAG 的基本概念與架構",
            "與傳統搜尋引擎的差異",
            "主要應用場景",
        ],
        "target_length": 300,
    }
    implementation_section = {
        "title": "技術實作要點",
        "points": [
            "文檔預處理與分割策略",
            "嵌入模型選擇與優化",
            "檢索與重排機制",
        ],
        "target_length": 400,
    }
    evaluation_section = {
        "title": "評估與優化",
        "points": [
            "檢索品質評估指標",
            "回答品質評估方法",
            "系統效能優化建議",
        ],
        "target_length": 300,
    }
    outline = {
        "title": "RAG 系統實作指南",
        "sections": [overview_section, implementation_section, evaluation_section],
    }
    reference_list = [
        "Lewis et al. (2020). Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks",
        "BGE: General Embedding Model (BAAI, 2023)",
        "FAISS: A Library for Efficient Similarity Search (Facebook AI, 2017)",
    ]

    # Seed the blackboard in the order the upstream agents would have written
    blackboard = Blackboard()
    for state_key, state_value, author in (
        ("research_summary", research_summary, "researcher"),
        ("outline", outline, "planner"),
        ("references", reference_list, "researcher"),
    ):
        blackboard.update_state(state_key, state_value, author)

    # Build the project from the blackboard
    print("\nCreating writing project from blackboard...")
    project = integrate_with_blackboard(blackboard, writer)

    print(f"\nStarting writing project: {project.title}")
    print(f"Sections to write: {len(project.sections)}")

    # Write, then post-process
    draft = writer.write_project(project)
    formatter = ContentFormatter()
    with_citations = formatter.format_citations(draft, project.references)
    final_content = formatter.add_word_count(with_citations)

    # Publish the results for downstream agents
    blackboard.update_state("final_content", final_content, "writer")
    blackboard.update_state("writing_completed", True, "writer")

    banner = "=" * 50
    print("\n" + banner)
    print("WRITING COMPLETED")
    print(banner)

    # Show at most the first 500 characters
    preview = final_content
    if len(final_content) > 500:
        preview = final_content[:500] + "..."
    print(preview)

    return final_content, blackboard


# Run the demo and print a short summary of the result.
if __name__ == "__main__":
    try:
        content, bb = demo_complete_writing_workflow()
        print(f"\nBlackboard final state keys: {list(bb.keys())}")
        print(f"Total characters generated: {len(content)}")
    except Exception as e:
        # NOTE(review): every exception is reported as an expected
        # environment problem, which also hides genuine bugs in the workflow.
        print(f"Demo failed: {e}")
        print("This is expected in minimal environment. Check dependencies.")

In [None]:
# Cell 9: Smoke Test
def smoke_test_writer():
    """Run the model-free checks of the writer components.

    Each check prints a confirmation line when it passes. The first failing
    check stops the run.

    Returns:
        ``True`` when every check passed, ``False`` otherwise.
    """

    def check_style_config():
        config = create_style_config()
        assert "tone" in config
        assert "glossary" in config

    def check_section_creation():
        sample = WritingSection(
            title="測試章節", outline_points=["要點一", "要點二"], target_length=200
        )
        assert sample.title == "測試章節"
        assert len(sample.outline_points) == 2

    def check_blackboard():
        board = Blackboard()
        board.update_state("test_key", "test_value", "test_agent")
        assert board.get_state("test_key") == "test_value"
        assert len(board.history) == 1

    def check_formatting():
        formatter = ContentFormatter()
        sample_text = "這是測試內容 [2] 和引用 [1]。"
        result = formatter.format_citations(sample_text, ["ref1", "ref2"])
        assert "[1]" in result

    # (check, message printed when it passes), executed in this order
    checks = (
        (check_style_config, "✓ Style configuration OK"),
        (check_section_creation, "✓ WritingSection creation OK"),
        (check_blackboard, "✓ Blackboard integration OK"),
        (check_formatting, "✓ Content formatting OK"),
    )

    print("Running Writer Agent Smoke Test...")

    try:
        for run_check, ok_message in checks:
            run_check()
            print(ok_message)

        print("\n🎉 All smoke tests passed!")
        return True

    except Exception as e:
        print(f"❌ Smoke test failed: {e}")
        return False


# Run smoke test
smoke_test_writer()

In [None]:
# Cell 10: Summary and Next Steps
# Prints a fixed Markdown-formatted recap (written in Traditional Chinese):
# completed features, core concepts, common pitfalls, next steps and a Git
# workflow snippet. The text is static; nothing is computed here.
# NOTE(review): the Git snippet lists two consecutive `git commit` commands
# with no staging step between them -- confirm that is intended.
print(
    """
## 📝 Writer Agent 實作完成

### 已完成功能：
1. ✅ Writer 代理核心類別 - 支援分段寫作與風格控制
2. ✅ Style Dictionary 整合 - 語氣、術語、格式統一
3. ✅ 分段寫作策略 - 避免過長內容，提升品質
4. ✅ 內容銜接機制 - 段落間邏輯連貫
5. ✅ 黑板系統整合 - 讀取研究資料與大綱
6. ✅ 格式化與引用 - 統一引用格式，字數統計

### 核心概念：
- **分段寫作**：將長文檔分割為可管理的段落，逐一生成
- **風格一致性**：透過 Style Dictionary 確保語氣與術語統一
- **上下文連貫**：使用前文摘要維持邏輯連貫性
- **引用標準化**：統一 [1], [2] 格式，自動重新編號

### 常見陷阱：
⚠️ **長度控制**：設定合理的 target_length，避免生成過長內容
⚠️ **風格漂移**：確保 Style Dictionary 規則清晰且一致
⚠️ **記憶體使用**：分段寫作時注意累積的上下文長度
⚠️ **引用對齊**：確保引用編號與參考資料清單一致

### 下一步行動：
1. 實作 **nb34_reviewer_groundedness.ipynb** - 事實核查與引用驗證
2. 整合 **失敗重試機制** - 處理生成品質不佳的情況
3. 加入 **版本控制** - 支援多次修改與版本比較
4. 優化 **效能監控** - 追蹤寫作速度與品質指標

### Git 工作流：
```bash
git checkout -b feature/nb33-writer-composition
git add notebooks/4_agents_orch/nb33_writer_composition.ipynb
git add configs/styles/zh_general.yaml
git commit -m "feat(agent): writer composition with style dictionary"
git commit -m "feat(agent): sectional writing and blackboard integration"
```

🚀 Writer Agent 已準備好與其他代理協作！
"""
)