In [None]:
# Multi-Agent Collaboration System
# 多代理協作系統 - Researcher/Planner/Writer 分工協作

import os
import pathlib
import torch
import json
import time
from datetime import datetime
from typing import Dict, List, Optional, Any, Tuple
from dataclasses import dataclass, asdict
from enum import Enum
import asyncio
import logging

# === Shared Cache Bootstrap ===
# Route the Hugging Face and PyTorch cache environment variables to
# sub-directories of one root, which AI_CACHE_ROOT can override.
AI_CACHE_ROOT = os.getenv("AI_CACHE_ROOT", "/mnt/ai/cache")

_cache_env = {
    "HF_HOME": f"{AI_CACHE_ROOT}/hf",
    "TRANSFORMERS_CACHE": f"{AI_CACHE_ROOT}/hf/transformers",
    "HF_DATASETS_CACHE": f"{AI_CACHE_ROOT}/hf/datasets",
    "HUGGINGFACE_HUB_CACHE": f"{AI_CACHE_ROOT}/hf/hub",
    "TORCH_HOME": f"{AI_CACHE_ROOT}/torch",
}
for k, v in _cache_env.items():
    # Export the variable, then make sure the directory it names exists.
    os.environ[k] = v
    os.makedirs(v, exist_ok=True)

print("[Cache]", AI_CACHE_ROOT, "| GPU:", torch.cuda.is_available())


In [None]:
# Essential imports
import transformers
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    pipeline,
)
import langchain
from langchain.chat_models import ChatOpenAI
from langchain.schema import BaseMessage, HumanMessage, AIMessage, SystemMessage
from langchain.tools import Tool, DuckDuckGoSearchRun
from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ConversationBufferMemory
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Pick the compute device: CUDA when torch can see a GPU, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"[Device] Using: {device}")

# Configure logging at INFO level and create the logger used by every cell
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [None]:
## 1. Message System & Agent Base
# 消息系統與代理基類


class MessageType(Enum):
    """Kinds of messages agents exchange over the message bus.

    The string values are what ``AgentMessage.to_dict`` emits and what
    ``AgentMessage.from_dict`` parses.
    """

    # Work request; receivers dispatch on the ``task_type`` key of the content
    TASK_ASSIGNMENT = "task_assignment"
    # Replies carrying the outcome of a research / planning / writing task
    RESEARCH_RESULT = "research_result"
    PLAN_RESULT = "plan_result"
    WRITING_RESULT = "writing_result"
    FEEDBACK = "feedback"
    ERROR = "error"
    # Agent status broadcast
    STATUS_UPDATE = "status_update"


@dataclass
class AgentMessage:
    """Standard message format for agent communication.

    Attributes:
        sender: ID of the agent (or component) that created the message.
        receiver: ID of the intended recipient, or ``"all"`` for a broadcast.
        message_type: Kind of message, used for subscription filtering.
        content: Free-form payload; its keys depend on ``message_type``.
        timestamp: Creation time as an ISO-8601 string.
        message_id: Identifier assigned by the sender.
    """

    sender: str
    receiver: str
    message_type: MessageType
    content: Dict[str, Any]
    timestamp: str
    message_id: str

    def to_dict(self) -> Dict:
        """Return a plain-dict copy of the message.

        ``message_type`` is emitted as its string value so the result can be
        serialised (for example with ``json.dumps``) and fed back to
        ``from_dict``.
        """
        payload = asdict(self)
        payload["message_type"] = self.message_type.value
        return payload

    @classmethod
    def from_dict(cls, data: Dict) -> "AgentMessage":
        """Build a message from a dict such as the one ``to_dict`` produces.

        Args:
            data: Mapping with one entry per dataclass field. ``message_type``
                may be a ``MessageType`` member or its string value.

        Raises:
            KeyError: If ``message_type`` is missing.
            ValueError: If ``message_type`` is not a known message type.
        """
        # Work on a shallow copy so the caller's dict is left untouched.
        fields = dict(data)
        fields["message_type"] = MessageType(fields["message_type"])
        return cls(**fields)


class MessageBus:
    """In-memory message log through which agents exchange ``AgentMessage`` objects.

    Published messages are appended to ``messages`` and never removed; readers
    filter that list by receiver, subscription and (optionally) timestamp.
    """

    def __init__(self):
        self.messages: List["AgentMessage"] = []
        # agent_id -> message type values (``MessageType.value``) it receives
        self.subscribers: Dict[str, List[str]] = {}

    def subscribe(self, agent_id: str, message_types: List["MessageType"]):
        """Subscribe an agent to specific message types.

        Subscribing again to a type the agent already receives is a no-op, so
        repeated calls do not grow the subscription list.
        """
        subscribed = self.subscribers.setdefault(agent_id, [])
        for message_type in message_types:
            if message_type.value not in subscribed:
                subscribed.append(message_type.value)

    def publish(self, message: "AgentMessage"):
        """Append a message to the bus and log who sent what to whom."""
        self.messages.append(message)
        logger.info(
            f"Message published: {message.sender} -> {message.receiver} ({message.message_type.value})"
        )

    def get_messages_for_agent(
        self, agent_id: str, since_timestamp: Optional[str] = None
    ) -> List["AgentMessage"]:
        """Return the messages an agent should see, in publication order.

        A message qualifies when it is addressed to ``agent_id`` or to
        ``"all"``, its type is one the agent subscribed to, and its timestamp
        is strictly greater than ``since_timestamp`` when that is given.

        Args:
            agent_id: ID of the reading agent.
            since_timestamp: Optional lower bound. Timestamps are compared as
                strings.

        Returns:
            Matching messages; empty when the agent has no subscriptions.
        """
        subscribed = self.subscribers.get(agent_id)
        if not subscribed:
            return []

        return [
            msg
            for msg in self.messages
            if msg.receiver in (agent_id, "all")
            and msg.message_type.value in subscribed
            and (since_timestamp is None or msg.timestamp > since_timestamp)
        ]


class _LocalPipelineLLM:
    """Adapter exposing a Hugging Face text-generation pipeline through ``invoke``.

    The agents call ``llm.invoke([HumanMessage(...)])`` and read ``.content``
    from the reply. A raw ``transformers`` pipeline offers neither, so this
    wrapper provides both while still allowing direct pipeline-style calls.
    """

    def __init__(self, generator, max_new_tokens: int = 512):
        self._generator = generator
        self._max_new_tokens = max_new_tokens

    def __call__(self, *args, **kwargs):
        """Forward direct calls to the wrapped pipeline unchanged."""
        return self._generator(*args, **kwargs)

    def invoke(self, messages):
        """Generate a reply and return it as an ``AIMessage``.

        Args:
            messages: A prompt string, or a sequence of strings / message
                objects whose ``content`` is joined into one prompt.
        """
        if isinstance(messages, str):
            messages = [messages]
        prompt = "\n\n".join(str(getattr(m, "content", m)) for m in messages)
        outputs = self._generator(
            prompt,
            max_new_tokens=self._max_new_tokens,
            return_full_text=False,  # reply only, without the echoed prompt
        )
        return AIMessage(content=outputs[0]["generated_text"])


class BaseAgent:
    """Base class for all agents in the collaboration system.

    Holds the agent identity, an LLM handle (``llm``, which is ``None`` when a
    local model cannot be loaded), the message-bus connection and a short
    rolling context memory.
    """

    # Number of most recent context entries kept by ``add_to_context``.
    MAX_CONTEXT_ENTRIES = 20

    def __init__(
        self, agent_id: str, agent_type: str, llm_model: str = "gpt-3.5-turbo"
    ):
        """Create the agent and its LLM handle.

        Args:
            agent_id: Unique ID used as sender/receiver on the message bus.
            agent_type: Role label included in status broadcasts.
            llm_model: Model name. Names containing "gpt" use ``ChatOpenAI``;
                anything else is loaded locally.
        """
        self.agent_id = agent_id
        self.agent_type = agent_type
        self.message_bus: Optional[MessageBus] = None
        self.last_message_check = self._now()
        self.status = "initialized"
        self.context_memory: List[str] = []

        # Initialize LLM
        # NOTE: substring test, so a local model whose name contains "gpt"
        # (for example "gpt2") is also routed to the OpenAI client.
        if "gpt" in llm_model.lower():
            api_key = os.getenv("OPENAI_API_KEY")
            if not api_key:
                logger.warning(
                    "OPENAI_API_KEY is not set; LLM calls will fail until a real key is provided"
                )
                api_key = "your-key-here"
            self.llm = ChatOpenAI(
                model=llm_model,
                temperature=0.7,
                openai_api_key=api_key,
            )
        else:
            # Fallback to local model (simplified)
            self.llm = self._init_local_llm(llm_model)

    @staticmethod
    def _now() -> str:
        """Return the current local time as a fixed-width ISO-8601 string.

        Microseconds are always included so that the string comparison done
        by the message bus orders timestamps correctly.
        """
        return datetime.now().isoformat(timespec="microseconds")

    def _init_local_llm(self, model_name: str):
        """Load a 4-bit quantised local model and wrap it for ``invoke`` calls.

        Returns:
            A ``_LocalPipelineLLM`` on success, or ``None`` when loading fails
            (the failure is logged, not raised).
        """
        try:
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_use_double_quant=True,
                bnb_4bit_compute_dtype=torch.float16,
            )
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                quantization_config=bnb_config,
                device_map="auto",
                # SECURITY: this runs Python code shipped in the model
                # repository; only load models from sources you trust.
                trust_remote_code=True,
            )
            generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
            return _LocalPipelineLLM(generator)
        except Exception as e:
            logger.warning(f"Failed to load {model_name}; local LLM disabled: {e}")
            return None

    def connect_to_bus(self, message_bus: MessageBus):
        """Attach the agent to a message bus and register its subscriptions."""
        self.message_bus = message_bus
        # Subscribe to relevant message types (to be overridden)
        self.setup_subscriptions()

    def setup_subscriptions(self):
        """Setup message type subscriptions (override in subclasses)"""
        pass

    def send_message(
        self, receiver: str, message_type: MessageType, content: Dict[str, Any]
    ):
        """Publish a message from this agent on the bus.

        Args:
            receiver: Target agent ID, or ``"all"`` to broadcast.
            message_type: Kind of message being sent.
            content: Payload dictionary.

        Raises:
            ValueError: If the agent is not connected to a message bus.
        """
        import uuid

        if not self.message_bus:
            raise ValueError("Agent not connected to message bus")

        message = AgentMessage(
            sender=self.agent_id,
            receiver=receiver,
            message_type=message_type,
            content=content,
            timestamp=self._now(),
            # The random suffix keeps IDs unique when one agent sends several
            # messages within the same second.
            message_id=f"{self.agent_id}_{int(time.time())}_{uuid.uuid4().hex[:8]}",
        )
        self.message_bus.publish(message)

    def check_messages(self) -> List[AgentMessage]:
        """Return messages published since the previous check.

        Returns an empty list when the agent is not connected to a bus.
        """
        if not self.message_bus:
            return []

        messages = self.message_bus.get_messages_for_agent(
            self.agent_id, self.last_message_check
        )
        self.last_message_check = self._now()
        return messages

    def update_status(self, status: str, details: str = ""):
        """Record a new status and, when connected, broadcast it to all agents."""
        self.status = status
        if self.message_bus:
            self.send_message(
                "all",
                MessageType.STATUS_UPDATE,
                {"status": status, "details": details, "agent_type": self.agent_type},
            )

    def add_to_context(self, information: str):
        """Append a time-stamped note to the context memory.

        Only the most recent ``MAX_CONTEXT_ENTRIES`` notes are kept.
        """
        self.context_memory.append(
            f"[{datetime.now().strftime('%H:%M:%S')}] {information}"
        )
        if len(self.context_memory) > self.MAX_CONTEXT_ENTRIES:
            self.context_memory = self.context_memory[-self.MAX_CONTEXT_ENTRIES :]

In [None]:
## 2. Research Agent
# 研究代理 - 負責資訊收集與分析


class ResearchAgent(BaseAgent):
    """Agent that gathers information through web search and summarises it with the LLM.

    Handles ``TASK_ASSIGNMENT`` messages whose content has
    ``task_type == "research"`` and answers the sender with a
    ``RESEARCH_RESULT`` message.
    """

    # Number of queries issued unless depth is "deep" (limits search traffic).
    BASE_QUERY_LIMIT = 3
    # Pause after each successful search, in seconds, as simple rate limiting.
    SEARCH_DELAY_SECONDS = 1
    # An analysis line counts as a key finding when it contains one of these.
    FINDING_KEYWORDS = ("key", "important", "finding", "main")
    MAX_KEY_FINDINGS = 5

    def __init__(self, agent_id: str = "researcher", llm_model: str = "gpt-3.5-turbo"):
        super().__init__(agent_id, "researcher", llm_model)
        self.search_tool = DuckDuckGoSearchRun()
        # NOTE(review): not referenced by any method of this class.
        self.embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

        # Research capabilities
        self.research_tools = [
            Tool(
                name="web_search",
                description="Search the web for current information",
                func=self.search_tool.run,
            ),
            Tool(
                name="analyze_sources",
                description="Analyze and summarize multiple sources",
                func=self.analyze_sources,
            ),
        ]

    def setup_subscriptions(self):
        """Subscribe to task assignments"""
        if self.message_bus:
            self.message_bus.subscribe(self.agent_id, [MessageType.TASK_ASSIGNMENT])

    def analyze_sources(self, sources_text: str) -> str:
        """Ask the LLM for a structured summary of the given source text.

        Returns:
            The LLM's reply, or a short explanatory string when no LLM is
            available or the call fails (errors are not raised).
        """
        if not self.llm:
            return "Local LLM not available for analysis"

        prompt = f"""
        As a research analyst, analyze the following sources and extract key information:

        Sources:
        {sources_text}

        Please provide:
        1. Key findings (3-5 main points)
        2. Source reliability assessment
        3. Conflicting information (if any)
        4. Research gaps or limitations

        Format as structured summary.
        """

        try:
            response = self.llm.invoke([HumanMessage(content=prompt)])
            return response.content
        except Exception as e:
            return f"Analysis failed: {e}"

    @classmethod
    def _build_search_queries(cls, research_topic: str, depth: str) -> List[str]:
        """Return the search queries to run for a topic at the given depth.

        ``"deep"`` runs every base query plus two extra angles; any other
        depth runs only the first ``BASE_QUERY_LIMIT`` base queries.
        """
        queries = [
            research_topic,
            f"{research_topic} latest developments",
            f"{research_topic} analysis report",
            f"{research_topic} expert opinion",
        ]
        if depth == "deep":
            queries.extend(
                [
                    f"{research_topic} research papers",
                    f"{research_topic} case studies",
                ]
            )
            return queries
        return queries[: cls.BASE_QUERY_LIMIT]

    @classmethod
    def _extract_key_findings(cls, analysis: str) -> List[str]:
        """Pick up to ``MAX_KEY_FINDINGS`` analysis lines that mention a finding keyword."""
        findings = [
            line.strip()
            for line in analysis.split("\n")
            if line.strip()
            and any(keyword in line.lower() for keyword in cls.FINDING_KEYWORDS)
        ]
        return findings[: cls.MAX_KEY_FINDINGS]

    def conduct_research(
        self, research_topic: str, depth: str = "moderate"
    ) -> Dict[str, Any]:
        """Search the web for a topic and analyse the combined results.

        Args:
            research_topic: Subject to research; must not be blank.
            depth: ``"deep"`` widens the search; other values use the
                standard query set.

        Returns:
            Dict with ``topic``, ``search_results`` (query plus a truncated
            result), ``analysis``, ``key_findings``, ``sources``,
            ``confidence_score`` and, on failure, ``error``. Failures are
            logged and reported in the dict, never raised.
        """
        self.update_status("researching", f"Topic: {research_topic}")

        results = {
            "topic": research_topic,
            "search_results": [],
            "analysis": "",
            "key_findings": [],
            "sources": [],
            "confidence_score": 0.0,
        }

        try:
            if not research_topic or not research_topic.strip():
                raise ValueError("Research topic must not be empty")

            all_sources = []
            for query in self._build_search_queries(research_topic, depth):
                try:
                    search_result = self.search_tool.run(query)
                    all_sources.append(search_result)
                    results["search_results"].append(
                        {
                            "query": query,
                            "result": search_result[:500],  # Truncate for memory
                        }
                    )
                    time.sleep(self.SEARCH_DELAY_SECONDS)  # Rate limiting
                except Exception as e:
                    # One failed query must not abort the remaining ones.
                    logger.warning(f"Search failed for '{query}': {e}")

            # Analyze combined sources
            if all_sources:
                analysis = self.analyze_sources("\n\n".join(all_sources))
                results["analysis"] = analysis
                results["key_findings"] = self._extract_key_findings(analysis)
                # 0.2 per successful search, capped at 0.8
                results["confidence_score"] = min(0.8, len(all_sources) * 0.2)

            self.add_to_context(f"Completed research on: {research_topic}")

        except Exception as e:
            logger.error(f"Research failed: {e}")
            results["error"] = str(e)

        self.update_status("research_complete")
        return results

    async def process_messages(self):
        """Handle pending research assignments and reply to each sender.

        NOTE: ``conduct_research`` is synchronous, so the event loop is
        blocked while a research task runs.
        """
        for message in self.check_messages():
            if message.message_type != MessageType.TASK_ASSIGNMENT:
                continue
            if message.content.get("task_type") != "research":
                continue

            research_results = self.conduct_research(
                message.content.get("topic", ""),
                message.content.get("depth", "moderate"),
            )

            # Send results back
            self.send_message(
                message.sender,
                MessageType.RESEARCH_RESULT,
                {
                    "task_id": message.content.get("task_id"),
                    "results": research_results,
                },
            )



In [None]:
## 3. Planning Agent
# 規劃代理 - 負責任務分解與執行計畫


class PlanningAgent(BaseAgent):
    """Agent that turns a task description (plus optional research) into an execution plan.

    Handles ``TASK_ASSIGNMENT`` messages with ``task_type == "planning"`` and
    answers with ``PLAN_RESULT``; ``RESEARCH_RESULT`` messages are attached to
    the matching stored plan.
    """

    # Plan key -> lower-case title prefixes that open that section in the
    # LLM reply. Checked in order; the first match wins.
    _PLAN_SECTIONS = (
        ("subtasks", ("task breakdown", "subtasks")),
        ("execution_sequence", ("execution sequence", "sequence")),
        ("resources_needed", ("resource",)),
        ("success_criteria", ("success criteria", "criteria")),
        ("risks", ("risk",)),
        ("timeline", ("estimated timeline", "timeline")),
    )
    # Leading characters ignored when checking whether a line is a section title.
    _LIST_MARKER_CHARS = " \t#*-•0123456789.)("

    def __init__(self, agent_id: str = "planner", llm_model: str = "gpt-3.5-turbo"):
        super().__init__(agent_id, "planner", llm_model)
        # task_id -> plan dict produced by ``create_execution_plan``
        self.active_plans: Dict[str, Dict] = {}

    def setup_subscriptions(self):
        """Subscribe to task assignments and research results"""
        if self.message_bus:
            self.message_bus.subscribe(
                self.agent_id,
                [MessageType.TASK_ASSIGNMENT, MessageType.RESEARCH_RESULT],
            )

    @classmethod
    def _parse_plan_sections(cls, plan_text: str) -> Dict[str, List[str]]:
        """Split the LLM's plan text into list items per plan section.

        Heuristic: a line whose text (after list/markdown markers) starts with
        a known section title opens that section; text following a colon on
        that line is kept as an item. Within a section, lines that begin with
        a bullet or contain a digit in their first three characters are items.
        Lines before the first recognised section are ignored.
        """
        sections: Dict[str, List[str]] = {key: [] for key, _ in cls._PLAN_SECTIONS}
        current = None

        for raw_line in plan_text.split("\n"):
            line = raw_line.strip()
            if not line:
                continue

            title = line.lstrip(cls._LIST_MARKER_CHARS).lower()
            header_key = next(
                (
                    key
                    for key, prefixes in cls._PLAN_SECTIONS
                    if title.startswith(prefixes)
                ),
                None,
            )

            if header_key is not None:
                current = header_key
                # Keep content written on the header line, e.g. "Timeline: 2 weeks".
                inline = line.partition(":")[2].strip(" *")
                if inline:
                    sections[current].append(inline)
            elif current is not None and (
                line[0] in "-*•" or any(c.isdigit() for c in line[:3])
            ):
                sections[current].append(line)

        return sections

    def create_execution_plan(
        self, task_description: str, research_data: Optional[Dict] = None
    ) -> Dict[str, Any]:
        """Ask the LLM for an execution plan and parse it into sections.

        Args:
            task_description: What needs to be accomplished.
            research_data: Optional research result; its ``analysis`` text is
                added to the prompt when present.

        Returns:
            Dict with ``task_description``, ``subtasks``,
            ``execution_sequence``, ``resources_needed``,
            ``success_criteria``, ``risks``, ``timeline`` (``"TBD"`` when not
            found), ``raw_plan`` when the LLM answered, and ``error`` when it
            did not. Failures are logged and reported, never raised.
        """
        self.update_status("planning", f"Task: {task_description}")

        research_context = ""
        if research_data and research_data.get("analysis"):
            research_context = f"\nResearch Context:\n{research_data['analysis']}"

        prompt = f"""
        As a project planning expert, create a detailed execution plan for the following task:

        Task: {task_description}
        {research_context}

        Please provide:
        1. Task breakdown (3-7 subtasks)
        2. Execution sequence and dependencies
        3. Resource requirements
        4. Success criteria
        5. Risk assessment and mitigation
        6. Estimated timeline

        Format as structured plan with clear action items.
        """

        plan = {
            "task_description": task_description,
            "subtasks": [],
            "execution_sequence": [],
            "resources_needed": [],
            "success_criteria": [],
            "risks": [],
            "timeline": "TBD",
        }

        try:
            if self.llm:
                response = self.llm.invoke([HumanMessage(content=prompt)])
                plan_text = response.content
                plan["raw_plan"] = plan_text

                parsed = self._parse_plan_sections(plan_text)
                # ``timeline`` is a string in the plan; the others are lists.
                timeline_items = parsed.pop("timeline")
                plan.update(parsed)
                if timeline_items:
                    plan["timeline"] = "; ".join(timeline_items)

                self.add_to_context(f"Created plan for: {task_description}")
            else:
                plan["error"] = "LLM not available for planning"

        except Exception as e:
            logger.error(f"Planning failed: {e}")
            plan["error"] = str(e)

        self.update_status("plan_complete")
        return plan

    def monitor_execution(self, plan_id: str) -> Dict[str, Any]:
        """Return a simple status report for a stored plan.

        Returns ``{"error": "Plan not found"}`` for an unknown ``plan_id``.
        The report is static: every subtask is listed as pending.
        """
        if plan_id not in self.active_plans:
            return {"error": "Plan not found"}

        plan = self.active_plans[plan_id]

        # Simple execution monitoring (can be enhanced)
        status_report = {
            "plan_id": plan_id,
            "overall_progress": "in_progress",
            "completed_subtasks": [],
            "pending_subtasks": plan.get("subtasks", []),
            "next_actions": ["Continue with writing phase"],
            "issues": [],
        }

        return status_report

    async def process_messages(self):
        """Handle pending planning assignments and incoming research results."""
        for message in self.check_messages():
            if message.message_type == MessageType.TASK_ASSIGNMENT:
                if message.content.get("task_type") == "planning":
                    task_desc = message.content.get("description", "")
                    task_id = message.content.get("task_id", "")

                    # Research is used only if the assignment carries it;
                    # this method does not wait for it to arrive.
                    research_data = message.content.get("research_data")

                    execution_plan = self.create_execution_plan(
                        task_desc, research_data
                    )
                    self.active_plans[task_id] = execution_plan

                    # Send plan back
                    self.send_message(
                        message.sender,
                        MessageType.PLAN_RESULT,
                        {"task_id": task_id, "plan": execution_plan},
                    )

            elif message.message_type == MessageType.RESEARCH_RESULT:
                # Attach research data to a plan that is already stored
                task_id = message.content.get("task_id")
                if task_id in self.active_plans:
                    self.active_plans[task_id]["research_data"] = message.content.get(
                        "results"
                    )

In [None]:
## 4. Writing Agent
# 寫作代理 - 負責內容創作與編輯


class WritingAgent(BaseAgent):
    """Agent that drafts and edits content with the LLM.

    Handles ``TASK_ASSIGNMENT`` messages with ``task_type == "writing"`` and
    answers the sender with a ``WRITING_RESULT`` message.
    """

    # Word-count guidance inserted into the prompt for each ``length`` value.
    LENGTH_GUIDES = {
        "short": "300-500 words",
        "medium": "800-1200 words",
        "long": "1500-2500 words",
    }

    def __init__(self, agent_id: str = "writer", llm_model: str = "gpt-3.5-turbo"):
        super().__init__(agent_id, "writer", llm_model)
        # style name -> description inserted into the prompt
        self.writing_styles = {
            "report": "formal, structured, evidence-based",
            "article": "engaging, informative, accessible",
            "summary": "concise, clear, highlights key points",
            "proposal": "persuasive, detailed, action-oriented",
        }

    def setup_subscriptions(self):
        """Subscribe to task assignments and plan results"""
        if self.message_bus:
            self.message_bus.subscribe(
                self.agent_id, [MessageType.TASK_ASSIGNMENT, MessageType.PLAN_RESULT]
            )

    def create_content(
        self,
        content_type: str,
        topic: str,
        research_data: Optional[Dict] = None,
        plan_data: Optional[Dict] = None,
        style: str = "report",
        length: str = "medium",
    ) -> Dict[str, Any]:
        """Draft content on a topic, optionally grounded in research and a plan.

        Args:
            content_type: Kind of document named in the prompt.
            topic: Subject of the document.
            research_data: Optional research result; ``analysis`` and
                ``key_findings`` are added to the prompt when present.
            plan_data: Optional plan; ``raw_plan`` is added to the prompt.
            style: Key of ``writing_styles``; unknown keys fall back to
                "clear and informative".
            length: "short", "medium" or "long"; unknown values use the
                medium range.

        Returns:
            Dict with ``content_type``, ``topic``, ``style``, ``content``,
            ``word_count``, ``sections``, ``quality_score`` (fraction of four
            simple checks that passed) and, on failure, ``error``. Failures
            are logged and reported, never raised.
        """
        self.update_status("writing", f"Creating {content_type}: {topic}")

        # Prepare context
        research_context = ""
        if research_data and research_data.get("analysis"):
            research_context = f"\nResearch Findings:\n{research_data['analysis']}"
            key_findings = research_data.get("key_findings", [])
            if key_findings:
                research_context += "\nKey Points:\n" + "\n".join(
                    f"- {finding}" for finding in key_findings
                )

        plan_context = ""
        if plan_data and plan_data.get("raw_plan"):
            plan_context = f"\nExecution Plan:\n{plan_data['raw_plan']}"

        style_guide = self.writing_styles.get(style, "clear and informative")
        length_guide = self.LENGTH_GUIDES.get(length, "800-1200 words")

        prompt = f"""
        As a professional content writer, create a {content_type} on the topic: {topic}

        Style: {style_guide}
        Target Length: {length_guide}

        {research_context}
        {plan_context}

        Requirements:
        1. Clear structure with headers
        2. Evidence-based content (cite research when available)
        3. Engaging and accessible language
        4. Actionable insights where appropriate
        5. Professional formatting

        Please create comprehensive, well-structured content.
        """

        content_result = {
            "content_type": content_type,
            "topic": topic,
            "style": style,
            "content": "",
            "word_count": 0,
            "sections": [],
            "quality_score": 0.0,
        }

        try:
            if self.llm:
                response = self.llm.invoke([HumanMessage(content=prompt)])
                content = response.content

                content_result["content"] = content
                content_result["word_count"] = len(content.split())

                # Section titles: markdown headers or lines ending with a colon
                sections = []
                for line in content.split("\n"):
                    stripped = line.strip()
                    if stripped and (
                        stripped.startswith("#") or stripped.endswith(":")
                    ):
                        sections.append(stripped)
                content_result["sections"] = sections

                # Simple quality assessment
                quality_factors = [
                    len(content) > 500,  # Adequate length
                    len(sections) > 2,  # Good structure
                    # Research-based: true only when research was supplied.
                    bool(research_context),
                    any(
                        keyword in content.lower()
                        for keyword in ["analysis", "findings", "conclusion"]
                    ),
                ]
                content_result["quality_score"] = sum(quality_factors) / len(
                    quality_factors
                )

                self.add_to_context(f"Created {content_type} on: {topic}")
            else:
                content_result["error"] = "LLM not available for writing"

        except Exception as e:
            logger.error(f"Content creation failed: {e}")
            content_result["error"] = str(e)

        self.update_status("writing_complete")
        return content_result

    def edit_content(self, content: str, edit_instructions: str) -> str:
        """Ask the LLM to revise content according to the instructions.

        Returns:
            The revised text, or the original ``content`` unchanged when no
            LLM is available or the call fails (the failure is logged).
        """
        prompt = f"""
        Please edit the following content according to these instructions:

        Instructions: {edit_instructions}

        Original Content:
        {content}

        Provide the improved version maintaining the original structure and style.
        """

        try:
            if self.llm:
                response = self.llm.invoke([HumanMessage(content=prompt)])
                return response.content
        except Exception as e:
            logger.error(f"Content editing failed: {e}")

        return content  # Return original if editing fails

    async def process_messages(self):
        """Handle pending writing assignments and reply to each sender."""
        for message in self.check_messages():
            if message.message_type != MessageType.TASK_ASSIGNMENT:
                continue
            if message.content.get("task_type") != "writing":
                continue

            request = message.content
            writing_result = self.create_content(
                request.get("content_type", "report"),
                request.get("topic", ""),
                request.get("research_data"),
                request.get("plan_data"),
                request.get("style", "report"),
                request.get("length", "medium"),
            )

            # Send result back
            self.send_message(
                message.sender,
                MessageType.WRITING_RESULT,
                {"task_id": request.get("task_id", ""), "content": writing_result},
            )

In [None]:
## 5. Collaboration Orchestrator
# 協作編排器 - 管理多代理工作流程


class CollaborationOrchestrator:
    """Runs the research -> planning -> writing workflow across registered agents.

    The orchestrator owns the message bus. It publishes task assignments under
    its own ID so that agents, which reply to ``message.sender``, address
    their results back to it.
    """

    # Bus identity used as sender of assignments and receiver of results.
    ORCHESTRATOR_ID = "orchestrator"
    # Maximum time to wait for one phase's result, in seconds.
    PHASE_TIMEOUT_SECONDS = 60
    # Pause between polls while waiting for a result, in seconds.
    POLL_INTERVAL_SECONDS = 2

    def __init__(self):
        self.message_bus = MessageBus()
        self.agents: Dict[str, BaseAgent] = {}
        self.active_tasks: Dict[str, Dict] = {}
        self.task_counter = 0
        # The bus only returns subscribed message types, so register for the
        # result messages the workflow waits on.
        self.message_bus.subscribe(
            self.ORCHESTRATOR_ID,
            [
                MessageType.RESEARCH_RESULT,
                MessageType.PLAN_RESULT,
                MessageType.WRITING_RESULT,
            ],
        )

    def add_agent(self, agent: BaseAgent):
        """Connect an agent to the bus and register it under its ``agent_id``."""
        agent.connect_to_bus(self.message_bus)
        self.agents[agent.agent_id] = agent
        logger.info(f"Added agent: {agent.agent_id} ({agent.agent_type})")

    def create_task_id(self) -> str:
        """Return a new task ID built from a running counter and the current time."""
        self.task_counter += 1
        return f"task_{self.task_counter}_{int(time.time())}"

    async def _run_phase(
        self,
        agent_id: str,
        task_id: str,
        assignment: Dict[str, Any],
        result_type: MessageType,
        result_key: str,
    ) -> Optional[Any]:
        """Assign one task to an agent and wait for the matching result.

        Args:
            agent_id: Key of the target agent in ``self.agents``.
            task_id: Task the assignment belongs to.
            assignment: Assignment payload; must contain ``task_type``.
            result_type: Message type that carries the agent's answer.
            result_key: Key of the answer inside the result message content.

        Returns:
            The value stored under ``result_key``, or ``None`` if no result
            arrived within ``PHASE_TIMEOUT_SECONDS``.
        """
        agent = self.agents[agent_id]
        self.message_bus.publish(
            AgentMessage(
                sender=self.ORCHESTRATOR_ID,
                receiver=agent_id,
                message_type=MessageType.TASK_ASSIGNMENT,
                content={"task_id": task_id, **assignment},
                timestamp=datetime.now().isoformat(timespec="microseconds"),
                message_id=f"{self.ORCHESTRATOR_ID}_{task_id}_{assignment['task_type']}",
            )
        )

        deadline = time.monotonic() + self.PHASE_TIMEOUT_SECONDS
        while True:
            await agent.process_messages()

            for msg in self.message_bus.get_messages_for_agent(self.ORCHESTRATOR_ID):
                if (
                    msg.message_type == result_type
                    and msg.content.get("task_id") == task_id
                ):
                    return msg.content.get(result_key)

            if time.monotonic() >= deadline:
                logger.warning(
                    f"Timed out waiting for {result_type.value} from {agent_id} (task {task_id})"
                )
                return None
            await asyncio.sleep(self.POLL_INTERVAL_SECONDS)

    async def execute_collaborative_task(
        self,
        task_description: str,
        content_type: str = "report",
        style: str = "report",
        research_depth: str = "moderate",
    ) -> Dict[str, Any]:
        """Run research, planning and writing for one task.

        A phase runs only if its agent ("researcher", "planner", "writer") is
        registered; planning additionally requires research data.

        Returns:
            The task record: ``task_id``, the request parameters, ``status``
            ("completed", "incomplete" when a phase timed out, or "failed"),
            ``start_time``, ``end_time`` unless failed, ``results`` keyed by
            phase, ``timed_out_phases`` when applicable and ``error`` on
            failure. Exceptions are logged and reported, never raised.
        """
        task_id = self.create_task_id()

        task_info = {
            "task_id": task_id,
            "description": task_description,
            "content_type": content_type,
            "style": style,
            "research_depth": research_depth,
            "status": "started",
            "start_time": datetime.now().isoformat(),
            "results": {},
        }

        self.active_tasks[task_id] = task_info

        # Defined up front so later phases work when an earlier one is skipped.
        research_data = None
        plan_data = None

        try:
            # Phase 1: Research
            if "researcher" in self.agents:
                logger.info(f"Phase 1: Starting research for task {task_id}")
                research_data = await self._run_phase(
                    "researcher",
                    task_id,
                    {
                        "task_type": "research",
                        "topic": task_description,
                        "depth": research_depth,
                    },
                    MessageType.RESEARCH_RESULT,
                    "results",
                )
                task_info["results"]["research"] = research_data

            # Phase 2: Planning
            if "planner" in self.agents and research_data:
                logger.info(f"Phase 2: Starting planning for task {task_id}")
                plan_data = await self._run_phase(
                    "planner",
                    task_id,
                    {
                        "task_type": "planning",
                        "description": task_description,
                        "research_data": research_data,
                    },
                    MessageType.PLAN_RESULT,
                    "plan",
                )
                task_info["results"]["planning"] = plan_data

            # Phase 3: Writing
            if "writer" in self.agents:
                logger.info(f"Phase 3: Starting writing for task {task_id}")
                writing_data = await self._run_phase(
                    "writer",
                    task_id,
                    {
                        "task_type": "writing",
                        "topic": task_description,
                        "content_type": content_type,
                        "style": style,
                        "length": "medium",
                        "research_data": research_data,
                        "plan_data": plan_data,
                    },
                    MessageType.WRITING_RESULT,
                    "content",
                )
                task_info["results"]["writing"] = writing_data

            # A phase that ran but produced no result timed out.
            timed_out = [
                phase
                for phase, outcome in task_info["results"].items()
                if outcome is None
            ]
            if timed_out:
                task_info["timed_out_phases"] = timed_out
            task_info["status"] = "incomplete" if timed_out else "completed"
            task_info["end_time"] = datetime.now().isoformat()

        except Exception as e:
            logger.error(f"Collaborative task failed: {e}")
            task_info["status"] = "failed"
            task_info["error"] = str(e)

        return task_info

    def get_system_status(self) -> Dict[str, Any]:
        """Summarise registered agents, the task count and the bus size."""
        status = {
            "agents": {},
            "active_tasks": len(self.active_tasks),
            "message_bus_size": len(self.message_bus.messages),
            "system_health": "healthy",
        }

        for agent_id, agent in self.agents.items():
            status["agents"][agent_id] = {
                "type": agent.agent_type,
                "status": agent.status,
                "context_size": len(agent.context_memory),
            }

        return status

In [None]:
## 6. Multi-Agent System Demo
# 多代理系統示範


async def demo_multi_agent_collaboration():
    """Run one end-to-end collaborative task and print a summary of each phase.

    Creates a ``CollaborationOrchestrator``, registers a researcher, a planner
    and a writer agent (all on ``gpt-3.5-turbo``), subscribes the orchestrator
    to the result/status message types, executes a single report task and
    prints the research, planning and writing outcomes plus the system status.

    Returns:
        The task-info dict returned by ``execute_collaborative_task``, or
        ``None`` when the agents could not be created.
    """
    print("🤖 Multi-Agent Collaboration Demo")
    print("=" * 50)

    orchestrator = CollaborationOrchestrator()

    # All three agents are built before any of them is registered.
    # Note: Use smaller models or mock agents if OpenAI API not available
    try:
        team = (
            ResearchAgent("researcher", "gpt-3.5-turbo"),
            PlanningAgent("planner", "gpt-3.5-turbo"),
            WritingAgent("writer", "gpt-3.5-turbo"),
        )
        for member in team:
            orchestrator.add_agent(member)

        print("✅ All agents initialized and connected")

    except Exception as e:
        print(f"⚠️ Agent initialization failed: {e}")
        print("💡 Tip: Set OPENAI_API_KEY environment variable or use local models")
        return

    # The orchestrator polls the bus under the id "orchestrator", so it must be
    # subscribed to every message type the agents report back with.
    orchestrator.message_bus.subscribe(
        "orchestrator",
        [
            MessageType.RESEARCH_RESULT,
            MessageType.PLAN_RESULT,
            MessageType.WRITING_RESULT,
            MessageType.STATUS_UPDATE,
        ],
    )

    # Demo task: a report about AI trends
    topic = "Current trends in Large Language Models and their business applications"

    print(f"\n🎯 Task: {topic}")
    print("📋 Starting collaborative execution...")

    outcome = await orchestrator.execute_collaborative_task(
        task_description=topic,
        content_type="report",
        style="report",
        research_depth="moderate",
    )

    print(f"\n📊 Task Results (ID: {outcome['task_id']})")
    print(f"Status: {outcome['status']}")

    phases = outcome.get("results")
    if phases:
        # Each phase is reported only when it produced a non-empty result.
        research = phases.get("research")
        if research:
            print("\n🔍 Research Phase:")
            print(f"- Topic: {research.get('topic', 'N/A')}")
            print(f"- Sources found: {len(research.get('search_results', []))}")
            print(f"- Key findings: {len(research.get('key_findings', []))}")
            print(f"- Confidence: {research.get('confidence_score', 0):.2f}")

        planning = phases.get("planning")
        if planning:
            plan_mark = "✅" if planning.get("raw_plan") else "❌"
            print("\n📋 Planning Phase:")
            print(f"- Subtasks identified: {len(planning.get('subtasks', []))}")
            print(f"- Plan created: {plan_mark}")

        writing = phases.get("writing")
        if writing:
            print("\n✍️ Writing Phase:")
            print(f"- Content type: {writing.get('content_type', 'N/A')}")
            print(f"- Word count: {writing.get('word_count', 0)}")
            print(f"- Quality score: {writing.get('quality_score', 0):.2f}")
            print(f"- Sections: {len(writing.get('sections', []))}")

            # Preview is capped at 300 characters, with an ellipsis when cut.
            content = writing.get("content", "")
            if content:
                if len(content) > 300:
                    preview = content[:300] + "..."
                else:
                    preview = content
                print("\n📄 Content Preview:")
                print("-" * 40)
                print(preview)
                print("-" * 40)

    print("\n🏥 System Status:")
    status = orchestrator.get_system_status()
    for agent_id, agent_status in status["agents"].items():
        print(f"- {agent_id}: {agent_status['status']} ({agent_status['type']})")

    print(f"- Total messages: {status['message_bus_size']}")
    print(f"- System health: {status['system_health']}")

    return outcome


# Run demo
# NOTE: the top-level `await` relies on the notebook kernel running cells inside
# an event loop; in a plain script this call would need asyncio.run(...).
# `demo_result` is None when agent initialization failed inside the demo.
print("Starting Multi-Agent Collaboration System...")
demo_result = await demo_multi_agent_collaboration()

In [None]:
## 7. Performance Monitoring & Cost Analysis
# 效能監控與成本分析


class SystemMetrics:
    """Collect task timings and API usage and turn them into a cost report.

    All data lives in ``self.metrics``:
      - ``task_execution_times``: list of ``{task_id, duration, timestamp}``.
      - ``api_calls``: ``{"total": int, "by_agent": {agent_id: [call, ...]}}``
        where each call is ``{model, tokens, timestamp}``.
      - ``agent_response_times``, ``memory_usage``, ``error_counts``: created
        empty here; no method of this class writes to them.
    """

    # Token count assumed for a call that was recorded without usage data.
    DEFAULT_TOKENS_PER_CALL = 1000

    # Approximate OpenAI prices in USD per 1K tokens.
    DEFAULT_PRICING = {
        "gpt-3.5-turbo": 0.002,
        "gpt-4": 0.03,
        "embedding": 0.0004,
    }

    def __init__(self):
        self.metrics = {
            "task_execution_times": [],
            "agent_response_times": {},
            "api_calls": {"total": 0, "by_agent": {}},
            "memory_usage": [],
            "error_counts": {"by_agent": {}, "by_type": {}},
        }

    def record_task_time(self, task_id: str, duration: float):
        """Record how long a task took.

        Args:
            task_id: Identifier of the finished task.
            duration: Execution time in seconds.
        """
        self.metrics["task_execution_times"].append(
            {
                "task_id": task_id,
                "duration": duration,
                "timestamp": datetime.now().isoformat(),
            }
        )

    def record_api_call(self, agent_id: str, model: str, tokens_used: int = 0):
        """Record one API call for cost tracking.

        Args:
            agent_id: Agent that issued the call.
            model: Model name; used as the pricing key in ``estimate_costs``.
            tokens_used: Tokens consumed by the call. Leave at 0 when the
                usage is unknown; ``estimate_costs`` then assumes
                ``DEFAULT_TOKENS_PER_CALL`` tokens.
        """
        api_calls = self.metrics["api_calls"]
        api_calls["total"] += 1
        api_calls["by_agent"].setdefault(agent_id, []).append(
            {
                "model": model,
                "tokens": tokens_used,
                "timestamp": datetime.now().isoformat(),
            }
        )

    def estimate_costs(
        self, pricing_model: Optional[Dict[str, float]] = None
    ) -> Dict[str, Any]:
        """Estimate API costs from the recorded calls.

        Args:
            pricing_model: Mapping of model name to USD price per 1K tokens.
                ``None`` (or an empty mapping) selects ``DEFAULT_PRICING``.

        Returns:
            ``{"total_estimated_cost": float, "by_agent": {agent_id: float}}``.
            Calls whose model is missing from the pricing table add nothing.
        """
        if not pricing_model:
            pricing_model = self.DEFAULT_PRICING

        cost_breakdown = {}
        for agent_id, calls in self.metrics["api_calls"]["by_agent"].items():
            agent_cost = 0.0
            for call in calls:
                # record_api_call always stores a "tokens" key (0 when the
                # caller had no usage data), so a plain .get() default would
                # never fire; treat a missing or zero count as "unknown".
                tokens = call.get("tokens") or self.DEFAULT_TOKENS_PER_CALL
                price = pricing_model.get(call["model"])
                if price is not None:
                    agent_cost += (tokens / 1000) * price
            cost_breakdown[agent_id] = agent_cost

        # Summed in insertion order, starting from a float zero.
        total_cost = sum(cost_breakdown.values(), 0.0)
        return {"total_estimated_cost": total_cost, "by_agent": cost_breakdown}

    def generate_report(self) -> str:
        """Build a human-readable performance and cost report.

        Returns:
            Multi-line text with task count, average task time, API call
            count, estimated cost per agent and fixed optimization tips.
        """
        task_times = self.metrics["task_execution_times"]
        task_count = len(task_times)
        avg_task_time = 0
        if task_times:
            avg_task_time = sum(t["duration"] for t in task_times) / task_count

        total_calls = self.metrics["api_calls"]["total"]
        cost_estimate = self.estimate_costs()
        total_cost = cost_estimate["total_estimated_cost"]

        header = f"""
📊 Multi-Agent System Performance Report
==========================================

⏱️ Performance Metrics:
- Tasks completed: {task_count}
- Average task time: {avg_task_time:.2f} seconds
- Total API calls: {total_calls}

💰 Cost Analysis:
- Estimated total cost: ${total_cost:.4f}
- Cost breakdown:
"""

        breakdown = "".join(
            f"  - {agent}: ${cost:.4f}\n"
            for agent, cost in cost_estimate["by_agent"].items()
        )

        footer = """
🔧 Optimization Recommendations:
- Consider using smaller models for simple tasks
- Implement response caching for repeated queries
- Use local models for development/testing
- Monitor token usage and optimize prompts
        """

        return header + breakdown + footer

# Initialize metrics tracker
metrics = SystemMetrics()

# Example: Record some sample metrics.
# `demo_result` only exists once the demo cell has run (and is None when agent
# setup failed), so test for the name first instead of risking a NameError.
if "demo_result" in locals() and demo_result:
    task_duration = 45.2  # Simulated
    metrics.record_task_time(demo_result["task_id"], task_duration)
    metrics.record_api_call("researcher", "gpt-3.5-turbo", 1200)
    metrics.record_api_call("planner", "gpt-3.5-turbo", 800)
    metrics.record_api_call("writer", "gpt-3.5-turbo", 1500)

print(metrics.generate_report())

In [None]:
## 8. Configuration & Deployment Helpers
# 配置與部署輔助工具


def create_agent_config(
    agent_types: List[str], model_preferences: Dict[str, str] = None
) -> Dict:
    """Build the configuration dict for a multi-agent system.

    Args:
        agent_types: Agent roles to configure (e.g. ``"researcher"``).
        model_preferences: Optional role -> model overrides, layered on top of
            the built-in choices. A ``"default"`` entry changes the model used
            for roles that have no specific entry.

    Returns:
        ``{"system": {...}, "agents": {role: {...}}}`` where every role gets
        its own settings dict (model, temperature, max_tokens, retry_attempts,
        enable_memory).
    """
    models = dict(
        researcher="gpt-3.5-turbo",
        planner="gpt-3.5-turbo",
        writer="gpt-4",  # Better for content creation
        default="gpt-3.5-turbo",
    )
    models.update(model_preferences or {})
    fallback_model = models["default"]

    agents = {
        role: {
            "model": models.get(role, fallback_model),
            "temperature": 0.7,
            "max_tokens": 2000,
            "retry_attempts": 3,
            "enable_memory": True,
        }
        for role in agent_types
    }

    return {
        "system": {
            "message_bus_size_limit": 1000,
            "task_timeout": 300,  # 5 minutes
            "max_concurrent_tasks": 3,
        },
        "agents": agents,
    }


def save_collaboration_results(
    results: Dict, output_path: str = "collaboration_results.json"
):
    """Write collaboration results to a UTF-8 JSON file (best effort).

    Values that JSON cannot represent are stringified via ``default=str``.
    Any failure is reported on stdout instead of being raised, so a save
    problem never interrupts the notebook.

    Args:
        results: Result dict to persist.
        output_path: Destination file; overwritten on success.
    """
    try:
        # Serialize before opening the file: opening with "w" truncates it
        # immediately, so an encoding error (e.g. a non-string dict key, which
        # default=str does not cover) would otherwise destroy an existing
        # results file and leave a partial one behind.
        payload = json.dumps(results, indent=2, ensure_ascii=False, default=str)
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(payload)
        print(f"✅ Results saved to: {output_path}")
    except Exception as e:
        print(f"❌ Failed to save results: {e}")


# Example configuration: explicit per-role model choices
sample_config = create_agent_config(
    agent_types=["researcher", "planner", "writer"],
    model_preferences=dict(
        researcher="gpt-3.5-turbo",  # Good for search/analysis
        writer="gpt-4",  # Better for creative content
        planner="gpt-3.5-turbo",  # Sufficient for planning
    ),
)

print(
    "\n🔧 Sample Agent Configuration:",
    json.dumps(sample_config, indent=2),
    sep="\n",
)

# Persist the demo output only when the demo cell ran and returned a result
if globals().get("demo_result"):
    save_collaboration_results(
        globals()["demo_result"], "demo_collaboration_results.json"
    )

In [None]:
## 9. Smoke Test & Validation
# 煙霧測試與驗證


def validate_multi_agent_system():
    """Smoke-test the core building blocks of the multi-agent system.

    Four checks are run: message bus publishing, agent creation, orchestrator
    registration and config generation. Each check runs in its own
    ``try``/``except`` so that an exception in one component is reported
    against that component only and the remaining checks still execute.

    Returns:
        Dict mapping check name to ``True`` (passed) or ``False`` (failed or
        raised).
    """

    # Holds the mock agent so the orchestrator check reuses the instance
    # created by the agent-creation check instead of building a second one.
    created = {}

    def check_message_system():
        # Test 1: a published message must show up in the bus's message list
        bus = MessageBus()
        test_msg = AgentMessage(
            sender="test",
            receiver="test",
            message_type=MessageType.STATUS_UPDATE,
            content={"test": "data"},
            timestamp=datetime.now().isoformat(),
            message_id="test_123",
        )
        bus.publish(test_msg)
        return len(bus.messages) == 1

    def check_agent_creation():
        # Test 2: Agent creation (without LLM)
        class MockAgent(BaseAgent):
            def __init__(self):
                super().__init__("mock", "test", "mock-model")
                self.llm = None

        created["agent"] = MockAgent()
        return created["agent"].agent_id == "mock"

    def check_orchestrator():
        # Test 3: a registered agent must be listed under its id
        if "agent" not in created:
            raise RuntimeError("no mock agent available (agent creation failed)")
        orch = CollaborationOrchestrator()
        orch.add_agent(created["agent"])
        return "mock" in orch.agents

    def check_config_generation():
        # Test 4: generated config must contain the requested agent
        config = create_agent_config(["researcher", "writer"])
        return "agents" in config and "researcher" in config["agents"]

    checks = {
        "message_system": check_message_system,
        "agent_creation": check_agent_creation,
        "orchestrator": check_orchestrator,
        "config_generation": check_config_generation,
    }
    tests = dict.fromkeys(checks, False)

    for test_name, check in checks.items():
        try:
            tests[test_name] = check()
        except Exception as e:
            print(f"❌ Validation error ({test_name}): {e}")

    # Report results
    print("\n🧪 System Validation Results:")
    for test_name, passed in tests.items():
        status = "✅ PASS" if passed else "❌ FAIL"
        print(f"- {test_name}: {status}")

    overall_health = all(tests.values())
    print(
        f"\n🏥 Overall System Health: {'✅ HEALTHY' if overall_health else '⚠️ ISSUES DETECTED'}"
    )

    return tests


# Run validation
validation_results = validate_multi_agent_system()

In [None]:
## 10. Usage Examples & Next Steps
# 使用範例與後續步驟


def show_usage_examples():
    """Show practical usage examples"""

    examples = {
        "Market Research Report": {
            "task": "Analysis of electric vehicle market trends in Asia-Pacific region",
            "content_type": "report",
            "style": "report",
            "research_depth": "deep",
        },
        "Product Launch Plan": {
            "task": "Launch strategy for AI-powered healthcare diagnostic tool",
            "content_type": "proposal",
            "style": "proposal",
            "research_depth": "moderate",
        },
        "Technical Documentation": {
            "task": "Implementation guide for microservices architecture",
            "content_type": "article",
            "style": "article",
            "research_depth": "moderate",
        },
        "Competitive Analysis": {
            "task": "Comparison of top 5 cloud computing platforms",
            "content_type": "report",
            "style": "report",
            "research_depth": "deep",
        },
    }

    print("\n📚 Multi-Agent Collaboration Use Cases:")
    print("=" * 50)

    for use_case, params in examples.items():
        print(f"\n🎯 {use_case}:")
        print(f"   Task: {params['task']}")
        print(f"   Type: {params['content_type']} ({params['style']} style)")
        print(f"   Research: {params['research_depth']} depth")

        # Show code example
        print(f"   Code:")
        print(f"   ```python")
        print(f"   result = await orchestrator.execute_collaborative_task(")
        print(f"       task_description='{params['task']}',")
        print(f"       content_type='{params['content_type']}',")
        print(f"       style='{params['style']}',")
        print(f"       research_depth='{params['research_depth']}'")
        print(f"   )")
        print(f"   ```")


# Print the example catalogue defined above.
show_usage_examples()

# Static roadmap and tips shown at the end of the notebook.
print(
    """
🚀 Next Steps & Advanced Features:
=================================

1. 🔄 Implement feedback loops between agents
2. 🧠 Add long-term memory with vector storage
3. 🔧 Create specialized domain agents (legal, medical, finance)
4. 📊 Advanced metrics and dashboard
5. 🌐 Web interface for task management
6. 🔗 Integration with external APIs and databases
7. ⚡ Async processing and queue management
8. 🛡️ Security and access control
9. 📈 Auto-scaling based on workload
10. 🤖 Self-improving agents through reinforcement learning

💡 Pro Tips:
- Start with simple 2-agent collaborations
- Use local models for development to reduce costs
- Implement proper error handling and recovery
- Monitor API usage and costs carefully
- Consider caching frequently used results
- Test with various task complexities
"""
)

# Completion banner.
print("\n✅ Multi-Agent Collaboration Notebook Complete!")
print(
    "🎯 Key Achievement: Built a collaborative AI system with researcher, planner, and writer agents"
)
print(
    "📚 Ready for: Advanced agent architectures, specialized domains, and production deployment"
)

In [None]:
# Message types and standard message format
class MessageType(Enum):
    """Message categories exchanged between agents (condensed recap).

    NOTE(review): this class has the same name as the ``MessageType`` defined
    near the top of the notebook, so running this cell rebinds that name.
    """

    TASK_ASSIGNMENT = "task_assignment"
    RESEARCH_RESULT = "research_result"
    PLAN_RESULT = "plan_result"
    WRITING_RESULT = "writing_result"
    # The smoke test that follows builds a message with
    # MessageType.STATUS_UPDATE, so the recap must keep this member.
    # NOTE(review): the value follows the lowercase-name pattern of the other
    # members — confirm it matches the original definition.
    STATUS_UPDATE = "status_update"


@dataclass
class AgentMessage:
    """Envelope for one message passed between agents."""

    sender: str  # id of the party that sent the message
    receiver: str  # id of the intended recipient
    message_type: MessageType  # category of the message
    content: Dict[str, Any]  # payload dict (the orchestrator reads keys such as "task_id" from it)
    timestamp: str  # string timestamp; the smoke tests pass datetime.now().isoformat()
    message_id: str  # identifier of this message (presumably unique — TODO confirm)

In [None]:
async def execute_collaborative_task(self, task_description: str):
    """Run the research, planning and writing phases in sequence.

    Each phase is awaited before the next starts; planning receives the
    research output and writing receives both earlier outputs.

    Args:
        task_description: Text describing the task to carry out.

    Returns:
        Dict with the phase outputs under ``"research"``, ``"planning"`` and
        ``"writing"``.
    """
    outputs = {}

    # Phase 1: Research
    outputs["research"] = await self.execute_research_phase(task_description)

    # Phase 2: Planning (builds on the research output)
    outputs["planning"] = await self.execute_planning_phase(
        task_description, outputs["research"]
    )

    # Phase 3: Writing (builds on both earlier outputs)
    outputs["writing"] = await self.execute_writing_phase(
        task_description, outputs["research"], outputs["planning"]
    )

    return outputs

In [None]:
def estimate_costs(self, pricing_model: Optional[Dict[str, float]] = None):
    """Estimate the total API cost from the calls stored on ``self.api_calls``.

    Args:
        pricing_model: Mapping of model name to price per 1K tokens. Models
            missing from it (or every model, when it is ``None``) are priced
            at the 0.002 fallback rate.

    Returns:
        Total estimated cost as a float. Calls without a ``"tokens"`` entry
        are counted as 1000 tokens.
    """
    # The parameter defaults to None; calling .get() on it directly raised
    # AttributeError whenever the caller omitted a pricing table.
    prices = pricing_model or {}

    # Compute cost from the recorded API calls
    return sum(
        (
            (call.get("tokens", 1000) / 1000) * prices.get(call["model"], 0.002)
            for calls in self.api_calls.values()
            for call in calls
        ),
        0.0,
    )

In [None]:
# === Multi-Agent System Smoke Test ===
def validate_multi_agent_system():
    """Condensed smoke test for the message bus, orchestrator and config helper.

    All checks share one ``try`` block, so the first exception stops the
    remaining checks and leaves their flags ``False``. ``agent_creation`` is
    not actually exercised; it is simply set to ``True`` once the other
    checks have run without raising.

    NOTE(review): this reuses the name of the fuller smoke test defined
    earlier in the notebook, so running this cell replaces that definition.

    Returns:
        Dict mapping check name to a bool.
    """
    tests = dict.fromkeys(
        ("message_system", "agent_creation", "orchestrator", "config_generation"),
        False,
    )

    try:
        # Message bus: one published message must be stored
        message_bus = MessageBus()
        probe = AgentMessage(
            sender="test",
            receiver="test",
            message_type=MessageType.STATUS_UPDATE,
            content={"test": "data"},
            timestamp=datetime.now().isoformat(),
            message_id="test_123",
        )
        message_bus.publish(probe)
        tests["message_system"] = len(message_bus.messages) == 1

        # Orchestrator: must own a MessageBus instance
        orchestrator = CollaborationOrchestrator()
        tests["orchestrator"] = isinstance(orchestrator.message_bus, MessageBus)

        # Config generation: result must have an "agents" section
        generated = create_agent_config(["researcher", "writer"])
        tests["config_generation"] = "agents" in generated

        tests["agent_creation"] = True  # Mock test

    except Exception as e:
        print(f"❌ Validation failed: {e}")

    verdict = "✅ HEALTHY" if all(tests.values()) else "⚠️ ISSUES"
    print(f"🏥 System Health: {verdict}")
    return tests


validation_results = validate_multi_agent_system()

## 2. Notebook 章節大綱

**Cell 1: 環境初始化與共享快取**
- 設定AI_CACHE_ROOT等環境變數
- GPU檢查與設備配置

**Cell 2: 核心依賴與導入**
- 導入transformers, langchain, faiss等套件
- 設定logging與基礎配置

**Cell 3: 訊息系統 (Message System)**
- MessageType枚舉定義
- AgentMessage資料類別
- MessageBus中央訊息匯流排

**Cell 4: 基礎代理類別 (BaseAgent)**
- 代理間通訊基礎建設
- LLM初始化（支援本地與雲端模型）
- 狀態管理與記憶體機制

**Cell 5: 研究代理 (ResearchAgent)**
- 網路搜尋與資訊收集
- 多來源分析與可信度評估
- 研究深度控制（moderate/deep）

**Cell 6: 規劃代理 (PlanningAgent)**
- 任務分解與執行計畫建立
- 相依性分析與風險評估
- 進度監控機制

**Cell 7: 寫作代理 (WritingAgent)**
- 多樣式內容創作（report/article/proposal）
- 基於研究與計畫的整合寫作
- 內容編輯與品質評估

**Cell 8: 協作編排器 (CollaborationOrchestrator)**
- 多代理工作流程管理
- 階段性任務執行（研究→規劃→寫作）
- 超時處理與錯誤恢復

**Cell 9: 系統示範 (Multi-Agent Demo)**
- 完整協作流程演示
- 即時狀態監控
- 結果展示與品質評估

**Cell 10: 效能監控 (Performance Monitoring)**
- API成本追蹤與估算
- 執行時間分析
- 系統健康狀態報告

**Cell 11: 配置與部署工具**
- 代理配置生成器
- 結果保存與載入
- 煙霧測試驗證

**Cell 12: 使用範例與下一步**
- 實際應用場景展示
- 進階功能規劃
- 最佳實務建議

## 3. 核心程式碼片段

### 代理間通訊架構
```python
# 訊息類型與標準格式
class MessageType(Enum):
    TASK_ASSIGNMENT = "task_assignment"
    RESEARCH_RESULT = "research_result"
    PLAN_RESULT = "plan_result"
    WRITING_RESULT = "writing_result"

@dataclass
class AgentMessage:
    sender: str
    receiver: str
    message_type: MessageType
    content: Dict[str, Any]
    timestamp: str
    message_id: str
```

### 協作執行流程
```python
async def execute_collaborative_task(self, task_description: str):
    # Phase 1: Research
    research_data = await self.execute_research_phase(task_description)
    
    # Phase 2: Planning  
    plan_data = await self.execute_planning_phase(task_description, research_data)
    
    # Phase 3: Writing
    writing_data = await self.execute_writing_phase(task_description, research_data, plan_data)
    
    return {"research": research_data, "planning": plan_data, "writing": writing_data}
```

### 成本監控機制
```python
def estimate_costs(self, pricing_model: Dict[str, float] = None):
    # 基於實際API調用計算成本
    total_cost = 0.0
    for agent_id, calls in self.api_calls.items():
        for call in calls:
            tokens = call.get("tokens", 1000)
            model_cost = pricing_model.get(call["model"], 0.002)
            total_cost += (tokens / 1000) * model_cost
    return total_cost
```

## 4. 驗收測試 Cell

```python
# === Multi-Agent System Smoke Test ===
def validate_multi_agent_system():
    tests = {
        "message_system": False,
        "agent_creation": False, 
        "orchestrator": False,
        "config_generation": False
    }
    
    try:
        # Test message bus
        bus = MessageBus()
        test_msg = AgentMessage(
            sender="test", receiver="test",
            message_type=MessageType.STATUS_UPDATE,
            content={"test": "data"},
            timestamp=datetime.now().isoformat(),
            message_id="test_123"
        )
        bus.publish(test_msg)
        tests["message_system"] = len(bus.messages) == 1
        
        # Test orchestrator
        orch = CollaborationOrchestrator()
        tests["orchestrator"] = isinstance(orch.message_bus, MessageBus)
        
        # Test config generation
        config = create_agent_config(["researcher", "writer"])
        tests["config_generation"] = "agents" in config
        
        tests["agent_creation"] = True  # Mock test
        
    except Exception as e:
        print(f"❌ Validation failed: {e}")
        
    overall_health = all(tests.values())
    print(f"🏥 System Health: {'✅ HEALTHY' if overall_health else '⚠️ ISSUES'}")
    return tests

validation_results = validate_multi_agent_system()
```



## 6. 本章小結

### ✅ 完成項目
* **多代理協作架構** (Multi-Agent Architecture)：建立了researcher/planner/writer三個專門代理，各司其職完成複雜任務
* **訊息匯流排系統** (Message Bus System)：實作了標準化的代理間通訊協定，支援異步訊息傳遞
* **工作流程編排** (Workflow Orchestration)：設計了三階段協作流程，包含超時處理與錯誤恢復機制
* **效能與成本監控** (Performance & Cost Monitoring)：建立了API使用追蹤與成本估算系統
* **低VRAM友善設計** (Low-VRAM Friendly)：支援4-bit量化本地模型，提供雲端API的經濟替代方案

### 🧠 核心概念與原理要點
* **任務分解策略** (Task Decomposition)：複雜任務拆解為研究→規劃→寫作三個階段，降低單一代理負擔
* **代理專業化** (Agent Specialization)：每個代理專精特定領域（資訊收集、計畫制定、內容創作），提高整體效率
* **異步協作模式** (Asynchronous Collaboration)：透過訊息佇列實現非阻塞式代理間協作
* **容錯與降級機制** (Fault Tolerance & Graceful Degradation)：當某個代理失效時，系統能繼續運作並提供部分結果
* **成本效益優化** (Cost-Performance Optimization)：根據任務複雜度選擇適當的模型與深度設定

### ⚠️ 常見問題與注意事項
* **協作邏輯複雜性**：多代理間的依賴關係需要仔細設計，避免死鎖或無限循環
* **API成本控制**：多個代理同時運作會快速消耗API配額，需要實作成本監控與限制
* **除錯困難度**：分散式系統的錯誤追蹤較困難，需要完善的日誌記錄機制
* **延遲累積效應**：多階段執行會累積延遲，需要優化各階段的執行時間
* **記憶體管理**：長時間運作的代理需要適當清理上下文記憶，避免記憶體洩漏

### 🚀 下一步優先建議
1. **實作反饋循環機制** (Feedback Loops)：讓代理能根據結果品質進行迭代改進
2. **建立專業領域代理** (Domain-Specific Agents)：增加法律、醫療、金融等領域的專門代理
3. **整合向量記憶體** (Vector Memory Integration)：結合本課程的RAG技術，讓代理具備長期記憶能力
4. **開發Web介面** (Web Interface)：建立任務管理與監控的圖形化介面
5. **測試高併發場景** (Concurrent Execution Testing)：驗證系統在多任務並行執行時的穩定性

---

**🎯 關鍵成就**: 建立了完整的多代理協作生態系統，能夠自動完成從研究到寫作的端到端任務流程

**📚 準備就緒**: 具備了構建大規模AI代理系統的基礎架構，可進一步擴展到生產環境應用