**# INSTALLATION**

In [None]:
!pip install langchain langchain-openai langchain-community langchain-core langchain-experimental crewai crewai-tools pandas numpy python-dotenv tiktoken openai
# sqlite3 and asyncio ship with Python's standard library and must not be pip-installed
!pip install plotly matplotlib seaborn redis psutil prometheus-client aiohttp

# IMPORTS
import os
import json
import time
import asyncio
import sqlite3
import hashlib
import threading
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional, Union, Callable, Tuple
from dataclasses import dataclass, field
from enum import Enum
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
import numpy as np

# Visualization
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import seaborn as sns

# LangChain imports
from langchain.agents import AgentExecutor, create_react_agent, create_structured_chat_agent
from langchain.agents.agent_types import AgentType
from langchain.agents.initialize import initialize_agent
from langchain.tools import BaseTool, tool
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder
from langchain_core.tools import Tool
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_openai import ChatOpenAI
from langchain.schema import AgentAction, AgentFinish
from langchain.callbacks.base import BaseCallbackHandler
from langchain.memory import ConversationBufferMemory, ConversationSummaryBufferMemory
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent

# CrewAI imports
from crewai import Agent, Task, Crew, Process
from crewai.tools import BaseTool as CrewBaseTool

# Monitoring and metrics
import psutil
from prometheus_client import Counter, Histogram, Gauge, start_http_server

# Load environment variables
from dotenv import load_dotenv
load_dotenv()

# Set your OpenAI API key here (or in the environment / a .env file).
# setdefault only installs the placeholder when no key is present, so a real
# key picked up by load_dotenv() above is not overwritten.
os.environ.setdefault("OPENAI_API_KEY", "your-api-key-here")

print("="*60)
print("NOTEBOOK 13.4: PRODUCTION DEPLOYMENT AND ADVANCED PATTERNS")
print("="*60)

**# SECTION 1: LANGCHAIN AGENTS (13.7.1)**

In [None]:
# Section banner: a blank line, then the title framed by two 60-character rules.
print("", "=" * 60, "SECTION 1: LANGCHAIN AGENTS IMPLEMENTATION", "=" * 60, sep="\n")

class AgentTypeEnum(Enum):
    """Closed set of agent kinds used as metric labels in this notebook.

    The ``Enum`` suffix keeps this name from colliding with the ``AgentType``
    imported from LangChain at the top of the file.
    """

    # Each member's value is the plain string recorded by the metrics layer.
    REACT = "react"
    CONVERSATIONAL = "conversational"
    PANDAS_DATAFRAME = "pandas_dataframe"
    CUSTOM = "custom"

class AgentMetrics:
    """Collect per-execution agent metrics, optionally mirrored to Prometheus.

    Two layers are maintained:

    * ``execution_history`` / ``performance_stats`` -- always-on in-memory
      records used by ``get_performance_summary``.
    * ``agent_requests`` / ``agent_duration`` / ``tool_usage`` /
      ``active_agents`` -- Prometheus collectors when ``use_prometheus`` is
      true and initialisation succeeds, otherwise plain dict counters.
    """

    def __init__(self, use_prometheus: bool = False):
        """Set up the in-memory stores and the requested metrics backend.

        Args:
            use_prometheus: When true, try to register Prometheus collectors;
                on any failure fall back to dict counters and clear the flag.
        """
        # Initialize internal metrics (always available)
        self.execution_history = []
        self.performance_stats = {}
        self.use_prometheus = use_prometheus

        if not use_prometheus:
            self._init_fallback_metrics()
            return

        try:
            from prometheus_client import REGISTRY

            # Re-running the notebook cell would otherwise fail with a
            # duplicate-timeseries error, so drop collectors left over from a
            # previous instantiation before registering fresh ones.
            metric_markers = ['agent_requests', 'agent_duration', 'tool_usage', 'active_agents']
            stale_collectors = [
                collector
                for collector in list(REGISTRY._collector_to_names.keys())
                if hasattr(collector, '_name')
                and any(marker in collector._name for marker in metric_markers)
            ]
            for collector in stale_collectors:
                try:
                    REGISTRY.unregister(collector)
                except KeyError:
                    pass  # Already unregistered

            # Initialize Prometheus metrics
            self.agent_requests = Counter('agent_requests_total', 'Total agent requests', ['agent_type', 'status'])
            self.agent_duration = Histogram('agent_duration_seconds', 'Agent execution time', ['agent_type'])
            self.tool_usage = Counter('tool_usage_total', 'Tool usage count', ['tool_name', 'status'])
            self.active_agents = Gauge('active_agents', 'Number of active agents')

        except Exception as e:
            print(f"‚ö†Ô∏è Prometheus metrics initialization failed: {e}")
            print("üìä Falling back to internal metrics only")
            self.use_prometheus = False
            self._init_fallback_metrics()

    def _init_fallback_metrics(self):
        """Initialize fallback metrics when Prometheus is not available"""
        self.agent_requests = {"total": 0, "by_type": {}, "by_status": {}}
        self.agent_duration = {"total_time": 0, "count": 0, "by_type": {}}
        self.tool_usage = {"total": 0, "by_tool": {}, "by_status": {}}
        self.active_agents = {"count": 0}

    def record_execution(self, agent_type: str, duration: float, success: bool, tools_used: List[str]):
        """Record one finished agent execution in every metrics layer.

        Args:
            agent_type: Label identifying the kind of agent that ran.
            duration: Wall-clock duration of the run in seconds.
            success: Whether the run completed without error.
            tools_used: Names of the tools invoked, one entry per invocation.
        """
        # Snapshot the list: callers (e.g. a callback handler) may keep
        # appending to the object they passed in, which would otherwise
        # rewrite already-stored history records.
        tools_used = list(tools_used)
        status = "success" if success else "failure"

        # Update Prometheus metrics if available
        if self.use_prometheus:
            try:
                self.agent_requests.labels(agent_type=agent_type, status=status).inc()
                self.agent_duration.labels(agent_type=agent_type).observe(duration)

                for tool in tools_used:
                    self.tool_usage.labels(tool_name=tool, status=status).inc()
            except Exception as e:
                # Metrics export is best-effort; never fail the agent run over it.
                print(f"‚ö†Ô∏è Prometheus metrics update failed: {e}")
        else:
            # Update fallback metrics
            requests = self.agent_requests
            requests["total"] += 1
            requests["by_type"][agent_type] = requests["by_type"].get(agent_type, 0) + 1
            requests["by_status"][status] = requests["by_status"].get(status, 0) + 1

            durations = self.agent_duration
            durations["total_time"] += duration
            durations["count"] += 1
            per_type = durations["by_type"].setdefault(agent_type, {"total_time": 0, "count": 0})
            per_type["total_time"] += duration
            per_type["count"] += 1

            usage = self.tool_usage
            for tool in tools_used:
                usage["total"] += 1
                usage["by_tool"][tool] = usage["by_tool"].get(tool, 0) + 1
                tool_status_key = f"{tool}_{status}"
                usage["by_status"][tool_status_key] = usage["by_status"].get(tool_status_key, 0) + 1

        # Update internal metrics
        self.execution_history.append({
            "timestamp": datetime.now(),
            "agent_type": agent_type,
            "duration": duration,
            "success": success,
            "tools_used": tools_used
        })

        # Update performance stats
        if agent_type not in self.performance_stats:
            self.performance_stats[agent_type] = {
                "total_executions": 0,
                "successful_executions": 0,
                "average_duration": 0,
                "tool_usage": {}
            }

        stats = self.performance_stats[agent_type]
        stats["total_executions"] += 1
        if success:
            stats["successful_executions"] += 1

        # Incremental mean: fold the new duration into the running average
        stats["average_duration"] = (
            (stats["average_duration"] * (stats["total_executions"] - 1) + duration) /
            stats["total_executions"]
        )

        # Update tool usage stats
        for tool in tools_used:
            stats["tool_usage"][tool] = stats["tool_usage"].get(tool, 0) + 1

    def get_performance_summary(self) -> Dict[str, Any]:
        """Summarise all recorded executions.

        Returns:
            A dict with ``total_executions``, ``success_rate``,
            ``average_duration``, ``agent_type_breakdown`` (per agent type:
            success rate, average duration, execution count and the five most
            used tools) and ``recent_performance``. The same keys are present
            when nothing has been recorded yet, with zero/empty values.
        """
        total_executions = len(self.execution_history)

        if total_executions == 0:
            # Same shape as the populated result so callers can index blindly.
            return {
                "total_executions": 0,
                "success_rate": 0,
                "average_duration": 0,
                "agent_type_breakdown": {},
                "recent_performance": self._get_recent_performance()
            }

        successful_executions = sum(1 for record in self.execution_history if record["success"])
        success_rate = successful_executions / total_executions
        avg_duration = sum(record["duration"] for record in self.execution_history) / total_executions

        # Agent type breakdown
        agent_type_stats = {
            agent_type: {
                "success_rate": stats["successful_executions"] / stats["total_executions"],
                "average_duration": stats["average_duration"],
                "total_executions": stats["total_executions"],
                "most_used_tools": sorted(stats["tool_usage"].items(), key=lambda x: x[1], reverse=True)[:5]
            }
            for agent_type, stats in self.performance_stats.items()
        }

        return {
            "total_executions": total_executions,
            "success_rate": success_rate,
            "average_duration": avg_duration,
            "agent_type_breakdown": agent_type_stats,
            "recent_performance": self._get_recent_performance()
        }

    def _get_recent_performance(self) -> Dict[str, Any]:
        """Get recent performance metrics (last 24 hours)"""
        cutoff_time = datetime.now() - timedelta(hours=24)
        recent_records = [r for r in self.execution_history if r["timestamp"] > cutoff_time]

        if not recent_records:
            return {
                "recent_executions": 0,
                "recent_success_rate": 0,
                "recent_average_duration": 0
            }

        recent_count = len(recent_records)
        recent_success_rate = sum(1 for r in recent_records if r["success"]) / recent_count
        recent_avg_duration = sum(r["duration"] for r in recent_records) / recent_count

        return {
            "recent_executions": recent_count,
            "recent_success_rate": recent_success_rate,
            "recent_average_duration": recent_avg_duration
        }

# Advanced LangChain Agent Factory
class ProductionAgentFactory:
    """Factory that builds monitored LangChain agent executors.

    Holds one shared ``ChatOpenAI`` model and one ``AgentMetrics`` collector;
    every executor built by ``create_react_agent`` reports into that collector
    through a ``ProductionCallbackHandler``.
    """

    def __init__(self, llm_model: str = "gpt-3.5-turbo", metrics: Optional[AgentMetrics] = None):
        """Create the shared LLM client and metrics collector.

        Args:
            llm_model: Model name passed to ``ChatOpenAI``.
            metrics: Collector to report into; a new ``AgentMetrics`` is
                created when omitted.
        """
        self.llm = ChatOpenAI(model=llm_model, temperature=0.1)
        self.metrics = metrics or AgentMetrics()
        self.tools_registry = {}
        self.agent_cache = {}

    def register_tool(self, tool: Tool):
        """Register a tool in the factory, keyed by its ``name``"""
        self.tools_registry[tool.name] = tool
        print(f"‚úÖ Registered tool: {tool.name}")

    def create_react_agent(self, tools: List[Tool], memory: bool = True) -> AgentExecutor:
        """Create a ReAct agent with monitoring and error handling.

        Args:
            tools: Tools the agent may call.
            memory: When true, attach a ``ConversationBufferMemory`` to the
                executor.

        Returns:
            An ``AgentExecutor`` limited to 10 iterations / 300 seconds that
            tolerates parsing errors and returns intermediate steps.
        """
        # Create memory if requested
        agent_memory = None
        if memory:
            agent_memory = ConversationBufferMemory(
                memory_key="chat_history",
                return_messages=True,
                # The executor returns both "output" and "intermediate_steps";
                # the memory must be told which one to store, otherwise saving
                # the turn is ambiguous.
                output_key="output"
            )
            # NOTE(review): the prompt below has no {chat_history} variable, so
            # the stored history is recorded but not shown to the model.

        # Create custom prompt template
        react_prompt = PromptTemplate.from_template("""
You are an intelligent assistant with access to tools. Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Available tools: {tools}

Question: {input}
Thought: {agent_scratchpad}
        """)

        # The bare name below resolves to the module-level LangChain import,
        # not to this method: method bodies do not see class-scope names.
        agent = create_react_agent(
            llm=self.llm,
            tools=tools,
            prompt=react_prompt
        )

        # Create executor with monitoring callback
        monitoring_callback = ProductionCallbackHandler(self.metrics, AgentTypeEnum.REACT.value)

        executor = AgentExecutor(
            agent=agent,
            tools=tools,
            memory=agent_memory,
            verbose=True,
            max_iterations=10,
            max_execution_time=300,  # 5 minutes timeout
            callbacks=[monitoring_callback],
            handle_parsing_errors=True,
            return_intermediate_steps=True
        )

        return executor

    def create_structured_chat_agent(self, tools: List[Tool], system_message: str = None) -> AgentExecutor:
        """Create a simple ReAct agent (same as create_react_agent but with different name).

        Earlier drafts of this method were shadowed by this definition and
        have been removed. ``system_message`` is accepted for interface
        compatibility but is not used.
        """
        # Just create another ReAct agent since structured chat is problematic
        return self.create_react_agent(tools, memory=True)

    def create_pandas_agent(self, dataframe: pd.DataFrame, description: str = "") -> AgentExecutor:
        """Create a pandas dataframe agent for data analysis.

        Args:
            dataframe: The frame the agent will analyse.
            description: Currently unused.
        """
        # Create the pandas agent with minimal parameters to avoid issues
        agent_executor = create_pandas_dataframe_agent(
            llm=self.llm,
            df=dataframe,
            verbose=True,
            handle_parsing_errors=True
        )

        return agent_executor

class ProductionCallbackHandler(BaseCallbackHandler):
    """Callback handler that times agent runs and reports them to ``AgentMetrics``.

    One handler instance may observe several consecutive runs. Per-run state
    (start time, tools, steps, errors) is kept after a run ends so that
    ``get_execution_trace`` can still be read, and is cleared when the next
    run begins.
    """

    def __init__(self, metrics: AgentMetrics, agent_type: str):
        """Bind the handler to a metrics collector and an agent-type label."""
        self.metrics = metrics
        self.agent_type = agent_type
        self.start_time = None
        self.tools_used = []
        self.steps = []
        self.errors = []
        # Set once a run has been reported; the next event starts a fresh run.
        self._run_finished = False

    def _begin_run_if_needed(self) -> None:
        """Reset state left by a finished run and start the clock if idle."""
        if self._run_finished:
            self.start_time = None
            self.tools_used = []
            self.steps = []
            self.errors = []
            self._run_finished = False
        if self.start_time is None:
            self.start_time = time.time()

    def on_chain_start(self, serialized, inputs, **kwargs) -> None:
        """Start timing when the chain starts, so tool-free runs are timed too"""
        self._begin_run_if_needed()

    def on_agent_action(self, action: AgentAction, **kwargs) -> None:
        """Record agent actions and tool usage"""
        self._begin_run_if_needed()

        self.tools_used.append(action.tool)
        self.steps.append({
            "step_type": "action",
            "tool": action.tool,
            "input": action.tool_input,
            "timestamp": datetime.now()
        })

        print(f"üîß Using tool: {action.tool}")

    def on_agent_finish(self, finish: AgentFinish, **kwargs) -> None:
        """Record successful completion"""
        duration = time.time() - self.start_time if self.start_time else 0
        self.metrics.record_execution(
            agent_type=self.agent_type,
            duration=duration,
            success=True,
            # Hand over a copy so later mutation here cannot alter the record.
            tools_used=list(self.tools_used)
        )
        self._run_finished = True

        print(f"‚úÖ Agent completed successfully in {duration:.2f}s")

    def on_chain_error(self, error: Exception, **kwargs) -> None:
        """Record errors and failures"""
        duration = time.time() - self.start_time if self.start_time else 0
        self.errors.append(str(error))

        self.metrics.record_execution(
            agent_type=self.agent_type,
            duration=duration,
            success=False,
            tools_used=list(self.tools_used)
        )
        self._run_finished = True

        print(f"‚ùå Agent failed after {duration:.2f}s: {error}")

    def get_execution_trace(self) -> Dict[str, Any]:
        """Get detailed execution trace of the current or most recent run.

        ``total_duration`` is measured from the run's start to the moment of
        this call, or 0 when no run has started.
        """
        return {
            "agent_type": self.agent_type,
            "steps": self.steps,
            "tools_used": self.tools_used,
            "errors": self.errors,
            "total_duration": time.time() - self.start_time if self.start_time else 0
        }

# Production-ready tools with monitoring
@tool
def enhanced_web_search_tool(query: str) -> str:
    """Enhanced web search with caching and error handling"""
    # The docstring above is left untouched: @tool exposes it to the model as
    # the tool description. This is a simulation -- no network request is made
    # and nothing is cached; it returns a canned blurb for the first known
    # topic contained in the query, or a generic message otherwise.
    #
    # The previously unused in-function imports (requests, urllib.parse.quote)
    # were removed: a missing `requests` package made every call fail.
    try:
        # Add delay to simulate real search
        time.sleep(0.5)

        # Simulate search results
        search_results = {
            "artificial intelligence": "AI continues to advance rapidly with new developments in LLMs, computer vision, and robotics.",
            "climate change": "Latest climate research shows accelerating impacts and the urgent need for action.",
            "stock market": "Markets show mixed signals with technology stocks leading gains while energy sector faces headwinds.",
            "quantum computing": "Quantum computing advances with new error correction breakthroughs and commercial applications."
        }

        # Find relevant results (case-insensitive substring match, first hit wins)
        query_lower = query.lower()
        for topic, result in search_results.items():
            if topic in query_lower:
                return f"Search results for '{query}': {result}"

        return f"Search results for '{query}': General information found, but no specific match in simulation."

    except Exception as e:
        # Tools report failures as text so the agent can read and react to them.
        return f"Search failed: {str(e)}"

@tool
def enhanced_calculator_tool(expression: str) -> str:
    """Enhanced calculator with validation and error handling"""
    # Only digits, + - * / ( ) . , and spaces may reach the evaluator.
    permitted = frozenset('0123456789+-*/()., ')
    try:
        if any(ch not in permitted for ch in expression):
            return "Error: Expression contains invalid characters"

        # NOTE(review): eval() on model-supplied text. The whitelist plus empty
        # builtins blocks names and calls, but "**" is still expressible, so a
        # chained power such as 9**9**9 can burn CPU/memory. Consider an
        # ast-based evaluator with bounded operands.
        value = eval(expression, {"__builtins__": {}}, {})
        # Formatting stays inside the try: rendering a huge int can itself raise.
        return f"Calculation result: {value}"
    except ZeroDivisionError:
        return "Error: Division by zero"
    except Exception as e:
        return f"Calculation error: {str(e)}"

@tool
def data_analysis_tool(data_description: str) -> str:
    """Analyze data patterns and provide insights"""
    # Simulation only: picks a canned insight by keyword, no data is inspected.
    try:
        canned_insights = {
            "trend": "Analysis shows upward trend with 15% growth over the analyzed period",
            "correlation": "Strong positive correlation (r=0.78) detected between variables",
            "distribution": "Data follows normal distribution with mean=50.2, std=12.8",
            "anomaly": "3 anomalies detected outside 2-sigma range",
            "forecast": "Predictive model suggests 12% growth in next quarter"
        }

        needle = data_description.lower()
        # First keyword (in declaration order) found in the description wins.
        matched = next(
            (insight for keyword, insight in canned_insights.items() if keyword in needle),
            None,
        )
        if matched is not None:
            return f"Data analysis result: {matched}"

        return f"Data analysis completed for: {data_description}. Multiple factors analyzed."

    except Exception as e:
        # Failures are returned as text so the calling agent can read them.
        return f"Analysis failed: {str(e)}"

# Reached only if the factory, callback handler and tool definitions above
# executed without raising.
print("‚úÖ LangChain Agent Factory and tools initialized successfully")

**# SECTION 2: ADVANCED PATTERNS (13.7.3)**

In [None]:
# Section banner: a blank line, then the title framed by two 60-character rules.
print("", "=" * 60, "SECTION 2: ADVANCED PATTERNS", "=" * 60, sep="\n")

class SelfImprovementAgent:
    """Wrap an agent executor, log every run, and tune the executor from the results.

    Each call to ``execute_with_learning`` appends one entry to
    ``improvement_history``; ``get_learning_insights`` aggregates that history.
    """

    def __init__(self, base_agent: "AgentExecutor", metrics: "AgentMetrics"):
        """Store the wrapped executor and metrics collector.

        Args:
            base_agent: Object exposing ``invoke({"input": ...})``; its
                ``max_execution_time`` / ``max_iterations`` attributes are
                adjusted by ``_apply_improvements`` when present.
            metrics: Metrics collector kept for callers; not read by this class.
        """
        self.base_agent = base_agent
        self.metrics = metrics
        self.improvement_history = []
        self.performance_threshold = 0.8  # Minimum acceptable success rate
        self.learning_rate = 0.1

    def execute_with_learning(self, query: str, feedback: Optional[str] = None) -> Dict[str, Any]:
        """Run ``query`` through the wrapped agent and learn from the outcome.

        Args:
            query: The user input passed to the agent.
            feedback: Optional free-text user feedback; the words
                "incomplete" and "slow" are recognised.

        Returns:
            On success: ``result``, ``execution_time``, ``success`` (True),
            ``performance_analysis`` and ``improvements_applied`` (the number
            of improvements identified). On any exception: ``result`` (None),
            ``execution_time``, ``success`` (False), ``error`` and
            ``error_analysis``. Exceptions are never propagated.
        """
        start_time = time.time()

        try:
            # Execute the query
            result = self.base_agent.invoke({"input": query})
            execution_time = time.time() - start_time
            success = True

            # Analyze performance
            performance_analysis = self._analyze_performance(query, result, execution_time, feedback)

            # Store improvement insights
            self.improvement_history.append({
                "timestamp": datetime.now(),
                "query": query,
                "execution_time": execution_time,
                "success": success,
                "performance_analysis": performance_analysis,
                "feedback": feedback
            })

            # Apply improvements if needed
            self._apply_improvements(performance_analysis)

            return {
                "result": result,
                "execution_time": execution_time,
                "success": success,
                "performance_analysis": performance_analysis,
                "improvements_applied": len(performance_analysis.get("improvements", []))
            }

        except Exception as e:
            execution_time = time.time() - start_time
            error_analysis = self._analyze_error(query, str(e), execution_time)

            self.improvement_history.append({
                "timestamp": datetime.now(),
                "query": query,
                "execution_time": execution_time,
                "success": False,
                "error": str(e),
                "error_analysis": error_analysis
            })

            return {
                "result": None,
                "execution_time": execution_time,
                "success": False,
                "error": str(e),
                "error_analysis": error_analysis
            }

    def _analyze_performance(self, query: str, result: Dict, execution_time: float, feedback: Optional[str] = None) -> Dict:
        """Analyze agent performance and identify improvement opportunities.

        Returns a dict with ``execution_time_rating``, ``result_completeness``,
        ``tool_usage_efficiency`` and a list of ``improvements`` (each with
        ``type``, ``description`` and ``priority``).
        """
        analysis = {
            "execution_time_rating": "good" if execution_time < 30 else "slow",
            "result_completeness": self._assess_completeness(result),
            "tool_usage_efficiency": self._assess_tool_efficiency(result),
            "improvements": []
        }
        improvements = analysis["improvements"]

        # Check execution time
        if execution_time > 60:
            improvements.append({
                "type": "performance",
                "description": "Execution time too long, consider parallel tool usage",
                "priority": "high"
            })

        # Check tool usage patterns
        if "intermediate_steps" in result and len(result["intermediate_steps"]) > 10:
            improvements.append({
                "type": "efficiency",
                "description": "Too many steps, improve planning strategy",
                "priority": "medium"
            })

        # Incorporate user feedback ("incomplete" takes precedence over "slow")
        if feedback:
            feedback_lower = feedback.lower()
            if "incomplete" in feedback_lower:
                improvements.append({
                    "type": "completeness",
                    "description": "User indicates incomplete response, improve thoroughness",
                    "priority": "high"
                })
            elif "slow" in feedback_lower:
                improvements.append({
                    "type": "performance",
                    "description": "User indicates slow response, optimize execution",
                    "priority": "medium"
                })

        return analysis

    def _assess_completeness(self, result: Dict) -> str:
        """Classify the response as "incomplete", "partial" or "complete".

        Fewer than 50 characters is "incomplete"; an answer containing
        "I don't know" or "not found" is "partial".
        """
        output = result.get("output", "")
        if not isinstance(output, str):
            # A missing/None or non-text output must not crash the analysis.
            output = "" if output is None else str(output)

        if len(output) < 50:
            return "incomplete"
        elif "I don't know" in output or "not found" in output:
            return "partial"
        else:
            return "complete"

    def _assess_tool_efficiency(self, result: Dict) -> str:
        """Rate tool usage from the run's intermediate steps.

        Returns "unknown" when no steps are reported, "optimal" when no tool
        was called twice, "inefficient" when repeat calls outnumber the
        distinct tools used, and "acceptable" otherwise.
        """
        if "intermediate_steps" not in result:
            return "unknown"

        unique_tools = set()
        repeated_tools = 0

        for step in result["intermediate_steps"]:
            # AgentExecutor reports each step as an (action, observation)
            # pair, so the tool name lives on the first element; a bare action
            # object is accepted as well.
            action = step[0] if isinstance(step, (tuple, list)) and step else step
            tool_name = getattr(action, "tool", None)
            if tool_name is None:
                continue
            if tool_name in unique_tools:
                repeated_tools += 1
            unique_tools.add(tool_name)

        if repeated_tools > len(unique_tools):
            return "inefficient"
        elif repeated_tools == 0:
            return "optimal"
        else:
            return "acceptable"

    def _analyze_error(self, query: str, error: str, execution_time: float) -> Dict:
        """Analyze errors to identify improvement opportunities"""
        analysis = {
            "error_type": self._classify_error(error),
            "suggestions": []
        }

        error_lower = error.lower()
        if "timeout" in error_lower:
            analysis["suggestions"].append("Reduce max_iterations or improve tool selection")
        elif "parsing" in error_lower:
            analysis["suggestions"].append("Improve prompt clarity or add parsing error handling")
        elif "rate limit" in error_lower:
            analysis["suggestions"].append("Implement rate limiting and retry logic")

        return analysis

    def _classify_error(self, error: str) -> str:
        """Classify error types for targeted improvements"""
        error_lower = error.lower()
        if "timeout" in error_lower:
            return "timeout"
        elif "parsing" in error_lower:
            return "parsing"
        elif "rate limit" in error_lower:
            return "rate_limit"
        elif "authentication" in error_lower:
            return "auth"
        else:
            return "unknown"

    def _apply_improvements(self, analysis: Dict):
        """Apply high-priority improvements to the wrapped agent's limits.

        Limits that are absent or ``None`` (no limit configured) are left
        untouched.
        """
        for improvement in analysis.get("improvements", []):
            if improvement["priority"] != "high":
                continue

            if improvement["type"] == "performance":
                # Give slow runs more headroom: +20%, capped at 600 seconds
                time_limit = getattr(self.base_agent, 'max_execution_time', None)
                if time_limit is not None:
                    self.base_agent.max_execution_time = min(time_limit * 1.2, 600)
            elif improvement["type"] == "efficiency":
                # NOTE(review): _analyze_performance only emits "efficiency"
                # with priority "medium", so this branch is currently never
                # reached -- confirm the intended priority.
                iteration_limit = getattr(self.base_agent, 'max_iterations', None)
                if iteration_limit is not None:
                    self.base_agent.max_iterations = max(iteration_limit - 2, 5)

    def get_learning_insights(self) -> Dict[str, Any]:
        """Aggregate the interaction history.

        Returns ``{"total_interactions": 0}`` when nothing has run yet,
        otherwise the interaction count, success rate, average execution time,
        a count of improvement types seen and the recent performance trend.
        """
        if not self.improvement_history:
            return {"total_interactions": 0}

        total_interactions = len(self.improvement_history)
        successful_interactions = sum(1 for h in self.improvement_history if h["success"])
        success_rate = successful_interactions / total_interactions

        avg_execution_time = sum(h["execution_time"] for h in self.improvement_history) / total_interactions

        # Common improvement patterns (failed runs carry no performance_analysis)
        improvement_types = {}
        for history in self.improvement_history:
            if "performance_analysis" in history:
                for improvement in history["performance_analysis"].get("improvements", []):
                    imp_type = improvement["type"]
                    improvement_types[imp_type] = improvement_types.get(imp_type, 0) + 1

        return {
            "total_interactions": total_interactions,
            "success_rate": success_rate,
            "average_execution_time": avg_execution_time,
            "common_improvements": improvement_types,
            "recent_performance_trend": self._calculate_performance_trend()
        }

    def _calculate_performance_trend(self) -> str:
        """Compare the success rate of the last 10 runs with the 10 before them.

        Returns "insufficient_data" with fewer than 20 recorded runs,
        otherwise "improving" / "declining" when the rate moved by more than
        0.1, else "stable".
        """
        if len(self.improvement_history) < 10:
            return "insufficient_data"

        recent_10 = self.improvement_history[-10:]
        earlier_10 = self.improvement_history[-20:-10] if len(self.improvement_history) >= 20 else []

        if not earlier_10:
            return "insufficient_data"

        recent_success_rate = sum(1 for h in recent_10 if h["success"]) / len(recent_10)
        earlier_success_rate = sum(1 for h in earlier_10 if h["success"]) / len(earlier_10)

        if recent_success_rate > earlier_success_rate + 0.1:
            return "improving"
        elif recent_success_rate < earlier_success_rate - 0.1:
            return "declining"
        else:
            return "stable"

class GoalOrientedAgent:
    """Agent wrapper that keeps a prioritised list of goals and advances them.

    Every goal is a plain dict with the keys ``id``, ``description``,
    ``priority``, ``created_at``, ``deadline``, ``status``, ``progress``,
    ``subtasks`` and ``strategy``. ``progress`` is the fraction of subtasks
    already completed; a goal is moved to ``completed_goals`` once it
    reaches 1.0.
    """

    # Tolerance added before truncating ``progress * len(subtasks)``.
    # A stored fraction such as 1/49 multiplied by 49 evaluates to
    # 0.9999999999999999; plain ``int()`` would truncate that to 0 and the
    # same subtask would be selected again on every call.
    _PROGRESS_EPSILON = 1e-9

    def __init__(self, base_agent: AgentExecutor):
        """Remember the executor used for subtasks and start with no goals.

        Args:
            base_agent: Object whose ``invoke({"input": ...})`` method is
                called once per subtask.
        """
        self.base_agent = base_agent
        self.active_goals = []     # goals in progress, highest priority first
        self.completed_goals = []  # goals whose progress reached 1.0
        self.goal_strategies = {}  # initialised here; not written by this class

    def set_goal(self, goal_description: str, priority: int = 1,
                 deadline: Optional[datetime] = None) -> str:
        """Register a new goal and return its identifier.

        Args:
            goal_description: Free-text description; also used to pick a
                strategy (see ``_generate_goal_strategy``).
            priority: Larger numbers are worked on first.
            deadline: Optional point in time the goal should be done by.

        Returns:
            The first eight hex digits of an MD5 digest of the description
            plus the creation timestamp.
        """
        created_at = datetime.now()
        # MD5 is used only to derive a short identifier, not for security.
        goal_id = hashlib.md5(f"{goal_description}{created_at}".encode()).hexdigest()[:8]

        strategy = self._generate_goal_strategy(goal_description)
        goal = {
            "id": goal_id,
            "description": goal_description,
            "priority": priority,
            "created_at": created_at,
            "deadline": deadline,
            "status": "active",
            "progress": 0.0,
            "subtasks": strategy.get("subtasks", []),
            "strategy": strategy
        }

        self.active_goals.append(goal)
        self.active_goals.sort(key=lambda x: x["priority"], reverse=True)

        print(f"üéØ New goal set: {goal_description} (ID: {goal_id})")
        return goal_id

    def _generate_goal_strategy(self, goal_description: str) -> Dict:
        """Pick a canned strategy from keywords in the description.

        The first of "research", "analysis", "monitoring" found in the
        lower-cased description selects the strategy; anything else gets the
        "general" strategy. A fresh dict is returned on every call, so the
        subtask list is never shared between goals.
        """
        description = goal_description.lower()

        if "research" in description:
            return {
                "type": "research",
                "subtasks": [
                    "Identify key information sources",
                    "Gather relevant data and evidence",
                    "Analyze and synthesize findings",
                    "Prepare comprehensive summary"
                ],
                "estimated_duration": "2-3 hours",
                "required_tools": ["web_search", "document_analysis"]
            }
        if "analysis" in description:
            return {
                "type": "analysis",
                "subtasks": [
                    "Define analysis framework",
                    "Collect relevant data",
                    "Perform quantitative analysis",
                    "Generate insights and recommendations"
                ],
                "estimated_duration": "1-2 hours",
                "required_tools": ["calculator", "data_analysis"]
            }
        if "monitoring" in description:
            return {
                "type": "monitoring",
                "subtasks": [
                    "Set up monitoring parameters",
                    "Establish baseline metrics",
                    "Schedule regular checks",
                    "Create alert mechanisms"
                ],
                "estimated_duration": "ongoing",
                "required_tools": ["web_search", "data_collection"]
            }
        return {
            "type": "general",
            "subtasks": [
                "Break down goal into actionable steps",
                "Execute steps systematically",
                "Monitor progress and adjust",
                "Complete and verify results"
            ],
            "estimated_duration": "variable",
            "required_tools": ["general_purpose"]
        }

    def work_towards_goals(self, time_limit: int = 300) -> Dict[str, Any]:
        """Advance each active goal by at most one subtask.

        Goals are visited in descending priority. The time budget is checked
        before each goal is started; a subtask that is already running is not
        interrupted.

        Args:
            time_limit: Budget in seconds for the whole pass.

        Returns:
            Dict with ``goals_worked_on`` (ids), ``progress_made`` (id ->
            result of ``_work_on_goal``), ``completed_goals`` (ids) and
            ``total_time_spent`` (seconds).
        """
        start_time = time.time()
        work_results = {
            "goals_worked_on": [],
            "progress_made": {},
            "completed_goals": [],
            "total_time_spent": 0
        }

        # ``sorted`` returns a new list, so removing finished goals from
        # ``self.active_goals`` inside the loop is safe.
        for goal in sorted(self.active_goals, key=lambda x: x["priority"], reverse=True):
            if time.time() - start_time >= time_limit:
                break

            print(f"\nüéØ Working on goal: {goal['description']}")
            goal_work_result = self._work_on_goal(goal, time_limit - (time.time() - start_time))

            work_results["goals_worked_on"].append(goal["id"])
            work_results["progress_made"][goal["id"]] = goal_work_result

            goal["progress"] = goal_work_result.get("progress", goal["progress"])

            if goal["progress"] >= 1.0:
                goal["status"] = "completed"
                goal["completed_at"] = datetime.now()
                self.completed_goals.append(goal)
                self.active_goals.remove(goal)
                work_results["completed_goals"].append(goal["id"])
                print(f"‚úÖ Goal completed: {goal['description']}")

        work_results["total_time_spent"] = time.time() - start_time
        return work_results

    @staticmethod
    def _completed_subtasks(progress: float, total: int) -> int:
        """Convert a progress fraction back into a count of finished subtasks.

        Truncates ``progress * total`` after adding a tiny tolerance so that
        float round-off just below a whole number does not lose a subtask.
        """
        return int(progress * total + GoalOrientedAgent._PROGRESS_EPSILON)

    def _work_on_goal(self, goal: Dict, time_limit: float) -> Dict:
        """Run the next unfinished subtask of ``goal`` through the base agent.

        Args:
            goal: Goal dict; ``description``, ``progress`` and ``subtasks``
                are read.
            time_limit: Remaining budget in seconds. Accepted for interface
                compatibility; it is not enforced here.

        Returns:
            On success: ``progress`` (new fraction), ``subtask_completed``,
            ``result`` (the agent's ``"output"``), ``time_spent`` and
            ``status="in_progress"``. When every subtask is already done:
            ``{"progress": 1.0, "status": "completed"}``. When the agent
            raises: the unchanged ``progress``, ``error``, ``time_spent`` and
            ``status="error"``.
        """
        start_time = time.time()

        subtasks = goal["subtasks"]
        current_subtask_index = self._completed_subtasks(goal["progress"], len(subtasks))

        if current_subtask_index >= len(subtasks):
            return {"progress": 1.0, "status": "completed"}

        current_subtask = subtasks[current_subtask_index]
        print(f"üìã Working on subtask: {current_subtask}")

        try:
            query = f"Complete this subtask for the goal '{goal['description']}': {current_subtask}"
            result = self.base_agent.invoke({"input": query})

            new_progress = (current_subtask_index + 1) / len(subtasks)

            return {
                "progress": new_progress,
                "subtask_completed": current_subtask,
                "result": result.get("output", ""),
                "time_spent": time.time() - start_time,
                "status": "in_progress"
            }

        except Exception as e:
            # Any agent failure is reported to the caller instead of aborting
            # the whole pass over the goals.
            return {
                "progress": goal["progress"],  # No progress made
                "error": str(e),
                "time_spent": time.time() - start_time,
                "status": "error"
            }

    def get_goal_status(self) -> Dict[str, Any]:
        """Summarise the goal lists.

        Returns:
            Dict with the number of active and completed goals, a count of
            active goals for each priority 1-5 (other priorities are not
            broken out), the mean progress of active goals (0 when there are
            none) and the number of active goals whose deadline is less than
            one day in the future or already past.
        """
        active = self.active_goals
        cutoff = datetime.now() + timedelta(days=1)

        return {
            "active_goals": len(active),
            "completed_goals": len(self.completed_goals),
            "goals_by_priority": {
                f"priority_{p}": sum(1 for g in active if g["priority"] == p)
                for p in range(1, 6)
            },
            "average_progress": sum(g["progress"] for g in active) / max(len(active), 1),
            "goals_near_deadline": sum(
                1 for g in active if g.get("deadline") and g["deadline"] < cutoff
            )
        }

**# SECTION 3: EVALUATION AND PRODUCTION DEPLOYMENT (13.8)**

In [None]:
# Section banner: blank line, 60-character rule, title, 60-character rule.
print(
    "\n" + "=" * 60,
    "SECTION 3: EVALUATION AND PRODUCTION DEPLOYMENT",
    "=" * 60,
    sep="\n",
)

class AgentEvaluationFramework:
    """Run an agent against stored benchmark datasets and score the answers.

    A dataset is a pair of equally long lists: query dicts (``id``, ``text``)
    and expected-output dicts (``text``, ``key_points``). Each evaluation run
    produces per-query records plus aggregate metrics and is appended to
    ``evaluation_history``.
    """

    def __init__(self):
        """Create an empty framework with no datasets and no history."""
        # Per-metric lists; initialised here and not written by this class.
        self.evaluation_metrics = {
            "accuracy": [],
            "completeness": [],
            "efficiency": [],
            "user_satisfaction": [],
            "tool_usage_effectiveness": [],
            "response_time": [],
            "error_rate": [],
            "cost_efficiency": []
        }
        self.benchmark_datasets = {}
        self.evaluation_history = []

    def add_benchmark_dataset(self, name: str, queries: List[Dict], expected_outputs: List[Dict]):
        """Store a benchmark dataset under ``name``.

        Args:
            name: Key used later by ``evaluate_agent_performance``.
            queries: Query dicts, one per benchmark item.
            expected_outputs: Expected-output dicts, positionally aligned
                with ``queries``.

        Raises:
            ValueError: If the two lists differ in length. Pairing them with
                ``zip`` would otherwise drop the surplus items silently and
                misreport the number of evaluated queries.
        """
        if len(queries) != len(expected_outputs):
            raise ValueError(
                f"Dataset {name} has {len(queries)} queries but "
                f"{len(expected_outputs)} expected outputs"
            )

        self.benchmark_datasets[name] = {
            "queries": queries,
            "expected_outputs": expected_outputs,
            "created_at": datetime.now()
        }
        print(f"üìä Added benchmark dataset: {name} ({len(queries)} queries)")

    def evaluate_agent_performance(self, agent: AgentExecutor, dataset_name: str,
                                 sample_size: Optional[int] = None) -> Dict[str, Any]:
        """Evaluate ``agent`` on a stored dataset.

        Args:
            agent: Object whose ``invoke({"input": ...})`` returns a dict.
            dataset_name: Name given to ``add_benchmark_dataset``.
            sample_size: When set, non-zero and smaller than the dataset, that
                many query/expected pairs are drawn at random; otherwise the
                whole dataset is used.

        Returns:
            Dict with ``dataset_name``, ``total_queries``,
            ``successful_queries``, ``failed_queries``,
            ``individual_results``, ``aggregate_metrics`` and ``timestamp``.

        Raises:
            ValueError: If the dataset is unknown or its two lists have
                different lengths.
        """
        if dataset_name not in self.benchmark_datasets:
            raise ValueError(f"Dataset {dataset_name} not found")

        dataset = self.benchmark_datasets[dataset_name]
        queries = dataset["queries"]
        expected_outputs = dataset["expected_outputs"]

        # Guards datasets that were placed in ``benchmark_datasets`` directly.
        if len(queries) != len(expected_outputs):
            raise ValueError(
                f"Dataset {dataset_name} has {len(queries)} queries but "
                f"{len(expected_outputs)} expected outputs"
            )

        if sample_size and sample_size < len(queries):
            import random
            # Sample indices so each query stays paired with its expectation.
            indices = random.sample(range(len(queries)), sample_size)
            queries = [queries[i] for i in indices]
            expected_outputs = [expected_outputs[i] for i in indices]

        evaluation_results = {
            "dataset_name": dataset_name,
            "total_queries": len(queries),
            "successful_queries": 0,
            "failed_queries": 0,
            "individual_results": [],
            "aggregate_metrics": {},
            "timestamp": datetime.now()
        }

        print(f"üß™ Evaluating agent on {len(queries)} queries from {dataset_name}")

        for i, (query, expected) in enumerate(zip(queries, expected_outputs)):
            print(f"Query {i+1}/{len(queries)}: {query.get('text', '')[:50]}...")

            result = self._evaluate_single_query(agent, query, expected)
            evaluation_results["individual_results"].append(result)

            counter = "successful_queries" if result["success"] else "failed_queries"
            evaluation_results[counter] += 1

        evaluation_results["aggregate_metrics"] = self._calculate_aggregate_metrics(
            evaluation_results["individual_results"]
        )

        self.evaluation_history.append(evaluation_results)

        return evaluation_results

    def _evaluate_single_query(self, agent: AgentExecutor, query: Dict, expected: Dict) -> Dict:
        """Run one query and score the response.

        Any exception raised while invoking the agent or computing the scores
        yields a failure record (``success=False``, all scores 0.0, ``error``
        set) instead of propagating. Both record shapes carry ``tools_used``
        so consumers can read it without checking ``success`` first.
        """
        start_time = time.time()

        try:
            response = agent.invoke({"input": query.get("text", "")})
            execution_time = time.time() - start_time

            actual_output = response.get("output", "")
            steps = response.get("intermediate_steps", [])

            accuracy_score = self._calculate_accuracy(actual_output, expected.get("text", ""))
            completeness_score = self._calculate_completeness(actual_output, expected.get("key_points", []))
            efficiency_score = self._calculate_efficiency(steps)

            return {
                "query_id": query.get("id", ""),
                "success": True,
                "execution_time": execution_time,
                "actual_output": actual_output,
                "expected_output": expected.get("text", ""),
                "metrics": {
                    "accuracy": accuracy_score,
                    "completeness": completeness_score,
                    "efficiency": efficiency_score,
                    "response_time": execution_time
                },
                "tools_used": self._extract_tools_used(steps)
            }

        except Exception as e:
            execution_time = time.time() - start_time
            return {
                "query_id": query.get("id", ""),
                "success": False,
                "execution_time": execution_time,
                "error": str(e),
                "metrics": {
                    "accuracy": 0.0,
                    "completeness": 0.0,
                    "efficiency": 0.0,
                    "response_time": execution_time
                },
                "tools_used": []
            }

    def _calculate_accuracy(self, actual: str, expected: str) -> float:
        """Return the share of distinct expected words present in ``actual``.

        Words are whitespace-separated and compared case-insensitively. With
        an empty ``expected`` the score is 1.0 for any non-empty answer and
        0.0 for an empty one; a whitespace-only ``expected`` scores 1.0.
        """
        if not expected:
            return 1.0 if actual else 0.0

        expected_words = set(expected.lower().split())
        if not expected_words:
            return 1.0

        actual_words = set(actual.lower().split())
        return len(actual_words & expected_words) / len(expected_words)

    def _calculate_completeness(self, actual: str, key_points: List[str]) -> float:
        """Return the share of key points found as substrings of ``actual``.

        Matching is case-insensitive. An empty ``key_points`` list scores 1.0.
        """
        if not key_points:
            return 1.0

        actual_lower = actual.lower()
        covered_points = sum(1 for point in key_points if point.lower() in actual_lower)
        return covered_points / len(key_points)

    def _calculate_efficiency(self, intermediate_steps: List) -> float:
        """Score the number of intermediate steps.

        Up to three steps score 1.0; every additional step costs 0.1, with a
        floor of 0.0.
        """
        optimal_steps = 3  # Assume 3 steps is optimal
        extra_steps = len(intermediate_steps) - optimal_steps
        if extra_steps <= 0:
            return 1.0
        return max(0.0, 1.0 - extra_steps * 0.1)

    def _extract_tools_used(self, intermediate_steps: List) -> List[str]:
        """Collect the ``tool`` attribute of each step, in order.

        A step contributes when it has a ``tool`` attribute itself, or when
        it is a non-empty tuple whose first element has one; other steps are
        ignored.
        """
        tools = []
        for step in intermediate_steps:
            if hasattr(step, 'tool'):
                tools.append(step.tool)
            elif isinstance(step, tuple) and len(step) > 0 and hasattr(step[0], 'tool'):
                tools.append(step[0].tool)
        return tools

    def _calculate_aggregate_metrics(self, individual_results: List[Dict]) -> Dict[str, float]:
        """Average the per-query metrics over the successful results.

        The four averages consider successful queries only; ``success_rate``
        and ``error_rate`` are relative to all results. With no successful
        result (including an empty input) every average is 0.0, the success
        rate is 0.0 and the error rate is 1.0.
        """
        successful_results = [r for r in individual_results if r["success"]]

        if not successful_results:
            return {
                "average_accuracy": 0.0,
                "average_completeness": 0.0,
                "average_efficiency": 0.0,
                "average_response_time": 0.0,
                "success_rate": 0.0,
                "error_rate": 1.0
            }

        succeeded = len(successful_results)
        total = len(individual_results)

        def mean_of(metric: str) -> float:
            return sum(r["metrics"][metric] for r in successful_results) / succeeded

        return {
            "average_accuracy": mean_of("accuracy"),
            "average_completeness": mean_of("completeness"),
            "average_efficiency": mean_of("efficiency"),
            "average_response_time": mean_of("response_time"),
            "success_rate": succeeded / total,
            "error_rate": (total - succeeded) / total
        }

    def generate_evaluation_report(self, evaluation_results: Dict) -> str:
        """Render the result of ``evaluate_agent_performance`` as text.

        The closing rating is based on the mean of success rate, accuracy,
        completeness and efficiency: >= 0.9 excellent, >= 0.8 good, >= 0.7
        acceptable, otherwise poor.
        """
        metrics = evaluation_results["aggregate_metrics"]

        report = f"""
=== AGENT EVALUATION REPORT ===
Dataset: {evaluation_results['dataset_name']}
Evaluation Date: {evaluation_results['timestamp'].strftime('%Y-%m-%d %H:%M:%S')}
Total Queries: {evaluation_results['total_queries']}

PERFORMANCE METRICS:
‚úì Success Rate: {metrics['success_rate']:.2%}
‚úì Average Accuracy: {metrics['average_accuracy']:.2%}
‚úì Average Completeness: {metrics['average_completeness']:.2%}
‚úì Average Efficiency: {metrics['average_efficiency']:.2%}
‚úì Average Response Time: {metrics['average_response_time']:.2f}s
‚úó Error Rate: {metrics['error_rate']:.2%}

PERFORMANCE RATING:
"""

        overall_score = sum([
            metrics['success_rate'],
            metrics['average_accuracy'],
            metrics['average_completeness'],
            metrics['average_efficiency']
        ]) / 4

        if overall_score >= 0.9:
            report += "üåü EXCELLENT - Ready for production deployment"
        elif overall_score >= 0.8:
            report += "‚úÖ GOOD - Minor optimizations recommended"
        elif overall_score >= 0.7:
            report += "‚ö†Ô∏è ACCEPTABLE - Significant improvements needed"
        else:
            report += "‚ùå POOR - Major issues must be addressed"

        return report

# Print a confirmation once the definitions above have been executed.
print("‚úÖ Advanced Patterns and Evaluation Framework initialized successfully")

**# SECTION 4: PRODUCTION MONITORING AND OBSERVABILITY**

In [None]:
# Section banner: blank line, 60-character rule, title, 60-character rule.
print(
    "\n" + "=" * 60,
    "SECTION 4: PRODUCTION MONITORING AND OBSERVABILITY",
    "=" * 60,
    sep="\n",
)

class ProductionMonitor:
    """Collect system and per-agent metrics and raise threshold alerts.

    Metrics are kept in memory: ``system_stats`` holds the latest system
    sample, ``metrics_store`` maps an agent id to its execution log and
    counters, and ``alerts`` is an append-only list of alert dicts.
    """

    def __init__(self, alert_thresholds: Optional[Dict[str, float]] = None):
        """Create a monitor.

        Args:
            alert_thresholds: Optional overrides for the default thresholds.
                Missing keys fall back to the defaults, so a partial dict
                does not cause ``KeyError`` when alerts are checked. The
                values are copied into a new dict.
        """
        default_thresholds = {
            "error_rate": 0.05,  # 5% error rate threshold
            "avg_response_time": 30.0,  # 30 seconds
            "success_rate": 0.95,  # 95% success rate
            "memory_usage": 0.85,  # 85% memory usage
            "cpu_usage": 0.80  # 80% CPU usage
        }
        self.metrics_store = {}
        self.alert_thresholds = {**default_thresholds, **(alert_thresholds or {})}
        self.alerts = []
        self.health_checks = []
        self.system_stats = {}

    def record_system_metrics(self):
        """Sample system metrics via psutil, store them and check alerts.

        Note that ``psutil.cpu_percent(interval=1)`` blocks for one second.

        Returns:
            The dict stored in ``self.system_stats``.
        """
        import psutil

        # Sample the counters once; the value is used twice below.
        net_counters = psutil.net_io_counters()

        self.system_stats = {
            "timestamp": datetime.now(),
            "cpu_percent": psutil.cpu_percent(interval=1),
            "memory_percent": psutil.virtual_memory().percent,
            "disk_usage": psutil.disk_usage('/').percent,
            "network_io": net_counters._asdict() if net_counters else {},
            "process_count": len(psutil.pids()),
            "load_average": psutil.getloadavg() if hasattr(psutil, 'getloadavg') else [0, 0, 0]
        }

        self._check_system_alerts()

        return self.system_stats

    def record_agent_metrics(self, agent_id: str, execution_data: Dict):
        """Append one execution to an agent's log and update its counters.

        Args:
            agent_id: Key into ``metrics_store``; created on first use.
            execution_data: Dict from which ``execution_time``, ``success``,
                ``tools_used``, ``error``, ``error_type`` and
                ``query_complexity`` are read, each with a default.
        """
        agent_metrics = self.metrics_store.setdefault(agent_id, {
            "executions": [],
            "total_requests": 0,
            "successful_requests": 0,
            "total_execution_time": 0,
            "tool_usage": {},
            "error_types": {}
        })

        execution_time = execution_data.get("execution_time", 0)
        succeeded = execution_data.get("success", False)
        tools_used = execution_data.get("tools_used", [])

        agent_metrics["executions"].append({
            "timestamp": datetime.now(),
            "execution_time": execution_time,
            "success": succeeded,
            "tools_used": tools_used,
            "error": execution_data.get("error"),
            "query_complexity": execution_data.get("query_complexity", "medium")
        })

        agent_metrics["total_requests"] += 1
        if succeeded:
            agent_metrics["successful_requests"] += 1

        agent_metrics["total_execution_time"] += execution_time

        tool_usage = agent_metrics["tool_usage"]
        for tool in tools_used:
            tool_usage[tool] = tool_usage.get(tool, 0) + 1

        # An error type is counted only when an error was actually reported.
        if execution_data.get("error"):
            error_type = execution_data.get("error_type", "unknown")
            error_types = agent_metrics["error_types"]
            error_types[error_type] = error_types.get(error_type, 0) + 1

        self._check_agent_alerts(agent_id)

    def _check_system_alerts(self):
        """Append a warning alert for each system metric above its threshold.

        Thresholds are stored as fractions while psutil reports percentages,
        hence the ``* 100``.
        """
        current_time = datetime.now()

        checks = (
            ("cpu_usage", "cpu_percent", "High CPU usage"),
            ("memory_usage", "memory_percent", "High memory usage"),
        )
        for metric, stat_key, label in checks:
            value = self.system_stats[stat_key]
            if value > self.alert_thresholds[metric] * 100:
                self.alerts.append({
                    "timestamp": current_time,
                    "type": "system",
                    "severity": "warning",
                    "message": f"{label}: {value:.1f}%",
                    "metric": metric,
                    "value": value
                })

    def _check_agent_alerts(self, agent_id: str):
        """Append alerts based on the agent's last ten executions.

        Nothing is checked until at least five executions are available. A
        success rate below the threshold produces a critical alert; an
        average execution time above the threshold produces a warning.
        """
        current_time = datetime.now()

        recent_executions = self.metrics_store[agent_id]["executions"][-10:]
        if len(recent_executions) < 5:  # Need minimum sample size
            return

        sample_size = len(recent_executions)

        recent_success_rate = sum(1 for ex in recent_executions if ex["success"]) / sample_size
        if recent_success_rate < self.alert_thresholds["success_rate"]:
            self.alerts.append({
                "timestamp": current_time,
                "type": "agent",
                "agent_id": agent_id,
                "severity": "critical",
                "message": f"Low success rate for agent {agent_id}: {recent_success_rate:.2%}",
                "metric": "success_rate",
                "value": recent_success_rate
            })

        recent_avg_time = sum(ex["execution_time"] for ex in recent_executions) / sample_size
        if recent_avg_time > self.alert_thresholds["avg_response_time"]:
            self.alerts.append({
                "timestamp": current_time,
                "type": "agent",
                "agent_id": agent_id,
                "severity": "warning",
                "message": f"Slow response time for agent {agent_id}: {recent_avg_time:.1f}s",
                "metric": "avg_response_time",
                "value": recent_avg_time
            })

    def get_dashboard_data(self) -> Dict[str, Any]:
        """Return a snapshot of health, agent summary, alerts and trends.

        ``recent_alerts`` contains the last ten alerts and
        ``resource_utilization`` is the latest ``system_stats`` dict.
        """
        return {
            "timestamp": datetime.now(),
            "system_health": self._calculate_system_health(),
            "agent_summary": self._get_agent_summary(),
            "recent_alerts": self.alerts[-10:],  # Last 10 alerts
            "performance_trends": self._calculate_performance_trends(),
            "resource_utilization": self.system_stats
        }

    def _calculate_system_health(self) -> str:
        """Derive a health label from the last twenty alerts and system stats.

        Returns:
            ``"critical"`` if any of those alerts is critical, ``"degraded"``
            if more than three are warnings, ``"stressed"`` if CPU or memory
            exceeds 90 percent, otherwise ``"healthy"``.
        """
        latest_alerts = self.alerts[-20:]

        if any(a.get("severity") == "critical" for a in latest_alerts):
            return "critical"
        if sum(1 for a in latest_alerts if a.get("severity") == "warning") > 3:
            return "degraded"
        if self.system_stats.get("cpu_percent", 0) > 90 or self.system_stats.get("memory_percent", 0) > 90:
            return "stressed"
        return "healthy"

    def _get_agent_summary(self) -> Dict[str, Any]:
        """Aggregate request counters over all agents.

        An agent counts as active when its latest execution is less than 300
        seconds old. The age is measured with ``total_seconds()``;
        ``timedelta.seconds`` drops whole days and would report an agent
        idle for a day and ten seconds as active.
        """
        agents = list(self.metrics_store.values())
        total_requests = sum(agent["total_requests"] for agent in agents)
        total_successful = sum(agent["successful_requests"] for agent in agents)

        overall_success_rate = total_successful / total_requests if total_requests > 0 else 0

        now = datetime.now()
        active_agents = sum(
            1 for agent in agents
            if agent["executions"]
            and (now - agent["executions"][-1]["timestamp"]).total_seconds() < 300
        )

        return {
            "total_agents": len(agents),
            "total_requests": total_requests,
            "overall_success_rate": overall_success_rate,
            "active_agents": active_agents
        }

    @staticmethod
    def _trend_label(recent: float, historical: float, higher_is_better: bool) -> str:
        """Label a metric change; equal values are "stable", not "declining"."""
        if recent == historical:
            return "stable"
        got_better = (recent > historical) == higher_is_better
        return "improving" if got_better else "declining"

    def _calculate_performance_trends(self) -> Dict[str, Any]:
        """Compare the older and newer halves of all recorded executions.

        Executions of every agent are merged and sorted by timestamp. With
        fewer than twenty executions ``{"trend": "insufficient_data"}`` is
        returned; otherwise success-rate and response-time trends plus the
        underlying figures for both halves.
        """
        all_executions = []
        for agent_metrics in self.metrics_store.values():
            all_executions.extend(agent_metrics["executions"])

        if len(all_executions) < 20:
            return {"trend": "insufficient_data"}

        all_executions.sort(key=lambda x: x["timestamp"])

        split_point = len(all_executions) // 2
        historical = all_executions[:split_point]
        recent = all_executions[split_point:]

        historical_success_rate = sum(1 for ex in historical if ex["success"]) / len(historical)
        recent_success_rate = sum(1 for ex in recent if ex["success"]) / len(recent)

        historical_avg_time = sum(ex["execution_time"] for ex in historical) / len(historical)
        recent_avg_time = sum(ex["execution_time"] for ex in recent) / len(recent)

        return {
            "success_rate_trend": self._trend_label(
                recent_success_rate, historical_success_rate, higher_is_better=True),
            "response_time_trend": self._trend_label(
                recent_avg_time, historical_avg_time, higher_is_better=False),
            "historical_success_rate": historical_success_rate,
            "recent_success_rate": recent_success_rate,
            "historical_avg_time": historical_avg_time,
            "recent_avg_time": recent_avg_time
        }

    def generate_health_report(self) -> str:
        """Render the dashboard data as a text report.

        The report ends with one line for each of the five most recent
        alerts.
        """
        dashboard_data = self.get_dashboard_data()

        report = f"""
=== PRODUCTION HEALTH REPORT ===
Generated: {dashboard_data['timestamp'].strftime('%Y-%m-%d %H:%M:%S')}

SYSTEM HEALTH: {dashboard_data['system_health'].upper()}
{'üü¢' if dashboard_data['system_health'] == 'healthy' else 'üü°' if dashboard_data['system_health'] in ['degraded', 'stressed'] else 'üî¥'}

AGENT SUMMARY:
‚Ä¢ Total Agents: {dashboard_data['agent_summary']['total_agents']}
‚Ä¢ Active Agents: {dashboard_data['agent_summary']['active_agents']}
‚Ä¢ Total Requests: {dashboard_data['agent_summary']['total_requests']}
‚Ä¢ Overall Success Rate: {dashboard_data['agent_summary']['overall_success_rate']:.2%}

RESOURCE UTILIZATION:
‚Ä¢ CPU Usage: {dashboard_data['resource_utilization'].get('cpu_percent', 0):.1f}%
‚Ä¢ Memory Usage: {dashboard_data['resource_utilization'].get('memory_percent', 0):.1f}%
‚Ä¢ Disk Usage: {dashboard_data['resource_utilization'].get('disk_usage', 0):.1f}%

PERFORMANCE TRENDS:
‚Ä¢ Success Rate: {dashboard_data['performance_trends'].get('success_rate_trend', 'unknown')}
‚Ä¢ Response Time: {dashboard_data['performance_trends'].get('response_time_trend', 'unknown')}

RECENT ALERTS: {len(dashboard_data['recent_alerts'])}
"""

        for alert in dashboard_data['recent_alerts'][-5:]:  # Show last 5 alerts
            severity_icon = "üî¥" if alert['severity'] == 'critical' else "üü°"
            report += f"\n{severity_icon} [{alert['severity'].upper()}] {alert['message']}"

        return report

class ScalabilityManager:
    """Decide on and record instance-count changes from monitor metrics.

    Decisions are based on the monitor's latest ``system_stats`` and on the
    ``recent_avg_time`` figure from its dashboard data. No real
    infrastructure is touched; ``execute_scaling_action`` only updates the
    in-memory instance count and history.
    """

    def __init__(self, monitor: ProductionMonitor):
        """Bind the manager to a monitor and start with a single instance.

        Args:
            monitor: Object exposing ``system_stats`` (dict) and
                ``get_dashboard_data()``.
        """
        self.monitor = monitor
        self.scaling_policies = {
            "cpu_threshold": 70.0,  # Scale up if CPU > 70%
            "memory_threshold": 80.0,  # Scale up if memory > 80%
            "response_time_threshold": 20.0,  # Scale up if avg response time > 20s
            "queue_length_threshold": 10,  # Scale up if queue length > 10
            "min_instances": 1,
            "max_instances": 10,
            "scale_up_cooldown": 300,  # 5 minutes
            "scale_down_cooldown": 600  # 10 minutes
        }
        self.current_instances = 1
        self.last_scaling_action = None
        self.scaling_history = []

    def check_scaling_needs(self) -> Dict[str, Any]:
        """Evaluate the current metrics and recommend a scaling action.

        Scale-up is requested when CPU, memory or average response time
        exceeds its policy threshold. Scale-down is requested when CPU and
        memory are below 30 percent of their thresholds, response time is
        below half of its threshold and more than ``min_instances`` are
        running. Either request is suppressed while the matching cooldown
        since the last executed action is still running.

        Returns:
            Decision dict with ``timestamp``, ``action`` (``"none"``,
            ``"scale_up"`` or ``"scale_down"``), ``reason``,
            ``current_instances``, ``recommended_instances`` and ``metrics``.
        """
        current_time = datetime.now()
        policies = self.scaling_policies

        system_stats = self.monitor.system_stats
        dashboard_data = self.monitor.get_dashboard_data()
        recent_avg_time = dashboard_data["performance_trends"].get("recent_avg_time", 0)

        scaling_decision = {
            "timestamp": current_time,
            "action": "none",
            "reason": "",
            "current_instances": self.current_instances,
            "recommended_instances": self.current_instances,
            "metrics": {
                "cpu_usage": system_stats.get("cpu_percent", 0),
                "memory_usage": system_stats.get("memory_percent", 0),
                "avg_response_time": recent_avg_time
            }
        }

        # Scale-up: any single exceeded threshold is sufficient.
        scale_up_reasons = []

        if system_stats.get("cpu_percent", 0) > policies["cpu_threshold"]:
            scale_up_reasons.append(f"High CPU usage: {system_stats['cpu_percent']:.1f}%")

        if system_stats.get("memory_percent", 0) > policies["memory_threshold"]:
            scale_up_reasons.append(f"High memory usage: {system_stats['memory_percent']:.1f}%")

        if recent_avg_time > policies["response_time_threshold"]:
            scale_up_reasons.append(f"Slow response time: {recent_avg_time:.1f}s")

        should_scale_up = bool(scale_up_reasons)

        # Scale-down: missing stats default to 100 so that an empty sample
        # never triggers a scale-down.
        should_scale_down = (
            system_stats.get("cpu_percent", 100) < policies["cpu_threshold"] * 0.3 and
            system_stats.get("memory_percent", 100) < policies["memory_threshold"] * 0.3 and
            recent_avg_time < policies["response_time_threshold"] * 0.5 and
            self.current_instances > policies["min_instances"]
        )

        if self.last_scaling_action:
            # total_seconds() covers the whole interval. timedelta.seconds
            # drops whole days, which re-armed the cooldown for a few minutes
            # every 24 hours after the last action.
            time_since_last_action = (
                current_time - self.last_scaling_action["timestamp"]
            ).total_seconds()

            if should_scale_up and time_since_last_action < policies["scale_up_cooldown"]:
                should_scale_up = False
                scaling_decision["reason"] = "Scale-up blocked by cooldown period"

            if should_scale_down and time_since_last_action < policies["scale_down_cooldown"]:
                should_scale_down = False
                scaling_decision["reason"] = "Scale-down blocked by cooldown period"

        if should_scale_up and self.current_instances < policies["max_instances"]:
            scaling_decision["action"] = "scale_up"
            scaling_decision["recommended_instances"] = min(
                self.current_instances + 1,
                policies["max_instances"]
            )
            scaling_decision["reason"] = "; ".join(scale_up_reasons)

        elif should_scale_down:
            scaling_decision["action"] = "scale_down"
            scaling_decision["recommended_instances"] = max(
                self.current_instances - 1,
                policies["min_instances"]
            )
            scaling_decision["reason"] = "Low resource utilization detected"

        return scaling_decision

    def execute_scaling_action(self, scaling_decision: Dict) -> bool:
        """Apply a decision produced by ``check_scaling_needs``.

        A decision with action ``"none"`` is a no-op. Otherwise the instance
        count is set to ``recommended_instances``, the decision becomes
        ``last_scaling_action`` (which starts the cooldown) and an entry is
        appended to ``scaling_history``.

        Returns:
            ``True`` on success or no-op, ``False`` if an exception occurred.
        """
        if scaling_decision["action"] == "none":
            return True

        try:
            # In a real implementation, this would interact with container orchestration
            # systems like Kubernetes, Docker Swarm, or cloud auto-scaling groups

            old_instances = self.current_instances
            self.current_instances = scaling_decision["recommended_instances"]
            self.last_scaling_action = scaling_decision

            self.scaling_history.append({
                "timestamp": scaling_decision["timestamp"],
                "action": scaling_decision["action"],
                "from_instances": old_instances,
                "to_instances": self.current_instances,
                "reason": scaling_decision["reason"],
                "success": True
            })

            print(f"üîß Scaled {scaling_decision['action']}: {old_instances} ‚Üí {self.current_instances} instances")
            print(f"   Reason: {scaling_decision['reason']}")

            return True

        except Exception as e:
            print(f"‚ùå Scaling action failed: {e}")
            return False

    def get_scaling_status(self) -> Dict[str, Any]:
        """Return the instance count, policies and recent scaling history.

        ``scaling_history`` is limited to the last ten entries;
        ``total_scaling_events`` counts all of them.
        """
        return {
            "current_instances": self.current_instances,
            "scaling_policies": self.scaling_policies,
            "last_scaling_action": self.last_scaling_action,
            "scaling_history": self.scaling_history[-10:],  # Last 10 scaling actions
            "total_scaling_events": len(self.scaling_history)
        }

class SafetyManager:
    """Manage safety boundaries and constraints for agentic systems.

    Everything is kept in memory:

    * ``api_call_counts`` - executions per agent, bucketed by wall-clock
      minute under the key ``"{agent_id}_{minute}"``.
    * ``current_costs`` - a rolling 24-hour ledger of per-execution costs and
      their running total.
    * ``safety_violations`` - one record per policy violation found by
      ``validate_query``.
    """

    def __init__(self):
        # NOTE: "max_execution_time" and "allowed_domains" are declared here
        # but no method of this class enforces them.
        self.safety_policies = {
            "max_api_calls_per_minute": 100,
            "max_execution_time": 300,  # 5 minutes
            "allowed_domains": ["*.openai.com", "*.google.com", "*.wikipedia.org"],
            "blocked_keywords": ["delete", "remove", "drop", "truncate"],
            "max_cost_per_query": 1.0,  # $1 per query
            "max_daily_cost": 100.0,  # $100 per day
            "require_approval_for": ["financial_transactions", "data_modification", "external_communications"]
        }
        self.safety_violations = []
        self.current_costs = {"daily": 0.0, "query_costs": []}
        self.api_call_counts = {}

    @staticmethod
    def _current_minute() -> datetime:
        """Return the current local time truncated to the minute."""
        return datetime.now().replace(second=0, microsecond=0)

    @staticmethod
    def _minute_key(agent_id: str, minute: datetime) -> str:
        """Build the ``api_call_counts`` key for an agent and minute bucket."""
        return f"{agent_id}_{minute}"

    def validate_query(self, query: str, agent_id: str) -> Dict[str, Any]:
        """Validate a query against the safety policies.

        Checks, in order: blocked keywords (case-insensitive substring match),
        approval categories, estimated per-query cost, projected daily cost,
        and the per-minute API rate limit for ``agent_id``.

        Every violation found is also appended to ``self.safety_violations``
        so that ``get_safety_status`` can report it.

        Args:
            query: The user query text.
            agent_id: Identifier of the agent that would run the query.

        Returns:
            Dict with keys "approved" (bool), "violations" (list of str),
            "warnings" (list of str), "estimated_cost" (float) and
            "requires_approval" (bool).
        """
        policies = self.safety_policies
        validation_result = {
            "approved": True,
            "violations": [],
            "warnings": [],
            "estimated_cost": 0.0,
            "requires_approval": False
        }

        # Check for blocked keywords
        query_lower = query.lower()
        for keyword in policies["blocked_keywords"]:
            if keyword in query_lower:
                validation_result["violations"].append(f"Contains blocked keyword: {keyword}")
                validation_result["approved"] = False

        # Check for approval requirements (category names are matched with spaces, not underscores)
        for approval_category in policies["require_approval_for"]:
            if approval_category.replace("_", " ") in query_lower:
                validation_result["requires_approval"] = True
                validation_result["warnings"].append(f"Requires approval: {approval_category}")

        # Estimate cost (simplified)
        estimated_tokens = len(query.split()) * 1.3  # Rough estimate
        validation_result["estimated_cost"] = estimated_tokens * 0.0001  # $0.0001 per token estimate

        # Check cost limits
        if validation_result["estimated_cost"] > policies["max_cost_per_query"]:
            validation_result["violations"].append(f"Estimated cost exceeds limit: ${validation_result['estimated_cost']:.4f}")
            validation_result["approved"] = False

        if self.current_costs["daily"] + validation_result["estimated_cost"] > policies["max_daily_cost"]:
            validation_result["violations"].append("Daily cost limit would be exceeded")
            validation_result["approved"] = False

        # Check API rate limits
        minute_key = self._minute_key(agent_id, self._current_minute())
        current_calls = self.api_call_counts.get(minute_key, 0)

        if current_calls >= policies["max_api_calls_per_minute"]:
            validation_result["violations"].append("API rate limit exceeded")
            validation_result["approved"] = False

        # Keep a record of what was rejected; without this the violation
        # count reported by get_safety_status() could never leave zero.
        if validation_result["violations"]:
            detected_at = datetime.now()
            self.safety_violations.extend(
                {"timestamp": detected_at, "agent_id": agent_id, "violation": violation}
                for violation in validation_result["violations"]
            )

        return validation_result

    def record_execution(self, agent_id: str, execution_data: Dict) -> None:
        """Record one execution for rate-limit and cost tracking.

        Increments the agent's call count for the current minute, adds
        ``execution_data.get("cost", 0.0)`` to the cost ledger, then prunes
        ledger entries older than 24 hours and call-count buckets older than
        one hour.

        Args:
            agent_id: Identifier of the agent that ran. May contain underscores.
            execution_data: Execution details; only the "cost" key is read.
        """
        # Update API call counts
        current_minute = self._current_minute()
        minute_key = self._minute_key(agent_id, current_minute)
        self.api_call_counts[minute_key] = self.api_call_counts.get(minute_key, 0) + 1

        # Update cost tracking
        now = datetime.now()
        actual_cost = execution_data.get("cost", 0.0)
        self.current_costs["query_costs"].append({
            "timestamp": now,
            "agent_id": agent_id,
            "cost": actual_cost
        })

        # Clean up old data (keep only last 24 hours)
        cutoff_time = now - timedelta(hours=24)
        self.current_costs["query_costs"] = [
            cost for cost in self.current_costs["query_costs"]
            if cost["timestamp"] > cutoff_time
        ]

        # The daily total is always derived from the pruned ledger
        self.current_costs["daily"] = sum(cost["cost"] for cost in self.current_costs["query_costs"])

        # Clean up old API call counts (keep only last hour).
        # The timestamp is the part after the LAST underscore: agent ids such
        # as "react_agent" contain underscores themselves, while the rendered
        # datetime never does, so the key must be split from the right.
        cutoff_minute = current_minute - timedelta(hours=1)
        keys_to_remove = [
            key for key in self.api_call_counts
            if datetime.fromisoformat(key.rsplit('_', 1)[1]) < cutoff_minute
        ]
        for key in keys_to_remove:
            del self.api_call_counts[key]

    def get_safety_status(self) -> Dict[str, Any]:
        """Return a summary of cost usage, violations and API activity.

        Returns:
            Dict with "daily_cost", "cost_limit", "cost_utilization" (0.0 when
            the limit is 0), "recent_violations" (number of recorded
            violations), "api_calls_current_minute" (calls recorded in the
            current minute, summed over all agents) and
            "safety_policies_active" (number of policy entries).
        """
        daily_cost = self.current_costs["daily"]
        cost_limit = self.safety_policies["max_daily_cost"]

        # Only buckets belonging to the current minute count; older buckets
        # stay in the dict for up to an hour and must not be reported here.
        current_suffix = f"_{self._current_minute()}"
        calls_this_minute = sum(
            count for key, count in self.api_call_counts.items()
            if key.endswith(current_suffix)
        )

        return {
            "daily_cost": daily_cost,
            "cost_limit": cost_limit,
            "cost_utilization": daily_cost / cost_limit if cost_limit else 0.0,
            "recent_violations": len(self.safety_violations),
            "api_calls_current_minute": calls_this_minute,
            "safety_policies_active": len(self.safety_policies)
        }

**# SECTION 5: COMPREHENSIVE DEMONSTRATIONS**

In [None]:
print("\n" + "="*60)
print("SECTION 5: COMPREHENSIVE DEMONSTRATIONS")
print("="*60)

# Shared infrastructure used by every demonstration below:
# metrics, agent factory, monitoring, scaling and safety.
print("üöÄ Initializing production systems...")
production_metrics = AgentMetrics(use_prometheus=False)  # Use fallback metrics to avoid Prometheus issues
agent_factory = ProductionAgentFactory(metrics=production_metrics)
monitor = ProductionMonitor()
scalability_manager = ScalabilityManager(monitor)
safety_manager = SafetyManager()

# Register production tools as (name, callable, description), in this order
for _tool_name, _tool_func, _tool_description in (
    ("enhanced_web_search_tool", enhanced_web_search_tool, "Search the web for current information"),
    ("enhanced_calculator_tool", enhanced_calculator_tool, "Perform mathematical calculations"),
    ("data_analysis_tool", data_analysis_tool, "Analyze data patterns and trends"),
):
    agent_factory.register_tool(
        Tool(name=_tool_name, func=_tool_func, description=_tool_description)
    )

# Every agent below is built on the full set of registered tools
print("\nü§ñ Creating production agents...")
tools = [*agent_factory.tools_registry.values()]

print("Creating ReAct agent...")
react_agent = agent_factory.create_react_agent(tools, memory=True)

print("Creating structured agent (simplified)...")
structured_agent = agent_factory.create_structured_chat_agent(
    tools,
    system_message="You are a production-ready AI assistant with comprehensive monitoring and safety controls."
)

# Small in-memory table for the pandas agent to work on
print("Creating pandas agent...")
sample_data = pd.DataFrame(dict(
    product=['A', 'B', 'C', 'D', 'E'],
    sales=[100, 150, 200, 120, 180],
    profit=[20, 30, 50, 25, 40],
    region=['North', 'South', 'East', 'West', 'Central'],
))

# The pandas agent is optional: on failure it is set to None and setup continues
try:
    pandas_agent = agent_factory.create_pandas_agent(
        sample_data,
        "Sales and profit data for different products and regions"
    )
    print("‚úÖ Pandas agent created successfully")
except Exception as e:
    print(f"‚ö†Ô∏è Pandas agent creation failed: {e}")
    pandas_agent = None

print("‚úÖ All production agents created successfully")

# Wrap the base agents with the self-improvement and goal-oriented patterns
print("\nüß† Creating advanced pattern agents...")
self_improving_agent = SelfImprovementAgent(react_agent, production_metrics)
goal_oriented_agent = GoalOrientedAgent(structured_agent)

# Evaluation framework plus a five-question benchmark ("q1".."q5")
print("\nüìä Setting up evaluation framework...")
evaluator = AgentEvaluationFramework()

benchmark_queries = [
    {"id": f"q{number}", "text": text}
    for number, text in enumerate(
        [
            "What are the latest developments in artificial intelligence?",
            "Calculate the compound interest on $10000 at 5% for 3 years",
            "Analyze the trend in technology stock prices",
            "What are the implications of quantum computing for cybersecurity?",
            "Compare renewable energy adoption rates globally",
        ],
        start=1,
    )
]

# Reference answers, positionally aligned with benchmark_queries
expected_outputs = [
    {"text": text, "key_points": key_points}
    for text, key_points in [
        ("AI developments include advances in LLMs, computer vision, and robotics",
         ["LLMs", "computer vision", "robotics"]),
        ("Compound interest calculation results in $11576.25",
         ["compound interest", "calculation", "result"]),
        ("Technology stocks show mixed performance with growth in AI sectors",
         ["technology stocks", "performance", "AI"]),
        ("Quantum computing poses challenges to current encryption methods",
         ["quantum computing", "encryption", "security"]),
        ("Renewable energy adoption varies by region with solar and wind leading",
         ["renewable energy", "adoption", "solar", "wind"]),
    ]
]

evaluator.add_benchmark_dataset("production_test", benchmark_queries, expected_outputs)

**# DEMONSTRATION 1: Basic Production Agent Operation**

In [None]:
print("\n" + "="*60)
print("DEMONSTRATION 1: BASIC PRODUCTION AGENT OPERATION")
print("="*60)

def demonstrate_production_agent():
    """Run sample queries through the ReAct agent with safety checks and monitoring.

    For each query: validate it with ``safety_manager``, snapshot system
    metrics with ``monitor``, invoke ``react_agent``, then record the outcome
    with both ``monitor`` and ``safety_manager``. Queries rejected by the
    safety check are skipped. A summary from ``production_metrics`` is
    printed at the end.

    Uses the module-level ``safety_manager``, ``monitor``, ``react_agent`` and
    ``production_metrics`` objects. Returns nothing; all output is printed.
    """

    test_queries = [
        "What are recent developments in quantum computing?",
        "Calculate 15% of 2500 and explain the calculation",
        "Analyze trends in renewable energy adoption"
    ]

    for i, query in enumerate(test_queries, 1):
        print(f"\nüìù Test Query {i}: {query}")

        # Safety validation
        safety_check = safety_manager.validate_query(query, "react_agent")
        print(f"üõ°Ô∏è Safety Check: {'‚úÖ Approved' if safety_check['approved'] else '‚ùå Blocked'}")

        if not safety_check["approved"]:
            print(f"   Violations: {safety_check['violations']}")
            continue

        # Record system metrics before execution
        monitor.record_system_metrics()

        # Execute query
        start_time = time.time()
        error_message = None
        try:
            result = react_agent.invoke({"input": query})
            execution_time = time.time() - start_time
            success = True
            print(f"‚úÖ Query completed in {execution_time:.2f}s")
            print(f"üìÑ Response: {result.get('output', '')[:200]}...")

        except Exception as e:
            execution_time = time.time() - start_time
            success = False
            # Python unbinds `e` when this block ends, so the message has to
            # be captured here to make it into the recorded metrics.
            error_message = str(e)
            print(f"‚ùå Query failed after {execution_time:.2f}s: {e}")

        # Record metrics
        execution_data = {
            "execution_time": execution_time,
            "success": success,
            # Tool usage is inferred from the query text, not reported by the agent
            "tools_used": ["web_search", "calculator"] if "calculate" in query.lower() else ["web_search"],
            "error": error_message,
            "cost": safety_check["estimated_cost"]
        }

        try:
            monitor.record_agent_metrics("react_agent", execution_data)
            safety_manager.record_execution("react_agent", execution_data)
        except Exception as metrics_error:
            print(f"‚ö†Ô∏è Metrics recording failed: {metrics_error}")
            # Continue anyway

    # Show monitoring results
    print(f"\nüìä Performance Summary:")
    perf_summary = production_metrics.get_performance_summary()
    print(f"   Total Executions: {perf_summary['total_executions']}")
    print(f"   Success Rate: {perf_summary['success_rate']:.2%}")
    print(f"   Average Duration: {perf_summary['average_duration']:.2f}s")

**# DEMONSTRATION 2: Self-Improvement Agent**

In [None]:
print("\n" + "="*60)
print("DEMONSTRATION 2: SELF-IMPROVEMENT AGENT")
print("="*60)

def demonstrate_self_improvement():
    """Feed query/feedback pairs to the self-improving agent and print what it learned.

    Each scenario is executed through the module-level ``self_improving_agent``
    with its feedback attached; per-run results and the agent's aggregated
    learning insights are printed. Returns nothing.
    """

    # (query, feedback) pairs, run in this order
    test_scenarios = [
        (
            "Research the impact of artificial intelligence on job markets",
            "Good analysis but could be more comprehensive",
        ),
        (
            "Calculate the ROI for a $50000 investment with 8% annual return over 5 years",
            "Calculation correct but explanation was too brief",
        ),
        (
            "Analyze current trends in sustainable energy technologies",
            "Response was slow and missed some key developments",
        ),
    ]

    print("üß† Testing self-improvement capabilities...")

    for i, (query, feedback) in enumerate(test_scenarios, 1):
        print(f"\nüéØ Scenario {i}: {query[:50]}...")

        # Run the query and hand the feedback to the learning loop
        result = self_improving_agent.execute_with_learning(query, feedback=feedback)

        outcome = 'Success' if result['success'] else 'Failed'
        print(f"‚úÖ Execution: {outcome}")
        print(f"‚è±Ô∏è Time: {result['execution_time']:.2f}s")
        print(f"üîß Improvements Applied: {result.get('improvements_applied', 0)}")

        # The performance analysis is optional in the result
        if 'performance_analysis' not in result:
            continue
        analysis = result['performance_analysis']
        print(f"üìä Performance Rating: {analysis.get('execution_time_rating', 'unknown')}")
        print(f"üìã Completeness: {analysis.get('result_completeness', 'unknown')}")

    # Aggregated view across all scenarios
    print(f"\nüéì Learning Insights:")
    insights = self_improving_agent.get_learning_insights()
    total_interactions = insights['total_interactions']
    success_rate = insights['success_rate']
    print(f"   Total Interactions: {total_interactions}")
    print(f"   Success Rate: {success_rate:.2%}")
    print(f"   Performance Trend: {insights.get('recent_performance_trend', 'unknown')}")
    print(f"   Common Improvements: {insights.get('common_improvements', {})}")

**# DEMONSTRATION 3: Goal-Oriented Agent**

In [None]:
print("\n" + "="*60)
print("DEMONSTRATION 3: GOAL-ORIENTED AGENT")
print("="*60)

def demonstrate_goal_oriented_behavior():
    """Register three prioritized goals, run a 60-second work session, and print the outcome.

    Uses the module-level ``goal_oriented_agent``. Returns nothing.
    """

    print("üéØ Setting up long-term goals...")

    # Three goals with distinct priorities and deadlines 7, 14 and 30 days out
    goals = [
        dict(
            description="Research renewable energy market trends for next quarter",
            priority=3,
            deadline=datetime.now() + timedelta(days=7),
        ),
        dict(
            description="Analyze competitive landscape in AI industry",
            priority=2,
            deadline=datetime.now() + timedelta(days=14),
        ),
        dict(
            description="Monitor financial market indicators for portfolio optimization",
            priority=1,
            deadline=datetime.now() + timedelta(days=30),
        ),
    ]

    # Register each goal in order; the returned ids are collected but not used further here
    goal_ids = [
        goal_oriented_agent.set_goal(
            spec["description"],
            priority=spec["priority"],
            deadline=spec["deadline"],
        )
        for spec in goals
    ]

    # Work session capped at one minute
    print(f"\nüöÄ Working towards goals...")
    work_results = goal_oriented_agent.work_towards_goals(time_limit=60)  # 1 minute limit

    worked_on = len(work_results['goals_worked_on'])
    completed = len(work_results['completed_goals'])
    print(f"üìä Work Session Results:")
    print(f"   Goals Worked On: {worked_on}")
    print(f"   Goals Completed: {completed}")
    print(f"   Total Time Spent: {work_results['total_time_spent']:.1f}s")

    # Overall standing of all goals after the session
    goal_status = goal_oriented_agent.get_goal_status()
    print(f"\nüéØ Goal Status:")
    for label, key in (("Active Goals", 'active_goals'), ("Completed Goals", 'completed_goals')):
        print(f"   {label}: {goal_status[key]}")
    print(f"   Average Progress: {goal_status['average_progress']:.1%}")


**# DEMONSTRATION 4: Comprehensive Evaluation**

In [None]:
print("\n" + "="*60)
print("DEMONSTRATION 4: COMPREHENSIVE EVALUATION")
print("="*60)

def demonstrate_comprehensive_evaluation():
    """Evaluate the ReAct and structured-chat agents on the "production_test" benchmark.

    Each agent is evaluated on a three-query sample via the module-level
    ``evaluator`` and its report is printed. A failure for one agent is
    printed and does not stop the others.

    Returns:
        Dict mapping agent display name to its evaluation results; agents
        whose evaluation raised are absent.
    """

    print("üìä Running comprehensive evaluation...")

    evaluation_results = {}

    # Display name -> agent, evaluated in insertion order
    agents_to_test = {
        "ReAct Agent": react_agent,
        "Structured Chat Agent": structured_agent,
    }

    for agent_name, agent in agents_to_test.items():
        print(f"\nüß™ Evaluating {agent_name}...")

        try:
            # Small sample keeps the demo quick
            results = evaluator.evaluate_agent_performance(
                agent, "production_test", sample_size=3
            )
            # Store before reporting so results survive a report failure
            evaluation_results[agent_name] = results

            print(evaluator.generate_evaluation_report(results))

        except Exception as e:
            print(f"‚ùå Evaluation failed for {agent_name}: {e}")

    return evaluation_results


**# DEMONSTRATION 5: Production Monitoring Dashboard**

In [None]:
print("\n" + "="*60)
print("DEMONSTRATION 5: PRODUCTION MONITORING DASHBOARD")
print("="*60)

def demonstrate_production_monitoring():
    """Print a health report, a scaling analysis and the safety status.

    Uses the module-level ``monitor``, ``scalability_manager`` and
    ``safety_manager``. When a scaling action is recommended it is executed.
    Returns nothing.
    """

    print("üìä Generating production monitoring dashboard...")

    # Take a fresh metrics sample, then query the dashboard and health report
    monitor.record_system_metrics()

    # The dashboard payload is fetched as in the original flow; it is not displayed here
    monitor.get_dashboard_data()

    print(monitor.generate_health_report())

    # Scaling analysis
    scaling_decision = scalability_manager.check_scaling_needs()
    print(f"\nüîß Scaling Analysis:")
    print(f"   Current Instances: {scaling_decision['current_instances']}")
    recommended_action = scaling_decision['action']
    print(f"   Recommended Action: {recommended_action}")
    print(f"   Reason: {scaling_decision['reason']}")

    if recommended_action != 'none':
        print(f"   Recommended Instances: {scaling_decision['recommended_instances']}")

        # Execute scaling (simulation)
        success = scalability_manager.execute_scaling_action(scaling_decision)
        outcome = '‚úÖ Successful' if success else '‚ùå Failed'
        print(f"   Scaling {outcome}")

    # Cost and violation summary from the safety manager
    safety_status = safety_manager.get_safety_status()
    daily_cost = safety_status['daily_cost']
    cost_limit = safety_status['cost_limit']
    print(f"\nüõ°Ô∏è Safety Status:")
    print(f"   Daily Cost: ${daily_cost:.2f} / ${cost_limit:.2f}")
    print(f"   Cost Utilization: {safety_status['cost_utilization']:.1%}")
    print(f"   Recent Violations: {safety_status['recent_violations']}")
    print(f"   API Calls/Min: {safety_status['api_calls_current_minute']}")

**# DEMONSTRATION 6: Advanced Visualization**

In [None]:
print("\n" + "="*60)
print("DEMONSTRATION 6: ADVANCED VISUALIZATION")
print("="*60)

def create_production_dashboard_visualization():
    """Build and show a 3x2 Plotly dashboard.

    Only the success-rate gauge reads live data (from the module-level
    ``production_metrics``, defaulting to 0.85 when absent); every other
    panel is filled with hard-coded or randomly generated sample data.

    Returns:
        The Plotly figure, after it has been shown.
    """

    # Live summary; only 'success_rate' is read from it
    sample_metrics = production_metrics.get_performance_summary()

    # Grid layout: one title and one subplot type per cell, row by row
    panel_titles = (
        'Agent Performance Overview', 'Response Time Distribution',
        'Tool Usage Patterns', 'Success Rate Trends',
        'Resource Utilization', 'Cost Analysis',
    )
    panel_specs = [
        [{"type": "indicator"}, {"type": "histogram"}],
        [{"type": "bar"}, {"type": "scatter"}],
        [{"type": "pie"}, {"type": "bar"}],
    ]
    fig = make_subplots(rows=3, cols=2, subplot_titles=panel_titles, specs=panel_specs)

    # Agent Performance Overview (Gauge), shown as a percentage
    success_rate = sample_metrics.get('success_rate', 0.85) * 100
    gauge_style = {
        "axis": {"range": [0, 100]},
        "bar": {"color": "darkgreen"},
        "steps": [
            {"range": [0, 60], "color": "lightgray"},
            {"range": [60, 80], "color": "yellow"},
            {"range": [80, 100], "color": "lightgreen"}
        ],
        "threshold": {
            "line": {"color": "red", "width": 4},
            "thickness": 0.75,
            "value": 90
        }
    }
    success_gauge = go.Indicator(
        mode="gauge+number",
        value=success_rate,
        title={"text": "Success Rate (%)"},
        gauge=gauge_style,
    )

    # Response Time Distribution (sample data; first random draw)
    response_times = np.random.lognormal(2.5, 0.5, 100)
    response_histogram = go.Histogram(x=response_times, name="Response Times", nbinsx=20)

    # Tool Usage Patterns (sample data)
    tools = ['web_search', 'calculator', 'data_analysis', 'document_search']
    usage_counts = [45, 30, 25, 15]
    tool_usage_bar = go.Bar(x=tools, y=usage_counts, name="Tool Usage")

    # Success Rate Trends (sample data; second random draw)
    time_points = pd.date_range(start='2024-01-01', periods=30, freq='D')
    success_rates = 0.8 + 0.15 * np.random.random(30)
    success_trend = go.Scatter(x=time_points, y=success_rates, mode='lines+markers', name="Success Rate")

    # Resource Utilization (sample data)
    resources = ['CPU', 'Memory', 'Disk', 'Network']
    utilization = [65, 78, 45, 32]
    resource_pie = go.Pie(labels=resources, values=utilization, name="Resource Usage")

    # Cost Analysis (sample data)
    cost_categories = ['API Calls', 'Compute', 'Storage', 'Network']
    costs = [120, 80, 30, 15]
    cost_bar = go.Bar(x=cost_categories, y=costs, name="Costs ($)")

    # Place each trace in its grid cell, in the same order the panels are titled
    placements = [
        (success_gauge, 1, 1),
        (response_histogram, 1, 2),
        (tool_usage_bar, 2, 1),
        (success_trend, 2, 2),
        (resource_pie, 3, 1),
        (cost_bar, 3, 2),
    ]
    for trace, grid_row, grid_col in placements:
        fig.add_trace(trace, row=grid_row, col=grid_col)

    # Update layout
    fig.update_layout(
        height=1000,
        showlegend=False,
        title_text="Production Agentic RAG System Dashboard",
        title_x=0.5
    )

    # Show the plot
    fig.show()

    print("üìä Production dashboard visualization created successfully!")

    return fig

**# MAIN EXECUTION AND TESTING**

In [None]:
print("\n" + "="*60)
print("MAIN EXECUTION AND TESTING")
print("="*60)

def run_comprehensive_demonstration():
    """Run demonstrations 1-6 in order and print a final summary.

    Any exception raised along the way is caught, printed, and reported in
    the return value instead of propagating.

    Returns:
        On success: dict with "success" (True), "final_metrics",
        "evaluation_results" and "dashboard_created" (True).
        On failure: dict with "success" (False) and "error" (the message).
    """

    print("üöÄ Starting comprehensive production demonstration...")

    # Printed before each demonstration
    section_break = "\n" + "="*50

    try:
        print(section_break)
        demonstrate_production_agent()

        print(section_break)
        demonstrate_self_improvement()

        print(section_break)
        demonstrate_goal_oriented_behavior()

        print(section_break)
        evaluation_results = demonstrate_comprehensive_evaluation()

        print(section_break)
        demonstrate_production_monitoring()

        print(section_break)
        create_production_dashboard_visualization()

        # Final summary
        closing_rule = "="*60
        print("\n" + closing_rule)
        print("COMPREHENSIVE DEMONSTRATION COMPLETE")
        print(closing_rule)

        final_metrics = production_metrics.get_performance_summary()
        print(f"üìä Final Performance Summary:")
        print(f"   Total Executions: {final_metrics['total_executions']}")
        print(f"   Overall Success Rate: {final_metrics['success_rate']:.2%}")
        print(f"   Average Execution Time: {final_metrics['average_duration']:.2f}s")

        # System health check
        print(f"\nüè• Final Health Check:")
        print(monitor.generate_health_report())

        print(f"\n‚úÖ All production systems demonstrated successfully!")
        print(f"üéØ Key Takeaways:")
        for takeaway in (
            "Production monitoring provides comprehensive observability",
            "Self-improvement agents learn from experience and feedback",
            "Goal-oriented agents can pursue long-term objectives",
            "Comprehensive evaluation ensures quality and reliability",
            "Safety mechanisms protect against misuse and errors",
            "Scalability systems handle varying loads automatically",
        ):
            print(f"   - {takeaway}")

        return dict(
            success=True,
            final_metrics=final_metrics,
            evaluation_results=evaluation_results,
            dashboard_created=True,
        )

    except Exception as e:
        print(f"‚ùå Demonstration failed: {e}")
        return dict(success=False, error=str(e))

# Entry point: run every demonstration when executed as a script or notebook cell
if __name__ == "__main__":
    demo_results = run_comprehensive_demonstration()

    if not demo_results["success"]:
        print(f"‚ùå Demonstration encountered issues: {demo_results.get('error', 'Unknown error')}")
    else:
        print(f"\nüéâ Production deployment demonstration completed successfully!")
        print(f"üí° Ready for real-world deployment with:")
        # One check-marked line per capability, emitted in a single write
        print("\n".join(
            f"   ‚úì {capability}"
            for capability in (
                "Comprehensive monitoring and alerting",
                "Automatic scaling capabilities",
                "Safety boundaries and cost controls",
                "Self-improvement and learning systems",
                "Goal-oriented autonomous behavior",
                "Production-ready evaluation frameworks",
            )
        ))

# Printed unconditionally, whether or not the demonstration ran
print(f"\n‚ú® Notebook Complete! Advanced patterns and production deployment demonstrated.")