In [None]:

# Cell 1: Install packages and imports
!pip install -q openai requests pandas numpy matplotlib seaborn plotly scipy scikit-learn ipywidgets reportlab

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import requests
import json
import base64
import io
import time
import re
import traceback
import os
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import dataclass
from abc import ABC, abstractmethod
import warnings
warnings.filterwarnings('ignore')

from IPython.display import display, HTML, clear_output
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual

from reportlab.lib.pagesizes import letter, A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, Image
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT

# Global plotting defaults applied to matplotlib/seaborn figures from here on.
# NOTE(review): the 'seaborn-v0_8' style name presumably requires
# matplotlib >= 3.6 — confirm against the runtime's installed version.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Cell 2: Prompt engineering for AI agents
class PromptEngineeringProtocols:
    """Builders for the prompt templates used by the agents.

    Both builders are static: they interpolate their arguments into fixed
    templates and keep no state.
    """

    @staticmethod
    def create_system_prompt(role: str, expertise: str, context: str = "") -> str:
        """Return the generic system prompt for an agent.

        Args:
            role: Job title the model is asked to assume.
            expertise: Short description of the agent's specialism.
            context: Free-form text placed after the ``CONTEXT:`` label.

        Returns:
            The filled-in system prompt.
        """
        system_prompt = f"""You are a {role} with {expertise}.

CORE COMPETENCIES:
- Deep analytical thinking with step-by-step reasoning
- Domain expertise in data science and analytics
- Professional communication and report generation
- Quality assurance and validation protocols

REASONING PROTOCOL:
1. Analyze the problem systematically
2. Break down complex tasks into manageable components
3. Apply domain knowledge and best practices
4. Validate assumptions and results
5. Provide clear, actionable insights

OUTPUT FORMAT:
- Use clear, professional language
- Write in plain English without markdown formatting
- Avoid using symbols like ##, **, or other markdown
- Include specific examples and evidence
- Provide clear recommendations
- Maintain consistency with previous analysis
- ALWAYS provide output - never return empty responses

CONTEXT: {context}

Remember: You are working as part of a multi-agent team. Maintain consistency with team objectives and build upon previous agents' work."""
        return system_prompt

    @staticmethod
    def create_chain_of_thought_prompt(task: str, examples: List[str] = None) -> str:
        """Wrap ``task`` in a step-by-step reasoning scaffold.

        Args:
            task: Description of the work the model should reason about.
            examples: Optional sample analyses; when non-empty they are
                appended as a numbered list (one per line, starting at 1).

        Returns:
            The user prompt text.
        """
        scaffold = f"""Let's think step by step about this task: {task}

REASONING PROCESS:
1. First, I need to understand what we're analyzing...
2. Then, I should consider the key factors...
3. Next, I'll evaluate the options...
4. Finally, I'll synthesize the findings...

Let me work through this systematically:"""

        # No examples (None or an empty list): the scaffold alone is the prompt.
        if not examples:
            return scaffold

        numbered_examples = "".join(
            f"{position}. {text}\n" for position, text in enumerate(examples, 1)
        )
        return scaffold + "\n\nEXAMPLES OF SIMILAR ANALYSIS:\n" + numbered_examples

# Module-level instance of the prompt builders; the class only exposes static
# methods, so this object carries no state of its own.
prompt_protocols = PromptEngineeringProtocols()

# Cell 3: API clients for OpenRouter and Brave Search
class OpenRouterClient:
    """Small chat-completions client for the OpenRouter HTTP API.

    Requests go through one ``requests.Session`` that carries the auth and
    attribution headers. Failed calls are retried with exponential backoff.
    """

    # 4xx statuses that are still worth retrying (request timeout, rate
    # limit). Any other 4xx means the request itself is wrong (bad key,
    # bad payload, ...) and repeating it cannot succeed.
    _RETRYABLE_CLIENT_STATUSES = (408, 429)

    def __init__(self, api_key: str, model: str = "openai/gpt-4o-mini"):
        """Create the session and attach the headers sent with every request.

        Args:
            api_key: OpenRouter API key, sent as a bearer token.
            model: Model identifier placed in every request payload.
        """
        self.api_key = api_key
        self.model = model
        self.base_url = "https://openrouter.ai/api/v1"
        self.session = requests.Session()
        self.session.headers.update({
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://colab.research.google.com",
            "X-Title": "Multi-Agent Analytics System"
        })

    def _exponential_backoff(self, attempt: int, base_delay: float = 1.0) -> float:
        """Return the delay for ``attempt`` (0-based): base * 2**attempt, capped at 60s."""
        return min(base_delay * (2 ** attempt), 60.0)

    def _make_request(self, messages: List[Dict[str, str]], max_retries: int = 3) -> Dict[str, Any]:
        """POST ``messages`` to the chat-completions endpoint, retrying on failure.

        Args:
            messages: Chat messages in ``{"role": ..., "content": ...}`` form.
            max_retries: Maximum number of attempts.

        Returns:
            The decoded JSON body of the first 200 response.

        Raises:
            Exception: when a non-retryable 4xx status is returned, or when
                every attempt has failed (timeout, transport error, error
                status or rate limiting).
        """
        payload = {
            "model": self.model,
            "messages": messages,
            "temperature": 0.7,
            "max_tokens": 4000,
            "top_p": 0.9,
            "frequency_penalty": 0.1,
            "presence_penalty": 0.1
        }
        url = f"{self.base_url}/chat/completions"

        for attempt in range(max_retries):
            is_last_attempt = attempt == max_retries - 1

            try:
                response = self.session.post(url, json=payload, timeout=30)
                if response.status_code == 200:
                    return response.json()
                status = response.status_code

            except requests.exceptions.Timeout:
                print(f"Request timeout. Retry {attempt + 1}/{max_retries}")
                if is_last_attempt:
                    raise Exception("Request timeout after all retries")

            except Exception as e:
                print(f"Request error: {str(e)}. Retry {attempt + 1}/{max_retries}")
                if is_last_attempt:
                    raise

            else:
                # Status handling lives outside the ``try`` so that the
                # exceptions raised here are not swallowed and re-reported by
                # the generic handler above.
                if status == 429:
                    if is_last_attempt:
                        print("Rate limited. No retries left")
                    else:
                        delay = self._exponential_backoff(attempt)
                        print(f"Rate limited. Waiting {delay:.1f}s before retry {attempt + 1}/{max_retries}")
                else:
                    print(f"API Error {status}: {response.text}")
                    if 400 <= status < 500 and status not in self._RETRYABLE_CLIENT_STATUSES:
                        raise Exception(f"API request failed with status {status}; not retrying")
                    if is_last_attempt:
                        raise Exception(f"API request failed after {max_retries} attempts")

            # Back off only when another attempt will actually follow.
            if not is_last_attempt:
                time.sleep(self._exponential_backoff(attempt))

        raise Exception("All retry attempts failed")

    def chat_completion(self, system_prompt: str, user_prompt: str) -> str:
        """Run one system+user exchange and return the assistant's text.

        Failures are not raised: they are printed and reported to the caller
        as a string starting with ``"Error: "``.

        Args:
            system_prompt: Content of the system message.
            user_prompt: Content of the user message.

        Returns:
            The stripped message content of the first choice, or an
            ``"Error: ..."`` string when the request or response parsing fails.
        """
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]

        try:
            response = self._make_request(messages)
            return response["choices"][0]["message"]["content"].strip()
        except Exception as e:
            print(f"Chat completion failed: {str(e)}")
            return f"Error: {str(e)}"

class BraveSearchClient:
    """Throttled client for the Brave web-search endpoint.

    The client enforces a cap on successful searches (``max_queries``) and a
    minimum gap between requests (``min_delay`` seconds). Every failure mode
    is reported with a printed message and an empty result list.
    """

    def __init__(self, api_key: str):
        """Set up the HTTP session, its headers and the throttling counters."""
        self.api_key = api_key
        self.base_url = "https://api.search.brave.com/res/v1/web/search"
        self.session = requests.Session()
        self.session.headers.update({
            "Accept": "application/json",
            "Accept-Encoding": "gzip",
            "X-Subscription-Token": api_key
        })
        # Throttling state: successful searches so far, the cap on them, the
        # time of the last request, and the minimum gap between requests.
        self.query_count = 0
        self.max_queries = 3
        self.last_request_time = 0
        self.min_delay = 2.0

    def search(self, query: str, count: int = 5) -> List[Dict[str, Any]]:
        """Run one web search and return the simplified hits.

        Args:
            query: Search text; it is lower-cased and stripped before sending.
            count: Requested number of results, capped at 10.

        Returns:
            A list of dicts with ``title``, ``url``, ``description``,
            ``published_date`` and ``age`` keys (empty string when missing).
            The list is empty when the query budget is spent, the API answers
            with a non-200 status, or the request raises.
        """
        if self.query_count >= self.max_queries:
            print(f"Maximum search queries ({self.max_queries}) reached")
            return []

        # Honour the minimum spacing between two consecutive requests.
        elapsed = time.time() - self.last_request_time
        if elapsed < self.min_delay:
            pause = self.min_delay - elapsed
            print(f"Waiting {pause:.1f}s to avoid rate limiting...")
            time.sleep(pause)

        optimized_query = query.lower().strip()
        # NOTE(review): "mkt" and "text_format" look like parameters from a
        # different search API — confirm they are accepted by this endpoint.
        params = {
            "q": optimized_query,
            "count": min(count, 10),
            "offset": 0,
            "mkt": "en-US",
            "safesearch": "moderate",
            "text_decorations": False,
            "text_format": "Raw"
        }

        try:
            response = self.session.get(self.base_url, params=params, timeout=15)
            self.last_request_time = time.time()
            status = response.status_code

            if status == 429:
                print(f"Rate limited. Skipping search: '{optimized_query}'")
                return []
            if status != 200:
                print(f"Search API Error {response.status_code}: {response.text}")
                return []

            data = response.json()
            # Only successfully decoded responses count against the budget.
            self.query_count += 1

            has_hits = "web" in data and "results" in data["web"]
            raw_hits = data["web"]["results"] if has_hits else []
            results = [
                {
                    "title": hit.get("title", ""),
                    "url": hit.get("url", ""),
                    "description": hit.get("description", ""),
                    "published_date": hit.get("published_date", ""),
                    "age": hit.get("age", "")
                }
                for hit in raw_hits
            ]

            print(f"Search completed: '{optimized_query}' ({len(results)} results)")
            return results

        except Exception as e:
            print(f"Search request failed: {str(e)}")
            return []

# Cell 4: Agent state management and base agent class
@dataclass
class AgentState:
    """Mutable record of what an agent is, what it is doing and what it produced."""

    # Identity of the agent.
    name: str
    role: str
    expertise: str
    # Work in progress: task text, shared context, latest output and status.
    current_task: str
    context: Dict[str, Any]
    output: str
    status: str
    # Iteration bookkeeping.
    iteration: int
    max_iterations: int

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict, in declaration order.

        Values are not copied; ``context`` is the same dict object that the
        state holds.
        """
        return {
            field_name: getattr(self, field_name)
            for field_name in self.__dataclass_fields__
        }

# Base agent class that all agents inherit from
class BaseAgent(ABC):
    """Shared behaviour for every agent: state, keyword memory, prompts and retries.

    Subclasses implement :meth:`execute`; the visible ones build a task prompt
    and pass it to :meth:`execute_with_retry`.
    """

    def __init__(self, name: str, role: str, expertise: str,
                 openrouter_client: OpenRouterClient,
                 brave_client: Optional[BraveSearchClient] = None):
        """Store the collaborators and create a fresh ``pending`` state.

        Args:
            name: Agent name; also selects the system-prompt variant.
            role: Role text interpolated into the system prompt.
            expertise: Expertise text interpolated into the system prompt.
            openrouter_client: Client used for chat completions.
            brave_client: Optional web-search client.
        """
        self.name = name
        self.role = role
        self.expertise = expertise
        self.openrouter_client = openrouter_client
        self.brave_client = brave_client
        self.state = AgentState(
            name=name,
            role=role,
            expertise=expertise,
            current_task="",
            context={},
            output="",
            status="pending",
            iteration=0,
            max_iterations=1
        )
        self.prompt_protocols = PromptEngineeringProtocols()
        self.memory = []

    def update_context(self, key: str, value: Any) -> None:
        """Set ``key`` in the context dict that is embedded in the system prompt."""
        self.state.context[key] = value

    def add_to_memory(self, content: str, source: str = "") -> None:
        """Append a timestamped entry to the agent's memory list."""
        self.memory.append({
            "content": content,
            "source": source,
            "timestamp": time.time()
        })

    def get_relevant_memory(self, query: str, limit: int = 5) -> List[str]:
        """Return up to ``limit`` memory contents sharing words with ``query``.

        Relevance is the number of distinct lower-cased, whitespace-separated
        words the entry has in common with the query. Entries with no overlap
        are dropped; the rest are returned best match first.
        """
        query_words = set(query.lower().split())
        scored_memory = []

        for item in self.memory:
            content_words = set(item["content"].lower().split())
            score = len(query_words.intersection(content_words))
            if score > 0:
                scored_memory.append((score, item["content"]))

        # Stable sort: entries with equal scores keep insertion order.
        scored_memory.sort(key=lambda x: x[0], reverse=True)
        return [item[1] for item in scored_memory[:limit]]

    def create_system_prompt(self, additional_context: str = "") -> str:
        """Build the system prompt for this agent.

        The current context dict (if any) is appended to ``additional_context``
        as JSON. The template is then chosen by agent name: a code-only
        template for the coder, a plain-English report template for the
        reporting agents, and the generic protocol prompt for everyone else.
        """
        context_str = additional_context
        if self.state.context:
            try:
                context_str += f"\n\nCURRENT CONTEXT:\n{json.dumps(self.state.context, indent=2, default=str)}"
            except Exception:
                # Best effort: fall back to the plain repr when the context
                # cannot be serialised (e.g. non-string-like dict keys).
                # ``Exception`` rather than a bare ``except`` so that
                # KeyboardInterrupt/SystemExit are not swallowed.
                context_str += f"\n\nCURRENT CONTEXT:\n{str(self.state.context)}"

        if self.name == "Data Scientist Coder":
            return f"""You are a {self.role} with {self.expertise}.

CRITICAL INSTRUCTIONS FOR CODE GENERATION:
- Generate ONLY executable Python code
- Do NOT include explanations, reasoning, or text
- Start with import statements
- Use proper Python syntax
- Include error handling with try-except blocks
- Add comments using # symbol
- Output should be ready to execute
- Do NOT start with "Certainly!" or "Let's break down"
- Do NOT include markdown formatting

CONTEXT: {context_str}

Generate executable Python code only."""

        if self.name in ["Decision Maker", "Data Understander", "Market Researcher", "Analysis Planner", "Business Insights Translator"]:
            return f"""You are a {self.role} with {self.expertise}.

CRITICAL INSTRUCTIONS FOR REPORT GENERATION:
- Write in clear, professional plain English
- Do NOT use markdown formatting like ##, **, or other symbols
- Use proper sentence structure and paragraphs
- Avoid bullet points with symbols
- Write complete sentences
- Use headings as plain text without formatting
- Make content readable and professional
- ALWAYS provide output - never return empty responses

CONTEXT: {context_str}

Generate professional, readable content in plain English."""

        return self.prompt_protocols.create_system_prompt(
            role=self.role,
            expertise=self.expertise,
            context=context_str
        )

    def _extract_code_from_response(self, response: str) -> str:
        """Pull the Python source out of a model response.

        A fenced ```` ```python ```` block is preferred, then any fenced block.
        If no closed fence is found, a line-based heuristic keeps lines that
        look like code and skips conversational lines.
        """
        if "```python" in response:
            start = response.find("```python") + 9
            end = response.find("```", start)
            if end != -1:
                return response[start:end].strip()

        if "```" in response:
            start = response.find("```") + 3
            end = response.find("```", start)
            if end != -1:
                return response[start:end].strip()

        lines = response.split('\n')
        code_lines = []
        in_code = False

        for line in lines:
            # Drop chatty lead-in lines wherever they appear.
            if any(phrase in line.lower() for phrase in [
                "certainly!", "let's", "here's", "i'll", "we'll",
                "the code", "here is", "below is"
            ]):
                continue

            # A line starts or continues code when it begins with a Python
            # keyword, a comment or common library prefix, or contains an
            # assignment and is not a "-" bullet.
            if (line.strip().startswith(('import ', 'from ', 'def ', 'class ', 'if ', 'for ', 'while ', 'try:', 'except', 'with ')) or
                line.strip().startswith(('#', 'df.', 'plt.', 'sns.', 'np.', 'pd.')) or
                '=' in line and not line.strip().startswith('-')):
                code_lines.append(line)
                in_code = True
            elif in_code and line.strip() == '':
                code_lines.append(line)
            elif in_code and not line.strip().startswith(('The', 'This', 'We', 'I', 'It')):
                code_lines.append(line)
            else:
                # Prose after code has started ends the snippet.
                if in_code and line.strip():
                    break

        return '\n'.join(code_lines).strip()

    def execute_with_retry(self, task: str, max_attempts: int = 3) -> str:
        """Ask the model to perform ``task``, retrying on bad output or errors.

        Each attempt builds the system prompt (with relevant memory), calls
        the model, extracts code for the coder agent, and validates the
        result. A valid result is stored in state and memory and returned.

        Args:
            task: Task text; wrapped in a chain-of-thought prompt for every
                agent except the coder.
            max_attempts: Maximum number of model calls.

        Returns:
            The validated output; ``"Error in <name>: ..."`` when the final
            attempt raised; or ``"Execution failed after all retries"`` when
            every attempt produced unusable output. In both failure cases
            ``state.status`` is ``"error"``.
        """
        self.state.status = "in_progress"
        self.state.current_task = task

        for attempt in range(max_attempts):
            try:
                print(f"{self.name} executing: {task[:50]}...")

                relevant_memory = self.get_relevant_memory(task)
                memory_context = "\n\nRELEVANT CONTEXT FROM PREVIOUS WORK:\n" + "\n".join(relevant_memory) if relevant_memory else ""

                system_prompt = self.create_system_prompt(memory_context)

                if self.name == "Data Scientist Coder":
                    user_prompt = task
                else:
                    user_prompt = self.prompt_protocols.create_chain_of_thought_prompt(task)

                result = self.openrouter_client.chat_completion(system_prompt, user_prompt)

                if self.name == "Data Scientist Coder":
                    result = self._extract_code_from_response(result)

                    if not result or not any(keyword in result for keyword in ['import ', 'df.', 'plt.', 'sns.', 'np.', 'pd.']):
                        print(f"{self.name} did not generate valid code, retrying...")
                        continue

                if not result or len(result.strip()) < 10:
                    print(f"{self.name} produced empty output, retrying...")
                    continue

                if self._validate_output(result):
                    self.state.output = result
                    self.state.status = "completed"
                    self.add_to_memory(result, f"{self.name}_output")
                    print(f"{self.name} completed successfully")
                    return result
                else:
                    print(f"{self.name} output validation failed, retrying...")

            except Exception as e:
                print(f"{self.name} attempt {attempt + 1} failed: {str(e)}")
                if attempt == max_attempts - 1:
                    self.state.status = "error"
                    error_msg = f"Error in {self.name}: {str(e)}"
                    self.state.output = error_msg
                    return error_msg

                time.sleep(2 ** attempt)

        # Every attempt ended in rejected output without raising. Record the
        # failure so the state does not stay "in_progress" forever.
        failure_msg = "Execution failed after all retries"
        self.state.status = "error"
        self.state.output = failure_msg
        return failure_msg

    def _validate_output(self, output: str) -> bool:
        """Return True when ``output`` is usable agent output.

        Output is rejected when it is shorter than 10 non-blank characters or
        when it *starts with* an error marker, which is how the chat client
        reports a failed call (``"Error: ..."``). The markers are matched as a
        prefix, not anywhere in the text: generated code routinely contains
        ``print(f"Error: {e}")`` and reports mention phrases such as
        "margin of error:", and neither makes the output invalid.
        """
        if not output:
            return False

        stripped = output.strip()
        if len(stripped) < 10:
            return False

        error_prefixes = ("error:", "failed:", "exception:", "traceback:")
        return not stripped.lower().startswith(error_prefixes)

    @abstractmethod
    def execute(self, input_data: Any) -> str:
        """Run the agent on ``input_data`` and return its text output."""
        pass

# Cell 5: Specialized agents (1-3)
class DataUnderstanderAgent(BaseAgent):
    """Agent that asks the model for a written profile of the loaded datasets."""

    def __init__(self, openrouter_client: OpenRouterClient):
        """Register the agent under the name "Data Understander"."""
        super().__init__(
            "Data Understander",
            "Senior Data Analyst",
            "data profiling, statistical analysis, and feature engineering",
            openrouter_client,
        )

    def execute(self, csv_data: Dict[str, pd.DataFrame]) -> str:
        """Profile every dataset in ``csv_data`` and return the model's report.

        A per-dataset summary is stored in the agent context under
        ``"data_summary"`` before the model is called.

        Args:
            csv_data: Mapping of dataset name to DataFrame.
        """
        profiling_prompt = f"""Analyze the provided CSV data and create a comprehensive data profile.

DATA TO ANALYZE:
Number of datasets: {len(csv_data)}
Dataset names: {list(csv_data.keys())}

For each dataset, provide:
1. Basic statistics including shape, data types, and missing values
2. Data quality assessment with specific metrics
3. Feature analysis and categorization of variables
4. Potential analysis directions based on data characteristics
5. Data relationships and patterns observed
6. Recommendations for further analysis

Write your analysis in clear, professional plain English without any markdown formatting."""

        self.update_context(
            "data_summary",
            {name: self._summarize_frame(df) for name, df in csv_data.items()},
        )

        return self.execute_with_retry(profiling_prompt)

    @staticmethod
    def _summarize_frame(df: pd.DataFrame) -> Dict[str, Any]:
        """Describe one DataFrame: shape, columns, dtypes, null counts, first 3 rows."""
        column_dtypes = df.dtypes.to_dict()
        null_counts = df.isnull().sum().to_dict()
        return {
            "shape": df.shape,
            "columns": list(df.columns),
            # Keys and dtypes are stringified; null counts become plain ints.
            "dtypes": {str(column): str(dtype) for column, dtype in column_dtypes.items()},
            "missing_values": {str(column): int(nulls) for column, nulls in null_counts.items()},
            "sample_data": df.head(3).to_dict('records'),
        }

class MarketResearcherAgent(BaseAgent):
    """Agent that runs web searches and asks the model for market insights."""

    def __init__(self, openrouter_client: OpenRouterClient, brave_client: BraveSearchClient):
        """Register the agent under the name "Market Researcher"."""
        super().__init__(
            "Market Researcher",
            "Market Research Specialist",
            "market analysis, competitive intelligence, and industry trends",
            openrouter_client,
            brave_client,
        )

    def execute(self, data_context: str) -> str:
        """Search the web for context and return the model's market analysis.

        The raw hits and the citation list are stored in the agent context
        under ``"search_results"`` and ``"sources"``.

        Args:
            data_context: Text describing the data; scanned for industry keywords.
        """
        search_results = {}
        sources_list = []

        planned_queries = self._generate_search_queries(data_context)
        if self.brave_client:
            for query in planned_queries:
                hits = self.brave_client.search(query, count=3)
                search_results[query] = hits
                sources_list.extend(
                    {
                        "title": hit.get("title", "No title available"),
                        "url": hit.get("url", "No URL available"),
                        "description": hit.get("description", "No description available")
                    }
                    for hit in hits
                )

        # NOTE(review): when no search produced a hit, placeholder sources are
        # substituted and the prompt below tells the model to cite them —
        # confirm this is the intended behaviour for the final report.
        if not sources_list:
            sources_list = [
                {"title": "Market Research Source 1", "url": "https://example.com/source1", "description": "General market trends"},
                {"title": "Industry Analysis Source 2", "url": "https://example.com/source2", "description": "Industry best practices"},
                {"title": "Data Analytics Trends Source 3", "url": "https://example.com/source3", "description": "Analytics market insights"}
            ]

        self.update_context("search_results", search_results)
        self.update_context("sources", sources_list)

        research_prompt = f"""Based on the data context and market research findings, provide comprehensive market insights.

DATA CONTEXT:
{data_context}

MARKET RESEARCH FINDINGS:
{json.dumps(search_results, indent=2)}

SOURCES FOR CITATION (MUST INCLUDE ALL):
{json.dumps(sources_list, indent=2)}

Provide analysis covering:
1. Industry overview and current trends
2. Competitive landscape analysis
3. Market opportunities and challenges
4. Benchmarking insights from industry data
5. Strategic recommendations based on findings

CRITICAL REQUIREMENT: Include proper citations for ALL sources listed above. Format citations as:
- Source Title (URL) - Brief description of relevance

Even if sources are placeholder URLs, you must reference them in your analysis.

Write your analysis in clear, professional plain English without any markdown formatting."""

        return self.execute_with_retry(research_prompt)

    def _generate_search_queries(self, data_context: str) -> List[str]:
        """Return at most three search queries derived from ``data_context``.

        One query is produced per industry keyword found in the context
        (case-insensitive), followed by two generic queries; the list is then
        cut to its first three entries.
        """
        haystack = data_context.lower()
        industry_keywords = ("sales", "revenue", "customers", "products", "marketing", "finance")

        # NOTE(review): the year in these queries is hard-coded.
        queries = [
            f"{keyword} data analysis trends 2024"
            for keyword in industry_keywords
            if keyword.lower() in haystack
        ]
        queries += [
            "data analytics market trends 2024",
            "business intelligence best practices"
        ]

        return queries[:3]

class PlannerAgent(BaseAgent):
    """Agent that turns data and market insights into a written analysis plan."""

    def __init__(self, openrouter_client: OpenRouterClient):
        """Register the agent under the name "Analysis Planner"."""
        super().__init__(
            "Analysis Planner",
            "Senior Data Science Strategist",
            "analysis planning, statistical methodology, and project management",
            openrouter_client,
        )

    def execute(self, data_insights: str, market_insights: str) -> str:
        """Return the model's analysis plan for the given insights.

        Args:
            data_insights: Text of the data profile to plan from.
            market_insights: Text of the market research to plan from.
        """
        planning_prompt = f"""Create a comprehensive analysis plan based on data insights and market research.

DATA INSIGHTS:
{data_insights}

MARKET INSIGHTS:
{market_insights}

Create a detailed analysis plan including:
1. Analysis objectives and hypotheses to test
2. Statistical methods and techniques to apply
3. Visualization requirements for different audiences
4. Data preprocessing steps and quality checks
5. Model selection and validation approaches
6. Success metrics and key performance indicators
7. Implementation timeline with milestones
8. Risk assessment and mitigation strategies

Write your plan in clear, professional plain English without any markdown formatting."""

        return self.execute_with_retry(planning_prompt)

# Cell 6: Specialized agents (4-7) - Code generation and review
class DataScientistCoderAgent(BaseAgent):
    """Agent that asks the model for executable analysis code."""

    def __init__(self, openrouter_client: OpenRouterClient):
        """Register the agent under the name "Data Scientist Coder"."""
        super().__init__(
            "Data Scientist Coder",
            "Senior Data Scientist",
            "Python programming, statistical analysis, machine learning, and data visualization",
            openrouter_client,
        )

    def execute(self, analysis_plan: str, csv_data: Dict[str, pd.DataFrame], iteration: int = 1) -> str:
        """Generate analysis code (iteration 1) or refine the previous code.

        Args:
            analysis_plan: Plan text embedded in the first-iteration prompt.
            csv_data: Mapping of dataset name to DataFrame, summarised into
                the prompt.
            iteration: 1 for a fresh generation; any other value produces a
                refinement prompt built from the ``previous_code`` and
                ``review_feedback`` entries of the agent context.
        """
        # The data summary is computed for every iteration, although only the
        # first-iteration prompt embeds it.
        data_context = self._create_data_context(csv_data)

        if iteration != 1:
            return self.execute_with_retry(self._refinement_prompt(iteration))
        return self.execute_with_retry(self._initial_prompt(analysis_plan, data_context))

    def _initial_prompt(self, analysis_plan: str, data_context: str) -> str:
        """Return the first-iteration code-generation prompt."""
        return f"""You are a Python code generator. Generate ONLY executable Python code, no explanations or text.

ANALYSIS PLAN:
{analysis_plan}

DATA CONTEXT:
{data_context}

CRITICAL HIGH-VOLUME DATA INSTRUCTIONS:
- Start with these EXACT import statements:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

- Generate ONLY Python code after the imports
- Include data loading and preprocessing
- Add statistical analysis
- Create visualizations using matplotlib and seaborn

COMPREHENSIVE HIGH-VOLUME DATA HANDLING:
- Use UNIQUE variable names for each data subset (e.g., df_sample_1, df_sample_2, df_numeric_only, df_categorical_only)
- When sampling data, use DIFFERENT variable names for each sample
- Track which data has been used to avoid overlap
- Use descriptive variable names that indicate the data subset
- For multiple visualizations, use different data subsets or samples
- IMPORTANT: Save all plots using plt.savefig() with descriptive filenames
- Include error handling with try-except blocks
- Add comments using # symbol
- Do NOT include any explanatory text
- Do NOT include "Certainly!" or "Let's break down"
- Output should be executable Python code only

DATA TYPE SAFETY REQUIREMENTS:
- ALWAYS check data types before mathematical operations
- Use pd.to_numeric() with errors='coerce' for safe conversion
- Handle categorical data appropriately
- Never assume data types without validation
- Use df.select_dtypes(include=[np.number]) for numeric operations
- Use df.select_dtypes(include=['object']) for categorical operations

VISUALIZATION REQUIREMENTS FOR HIGH-VOLUME DATA:
- Create at least 5 different types of visualizations
- Use DIFFERENT data subsets for each visualization
- Use plt.savefig('figure_1.png'), plt.savefig('figure_2.png'), etc.
- Include scatter plots, line plots, bar charts, heatmaps, and distribution plots
- Sample different portions of data for each visualization
- Use unique variable names for each data subset
- Make sure to call plt.show() after each plot

EXAMPLE VARIABLE NAMING FOR HIGH-VOLUME DATA:
- df_sample_1 = df.head(1000)  # First 1000 rows
- df_sample_2 = df.tail(1000)  # Last 1000 rows
- df_sample_3 = df.sample(1000)  # Random sample
- df_numeric_only = df.select_dtypes(include=[np.number])  # Numeric columns only
- df_categorical_only = df.select_dtypes(include=['object'])  # Categorical columns only
- df_subset_1 = df[df['column'] > threshold]  # Filtered subset
- df_subset_2 = df.groupby('category').head(500)  # Grouped subset

Generate the complete Python code starting with the imports:"""

    def _refinement_prompt(self, iteration: int) -> str:
        """Return the prompt asking the model to revise the previous code."""
        return f"""Refine the following code based on the review feedback:

PREVIOUS CODE:
{self.state.context.get('previous_code', '')}

REVIEW FEEDBACK:
{self.state.context.get('review_feedback', '')}

ITERATION: {iteration}

CRITICAL INSTRUCTIONS FOR HIGH-VOLUME DATA:
- Start with these EXACT import statements:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

- Fix any issues identified in the feedback
- Improve code quality and functionality
- Ensure all visualizations are properly saved
- Use UNIQUE variable names for each data subset
- Avoid reusing the same data in multiple visualizations
- Generate ONLY executable Python code
- Do NOT include any explanatory text

Generate the improved Python code:"""

    def _create_data_context(self, csv_data: Dict[str, pd.DataFrame]) -> str:
        """Describe each dataset as prompt text, including a size-based hint.

        Datasets with more than 10000 rows are labelled high-volume, more
        than 1000 medium-volume, and anything else small.
        """
        pieces = []
        for name, df in csv_data.items():
            row_count = df.shape[0]
            pieces.append(f"\nDataset '{name}':\n")
            pieces.append(f"- Shape: {df.shape}\n")
            pieces.append(f"- Columns: {list(df.columns)}\n")
            pieces.append(f"- Data types: {df.dtypes.to_dict()}\n")
            pieces.append(f"- Missing values: {df.isnull().sum().to_dict()}\n")
            pieces.append(f"- Sample: {df.head(2).to_dict()}\n")

            if row_count > 10000:
                pieces.append(f"- HIGH-VOLUME DATA: {row_count} rows - Use sampling and subsets for visualizations\n")
                pieces.append("- Recommended: Create multiple samples and subsets for different analyses\n")
            elif row_count > 1000:
                pieces.append(f"- MEDIUM-VOLUME DATA: {row_count} rows - Consider sampling for complex visualizations\n")
            else:
                pieces.append(f"- SMALL DATA: {row_count} rows - Can use full dataset for visualizations\n")
        return "".join(pieces)

class DataScientistReviewerAgent(BaseAgent):
    """Agent that asks the model to review generated analysis code."""

    def __init__(self, openrouter_client: OpenRouterClient):
        """Register the agent under the name "Data Scientist Reviewer"."""
        super().__init__(
            "Data Scientist Reviewer",
            "Senior Data Science Code Reviewer",
            "code review, quality assurance, debugging, and best practices",
            openrouter_client,
        )

    def execute(self, code: str, analysis_plan: str, iteration: int) -> str:
        """Return the model's written review of ``code``.

        Args:
            code: Source code to review.
            analysis_plan: Plan the code is supposed to implement.
            iteration: Review round number, shown to the model.
        """
        review_prompt = f"""Review the generated Python code and provide detailed feedback.

ITERATION: {iteration}

ORIGINAL ANALYSIS PLAN:
{analysis_plan}

CODE TO REVIEW:
{code}

Review criteria:
1. Code correctness and logic
2. Adherence to analysis plan
3. Best practices and conventions
4. Error handling and robustness
5. Performance and efficiency
6. Documentation and comments
7. Visualization quality
8. Statistical validity
9. HIGH-VOLUME DATA handling - Check for unique variable names
10. Data subset management - Ensure no data overlap
11. Data type safety - Verify proper type handling

Provide:
- Specific feedback on issues
- Suggestions for improvement
- Code quality rating (1-10)
- Recommendation: APPROVE, REVISE, or REJECT

Write your review in clear, professional plain English without any markdown formatting."""

        return self.execute_with_retry(review_prompt)

class BusinessInsightsTranslatorAgent(BaseAgent):
    """Agent that rewrites technical analysis results as business-facing insights."""

    def __init__(self, openrouter_client: OpenRouterClient):
        """Register the agent under the name "Business Insights Translator"."""
        super().__init__(
            name="Business Insights Translator",
            role="Business Intelligence Translator",
            expertise="translating technical analysis results into business-friendly insights and actionable recommendations",
            openrouter_client=openrouter_client
        )

    def execute(self, analysis_results: Dict[str, Any], data_context: str) -> str:
        """Return the model's business-language reading of ``analysis_results``.

        Args:
            analysis_results: Arbitrary analysis output; embedded in the
                prompt as JSON.
            data_context: Text describing the underlying data.
        """
        # ``default=str`` keeps values json cannot encode natively (numpy
        # scalars, timestamps, DataFrames, ...) from raising TypeError before
        # the retry wrapper is even reached. This matches how the base class
        # serialises the agent context.
        results_json = json.dumps(analysis_results, indent=2, default=str)

        task = f"""Translate the technical analysis results into clear, business-friendly insights that non-technical stakeholders can understand and act upon.

DATA CONTEXT:
{data_context}

TECHNICAL ANALYSIS RESULTS:
{results_json}

Your task is to:
1. Identify the key findings from the technical analysis
2. Translate statistical results into business implications
3. Explain what the data means in practical terms
4. Highlight trends, patterns, and anomalies
5. Provide actionable insights for decision-making
6. Avoid technical jargon and use plain business language
7. Focus on what matters most for business strategy

Structure your response as:
1. Executive Summary of Key Findings
2. Data Trends and Patterns Identified
3. Business Implications and Opportunities
4. Risk Factors and Concerns
5. Recommended Actions and Next Steps

Write your analysis in clear, professional plain English without any markdown formatting. Make it accessible to business executives and decision-makers."""

        return self.execute_with_retry(task)

class DecisionMakerAgent(BaseAgent):
    """Agent that compiles the final written report from earlier agent outputs."""

    def __init__(self, openrouter_client: OpenRouterClient):
        """Register this agent's name, role and expertise with the base agent."""
        agent_profile = {
            "name": "Decision Maker",
            "role": "Senior Business Analyst",
            "expertise": "business intelligence, decision making, and report generation",
        }
        super().__init__(openrouter_client=openrouter_client, **agent_profile)

    def execute(self, all_outputs: Dict[str, str], analysis_results: Dict[str, Any], business_insights: str) -> str:
        """Build the report prompt and submit it through ``execute_with_retry``.

        Args:
            all_outputs: Agent outputs by key; only the report-relevant keys are
                forwarded, so per-iteration entries are left out of the prompt.
            analysis_results: Analysis output, embedded as indented JSON.
            business_insights: Business translation text, embedded verbatim.

        Returns:
            Whatever ``execute_with_retry`` returns for the prompt.
        """
        report_keys = ["data_understander", "market_researcher", "planner", "decision_maker"]
        clean_outputs = {
            agent_key: agent_text
            for agent_key, agent_text in all_outputs.items()
            if agent_key in report_keys
        }

        outputs_json = json.dumps(clean_outputs, indent=2)
        results_json = json.dumps(analysis_results, indent=2)

        task = f"""Compile a comprehensive, professional analysis report based on clean agent outputs, analysis results, and business insights.

CLEAN AGENT OUTPUTS (NO ITERATION DATA):
{outputs_json}

FINAL ANALYSIS RESULTS:
{results_json}

BUSINESS INSIGHTS TRANSLATION:
{business_insights}

Create a professional report with:
1. Executive Summary with key findings
2. Data Overview and Quality Assessment
3. Market Context and Industry Insights
4. Detailed Analysis Findings and Results
5. Business Implications and Insights
6. Key Insights and Patterns Discovered
7. Strategic Recommendations for Action
8. Implementation Roadmap with Timeline
9. Risk Assessment and Mitigation Plans
10. Conclusion and Next Steps

Write your report in clear, professional plain English without any markdown formatting. Use proper paragraph structure and complete sentences."""

        return self.execute_with_retry(task)

# Cell 7: Main orchestrator that runs all agents
class MultiAgentOrchestrator:
    def __init__(self, openrouter_api_key: str, brave_api_key: str, model: str = "openai/gpt-4o-mini"):
        """Create the API clients, one instance of each agent, and a fresh workflow state.

        Args:
            openrouter_api_key: Key passed to ``OpenRouterClient``.
            brave_api_key: Key passed to ``BraveSearchClient``.
            model: Model identifier passed to ``OpenRouterClient``.
        """
        llm_client = OpenRouterClient(openrouter_api_key, model)
        search_client = BraveSearchClient(brave_api_key)
        self.openrouter_client = llm_client
        self.brave_client = search_client

        # Every agent shares the same LLM client; only the market researcher
        # also receives the search client.
        agents = {}
        agents["data_understander"] = DataUnderstanderAgent(llm_client)
        agents["market_researcher"] = MarketResearcherAgent(llm_client, search_client)
        agents["planner"] = PlannerAgent(llm_client)
        agents["coder"] = DataScientistCoderAgent(llm_client)
        agents["reviewer"] = DataScientistReviewerAgent(llm_client)
        agents["business_translator"] = BusinessInsightsTranslatorAgent(llm_client)
        agents["decision_maker"] = DecisionMakerAgent(llm_client)
        self.agents = agents

        # Mutable record of everything the workflow produces.
        self.workflow_state = dict(
            csv_data={},
            agent_outputs={},
            analysis_results={},
            business_insights="",
            current_step=0,
            total_steps=7,
            status="initialized",
            saved_figures=[],
            iteration_results={},
            successful_iteration=None,
            final_code_approved=False,
            analysis_successful=False,
            executed_code="",
            analysis_summary="",
            final_execution_done=False,
            data_volume_info={},
        )

    def load_csv_data(self, csv_files: Dict[str, str]) -> bool:
        try:
            for filename, content in csv_files.items():
                df = pd.read_csv(io.StringIO(content))
                self.workflow_state["csv_data"][filename] = df

                self.workflow_state["data_volume_info"][filename] = {
                    "rows": df.shape[0],
                    "columns": df.shape[1],
                    "size_category": "HIGH" if df.shape[0] > 10000 else "MEDIUM" if df.shape[0] > 1000 else "SMALL"
                }

            print(f"Loaded {len(csv_files)} CSV files successfully")

            for filename, info in self.workflow_state["data_volume_info"].items():
                print(f"{filename}: {info['rows']} rows × {info['columns']} columns ({info['size_category']} volume)")

            return True

        except Exception as e:
            print(f"Error loading CSV data: {str(e)}")
            return False

    def execute_workflow(self) -> Dict[str, Any]:
        print("Starting Multi-Agent Analytics Workflow...")
        self.workflow_state["status"] = "running"

        try:
            print("\nStep 1: Data Understanding")
            data_insights = self.agents["data_understander"].execute(self.workflow_state["csv_data"])
            self.workflow_state["agent_outputs"]["data_understander"] = data_insights
            self.workflow_state["current_step"] = 1

            print("\nStep 2: Market Research")
            market_insights = self.agents["market_researcher"].execute(data_insights)
            self.workflow_state["agent_outputs"]["market_researcher"] = market_insights
            self.workflow_state["current_step"] = 2

            print("\nStep 3: Analysis Planning")
            analysis_plan = self.agents["planner"].execute(data_insights, market_insights)
            self.workflow_state["agent_outputs"]["planner"] = analysis_plan
            self.workflow_state["current_step"] = 3

            print("\nSteps 4 & 5: Iterative Coding and Review")
            final_code = self._execute_coding_iteration_loop(analysis_plan)
            self.workflow_state["agent_outputs"]["final_code"] = final_code
            self.workflow_state["executed_code"] = final_code
            self.workflow_state["current_step"] = 5

            print("\nExecuting Final Analysis Code")
            analysis_results = self._execute_analysis_code(final_code, test_mode=False)
            self.workflow_state["analysis_results"] = analysis_results
            self.workflow_state["final_execution_done"] = True

            self.workflow_state["analysis_summary"] = self._generate_analysis_summary(analysis_results)

            print("\nStep 6: Business Insights Translation")
            business_insights = self.agents["business_translator"].execute(analysis_results, data_insights)
            self.workflow_state["agent_outputs"]["business_translator"] = business_insights
            self.workflow_state["business_insights"] = business_insights
            self.workflow_state["current_step"] = 6

            print("\nStep 7: Decision Making and Report Generation")
            final_report = self.agents["decision_maker"].execute(
                self.workflow_state["agent_outputs"],
                analysis_results,
                business_insights
            )
            self.workflow_state["agent_outputs"]["decision_maker"] = final_report
            self.workflow_state["current_step"] = 7

            self.workflow_state["status"] = "completed"
            print("\nMulti-Agent Workflow Completed Successfully!")

            return self.workflow_state

        except Exception as e:
            print(f"\nWorkflow failed: {str(e)}")
            print(f"Workflow completed up to step {self.workflow_state['current_step']}")
            self.workflow_state["status"] = "error"
            self.workflow_state["error"] = str(e)

            try:
                print("\nGenerating partial report...")
                final_report = self.agents["decision_maker"].execute(
                    self.workflow_state["agent_outputs"],
                    self.workflow_state.get("analysis_results", {}),
                    self.workflow_state.get("business_insights", "")
                )
                self.workflow_state["agent_outputs"]["decision_maker"] = final_report
            except:
                print("Could not generate partial report")

            return self.workflow_state

    def _execute_coding_iteration_loop(self, analysis_plan: str, max_iterations: int = 5) -> str:
        current_code = ""
        approved_code = None

        for iteration in range(max_iterations):
            print(f"\nCoding Iteration {iteration + 1}/{max_iterations}")

            if iteration == 0:
                current_code = self.agents["coder"].execute(analysis_plan, self.workflow_state["csv_data"], iteration + 1)
            else:
                self.agents["coder"].update_context("previous_code", current_code)
                self.agents["coder"].update_context("review_feedback", self.workflow_state["agent_outputs"].get(f"reviewer_feedback_iter_{iteration}", ""))
                current_code = self.agents["coder"].execute(analysis_plan, self.workflow_state["csv_data"], iteration + 1)

            print(f"Testing code from iteration {iteration + 1}")
            test_results = self._execute_analysis_code(current_code, test_mode=True)

            if test_results.get("execution_status") == "success":
                print(f"Analysis successful in iteration {iteration + 1}")
                self.workflow_state["analysis_successful"] = True

                self.workflow_state["iteration_results"]["successful_analysis"] = test_results
                self.workflow_state["successful_iteration"] = iteration + 1

                print(f"Analysis working! Stopping iterations immediately at iteration {iteration + 1}")
                approved_code = current_code
                break

            if iteration == 0:
                self.workflow_state["iteration_results"][f"iteration_{iteration + 1}"] = test_results

            review_feedback = self.agents["reviewer"].execute(current_code, analysis_plan, iteration + 1)
            self.workflow_state["agent_outputs"][f"reviewer_feedback_iter_{iteration + 1}"] = review_feedback

            if "APPROVE" in review_feedback.upper():
                print(f"Code approved after {iteration + 1} iterations")
                approved_code = current_code
                self.workflow_state["successful_iteration"] = iteration + 1
                self.workflow_state["final_code_approved"] = True
                break
            elif "REJECT" in review_feedback.upper() and iteration == max_iterations - 1:
                print("Code rejected after all iterations, using latest version")
                approved_code = current_code
                break

            print(f"Review feedback received, continuing to iteration {iteration + 2}")

        return approved_code or current_code

    def _execute_analysis_code(self, code: str, test_mode: bool = False) -> Dict[str, Any]:
        try:
            exec_globals = {
                '__builtins__': __builtins__,
                'pd': pd, 'np': np, 'plt': plt, 'sns': sns,
                'go': go, 'px': px, 'make_subplots': make_subplots,
                'json': json, 'time': time, 'warnings': warnings,
                'os': os, 'io': io, 'base64': base64, 're': re
            }

            for name, df in self.workflow_state["csv_data"].items():
                clean_name = name.replace('.csv', '').replace(' ', '_').replace('-', '_')

                df_copy = df.copy(deep=True)

                exec_globals[f"df_{clean_name}"] = df_copy
                exec_globals[f"df_{clean_name}_types"] = df_copy.dtypes.to_dict()
                exec_globals[f"df_{clean_name}_numeric"] = df_copy.select_dtypes(include=[np.number])
                exec_globals[f"df_{clean_name}_categorical"] = df_copy.select_dtypes(include=['object'])
                exec_globals[f"df_{clean_name}_info"] = {
                    "shape": df_copy.shape,
                    "columns": list(df_copy.columns),
                    "dtypes": df_copy.dtypes.to_dict(),
                    "has_mixed_types": any(df_copy[col].dtype == 'object' for col in df_copy.columns)
                }

            exec_globals['safe_numeric_conversion'] = lambda series: pd.to_numeric(series, errors='coerce')
            exec_globals['safe_string_conversion'] = lambda series: series.astype(str)
            exec_globals['check_data_types'] = lambda df: df.dtypes.to_dict()

            import matplotlib
            matplotlib.use('Agg')
            plt.clf()
            plt.cla()
            plt.close('all')

            essential_imports = """
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

def safe_analysis(df, column):
    if df[column].dtype == 'object':
        return df[column].value_counts()
    else:
        return df[column].describe()

def safe_visualization(df, column):
    if df[column].dtype == 'object':
        plt.figure()
        df[column].value_counts().head(10).plot(kind='bar')
        plt.title(f'Top 10 {column}')
        plt.xticks(rotation=45)
        plt.tight_layout()
    else:
        plt.figure()
        df[column].hist(bins=30)
        plt.title(f'Distribution of {column}')
        plt.tight_layout()
"""

            full_code = essential_imports + "\n" + code

            exec(full_code, exec_globals)

            if not test_mode and not self.workflow_state["final_execution_done"]:
                self._save_generated_figures()

            results = {}
            analysis_summary_parts = []

            for key, value in exec_globals.items():
                if not key.startswith('_') and key not in ['pd', 'np', 'plt', 'sns', 'go', 'px', 'make_subplots', 'json', 'time', 'warnings', 'os', 'io', 'base64', 're', 'safe_numeric_conversion', 'safe_string_conversion', 'check_data_types', 'safe_analysis', 'safe_visualization']:
                    if isinstance(value, (pd.DataFrame, pd.Series, np.ndarray, dict, list, str, int, float)):
                        results[key] = str(value)[:1000]

                        if isinstance(value, pd.DataFrame):
                            analysis_summary_parts.append(f"DataFrame '{key}': {value.shape[0]} rows, {value.shape[1]} columns")
                        elif isinstance(value, pd.Series):
                            analysis_summary_parts.append(f"Series '{key}': {len(value)} values")
                        elif isinstance(value, (int, float)):
                            analysis_summary_parts.append(f"Metric '{key}': {value}")
                        elif isinstance(value, str) and len(value) < 200:
                            analysis_summary_parts.append(f"Result '{key}': {value}")

            try:
                if plt.get_fignums():
                    fig_count = len(plt.get_fignums())
                    print(f"Generated {fig_count} visualizations")
                    results['matplotlib_figures'] = fig_count
                    results['visualization_count'] = fig_count
                    analysis_summary_parts.append(f"Generated {fig_count} visualizations")
            except:
                pass

            results['execution_status'] = 'success'
            results['analysis_summary'] = '; '.join(analysis_summary_parts)

            return results

        except Exception as e:
            print(f"Code execution failed: {str(e)}")
            print(f"Code that failed:\n{code[:500]}...")
            return {
                "error": str(e),
                "traceback": traceback.format_exc(),
                "execution_status": "failed",
                "analysis_summary": f"Code execution failed: {str(e)}"
            }

    def _save_generated_figures(self) -> None:
        """Write every open matplotlib figure to ``figure_<n>.png`` and close it.

        The saved filenames replace ``workflow_state["saved_figures"]``. A
        failure on one figure is printed and the remaining figures are still
        processed; any other failure is printed as well, never raised.
        """
        try:
            import matplotlib.pyplot as plt

            saved_files = []
            self.workflow_state["saved_figures"] = saved_files

            # Snapshot the figure numbers: figures are closed while looping.
            open_figures = list(plt.get_fignums())

            for position, fig_num in enumerate(open_figures, start=1):
                filename = f"figure_{position}.png"
                try:
                    fig = plt.figure(fig_num)

                    fig.savefig(filename, dpi=300, bbox_inches='tight')
                    saved_files.append(filename)
                    print(f"Saved visualization: {filename}")

                    plt.close(fig)

                except Exception as e:
                    print(f"Error saving figure {fig_num}: {str(e)}")

            plt.clf()
            plt.cla()
            plt.close('all')

            import gc
            gc.collect()

        except Exception as e:
            print(f"Could not save figures: {str(e)}")

    def _generate_analysis_summary(self, analysis_results: Dict[str, Any]) -> str:
        if not analysis_results:
            return "No analysis results available."

        summary_parts = []

        if "execution_status" in analysis_results:
            status = analysis_results["execution_status"]
            if status == "success":
                summary_parts.append("Analysis executed successfully.")
            else:
                summary_parts.append(f"Analysis execution failed: {analysis_results.get('error', 'Unknown error')}")

        if "visualization_count" in analysis_results:
            count = analysis_results["visualization_count"]
            summary_parts.append(f"Generated {count} data visualizations.")

        if "analysis_summary" in analysis_results:
            summary_parts.append(f"Analysis results: {analysis_results['analysis_summary']}")

        for key, value in analysis_results.items():
            if key not in ["execution_status", "visualization_count", "analysis_summary", "error", "traceback"]:
                if isinstance(value, str) and len(value) < 100:
                    summary_parts.append(f"{key}: {value}")

        return " ".join(summary_parts)

# Cell 8: PDF report generator
class PDFReportGenerator:
    def __init__(self):
        """Load the sample stylesheet and derive the title, heading, body and bullet styles."""
        sheet = getSampleStyleSheet()
        self.styles = sheet

        self.title_style = ParagraphStyle(
            'CustomTitle',
            parent=sheet['Heading1'],
            fontSize=18,
            spaceAfter=30,
            alignment=TA_CENTER,
            textColor=colors.darkblue,
        )
        self.heading_style = ParagraphStyle(
            'CustomHeading',
            parent=sheet['Heading2'],
            fontSize=14,
            spaceAfter=12,
            textColor=colors.black,
        )

        # Body and bullet text share the same base settings; bullets add indents.
        text_settings = dict(parent=sheet['Normal'], fontSize=11, spaceAfter=6, alignment=TA_LEFT)
        self.body_style = ParagraphStyle('CustomBody', **text_settings)
        self.bullet_style = ParagraphStyle('CustomBullet', leftIndent=20, bulletIndent=10, **text_settings)

    def _add_images_to_pdf(self, story: List, image_paths: List[str]) -> None:
        """Append each existing image, scaled to fit the page, followed by a caption.

        Images are shrunk (never enlarged) to fit both the usable width and the
        usable height of an A4 page. Constraining only the width lets a tall
        figure exceed the frame height, which makes the document build fail.

        Args:
            story: Flowable list to append to (mutated in place).
            image_paths: Paths of image files; paths that do not exist are skipped.
        """
        # Usable area of an A4 page with 1-inch margins, less 6pt of frame padding
        # on each side - assumes the document uses SimpleDocTemplate's defaults.
        frame_padding = 12
        max_width = A4[0] - 2 * inch - frame_padding
        max_height = A4[1] - 2 * inch - frame_padding

        for image_path in image_paths:
            try:
                if not os.path.exists(image_path):
                    continue

                img = Image(image_path)

                # One factor for both axes keeps the aspect ratio; cap at 1.0 so
                # small images keep their native size.
                scale = min(max_width / img.imageWidth, max_height / img.imageHeight, 1.0)
                if scale < 1.0:
                    img.drawWidth = img.imageWidth * scale
                    img.drawHeight = img.imageHeight * scale

                story.append(img)
                story.append(Spacer(1, 12))

                caption = f"Figure: {image_path.replace('.png', '').replace('_', ' ').title()}"
                story.append(Paragraph(caption, self.body_style))
                story.append(Spacer(1, 12))

            except Exception as e:
                print(f"Could not add image {image_path}: {str(e)}")

    def _clean_text(self, text: str) -> str:
        text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)
        text = re.sub(r'\*(.*?)\*', r'\1', text)
        text = re.sub(r'##\s*(.*)', r'\1', text)
        text = re.sub(r'#\s*(.*)', r'\1', text)
        text = re.sub(r'`(.*?)`', r'\1', text)
        text = re.sub(r'\[(.*?)\]\(.*?\)', r'\1', text)

        text = re.sub(r'Certainly!.*?', '', text)
        text = re.sub(r"Let's break down.*?\.", '', text)
        text = re.sub(r'Here\'s.*?\.', '', text)

        return text.strip()

    def _format_content_for_pdf(self, content: str) -> List:
        """Turn free text into a list of heading and body paragraphs.

        The text is cleaned with ``_clean_text`` and split on blank lines. A
        chunk shorter than 100 characters that starts with "1." to "9.", is all
        upper case, or ends with a colon is styled as a heading; everything
        else is body text.

        Each chunk is XML-escaped before it is wrapped in a ``Paragraph``:
        Paragraph text is parsed as markup, so a raw ``<``, ``>`` or ``&`` in
        model output (for example "p < 0.05" or "<NA>") could otherwise break
        the PDF build.

        Args:
            content: Raw text to format.

        Returns:
            A list of ``Paragraph`` flowables, in document order.
        """
        # Function-scope import keeps this block self-contained.
        from xml.sax.saxutils import escape

        story = []
        numbered_prefixes = ('1.', '2.', '3.', '4.', '5.', '6.', '7.', '8.', '9.')

        for para in self._clean_text(content).split('\n\n'):
            para = para.strip()
            if not para:
                continue

            # Classify on the unescaped text so the length check is unaffected.
            looks_like_heading = len(para) < 100 and (
                para.startswith(numbered_prefixes)
                or para.isupper()
                or para.endswith(':')
            )
            style = self.heading_style if looks_like_heading else self.body_style
            story.append(Paragraph(escape(para), style))

        return story

    def generate_pdf_report(self, workflow_state: Dict[str, Any], filename: str) -> str:
        """Build the PDF report from the workflow state and return its path.

        Sections are added only when the matching data is present: data
        overview, agent outputs for steps 1-3, analysis status, saved figures,
        code development summary, business insights and the final report.

        Dataset names and the execution error message are XML-escaped before
        they go into a ``Paragraph``, because Paragraph text is parsed as
        markup and a raw ``<``, ``>`` or ``&`` could break the build.

        Args:
            workflow_state: State dict produced by the orchestrator.
            filename: Output path for the PDF.

        Returns:
            ``filename``, after the document has been built.
        """
        # Function-scope import keeps this block self-contained.
        from xml.sax.saxutils import escape

        doc = SimpleDocTemplate(filename, pagesize=A4)
        story = []

        story.append(Paragraph("Multi-Agent Analytics Report", self.title_style))
        story.append(Spacer(1, 20))

        story.append(Paragraph("Executive Summary", self.heading_style))
        story.append(Paragraph(
            "This comprehensive analysis was conducted using a multi-agent AI system with advanced prompt engineering protocols. "
            "The analysis covers data understanding, market research, statistical analysis, and strategic recommendations.",
            self.body_style
        ))
        story.append(Spacer(1, 12))

        if "csv_data" in workflow_state:
            story.append(Paragraph("Data Overview", self.heading_style))
            data_text = "Analysis conducted on the following datasets:"
            story.append(Paragraph(data_text, self.body_style))

            for name, df in workflow_state["csv_data"].items():
                volume_info = workflow_state.get("data_volume_info", {}).get(name, {})
                volume_category = volume_info.get("size_category", "UNKNOWN")
                safe_name = escape(str(name))
                story.append(Paragraph(f"• {safe_name}: {df.shape[0]} rows × {df.shape[1]} columns ({volume_category} volume)", self.bullet_style))
            story.append(Spacer(1, 12))

        agent_outputs = workflow_state.get("agent_outputs")

        if "agent_outputs" in workflow_state:
            agent_order = [
                ("data_understander", "Step 1: Data Understanding and Profiling"),
                ("market_researcher", "Step 2: Market Research and Industry Analysis"),
                ("planner", "Step 3: Analysis Planning and Strategy")
            ]

            for agent_key, section_title in agent_order:
                if agent_key in agent_outputs:
                    story.append(Paragraph(section_title, self.heading_style))
                    story.extend(self._format_content_for_pdf(agent_outputs[agent_key]))
                    story.append(Spacer(1, 12))

        analysis_results = workflow_state.get("analysis_results")
        if analysis_results:
            story.append(Paragraph("Step 4-5: Statistical Analysis Results", self.heading_style))

            if "execution_status" in analysis_results:
                if analysis_results["execution_status"] == "success":
                    story.append(Paragraph("Analysis executed successfully", self.body_style))
                else:
                    # Exception text often contains markup-like fragments such
                    # as "<class 'str'>", so it must be escaped.
                    error_text = escape(str(analysis_results.get('error', 'Unknown error')))
                    story.append(Paragraph(f"Analysis execution failed: {error_text}", self.body_style))

            if "visualization_count" in analysis_results:
                count = analysis_results["visualization_count"]
                story.append(Paragraph(f"Generated {count} data visualizations", self.body_style))

            story.append(Spacer(1, 6))
            story.append(Spacer(1, 12))

        saved_figures = workflow_state.get("saved_figures")
        if saved_figures:
            story.append(Paragraph("Data Visualizations", self.heading_style))
            story.append(Paragraph(
                f"The following {len(saved_figures)} visualizations were generated during the analysis:",
                self.body_style
            ))
            story.append(Spacer(1, 12))
            self._add_images_to_pdf(story, saved_figures)

        if workflow_state.get("successful_iteration"):
            story.append(Paragraph("Code Development Process", self.heading_style))
            story.append(Paragraph(
                f"The analysis code was successfully developed and approved after {workflow_state['successful_iteration']} iteration(s). "
                f"The final code was {'approved' if workflow_state.get('final_code_approved', False) else 'used after review process'}.",
                self.body_style
            ))
            story.append(Spacer(1, 12))

        if "agent_outputs" in workflow_state:
            closing_sections = [
                ("business_translator", "Step 6: Business Insights Translation"),
                ("decision_maker", "Step 7: Final Analysis Results and Recommendations")
            ]
            for agent_key, section_title in closing_sections:
                if agent_key in agent_outputs:
                    story.append(Paragraph(section_title, self.heading_style))
                    story.extend(self._format_content_for_pdf(agent_outputs[agent_key]))
                    story.append(Spacer(1, 12))

        story.append(Spacer(1, 20))
        story.append(Paragraph("Report generated by Multi-Agent Analytics System", self.body_style))
        story.append(Paragraph("Powered by Advanced AI Agents with Chain-of-Thought Reasoning", self.body_style))

        doc.build(story)
        return filename

# Cell 9: User interface for file upload and analysis
def create_file_upload_ui() -> widgets.VBox:
    """Build the notebook UI: key inputs, model picker, CSV upload and run button.

    Clicking the run button validates the inputs, loads the uploaded CSV files
    into a new orchestrator, runs the workflow and writes a PDF report. If PDF
    generation fails, a text summary of the agent outputs is printed instead.

    Returns:
        A ``VBox`` containing all widgets, ready to be displayed.
    """
    upload_widget = widgets.FileUpload(
        accept='.csv',
        multiple=True,
        description='Upload CSV Files',
        style={'button_color': '#667eea'}
    )

    api_key_input = widgets.Password(
        placeholder='Enter OpenRouter API Key',
        description='OpenRouter API Key:',
        style={'description_width': 'initial'}
    )

    brave_key_input = widgets.Password(
        placeholder='Enter Brave Search API Key',
        description='Brave API Key:',
        style={'description_width': 'initial'}
    )

    model_dropdown = widgets.Dropdown(
        options=[
            # Label now matches the model id it actually selects.
            ('GPT-4o Mini', 'openai/gpt-4o-mini'),
            ('DeepSeek R1', 'deepseek/deepseek-r1'),
            ('Claude 3.5 Sonnet', 'anthropic/claude-3.5-sonnet'),
            ('GPT-4', 'openai/gpt-4'),
            ('GPT-4 Turbo', 'openai/gpt-4-turbo'),
            ('GPT-4o', 'openai/gpt-4o')
        ],
        value='openai/gpt-4o-mini',
        description='Model:',
        style={'description_width': 'initial'}
    )

    filename_input = widgets.Text(
        placeholder='Enter report filename (without extension)',
        description='Report Name:',
        value='analytics_report',
        style={'description_width': 'initial'}
    )

    run_button = widgets.Button(
        description='Run Multi-Agent Analysis',
        button_style='success',
        layout=widgets.Layout(width='300px', height='50px')
    )

    progress_bar = widgets.IntProgress(
        value=0,
        min=0,
        max=7,
        description='Progress:',
        bar_style='info',
        orientation='horizontal'
    )

    output_area = widgets.Output()

    def on_run_clicked(b):
        """Validate inputs, run the workflow and report the outcome in the output area."""
        with output_area:
            clear_output(wait=True)

            if not upload_widget.value:
                print("Please upload CSV files first")
                return

            if not api_key_input.value:
                print("Please enter OpenRouter API key")
                return

            if not brave_key_input.value:
                print("Please enter Brave Search API key")
                return

            if not filename_input.value.strip():
                print("Please enter a report filename")
                return

            # FileUpload.value is a dict keyed by filename in ipywidgets 7.x and
            # a tuple of per-file entries (with "name"/"content") in 8.x.
            uploads = upload_widget.value
            if isinstance(uploads, dict):
                named_payloads = [(name, entry['content']) for name, entry in uploads.items()]
            else:
                named_payloads = [(entry['name'], entry['content']) for entry in uploads]

            csv_files = {}
            for filename, payload in named_payloads:
                try:
                    # bytes() accepts both bytes and memoryview content;
                    # utf-8-sig also drops a leading byte-order mark if present.
                    csv_files[filename] = bytes(payload).decode('utf-8-sig')
                except UnicodeDecodeError:
                    print(f"Could not read {filename}: the file is not valid UTF-8 text")
                    return

            orchestrator = MultiAgentOrchestrator(
                api_key_input.value,
                brave_key_input.value,
                model_dropdown.value
            )

            if not orchestrator.load_csv_data(csv_files):
                return

            progress_bar.value = 0

            result = orchestrator.execute_workflow()

            # Show the steps actually completed rather than always a full bar.
            progress_bar.value = result.get('current_step', 0)

            pdf_filename = f"{filename_input.value.strip()}.pdf"
            pdf_generator = PDFReportGenerator()

            try:
                pdf_generator.generate_pdf_report(result, pdf_filename)
                print(f"\nAnalysis completed! PDF report saved as: {pdf_filename}")

                print(f"\nAnalysis Summary:")
                print(f"- Status: {result['status']}")
                print(f"- Steps completed: {result['current_step']}/7")
                print(f"- Datasets analyzed: {len(result['csv_data'])}")
                print(f"- Agents executed: {len(result['agent_outputs'])}")

                if 'saved_figures' in result and result['saved_figures']:
                    fig_count = len(result['saved_figures'])
                    print(f"- Visualizations generated: {fig_count}")
                    print(f"- Saved figures: {', '.join(result['saved_figures'])}")

                if 'successful_iteration' in result and result['successful_iteration']:
                    print(f"- Code approved after iteration: {result['successful_iteration']}")
                    print(f"- Final code approved: {result.get('final_code_approved', False)}")
                    print(f"- Analysis successful: {result.get('analysis_successful', False)}")

                if result['status'] == 'completed':
                    print("Full analysis completed successfully!")
                    print("Visualizations have been included in the PDF report")
                    print("Business insights have been translated for non-technical stakeholders")
                    print("Report contains only business-friendly content")
                else:
                    print("Analysis completed with some issues - check the PDF for details")

            except Exception as e:
                print(f"PDF generation failed: {str(e)}")
                print("Generating text summary instead...")

                print(f"\nAnalysis Results Summary:")
                for agent_name, output in result['agent_outputs'].items():
                    if not agent_name.startswith('reviewer_feedback'):
                        print(f"\n{agent_name.replace('_', ' ').title()}:")
                        print(output[:500] + "..." if len(output) > 500 else output)

    run_button.on_click(on_run_clicked)

    ui = widgets.VBox([
        widgets.HTML("<h2>Multi-Agent Analytics System</h2>"),
        api_key_input,
        brave_key_input,
        model_dropdown,
        filename_input,
        upload_widget,
        run_button,
        progress_bar,
        output_area
    ])

    return ui

# Cell 10: Main execution - create and display the interface
# Build the interface, show it, then print the usage notes below it.
ui = create_file_upload_ui()
display(ui)

# One print call with a newline separator emits the same text as
# printing each line on its own.
print(
    "\nMulti-Agent Analytics System Ready!",
    "\nInstructions:",
    "1. Enter your OpenRouter API key",
    "2. Enter your Brave Search API key",
    "3. Select your preferred model",
    "4. Enter a filename for your report",
    "5. Upload one or more CSV files",
    "6. Click 'Run Multi-Agent Analysis'",
    "\nThe system will autonomously:",
    "- Analyze your data",
    "- Research market context",
    "- Plan comprehensive analysis",
    "- Generate and refine code",
    "- Execute statistical analysis",
    "- Translate technical results into business insights",
    "- Compile professional PDF report",
    sep="\n",
)