# Competitor research agent
Steps for competitor research:
1. Find a list of competitors  
2. Visit each competitor's website  
3. Collect information including:  
	- standout features
	- product and pricing tiers
	- unique selling proposition
	- marketing messages  
4. Analyze competitors  
	- identify common patterns  
	- spot potential gaps  
	- compare pricing strategies  
	- compare messaging themes

In [1]:
import asyncio
import json
import logging
import os
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple

import aiohttp
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from google import genai
from playwright.async_api import async_playwright

In [2]:
# Configure root logging at INFO. Note that basicConfig() does nothing when
# the root logger already has handlers installed.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by the classes below. With the INFO level set
# above, its debug() calls are not emitted unless a lower level is configured.
logger = logging.getLogger(__name__)

In [3]:
class ResearchLogger:
    """Record the steps and errors of one research run.

    Every instance writes text records to ``research_logs/<id>_<stamp>.log``
    (through a file handler attached to the root logger) and mirrors the
    structured data to ``research_logs/<id>_<stamp>_data.json``, which is
    rewritten after every logged step or error.

    Payloads that are not JSON-serialisable are stored via ``str()`` so that
    logging never raises in the middle of a run or inside an error handler.
    """

    def __init__(self, research_id: str):
        """Create the log directory, the log files and the in-memory record.

        Args:
            research_id: Label used as the prefix of both log file names.
        """
        self.research_id = research_id

        # Take the timestamp once so both files always share the same suffix.
        started_at = datetime.now()
        stamp = started_at.strftime('%Y%m%d_%H%M%S')
        self.log_file = f"research_logs/{research_id}_{stamp}.log"
        self.json_log_file = f"research_logs/{research_id}_{stamp}_data.json"

        # Create logs directory if it doesn't exist
        os.makedirs("research_logs", exist_ok=True)

        # Same name as the module-level logger, so this is the same logger
        # object; resolving it here keeps the class usable on its own.
        self._log = logging.getLogger(__name__)

        # Set up file handler
        self.file_handler = logging.FileHandler(self.log_file)
        self.file_handler.setFormatter(
            logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        )

        # Add file handler to root logger
        logging.getLogger('').addHandler(self.file_handler)

        self.research_data = {
            "research_id": research_id,
            "timestamp": started_at.isoformat(),
            "steps": []
        }

    @staticmethod
    def _to_json(data: Any) -> str:
        """Serialise ``data`` for logging, falling back to ``str()`` per object."""
        return json.dumps(data, indent=2, default=str)

    def log_step(self, agent_name: str, action: str, input_data: Any, output_data: Any):
        """Log a research step with both input and output data.

        Args:
            agent_name: Name of the component performing the step.
            action: Short identifier of what was done.
            input_data: Data the step started from (may be ``None``).
            output_data: Data the step produced (may be ``None``).
        """
        step_data = {
            "timestamp": datetime.now().isoformat(),
            "agent": agent_name,
            "action": action,
            "input": input_data,
            "output": output_data
        }

        self.research_data["steps"].append(step_data)

        # Log to file
        self._log.info(f"Agent: {agent_name} | Action: {action}")
        # Payloads can be whole web pages; only serialise them when the
        # debug records will actually be emitted.
        if self._log.isEnabledFor(logging.DEBUG):
            try:
                self._log.debug(f"Input: {self._to_json(input_data)}")
                self._log.debug(f"Output: {self._to_json(output_data)}")
            except Exception as e:
                self._log.error(f"Error logging input/output data: {str(e)}")
                self._log.debug(f"Input: {input_data}")
                self._log.debug(f"Output: {output_data}")

        # Save updated research data to JSON file
        self._save_research_data()

    def log_error(self, agent_name: str, action: str, error: Exception, context: Optional[Dict] = None):
        """Log error information.

        Args:
            agent_name: Name of the component that failed.
            action: Short identifier of the failing operation.
            error: The error; stored as ``str(error)`` plus its type name.
            context: Optional extra data describing the failing call.
        """
        error_data = {
            "timestamp": datetime.now().isoformat(),
            "agent": agent_name,
            "action": action,
            "error": str(error),
            "error_type": type(error).__name__,
            "context": context
        }

        self.research_data.setdefault("errors", []).append(error_data)

        self._log.error(f"Error in {agent_name} during {action}: {str(error)}")
        if context:
            # This runs inside callers' except blocks, so it must not raise
            # on unusual context values and mask the original error.
            self._log.error(f"Context: {self._to_json(context)}")

        self._save_research_data()

    def _save_research_data(self):
        """Save the complete research data to JSON file."""
        # Serialise before opening the file so a serialisation failure cannot
        # leave a truncated JSON file behind.
        payload = self._to_json(self.research_data)
        with open(self.json_log_file, 'w', encoding='utf-8') as f:
            f.write(payload)

    def get_research_summary(self) -> Dict:
        """Generate a summary of the research process.

        Returns:
            Dict with the research id, number of steps, number of errors, the
            agents seen (in order of first appearance) and the seconds elapsed
            since this logger was created.
        """
        steps = self.research_data["steps"]
        started_at = datetime.fromisoformat(self.research_data["timestamp"])
        return {
            "research_id": self.research_id,
            "total_steps": len(steps),
            "errors": len(self.research_data.get("errors", [])),
            "agents_involved": list(dict.fromkeys(step["agent"] for step in steps)),
            "duration": (datetime.now() - started_at).total_seconds()
        }

    def close(self):
        """Detach the file handler from the root logger and close the log file.

        Without this, every instance created in a long-lived session keeps
        receiving all later log records.
        """
        logging.getLogger('').removeHandler(self.file_handler)
        self.file_handler.close()

In [4]:
class RateLimiter:
    """Sliding-window limiter allowing ``calls_per_minute`` calls per 60 seconds.

    ``calls`` holds the timestamps of the calls admitted within the current
    window, oldest first.
    """

    def __init__(self, calls_per_minute: int):
        """
        Args:
            calls_per_minute: Maximum number of calls admitted per 60 seconds.

        Raises:
            ValueError: If ``calls_per_minute`` is smaller than 1 (such a
                limiter could never admit a call).
        """
        if calls_per_minute < 1:
            raise ValueError("calls_per_minute must be at least 1")
        self.calls_per_minute = calls_per_minute
        self.calls = []

    async def wait_if_needed(self):
        """Wait until a call slot is free, then record the call.

        The window is re-evaluated after every sleep. The recorded timestamp
        is therefore the moment the call is actually admitted, and several
        coroutines woken at the same time cannot all slip through together.
        """
        while True:
            now = datetime.now()
            # Forget calls that have left the 60-second window.
            self.calls = [call for call in self.calls
                          if (now - call).total_seconds() < 60]

            if len(self.calls) < self.calls_per_minute:
                break

            # Sleep until the oldest recorded call leaves the window.
            sleep_time = 60 - (now - self.calls[0]).total_seconds()
            await asyncio.sleep(sleep_time)

        self.calls.append(now)

In [5]:
class Config:
    """Bundle the shared runtime dependencies: Gemini client and rate limiter."""

    def __init__(self):
        """Build the Gemini client from ``GOOGLE_API_KEY`` and a 50/min limiter."""
        api_key = os.getenv('GOOGLE_API_KEY')
        self.gemini_client = genai.Client(api_key=api_key)

        per_minute = 50
        self.rate_limiter = RateLimiter(calls_per_minute=per_minute)

In [6]:
class WebScraper:
    def __init__(self, config: Config, logger: ResearchLogger):
        self.config = config
        self.logger = logger
        self.subscription_key = os.getenv('BING_API_KEY')
        self.endpoint = 'https://api.bing.microsoft.com/v7.0/search'
        self.playwright = None
        self.browser = None

    async def initialize(self):
        """Start Playwright and launch the headless Chromium browser.

        The resulting handles are stored on ``self.playwright`` and
        ``self.browser``.
        """
        chromium_flags = [
            '--disable-blink-features=AutomationControlled',
            '--no-sandbox',
            '--disable-setuid-sandbox',
            '--disable-dev-shm-usage',
            '--disable-accelerated-2d-canvas',
            '--disable-gpu'
        ]

        driver = await async_playwright().start()
        self.playwright = driver
        self.browser = await driver.chromium.launch(headless=True, args=chromium_flags)
        
    async def cleanup(self):
        """Cleanup Playwright resources"""
        if self.browser:
            await self.browser.close()
        if self.playwright:
            await self.playwright.stop()

    async def get_website_url(self, company_name: str) -> str:
        """Look up a company's website with a Bing web search.

        Args:
            company_name: Name of the company to search for.

        Returns:
            URL of the first search result for "<company_name> official website".

        Raises:
            LookupError: If the search returns no web results.
            Exception: Any HTTP or network error from the search request.
                Every failure is logged through the research logger and then
                re-raised.
        """
        try:
            self.logger.log_step(
                agent_name="WebScraper",
                action="search_company_website",
                input_data={"company_name": company_name},
                output_data=None
            )

            headers = {'Ocp-Apim-Subscription-Key': self.subscription_key}
            params = {
                'q': f"{company_name} official website",
                'count': 1
            }

            async with aiohttp.ClientSession() as session:
                async with session.get(self.endpoint, headers=headers, params=params) as response:
                    # Surface auth/quota failures as HTTP errors instead of a
                    # confusing KeyError on the error payload.
                    response.raise_for_status()
                    data = await response.json()

            results = (data.get('webPages') or {}).get('value') or []
            if not results:
                raise LookupError(f"No search results for company: {company_name}")
            website_url = results[0]['url']

            self.logger.log_step(
                agent_name="WebScraper",
                action="found_website_url",
                input_data={"company_name": company_name},
                output_data={"website_url": website_url}
            )

            return website_url

        except Exception as e:
            self.logger.log_error(
                agent_name="WebScraper",
                action="get_website_url",
                error=e,
                context={"company_name": company_name}
            )
            raise

    async def _scroll_page(self, page):
        """Helper method to scroll the page and ensure content is loaded"""
        try:
            await page.evaluate("""
                async () => {
                    await new Promise((resolve) => {
                        let totalHeight = 0;
                        const distance = 100;
                        const timer = setInterval(() => {
                            const scrollHeight = document.body.scrollHeight;
                            window.scrollBy(0, distance);
                            totalHeight += distance;
                            
                            if(totalHeight >= scrollHeight){
                                clearInterval(timer);
                                resolve();
                            }
                        }, 100);
                    });
                }
            """)
        except Exception:
            # If scrolling fails, continue anyway
            pass

    async def _handle_page_error(self, error, url):
        """Handle page errors, ignoring LocalStorageUtil errors"""
        error_str = str(error)

        ignorable_errors = [
            'LocalStorageUtil',
            'already been declared',
            'Minified React error',
            'visit https://react.dev/errors'
        ]

        if not any(ignore_err in error_str for ignore_err in ignorable_errors):
            self.logger.log_error(
                agent_name="WebScraper",
                action="page_javascript_error",
                error=error_str,
                context={"url": url}
            )

    async def extract_page_content(self, url: str) -> str:
        """Render ``url`` in a fresh browser context and return its text.

        The page is loaded with JavaScript enabled, converted to plain text
        and returned. If loading or processing fails, or the text is shorter
        than 100 characters or contains "Application error", the result of
        ``_fallback_content_extraction`` is returned instead.

        The browser context is always closed once the HTML has been captured,
        on the failure paths as well as on success.

        Args:
            url: Address of the page to extract.

        Returns:
            The page text, or the fallback's text (possibly empty).

        Raises:
            Exception: Failures outside page loading (starting the browser,
                creating the context or page) are logged and re-raised.
        """
        try:
            self.logger.log_step(
                agent_name="WebScraper",
                action="start_page_extraction",
                input_data={"url": url},
                output_data=None
            )

            if not self.browser:
                await self.initialize()

            context = await self.browser.new_context(
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
                viewport={'width': 1920, 'height': 1080},
                java_script_enabled=True
            )

            page_error = None
            content = None
            try:
                page = await context.new_page()
                try:
                    content = await self._load_rendered_html(page, url)
                except Exception as load_error:
                    page_error = load_error
            finally:
                # Release the context on every path, including failures.
                await self._close_context(context, url)

            if page_error is None:
                try:
                    text = self._html_to_text(content)

                    # If the content is too short or contains error messages, try fallback method
                    if len(text.strip()) < 100 or "Application error" in text:
                        text = await self._fallback_content_extraction(url)

                    self.logger.log_step(
                        agent_name="WebScraper",
                        action="complete_page_extraction",
                        input_data={"url": url},
                        output_data={
                            "content_length": len(text),
                            "content_preview": text[:50]
                        }
                    )

                    return text
                except Exception as processing_error:
                    page_error = processing_error

            self.logger.log_error(
                agent_name="WebScraper",
                action="page_load_error",
                error=page_error,
                context={"url": url}
            )
            return await self._fallback_content_extraction(url)

        except Exception as e:
            self.logger.log_error(
                agent_name="WebScraper",
                action="extract_page_content",
                error=e,
                context={"url": url}
            )
            raise

    async def _close_context(self, context, url: str):
        """Close a browser context, logging (not raising) any failure."""
        try:
            await context.close()
        except Exception as close_error:
            self.logger.log_error(
                agent_name="WebScraper",
                action="close_browser_context",
                error=close_error,
                context={"url": url}
            )

    async def _load_rendered_html(self, page, url: str) -> str:
        """Navigate ``page`` to ``url`` and return the rendered HTML."""
        # Configure error handling
        page.on("pageerror", lambda err: self._handle_page_error(err, url))

        # Route every request through _handle_route
        await page.route("**/*", lambda route: self._handle_route(route))

        response = await page.goto(
            url,
            wait_until='domcontentloaded',
            timeout=15000
        )

        if response is None or not response.ok:
            raise Exception(f"Failed to load page: {url}")

        # Check for error messages
        error_selectors = [
            "text='Application error'",
            "text='Client-side exception'",
            "text='Error'",
            ".error-message",
            "#error-message"
        ]

        has_error = False
        for selector in error_selectors:
            try:
                # Each probe waits up to one second for the element to appear.
                if await page.wait_for_selector(selector, timeout=1000):
                    has_error = True
                    break
            except Exception:
                # Not found within the timeout; try the next selector.
                continue

        if has_error:
            # Try reloading with different settings
            await page.reload(
                wait_until='networkidle',
                timeout=20000
            )
            await page.wait_for_timeout(2000)

        # Wait for critical content
        try:
            await page.wait_for_selector('main, #content, .content, article, body',
                                         timeout=5000,
                                         state='visible')
        except Exception:
            # Missing main content is not fatal; use whatever has rendered.
            pass

        await page.wait_for_load_state('domcontentloaded')
        await page.wait_for_timeout(2000)  # Additional wait for dynamic content

        return await page.content()

    @staticmethod
    def _html_to_text(content: str) -> str:
        """Strip error blocks, scripts and styles, and collapse to plain text."""
        soup = BeautifulSoup(content, 'html.parser')

        # Remove error messages and unnecessary elements
        for error_class in ('error-message', 'error-container', 'error-page'):
            for element in soup.find_all(class_=error_class):
                element.decompose()

        for element in soup(["script", "style", "noscript"]):
            element.decompose()

        lines = (line.strip() for line in soup.get_text().splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        return ' '.join(chunk for chunk in chunks if chunk)

    async def _handle_route(self, route):
        """Handle route requests and errors"""
        try:
            if route.request.resource_type in ['image', 'stylesheet', 'font']:
                await route.abort()
            else:
                await route.continue_()
        except:
            await route.continue_()

    async def _fallback_content_extraction(self, url: str) -> str:
        """Fallback method for content extraction with JavaScript disabled.

        Loads ``url`` in a separate browser context that has JavaScript
        turned off and returns the whitespace-normalised page text. The
        context is closed even when navigation fails.

        Args:
            url: Address of the page to extract.

        Returns:
            The page text, or ``""`` if anything goes wrong (the failure is
            logged through the research logger).
        """
        try:
            context = await self.browser.new_context(
                java_script_enabled=False,  # Disable JavaScript
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            )

            try:
                page = await context.new_page()

                # NOTE(review): 'commit' returns as soon as the document
                # starts loading, so the captured HTML may be partial -
                # confirm whether 'domcontentloaded' is wanted here.
                await page.goto(url, wait_until='commit', timeout=10000)

                content = await page.content()
            finally:
                # Release the context on failures such as a goto timeout too.
                await context.close()

            soup = BeautifulSoup(content, 'html.parser')
            for element in soup(["script", "style"]):
                element.decompose()

            return ' '.join(soup.get_text().split())
        except Exception as e:
            self.logger.log_error(
                agent_name="WebScraper",
                action="fallback_content_extraction",
                error=e,
                context={"url": url}
            )
            return ""

    async def _is_error_page(self, page) -> bool:
        """Check if the current page is showing an error"""
        error_indicators = [
            "Application error",
            "Client-side exception",
            "Something went wrong",
            "404 Not Found",
            "500 Internal Server Error"
        ]
        
        page_text = await page.text_content('body')
        return any(indicator in page_text for indicator in error_indicators)
    
    async def process_pages_concurrently(self, urls: List[str], max_concurrent: int = 5) -> Dict[str, str]:
        """Process multiple pages concurrently with rate limiting"""
        try:
            self.logger.log_step(
                agent_name="WebScraper",
                action="start_concurrent_extraction",
                input_data={"urls": urls, "max_concurrent": max_concurrent},
                output_data=None
            )

            if not self.browser:
                await self.initialize()

            async def process_single_url(url: str) -> Tuple[str, str]:
                try:
                    content = await self.extract_page_content(url)
                    return url, content
                except Exception as e:
                    self.logger.log_error(
                        agent_name="WebScraper",
                        action="process_single_url",
                        error=e,
                        context={"url": url}
                    )
                    return url, ""

            # Process URLs in chunks to limit concurrent operations
            results = {}
            for i in range(0, len(urls), max_concurrent):
                chunk = urls[i:i + max_concurrent]
                chunk_tasks = [process_single_url(url) for url in chunk]
                chunk_results = await asyncio.gather(*chunk_tasks, return_exceptions=True)
                
                for url, content in chunk_results:
                    if content:  # Only store successful results
                        results[url] = content

            return results

        except Exception as e:
            self.logger.log_error(
                agent_name="WebScraper",
                action="process_pages_concurrently",
                error=e,
                context={"urls": urls}
            )
            raise

    async def search_company_page(self, company_name: str, page_type: str) -> Optional[str]:
        """Search for a specific type of page for a company.

        Args:
            company_name: Name of the company.
            page_type: Kind of page wanted. 'pricing', 'features', 'products'
                and 'about' map to a preferred search term; any other value
                is used as the search term itself.

        Returns:
            URL of the first search result, or ``None`` when the search
            returns no web results.

        Raises:
            Exception: HTTP and network errors from the search request are
                logged and re-raised, so a rejected request is not mistaken
                for "no such page".
        """
        try:
            search_terms = {
                'pricing': ['pricing', 'plans', 'packages'],
                'features': ['features', 'product features'],
                'products': ['products', 'solutions'],
                'about': ['about', 'company information'],
            }

            # Only the first term of each list is used for the query.
            search_term = search_terms.get(page_type, [page_type])[0]
            query = f"{company_name} {search_term}"

            self.logger.log_step(
                agent_name="WebScraper",
                action="search_company_page",
                input_data={"company_name": company_name, "page_type": page_type},
                output_data=None
            )

            headers = {'Ocp-Apim-Subscription-Key': self.subscription_key}
            params = {
                'q': query,
                'count': 1
            }

            async with aiohttp.ClientSession() as session:
                async with session.get(self.endpoint, headers=headers, params=params) as response:
                    response.raise_for_status()
                    data = await response.json()

            results = (data.get('webPages') or {}).get('value') or []
            if not results:
                return None

            page_url = results[0]['url']

            self.logger.log_step(
                agent_name="WebScraper",
                action="found_page_url",
                input_data={"query": query},
                output_data={"page_url": page_url}
            )

            return page_url

        except Exception as e:
            self.logger.log_error(
                agent_name="WebScraper",
                action="search_company_page",
                error=e,
                context={"company_name": company_name, "page_type": page_type}
            )
            raise

    async def analyze_company(self, company_name: str) -> Dict:
      try:
            self.logger.log_step(
                agent_name="WebScraper",
                action="start_company_analysis",
                input_data={"company_name": company_name},
                output_data=None
            )

            website_url = await self.get_website_url(company_name)
            page_types = ['pricing', 'features', 'products', 'about']

            urls_to_process = [website_url]
            page_urls = {}

            for page_type in page_types:
                try:
                    page_url = await self.search_company_page(company_name, page_type)
                    if page_url:
                        urls_to_process.append(page_url)
                        page_urls[page_type] = page_url
                except Exception as e:
                    self.logger.log_error(
                        agent_name="WebScraper",
                        action="get_page_url",
                        error=e,
                        context={"company_name": company_name, "page_type": page_type}
                    )
                    continue

            all_content = await self.process_pages_concurrently(urls_to_process)

            company_data = {
                "name": company_name,
                "website": website_url,
                "pages": {
                    "home": {
                        "url": website_url,
                        "content": all_content.get(website_url, "")
                    }
                }
            }

            for page_type, url in page_urls.items():
                company_data["pages"][page_type] = {
                    "url": url,
                    "content": all_content.get(url, "")
                }

            return company_data

      except Exception as e:
            self.logger.log_error(
                agent_name="WebScraper",
                action="analyze_company",
                error=e,
                context={"company_name": company_name}
            )
            raise

    async def analyze_competitors(self, competitors_data: Dict) -> Dict:
        print(type(competitors_data))
        if isinstance(competitors_data, str):
            competitors_data = json.loads(competitors_data)
            
        try:
            self.logger.log_step(
                agent_name="WebScraper",
                action="start_competitors_analysis",
                input_data={"competitors": [comp["name"] for comp in competitors_data["competitors"]]},
                output_data=None
            )

            results = {}
            for competitor in competitors_data["competitors"]:
                try:
                    results[competitor["name"]] = await self.analyze_company(competitor["name"])
                except Exception as e:
                    self.logger.log_error(
                        agent_name="WebScraper",
                        action="analyze_competitor",
                        error=e,
                        context={"competitor": competitor}
                    )
                    continue

            self.logger.log_step(
                agent_name="WebScraper",
                action="complete_competitors_analysis",
                input_data={"competitors": [comp["name"] for comp in competitors_data["competitors"]]},
                output_data={
                    "competitors_analyzed": list(results.keys()),
                    "total_competitors": len(competitors_data["competitors"]),
                    "successful_analyses": len(results)
                }
            )

            return results

        except Exception as e:
            self.logger.log_error(
                agent_name="WebScraper",
                action="analyze_competitors",
                error=e,
                context={"competitors_data": competitors_data}
            )
            raise

    async def extract_structured_data(self, content: str, data_type: str, max_chars: int = 1000) -> Dict:
        """Extract specific types of data from page content using Gemini.

        Args:
            content: Page text to analyze.
            data_type: Description of the information to extract.
            max_chars: Number of leading characters of ``content`` sent to
                the model.

        Returns:
            The model's answer parsed as JSON.

        Raises:
            Exception: API failures and unparsable answers are logged and
                re-raised.
        """
        try:
            self.logger.log_step(
                agent_name="WebScraper",
                action="start_structured_data_extraction",
                input_data={"data_type": data_type},
                output_data=None
            )

            # Share the request budget with the agents' Gemini calls.
            await self.config.rate_limiter.wait_if_needed()

            # Truncate outside the f-string so no Python comment ends up in
            # the text sent to the model.
            excerpt = content[:max_chars]
            prompt = f"""
            Extract the following type of information: {data_type}
            From the following content:
            {excerpt}

            Return the information in JSON format.
            """

            # Use the async client so the event loop is not blocked while
            # waiting for the model.
            response = await self.config.gemini_client.aio.models.generate_content(
                model="gemini-2.0-flash-exp",
                contents=prompt,
                config=genai.types.GenerateContentConfig(
                    temperature=0.1,
                ),
            )

            # Strip markdown code fences the same way BaseAgent.execute does.
            raw_text = (response.text or "").replace('```json', '').replace('```', '').strip()
            structured_data = json.loads(raw_text)

            self.logger.log_step(
                agent_name="WebScraper",
                action="complete_structured_data_extraction",
                input_data={"data_type": data_type},
                output_data=structured_data
            )

            return structured_data

        except Exception as e:
            self.logger.log_error(
                agent_name="WebScraper",
                action="extract_structured_data",
                error=e,
                context={"data_type": data_type}
            )
            raise

In [7]:
class BaseAgent:
    """Base class for the Gemini-backed research agents.

    Subclasses customise the persona fields (``role``, ``goal``,
    ``backstory``, ``system_prompt``) and ``temperature``; ``execute`` turns
    them plus the input data into a single prompt.
    """

    def __init__(self, config: Config, logger: ResearchLogger):
        """
        Args:
            config: Provides ``gemini_client`` and ``rate_limiter``.
            logger: Research logger receiving step and error records.
        """
        self.config = config
        self.logger = logger
        self.system_prompt = ""
        self.role = ""
        self.goal = ""
        self.backstory = ""
        self.temperature = 1.0

    async def execute(self, input_data: Any) -> Any:
        """Send the persona and ``input_data`` to Gemini and return the answer.

        Args:
            input_data: JSON-serialisable data embedded in the prompt.

        Returns:
            The response parsed as JSON (markdown code fences are stripped
            first). If parsing fails, the failure is logged and the raw
            response text is returned instead.

        Raises:
            Exception: Any other failure (prompt building, API call) is
                logged and re-raised.
        """
        await self.config.rate_limiter.wait_if_needed()

        agent_name = self.__class__.__name__

        try:
            # Log the start of execution
            self.logger.log_step(
                agent_name=agent_name,
                action="start_execution",
                input_data=input_data,
                output_data=None
            )

            prompt = f"""
            Role: {self.role}
            Goal: {self.goal}
            Backstory: {self.backstory}
            System Instructions: {self.system_prompt}
            
            Input Data:
            {json.dumps(input_data, indent=2)}
            
            Please provide your analysis based on the above information.
            """

            # Use the async client so other coroutines keep running while
            # the model call is in flight.
            response = await self.config.gemini_client.aio.models.generate_content(
                model="gemini-2.0-flash-exp",
                contents=prompt,
                config=genai.types.GenerateContentConfig(
                    temperature=self.temperature,
                ),
            )

            self.logger.log_step(
                agent_name=agent_name,
                action="loading_response_into_json",
                input_data=input_data,
                output_data=response.text
            )

            try:
                result = json.loads(response.text.replace('```json', '').replace('```', '').strip())
            except Exception as e:
                # Fall back to the raw text so the caller still gets the answer.
                result = response.text
                self.logger.log_error(
                    agent_name=agent_name,
                    action="loading_response_into_json",
                    error=e,
                    context={"response": result}
                )

            # Log the successful execution
            self.logger.log_step(
                agent_name=agent_name,
                action="complete_execution",
                input_data=input_data,
                output_data=result
            )

            return result

        except Exception as e:
            # Log the error
            self.logger.log_error(
                agent_name=agent_name,
                action="execute",
                error=e,
                context={"input_data": input_data}
            )
            raise

In [8]:
class MarketIntelligenceScout(BaseAgent):
    """Agent persona that identifies competitors in a market.

    Its system prompt asks for a JSON object with a "competitors" list.
    """

    def __init__(self, config: Config, logger: ResearchLogger):
        """Set the persona fields on top of the BaseAgent defaults."""
        super().__init__(config, logger)

        # Sampling temperature handed to the model by BaseAgent.execute.
        self.temperature = 0.2

        # Persona description embedded in the prompt.
        self.backstory = "Former market research director with 15 years of experience"
        self.goal = "Identify and categorize the most relevant competitors"
        self.role = "Expert market researcher specializing in competitor identification"

        # Task instructions and the expected answer schema.
        self.system_prompt = """
        1. Search for top competitors in the given market
        2. Return results in JSON format with the following structure:
        {
            "competitors": [
                {
                    "name": "",
                    "website": "",
                    "industry": "",
                    "threat_level": ""
                }
            ]
        }
        """

In [9]:
class DigitalProductAnalyst(BaseAgent):
    """Agent persona that analyzes competitors' product features.

    Its system prompt asks for a JSON object keyed by competitor name.
    """

    def __init__(self, config: Config, logger: ResearchLogger):
        """Set the persona fields on top of the BaseAgent defaults."""
        super().__init__(config, logger)

        # Sampling temperature handed to the model by BaseAgent.execute.
        self.temperature = 0.2

        # Persona description embedded in the prompt.
        self.backstory = "Previously a product manager at major tech companies"
        self.goal = "Analyze product features and capabilities"
        self.role = "Product analysis specialist"

        # Task instructions and the expected answer schema.
        self.system_prompt = """
        Analyze each competitor's product features and capabilities.
        Return results in JSON format:
        {
            "competitor_name": {
                "key_features": [],
                "unique_capabilities": [],
                "user_experience": "",
                "product_maturity": ""
            }
        }
        """

In [10]:
class MarketingMessageDecoder(BaseAgent):
    """Agent persona that analyzes competitors' marketing messages.

    Its system prompt asks for a JSON object keyed by competitor name.
    """

    def __init__(self, config: Config, logger: ResearchLogger):
        """Set the persona fields on top of the BaseAgent defaults."""
        super().__init__(config, logger)

        # Sampling temperature handed to the model by BaseAgent.execute.
        self.temperature = 0.2

        # Persona description embedded in the prompt.
        self.backstory = "Former copywriter turned marketing strategist"
        self.goal = "Decode and analyze competitors' marketing strategies"
        self.role = "Marketing communications analyst"

        # Task instructions and the expected answer schema.
        self.system_prompt = """
        Analyze marketing messages and positioning.
        Return results in JSON format:
        {
            "competitor_name": {
                "value_propositions": [],
                "messaging_tone": "",
                "target_audience": "",
                "unique_selling_points": []
            }
        }
        """

In [11]:
class TechnicalFeatureComparator(BaseAgent):
    """Agent persona that compares competitors' technical features.

    Its system prompt asks for a JSON object keyed by competitor name.
    """

    def __init__(self, config: Config, logger: ResearchLogger):
        """Set the persona fields on top of the BaseAgent defaults."""
        super().__init__(config, logger)

        # Sampling temperature handed to the model by BaseAgent.execute.
        self.temperature = 0.2

        # Persona description embedded in the prompt.
        self.backstory = "Senior solutions architect with cross-industry experience"
        self.goal = "Provide detailed technical comparison of competitor products"
        self.role = "Technical analyst specializing in feature comparison"

        # Task instructions and the expected answer schema.
        self.system_prompt = """
        Compare technical features across competitors.
        Return results in JSON format:
        {
            "competitor_name": {
                "tech_stack": [],
                "api_capabilities": [],
                "scalability_features": [],
                "technical_advantages": [],
                "technical_limitations": []
            }
        }
        """

In [12]:
class PricingStrategySpecialist(BaseAgent):
    """Agent persona that analyzes competitors' pricing models.

    Its system prompt asks for a JSON object keyed by competitor name.
    """

    def __init__(self, config: Config, logger: ResearchLogger):
        """Set the persona fields on top of the BaseAgent defaults."""
        super().__init__(config, logger)

        # Sampling temperature handed to the model by BaseAgent.execute.
        self.temperature = 0.2

        # Persona description embedded in the prompt.
        self.backstory = "Former pricing consultant in SaaS industry"
        self.goal = "Analyze and compare pricing models and strategies"
        self.role = "Pricing analysis expert"

        # Task instructions and the expected answer schema.
        self.system_prompt = """
        Analyze pricing strategies and models.
        Return results in JSON format:
        {
            "competitor_name": {
                "pricing_tiers": [],
                "pricing_model": "",
                "discount_strategies": [],
                "pricing_positioning": ""
            }
        }
        """

In [13]:
class CompetitiveStrategyAnalyst(BaseAgent):
    """Agent persona that synthesizes competitive data into strategic insights.

    The class only configures persona fields and the system prompt; it
    defines no methods besides ``__init__``, so everything else comes from
    ``BaseAgent``.
    """

    def __init__(self, config: Config, logger: ResearchLogger):
        super().__init__(config, logger)

        # Sampling temperature for this agent.
        self.temperature = 0.5

        # Persona fields, assigned in one step: role, goal, backstory.
        self.role, self.goal, self.backstory = (
            "Strategic analyst specializing in competitive analysis",
            "Synthesize competitive intelligence into strategic insights",
            "Strategy consultant from major consulting firms",
        )

        # Prompt text, including the JSON skeleton the reply should follow
        # (a single market-wide object, not one entry per competitor).
        self.system_prompt = """
        Synthesize all competitive data into strategic insights.
        Return results in JSON format:
        {
            "market_patterns": [],
            "competitive_advantages": {},
            "market_gaps": [],
            "strategic_recommendations": [],
            "threat_assessment": {}
        }
        """

In [14]:
class CompetitiveIntelligenceReportSpecialist(BaseAgent):
    """Agent that assembles the final competitive analysis report.

    The report is built from three separate agent calls, each producing two
    sections, which ``execute`` merges into a single dict and validates.
    """

    def __init__(self, config: Config, logger: ResearchLogger):
        super().__init__(config, logger)
        self.role = "Report creation specialist"
        self.goal = "Create clear, actionable reports from competitive analysis"
        self.backstory = "Communications expert from McKinsey, has a history of writing insightful reports on the target market for clients"
        self.temperature = 0.5

    async def _generate_section(self, system_prompt: str, data: Dict) -> Dict:
        """Run one report prompt on a throwaway agent sharing this persona.

        ``execute`` is overridden on this class to orchestrate the whole
        report, so each individual call goes through a plain ``BaseAgent``
        that copies this agent's role, goal, backstory and temperature.

        Args:
            system_prompt: Prompt describing the sections to produce.
            data: Input payload forwarded unchanged to the agent.

        Returns:
            Whatever ``BaseAgent.execute`` returns for this prompt.
        """
        section_agent = BaseAgent(self.config, self.logger)
        section_agent.role = self.role
        section_agent.goal = self.goal
        section_agent.backstory = self.backstory
        section_agent.temperature = self.temperature
        section_agent.system_prompt = system_prompt
        return await section_agent.execute(data)

    async def generate_executive_and_market(self, data: Dict) -> Dict:
        """Generate the executive summary and market overview sections."""
        system_prompt = """
        Create the executive summary and market overview sections of the competitive analysis report.
        Return results in JSON format:
        {
            "executive_summary": "",
            "market_overview": ""
        }
        """
        return await self._generate_section(system_prompt, data)

    async def generate_analysis_and_findings(self, data: Dict) -> Dict:
        """Generate the key findings and detailed analysis sections."""
        system_prompt = """
        Create the key findings and detailed analysis sections of the competitive analysis report.
        Return results in JSON format:
        {
            "key_findings": [],
            "detailed_analysis": {}
        }
        """
        return await self._generate_section(system_prompt, data)

    async def generate_recommendations_and_appendix(self, data: Dict) -> Dict:
        """Generate the recommendations and appendix sections."""
        system_prompt = """
        Create the recommendations and appendix sections of the competitive analysis report.
        Return results in JSON format:
        {
            "recommendations": [],
            "appendix": {}
        }
        """
        return await self._generate_section(system_prompt, data)

    async def execute(self, data: Dict) -> Dict:
        """Build the complete report from its three section groups.

        Args:
            data: Input payload handed to every section generator.

        Returns:
            A dict containing all six report sections.

        Raises:
            ValueError: If any required section is missing from the merged
                result.
            Exception: Any error raised while generating or merging the
                sections is logged via ``self.logger.log_error`` and then
                re-raised unchanged.
        """
        try:
            # Generate report sections sequentially to respect rate limits
            exec_market = await self.generate_executive_and_market(data)
            analysis_findings = await self.generate_analysis_and_findings(data)
            rec_appendix = await self.generate_recommendations_and_appendix(data)

            # Combine all sections; on a key clash the later group wins.
            final_report = {
                **exec_market,
                **analysis_findings,
                **rec_appendix
            }

            # Validate that every expected section came back.
            required_keys = {
                "executive_summary", "market_overview",
                "key_findings", "detailed_analysis",
                "recommendations", "appendix"
            }

            missing_keys = required_keys - set(final_report.keys())
            if missing_keys:
                raise ValueError(f"Missing sections in report: {missing_keys}")

            return final_report

        except Exception as e:
            self.logger.log_error(
                agent_name=self.__class__.__name__,
                action="execute",
                error=e,
                context={"data": data}
            )
            raise

In [15]:
class CompetitiveAnalysisWorkflow:
    """Orchestrates the competitive analysis pipeline end to end.

    The workflow owns one ``ResearchLogger`` and one instance of every agent,
    and chains them: target-company lookup, competitor discovery, website
    scraping, four parallel analyses, strategic synthesis and report
    generation.
    """

    def __init__(self, config: Config, research_id: str):
        self.config = config
        self.logger = ResearchLogger(research_id)

        self.market_scout = MarketIntelligenceScout(config, self.logger)
        self.product_analyst = DigitalProductAnalyst(config, self.logger)
        self.marketing_decoder = MarketingMessageDecoder(config, self.logger)
        self.technical_comparator = TechnicalFeatureComparator(config, self.logger)
        self.pricing_specialist = PricingStrategySpecialist(config, self.logger)
        self.competitive_analyst = CompetitiveStrategyAnalyst(config, self.logger)
        self.report_specialist = CompetitiveIntelligenceReportSpecialist(config, self.logger)
        self.web_scraper = WebScraper(config, self.logger)

    async def run_parallel_analysis(self, competitors_data: Dict) -> Dict[str, Any]:
        """Run the four specialist analyses concurrently.

        A failing analysis does not abort the others. Its exception is logged
        and replaced by an ``{"error": ..., "error_type": ...}`` record, so
        the returned dict never contains raw exception objects.

        Args:
            competitors_data: Payload passed unchanged to every analyst.

        Returns:
            A dict with the keys ``product_analysis``, ``marketing_analysis``,
            ``technical_analysis`` and ``pricing_analysis``; each value is the
            analyst's result or an error record.
        """
        analysts = {
            "product_analysis": self.product_analyst,
            "marketing_analysis": self.marketing_decoder,
            "technical_analysis": self.technical_comparator,
            "pricing_analysis": self.pricing_specialist,
        }

        # return_exceptions=True keeps one failure from cancelling the rest;
        # gather returns outcomes in the same order the awaitables were given.
        outcomes = await asyncio.gather(
            *(agent.execute(competitors_data) for agent in analysts.values()),
            return_exceptions=True,
        )

        results: Dict[str, Any] = {}
        for (name, agent), outcome in zip(analysts.items(), outcomes):
            if isinstance(outcome, BaseException):
                self.logger.log_error(
                    agent_name=agent.__class__.__name__,
                    action=name,
                    error=outcome,
                    context={"analysis": name},
                )
                results[name] = {
                    "error": str(outcome),
                    "error_type": type(outcome).__name__,
                }
            else:
                results[name] = outcome
        return results

    async def execute_workflow(self, industry: str, target_company: str = None) -> Dict:
        """Run every stage of the analysis and return the final report.

        Args:
            industry: Industry to research.
            target_company: Optional company to analyze first; when given,
                its scraped data is passed along to the later stages.

        Returns:
            The report produced by the report specialist.

        Raises:
            Exception: Any failure is logged via ``self.logger.log_error``
                and re-raised unchanged.
        """
        try:
            self.logger.log_step(
                agent_name="Workflow",
                action="start_workflow",
                input_data={"industry": industry, "target_company": target_company},
                output_data=None
            )

            # Step 1: Retrieve target company information (if necessary)
            target_company_data = None
            if target_company:
                target_company_data = await self.web_scraper.analyze_company(target_company)

            # Step 2: Identify competitors
            competitors_data = await self.market_scout.execute({
                "industry": industry,
                "target_company": target_company,
                "target_company_data": target_company_data
            })

            # Step 3: Gather website information for all competitors
            website_data = await self.web_scraper.analyze_competitors(competitors_data)

            # Step 4: Run parallel analysis with website data
            parallel_results = await self.run_parallel_analysis({
                "competitors_data": competitors_data,
                "website_data": website_data,
                "target_company_data": target_company_data
            })

            # Step 5: Strategic analysis
            strategic_analysis = await self.competitive_analyst.execute({
                "competitors_data": competitors_data,
                "parallel_results": parallel_results
            })

            # Step 6: Generate report
            final_report = await self.report_specialist.execute({
                "strategic_analysis": strategic_analysis,
                "raw_data": {
                    "competitors": competitors_data,
                    "analysis": parallel_results
                }
            })

            self.logger.log_step(
                agent_name="Workflow",
                action="complete_workflow",
                input_data={"industry": industry, "target_company": target_company},
                output_data=None
            )

            return final_report

        except Exception as e:
            self.logger.log_error(
                agent_name="Workflow",
                action="execute_workflow",
                error=e,
                context={
                    "industry": industry,
                    "target_company": target_company
                }
            )
            raise

In [16]:
async def test_workflow(industry, target_company=None):
    """Run the competitive analysis workflow once and print what it produced.

    A fresh ``Config`` and a timestamp-based research id are created for the
    run. After the workflow finishes, the logger's research summary and the
    final report are printed as indented JSON.

    Args:
        industry: Industry to research.
        target_company: Optional company passed through to the workflow.

    Returns:
        The report returned by ``CompetitiveAnalysisWorkflow.execute_workflow``.
    """
    cfg = Config()
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    wf = CompetitiveAnalysisWorkflow(cfg, "research_" + stamp)

    report = await wf.execute_workflow(industry=industry, target_company=target_company)

    # Research summary collected by the workflow's logger.
    print("\nResearch Summary:")
    summary_json = json.dumps(wf.logger.get_research_summary(), indent=2)
    print(summary_json)

    # The report itself.
    print("\nFinal Result:")
    report_json = json.dumps(report, indent=2)
    print(report_json)

    return report

In [17]:
# Inputs for the run below: the industry to research and the company whose
# competitors are analyzed. Both are also used later to build the report file name.
industry = "Software Testing and Quality Assurance"
target_company = "Nogrunt"

In [None]:
# Run the whole workflow. Top-level `await` is not valid in a plain Python
# script; this presumably relies on the notebook's already-running event loop.
result = await test_workflow(industry, target_company)

INFO:__main__:Agent: Workflow | Action: start_workflow
INFO:__main__:Agent: WebScraper | Action: start_company_analysis
INFO:__main__:Agent: WebScraper | Action: search_company_website
INFO:__main__:Agent: WebScraper | Action: found_website_url
INFO:__main__:Agent: WebScraper | Action: search_company_page
INFO:__main__:Agent: WebScraper | Action: found_page_url
INFO:__main__:Agent: WebScraper | Action: search_company_page
INFO:__main__:Agent: WebScraper | Action: found_page_url
INFO:__main__:Agent: WebScraper | Action: search_company_page
INFO:__main__:Agent: WebScraper | Action: found_page_url
INFO:__main__:Agent: WebScraper | Action: search_company_page
INFO:__main__:Agent: WebScraper | Action: found_page_url
INFO:__main__:Agent: WebScraper | Action: start_concurrent_extraction
INFO:__main__:Agent: WebScraper | Action: start_page_extraction
INFO:__main__:Agent: WebScraper | Action: start_page_extraction
INFO:__main__:Agent: WebScraper | Action: start_page_extraction
INFO:__main__:Age

<class 'dict'>


INFO:__main__:Agent: WebScraper | Action: found_website_url
INFO:__main__:Agent: WebScraper | Action: search_company_page
INFO:__main__:Agent: WebScraper | Action: found_page_url
INFO:__main__:Agent: WebScraper | Action: search_company_page
INFO:__main__:Agent: WebScraper | Action: found_page_url
INFO:__main__:Agent: WebScraper | Action: search_company_page
INFO:__main__:Agent: WebScraper | Action: found_page_url
INFO:__main__:Agent: WebScraper | Action: search_company_page
INFO:__main__:Agent: WebScraper | Action: found_page_url
INFO:__main__:Agent: WebScraper | Action: start_concurrent_extraction
INFO:__main__:Agent: WebScraper | Action: start_page_extraction
INFO:__main__:Agent: WebScraper | Action: start_page_extraction
INFO:__main__:Agent: WebScraper | Action: start_page_extraction
INFO:__main__:Agent: WebScraper | Action: start_page_extraction
INFO:__main__:Agent: WebScraper | Action: start_page_extraction
INFO:__main__:Agent: WebScraper | Action: complete_page_extraction
INFO:__

In [42]:
def save_json_to_markdown(data, output_file="output.md"):
    """Render JSON-like data as Markdown, write it to a file, and return it.

    Rendering rules:
      * dict keys become titles ("some_key" -> "Some Key"): ATX headings
        ``#`` to ``######`` for nesting levels 0-5 and bold labels for deeper
        levels, because Markdown defines only six heading levels;
      * items of a list are rendered one nesting level below the list;
      * any other value becomes a ``- value`` bullet.

    Every line starts at column 0. Indenting by nesting depth would push
    headings and bullets to four or more leading spaces, where Markdown
    treats them as an indented code block instead of a heading or list.

    Args:
        data: Nested structure of dicts, lists and scalar values.
        output_file: Path of the Markdown file to write (UTF-8).

    Returns:
        The Markdown text, or a string starting with
        ``"Error: Could not save to file:"`` if the file could not be written.
    """
    max_heading_level = 6  # Markdown headings stop at h6.

    def json_to_markdown(node, level=0):
        lines = []
        if isinstance(node, dict):
            for key, value in node.items():
                # str() so non-string keys (e.g. ints) do not crash .replace().
                title = str(key).replace('_', ' ').title()
                if level < max_heading_level:
                    lines.append(f"{'#' * (level + 1)} {title}")
                else:
                    lines.append(f"**{title}:**")
                lines.extend(json_to_markdown(value, level + 1))
        elif isinstance(node, list):
            for item in node:
                lines.extend(json_to_markdown(item, level + 1))
        else:
            lines.append(f"- {node}")
        return lines

    markdown_output = "\n".join(json_to_markdown(data))

    # Save to file. I/O, bad-path and encoding failures are reported through
    # the return value; a wrong-typed output_file raises instead.
    try:
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(markdown_output)
    except (OSError, ValueError) as e:
        return f"Error: Could not save to file: {e}"

    return markdown_output

In [43]:
# Second-resolution timestamp that distinguishes reports from different runs.
current_timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"
# File name pattern: comp_analysis_report_<company>_<industry>_<timestamp>.md
report_name = "comp_analysis_report_{}_{}_{}.md".format(
    target_company, industry, current_timestamp
)

In [None]:
# Write the final report to the Markdown file named above; the call returns
# the Markdown text (or an "Error: ..." string if the file could not be written).
save_json_to_markdown(result, report_name)