In [1]:
# %pip installs into the environment of the running kernel. duckduckgo-search is
# pinned to the release this notebook was last run against (see the install log).
%pip install requests beautifulsoup4 duckduckgo-search==8.0.2

Collecting duckduckgo-search
  Downloading duckduckgo_search-8.0.2-py3-none-any.whl (18 kB)
Collecting primp>=0.15.0
  Downloading primp-0.15.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 30.6 MB/s eta 0:00:01
[?25hCollecting click>=8.1.8
  Downloading click-8.1.8-py3-none-any.whl (98 kB)
[K     |████████████████████████████████| 98 kB 12.6 MB/s  eta 0:00:01
[?25hCollecting lxml>=5.3.0
  Downloading lxml-5.4.0-cp39-cp39-manylinux_2_28_x86_64.whl (5.1 MB)
[K     |████████████████████████████████| 5.1 MB 109.5 MB/s eta 0:00:01
[?25hInstalling collected packages: primp, lxml, click, duckduckgo-search
Successfully installed click-8.1.8 duckduckgo-search-8.0.2 lxml-5.4.0 primp-0.15.0


In [4]:
import os
import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
from datetime import datetime
import json
import time

# --- Configuration ---
# The Gemini API key is read from the GEMINI_API_KEY environment variable so that
# no credential is stored in the notebook source or its saved outputs.
# When the variable is unset the key is an empty string, so Gemini requests are
# sent without a usable key.
# NOTE(review): a literal key was previously committed on this line; treat it as
# leaked and rotate it.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={GEMINI_API_KEY}"
# Product categories iterated over by the daily workflow.
PRODUCT_CATEGORIES = [
    "Vape Devices and E-Liquids",
    "Delta-8 and Kratom",
    "Candy and Mexican Products",
    "Beverage and Grocery",
    "Hookah and Smoking Products",
    "General Merchandise",
    "Adult Novelty and Glass",
]

# --- Core Components: Tools ---

class WebSearchTool:
    """Thin DuckDuckGo wrapper used by the agents to run text searches."""

    def search(self, query: str, max_results: int = 7):
        """Run a DuckDuckGo text search for ``query``.

        Args:
            query: The search string.
            max_results: Upper bound passed through to ``DDGS.text``.

        Returns:
            The search results collected into a list. If anything raises while
            searching, the error is printed and an empty list is returned.
        """
        print(f"TOOL: Performing web search for: '{query}'")
        try:
            with DDGS() as client:
                hits = list(client.text(query, max_results=max_results))
        except Exception as err:
            print(f"Error during web search: {err}")
            return []
        return hits

class WebScraperTool:
    """Fetches a web page and reduces it to plain text for LLM consumption."""

    def scrape(self, url: str):
        """Download ``url`` and return its text content.

        ``<script>`` and ``<style>`` elements are removed, each line is stripped
        and split on double spaces, empty fragments are dropped, and the
        newline-joined result is cut to its first 5000 characters.

        Returns:
            The cleaned text, or ``None`` when the request raises a
            ``requests.RequestException`` (the error is printed).
        """
        print(f"TOOL: Scraping URL: {url}")
        browser_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        try:
            page = requests.get(url, headers=browser_headers, timeout=10)
            page.raise_for_status()
            soup = BeautifulSoup(page.text, 'html.parser')

            # Non-visible elements would only add noise to the extracted text.
            for tag in soup(["script", "style"]):
                tag.decompose()

            fragments = []
            for raw_line in soup.get_text().splitlines():
                for piece in raw_line.strip().split("  "):
                    piece = piece.strip()
                    if piece:
                        fragments.append(piece)

            # Cap the size of what gets sent to the LLM.
            return '\n'.join(fragments)[:5000]
        except requests.RequestException as err:
            print(f"Error scraping {url}: {err}")
            return None

class GeminiLLM:
    """A wrapper for the Gemini LLM API.

    Args:
        api_url: Endpoint URL that prompts are POSTed to.
        timeout: Seconds to wait for the HTTP response before giving up.
        request_delay: Seconds to sleep before every request (crude rate limiting).
    """
    def __init__(self, api_url: str, timeout: float = 60, request_delay: float = 2):
        self.api_url = api_url
        self.timeout = timeout
        self.request_delay = request_delay

    def _make_request(self, prompt: str):
        """POST ``prompt`` to the Gemini API.

        Returns:
            The decoded JSON response, or ``None`` if the request fails or the
            body is not valid JSON (the error is printed).
        """
        payload = {
            "contents": [{"role": "user", "parts": [{"text": prompt}]}]
        }
        headers = {'Content-Type': 'application/json'}

        # Add a delay to avoid hitting rate limits
        time.sleep(self.request_delay)

        try:
            # Without a timeout a stalled connection would block the notebook forever.
            response = requests.post(self.api_url, headers=headers, json=payload, timeout=self.timeout)
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            # NOTE(review): the exception text can include the request URL, and
            # api_url may carry the API key as a query parameter - avoid sharing
            # this output.
            print(f"Error calling Gemini API: {e}")
            # A requests.Response is falsy for 4xx/5xx statuses, so compare against
            # None; a plain truthiness test hides the body of every error response.
            error_response = getattr(e, "response", None)
            if error_response is not None:
                print(f"Response body: {error_response.text}")
            return None
        except ValueError as e:
            # Some requests versions raise a plain ValueError for a non-JSON body.
            print(f"Error calling Gemini API: {e}")
            return None

    def analyze(self, prompt: str):
        """Send ``prompt`` to Gemini and return the response text.

        Returns:
            The text of the first part of the first candidate, or the string
            ``"Error: Analysis failed."`` when the response has no such text.
        """
        print(f"LLM: Analyzing with prompt - '{prompt[:100]}...'")
        result = self._make_request(prompt)
        try:
            text = result['candidates'][0]['content']['parts'][0]['text']
        except (TypeError, KeyError, IndexError):
            # Covers a failed request (None) and any missing or empty level.
            text = None
        if isinstance(text, str):
            return text
        print("LLM Error: Could not parse response.")
        return "Error: Analysis failed."

# --- Agent Definitions ---

class MarketResearchAgent:
    """
    Agent responsible for scanning the web to find trending products,
    market sentiment, and new product announcements.
    """
    def __init__(self, llm: "GeminiLLM", search_tool: "WebSearchTool", scraper_tool: "WebScraperTool"):
        self.llm = llm
        self.search_tool = search_tool
        self.scraper_tool = scraper_tool
        self.name = "Market Research Agent (MRA)"

    @staticmethod
    def _fallback(category: str, trends: str) -> dict:
        """Build the placeholder result returned when research cannot be completed."""
        return {"category": category, "trends": trends, "sentiment": "Unknown", "new_products": []}

    @staticmethod
    def _normalize(findings: dict, category: str) -> dict:
        """Guarantee the keys and value types that the result contract promises.

        "trends" and "sentiment" become strings and "new_products" becomes a list
        of strings, so consumers can index and join them without checks.
        """
        trends = findings.get("trends")
        if isinstance(trends, (list, tuple)):
            trends = "; ".join(str(t) for t in trends)
        findings["trends"] = str(trends) if trends else "Not specified."

        sentiment = findings.get("sentiment")
        findings["sentiment"] = str(sentiment) if sentiment else "Unknown"

        products = findings.get("new_products") or []
        if not isinstance(products, (list, tuple)):
            # The LLM sometimes answers with a single name instead of a list.
            products = [products]
        findings["new_products"] = [str(p) for p in products]

        findings["category"] = category
        return findings

    def research_category(self, category: str):
        """Search, scrape and summarise market information for ``category``.

        Returns:
            A dict with the keys "category", "trends", "sentiment" and
            "new_products". Placeholder values are used when no search results,
            no scrapable content, or no valid JSON object could be obtained.
        """
        print(f"\nAGENT ({self.name}): Starting research for category: {category}")

        # 1. Search for trends
        search_query = f"new product trends in {category} 2025"
        search_results = self.search_tool.search(search_query, max_results=3)
        if not search_results:
            print(f"AGENT ({self.name}): No search results found for {category}.")
            return self._fallback(category, "No new trends found.")

        # 2. Scrape and analyze top results
        analysis_summary = ""
        for result in search_results:
            url = result.get('href')
            if not url:
                # A result without a link cannot be scraped; skip it instead of crashing.
                continue
            content = self.scraper_tool.scrape(url)
            if content:
                prompt = f"""
                Based on the following article content, summarize the key market trends, 
                consumer sentiment, and any specific new products mentioned for the '{category}' category.
                
                Article Content:
                ---
                {content}
                ---
                
                Provide a concise summary.
                """
                summary = self.llm.analyze(prompt)
                analysis_summary += f"\n\nSource: {url}\nSummary: {summary}"

        if not analysis_summary:
            print(f"AGENT ({self.name}): Could not scrape or analyze any content for {category}.")
            return self._fallback(category, "Could not analyze trends.")

        # 3. Synthesize findings into a structured format
        final_prompt = f"""
        You are a market analyst. Based on the following research summaries for the '{category}' category, 
        provide a structured JSON output with three keys: "trends", "sentiment", and "new_products".
        - "trends": A string summarizing the key emerging trends.
        - "sentiment": A string describing the overall consumer sentiment (e.g., "Positive", "Mixed", "Cautious").
        - "new_products": A list of specific product names or types identified as new or trending.

        Research Summaries:
        ---
        {analysis_summary}
        ---

        Output only the JSON object.
        """

        structured_findings_str = self.llm.analyze(final_prompt)
        try:
            # Clean up the string response from LLM to make it valid JSON
            clean_str = structured_findings_str.strip().replace('```json', '').replace('```', '').strip()
            structured_findings = json.loads(clean_str)
        except (json.JSONDecodeError, AttributeError) as e:
            print(f"AGENT ({self.name}): Error decoding JSON for {category}. Error: {e}")
            print(f"LLM Response was: {structured_findings_str}")
            return self._fallback(category, "Analysis resulted in invalid format.")

        if not isinstance(structured_findings, dict):
            # Valid JSON that is not an object (list, string, number) is unusable here.
            print(f"AGENT ({self.name}): Error decoding JSON for {category}. Error: expected a JSON object")
            print(f"LLM Response was: {structured_findings_str}")
            return self._fallback(category, "Analysis resulted in invalid format.")

        print(f"AGENT ({self.name}): Successfully analyzed category: {category}")
        return self._normalize(structured_findings, category)


class RegulatoryComplianceAgent:
    """
    Agent responsible for monitoring the regulatory landscape, especially
    for high-risk product categories.
    """
    def __init__(self, llm: "GeminiLLM", search_tool: "WebSearchTool", scraper_tool: "WebScraperTool"):
        self.llm = llm
        self.search_tool = search_tool
        self.scraper_tool = scraper_tool
        self.name = "Regulatory Compliance Agent (RCA)"

    @staticmethod
    def _is_official_source(url) -> bool:
        """Return True when the URL's host is a ``.gov`` domain or has an ``fda`` label.

        The host name is inspected rather than the whole URL, so a commercial
        site whose address merely contains "gov" is not treated as official.
        """
        from urllib.parse import urlparse

        if not isinstance(url, str):
            return False
        try:
            host = (urlparse(url).hostname or "").lower()
        except ValueError:
            # urlparse rejects some malformed URLs; such a link is not official.
            return False
        return host.endswith(".gov") or "fda" in host.split(".")

    def check_compliance(self, category: str, jurisdiction: str = "Georgia, USA"):
        """Assess the regulatory status of ``category`` in ``jurisdiction``.

        Categories whose name contains none of the high-risk keywords get an
        immediate "Go". Otherwise official pages found by a web search are
        scraped, summarised by the LLM, and condensed into a status.

        Returns:
            A dict with the keys "status" and "summary".
        """
        print(f"\nAGENT ({self.name}): Checking compliance for '{category}' in '{jurisdiction}'")

        # Focus on high-risk categories
        high_risk_keywords = ["vape", "delta", "kratom", "tobacco", "smoking"]
        if not any(keyword in category.lower() for keyword in high_risk_keywords):
            print(f"AGENT ({self.name}): Category '{category}' is low-risk. Compliance check not required.")
            return {"status": "Go", "summary": "Standard consumer product regulations apply."}

        # 1. Search for regulations
        search_query = f"new regulations for {category} in {jurisdiction} 2025"
        search_results = self.search_tool.search(search_query, max_results=7)

        if not search_results:
            return {"status": "Unknown", "summary": "Could not find regulatory information."}

        # 2. Scrape and analyze results
        analysis_summary = ""
        for result in search_results:
            url = result.get('href')
            if not self._is_official_source(url):
                continue
            content = self.scraper_tool.scrape(url)
            if content:
                prompt = f"""
                    Analyze the following text regarding regulations for '{category}' in {jurisdiction}.
                    Summarize key restrictions, age limits, licensing requirements, and any recent changes.
                    
                    Text:
                    ---
                    {content}
                    ---
                    """
                summary = self.llm.analyze(prompt)
                analysis_summary += f"\n\nSource: {url}\nSummary: {summary}"

        if not analysis_summary:
            return {"status": "Watch", "summary": "Could not verify specific regulations from official sources. Caution advised."}

        # 3. Determine compliance status
        final_prompt = f"""
        Based on the following regulatory summary for '{category}' in {jurisdiction}, determine a compliance status.
        Choose one: "Go" (Fully compliant, standard rules), "Watch" (Requires caution, changing landscape, new rules), or "No-Go" (Banned or high risk).
        Also, provide a brief summary of the key regulations.

        Format your response as a JSON object with two keys: "status" and "summary".

        Regulatory Summary:
        ---
        {analysis_summary}
        ---
        
        Output only the JSON object.
        """

        status_str = self.llm.analyze(final_prompt)
        try:
            clean_str = status_str.strip().replace('```json', '').replace('```', '').strip()
            status = json.loads(clean_str)
        except (json.JSONDecodeError, AttributeError):
            status = None

        if not isinstance(status, dict):
            # Invalid JSON, or valid JSON that is not an object.
            print(f"AGENT ({self.name}): Error decoding JSON for compliance status of {category}.")
            return {"status": "Error", "summary": "Failed to parse compliance analysis."}

        # Guarantee both documented keys so callers can index the result directly.
        status["status"] = str(status.get("status") or "Unknown")
        status["summary"] = str(status.get("summary") or "No summary provided.")
        print(f"AGENT ({self.name}): Successfully checked compliance for: {category}")
        return status

class ReportingAgent:
    """
    Agent responsible for synthesizing findings from other agents and
    generating the final report.
    """
    def __init__(self, llm: "GeminiLLM"):
        self.llm = llm
        self.name = "Reporting Agent (RA)"

    @staticmethod
    def _table_cell(value) -> str:
        """Render ``value`` so that it fits inside a single markdown table cell.

        Line breaks become ``<br>`` and pipe characters are backslash-escaped;
        either one left as-is would split the table row.
        """
        text = str(value).strip()
        # Literal backslash-n sequences were already converted by earlier versions.
        text = text.replace('\\n', '<br>')
        text = text.replace("|", "\\|")
        return "<br>".join(text.splitlines())

    def generate_report(self, market_data: list, compliance_data: dict):
        """Build the markdown report from market findings and compliance results.

        Args:
            market_data: One dict per category with "category", "trends",
                "sentiment" and "new_products".
            compliance_data: Maps a category name to a dict with "status" and
                "summary"; categories without an entry are shown as "N/A".

        Returns:
            The report as a markdown string with one table row per category.
        """
        print(f"\nAGENT ({self.name}): Generating final intelligence report.")

        report_sections = []
        for item in market_data:
            category = item['category']
            trends = item.get('trends', 'N/A')
            sentiment = item.get('sentiment', 'N/A')

            products = item.get('new_products') or []
            if isinstance(products, str):
                products = [products]
            # str() each entry: the LLM-produced list is not guaranteed to hold strings.
            products_text = ', '.join(str(p) for p in products) if products else 'None specified'

            compliance = compliance_data.get(category) or {"status": "N/A", "summary": "Not checked."}
            status = compliance.get('status', 'N/A')
            regulatory_summary = compliance.get('summary', 'N/A')

            section_prompt = f"""
            You are a senior purchasing analyst. Create a recommendation for the purchasing department based on the data below for the '{category}' category.
            
            Market Trends: {trends}
            Consumer Sentiment: {sentiment}
            New/Trending Products: {products_text}
            Regulatory Status ({status}): {regulatory_summary}
            
            Based on ALL the information above, formulate an "Agency Recommendation".
            This recommendation should be a short, actionable directive for the purchasing team. 
            For example: "High Priority Purchase", "Cautious Purchase", "Standard Restock", or "Avoid/Deprioritize".
            Justify your recommendation briefly based on the provided data (e.g., strong trend, regulatory risk).
            
            Provide only the text for the recommendation.
            """

            recommendation = self.llm.analyze(section_prompt)

            section = {
                "Product/Trend": category,
                "Key Trend Drivers & Consumer Sentiment": f"Drivers: {trends}\nSentiment: {sentiment}",
                "Regulatory Compliance (Georgia)": f"{status}: {regulatory_summary}",
                "Agency Recommendation": recommendation
            }
            report_sections.append(section)

        # Build the final markdown report
        report = f"""# Multi-Agent Purchasing Intelligence Agency: Daily Report

**Report Date:** {datetime.now().strftime("%B %d, %Y")}
**Generated By:** Autonomous Purchasing Intelligence Agency

This document outlines today's actionable intelligence report. The system is composed of three specialized agents working in concert to deliver timely and reliable market insights.

---

## Daily Intelligence Report

This section contains the high-priority market signals and product recommendations identified in the last 24 hours.

### **Top Trending Products & Categories**

| Product/Trend | Key Trend Drivers & Consumer Sentiment | Regulatory Compliance (Georgia) | Agency Recommendation |
| :--- | :--- | :--- | :--- |
"""
        cell = self._table_cell
        for s in report_sections:
            report += (
                "| **" + cell(s['Product/Trend']) + "** | "
                + cell(s['Key Trend Drivers & Consumer Sentiment']) + " | "
                + cell(s['Regulatory Compliance (Georgia)']) + " | "
                + cell(s['Agency Recommendation']) + " |\n"
            )
        return report

# --- Main Orchestration ---

if __name__ == "__main__":
    # Daily workflow: research every category, check its regulatory status,
    # then turn both result sets into one markdown report.
    print("--- Initializing AI Agency ---")

    # Initialize tools and agents
    llm = GeminiLLM(api_url=GEMINI_API_URL)
    search_tool = WebSearchTool()
    scraper_tool = WebScraperTool()

    market_agent = MarketResearchAgent(llm, search_tool, scraper_tool)
    regulatory_agent = RegulatoryComplianceAgent(llm, search_tool, scraper_tool)
    reporting_agent = ReportingAgent(llm)

    print("\n--- Starting Daily Workflow ---")

    # 1. Market Research Agent gathers market data
    all_market_data = []
    for category in PRODUCT_CATEGORIES:
        market_findings = market_agent.research_category(category)
        all_market_data.append(market_findings)
        time.sleep(1) # Pause between categories

    # 2. Regulatory Compliance Agent checks relevant categories
    all_compliance_data = {}
    for category in PRODUCT_CATEGORIES:
        compliance_status = regulatory_agent.check_compliance(category)
        all_compliance_data[category] = compliance_status
        time.sleep(1) # Pause between categories

    # 3. Reporting Agent synthesizes the findings and generates the report
    final_report = reporting_agent.generate_report(all_market_data, all_compliance_data)

    print("\n\n--- FINAL REPORT ---")
    print(final_report)

    # Save report to a file. The encoding is explicit because the report contains
    # LLM-generated text; the platform default encoding may not be able to
    # represent every character in it.
    with open("purchasing_intelligence_report.md", "w", encoding="utf-8") as f:
        f.write(final_report)

    print("\nReport saved to purchasing_intelligence_report.md")



--- Initializing AI Agency ---

--- Starting Daily Workflow ---

AGENT (Market Research Agent (MRA)): Starting research for category: Vape Devices and E-Liquids
TOOL: Performing web search for: 'new product trends in Vape Devices and E-Liquids 2025'
TOOL: Scraping URL: https://vapebuyingtips.com/2025-vaping-guide-latest-trends-devices-and-expert-insights/
LLM: Analyzing with prompt - '
                Based on the following article content, summarize the key market trends, 
         ...'
TOOL: Scraping URL: https://worldvapeusa.com/blog/the-future-of-vaping-trends-to-watch-in-2025/
LLM: Analyzing with prompt - '
                Based on the following article content, summarize the key market trends, 
         ...'
TOOL: Scraping URL: https://ecigator.com/lounge/vape-market-2025-trends/
LLM: Analyzing with prompt - '
                Based on the following article content, summarize the key market trends, 
         ...'
LLM: Analyzing with prompt - '
        You are a market analyst. Bas

In [7]:
import os
import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
from datetime import datetime
import json
import time

# Load the API key from the GEMINI_API_KEY environment variable so that no
# credential is stored in the notebook source or its saved outputs.
# NOTE(review): a literal key was previously committed on this line; treat it as
# leaked and rotate it.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "").strip()
if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY not found. Please set the GEMINI_API_KEY environment variable before running this cell.")

GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key={GEMINI_API_KEY}"

# Cleaned and grouped product categories based on your list
PRODUCT_CATEGORIES = [
    "Vape Devices, Vaporizers, and E-Liquids",
    "Delta-8, Delta-10, and Kratom Products",
    "Hemp-Derived and CBD Products",
    "Hookah and Shisha Tobacco",
    "General Smoking Products (Cigars, Rolling Papers)",
    "Beverages (Energy Drinks, Sodas)",
    "Candy and Snacks (including Mexican specialties)",
    "General Grocery and Merchandise",
    "Adult Novelty and Glassware",
    "Store Supplies (Displays, Disposables)"
]
JURISDICTION = "Georgia, USA" # Define jurisdiction for regulatory checks

# --- Core Components: Tools ---

class WebSearchTool:
    """DuckDuckGo-backed web search helper shared by the agents."""

    def search(self, query: str, max_results: int = 5):
        """Search DuckDuckGo for ``query`` and return the results as a list.

        ``max_results`` is forwarded to ``DDGS.text``. Any exception raised
        while searching is printed and results in an empty list.
        """
        print(f"TOOL: Searching for: '{query}'")
        found = []
        try:
            with DDGS() as session:
                found.extend(session.text(query, max_results=max_results))
        except Exception as exc:
            print(f"Error during web search: {exc}")
            return []
        return found

class WebScraperTool:
    """Downloads a page and extracts its readable text."""

    # Elements removed before text extraction.
    def scrape(self, url: str):
        """Fetch ``url`` and return up to 8000 characters of cleaned text.

        ``script``, ``style``, ``nav``, ``footer`` and ``header`` elements are
        removed first. Lines are stripped and split on double spaces, and empty
        fragments are discarded. Returns ``None`` (after printing the error)
        when the request raises a ``requests.RequestException``.
        """
        print(f"TOOL: Scraping URL: {url}")
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        try:
            reply = requests.get(url, headers=request_headers, timeout=15)
            reply.raise_for_status()
            document = BeautifulSoup(reply.text, 'html.parser')
            for unwanted in document(["script", "style", "nav", "footer", "header"]):
                unwanted.decompose()

            pieces = [
                fragment.strip()
                for row in document.get_text().splitlines()
                for fragment in row.strip().split("  ")
            ]
            cleaned = '\n'.join(filter(None, pieces))
            # Larger window than before to give the LLM more context.
            return cleaned[:8000]
        except requests.RequestException as problem:
            print(f"Error scraping {url}: {problem}")
            return None

class GeminiLLM:
    """A wrapper for the Gemini LLM API.

    Args:
        api_url: Endpoint URL that prompts are POSTed to.
        timeout: Seconds to wait for the HTTP response before giving up.
        request_delay: Seconds to sleep before every request (crude rate limiting).
    """
    def __init__(self, api_url: str, timeout: float = 60, request_delay: float = 2):
        self.api_url = api_url
        self.timeout = timeout
        self.request_delay = request_delay

    def _make_request(self, prompt: str):
        """POST ``prompt`` with the generation settings and return the decoded JSON.

        Returns ``None`` (after printing the error) when the request fails or
        the response body is not valid JSON.
        """
        payload = {
            "contents": [{"role": "user", "parts": [{"text": prompt}]}],
            "generationConfig": {
                "temperature": 0.5,
                "topP": 0.95,
                "topK": 40,
            }
        }
        headers = {'Content-Type': 'application/json'}
        time.sleep(self.request_delay) # Avoid hitting rate limits
        try:
            # Without a timeout a stalled connection would block the notebook forever.
            response = requests.post(self.api_url, headers=headers, json=payload, timeout=self.timeout)
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            # NOTE(review): the exception text can include the request URL, and
            # api_url may carry the API key as a query parameter - avoid sharing
            # this output.
            print(f"Error calling Gemini API: {e}")
            # A requests.Response is falsy for 4xx/5xx statuses, so compare against
            # None; a plain truthiness test hides the body of every error response.
            error_response = getattr(e, "response", None)
            if error_response is not None:
                print(f"Response body: {error_response.text}")
            return None
        except ValueError as e:
            # Some requests versions raise a plain ValueError for a non-JSON body.
            print(f"Error calling Gemini API: {e}")
            return None

    def analyze(self, prompt: str):
        """Send ``prompt`` to Gemini and return the response text.

        Returns the text of the first part of the first candidate, or a fixed
        error string when the response has no such text (failed request, empty
        ``candidates`` list, missing ``content``/``parts``/``text``).
        """
        print(f"LLM: Analyzing with prompt - '{prompt[:120]}...'")
        result = self._make_request(prompt)
        try:
            text = result['candidates'][0]['content']['parts'][0]['text']
        except (TypeError, KeyError, IndexError):
            text = None
        if isinstance(text, str):
            return text
        print(f"LLM Error: Could not parse response. Full response: {result}")
        return "Error: Analysis failed due to an invalid API response."

    def analyze_json(self, prompt: str):
        """Analyzes a prompt and expects a JSON object in return.

        Returns:
            The parsed dict. When the reply is not valid JSON, or is valid JSON
            but not an object, a dict with the keys "error" and "raw_response"
            is returned instead so callers can always treat the result as a dict.
        """
        raw_response = self.analyze(prompt)
        try:
            # Clean up the string response from LLM
            clean_str = raw_response.strip().replace('```json', '').replace('```', '').strip()
            parsed = json.loads(clean_str)
        except (json.JSONDecodeError, AttributeError) as e:
            print(f"LLM JSON Error: Failed to decode JSON. Error: {e}")
            print(f"LLM Response was: {raw_response}")
            return {"error": "Failed to parse JSON response from LLM.", "raw_response": raw_response}
        if not isinstance(parsed, dict):
            print("LLM JSON Error: Expected a JSON object.")
            print(f"LLM Response was: {raw_response}")
            return {"error": "LLM returned JSON that is not an object.", "raw_response": raw_response}
        return parsed

# --- Agent Definitions ---

class MarketResearchAgent:
    """Agent that scans the web to find trending products and market sentiment."""
    def __init__(self, llm: "GeminiLLM", search_tool: "WebSearchTool", scraper_tool: "WebScraperTool"):
        self.llm = llm
        self.search_tool = search_tool
        self.scraper_tool = scraper_tool
        self.name = "Market Research Agent (MRA)"

    def research_category(self, category: str):
        """Gather web content about ``category`` and have the LLM analyse it.

        Three searches are run; every distinct result URL is scraped once and
        the combined text is sent to the LLM in a single prompt.

        Returns:
            The LLM's JSON analysis as a dict with "category" added. The dict
            carries an "error" key when no content could be gathered or the LLM
            reply could not be used.
        """
        print(f"\nAGENT ({self.name}): Starting research for category: {category}")

        search_queries = [
            f"new product trends in {category} 2024 2025",
            f"top selling {category} products for wholesale distributors",
            f"consumer sentiment and reviews {category}"
        ]

        all_content = ""
        seen_urls = set()
        for query in search_queries:
            search_results = self.search_tool.search(query, max_results=7)
            for result in search_results:
                url = result.get('href')
                # The three queries overlap heavily; scraping a page twice only
                # wastes requests and duplicates text in the prompt.
                if not url or url in seen_urls:
                    continue
                seen_urls.add(url)
                content = self.scraper_tool.scrape(url)
                if content:
                    title = result.get('title', 'Untitled')
                    all_content += f"\n\n--- Source: {title} ({url}) ---\n{content}"

        if not all_content:
            print(f"AGENT ({self.name}): No content found for {category}.")
            return {"category": category, "error": "No content could be gathered from web search."}

        prompt = f"""
        You are a senior market analyst for a large CPG wholesale distributor.
        Analyze the following compilation of articles and search results about the '{category}' category.
        
        Your task is to extract actionable intelligence for the purchasing department.
        Based on the text, provide a structured JSON output with the following keys:
        - "emerging_trends": A list of 2-4 key emerging trends (e.g., "New flavor profiles", "High-potency options", "Eco-friendly packaging").
        - "key_drivers": A string explaining the main factors driving these trends (e.g., "Gen Z preferences", "Regulatory changes", "Health-consciousness").
        - "consumer_sentiment": A single descriptive string: "Very Positive", "Positive", "Mixed", "Cautious", or "Negative".
        - "new_product_examples": A list of 3-5 specific product names, types, or brands mentioned as new, popular, or innovative.
        
        Article Compilation:
        ---
        {all_content}
        ---
        
        Output ONLY the JSON object. Do not include any other text or markdown formatting.
        """

        analysis = self.llm.analyze_json(prompt)
        if not isinstance(analysis, dict):
            # Valid JSON that is not an object cannot take the "category" key.
            analysis = {"error": "LLM returned JSON that is not an object.", "raw_response": analysis}
        analysis['category'] = category
        if "error" in analysis:
            print(f"AGENT ({self.name}): Analysis failed for category: {category}")
        else:
            print(f"AGENT ({self.name}): Successfully analyzed category: {category}")
        return analysis

class RegulatoryComplianceAgent:
    """Agent that monitors the regulatory landscape for product categories."""
    def __init__(self, llm: "GeminiLLM", search_tool: "WebSearchTool", scraper_tool: "WebScraperTool"):
        self.llm = llm
        self.search_tool = search_tool
        self.scraper_tool = scraper_tool
        self.name = "Regulatory Compliance Agent (RCA)"

    @staticmethod
    def _is_preferred_source(url) -> bool:
        """Return True for hosts ending in ``.gov``/``.org`` or having an ``fda`` label.

        Only the host name is inspected, so a commercial page whose path merely
        contains ".gov" or ".org" is not accepted.
        NOTE(review): ``.org`` domains are not necessarily official sources; the
        original filter accepted them, so they are kept.
        """
        from urllib.parse import urlparse

        if not isinstance(url, str):
            return False
        try:
            host = (urlparse(url).hostname or "").lower()
        except ValueError:
            # urlparse rejects some malformed URLs; such a link is not usable.
            return False
        return host.endswith((".gov", ".org")) or "fda" in host.split(".")

    def check_compliance(self, category: str, jurisdiction: str):
        """Assess the regulatory risk of selling ``category`` in ``jurisdiction``.

        Categories whose name contains none of the high-risk keywords get an
        immediate low-risk "Go". Otherwise pages from preferred sources are
        scraped (each distinct URL once) and analysed by the LLM.

        Returns:
            A dict with "status", "risk_level" and "summary", or the LLM error
            dict (containing an "error" key) when its reply could not be used.
        """
        print(f"\nAGENT ({self.name}): Checking compliance for '{category}' in '{jurisdiction}'")

        high_risk_keywords = ["vape", "delta", "kratom", "tobacco", "smoking", "hemp", "cbd", "adult"]
        if not any(keyword in category.lower() for keyword in high_risk_keywords):
            print(f"AGENT ({self.name}): Category '{category}' is low-risk. Standard regulations apply.")
            return {"status": "Go", "risk_level": "Low", "summary": "Standard consumer product regulations apply. No specific high-risk issues identified."}

        search_queries = [
            f"laws and regulations for selling {category} in {jurisdiction}",
            f"new {category} legislation {jurisdiction} 2024",
            f"{category} FDA regulations USA"
        ]

        all_content = ""
        seen_urls = set()
        for query in search_queries:
            search_results = self.search_tool.search(query, max_results=7)
            for result in search_results:
                url = result.get('href')
                # Prioritize official sources, and scrape each page only once
                # even if several queries return it.
                if url in seen_urls or not self._is_preferred_source(url):
                    continue
                seen_urls.add(url)
                content = self.scraper_tool.scrape(url)
                if content:
                    title = result.get('title', 'Untitled')
                    all_content += f"\n\n--- Source: {title} ({url}) ---\n{content}"

        if not all_content:
            return {"status": "Watch", "risk_level": "Medium", "summary": "Could not find definitive regulatory information from official sources. Proceed with caution and seek legal advice."}

        prompt = f"""
        You are a compliance officer specializing in regulated consumer goods.
        Analyze the following text regarding regulations for '{category}' in {jurisdiction}.
        
        Your task is to assess the risk and summarize the compliance landscape.
        Provide a structured JSON output with three keys:
        - "status": Choose one: "Go" (Seems compliant, stable rules), "Watch" (Evolving landscape, new rules pending, requires caution), or "No-Go" (Banned, heavily restricted, or high legal risk).
        - "risk_level": Choose one: "Low", "Medium", "High".
        - "summary": A concise paragraph detailing key restrictions (age limits, potency caps, flavor bans, licensing) and any recent or upcoming legal changes.
        
        Regulatory Text Compilation:
        ---
        {all_content}
        ---
        
        Output ONLY the JSON object.
        """

        status = self.llm.analyze_json(prompt)
        if not isinstance(status, dict):
            # Callers test `"error" in status`, so always hand back a dict.
            status = {"error": "LLM returned JSON that is not an object.", "raw_response": status}
        if "error" in status:
            print(f"AGENT ({self.name}): Compliance analysis failed for: {category}")
        else:
            print(f"AGENT ({self.name}): Successfully checked compliance for: {category}")
        return status

class ReportingAgent:
    """Agent that synthesizes all findings and generates the final intelligence report."""
    def __init__(self, llm: "GeminiLLM"):
        self.llm = llm
        self.name = "Reporting Agent (RA)"

    @staticmethod
    def _join_items(value) -> str:
        """Render an LLM-provided list (or stray scalar) as comma-separated text.

        A plain string is returned unchanged; joining it would separate every
        character. Missing or empty values render as "N/A".
        """
        if value is None or value == "" or value == []:
            return 'N/A'
        if isinstance(value, str):
            return value
        if isinstance(value, (list, tuple, set)):
            return ', '.join(str(entry) for entry in value)
        return str(value)

    def _calculate_opportunity_score(self, market_data, compliance_data):
        """Combine consumer sentiment and regulatory risk into a 0-100 score.

        Starts at 50, adds the sentiment bonus (unknown sentiment: 0) and the
        risk adjustment (unknown risk: -10), and clamps the result. A status
        containing "No-Go" forces the score to 0. Labels are matched
        case-insensitively; non-string values count as unknown.
        """
        status = compliance_data.get("status")
        if "No-Go" in str(status or ""):
            return 0

        score = 50 # Start with a neutral score

        sentiment_map = {"very positive": 25, "positive": 15, "mixed": 0, "cautious": -10, "negative": -25}
        sentiment = market_data.get("consumer_sentiment")
        if isinstance(sentiment, str):
            score += sentiment_map.get(sentiment.strip().lower(), 0)

        risk_map = {"low": 15, "medium": -10, "high": -30}
        risk = compliance_data.get("risk_level")
        if isinstance(risk, str):
            score += risk_map.get(risk.strip().lower(), -10)
        else:
            score += -10

        return max(0, min(100, score)) # Clamp score between 0 and 100

    def generate_report(self, all_data: list):
        """Produce the markdown report for all analysed categories.

        Args:
            all_data: One dict per category with "category", "market_analysis",
                "compliance_analysis" and "opportunity_score".

        Returns:
            The report as a markdown string: an LLM-written executive summary
            followed by one section per category, highest score first.
        """
        print(f"\nAGENT ({self.name}): Generating final intelligence report.")

        # 1. Generate Executive Summary
        summary_prompt = f"""
        You are the Director of Purchasing Intelligence. Review the following summarized findings for various product categories.
        Write a 2-3 paragraph Executive Summary for the Head of Purchasing.
        Highlight the most significant opportunities (high score, positive sentiment) and the most critical risks (high risk, 'No-Go' status).
        Mention 2-3 specific categories that require immediate attention, either for purchasing or for avoidance.
        
        Findings:
        ---
        {json.dumps(all_data, indent=2, default=str)}
        ---
        
        Provide only the text for the executive summary.
        """
        executive_summary = self.llm.analyze(summary_prompt)

        # 2. Build the detailed report sections
        report_sections = ""
        # Sort by opportunity score, descending
        sorted_data = sorted(all_data, key=lambda x: x.get('opportunity_score', 0), reverse=True)

        for item in sorted_data:
            category = item['category']
            score = item.get('opportunity_score', 0)
            # `or {}` also covers an explicit None stored under the key.
            market = item.get('market_analysis') or {}
            compliance = item.get('compliance_analysis') or {}

            sentiment = market.get('consumer_sentiment', 'N/A')
            drivers = market.get('key_drivers', 'N/A')
            trends_text = self._join_items(market.get('emerging_trends'))
            examples_text = self._join_items(market.get('new_product_examples'))
            status = compliance.get('status', 'N/A')
            risk_level = compliance.get('risk_level', 'N/A')
            regulatory_summary = compliance.get('summary', 'N/A')

            # Create the recommendation for this specific section
            recommendation_prompt = f"""
            As a Senior Purchasing Analyst, write a final "Actionable Recommendation" for the '{category}' category based *only* on the data below.
            
            - Market Trends: {trends_text}
            - Key Drivers: {drivers}
            - Consumer Sentiment: {sentiment}
            - New Product Examples: {examples_text}
            - Regulatory Status ({JURISDICTION}): {status} ({risk_level} Risk)
            - Regulatory Summary: {regulatory_summary}
            - Calculated Opportunity Score: {score}/100

            Your recommendation should be a concise paragraph. Justify your conclusion by weighing the market opportunity against the regulatory risks.
            For example: 'High Priority: The market shows strong positive sentiment and clear trends toward X. Although regulatory risk is Medium, the opportunity justifies a cautious initial purchase of the listed product types. Focus on compliant brands.'
            OR
            'Avoid: Despite some market interest, the 'No-Go' regulatory status makes this category an unacceptable risk.'

            Provide only the text for the recommendation.
            """
            recommendation = self.llm.analyze(recommendation_prompt)

            report_sections += f"""
### {category}
- **Opportunity Score:** {score}/100
- **Market Analysis:**
    - **Consumer Sentiment:** {sentiment}
    - **Emerging Trends:** {trends_text}
    - **Key Drivers:** {drivers}
    - **Trending Product Examples:** {examples_text}
- **Regulatory Analysis ({JURISDICTION}):**
    - **Status:** {status}
    - **Risk Level:** {risk_level}
    - **Summary:** {regulatory_summary}
- **Actionable Recommendation:**
    - {recommendation}
---
"""

        # 3. Assemble final report
        final_report = f"""# Purchasing Intelligence Report

**Date:** {datetime.now().strftime("%B %d, %Y")}
**Prepared by:** Autonomous Intelligence Agency

## 1. Executive Summary
{executive_summary}

---

## 2. Detailed Category Analysis

This section provides a detailed breakdown of each product category, ranked by opportunity score.

{report_sections}
"""
        return final_report

# --- Main Orchestration ---

class Orchestrator:
    """Builds the tools and agents and runs the end-to-end intelligence workflow."""

    def __init__(self):
        """Create one shared LLM client and web tools, then hand them to each agent."""
        print("--- Initializing AI Agency ---")
        llm = GeminiLLM(api_url=GEMINI_API_URL)
        searcher = WebSearchTool()
        scraper = WebScraperTool()

        self.llm = llm
        self.search_tool = searcher
        self.scraper_tool = scraper
        self.market_agent = MarketResearchAgent(llm, searcher, scraper)
        self.regulatory_agent = RegulatoryComplianceAgent(llm, searcher, scraper)
        self.reporting_agent = ReportingAgent(llm)

    def run(self):
        """Research every category, score it, then print and save the final report.

        Categories whose market or compliance result contains an "error" key are
        left out. Returns None; when nothing was collected no report is written.
        """
        print("\n--- Starting Daily Intelligence Workflow ---")

        collected = []
        for category in PRODUCT_CATEGORIES:
            # Steps 1 and 2: market research, then the regulatory check, each
            # followed by a one-second pause.
            findings = self.market_agent.research_category(category)
            time.sleep(1)
            compliance = self.regulatory_agent.check_compliance(category, JURISDICTION)
            time.sleep(1)

            # Step 3: only categories with clean data on both sides get scored.
            usable = "error" not in findings and "error" not in compliance
            if not usable:
                print(f"Skipping report generation for {category} due to data gathering errors.")
                continue

            score = self.reporting_agent._calculate_opportunity_score(findings, compliance)
            entry = {
                "category": category,
                "market_analysis": findings,
                "compliance_analysis": compliance,
                "opportunity_score": score,
            }
            collected.append(entry)

        # Step 4: final report generation.
        if len(collected) == 0:
            print("\nCould not gather sufficient data to generate a report.")
            return

        report_text = self.reporting_agent.generate_report(collected)

        print("\n\n--- FINAL REPORT ---")
        print(report_text)

        # Persist the report as a dated Markdown file in the working directory.
        stamp = datetime.now().strftime('%Y-%m-%d')
        filename = f"purchasing_report_{stamp}.md"
        with open(filename, "w", encoding='utf-8') as report_file:
            report_file.write(report_text)

        print(f"\nReport saved to {filename}")


# Entry point: build the orchestrator (which constructs every tool and agent)
# and run the whole workflow once.
if __name__ == "__main__":
    orchestrator = Orchestrator()
    orchestrator.run()

--- Initializing AI Agency ---

--- Starting Daily Intelligence Workflow ---

AGENT (Market Research Agent (MRA)): Starting research for category: Vape Devices, Vaporizers, and E-Liquids
TOOL: Searching for: 'new product trends in Vape Devices, Vaporizers, and E-Liquids 2024 2025'
TOOL: Scraping URL: https://ecigator.com/lounge/vape-market-2025-trends/
TOOL: Scraping URL: https://vapejuice.com/blogs/vape-juice-news/new-vapes-top-10
TOOL: Searching for: 'top selling Vape Devices, Vaporizers, and E-Liquids products for wholesale distributors'
TOOL: Scraping URL: https://vaperanger.com/
TOOL: Scraping URL: https://myvapereview.com/best-vapes/best-vape-wholesale-sites-us/
TOOL: Searching for: 'consumer sentiment and reviews Vape Devices, Vaporizers, and E-Liquids'
TOOL: Scraping URL: https://pmc.ncbi.nlm.nih.gov/articles/PMC6115599/
TOOL: Scraping URL: https://www.consumeraffairs.com/news/e-cigarette-news/
Error scraping https://www.consumeraffairs.com/news/e-cigarette-news/: 403 Client Er

In [8]:
import os
import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
from datetime import datetime
import json
import time

# Securely load API key.
# The key is read from the GEMINI_API_KEY environment variable rather than being
# written into the notebook: a key stored in source is leaked to everyone who can
# read the file (or its history) and has to be revoked and rotated.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "").strip()
if not GEMINI_API_KEY:
    # Fail fast so the run does not spend minutes scraping before the first
    # LLM call is rejected.
    raise ValueError("GEMINI_API_KEY not found. Please set it as an environment variable.")
GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key={GEMINI_API_KEY}"

# Cleaned and grouped product categories based on your list
# Each entry is interpolated verbatim into the agents' search queries and LLM
# prompts. The wording also matters for RegulatoryComplianceAgent, which decides
# whether to run a regulatory search by looking for keywords as substrings of
# the lower-cased category name.
PRODUCT_CATEGORIES = [
    "Vape Devices, Vaporizers, and E-Liquids",
    "Delta-8, Delta-10, and Kratom Products",
    "Hemp-Derived and CBD Products",
    "Hookah and Shisha Tobacco",
    "General Smoking Products (Cigars, Rolling Papers)",
    "Beverages (Energy Drinks, Sodas)",
    "Candy and Snacks (including Mexican specialties)",
    "General Grocery and Merchandise",
    "Adult Novelty and Glassware",
    "Store Supplies (Displays, Disposables)"
]
# Passed to check_compliance() and echoed in the report's regulatory headings.
JURISDICTION = "Georgia, USA" # Define jurisdiction for regulatory checks

# --- Core Components: Tools ---

class WebSearchTool:
    """Thin helper that runs DuckDuckGo text searches."""
    def search(self, query: str, max_results: int = 10):
        """Search for `query` and return the hits as a list.

        Any exception raised while searching is printed and swallowed, in which
        case an empty list is returned.
        """
        print(f"TOOL: Searching for: '{query}'")
        try:
            with DDGS() as client:
                hits = list(client.text(query, max_results=max_results))
        except Exception as e:
            print(f"Error during web search: {e}")
            return []
        return hits

class WebScraperTool:
    """Fetches a web page and reduces it to plain text."""
    def scrape(self, url: str):
        """Download `url` and return up to 8000 characters of cleaned text.

        Script, style and page-chrome elements are removed before text
        extraction. Returns None when the request fails (the error is printed).
        """
        print(f"TOOL: Scraping URL: {url}")
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        try:
            page = requests.get(url, headers=headers, timeout=15)
            page.raise_for_status()
            soup = BeautifulSoup(page.text, 'html.parser')
            for tag in soup.find_all(["script", "style", "nav", "footer", "header", "aside"]):
                tag.decompose()

            # Split every line on double spaces and keep only non-empty pieces,
            # one per output line.
            pieces = []
            for raw_line in soup.get_text().splitlines():
                for phrase in raw_line.strip().split("  "):
                    cleaned = phrase.strip()
                    if cleaned:
                        pieces.append(cleaned)
            return '\n'.join(pieces)[:8000]
        except requests.RequestException as e:
            print(f"Error scraping {url}: {e}")
            return None

class GeminiLLM:
    """A wrapper for the Gemini LLM API.

    Public methods report failures through their return value (an error string
    or an error dict) instead of raising, so callers can keep going when a
    single call fails.
    """
    def __init__(self, api_url: str, timeout: float = 60):
        """
        Args:
            api_url: Full generateContent endpoint URL (including the API key).
            timeout: Seconds to wait for the HTTP response before giving up.
        """
        self.api_url = api_url
        self.timeout = timeout

    def _make_request(self, prompt: str):
        """POST `prompt` to the API.

        Returns:
            The decoded JSON response body, or None when the request fails or
            the body is not valid JSON.
        """
        payload = {
            "contents": [{"role": "user", "parts": [{"text": prompt}]}],
            "generationConfig": {"temperature": 0.5, "topP": 0.95, "topK": 40}
        }
        headers = {'Content-Type': 'application/json'}
        time.sleep(2)  # pause before every call (presumably to stay under API rate limits)
        try:
            # Without a timeout a stalled connection would hang the whole run.
            response = requests.post(self.api_url, headers=headers, json=payload, timeout=self.timeout)
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError) as e:
            print(f"Error calling Gemini API: {e}")
            # A requests.Response is falsy for 4xx/5xx statuses, so it must be
            # compared with None; a truthiness test would hide the error body
            # in exactly the cases where it is useful.
            error_response = getattr(e, 'response', None)
            if error_response is not None:
                print(f"Response body: {error_response.text}")
            return None

    def analyze(self, prompt: str):
        """Send `prompt` and return the text of the first candidate.

        Returns:
            The generated text, or a string starting with "Error:" when the
            request failed or the response has no usable candidate (missing or
            empty "candidates", missing "content"/"parts"/"text").
        """
        print(f"LLM: Analyzing with prompt - '{prompt[:120]}...'")
        result = self._make_request(prompt)
        try:
            text = result['candidates'][0]['content']['parts'][0]['text']
        except (TypeError, KeyError, IndexError):
            text = None
        if isinstance(text, str):
            return text
        print(f"LLM Error: Could not parse response. Full response: {result}")
        return "Error: Analysis failed due to an invalid API response."

    def analyze_json(self, prompt: str):
        """Send `prompt` and parse the reply as a JSON object.

        Markdown code fences around the reply are removed before parsing.

        Returns:
            The parsed dict, or {"error": ..., "raw_response": ...} when the
            reply is not valid JSON or is valid JSON but not an object (callers
            rely on a dict and test for the "error" key).
        """
        raw_response = self.analyze(prompt)
        try:
            clean_str = raw_response.strip().replace('```json', '').replace('```', '').strip()
            parsed = json.loads(clean_str)
        except (json.JSONDecodeError, AttributeError) as e:
            print(f"LLM JSON Error: Failed to decode JSON. Error: {e}\nLLM Response was: {raw_response}")
            return {"error": "Failed to parse JSON response from LLM.", "raw_response": raw_response}
        if not isinstance(parsed, dict):
            print(f"LLM JSON Error: Expected a JSON object but got {type(parsed).__name__}.\nLLM Response was: {raw_response}")
            return {"error": "Failed to parse JSON response from LLM.", "raw_response": raw_response}
        return parsed

# --- Agent Definitions ---

class MarketResearchAgent:
    """Agent that finds trending products, market sentiment, and innovations."""
    # Annotations are quoted so the class can be defined without the tool
    # classes already existing (e.g. when notebook cells are run piecemeal).
    def __init__(self, llm: "GeminiLLM", search_tool: "WebSearchTool", scraper_tool: "WebScraperTool"):
        self.llm, self.search_tool, self.scraper_tool = llm, search_tool, scraper_tool
        self.name = "Market Research Agent (MRA)"

    def research_category(self, category: str):
        """Search, scrape and summarise market intelligence for one category.

        Args:
            category: Product category name, interpolated into the search
                queries and the LLM prompt.

        Returns:
            {"analysis": dict, "sources": list of URLs}. "analysis" is the
            LLM's JSON object with "category" added, or a dict containing an
            "error" key when nothing could be scraped or the reply was not
            parseable. "sources" lists each scraped URL once, in first-seen order.
        """
        print(f"\nAGENT ({self.name}): Starting research for category: {category}")
        search_queries = [
            f"new product trends in {category} 2024 2025",
            f"top selling {category} products for wholesale distributors",
            f"consumer sentiment and reviews {category}",
            f"future innovations and upcoming products in {category}"
        ]
        all_content, sources = "", []
        attempted = set()  # URLs already tried, successful or not
        for query in search_queries:
            for result in self.search_tool.search(query, max_results=7):
                url = result.get('href')
                # The same page is often returned by several queries; scraping
                # it again costs a network round trip and duplicates its text
                # in the prompt. Results without a link cannot be scraped.
                if not url or url in attempted:
                    continue
                attempted.add(url)
                content = self.scraper_tool.scrape(url)
                if content:
                    all_content += f"\n\n--- Source: {result.get('title', url)} ({url}) ---\n{content}"
                    sources.append(url)

        if not all_content:
            return {"analysis": {"category": category, "error": "No content could be gathered."}, "sources": []}

        prompt = f"""
        You are a senior market analyst for a large CPG wholesale distributor.
        Analyze the following compilation of articles about the '{category}' category.
        Your task is to extract actionable intelligence for the purchasing department.
        Provide a structured JSON output with the following keys:
        - "emerging_trends": A list of 2-4 key emerging trends (e.g., "New flavor profiles", "High-potency options", "Eco-friendly packaging").
        - "key_drivers": A string explaining the main factors driving these trends (e.g., "Gen Z preferences", "Regulatory changes", "Health-consciousness").
        - "consumer_sentiment": A single descriptive string: "Very Positive", "Positive", "Mixed", "Cautious", or "Negative".
        - "currently_trending_products": A list of 3-5 specific product types or brands that are popular *right now*.
        - "upcoming_innovations": A list of 2-4 product types, technologies, or concepts that are on the horizon or in early stages of market entry.

        Article Compilation:\n---\n{all_content}\n---
        
        Output ONLY the JSON object.
        """
        analysis = self.llm.analyze_json(prompt)
        if 'error' not in analysis:
            analysis['category'] = category
            print(f"AGENT ({self.name}): Successfully analyzed category: {category}")
        return {"analysis": analysis, "sources": sources}

class RegulatoryComplianceAgent:
    """Agent that monitors the regulatory landscape."""
    # Annotations are quoted so the class can be defined without the tool
    # classes already existing (e.g. when notebook cells are run piecemeal).
    def __init__(self, llm: "GeminiLLM", search_tool: "WebSearchTool", scraper_tool: "WebScraperTool"):
        self.llm, self.search_tool, self.scraper_tool = llm, search_tool, scraper_tool
        self.name = "Regulatory Compliance Agent (RCA)"

    @staticmethod
    def _is_official_source(url: str) -> bool:
        """Return True when the URL's host is a .gov/.org site or an fda.* host.

        Only the hostname is inspected: matching the whole URL would also accept
        any page whose path or query string merely contains ".gov" or ".org".
        """
        from urllib.parse import urlparse  # local import keeps the block self-contained
        try:
            host = (urlparse(url).hostname or "").lower()
        except ValueError:  # malformed URL
            return False
        return host.endswith(('.gov', '.org')) or 'fda.' in host

    def check_compliance(self, category: str, jurisdiction: str):
        """Assess the regulatory status of `category` in `jurisdiction`.

        Categories whose lower-cased name contains none of the high-risk
        keywords are returned as "Go"/"Low" without any search. Otherwise
        official-looking pages are scraped and summarised by the LLM.

        Returns:
            {"analysis": dict, "sources": list of URLs}. "analysis" carries
            "status", "risk_level" and "summary", or an "error" key when the
            LLM reply could not be parsed. With no usable source the result is
            a fixed "Watch"/"Medium" analysis.
        """
        print(f"\nAGENT ({self.name}): Checking compliance for '{category}' in '{jurisdiction}'")
        high_risk_keywords = ["vape", "delta", "kratom", "tobacco", "smoking", "hemp", "cbd", "adult"]
        if not any(keyword in category.lower() for keyword in high_risk_keywords):
            return {"analysis": {"status": "Go", "risk_level": "Low", "summary": "Standard consumer product regulations apply."}, "sources": []}

        search_queries = [
            f"laws and regulations for selling {category} in {jurisdiction}",
            f"new {category} legislation {jurisdiction} 2024",
            f"{category} FDA regulations USA"
        ]
        all_content, sources = "", []
        attempted = set()  # URLs already tried, so repeated hits are not re-scraped
        for query in search_queries:
            for result in self.search_tool.search(query, max_results=7):
                url = result.get('href')
                if not url or url in attempted or not self._is_official_source(url):
                    continue
                attempted.add(url)
                content = self.scraper_tool.scrape(url)
                if content:
                    all_content += f"\n\n--- Source: {result.get('title', url)} ({url}) ---\n{content}"
                    sources.append(url)

        if not all_content:
            return {"analysis": {"status": "Watch", "risk_level": "Medium", "summary": "Could not find definitive regulatory information."}, "sources": []}

        prompt = f"""
        You are a compliance officer. Analyze the text regarding regulations for '{category}' in {jurisdiction}.
        Provide a structured JSON output with three keys:
        - "status": Choose one: "Go", "Watch", or "No-Go".
        - "risk_level": Choose one: "Low", "Medium", "High".
        - "summary": A concise paragraph detailing key restrictions (age limits, potency caps, flavor bans, licensing) and any recent or upcoming legal changes.
        
        Regulatory Text:\n---\n{all_content}\n---
        
        Output ONLY the JSON object.
        """
        status = self.llm.analyze_json(prompt)
        # Only claim success when the LLM reply was actually parsed.
        if 'error' not in status:
            print(f"AGENT ({self.name}): Successfully checked compliance for: {category}")
        return {"analysis": status, "sources": sources}

class ReportingAgent:
    """Agent that synthesizes all findings into a detailed report."""
    # The annotation is quoted so the class can be defined without GeminiLLM
    # already existing (e.g. when notebook cells are run piecemeal).
    def __init__(self, llm: "GeminiLLM"):
        self.llm = llm
        self.name = "Reporting Agent (RA)"

    @staticmethod
    def _format_list(value, default: str = 'N/A') -> str:
        """Render an LLM-provided field as comma-separated text.

        The LLM is asked for lists but may return a bare string, a list with
        non-string items, or nothing at all; none of those may crash the report.
        """
        if isinstance(value, (list, tuple)):
            items = [str(entry) for entry in value if entry is not None]
            return ', '.join(items) if items else default
        if value is None or value == "":
            return default
        return str(value)

    def _calculate_opportunity_score(self, market_analysis, compliance_analysis):
        """Combine sentiment and regulatory risk into a 0-100 score.

        Starts at 50, adds a sentiment adjustment (unknown sentiment: 0) and a
        risk adjustment (unknown risk: -15), then clamps to [0, 100]. A status
        containing "No-Go" always yields 0. Values that are missing, null or
        not strings are treated as unknown instead of raising.
        """
        status = compliance_analysis.get("status")
        if isinstance(status, str) and "No-Go" in status:
            return 0

        sentiment_map = {"Very Positive": 25, "Positive": 15, "Mixed": 0, "Cautious": -10, "Negative": -25}
        risk_map = {"Low": 15, "Medium": -15, "High": -35}
        sentiment = market_analysis.get("consumer_sentiment")
        risk_level = compliance_analysis.get("risk_level")

        score = 50
        # isinstance guards keep unhashable values (e.g. a list) out of the dict lookup.
        score += sentiment_map.get(sentiment, 0) if isinstance(sentiment, str) else 0
        score += risk_map.get(risk_level, -15) if isinstance(risk_level, str) else -15
        return max(0, min(100, score))

    def generate_report(self, all_data: list):
        """Build the Markdown report for all collected categories.

        Args:
            all_data: One dict per category with keys "category",
                "market_data", "compliance_data" (each holding "analysis" and
                "sources") and "opportunity_score".

        Returns:
            The report as a Markdown string, categories ordered by descending
            opportunity score. One LLM call is made for the executive summary
            and one per category for its recommendation.
        """
        print(f"\nAGENT ({self.name}): Generating final intelligence report.")
        
        # 1. Generate Executive Summary
        summary_prompt = f"""
        You are the Director of Purchasing Intelligence. Review the summarized findings below.
        Write a 2-3 paragraph Executive Summary for the Head of Purchasing.
        Highlight the most significant opportunities (high score) and critical risks (high risk, 'No-Go' status).
        Mention 2-3 specific categories that require immediate attention (for purchasing or avoidance).
        
        Findings:\n---\n{json.dumps(all_data, indent=2)}\n---
        
        Provide only the text for the executive summary.
        """
        executive_summary = self.llm.analyze(summary_prompt)

        # 2. Build detailed report sections
        report_sections = ""
        sorted_data = sorted(all_data, key=lambda x: x['opportunity_score'], reverse=True)

        for item in sorted_data:
            market = item['market_data']['analysis']
            compliance = item['compliance_data']['analysis']
            
            recommendation_prompt = f"""
            As a Senior Purchasing Analyst, write a final "Actionable Recommendation" for the '{item['category']}' category based *only* on the data below. Justify your conclusion by weighing market opportunity against regulatory risks.
            
            - Market Trends: {market.get('emerging_trends', 'N/A')}
            - Consumer Sentiment: {market.get('consumer_sentiment', 'N/A')}
            - Currently Trending Products: {market.get('currently_trending_products', 'N/A')}
            - Upcoming Innovations: {market.get('upcoming_innovations', 'N/A')}
            - Regulatory Status ({JURISDICTION}): {compliance.get('status', 'N/A')} ({compliance.get('risk_level', 'N/A')} Risk)
            - Calculated Opportunity Score: {item['opportunity_score']}/100

            Provide a concise, justified paragraph.
            """
            recommendation = self.llm.analyze(recommendation_prompt)

            # Merge both source lists, de-duplicated and in a stable order.
            market_sources = item['market_data']['sources']
            compliance_sources = item['compliance_data']['sources']
            all_sources = sorted(set(market_sources + compliance_sources))
            source_lines = ''.join(f"    - [{source}]({source})\n" for source in all_sources)

            report_sections += f"""
### {item['category']}
- **Opportunity Score:** {item['opportunity_score']}/100
- **Market Analysis:**
    - **Consumer Sentiment:** {market.get('consumer_sentiment', 'N/A')}
    - **Emerging Trends:** {self._format_list(market.get('emerging_trends'))}
    - **Key Drivers:** {market.get('key_drivers', 'N/A')}
- **Product Intelligence:**
    - **Currently Trending:** {self._format_list(market.get('currently_trending_products'))}
    - **Upcoming Innovations:** {self._format_list(market.get('upcoming_innovations'))}
- **Regulatory Analysis ({JURISDICTION}):**
    - **Status:** {compliance.get('status', 'N/A')} | **Risk Level:** {compliance.get('risk_level', 'N/A')}
    - **Summary:** {compliance.get('summary', 'N/A')}
- **Actionable Recommendation:**
    - {recommendation.strip()}
- **Sources:**
{source_lines}---
"""

        # 3. Assemble final report
        final_report = f"""# Purchasing Intelligence Report
**Date:** {datetime.now().strftime("%B %d, %Y")}
**Prepared by:** Autonomous Intelligence Agency

## 1. Executive Summary
{executive_summary.strip()}

---

## 2. Detailed Category Analysis
This section provides a detailed breakdown of each product category, ranked by opportunity score.

{report_sections}
"""
        return final_report

# --- Main Orchestration ---

class Orchestrator:
    """Builds the tools and agents and runs the end-to-end intelligence workflow."""

    def __init__(self):
        """Create one shared LLM client and web tools, then hand them to each agent."""
        print("--- Initializing AI Agency ---")
        llm = GeminiLLM(api_url=GEMINI_API_URL)
        searcher = WebSearchTool()
        scraper = WebScraperTool()

        self.llm = llm
        self.search_tool = searcher
        self.scraper_tool = scraper
        self.market_agent = MarketResearchAgent(llm, searcher, scraper)
        self.regulatory_agent = RegulatoryComplianceAgent(llm, searcher, scraper)
        self.reporting_agent = ReportingAgent(llm)

    def run(self):
        """Research every category, score it, then print and save the final report.

        A category is dropped when the "analysis" dict of either agent result
        contains an "error" key. Returns None; when nothing was collected no
        report is generated or written.
        """
        print("\n--- Starting Daily Intelligence Workflow ---")
        collected = []

        for category in PRODUCT_CATEGORIES:
            # Each agent call is followed by a one-second pause.
            market_result = self.market_agent.research_category(category)
            time.sleep(1)
            compliance_result = self.regulatory_agent.check_compliance(category, JURISDICTION)
            time.sleep(1)

            market_view = market_result.get('analysis', {})
            compliance_view = compliance_result.get('analysis', {})

            usable = "error" not in market_view and "error" not in compliance_view
            if not usable:
                print(f"Skipping report generation for {category} due to data gathering errors.")
                continue

            score = self.reporting_agent._calculate_opportunity_score(market_view, compliance_view)
            # The full agent results (analysis + sources) are kept for the report.
            entry = {
                "category": category,
                "market_data": market_result,
                "compliance_data": compliance_result,
                "opportunity_score": score,
            }
            collected.append(entry)

        if len(collected) == 0:
            print("\nCould not gather sufficient data to generate a report.")
            return

        report_text = self.reporting_agent.generate_report(collected)
        print("\n\n--- FINAL REPORT ---")
        print(report_text)

        # Persist the report as a dated Markdown file in the working directory.
        stamp = datetime.now().strftime('%Y-%m-%d')
        filename = f"purchasing_report_{stamp}.md"
        with open(filename, "w", encoding='utf-8') as report_file:
            report_file.write(report_text)
        print(f"\nReport saved to {filename}")

# Entry point: build the orchestrator (which constructs every tool and agent)
# and run the whole workflow once.
if __name__ == "__main__":
    orchestrator = Orchestrator()
    orchestrator.run()

--- Initializing AI Agency ---

--- Starting Daily Intelligence Workflow ---

AGENT (Market Research Agent (MRA)): Starting research for category: Vape Devices, Vaporizers, and E-Liquids
TOOL: Searching for: 'new product trends in Vape Devices, Vaporizers, and E-Liquids 2024 2025'
TOOL: Scraping URL: https://metapress.com/innovations-in-vaping-technology-what-to-expect-in-2025/
TOOL: Scraping URL: https://ecigator.com/lounge/vape-market-2025-trends/
TOOL: Searching for: 'top selling Vape Devices, Vaporizers, and E-Liquids products for wholesale distributors'
TOOL: Scraping URL: https://vaperanger.com/
TOOL: Scraping URL: https://paylessdistro.com/
TOOL: Searching for: 'consumer sentiment and reviews Vape Devices, Vaporizers, and E-Liquids'
TOOL: Scraping URL: https://pmc.ncbi.nlm.nih.gov/articles/PMC6115599/
TOOL: Scraping URL: https://www.researchgate.net/publication/326306283_Understanding_Users_ENDS_Vaping_Experience_from_Social_Media_Using_Sentiment_Opinion_Summarization_Techniques

In [13]:
import os
import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
from datetime import datetime
import json
import time
import traceback

# --- Configuration ---

# IMPORTANT: never write a real API key into the notebook. A key stored in
# source is leaked to everyone who can read the file (or its history) and has to
# be revoked and rotated. The key is read from the GEMINI_API_KEY environment
# variable; export it before starting the kernel.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "").strip()
if not GEMINI_API_KEY:
    print("WARNING: GEMINI_API_KEY is not set; calls to the Gemini API will fail.")

# Replaced f-string
GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key={}".format(GEMINI_API_KEY)


# Expanded and refined product categories
# Each entry is interpolated verbatim into the agents' search queries and LLM
# prompts. The wording also matters for RegulatoryComplianceAgent, which decides
# whether to run a regulatory search by looking for keywords as substrings of
# the lower-cased category name.
PRODUCT_CATEGORIES = [
    "Vape Devices and Vaporizers (Disposable and Refillable)",
    "E-Liquids (Nicotine and Nicotine-Free)",
    "Hemp-Derived Products (CBD, CBG, CBN)",
    "Delta-8, Delta-10, HHC, and THCa Products",
    "Kratom (Powders, Capsules, Extracts)",
    "Hookah, Shisha Tobacco, and Charcoals",
    "Premium Cigars and Rolling Papers",
    "Energy Drinks and Nootropic Beverages",
    "Imported and Specialty Snacks (e.g., Mexican Candy)",
    "Adult Novelty and Wellness Products",
    "Smoke Shop Supplies (Glassware, Grinders, Displays)"
]
JURISDICTION = "Georgia, USA" # Define jurisdiction for regulatory checks

# --- Core Components: Tools ---

class WebSearchTool:
    """Thin helper that runs general DuckDuckGo text searches."""
    def search(self, query: str, max_results: int = 5):
        """Search for `query` and return the hits as a list.

        Any exception raised while searching is printed and swallowed, in which
        case an empty list is returned.
        """
        print("TOOL (WebSearch): Searching for: '{}'".format(query))
        try:
            with DDGS() as client:
                hits = list(client.text(query, max_results=max_results))
        except Exception as e:
            print("Error during web search for '{}': {}".format(query, e))
            return []
        return hits

class SocialMediaSearchTool:
    """Searches social media (Reddit, via DuckDuckGo) for consumer opinions."""
    def search(self, query: str, max_results: int = 5):
        """Search Reddit for `query` and return the hits as a list.

        The query is restricted with a "site:reddit.com" prefix. Any exception
        is printed and swallowed, in which case an empty list is returned.
        """
        print("TOOL (SocialSearch): Searching for: '{}'".format(query))
        try:
            # Focus on Reddit for candid conversations
            reddit_query = "site:reddit.com {}".format(query)
            with DDGS() as client:
                hits = list(client.text(reddit_query, max_results=max_results))
        except Exception as e:
            print("Error during social media search for '{}': {}".format(query, e))
            return []
        return hits

class WebScraperTool:
    """A tool for scraping and cleaning content from a URL."""

    # Browser-like request headers. Accept-Encoding is deliberately NOT set:
    # requests then advertises only the encodings it can actually decode.
    # Forcing "br" makes servers send Brotli-compressed bodies, which come back
    # as undecoded bytes (garbage text) unless a Brotli package is installed.
    REQUEST_HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
    }

    def scrape(self, url: str):
        """Download `url` and return up to 8000 characters of cleaned text.

        Script, style, page-chrome and form elements are removed before the
        text is extracted. Returns None when the request fails (the error is
        printed).
        """
        print("TOOL (Scraper): Scraping URL: {}".format(url))
        try:
            response = requests.get(url, headers=dict(self.REQUEST_HEADERS), timeout=20)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            # More aggressive cleaning
            for element in soup(["script", "style", "nav", "footer", "header", "aside", "form", "button"]):
                element.decompose()
            text = soup.get_text(separator=' ', strip=True)
            return text[:8000] # Limit content size
        except requests.RequestException as e:
            print("Error scraping {}: {}".format(url, e))
            return None

class GeminiLLM:
    """A wrapper for the Gemini LLM API with error handling.

    Public methods report failures through their return value (an error string
    or an error dict) instead of raising, so callers can keep going when a
    single call fails.
    """
    def __init__(self, api_url: str):
        """Store the full generateContent endpoint URL (including the API key)."""
        self.api_url = api_url

    def _make_request(self, prompt: str):
        """POST `prompt` to the API.

        Returns:
            The decoded JSON response body, or None when the request fails or
            the body is not valid JSON.
        """
        payload = {
            "contents": [{"role": "user", "parts": [{"text": prompt}]}],
            "generationConfig": {"temperature": 0.4, "topP": 0.95, "topK": 40}
        }
        headers = {'Content-Type': 'application/json'}
        time.sleep(2)  # Rate limiting
        try:
            response = requests.post(self.api_url, headers=headers, json=payload, timeout=45)
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError) as e:
            print("Error calling Gemini API: {}".format(e))
            # A requests.Response is falsy for 4xx/5xx statuses, so it must be
            # compared with None; a truthiness test would hide the error body
            # in exactly the cases where it is useful.
            error_response = getattr(e, 'response', None)
            if error_response is not None:
                print("Response body: {}".format(error_response.text))
            return None

    def analyze(self, prompt: str) -> str:
        """Send `prompt` and return the text of the first candidate.

        Returns:
            The generated text, or a string starting with "Error:" when the
            request failed or the response has no usable candidate (missing or
            empty "candidates", missing "content"/"parts"/"text").
        """
        print("LLM (Analyze): Analyzing with prompt - '{}'...".format(prompt[:150].replace('\n', ' ')))
        result = self._make_request(prompt)
        try:
            text = result['candidates'][0]['content']['parts'][0]['text']
        except (TypeError, KeyError, IndexError):
            text = None
        if isinstance(text, str):
            return text
        print("LLM Error: Could not parse response. Full response: {}".format(result))
        return "Error: Analysis failed due to an invalid API response."

    def analyze_json(self, prompt: str) -> dict:
        """Send `prompt` and parse the reply as a JSON object.

        A surrounding Markdown code fence is removed first, whether it is
        tagged (```json) or bare (```).

        Returns:
            The parsed dict, or {"error": ..., "raw_response": ...} when the
            reply is not valid JSON or is valid JSON but not an object (callers
            rely on a dict and test for the "error" key).
        """
        print("LLM (Analyze JSON): Analyzing with prompt - '{}'...".format(prompt[:150].replace('\n', ' ')))
        raw_response = self.analyze(prompt)
        try:
            # Clean the response string from markdown code blocks
            clean_str = raw_response.strip()
            if clean_str.startswith('```'):
                clean_str = clean_str[3:]
                if clean_str[:4].lower() == 'json':  # optional language tag
                    clean_str = clean_str[4:]
                clean_str = clean_str.strip().removesuffix('```').strip()
            parsed = json.loads(clean_str)
        except (json.JSONDecodeError, AttributeError) as e:
            print("LLM JSON Error: Failed to decode JSON. Error: {}\nLLM Response was:\n{}".format(e, raw_response))
            return {"error": "Failed to parse JSON response from LLM.", "raw_response": raw_response}
        if not isinstance(parsed, dict):
            print("LLM JSON Error: Expected a JSON object but got {}.\nLLM Response was:\n{}".format(type(parsed).__name__, raw_response))
            return {"error": "Failed to parse JSON response from LLM.", "raw_response": raw_response}
        return parsed

# --- Agent Definitions ---

class MarketResearchAgent:
    """Agent that finds trending products, market sentiment, and innovations."""
    # Annotations are quoted so the class can be defined without the tool
    # classes already existing (e.g. when notebook cells are run piecemeal).
    def __init__(self, llm: "GeminiLLM", search_tool: "WebSearchTool", social_tool: "SocialMediaSearchTool", scraper_tool: "WebScraperTool"):
        self.llm, self.search_tool, self.social_tool, self.scraper_tool = llm, search_tool, social_tool, scraper_tool
        self.name = "Market Research Agent (MRA)"

    def research_category(self, category: str):
        """Gather web and social content for one category and have the LLM analyse it.

        Web results are scraped; social results contribute only their search
        snippet. Each URL is used at most once per source type.

        Args:
            category: Product category name, interpolated into the search
                queries and the LLM prompt.

        Returns:
            {"analysis": dict, "sources": list of URLs}. "analysis" is the
            LLM's JSON object with "category" added, or a dict containing an
            "error" key when nothing could be gathered or the reply was not
            parseable. "sources" lists each used URL once, in first-seen order.
        """
        print("\nAGENT ({}): Starting research for category: {}".format(self.name, category))
        # Broader and more targeted search queries
        search_queries = [
            "new product trends in {} 2025".format(category),
            "top selling {} products wholesale B2B".format(category),
            "innovations and future of {}".format(category)
        ]
        social_queries = [
            "honest review {}".format(category),
            "what's the best {} right now".format(category),
            "underrated {} products".format(category)
        ]

        all_content = ""
        sources = {}  # dict used as an insertion-ordered set of URLs

        # Gather web content
        attempted_web = set()  # URLs already tried, successful or not
        for query in search_queries:
            for result in self.search_tool.search(query, max_results=7):
                url = result.get('href')
                # The same page is often returned by several queries; scraping
                # it again costs a network round trip and duplicates its text
                # in the prompt. Results without a link cannot be scraped.
                if not url or url in attempted_web:
                    continue
                attempted_web.add(url)
                content = self.scraper_tool.scrape(url)
                if content:
                    all_content += "\n\n--- Web Source: {} ({}) ---\n{}".format(result.get('title', url), url, content)
                    sources[url] = None

        # Gather social media content
        seen_social = set()
        for query in social_queries:
            for result in self.social_tool.search(query, max_results=7):
                url = result.get('href')
                snippet = result.get('body')
                if not url or not snippet or url in seen_social:
                    continue
                seen_social.add(url)
                # No need to scrape reddit pages, snippets are often enough
                all_content += "\n\n--- Social Source: {} ({}) ---\n{}".format(result.get('title', url), url, snippet)
                sources[url] = None

        if not all_content:
            return {"analysis": {"category": category, "error": "No content could be gathered."}, "sources": []}

        prompt = """
        You are a senior market analyst for a large CPG wholesale distributor.
        Analyze the following compilation of web articles and social media posts about the '{category}' category.
        Your task is to extract actionable intelligence for the purchasing department.
        Provide a structured JSON output with the following keys:
        - "emerging_trends": A list of 2-4 key emerging trends (e.g., "New flavor profiles", "High-potency options", "Eco-friendly packaging").
        - "key_drivers": A string explaining the main factors driving these trends (e.g., "Gen Z preferences", "Regulatory changes", "Health-consciousness").
        - "consumer_sentiment": A single descriptive string: "Very Positive", "Positive", "Mixed", "Cautious", or "Negative". Base this heavily on the social media source content.
        - "currently_trending_products": A list of 3-5 specific product types or brands that are popular *right now*. Be specific.
        - "upcoming_innovations": A list of 2-4 product types, technologies, or concepts that are on the horizon or in early stages of market entry.

        Article and Social Media Compilation:\n---\n{all_content}\n---
        
        Output ONLY the JSON object.
        """.format(category=category, all_content=all_content)
        analysis = self.llm.analyze_json(prompt)
        if 'error' not in analysis:
            analysis['category'] = category
            print("AGENT ({}): Successfully analyzed category: {}".format(self.name, category))
        return {"analysis": analysis, "sources": list(sources)}

class RegulatoryComplianceAgent:
    """Agent that monitors the regulatory landscape."""

    # Substrings of a lower-cased category name that force a regulatory search.
    # "nicotine", "e-liquid", "cigar", "hookah", "shisha" and "smoke shop" are
    # needed so that categories such as "E-Liquids (Nicotine and Nicotine-Free)"
    # or "Premium Cigars and Rolling Papers" are not waved through as low risk.
    HIGH_RISK_KEYWORDS = (
        "vape", "delta", "kratom", "tobacco", "smoking", "hemp", "cbd", "adult", "thca", "hhc",
        "nicotine", "e-liquid", "cigar", "hookah", "shisha", "smoke shop",
    )

    # Annotations are quoted so the class can be defined without the tool
    # classes already existing (e.g. when notebook cells are run piecemeal).
    def __init__(self, llm: "GeminiLLM", search_tool: "WebSearchTool", scraper_tool: "WebScraperTool"):
        self.llm, self.search_tool, self.scraper_tool = llm, search_tool, scraper_tool
        self.name = "Regulatory Compliance Agent (RCA)"

    @staticmethod
    def _is_official_source(url: str) -> bool:
        """Return True when the URL's host is a .gov or .org site.

        Only the hostname is inspected: matching the whole URL would also accept
        any page whose path or query string merely contains ".gov" or ".org".
        """
        from urllib.parse import urlparse  # local import keeps the block self-contained
        try:
            host = (urlparse(url).hostname or "").lower()
        except ValueError:  # malformed URL
            return False
        return host.endswith(('.gov', '.org'))

    def check_compliance(self, category: str, jurisdiction: str):
        """Assess the regulatory status of `category` in `jurisdiction`.

        Categories whose lower-cased name contains none of HIGH_RISK_KEYWORDS
        are returned as "Go"/"Low" without any search. Otherwise pages from
        official-looking hosts are scraped and summarised by the LLM.

        Returns:
            {"analysis": dict, "sources": list of URLs}. "analysis" carries
            "status", "risk_level" and "summary", or an "error" key when the
            LLM reply could not be parsed. With no usable source the result is
            a fixed "Watch"/"Medium" analysis asking for manual review.
        """
        print("\nAGENT ({}): Checking compliance for '{}' in '{}'".format(self.name, category, jurisdiction))
        category_lower = category.lower()
        if not any(keyword in category_lower for keyword in self.HIGH_RISK_KEYWORDS):
            return {"analysis": {"status": "Go", "risk_level": "Low", "summary": "Standard consumer product regulations apply. No specific restrictions identified."}, "sources": []}

        search_queries = [
            "laws and regulations for selling {} in {}".format(category, jurisdiction),
            "new {} legislation {} 2025".format(category, jurisdiction),
            "{} FDA regulations USA federal".format(category),
            "{} Department of Revenue {} rules".format(jurisdiction, category)
        ]
        all_content, sources = "", []
        attempted = set()  # URLs already tried, so repeated hits are not re-scraped
        for query in search_queries:
            # Prioritize official sources
            for result in self.search_tool.search(query, max_results=3):
                url = result.get('href')
                if not url or url in attempted or not self._is_official_source(url):
                    continue
                attempted.add(url)
                content = self.scraper_tool.scrape(url)
                if content:
                    all_content += "\n\n--- Source: {} ({}) ---\n{}".format(result.get('title', url), url, content)
                    sources.append(url)

        if not all_content:
            return {"analysis": {"status": "Watch", "risk_level": "Medium", "summary": "Could not find definitive regulatory information from official sources. Manual review required."}, "sources": []}

        prompt = """
        You are a compliance officer specializing in CPG products. Analyze the provided text regarding regulations for '{category}' in {jurisdiction}.
        Provide a structured JSON output with three keys:
        - "status": Choose one: "Go" (legal to sell), "Watch" (legal but with significant restrictions or pending changes), or "No-Go" (illegal or prohibitively restricted).
        - "risk_level": Choose one: "Low", "Medium", "High".
        - "summary": A concise paragraph detailing key restrictions (age limits, potency caps, flavor bans, licensing requirements, advertising rules) and any recent or upcoming legal changes.

        Regulatory Text:\n---\n{all_content}\n---
        
        Output ONLY the JSON object.
        """.format(category=category, jurisdiction=jurisdiction, all_content=all_content)
        status = self.llm.analyze_json(prompt)
        # Only claim success when the LLM reply was actually parsed.
        if 'error' not in status:
            print("AGENT ({}): Successfully checked compliance for: {}".format(self.name, category))
        return {"analysis": status, "sources": sources}

class CompetitiveIntelligenceAgent:
    """Agent that analyzes the competitive landscape.

    For a product category it runs two web searches, scrapes each distinct
    result URL once, and asks the LLM to extract competitors, promoted brands
    and the competitors' apparent focus from the combined page text.
    """
    def __init__(self, llm: "GeminiLLM", search_tool: "WebSearchTool", scraper_tool: "WebScraperTool"):
        # Annotations are quoted (forward references) so that defining this
        # class does not require the tool classes to exist yet.
        self.llm, self.search_tool, self.scraper_tool = llm, search_tool, scraper_tool
        self.name = "Competitive Intelligence Agent (CIA)"

    def analyze_competitors(self, category: str):
        """Collect competitor pages for ``category`` and summarise them via the LLM.

        Args:
            category: Product category name inserted into the search queries
                and the analysis prompt.

        Returns:
            dict with two keys:
              - "analysis": whatever ``self.llm.analyze_json`` returns for the
                prompt, or ``{"error": ...}`` when no page yielded content.
              - "sources": sorted list of the URLs whose scrape returned content
                (empty when nothing could be scraped).
        """
        print("\nAGENT ({}): Analyzing competitors for '{}'".format(self.name, category))
        search_queries = [
            "top wholesale distributors for {} USA".format(category),
            "major online retailers for {}".format(category)
        ]
        content_chunks = []
        sources = set()
        attempted_urls = set()
        for query in search_queries:
            # `or []` tolerates a search tool that signals failure with None.
            search_results = self.search_tool.search(query, max_results=7) or []
            for result in search_results:
                # presumably each result is a dict with 'href' and 'title' keys;
                # entries without a link are skipped instead of raising KeyError.
                url = result.get('href')
                if not url or url in attempted_urls:
                    # Both queries frequently return the same page; scraping it
                    # again would only waste a request and duplicate prompt text.
                    continue
                attempted_urls.add(url)
                content = self.scraper_tool.scrape(url)
                if content:
                    content_chunks.append(
                        "\n\n--- Competitor Source: {} ({}) ---\n{}".format(result.get('title', url), url, content)
                    )
                    sources.add(url)

        all_content = "".join(content_chunks)
        if not all_content:
            return {"analysis": {"error": "Could not find competitor information."}, "sources": []}

        prompt = """
        You are a competitive intelligence analyst. From the scraped text of competitor websites, identify key information about the '{category}' market.
        Provide a structured JSON output with:
        - "key_competitors": A list of 2-3 names of competing distributors or large retailers.
        - "promoted_brands": A list of specific brand names that appear to be heavily promoted or featured.
        - "competitor_focus": A short string describing what competitors seem to be focusing on (e.g., "High-end disposables", "Budget-friendly e-liquids", "Organic and natural products").

        Scraped Competitor Data:\n---\n{all_content}\n---

        Output ONLY the JSON object.
        """.format(category=category, all_content=all_content)
        analysis = self.llm.analyze_json(prompt)
        print("AGENT ({}): Successfully analyzed competitors for: {}".format(self.name, category))
        # Sorted so the returned list is deterministic across runs.
        return {"analysis": analysis, "sources": sorted(sources)}
        
class SupplierDiscoveryAgent:
    """Agent that finds potential suppliers for high-opportunity products.

    For each product it runs one web search and asks the LLM to pick B2B
    suppliers out of the raw search results.
    """
    def __init__(self, llm: "GeminiLLM", search_tool: "WebSearchTool"):
        # Annotations are quoted (forward references) so that defining this
        # class does not require the tool classes to exist yet.
        self.llm, self.search_tool = llm, search_tool
        self.name = "Supplier Discovery Agent (SDA)"

    @staticmethod
    def _valid_suppliers(candidate):
        """Return the entries of an LLM reply that are dicts with non-empty "name" and "url".

        Anything that is not a list (``None``, an ``{"error": ...}`` dict, a
        bare string, ...) yields an empty list.
        """
        if not isinstance(candidate, list):
            return []
        return [
            entry for entry in candidate
            if isinstance(entry, dict) and entry.get("name") and entry.get("url")
        ]

    def find_suppliers(self, products: list):
        """Look up candidate suppliers for every product name.

        Args:
            products: Product names to search for.

        Returns:
            dict mapping each product to a non-empty list of
            ``{"name": ..., "url": ...}`` dicts. When the search returns
            nothing, or the LLM reply contains no usable entry, the list holds
            a single placeholder entry with ``"url": "#"``. An empty
            ``products`` returns ``{}``.
        """
        print("\nAGENT ({}): Finding suppliers for top products: {}".format(self.name, products))
        if not products:
            return {}
            
        suppliers = {}
        for product in products:
            query = '"{}" wholesale supplier distributor USA'.format(product)
            search_results = self.search_tool.search(query, max_results=3)
            
            if not search_results:
                suppliers[product] = [{"name": "No direct suppliers found via search.", "url": "#"}]
                continue

            prompt = """
            From the following search results for "{product}", identify up to 2 potential B2B suppliers or distributors.
            For each, provide their name and a direct URL to their website.
            Do not list retailers or informational sites. Focus on wholesale/distribution.
            
            Search Results:
            {search_results_json}

            Provide a JSON list where each object has a "name" and "url" key.
            Example: [{{"name": "Global Vapes Wholesale", "url": "https://globalvapes.com"}}]
            Output ONLY the JSON object.
            """.format(product=product, search_results_json=json.dumps(search_results, indent=2))
            
            supplier_list = self.llm.analyze_json(prompt)
            # Type-check before inspecting the reply: `"error" in None` would
            # raise TypeError, and entries missing "name"/"url" would later
            # break the report rendering.
            usable = self._valid_suppliers(supplier_list)
            if usable:
                suppliers[product] = usable
            else:
                suppliers[product] = [{"name": "Could not identify suppliers from search.", "url": "#"}]

        print("AGENT ({}): Finished supplier discovery.".format(self.name))
        return suppliers


class ReportingAgent:
    """Agent that synthesizes all findings into a detailed HTML report.

    Everything interpolated into the HTML (LLM text, scraped URLs, supplier
    names) is HTML-escaped first, because it ultimately derives from web
    content and model output rather than from trusted code.
    """

    # Tailwind CSS badge classes keyed by the label found in the analysis dicts.
    _SENTIMENT_COLORS = {"Very Positive": "bg-green-100 text-green-800", "Positive": "bg-blue-100 text-blue-800", "Mixed": "bg-yellow-100 text-yellow-800", "Cautious": "bg-orange-100 text-orange-800", "Negative": "bg-red-100 text-red-800"}
    _RISK_COLORS = {"Low": "bg-green-100 text-green-800", "Medium": "bg-yellow-100 text-yellow-800", "High": "bg-red-100 text-red-800"}
    _STATUS_COLORS = {"Go": "bg-green-100 text-green-800", "Watch": "bg-orange-100 text-orange-800", "No-Go": "bg-red-100 text-red-800"}
    _DEFAULT_BADGE = 'bg-gray-100 text-gray-800'

    def __init__(self, llm: "GeminiLLM"):
        # Annotation is quoted (forward reference) so that defining this class
        # does not require the LLM class to exist yet.
        self.llm = llm
        self.name = "Reporting Agent (RA)"

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _esc(value):
        """HTML-escape ``str(value)`` for use as element text or inside a quoted attribute."""
        from html import escape  # local import: only the HTML rendering needs it
        return escape(str(value), quote=True)

    @classmethod
    def _text(cls, value, default='N/A'):
        """Turn an analysis field into escaped display text.

        Lists/tuples are joined with ", " (items stringified, blanks dropped);
        a plain string is shown as-is rather than being split into characters;
        ``None`` or empty values fall back to ``default``.
        """
        if isinstance(value, (list, tuple)):
            value = ', '.join(
                str(part).strip() for part in value
                if part is not None and str(part).strip()
            )
        text = '' if value is None else str(value).strip()
        return cls._esc(text or default)

    @classmethod
    def _href(cls, url):
        """Return an escaped http(s) URL for an ``href`` attribute, or "#" for anything else.

        Rejecting other schemes keeps e.g. ``javascript:`` links out of the report.
        """
        candidate = '' if url is None else str(url).strip()
        if candidate.lower().startswith(('http://', 'https://')):
            return cls._esc(candidate)
        return '#'

    @classmethod
    def _badge(cls, palette, label):
        """Look up the CSS classes for ``label``; unknown or non-string labels get the grey default."""
        if isinstance(label, str):
            return palette.get(label, cls._DEFAULT_BADGE)
        return cls._DEFAULT_BADGE

    # ------------------------------------------------------------------ scoring

    def _calculate_opportunity_score(self, market_analysis, compliance_analysis):
        """Combine sentiment and regulatory risk into a 0-100 opportunity score.

        Starts at 50, adds a sentiment bonus/penalty and a risk bonus/penalty
        (an unrecognised risk level counts like "Medium"), subtracts 10 for a
        "Watch" status, and clamps to [0, 100]. Returns 0 outright when either
        analysis is not a dict, contains an "error" key, or the status is
        "No-Go".
        """
        if not isinstance(market_analysis, dict) or not isinstance(compliance_analysis, dict):
            return 0
        if "error" in market_analysis or "error" in compliance_analysis:
            return 0
        status = compliance_analysis.get("status")
        if status == "No-Go":
            return 0

        sentiment_map = {"Very Positive": 20, "Positive": 10, "Mixed": 0, "Cautious": -10, "Negative": -25}
        risk_map = {"Low": 15, "Medium": -15, "High": -30}
        sentiment = market_analysis.get("consumer_sentiment")
        risk = compliance_analysis.get("risk_level")

        score = 50
        # isinstance guards: an unhashable label (e.g. a list) must not crash the dict lookup.
        score += sentiment_map.get(sentiment, 0) if isinstance(sentiment, str) else 0
        score += risk_map.get(risk, -15) if isinstance(risk, str) else -15
        if status == "Watch":
            score -= 10

        return max(0, min(100, score))
    
    def _get_recommendation(self, item):
        """Ask the LLM for a one-paragraph recommendation for one category entry.

        ``item`` is one element of the list passed to ``generate_report``; the
        return value is whatever ``self.llm.analyze`` returns for the prompt.
        """
        market = item['market_data']['analysis']
        compliance = item['compliance_data']['analysis']
        competition = item['competition_data']['analysis']

        recommendation_prompt = """
        As a Senior Purchasing Analyst, write a final "Actionable Recommendation" for the '{category}' category.
        Justify your conclusion by weighing market opportunity against regulatory risks and the competitive landscape.
        Keep it to a concise, direct paragraph.

        - Market Trends: {market_trends}
        - Consumer Sentiment: {consumer_sentiment}
        - Trending Products: {trending_products}
        - Regulatory Status ({jurisdiction}): {reg_status} ({risk_level} Risk)
        - Competitor Focus: {competitor_focus}
        - Calculated Opportunity Score: {score}/100

        Based on this, what is the final recommendation? (e.g., "Aggressively Pursue", "Test Market", "Monitor Closely", "Avoid"). Justify it.
        """.format(
            category=item['category'],
            market_trends=market.get('emerging_trends', 'N/A'),
            consumer_sentiment=market.get('consumer_sentiment', 'N/A'),
            trending_products=market.get('currently_trending_products', 'N/A'),
            jurisdiction=JURISDICTION,
            reg_status=compliance.get('status', 'N/A'),
            risk_level=compliance.get('risk_level', 'N/A'),
            competitor_focus=competition.get('competitor_focus', 'N/A'),
            score=item['opportunity_score']
        )
        return self.llm.analyze(recommendation_prompt)

    # ---------------------------------------------------------------- rendering

    def _render_category_section(self, item, market, compliance, competition, recommendation):
        """Render the HTML card for one category (all dynamic values escaped)."""
        # Union of the three source lists, de-duplicated and sorted.
        urls = set()
        for key in ('market_data', 'compliance_data', 'competition_data'):
            for url in (item[key].get('sources') or []):
                urls.add(str(url))
        sources_html = "".join([
            '<li class="truncate"><a href="{0}" target="_blank" class="text-blue-600 hover:underline">{1}</a></li>'.format(
                self._href(source), self._esc(source))
            for source in sorted(urls)
        ])

        return """
            <div class="bg-white p-6 rounded-lg shadow-md mb-8 break-inside-avoid">
                <!-- Category Header -->
                <div class="flex justify-between items-start mb-4">
                    <h2 class="text-2xl font-bold text-gray-800">{category}</h2>
                    <div class="text-right">
                        <p class="text-sm text-gray-500">Opportunity Score</p>
                        <p class="text-3xl font-bold text-blue-600">{opportunity_score}</p>
                    </div>
                </div>
                <div class="w-full bg-gray-200 rounded-full h-2.5 mb-6">
                    <div class="bg-blue-600 h-2.5 rounded-full" style="width: {opportunity_score}%"></div>
                </div>

                <!-- Actionable Recommendation -->
                <div class="bg-gray-50 p-4 rounded-lg mb-6 border border-gray-200">
                    <h3 class="font-bold text-lg text-gray-900 mb-2">Actionable Recommendation</h3>
                    <p class="text-gray-700">{recommendation}</p>
                </div>
                
                <!-- Details Grid -->
                <div class="grid md:grid-cols-3 gap-6">
                    <!-- Market Analysis -->
                    <div class="bg-blue-50 p-4 rounded-lg">
                        <h4 class="font-semibold text-blue-900 mb-3">Market Analysis</h4>
                        <p class="mb-2"><strong class="font-medium text-gray-700">Sentiment:</strong> <span class="text-sm font-semibold mr-2 px-2.5 py-0.5 rounded-full {sentiment_class}">{sentiment}</span></p>
                        <p class="mb-2"><strong class="font-medium text-gray-700">Trends:</strong> {trends}</p>
                        <p class="text-sm text-gray-600 mt-2"><strong class="font-medium text-gray-700">Drivers:</strong> {drivers}</p>
                    </div>
                    <!-- Product Intelligence -->
                    <div class="bg-green-50 p-4 rounded-lg">
                        <h4 class="font-semibold text-green-900 mb-3">Product Intelligence</h4>
                        <p class="mb-2"><strong class="font-medium text-gray-700">Trending Now:</strong> {trending_now}</p>
                        <p><strong class="font-medium text-gray-700">Innovations:</strong> {innovations}</p>
                    </div>
                    <!-- Regulatory & Competition -->
                    <div class="bg-red-50 p-4 rounded-lg">
                        <h4 class="font-semibold text-red-900 mb-3">Risk & Competition</h4>
                        <p class="mb-2"><strong class="font-medium text-gray-700">Regulatory ({jurisdiction}):</strong> <span class="text-sm font-semibold mr-2 px-2.5 py-0.5 rounded-full {status_class}">{status}</span><span class="text-sm font-semibold mr-2 px-2.5 py-0.5 rounded-full {risk_class}">{risk_level} Risk</span></p>
                        <p class="text-sm text-gray-600 mb-2">{compliance_summary}</p>
                        <p class="mt-3"><strong class="font-medium text-gray-700">Competitors:</strong> {competitors}</p>
                        <p class="text-sm text-gray-600"><strong class="font-medium text-gray-700">Their Focus:</strong> {competitor_focus}</p>
                    </div>
                </div>

                <!-- Sources -->
                <details class="mt-4">
                    <summary class="cursor-pointer text-sm font-medium text-gray-600 hover:text-gray-900">Show/Hide Sources</summary>
                    <ul class="mt-2 pl-4 list-disc text-xs text-gray-500 space-y-1">{sources_html}</ul>
                </details>
            </div>
            """.format(
                category=self._esc(item['category']),
                opportunity_score=item['opportunity_score'],
                recommendation=self._text(recommendation),
                sentiment_class=self._badge(self._SENTIMENT_COLORS, market.get('consumer_sentiment')),
                sentiment=self._text(market.get('consumer_sentiment')),
                trends=self._text(market.get('emerging_trends')),
                drivers=self._text(market.get('key_drivers')),
                trending_now=self._text(market.get('currently_trending_products')),
                innovations=self._text(market.get('upcoming_innovations')),
                jurisdiction=self._esc(JURISDICTION),
                status_class=self._badge(self._STATUS_COLORS, compliance.get('status')),
                status=self._text(compliance.get('status')),
                risk_class=self._badge(self._RISK_COLORS, compliance.get('risk_level')),
                risk_level=self._text(compliance.get('risk_level')),
                compliance_summary=self._text(compliance.get('summary')),
                competitors=self._text(competition.get('key_competitors')),
                competitor_focus=self._text(competition.get('competitor_focus')),
                sources_html=sources_html
            )

    def _render_supplier_section(self, supplier_data):
        """Render the supplier card, or "" when ``supplier_data`` is empty."""
        if not supplier_data:
            return ""
        supplier_html = '<div class="bg-white p-6 rounded-lg shadow-md mb-8 break-inside-avoid">'
        supplier_html += '<h2 class="text-2xl font-bold text-gray-800 mb-4">Potential Supplier Discovery</h2>'
        supplier_html += '<p class="text-gray-600 mb-6">The following potential B2B suppliers were identified for high-opportunity products. Further vetting is required.</p>'
        supplier_html += '<ul class="space-y-4">'
        for product, suppliers in supplier_data.items():
            links = []
            for entry in (suppliers if isinstance(suppliers, (list, tuple)) else []):
                # The supplier list originates from an LLM reply; skip entries
                # that are not dicts and tolerate missing "name"/"url" keys.
                if not isinstance(entry, dict):
                    continue
                links.append(
                    '<a href="{}" target="_blank" class="text-blue-600 hover:underline">{}</a>'.format(
                        self._href(entry.get("url")), self._text(entry.get("name")))
                )
            supplier_html += '<li><strong class="font-medium text-gray-800">{}:</strong> {}</li>'.format(
                self._esc(product), ", ".join(links))
        supplier_html += '</ul></div>'
        return supplier_html

    def generate_report(self, all_data: list, supplier_data: dict):
        """Build the complete HTML report.

        Args:
            all_data: One dict per category with the keys "category",
                "market_data", "compliance_data", "competition_data" (each a
                dict with "analysis" and "sources") and "opportunity_score".
            supplier_data: Mapping of product name to a list of
                ``{"name": ..., "url": ...}`` dicts; may be empty.

        Returns:
            The report as one HTML string. Categories are ordered by
            descending opportunity score; a category whose analyses are not
            all dicts, or contain an "error" key, is left out. Two LLM calls
            are made per report plus one per rendered category.
        """
        print("\nAGENT ({}): Generating final HTML intelligence report.".format(self.name))
        
        summary_prompt = """
        You are the Director of Purchasing Intelligence. Review the summarized findings below from your team of agents.
        Write a 2-3 paragraph Executive Summary for the Head of Purchasing.
        - Start by stating the highest opportunity categories.
        - Highlight the most significant growth areas and specific product types to focus on.
        - Point out the critical risks, especially categories with "High" risk or "Watch" status in {jurisdiction}.
        - Conclude by recommending immediate actions for the purchasing team.
        
        Findings:\n---\n{all_data_json}\n---
        
        Provide only the text for the executive summary.
        """.format(jurisdiction=JURISDICTION, all_data_json=json.dumps(all_data, indent=2, default=str))
        executive_summary = self.llm.analyze(summary_prompt)

        # --- HTML Generation ---
        
        sorted_data = sorted(all_data, key=lambda x: x['opportunity_score'], reverse=True)
        
        sections = []
        for item in sorted_data:
            market = item['market_data']['analysis']
            compliance = item['compliance_data']['analysis']
            competition = item['competition_data']['analysis']
            analyses = [market, compliance, competition]

            # Skip if critical data is missing or malformed
            if not all(isinstance(d, dict) for d in analyses) or any("error" in d for d in analyses):
                continue

            recommendation = self._get_recommendation(item)
            sections.append(self._render_category_section(item, market, compliance, competition, recommendation))
        report_sections_html = "".join(sections)

        supplier_html = self._render_supplier_section(supplier_data)


        # Assemble final report
        final_report_html = """
        <!DOCTYPE html>
        <html lang="en">
        <head>
            <meta charset="UTF-8">
            <meta name="viewport" content="width=device-width, initial-scale=1.0">
            <title>Purchasing Intelligence Report</title>
            <script src="https://cdn.tailwindcss.com"></script>
            <style>
                @import url('https://rsms.me/inter/inter.css');
                html {{ font-family: 'Inter', sans-serif; }}
                body {{ background-color: #f3f4f6; }}
            </style>
        </head>
        <body class="p-4 sm:p-6 md:p-8">
            <div class="w-full md:max-w-[80%] mx-auto">
                <!-- Header -->
                <header class="mb-8">
                    <h1 class="text-4xl font-extrabold text-gray-900">Purchasing Intelligence Report</h1>
                    <p class="text-lg text-gray-500">Date: {date}</p>
                    <p class="text-sm text-gray-500">Prepared by: Autonomous Intelligence Agency for {jurisdiction}</p>
                </header>
                
                <!-- Executive Summary -->
                <div class="bg-white p-6 rounded-lg shadow-md mb-8">
                    <h2 class="text-2xl font-bold text-gray-800 mb-4">Executive Summary</h2>
                    <div class="prose max-w-none text-gray-700">{summary}</div>
                </div>

                <!-- Main Content: Use columns for larger screens -->
                <main class="columns-1 gap-8">
                    {supplier_html}
                    {report_sections}
                </main>

                <footer class="text-center mt-12 text-sm text-gray-500">
                    <p>This report was generated automatically. All findings should be independently verified before making purchasing decisions.</p>
                </footer>
            </div>
        </body>
        </html>
        """.format(
            date=datetime.now().strftime("%B %d, %Y"),
            jurisdiction=self._esc(JURISDICTION),
            # Escape first, then turn line breaks into <br> so only our own markup survives.
            summary=self._text(executive_summary, default='').replace('\n', '<br>'),
            supplier_html=supplier_html,
            report_sections=report_sections_html
        )
        return final_report_html

# --- Main Orchestration ---

class Orchestrator:
    """Wires the LLM, tools and agents together and runs the full workflow."""

    def __init__(self):
        """Instantiate the LLM client, the search/scrape tools and every agent."""
        print("--- Initializing AI Agency ---")
        self.llm = GeminiLLM(api_url=GEMINI_API_URL)
        self.search_tool = WebSearchTool()
        self.social_tool = SocialMediaSearchTool()
        self.scraper_tool = WebScraperTool()
        self.market_agent = MarketResearchAgent(self.llm, self.search_tool, self.social_tool, self.scraper_tool)
        self.regulatory_agent = RegulatoryComplianceAgent(self.llm, self.search_tool, self.scraper_tool)
        self.competition_agent = CompetitiveIntelligenceAgent(self.llm, self.search_tool, self.scraper_tool)
        self.supplier_agent = SupplierDiscoveryAgent(self.llm, self.search_tool)
        self.reporting_agent = ReportingAgent(self.llm)

    @staticmethod
    def _select_top_products(category_data, min_score=60, limit=5):
        """Pick up to ``limit`` distinct trending products from high-scoring categories.

        Categories are visited in descending ``opportunity_score`` order and
        only those scoring strictly above ``min_score`` contribute. The result
        keeps that priority order (a list, not a set, so the order is
        deterministic). A bare string is treated as a single product, and
        non-string or blank entries are ignored.
        """
        selected = []
        ranked = sorted(category_data, key=lambda entry: entry['opportunity_score'], reverse=True)
        for entry in ranked:
            if entry['opportunity_score'] <= min_score:
                break  # ranked is descending, so nothing further can qualify
            products = entry['market_data']['analysis'].get('currently_trending_products', [])
            if isinstance(products, str):
                products = [products]
            elif not isinstance(products, (list, tuple)):
                products = []
            for product in products:
                if not isinstance(product, str) or not product.strip():
                    continue
                if product not in selected:
                    selected.append(product)
                if len(selected) >= limit:
                    return selected
        return selected

    def run(self):
        """Run research, scoring, supplier discovery and report generation once.

        For every entry of ``PRODUCT_CATEGORIES`` the market, regulatory and
        competition agents are queried; categories whose analysis contains an
        "error" key, or whose processing raises, are skipped. The resulting
        HTML is written to ``purchasing_report_<YYYY-MM-DD>.html`` in the
        current working directory. Returns ``None``.
        """
        # Imported locally: the except-branch below depends on it, and no
        # module-level `import traceback` is visible alongside this class.
        import traceback

        print("\n--- Starting Daily Intelligence Workflow ---")
        all_category_data = []

        for category in PRODUCT_CATEGORIES:
            try:
                market_result = self.market_agent.research_category(category)
                compliance_result = self.regulatory_agent.check_compliance(category, JURISDICTION)
                competition_result = self.competition_agent.analyze_competitors(category)

                market_analysis = market_result.get('analysis', {})
                compliance_analysis = compliance_result.get('analysis', {})
                competition_analysis = competition_result.get('analysis', {})
                
                # Basic check for errors from the LLM
                if "error" in market_analysis or "error" in compliance_analysis or "error" in competition_analysis:
                   print("Skipping report generation for {} due to data gathering or analysis errors.".format(category))
                   continue

                opportunity_score = self.reporting_agent._calculate_opportunity_score(market_analysis, compliance_analysis)

                all_category_data.append({
                    "category": category,
                    "market_data": market_result,
                    "compliance_data": compliance_result,
                    "competition_data": competition_result,
                    "opportunity_score": opportunity_score
                })
            except Exception as e:
                # Best effort: one failing category must not abort the whole run.
                print("!!! An unexpected error occurred while processing category '{}': {}".format(category, e))
                traceback.print_exc()


        if not all_category_data:
            print("\nCould not gather sufficient data to generate a report.")
            return
            
        # Supplier Discovery for top products
        # Identify up to 5 trending products from categories scoring above 60
        top_products_to_source = self._select_top_products(all_category_data, min_score=60, limit=5)
        
        supplier_data = self.supplier_agent.find_suppliers(top_products_to_source)

        # Generate the final report
        final_report = self.reporting_agent.generate_report(all_category_data, supplier_data)
        
        # Save the report
        filename = "purchasing_report_{}.html".format(datetime.now().strftime('%Y-%m-%d'))
        with open(filename, "w", encoding='utf-8') as f:
            f.write(final_report)
        print("\n--- Intelligence Workflow Complete ---")
        print("Report saved to: " + filename)
        # Use os.path.abspath to show a full, clear path
        print("Full path: " + os.path.abspath(filename))


if __name__ == "__main__":
    # Entry point: build the agent stack, then run one full workflow pass
    # (per-category research and scoring, supplier discovery, HTML report file).
    # `orchestrator` stays a module-level name so it can be inspected afterwards.
    orchestrator = Orchestrator()
    orchestrator.run()

--- Initializing AI Agency ---

--- Starting Daily Intelligence Workflow ---

AGENT (Market Research Agent (MRA)): Starting research for category: Vape Devices and Vaporizers (Disposable and Refillable)
TOOL (WebSearch): Searching for: 'new product trends in Vape Devices and Vaporizers (Disposable and Refillable) 2025'
TOOL (Scraper): Scraping URL: https://vapebuyingtips.com/2025-vaping-guide-latest-trends-devices-and-expert-insights/
TOOL (Scraper): Scraping URL: https://vapejuice.com/blogs/vape-juice-news/new-vapes-top-10
TOOL (Scraper): Scraping URL: https://www.vapecentralgroup.com/blogs/news/the-most-popular-vape-trends-of-2025-so-far
TOOL (Scraper): Scraping URL: https://www.gotvape.com/blogs/vaporizer-blog/top-vape-trends-to-look-out-for-in-2025
TOOL (Scraper): Scraping URL: https://worldvapeusa.com/blog/the-future-of-vaping-trends-to-watch-in-2025/
TOOL (Scraper): Scraping URL: https://www.airisvape.com/blogs/top-disposable-vape-devices-and-brands-for-2025/
TOOL (Scraper): Scra