In [11]:
from duckduckgo_search import DDGS
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional
import logging
import json
import pprint
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
from autogen import ConversableAgent
from autogen import GroupChat
from autogen import GroupChatManager

### Parameters

In [12]:
from utils import get_openai_api_key

# Fetch the API key through the project's `utils` helper (presumably the OpenAI key,
# going by the helper's name).
# NOTE(review): OPENAI_API_KEY is not referenced by `config_list` below, which uses a
# localhost endpoint with a placeholder key — confirm whether this lookup is still needed.
OPENAI_API_KEY = get_openai_api_key()
# LLM endpoint configuration shared by the agents' `llm_config`.
# A single entry: the "llama3" model served from an OpenAI-style `/v1` endpoint on
# localhost:11434 (presumably a local Ollama server — the api_key is a placeholder).
config_list = [
    dict(
        model="llama3",
        base_url="http://localhost:11434/v1",
        api_key="ollama",
    ),
]

### Tools

In [13]:
def duckduckgo_search(search_term: str, max_results: int = 5) -> Optional[List[Dict[str, Any]]]:
    """
    Perform a DuckDuckGo text search and return the search results as a list.

    Args:
        search_term (str): The term to use for the search.
        max_results (int): The maximum number of results to return. Defaults to 5.

    Returns:
        Optional[List[Dict[str, Any]]]: A list of search results, or None if the
        search term is empty/blank or the search raises an error (the error is
        logged rather than propagated).
    """
    # Honor the documented contract explicitly: an empty or whitespace-only
    # query returns None without issuing a network request.
    if search_term is None or not str(search_term).strip():
        logging.warning("Empty search term passed to duckduckgo_search; skipping search.")
        return None

    try:
        return DDGS().text(search_term, max_results=max_results)
    except Exception as e:
        # Best-effort tool: report the failure and signal it with None
        # instead of raising into the calling agent.
        logging.error(f"Error searching DuckDuckGo: {e}")
        return None

In [14]:
# Smoke-test the search tool: the call is the cell's last expression, so the
# returned result list is displayed as the cell output.
search_term = "Autogen"
duckduckgo_search(search_term=search_term)

[{'title': 'GitHub - microsoft/autogen: A programming framework for agentic AI ...',
  'href': 'https://github.com/microsoft/autogen',
  'body': 'AutoGen v0.4 is a rewrite of AutoGen from the ground up to create a more robust, scalable, easier to use, cross-language library for building AI Agents. Some key features include asynchronous messaging, support for scalable distributed agents, modular extensible design (bring your own agents, implement behaviors however you like), cross ...'},
 {'title': 'AutoGen | AutoGen - GitHub Pages',
  'href': 'https://microsoft.github.io/autogen/0.2/',
  'body': 'AutoGen provides multi-agent conversation framework as a high-level abstraction. With this framework, one can conveniently build LLM workflows. Easily Build Diverse Applications. AutoGen offers a collection of working systems spanning a wide range of applications from various domains and complexities.'},
 {'title': 'AutoGen: Enabling next-generation large language model applications',
  'href'

#### Scraping tool

In [15]:
def scrape_url(url: str, timeout: int = 10) -> Optional[str]:
    """
    Scrape the content of a given URL and return it as a string.

    The page is loaded in a headless Chrome instance, then its HTML is reduced
    to plain text with BeautifulSoup.

    Args:
        url (str): The URL to scrape.
        timeout (int): The timeout in seconds for the request. It bounds both
            the page load and the wait for the <body> element. Defaults to 10.

    Returns:
        Optional[str]: The scraped text content as a string, or None if the URL
        is empty or any step (browser start-up, page load, parsing) fails.
    """
    # Nothing to fetch: avoid launching a browser just to fail on an empty URL.
    if url is None or not str(url).strip():
        logging.warning("Empty URL passed to scrape_url; nothing to scrape.")
        return None

    # Created inside the try block so that a driver download/start-up failure
    # is reported as None (per the documented contract) instead of raising.
    driver = None

    try:
        # Set up Selenium options to run headless
        options = Options()
        for flag in (
            "--headless",
            "--disable-gpu",
            "--no-sandbox",
            "--disable-dev-shm-usage",
            # Chrome expects "width,height"; the "1920x1080" form is not parsed.
            "--window-size=1920,1080",
            "--log-level=3",
        ):
            options.add_argument(flag)

        # Set up the webdriver
        driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

        # Apply the timeout to navigation as well; otherwise driver.get() can
        # block far longer than `timeout` on a slow or hanging page.
        driver.set_page_load_timeout(timeout)

        # Open the URL
        driver.get(url)

        # Wait for the page to load
        WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )

        # Parse the rendered HTML and keep only its text, collapsing
        # surrounding whitespace and joining fragments with single spaces.
        soup = BeautifulSoup(driver.page_source, "html.parser")
        return soup.get_text(separator=' ', strip=True)

    except Exception as e:
        logging.error(f"An error ocured while scraping {url}: {e}")
        return None

    finally:
        # Close the webdriver (only if it was actually started)
        if driver is not None:
            driver.quit()

In [16]:
# Smoke-test the scraping tool against a public page: the call is the cell's
# last expression, so the scraped text (or None on failure) is displayed.
url = "https://github.com/microsoft/autogen"
scrape_url(url=url)

'GitHub - microsoft/autogen: A programming framework for agentic AI 🤖 (PyPi: autogen-agentchat) Skip to content Navigation Menu Toggle navigation Sign in Product GitHub Copilot Write better code with AI Security Find and fix vulnerabilities Actions Automate any workflow Codespaces Instant dev environments Issues Plan and track work Code Review Manage code changes Discussions Collaborate outside of code Code Search Find more, search less Explore All features Documentation GitHub Skills Blog Solutions By company size Enterprises Small and medium teams Startups By use case DevSecOps DevOps CI/CD View all use cases By industry Healthcare Financial services Manufacturing Government View all industries View all solutions Resources Topics AI DevOps Security Software Development View all Explore Learning Pathways White papers, Ebooks, Webinars Customer Stories Partners Executive Insights Open Source GitHub Sponsors Fund open source developers The ReadME Project GitHub community articles Reposi

### Agents

#### Agent - DuckDuckGo Search

In [17]:
# Search agent: its system prompt restricts it to answering via the search tool
# and asks for results as "<title>, <href>" bullets, ending with the
# '$$$TERMINATE$$$' marker.
# NOTE(review): no tool is registered on the agent in this cell — presumably done
# elsewhere in the notebook; confirm.
assistant_ddgo = ConversableAgent(
    name="AI_Assistant_in_DuckDuckGo_Search",
    # Adjacent string literals are concatenated as-is, so each sentence must end
    # with its own trailing space (the original ran "<href> ." into "Return").
    system_message=(
        "You are a helpful AI assistant. You can only help using search tool. "
        "You list the search results in bullet points, in a format <title>, <href>. "
        "Return '$$$TERMINATE$$$' when the task is done."
    ),
    llm_config={
        "config_list": config_list,
        "temperature": 0.7,
    },
)

#### Agent - Scrape & Summarize

In [18]:
# Scrape-and-summarize agent: its system prompt tells it to scrape one link, then
# summarize it (3 to 5 bullets unless instructed otherwise), to answer
# 'information scraping failed' when scraping fails, and to end with the
# '$$$TERMINATE$$$' marker.
assistant_scrape_summarize = ConversableAgent(
    name="AI_Assistant_Scraping_Summarizing",
    # Adjacent string literals are concatenated as-is, so each sentence must end
    # with its own trailing space (the original ran "failed'." into "Return").
    system_message=(
        "You will first scrape a given hyperlink for content, and then you will write a summary. "
        "If you are given a specific instruction or requirement for a summary (such as be concise or write in two paragraphs), you must follow the instruction. "
        "Otherwise, you will write a summary in between 3 to 5 bullet points. "
        "Sometimes, scraping will fail for various reasons, including dead links or being blocked from scraping. "
        "If that happens, you must say 'information scraping failed'. "
        "Return '$$$TERMINATE$$$' when the task is done."
    ),
    llm_config={
        "config_list": config_list,
        "temperature": 0.7,
    }
)

#### Agent - Lead Summarizer

In [19]:
# Lead summarizer agent: its system prompt tells it to take the list of links
# produced by the search agent, have the scraping agent summarize them one link
# at a time, merge the partial summaries into a final 5 to 10 bullet answer, and
# end with the '$$$TERMINATE$$$' marker.
assistant_summarize_lead = ConversableAgent(
    name="Lead_Scrape_Summarize",
    # Adjacent string literals are concatenated as-is, so each sentence must end
    # with its own trailing space (the original ran "the task." into "DO NOT").
    system_message=(
        "You will get a list of one or more websites from another AI that uses search engine tools. "
        "You will orchestrate summarization. You will work with another AI who scrapes a link and summarizes its content. "
        "You will order that AI to do its task ONE LINK AT A TIME, and you instruct that AI to output a short summary paragraph. "
        "When all websites in the list are summarized, you yourself will combine them into one final summarization answer. "
        "Your summary will be in bullet points, from 5 to 10 points. "
        "Return '$$$TERMINATE$$$' when the WHOLE task is done OR no link left to perform the task. "
        "DO NOT ask for more links to process."
    ),
    llm_config={
        "config_list": config_list,
        "temperature": 0.7,
    }
)