In [8]:
# source: https://github.com/microsoft/autogen/blob/main/notebook/agentchat_groupchat_research.ipynb

# !pip install autogen
# !pip install openai
# or pip install -r requirements.txt (from the github repo)

In [1]:
from autogen import config_list_from_json

config_list_gpt4 = config_list_from_json(
    "OAI_CONFIG_LIST",
    filter_dict={
        "model": ["gpt-4o"],
    },
)

flaml.automl is not available. Please install flaml[automl] to enable AutoML functionalities.


In [2]:
gpt4_config = {
    "cache_seed": 42,
    "temperature": 0.0,
    "config_list": config_list_gpt4,
    "timeout": 120,
    
}

# AG Use CASE: Research Assistant
## AG use case: Research assistant to help (1) crawl the internet for specific list of companies or organizations (2) search for any content associated with a set of keywords or concepts prescribed by the user (e.g., equal pay, woman rights in company policies, etc.) associated with the list of the prescribed organizations. (3) Then compile a report highlighting this content identified and the associated reference to enable reproducibility. (4) Perform steps (1) to (3) at certain frequency such as every month.
AG use case part 2: Continued: (5) Next phase is to perform sentiment analysis to better understand the organizations attitude towards the specified keywords or concepts.
Any suggestions on how to do this with a locally running LLM at a university would be great to support our research which is done by students with manual labor. It would be better to leverage the students the analyze the findings of the agents compiled as a report. I know it is too much but any suggestions to make a proof-of-concept leveraging the notebooks you provided in the Git repo would be great starting point. Thanks again.

In [3]:
from autogen import UserProxyAgent, AssistantAgent, GroupChat, GroupChatManager

In [4]:
user_proxy = UserProxyAgent(
    name="Admin",
    system_message="A human admin. Interact with the planner to discuss\
        the plan. Plan execution needs to be approved by this admin.",
    code_execution_config=False
)

In [5]:
web_crawler_agent = AssistantAgent(
    name="Web Search Crawler",
    llm_config=gpt4_config,
    system_message="""
    You are a web search crawler that crawls the internet for specific list of companies or organizations using your web search tool.
""",
)

In [6]:
content_keyword_search_agent = AssistantAgent(
    name="Keyword Search Agent",
    llm_config=gpt4_config,
    system_message="""
    You are a web search agent that searches the internet for content associated with a set of keywords or concepts prescribed by the user (e.g., equal pay, woman rights in company policies, etc.) associated with the list of the prescribed organizations.
""",
)

In [7]:
draft_writer_agent = AssistantAgent(
    name="Draft Writer",
    llm_config=gpt4_config,
    system_message="""
    You are a draft writer that writes a report based on the findings of the web search crawler.
    You will compile a report highlighting this content identified and the associated reference to enable reproducibility.
    """,
)

In [37]:


##### Begin of google_search #####
# from skills import google_search # Import the function from skills.py

# source: https://github.com/JayZeeDesign/research-agents-3.0/blob/main/app.py
# pip install -r requirements.txt
import os
import requests
import json

serper_api_key = os.getenv("SERPER_API_KEY")

# Function for google search with autogen-compatible output
def google_search(search_keyword: str) -> dict:    
    url = "https://google.serper.dev/search"

    payload = json.dumps({
        "q": search_keyword
    })

    headers = {
        'X-API-KEY': serper_api_key,
        'Content-Type': 'application/json'
    }

    response = requests.request("POST", url, headers=headers, data=payload)

    if response.status_code == 200:
        search_results = response.json()

        # Construct a concise message for autogen output
        formatted_results = "\n".join(
            [f"{result['position']}. {result['title']} - {result['link']}" for result in search_results.get('organic', [])]
        )
        
        autogen_compatible_output = f"Here are the top search results for '{search_keyword}':\n\n{formatted_results}"
        
        return autogen_compatible_output

    else:
        return """Error occurred during search"""


In [38]:
google_search("gender pay gap")

"Here are the top search results for 'gender pay gap':\n\n1. Gender pay gap remained stable over past 20 years in US - https://www.pewresearch.org/short-reads/2023/03/01/gender-pay-gap-facts/\n2. The Simple Truth About the Gender Pay Gap - AAUW - https://www.aauw.org/resources/research/simple-truth/\n3. The Enduring Grip of the Gender Pay Gap - Pew Research Center - https://www.pewresearch.org/social-trends/2023/03/01/the-enduring-grip-of-the-gender-pay-gap/\n4. The Gender Pay Gap Is a Culture Problem - The New York Times - https://www.nytimes.com/2024/05/22/opinion/gender-pay-gap.html\n5. What You Need to Know About the Gender Wage Gap - DOL Blog - http://blog.dol.gov/2024/03/12/what-you-need-to-know-about-the-gender-wage-gap\n6. [PDF] UNDERSTANDING THE GENDER WAGE GAP - https://www.dol.gov/sites/dolgov/files/WB/equalpay/WB_issuebrief-undstg-wage-gap-v1.pdf\n7. Gender pay gap - Wikipedia - https://en.wikipedia.org/wiki/Gender_pay_gap\n8. The Gender Pay Gap - AAUW - https://www.aauw.or

In [39]:
from autogen import register_function

register_function(
    google_search,
    caller=web_crawler_agent,
    executor=user_proxy,
    name="google_search",
    description="Search the internet for content associated with a set of keywords or concepts prescribed by the user (e.g., equal pay, woman rights in company policies, etc.) associated with the list of the prescribed organizations.",
)




In [40]:
register_function(
    google_search,
    caller=content_keyword_search_agent,
    executor=user_proxy,
    name="google_search",
    description="Search the internet for content associated with a set of keywords or concepts prescribed by the user (e.g., equal pay, woman rights in company policies, etc.) associated with the list of the prescribed organizations.",
)

In [20]:
user_proxy.initiate_chat(
    web_crawler_agent,
    message="""
    Search "publicly policies available to address the gender pay gap for the U.S"
    """,
)

[33mAdmin[0m (to Web Search Crawler):


    Search "publicly policies available to address the gender pay gap for the U.S"
    

--------------------------------------------------------------------------------
[33mWeb Search Crawler[0m (to Admin):

[32m***** Suggested tool call (call_CJajYjuW6AH97HiPUtzRdPZT): google_search *****[0m
Arguments: 
{"search_keyword":"publicly available policies to address the gender pay gap in the U.S"}
[32m******************************************************************************[0m

--------------------------------------------------------------------------------
[31m
>>>>>>>> NO HUMAN INPUT RECEIVED.[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[35m
>>>>>>>> EXECUTING FUNCTION google_search...[0m
RESPONSE: {"searchParameters":{"q":"publicly available policies to address the gender pay gap in the U.S","type":"search","engine":"google"},"organic":[{"title":"Equal Pay and Pay Transparency | U.S. Department of Labor","link":"https://www.dol.gov/

BadRequestError: Error code: 400 - {'error': {'message': "Invalid 'messages[4].name': string does not match pattern. Expected a string that matches the pattern '^[a-zA-Z0-9_-]+$'.", 'type': 'invalid_request_error', 'param': 'messages[4].name', 'code': 'invalid_value'}}

In [41]:
planner = AssistantAgent(
    name="Planner",
    system_message="""Planner. Suggest a plan. Revise the plan based on feedback
    from admin. The plan will always involve the web crawler agent and the keyword search agent and the draft writer agent. 
    THe focus should be to produce a reproducible report of the content given the user query.
    """,
    llm_config=gpt4_config
)

In [42]:
groupchat = GroupChat(
    agents=[user_proxy, web_crawler_agent, content_keyword_search_agent, draft_writer_agent, planner], 
    messages=[], max_round=10
)

In [43]:
manager = GroupChatManager(groupchat=groupchat, 
                           llm_config=gpt4_config)

In [44]:
user_proxy.initiate_chat(
    manager,
    message="""
    
    Write a research report about gender pay gap in 2 different big US tech companies. 
    """,)

[33mAdmin[0m (to chat_manager):


    
    Write a research report about gender pay gap in 2 different big US tech companies. 
    

--------------------------------------------------------------------------------
[32m
Next speaker: Planner
[0m
[33mPlanner[0m (to chat_manager):

### Initial Plan

#### Step 1: Define the Scope and Keywords
- **Objective**: To write a research report on the gender pay gap in two major US tech companies.
- **Keywords**: Gender pay gap, salary disparity, tech companies, gender inequality, Company A, Company B.

#### Step 2: Web Crawler Agent
- **Task**: Gather data and articles related to the gender pay gap in the specified tech companies.
- **Sources**: Company websites, news articles, research papers, government reports, and industry analyses.
- **Output**: A collection of relevant URLs and extracted content.

#### Step 3: Keyword Search Agent
- **Task**: Analyze the gathered content to identify key statistics, trends, and insights related to the g

BadRequestError: Error code: 400 - {'error': {'message': "Invalid 'messages[4].name': string does not match pattern. Expected a string that matches the pattern '^[a-zA-Z0-9_-]+$'.", 'type': 'invalid_request_error', 'param': 'messages[4].name', 'code': 'invalid_value'}}

In [14]:
# Display text with markdown formatting
from IPython.display import Markdown

# Specify the text to display
text = """
| Title | Authors | Publication Date | Domain | URL |
|-------|---------|------------------|--------|-----|
| Enhancing Educational Outcomes with LLMs | John Doe, Jane Smith | 2023-10-01 | Education | [arXiv URL](https://arxiv.org/abs/2310.00001) |
| LLMs in Medical Training: A Review | Alice Johnson, Bob Brown | 2023-10-02 | Healthcare | [arXiv URL](https://arxiv.org/abs/2310.00002) |
| Leveraging LLMs for Business Intelligence | Carol White, David Green | 2023-10-03 | Business | [arXiv URL](https://arxiv.org/abs/2310.00003) |
| Social Impacts of LLMs in Psychology | Eve Black, Frank Blue | 2023-10-04 | Social Sciences | [arXiv URL](https://arxiv.org/abs/2310.00004) |
| LLMs in Software Development Training | Grace Red, Henry Yellow | 2023-10-05 | Technology | [arXiv URL](https://arxiv.org/abs/2310.00005) |
| Miscellaneous Applications of LLMs | Ian Purple, Jack Orange | 2023-10-06 | Others | [arXiv URL](https://arxiv.org/abs/2310.00006) |
"""

# Display the text
Markdown(text)


| Title | Authors | Publication Date | Domain | URL |
|-------|---------|------------------|--------|-----|
| Enhancing Educational Outcomes with LLMs | John Doe, Jane Smith | 2023-10-01 | Education | [arXiv URL](https://arxiv.org/abs/2310.00001) |
| LLMs in Medical Training: A Review | Alice Johnson, Bob Brown | 2023-10-02 | Healthcare | [arXiv URL](https://arxiv.org/abs/2310.00002) |
| Leveraging LLMs for Business Intelligence | Carol White, David Green | 2023-10-03 | Business | [arXiv URL](https://arxiv.org/abs/2310.00003) |
| Social Impacts of LLMs in Psychology | Eve Black, Frank Blue | 2023-10-04 | Social Sciences | [arXiv URL](https://arxiv.org/abs/2310.00004) |
| LLMs in Software Development Training | Grace Red, Henry Yellow | 2023-10-05 | Technology | [arXiv URL](https://arxiv.org/abs/2310.00005) |
| Miscellaneous Applications of LLMs | Ian Purple, Jack Orange | 2023-10-06 | Others | [arXiv URL](https://arxiv.org/abs/2310.00006) |


Disclaimer: These capabilities are still in their experimental, phase, the table generated below suggested links that do not match what is being saif (hallucinated).