In [None]:
! pip install -q  "groq==0.13.0" "ag2==0.9.6" "ag2[retrievechat]" chromadb

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.8/108.8 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m419.6/419.6 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m559.5/559.5 kB[0m [31m25.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m46.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.6/294.6 kB[0m [31m16.8 MB/s[0m eta [36m0

In [2]:
import os
import autogen
import chromadb
from autogen import AssistantAgent
from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent
from google.colab import userdata

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [3]:
api_key = userdata.get('GROQ_API_KEY')
model = "llama-3.3-70b-versatile"

In [4]:
os.environ["AUTOGEN_USE_DOCKER"] = "False"

In [10]:
config_list = [
    {
    "model": model,
    "api_key": api_key,
    "api_type": "groq",
    }
]

In [11]:
llm_config = {
    "timeout": 60,
    "cache_seed": 42,
    "config_list": config_list,
    "temperature": 0,
}

In [7]:
def termination_msg(x):
    return isinstance(x, dict) and "TERMINATE" == str(x.get("content", ""))[-9:].upper()

URL = "https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Examples/Integrate%20-%20Spark.md"
PROBLEM = "How to use spark for parallel training in FLAML? Give me sample code."

In [13]:
# boss agent - without RAG
boss = autogen.UserProxyAgent(
    name="Boss",
    is_termination_msg=termination_msg,
    human_input_mode="NEVER",
    system_message="The boss who ask questions and give tasks.",
    code_execution_config=False,
    default_auto_reply="Reply `TERMINATE` if the task is done.",
)

# boss_aid agent - with RAg
boss_aid = RetrieveUserProxyAgent(
    name="Boss_Assistant",
    is_termination_msg=termination_msg,
    system_message="Assistant who has extra content retrieval power for solving difficult problems.",
    human_input_mode="NEVER",
    max_consecutive_auto_reply=3,
    retrieve_config={
        "task": "code",
        "docs_path": URL,
        "chunk_token_size": 1000,
        "model": config_list[0]["model"],
        "client": chromadb.PersistentClient(path="/tmp/chromadb"),
        "collection_name": "groupchat",
        "get_or_create": True,
    },
    code_execution_config=False,
)


coder = AssistantAgent(
    name="Senior_Python_Engineer",
    is_termination_msg=termination_msg,
    system_message="You are a senior python engineer. Reply `TERMINATE` in the end when everything is done.",
    llm_config=llm_config,
)


pm = autogen.AssistantAgent(
    name="Product_Manager",
    is_termination_msg=termination_msg,
    system_message="You are a product manager. Reply `TERMINATE` in the end when everything is done.",
    llm_config=llm_config,
)


reviewer = autogen.AssistantAgent(
    name="Code_Reviewer",
    is_termination_msg=termination_msg,
    system_message="You are a code reviewer. Reply `TERMINATE` in the end when everything is done.",
    llm_config=llm_config,
)


def _reset_agents():
    boss.reset()
    boss_aid.reset()
    coder.reset()
    pm.reset()
    reviewer.reset()

#### without RAG - No RAG chat

In [17]:
def norag_chat():
    _reset_agents()
    groupchat = autogen.GroupChat(
        agents=[boss, coder, pm, reviewer], # without RAG - use boss agent
        messages=[],
        max_round=12,
        speaker_selection_method="auto",
        allow_repeat_speaker=False,
    )
    manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config)

    # Start chatting with the boss as this is the user proxy agent.
    boss.initiate_chat(
        manager,
        message=PROBLEM,
    )

norag_chat()

Boss (to chat_manager):

How to use spark for parallel training in FLAML? Give me sample code.

--------------------------------------------------------------------------------

Next speaker: Senior_Python_Engineer

Senior_Python_Engineer (to chat_manager):

FLAML (Fast and Lightweight Automated Machine Learning) is a Python library that provides an efficient way to perform automated machine learning. To use Spark for parallel training in FLAML, you can leverage the `spark` backend provided by FLAML.

Here's a sample code snippet that demonstrates how to use Spark for parallel training in FLAML:

```python
import pandas as pd
from flaml import AutoML
from pyspark.sql import SparkSession

# Create a SparkSession
spark = SparkSession.builder.appName("FLAML Spark Example").getOrCreate()

# Load your dataset into a Pandas DataFrame
df = pd.read_csv("your_data.csv")

# Define the task and the time budget
task = "classification"
time_budget = 3600  # in seconds

# Create an AutoML instance w

#### with RAG (RAG chat)

In [18]:
def rag_chat():
    _reset_agents()
    groupchat = autogen.GroupChat(
        agents=[boss_aid, coder, pm, reviewer], # you want RAG, use boss_aid
        messages=[],
        max_round=12,
        speaker_selection_method="round_robin"
    )
    manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config)

    # Start chatting with boss_aid as this is the user proxy agent.
    boss_aid.initiate_chat(
        manager,
        message=PROBLEM,
        n_results=3,
    )

rag_chat()

Boss_Assistant (to chat_manager):

How to use spark for parallel training in FLAML? Give me sample code.

--------------------------------------------------------------------------------

Next speaker: Senior_Python_Engineer

Senior_Python_Engineer (to chat_manager):

FLAML (Fast and Lightweight Automated Machine Learning) is a Python library that provides an efficient way to perform automated machine learning. To use Spark for parallel training in FLAML, you can leverage the `spark` backend provided by FLAML.

Here's a sample code snippet that demonstrates how to use Spark for parallel training in FLAML:

```python
import pandas as pd
from flaml import AutoML
from pyspark.sql import SparkSession

# Create a SparkSession
spark = SparkSession.builder.appName("FLAML Spark Example").getOrCreate()

# Load your dataset into a Pandas DataFrame
df = pd.read_csv("your_data.csv")

# Define the task and the time budget
task = "classification"
time_budget = 3600  # in seconds

# Create an AutoML 



#### FUNCTION CALLING RAG CHAT

In [None]:
from autogen import LLMConfig

def function_calling_rag_chat():
    _reset_agents()
    def retrieve_content(message, n_results=3):
        boss_aid.n_results = n_results  # Set the number of results to be retrieved.
        # Check if we need to update the context.
        update_context_case1, update_context_case2 = boss_aid._check_update_context(message)
        if (update_context_case1 or update_context_case2) and boss_aid.update_context:
            boss_aid.problem = message if not hasattr(boss_aid, "problem") else boss_aid.problem
            _, ret_msg = boss_aid._generate_retrieve_user_reply(message)
        else:
            ret_msg = boss_aid.generate_init_message(message, n_results=n_results)
        return ret_msg if ret_msg else message

    boss_aid.human_input_mode = "NEVER"  # Disable human input for boss_aid since it only retrieves content.

    llm_config = {
        "functions": [
            {
                "name": "retrieve_content",
                "description": "retrieve content for code generation and question answering.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "message": {
                            "type": "string",
                            "description": "Refined message which keeps the original meaning and can be used to retrieve content for code generation and question answering.",
                        }
                    },
                    "required": ["message"],
                },
            },
        ],
        "config_list": config_list,
        "timeout": 60,
        "cache_seed": 42,
    }

    for agent in [coder, pm, reviewer]:
        # update llm_config for assistant agents.
        # agent.llm_config.update(llm_config)
        agent.llm_config = LLMConfig(**llm_config)

    for agent in [boss, coder, pm, reviewer]:
        # register functions for all agents.
        agent.register_function(
            function_map={
                "retrieve_content": retrieve_content,
            }
        )

    groupchat = autogen.GroupChat(
        agents=[boss, coder, pm, reviewer],
        messages=[],
        max_round=12,
        speaker_selection_method="random",
        allow_repeat_speaker=False,
    )

    manager_llm_config = llm_config.copy()
    manager_llm_config.pop("functions")
    manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=manager_llm_config)

    # Start chatting with the boss as this is the user proxy agent.
    boss.initiate_chat(
        manager,
        message=PROBLEM+" Must retrieve content using function calling.",
    )

print("FUNCTION CALLING RAG CHAT")
function_calling_rag_chat()

FUNCTION CALLING RAG CHAT
Boss (to chat_manager):

How to use spark for parallel training in FLAML? Give me sample code. Must retrieve content using function calling.

--------------------------------------------------------------------------------

Next speaker: Product_Manager

Product_Manager (to chat_manager):

To use Spark for parallel training in FLAML, you can leverage the `SparkEstimator` class provided by FLAML. This allows you to distribute the training process across multiple nodes in a Spark cluster, significantly speeding up the hyperparameter tuning process for your machine learning models.

First, ensure you have FLAML and the necessary Spark dependencies installed in your environment. You can install FLAML using pip:

```bash
pip install flaml
```

For Spark, if you haven't installed it yet, you can do so by installing `pyspark`:

```bash
pip install pyspark
```

Here's a sample code snippet demonstrating how to use Spark for parallel training in FLAML. This example ass