use Deepseek models with Autogen Agents through Groq integration for basic queries, RAG applications, and interactive UI implementations.

In [None]:
! pip install -q  "groq==0.18.0" "pyautogen==0.7.6" "pyautogen[retrievechat]" chromadb streamlit

In [2]:
import os
import autogen
import chromadb
from autogen import AssistantAgent
from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent
from google.colab import userdata

  warn(


In [3]:
api_key = userdata.get('GROQ_API_KEY')
model = "deepseek-r1-distill-qwen-32b" #deepseek-r1-distill-llama-70b

In [4]:
os.environ["AUTOGEN_USE_DOCKER"] = "False"

In [5]:
config_list = [
    {
    "model": model,
    "api_key": api_key,
    "api_type": "groq",
    }
]

In [6]:
llm_config = {
    "timeout": 60,
    "cache_seed": 42,
    "config_list": config_list,
    "temperature": 0,
}

In [7]:
def termination_msg(x):
    return isinstance(x, dict) and "TERMINATE" == str(x.get("content", ""))[-9:].upper()

URL = "/content/parametric-RAG.pdf"
PROBLEM = "What is Parametric Retrieval Augmented Generation in one line?"

In [8]:
# boss agent - without RAG
boss = autogen.UserProxyAgent(
    name="Boss",
    is_termination_msg=termination_msg,
    human_input_mode="NEVER",
    system_message="The boss who ask questions and give tasks.",
    code_execution_config=False,
    default_auto_reply="Reply `TERMINATE` if the task is done.",
)

# boss_aid agent - with RAg
boss_aid = RetrieveUserProxyAgent(
    name="Boss_Assistant",
    is_termination_msg=termination_msg,
    system_message="Assistant who has extra content retrieval power for solving difficult problems.",
    human_input_mode="NEVER",
    max_consecutive_auto_reply=3,
    retrieve_config={
        "task": "code",
        "docs_path": URL,
        "chunk_token_size": 1000,
        "model": config_list[0]["model"],
        "client": chromadb.PersistentClient(path="/tmp/chromadb"),
        "collection_name": "groupchat",
        "get_or_create": True,
    },
    code_execution_config=False,
)


coder = AssistantAgent(
    name="Senior_Python_Engineer",
    is_termination_msg=termination_msg,
    system_message="You are a senior python engineer. Reply `TERMINATE` in the end when everything is done.",
    llm_config=llm_config,
)


def _reset_agents():
    boss.reset()
    boss_aid.reset()
    coder.reset()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling%2Fconfig.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

without RAG

In [9]:
def norag_chat():
    _reset_agents()
    groupchat = autogen.GroupChat(
        agents=[boss, coder],
        messages=[],
        max_round=12,
        speaker_selection_method="auto",
        allow_repeat_speaker=False,
    )
    manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config)

    # Start chatting with the boss as this is the user proxy agent.
    boss.initiate_chat(
        manager,
        message=PROBLEM,
    )

norag_chat()

Boss (to chat_manager):

What is Parametric Retrieval Augmented Generation in one line?

--------------------------------------------------------------------------------

Next speaker: Senior_Python_Engineer

Senior_Python_Engineer (to chat_manager):

<think>
Okay, so I need to figure out what Parametric Retrieval Augmented Generation (Parametric RAG) is. I remember that RAG stands for Retrieval-Augmented Generation, which is a method where a model uses external information to generate responses. But what makes it "Parametric"?

Hmm, parametric models are those that have a fixed set of parameters, right? So maybe Parametric RAG uses a model that has parameters that can be adjusted or fine-tuned. Wait, but isn't RAG already a type of model that uses parameters? Maybe the difference is in how the retrieval part is handled.

In traditional RAG, the model might retrieve information from a database or some external source, like documents or web pages. But Parametric RAG might instead use a 



with RAG

In [10]:
def rag_chat():
    _reset_agents()
    groupchat = autogen.GroupChat(
        agents=[boss_aid, coder],
        messages=[],
        max_round=12,
        speaker_selection_method="round_robin"
    )
    manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config)

    # Start chatting with boss_aid as this is the user proxy agent.
    boss_aid.initiate_chat(
        manager,
        message=PROBLEM,
        n_results=3,
    )

rag_chat()

Boss_Assistant (to chat_manager):

What is Parametric Retrieval Augmented Generation in one line?

--------------------------------------------------------------------------------

Next speaker: Senior_Python_Engineer

Senior_Python_Engineer (to chat_manager):

<think>
Okay, so I need to figure out what Parametric Retrieval Augmented Generation (Parametric RAG) is. I remember that RAG stands for Retrieval-Augmented Generation, which is a method where a model uses external information to generate responses. But what makes it "Parametric"?

Hmm, parametric models are those that have a fixed set of parameters, right? So maybe Parametric RAG uses a model that has parameters that can be adjusted or fine-tuned. Wait, but isn't RAG already using a model with parameters? So maybe the difference is in how the retrieval part is handled.

In traditional RAG, the model retrieves information from a database or corpus, which is often non-parametric because it's based on stored data. But Parametric R