# Using Different Models with AutoGen

## OpenAI - Already seen

# Ollama - Locally hosted

In [None]:
from autogen_core.models import UserMessage
from autogen_ext.models.ollama import OllamaChatCompletionClient

# Assuming your Ollama server is running locally on port 11434.
ollama_model_client = OllamaChatCompletionClient(model="llama3.2")

response = await ollama_model_client.create([UserMessage(content="Who are you, who created you?", source="user")])
print(response)
await ollama_model_client.close()


finish_reason='stop' content='The capital of France is Paris.' usage=RequestUsage(prompt_tokens=32, completion_tokens=8) cached=False logprobs=None thought=None

finish_reason='stop' content="I am a computer program designed to simulate conversation and answer questions to the best of my ability. I'm a type of artificial intelligence (AI) called a large language model, which means I've been trained on a massive dataset of text from various sources.\n\nMy primary function is to assist users with information and tasks, and I do this by processing natural language inputs and generating human-like responses. I don't have personal experiences, emotions, or consciousness like humans do, but I'm designed to be helpful and informative.\n\nI was created by a team of researcher-engineers at Meta AI, which is a subsidiary of Meta Platforms, Inc. The development of my technology involved extensive research in natural language processing (NLP), machine learning, and cognitive architectures.\n\nMy training data consists of a massive corpus of text from various sources, including books, articles, research papers, and websites. This corpus was used to fine-tune my understanding of language patterns, semantics, and context.\n\nI'm constantly learning and improving through interactions with users like you, so I appreciate any feedback or corrections that can help me become a better conversational AI!" usage=RequestUsage(prompt_tokens=33, completion_tokens=226) cached=False logprobs=None thought=None

In [None]:
from autogen_core.models import UserMessage
from autogen_ext.models.ollama import OllamaChatCompletionClient

# Assuming your Ollama server is running locally on port 11434.
ollama_model_client = OllamaChatCompletionClient(model="llama3.2")

response = await ollama_model_client.create([UserMessage(content="Who are you, who created you?", source="user")])
print(response)


In [None]:
from autogen_agentchat.agents import AssistantAgent
# Build an AssistantAgent named "MyOllamaAgent" on top of the Ollama model client.
agent_1 = AssistantAgent(
    name="MyOllamaAgent",
    model_client=ollama_model_client,
)

In [None]:
result = await agent_1.run(task='Tell me something about you?')

In [None]:
# Show the value returned by `agent_1.run(...)` as this cell's output.
result

TaskResult(messages=[TextMessage(id='a6b436b1-ca2a-430f-bf42-cb46e2ac0185', source='user', models_usage=None, metadata={}, created_at=datetime.datetime(2025, 7, 5, 5, 19, 6, 598002, tzinfo=datetime.timezone.utc), content='Tell me something about you?', type='TextMessage'), TextMessage(id='ad293544-9fd3-4610-9826-536bf9855c2c', source='MyOllamaAgent', models_usage=RequestUsage(prompt_tokens=55, completion_tokens=136), metadata={}, created_at=datetime.datetime(2025, 7, 5, 5, 19, 16, 631317, tzinfo=datetime.timezone.utc), content="I'm an artificial intelligence designed to assist and communicate with users like you. I have been trained on a vast amount of text data, which enables me to understand and respond to a wide range of questions and topics.\n\nMy primary function is to provide helpful and accurate information, answer questions, and engage in conversations to the best of my abilities. I can process natural language inputs and generate human-like responses.\n\nI don't have personal experiences, emotions, or opinions like humans do, but I'm designed to be neutral and informative. My goal is to help users find the information they need, learn new things, or simply have a conversation.\n\nWhat would you like to talk about?", type='TextMessage')], stop_reason=None)

# GEMINI

In [2]:
# Import the UserMessage model to structure user inputs
from autogen_core.models import UserMessage

# Import the OpenAIChatCompletionClient (used for interacting with LLMs like Gemini)
from autogen_ext.models.openai import OpenAIChatCompletionClient

# Import tools to load environment variables (for security)
from dotenv import load_dotenv
import os

# Load environment variables from a .env file (e.g., GOOGLE_API_KEY)
load_dotenv()

# Initialize the model client with the Gemini model using your Google API key
# This creates an async client to interact with the Gemini 1.5 Flash model
model_client = OpenAIChatCompletionClient(
    model="gemini-1.5-flash-8b",
    api_key=os.getenv("GOOGLE_API_KEY"),
)

# Send a single user message to the model and await its response
# `create()` is a low-level async call that sends one message and waits for a reply
response = await model_client.create([
    UserMessage(content="What is the capital of France?", source="user")
])

# Print the model's response (should output something like "Paris")
print(response)

# Close the model client to properly release network resources
# This is important in async workflows to avoid unclosed session warnings or resource leaks
await model_client.close()


finish_reason='stop' content='Paris\n' usage=RequestUsage(prompt_tokens=7, completion_tokens=2) cached=False logprobs=None thought=None


`await model_client.close()`
## ✅ Why This Line Is Important:
This closes the underlying HTTP session or socket connections used to communicate with the LLM API.

Helps prevent:

- Memory leaks

- Open handles or sockets lingering

- Runtime warnings about unclosed sessions

Especially critical in async-based environments where connections are not automatically garbage-collected.

# Open Router

In [4]:
# API key for OpenRouter (example key shown — always keep your real key secure!)
open_router_api_key = os.getenv("OPEN_ROUTER_API_KEY")
#print(open_router_api_key)

open_router_model_client =  OpenAIChatCompletionClient(
    base_url="https://openrouter.ai/api/v1",
    model="qwen/qwen3-coder:free",
    api_key = open_router_api_key,
    model_info={
        "family":'deepseek',
        "vision" :True,
        "function_calling":True,
        "json_output": False
    }
)


response = await open_router_model_client.create([UserMessage(content="What is the capital of France?", source="user")])

print(response)


finish_reason='stop' content='The capital of France is Paris.' usage=RequestUsage(prompt_tokens=15, completion_tokens=8) cached=False logprobs=None thought=None


## ✅ Key Difference Between `model_client.create(...)` vs `agent.run(...)`

| Function                | `model_client.create(...)`                          | `agent.run(...)`                                         |
|------------------------|----------------------------------------------------|----------------------------------------------------------|
| 🔧 **Abstraction Level** | Low-level, asynchronous                            | High-level, also asynchronous (`await agent.run(...)`)   |
| 📤 **Behavior**         | Sends a **single message**, waits for LLM response | Runs an **entire interaction** or multi-turn workflow     |
| 🎯 **Use Case**         | Fine-grained control of message-passing            | Encapsulated agent logic and task execution              |
| 📦 **Typical Context**  | LLM client libraries (e.g., OpenAI, Groq SDKs)     | Agent frameworks like **AutoGen**, LangChain             |
| 🔄 **Flexibility**      | Developer manages context, memory, retries         | Often handles context internally                         |
| 🛠️ **Example Scenario** | Custom prompt injection, streaming, etc.           | Full conversation/session handled in one call            |

> ⚙️ Use `create(...)` when you need low-level control (e.g., streaming, retries, chaining).  
> 🤖 Use `run(...)` when building structured agent flows or invoking autonomous task execution.

