## Interact with a locally hosted SLM (Qwen) to summarize web articles

In [None]:
from autogen_ext.models.openai import OpenAIChatCompletionClient
from autogen_core.models import ModelInfo , UserMessage 
from autogen_ext.tools.mcp import StdioServerParams, mcp_server_tools

from dotenv import load_dotenv
load_dotenv()
import yaml

In [None]:
qwen_model = OpenAIChatCompletionClient(
        model="Qwen2.5-7B-Instruct-1M-iq3_xxs.gguf",
        base_url="http://localhost:8080/v1",
        model_info=ModelInfo(vision=False, function_calling=True, json_output=False, family="unknown"),
        api_key="lm-studio",
    ) 
# Setup server params for local filesystem access
fetch_mcp_server = StdioServerParams(command="uvx", args=["mcp-server-fetch"])
tools = await mcp_server_tools(fetch_mcp_server)

result = await qwen_model.create(messages=[UserMessage(source="user", content="Summarize the content of https://newsletter.victordibia.com/p/you-have-ai-fatigue-thats-why-you")], tools=tools)

print()
print(result.content)
print()



## Additional interactions with a local SLM

In [None]:
# Example: talk to the local server through the regular OpenAI Python client.
from openai import OpenAI

# Point the client at the local server instead of the hosted API.
client = OpenAI(api_key="lm-studio", base_url="http://localhost:8080/v1")

# One system instruction followed by a single user turn.
rhyme_messages = [
    {"role": "system", "content": "Always answer in rhymes."},
    {"role": "user", "content": "Introduce yourself."},
]

completion = client.chat.completions.create(
    model="Mungert/Qwen2.5-7B-Instruct-1M-GGUF",
    messages=rhyme_messages,
    temperature=0.7,
)

# Show the text of the first returned choice.
first_choice = completion.choices[0]
print(first_choice.message.content)

In [None]:
# Chat with an intelligent assistant in your terminal.
# Each turn streams the assistant reply, dumps the full history in yellow,
# then reads the next user message. Ctrl-C or EOF at the prompt ends the chat.
import json

from openai import OpenAI

# Point to the local server
client = OpenAI(base_url="http://localhost:8080/v1", api_key="lm-studio")

# ANSI escape codes used to colour the history dump; defined once instead of
# on every loop iteration.
yellow_color = "\033[33m"  # Yellow ANSI code
reset_color = "\033[0m"

# Conversation so far: the system prompt plus the opening user request.
history = [
    {"role": "system", "content": "You are an intelligent assistant. You always provide well-reasoned answers that are both correct and helpful."},
    {"role": "user", "content": "Hello, introduce yourself to someone opening this program for the first time. Be concise."},
]

while True:
    completion = client.chat.completions.create(
        model="Mungert/Qwen2.5-7B-Instruct-1M-GGUF",
        messages=history,
        temperature=0.7,
        stream=True,
    )

    # Echo each streamed fragment as it arrives and collect the fragments so
    # the full reply can be stored in the history afterwards.
    reply_parts = []
    for chunk in completion:
        # A streamed chunk can arrive with an empty `choices` list; skip it
        # rather than failing with IndexError on `choices[0]`.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece:
            print(piece, end="", flush=True)
            reply_parts.append(piece)

    new_message = {"role": "assistant", "content": "".join(reply_parts)}
    history.append(new_message)

    # Dump the whole chat history after every assistant turn
    # (comment out the three prints below to silence it).
    print(f"{yellow_color}\n{'-'*20} History dump {'-'*20}\n")
    print(json.dumps(history, indent=2))
    print(f"\n{'-'*55}\n{reset_color}")

    print()
    try:
        user_text = input("> ")
    except (EOFError, KeyboardInterrupt):
        # End the chat quietly instead of surfacing a traceback.
        print()
        break
    history.append({"role": "user", "content": user_text})