## Working tool-call examples with vLLM models (without Llama Stack)

In [55]:
from mcp_client import MCPClient
import json, os
from openai import AsyncOpenAI
from dotenv import load_dotenv
load_dotenv()

# Connection settings, read from the process environment.
# NOTE(review): when REMOTE_BASE_URL is unset, LLS_ENDPOINT is None and the
# derived OpenAI-compatible URL becomes the literal "None/v1/openai/v1".
MCP_ENDPOINT = os.environ.get("MCP_ENDPOINT")
LLS_ENDPOINT = os.environ.get("REMOTE_BASE_URL")
LLS_OPENAI_ENDPOINT = str(LLS_ENDPOINT) + "/v1/openai/v1"

# Model identifier sent with every chat-completion request.
LLM_MODEL_ID = "granite32-8b"

# Falls back to the placeholder "EMPTY" when no API key is configured.
OPENAI_APIKEY = os.environ.get("OPENAI_API_KEY", "EMPTY")

In [56]:
client = AsyncOpenAI(api_key=OPENAI_APIKEY, base_url=LLS_OPENAI_ENDPOINT)
mcp = MCPClient(MCP_ENDPOINT)
tools = await mcp.list_tools()                 
openai_tools = [
    {
        "type": "function",
        "function": {
            "name": t.name,
            "description": t.description,
            "parameters": t.inputSchema, 
        },
    }
    for t in tools
]

In [57]:
messages = [
    {
        "role": "user",
        "content": (
            "Use tools to generate a number between 5 and 50"
        ),
    }
]
resp = await client.chat.completions.create(
    model = LLM_MODEL_ID,
    messages = messages,
    tools = openai_tools,
    tool_choice = "auto",
    stream = False,
    )
assistant = resp.choices[0].message
if assistant.tool_calls:
    for call in assistant.tool_calls:
        args = json.loads(call.function.arguments)
        print("TOOL")
        print(call.function.name)
        print(args)
        result = await mcp.invoke_tool(call.function.name, args)
        print(f"Results: {result.content}")

        messages.append(
            {
                "role": "assistant",
                "name": call.function.name,
                "content": result.content,  
            }
        )
    final = await client.chat.completions.create(
        model    = LLM_MODEL_ID,
        messages = messages,
        stream=False
    )
    print("\n🔹 Assistant:", final.choices[0].message.content)
else:
    print("\n🔹 Assistant:", assistant.content)

TOOL
generate_random_number
{'min': '5', 'max': '50'}
Results: {"type":"text","text":"49","annotations":null}

🔹 Assistant: I have used a random number generator to produce a number between 5 and 50. The result is 49.


## Failing tool-call examples with vLLM models (with Llama Stack)

In [58]:
from llama_stack_client.lib.agents.agent import Agent
from llama_stack_client.lib.agents.event_logger import EventLogger
from llama_stack_client import LlamaStackClient
from termcolor import cprint
import logging

# Notebook-local logger that writes bare messages to the cell output.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# Attach the stream handler only once: re-running this cell would otherwise
# stack another handler and print every message multiple times.
if not logger.handlers:
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.INFO)
    formatter = logging.Formatter('%(message)s')
    stream_handler.setFormatter(formatter)
    logger.addHandler(stream_handler)

base_url = LLS_ENDPOINT
mcp_url=MCP_ENDPOINT
model = LLM_MODEL_ID

# NOTE: this rebinds `client`, which the earlier cells bound to the
# AsyncOpenAI client; re-run that cell before reusing the OpenAI example.
client = LlamaStackClient(base_url=base_url)
# Only the first 15 characters of the URL are logged.
logger.info(f"Connected to Llama Stack server @ {base_url[:15]}... \n")

# Get tool info and register tools
registered_tools = client.tools.list()
registered_tools_identifiers = [t.identifier for t in registered_tools]
registered_toolgroups = [t.toolgroup_id for t in registered_tools]

if "mcp::custom_tool" not in registered_toolgroups:
    # Register MCP tools
    client.toolgroups.register(
        toolgroup_id="mcp::custom_tool",
        provider_id="model-context-protocol",
        mcp_endpoint={"uri":mcp_url},
        )
    # Keep the summary logged below accurate: the list was built before the
    # registration, so the new toolgroup would otherwise be missing from it.
    registered_toolgroups.append("mcp::custom_tool")
mcp_tools = [t.identifier for t in client.tools.list(toolgroup_id="mcp::custom_tool")]

logger.info(f"""Your Server has access the the following toolgroups:
{set(registered_toolgroups)}
""")


Connected to Llama Stack server @ http://llamasta... 

Your Server has access the the following toolgroups:
{'builtin::code_interpreter', 'mcp::slack', 'mcp::custom_tool', 'builtin::websearch', 'mcp::openshift', 'builtin::rag'}



In [59]:
# Agent with no system instructions, restricted to the custom MCP toolgroup;
# the model chooses on its own whether to call a tool ("auto").
agent = Agent(
    client,
    model=model,
    instructions="",
    tools=["mcp::custom_tool"],
    tool_config={"tool_choice": "auto"},
    sampling_params={"max_tokens": 4096},
)

user_prompts = ["Use tools to generate a number between 5 and 50"]
session_id = agent.create_session(session_name="Auto_demo")

# One streamed turn per prompt; every logged event is printed as it arrives.
for prompt in user_prompts:
    user_message = {"role": "user", "content": prompt}
    turn_response = agent.create_turn(
        messages=[user_message],
        session_id=session_id,
        stream=True,
    )
    event_logger = EventLogger()
    for log in event_logger.log(turn_response):
        log.print()

# Detach all handlers from the notebook logger.
logger.handlers.clear()

[33minference> [0m[33m<[0m[33mtool[0m[33m_[0m[33mcall[0m[33m>[0m[97m[0m
[30m[0m