In [1]:
import os

# Connection and data settings used by the cells below.
# Each value may be overridden with an environment variable of the same name,
# so the notebook can target another deployment without editing code; the
# defaults are the local-development values.
LLAMA_STACK_URL = os.environ.get("LLAMA_STACK_URL", "http://localhost:8321")  # base_url of the Llama Stack client
MCP_URL = os.environ.get("MCP_URL", "http://127.0.0.1:8000/mcp")  # server_url handed to the "mcp" tool
LLAMA_STACK_MODEL_ID = os.environ.get("LLAMA_STACK_MODEL_ID", "vllm/llama-4-scout-17b-16e-w4a16")  # model used for every request
TXT_FILE_PATH = os.environ.get("TXT_FILE_PATH", "../data/calabaceira_history.txt")  # document uploaded for RAG

In [2]:
from llama_stack_client import LlamaStackClient

# One client instance, reused by the cells below; it targets the server at LLAMA_STACK_URL.
client = LlamaStackClient(base_url=LLAMA_STACK_URL)

In [3]:
def print_response(response):
    """Print a human-readable summary of a response and each of its output items.

    The header (id, status, model, creation time, item count) is printed first,
    followed by one section per output item:

    - "text" / "message": the text of the first content part
    - "file_search_call": call id, status, queries and results
    - "mcp_list_tools" / "mcp_call": delegated to the dedicated MCP printers
    - anything else: the item's ``content`` attribute, or None if it has none

    Args:
        response: Response object exposing ``id``, ``status``, ``model``,
            ``created_at`` and ``output`` (a sequence of typed items, or None).
    """
    print(f"ID: {response.id}")
    print(f"Status: {response.status}")
    print(f"Model: {response.model}")
    print(f"Created at: {response.created_at}")

    # Treat a missing output list as empty instead of failing on len(None).
    output_items = response.output or []
    print(f"Output items: {len(output_items)}")

    for i, output_item in enumerate(output_items, 1):
        if len(output_items) > 1:
            print(f"\n--- Output Item {i} ---")
        print(f"Output type: {output_item.type}")

        if output_item.type in ("text", "message"):
            content = getattr(output_item, "content", None)
            if isinstance(content, str):
                text = content
            elif content:
                # Fall back to the part itself when it carries no `.text`.
                text = getattr(content[0], "text", content[0])
            else:
                # Empty or missing content: report None rather than raise IndexError.
                text = None
            print(f"Response content: {text}")
        elif output_item.type == "file_search_call":
            print(f"  Tool Call ID: {output_item.id}")
            print(f"  Tool Status: {output_item.status}")
            # 'queries' is a list, so we join it for clean printing
            print(f"  Queries: {', '.join(output_item.queries or [])}")
            # Display results if they exist, otherwise note they are empty
            print(f"  Results: {output_item.results if output_item.results else 'None'}")
        elif output_item.type == "mcp_list_tools":
            print_mcp_list_tools(output_item)
        elif output_item.type == "mcp_call":
            print_mcp_call(output_item)
        else:
            # Unknown item types are not guaranteed to have a `content` attribute.
            print(f"Response content: {getattr(output_item, 'content', None)}")


def print_mcp_call(mcp_call):
    """Print one MCP tool call: its name, server, id, arguments and outcome.

    The outcome is, in order of precedence: the call's error, its output
    (pretty-printed when it parses as JSON, otherwise printed verbatim), or a
    placeholder when neither is set.

    Args:
        mcp_call: Object exposing ``name``, ``server_label``, ``id``,
            ``arguments``, ``error`` and ``output``.
    """
    import json

    print(f"\n🛠️  MCP Tool Call: {mcp_call.name}")
    print(f"   Server: {mcp_call.server_label}")
    print(f"   ID: {mcp_call.id}")
    print(f"   Arguments: {mcp_call.arguments}")

    if mcp_call.error:
        # Must be an f-string; a plain literal prints the placeholder verbatim.
        print(f"Error: {mcp_call.error}")
    elif mcp_call.output:
        print("Output:")
        # Try to format JSON output nicely
        try:
            parsed_output = json.loads(mcp_call.output)
        except (TypeError, ValueError):
            # Not valid JSON (JSONDecodeError is a ValueError) or not a
            # str/bytes value: print as-is. Narrower than a bare `except`,
            # so KeyboardInterrupt/SystemExit still propagate.
            print(f"   {mcp_call.output}")
        else:
            print(json.dumps(parsed_output, indent=4))
    else:
        print("   ⏳ No output yet")


def print_mcp_list_tools(mcp_list_tools):
    """Print the tools advertised by an MCP server, one numbered entry per tool.

    For every tool the name and description are shown; when the tool's input
    schema has a 'properties' mapping, each parameter is listed with its type,
    a required/optional marker and its description. Entries are separated by a
    dashed rule (none after the last entry).

    Args:
        mcp_list_tools: Object exposing ``server_label``, ``id`` and ``tools``;
            each tool exposes ``name``, ``description`` and ``input_schema``.
    """
    print(f"\n🔧 MCP Server: {mcp_list_tools.server_label}")
    print(f"   ID: {mcp_list_tools.id}")

    tools = mcp_list_tools.tools
    total = len(tools)
    print(f"   Available Tools: {total}")
    print("=" * 80)

    for position, tool in enumerate(tools, start=1):
        print(f"\n{position}. {tool.name}")
        print(f"   Description: {tool.description}")

        # Only schemas that declare 'properties' get a parameter listing.
        schema = tool.input_schema
        if schema and 'properties' in schema:
            params = schema['properties']
            required_names = schema.get('required', [])

            print("   Parameters:")
            for name, spec in params.items():
                kind = spec.get('type', 'unknown')
                description = spec.get('description', 'No description')
                marker = " (required)" if name in required_names else " (optional)"
                print(f"     • {name} ({kind}){marker}")
                if description:
                    print(f"       {description}")

        # Separator between entries, omitted after the final one.
        if position < total:
            print("-" * 40)

def print_simple_response(response):
    """Print the header fields of a response plus its first output item.

    Only ``response.output[0]`` is inspected: its type and the text of its
    first content part are printed.

    Args:
        response: Object exposing ``id``, ``status``, ``model``,
            ``created_at`` and a non-empty ``output`` sequence.
    """
    header_fields = (
        ("ID", "id"),
        ("Status", "status"),
        ("Model", "model"),
        ("Created at", "created_at"),
    )
    for label, attribute in header_fields:
        print(f"{label}: {getattr(response, attribute)}")

    first_item = response.output[0]
    print(f"Output type: {first_item.type}")
    print(f"Response content: {first_item.content[0].text}")


def print_rag_response(response):
    """Print a human-readable summary of a file-search (RAG) response.

    The header (id, status, model, creation time, item count) is printed first,
    followed by one section per output item:

    - "text" / "message": the text of the first content part
    - "file_search_call": call id, status, queries and results
    - anything else: the item's ``content`` attribute, or None if it has none

    Args:
        response: Response object exposing ``id``, ``status``, ``model``,
            ``created_at`` and ``output`` (a sequence of typed items, or None).
    """
    print(f"ID: {response.id}")
    print(f"Status: {response.status}")
    print(f"Model: {response.model}")
    print(f"Created at: {response.created_at}")

    # Treat a missing output list as empty instead of failing on len(None).
    output_items = response.output or []
    print(f"Output items: {len(output_items)}")

    for i, output_item in enumerate(output_items, 1):
        if len(output_items) > 1:
            print(f"\n--- Output Item {i} ---")
        print(f"Output type: {output_item.type}")

        if output_item.type in ("text", "message"):
            content = getattr(output_item, "content", None)
            if isinstance(content, str):
                text = content
            elif content:
                # Fall back to the part itself when it carries no `.text`.
                text = getattr(content[0], "text", content[0])
            else:
                # Empty or missing content: report None rather than raise IndexError.
                text = None
            print(f"Response content: {text}")
        elif output_item.type == "file_search_call":
            print(f"  Tool Call ID: {output_item.id}")
            print(f"  Tool Status: {output_item.status}")
            # 'queries' is a list, so we join it for clean printing
            print(f"  Queries: {', '.join(output_item.queries or [])}")
            # Display results if they exist, otherwise note they are empty
            print(f"  Results: {output_item.results if output_item.results else 'None'}")
        else:
            # Unknown item types are not guaranteed to have a `content` attribute.
            print(f"Response content: {getattr(output_item, 'content', None)}")


def extract_response_text(response):
    """Return the text of the first "message" or "text" output item that has content.

    Items are scanned in order; the first one of a matching type whose
    ``content`` is non-empty yields the ``text`` of its first content part.

    Args:
        response: ResponseObject from LlamaStack client

    Returns:
        str: The extracted response text, or None if not found
    """
    if not (response and response.output):
        return None

    for item in response.output:
        # Both item types are handled identically.
        if item.type not in ("message", "text"):
            continue
        if not hasattr(item, 'content'):
            continue
        parts = item.content
        if parts and len(parts) > 0:
            return parts[0].text

    return None


def extract_file_search_results(response):
    """Collect the results of every "file_search_call" item in a response.

    Results are concatenated in the order the items appear; items without
    results (or with an empty result list) contribute nothing.

    Args:
        response: ResponseObject from LlamaStack client

    Returns:
        list: List of file search results, or empty list if none found
    """
    if not (response and response.output):
        return []

    collected = []
    for item in response.output:
        if item.type != "file_search_call":
            continue
        if not hasattr(item, 'results'):
            continue
        hits = item.results
        if hits:
            collected.extend(hits)

    return collected

## Retrieval-Augmented Generation

### Create vector store

In [4]:
import uuid

# An 8-hex-character random suffix gives every run its own store name.
vector_store_name = f"vec_{uuid.uuid4().hex[:8]}"

# The embedding dimension is passed alongside the embedding model name.
vector_store = client.vector_stores.create(
    name=vector_store_name,
    embedding_model="all-MiniLM-L6-v2",
    embedding_dimension=384,
)
vector_store_id = vector_store.id

print(vector_store_id)

INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/vector_stores "HTTP/1.1 200 OK"


vs_a7f56262-3017-49b5-b535-d8fea3ede0ef


### Prompt 1: What is the Calabaceira Shop address? (empty knowledge base)

In [5]:
# Baseline run: the vector store is still empty, so file_search has nothing to return.
prompt_1 = "What is the Calabaceira Shop address?"

response_1 = client.responses.create(
    model=LLAMA_STACK_MODEL_ID,
    input=prompt_1,
    tools=[
        {
            "type": "file_search",
            "vector_store_ids": [vector_store_id],
        }
    ],
    # Same instructions, character for character, as the post-ingestion run of
    # this question, so the knowledge base is the only thing that differs.
    instructions=(
        "You are a helpful customer service representative for Calabaceira Shop. "
        "Always be polite, professional, and provide accurate information about the shop. "
        "If you don't know something, admit it honestly."
    ),
)


print_rag_response(response_1)

INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/responses "HTTP/1.1 200 OK"


ID: resp-3914ebf7-922b-420f-8e5b-154ae282d6f6
Status: completed
Model: vllm/llama-4-scout-17b-16e-w4a16
Created at: 1756295428
Output items: 2

--- Output Item 1 ---
Output type: file_search_call
  Tool Call ID: chatcmpl-tool-d462145e76ca4eb4b173abba47de483e
  Tool Status: completed
  Queries: Calabaceira Shop address
  Results: None

--- Output Item 2 ---
Output type: message
Response content: I apologize for the inconvenience. Unfortunately, I couldn't find the address of Calabaceira Shop in my database. Can you please provide more context or information about the shop, such as its location or city? I'll do my best to help you find the address.


### Create a file

In [6]:
from pathlib import Path

# Upload the local text document; the returned file id is what gets attached
# to the vector store afterwards.
file_create_response = client.files.create(
    file=Path(TXT_FILE_PATH),
    purpose="assistants",
)
file_create_response

INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/files "HTTP/1.1 200 OK"


File(id='file-f0912da26c234ac2b346bfd22e3c8e8f', bytes=1538, created_at=1756295448, expires_at=1787831448, filename='calabaceira_history.txt', object='file', purpose='assistants')

### Ingest file in vector store

In [7]:
# Attach the previously uploaded file to the vector store so that later
# file_search tool calls against this store can return its content.
file_ingest_response = client.vector_stores.files.create(
    vector_store_id=vector_store_id,
    file_id=file_create_response.id,
)
# Bare last expression: the notebook displays the returned object.
file_ingest_response

INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/vector_stores/vs_a7f56262-3017-49b5-b535-d8fea3ede0ef/files "HTTP/1.1 200 OK"


VectorStoreFile(id='file-f0912da26c234ac2b346bfd22e3c8e8f', attributes={}, chunking_strategy=ChunkingStrategyVectorStoreChunkingStrategyAuto(type='auto'), created_at=1756295451, object='vector_store.file', status='completed', usage_bytes=0, vector_store_id='vs_a7f56262-3017-49b5-b535-d8fea3ede0ef', last_error=None)

### Prompt 2: What is the Calabaceira Shop address? (The agent should now answer, because the knowledge base contains the ingested file and is queried via RAG)

In [8]:
# Same question as before, asked after the document has been ingested.
prompt_2 = "What is the Calabaceira Shop address?"

response_2 = client.responses.create(
    model=LLAMA_STACK_MODEL_ID,
    input=prompt_2,
    instructions=(
        "You are a helpful customer service representative for Calabaceira Shop. "
        "Always be polite, professional, and provide accurate information about the shop. "
        "If you don't know something, admit it honestly."
    ),
    tools=[
        {"type": "file_search", "vector_store_ids": [vector_store_id]},
    ],
)

print_rag_response(response_2)


INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/responses "HTTP/1.1 200 OK"


ID: resp-4e20899c-7153-4c7d-bc39-b37d69c054bd
Status: completed
Model: vllm/llama-4-scout-17b-16e-w4a16
Created at: 1756295461
Output items: 2

--- Output Item 1 ---
Output type: file_search_call
  Tool Call ID: chatcmpl-tool-be6206446d4948698fcddce77f337214
  Tool Status: completed
  Queries: Calabaceira Shop address
  Results: [OutputOpenAIResponseOutputMessageFileSearchToolCallResult(attributes={}, file_id='', filename='', score=1.2511613942368716, text="CalabaceiraShop: History and Goals\n\nHistory:\nCalabaceiraShop was founded in 1998 in Dublin, Ireland, with a mission to bring cutting-edge \nelectronics closer to everyday consumers. Starting as a small retail shop specializing in \nhousehold electronics, CalabaceiraShop quickly gained a reputation for quality products and \nexceptional customer service. Over the years, it expanded into multiple branches across \nIreland and launched an online store in 2008, allowing international customers to access \nits wide range of products. 

### Prompt 3: Can you list the products available? (Agent shouldn't answer because MCP isn't enabled)

In [9]:
# Product question with only file_search available (no MCP tool in this request).
prompt_3 = "Can you list the products available?"

response_3 = client.responses.create(
    model=LLAMA_STACK_MODEL_ID,
    input=prompt_3,
    instructions=(
        "You are a helpful customer service representative for Calabaceira Shop. "
        "Always be polite, professional, and provide accurate information about the shop. "
        "If you don't know something, admit it honestly."
    ),
    tools=[
        {"type": "file_search", "vector_store_ids": [vector_store_id]},
    ],
)


print_rag_response(response_3)

INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/responses "HTTP/1.1 200 OK"


ID: resp-90729834-502d-4476-966c-6f255ba193fe
Status: completed
Model: vllm/llama-4-scout-17b-16e-w4a16
Created at: 1756295480
Output items: 2

--- Output Item 1 ---
Output type: file_search_call
  Tool Call ID: chatcmpl-tool-793e1fdbe8604035b7eb6ad287778f76
  Tool Status: completed
  Queries: products available
  Results: [OutputOpenAIResponseOutputMessageFileSearchToolCallResult(attributes={}, file_id='', filename='', score=0.761455359964911, text="CalabaceiraShop: History and Goals\n\nHistory:\nCalabaceiraShop was founded in 1998 in Dublin, Ireland, with a mission to bring cutting-edge \nelectronics closer to everyday consumers. Starting as a small retail shop specializing in \nhousehold electronics, CalabaceiraShop quickly gained a reputation for quality products and \nexceptional customer service. Over the years, it expanded into multiple branches across \nIreland and launched an online store in 2008, allowing international customers to access \nits wide range of products. Today, 

## MCP + RAG

In [10]:
# Product listing with both tools available: file_search and the MCP server.
prompt_4 = "List all the available products in the Calabaceira Shop"

response_4 = client.responses.create(
    model=LLAMA_STACK_MODEL_ID,
    input=prompt_4,
    tools=[
        {
            "type": "file_search",
            "vector_store_ids": [vector_store_id],
        },
        {
            "type": "mcp",
            "server_url": MCP_URL,
            "server_label": "Calabaceira Sales MCP",
        },
    ],
    # Plain (non-f) string: there are no placeholders to interpolate. The
    # whitespace is spelled out so the prompt text is exactly what it was as
    # an indented triple-quoted literal.
    instructions=(
        "You are a consultant for Calabaceira Shop. \n"
        "    \n"
        "    When showing products:\n"
        "    - Present them in an organized, easy-to-read format\n"
        "    - Group similar items together\n"
        "    - Always mention prices clearly\n"
        "    - Use only the products provided by the MCP\n"
        "    "
    ),
)


print_response(response_4)

INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/responses "HTTP/1.1 200 OK"


ID: resp-b2ac5f9d-0dc6-4bcb-9f81-d385cfb74358
Status: completed
Model: vllm/llama-4-scout-17b-16e-w4a16
Created at: 1756295510
Output items: 3

--- Output Item 1 ---
Output type: mcp_list_tools

🔧 MCP Server: Calabaceira Sales MCP
   ID: mcp_list_6f8a911e-cdc6-4a30-a9d3-7f31222a9315
   Available Tools: 4

1. get_customers
   Description: Returns list of registered customers
    
   Parameters:
----------------------------------------

2. insert_customer_into_db
   Description: Add new customer to database
Args:
    customer_data:
        first_name: Customer's first name
        last_name: Customer's last name
        address: Customer's address
        city: Customer's city
        country: Customer's country
        phone_number: Customer's phone number
        email: Customer's email address

Returns:
    Dictionary containing the newly created customer data including customer_id
   Parameters:
     • customer_data (object) (required)
----------------------------------------

3. get

In [11]:
# Customer registration request, handled through the MCP server's tools only.
prompt_5 = "Hey, I want to create an account. My first name is Ian, last name is Miller, address is 123 Main Street, city is San Francisco, country is USA, phone number is +1-666-0123, and my email is iamiller@gmail.com"

response_5 = client.responses.create(
    model=LLAMA_STACK_MODEL_ID,
    input=prompt_5,
    # The rules below concern registration, so the heading says so; it used to
    # read "When showing products:", carried over from the product-listing
    # prompt. Plain (non-f) string: there are no placeholders to interpolate.
    instructions=(
        "You are a consultant for Calabaceira Shop. \n"
        "    \n"
        "    When registering a customer:\n"
        "    - Ensure that email and phone number are unique and provided by the user\n"
        "    - If user is already registered, respond that they are already registered\n"
        "    "
    ),
    tools=[
        {
            "type": "mcp",
            "server_url": MCP_URL,
            "server_label": "Calabaceira Sales MCP",
        }
    ],
)

print_response(response_5)

INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/responses "HTTP/1.1 200 OK"


ID: resp-f37709bb-aca9-4e67-87d2-40057bc17d0c
Status: completed
Model: vllm/llama-4-scout-17b-16e-w4a16
Created at: 1756295540
Output items: 3

--- Output Item 1 ---
Output type: mcp_list_tools

🔧 MCP Server: Calabaceira Sales MCP
   ID: mcp_list_541c1025-c99c-460b-a4e4-00590c3703d0
   Available Tools: 4

1. get_customers
   Description: Returns list of registered customers
    
   Parameters:
----------------------------------------

2. insert_customer_into_db
   Description: Add new customer to database
Args:
    customer_data:
        first_name: Customer's first name
        last_name: Customer's last name
        address: Customer's address
        city: Customer's city
        country: Customer's country
        phone_number: Customer's phone number
        email: Customer's email address

Returns:
    Dictionary containing the newly created customer data including customer_id
   Parameters:
     • customer_data (object) (required)
----------------------------------------

3. get

In [12]:
# Order placement for a customer identified by e-mail, via the MCP server's tools.
prompt_6 = "Place an order for a MacBook Pro, my email is iamiller@gmail.com"

response_6 = client.responses.create(
    model=LLAMA_STACK_MODEL_ID,
    input=prompt_6,
    tools=[
        {
            "type": "mcp",
            "server_url": MCP_URL,
            "server_label": "Calabaceira Sales MCP",
        },
    ],
    # Plain (non-f) string: there are no placeholders to interpolate.
    instructions=(
        "You are a consultant for Calabaceira Shop. "
        "Don't register new user. "
        "You must place an order only for provided user."
    ),
)


print_response(response_6)

INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/responses "HTTP/1.1 200 OK"


ID: resp-c92f21ad-d664-44f1-b606-f942382c966f
Status: completed
Model: vllm/llama-4-scout-17b-16e-w4a16
Created at: 1756295558
Output items: 5

--- Output Item 1 ---
Output type: mcp_list_tools

🔧 MCP Server: Calabaceira Sales MCP
   ID: mcp_list_e6cbb2b9-209c-4cdf-9138-8d94bc38e7e3
   Available Tools: 4

1. get_customers
   Description: Returns list of registered customers
    
   Parameters:
----------------------------------------

2. insert_customer_into_db
   Description: Add new customer to database
Args:
    customer_data:
        first_name: Customer's first name
        last_name: Customer's last name
        address: Customer's address
        city: Customer's city
        country: Customer's country
        phone_number: Customer's phone number
        email: Customer's email address

Returns:
    Dictionary containing the newly created customer data including customer_id
   Parameters:
     • customer_data (object) (required)
----------------------------------------

3. get