[vLLM tool calling documentation](https://github.com/vllm-project/vllm/blob/main/docs/source/serving/openai_compatible_server.md#tool-calling-in-the-chat-completion-api)

Prior to executing this notebook, run the following command from the `presentation-examples/genai_and_rag` directory:
```
./vllm-tool-start.sh
```
Once the notebook has finished running, you can stop the vLLM server process by pressing Ctrl-C in the terminal where you ran the command.

In [2]:
from langchain import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.tools import tool
from langchain_core.messages import SystemMessage, HumanMessage

# Database stub: an in-memory dict that stands in for a real invoice store.
# Keys are invoice ids; each value is a record with the invoice's supplier,
# amount, date and status.
database = {
    "invoice_001": {
        "supplier": "ABC Corp",
        "amount": 1500,
        "date": "2023-10-01",
        "status": "Paid",
    },
    "invoice_002": {
        "supplier": "XYZ Ltd",
        "amount": 2500,
        "date": "2023-10-05",
        "status": "Pending",
    },
}


# Function to fetch invoice information from the database.
# The `invoice_id: str` annotation matters: `@tool` derives the tool's argument
# schema from the signature, so an unannotated parameter is advertised to the
# model without a type.
@tool
def fetch_invoice_info(invoice_id: str):
    """Fetch invoice information from the database.

    Args:
        invoice_id: Identifier of the invoice to look up, e.g. "invoice_001".

    Returns:
        The invoice record (supplier, amount, date, status) if the id exists,
        otherwise the string "Invoice not found".
    """
    return database.get(invoice_id, "Invoice not found")


# Define our tools list (the tools the model is allowed to call)
tools = [fetch_invoice_info]

# Define the prompt template; {invoice_id} is filled in when the query is built
prompt_template = PromptTemplate(
    input_variables=["invoice_id"],
    template="What's the status of invoice id {invoice_id}?",
)

# Initialize the model through an OpenAI-compatible client pointed at localhost.
# NOTE(review): this is presumably the vLLM server started by
# ./vllm-tool-start.sh, and the api_key looks like a placeholder for a server
# that does not validate keys -- confirm against the start script.
llm = ChatOpenAI(
    base_url="http://localhost:8000/v1",
    model="meta-llama/Llama-3.1-8B-Instruct",
    api_key="abc1234",
)
# Attach the tool definitions so the model can answer with a tool call
llm_with_tools = llm.bind_tools(tools)

# Build the concrete user question for one specific invoice
query = prompt_template.format(invoice_id="invoice_001")

# Call model w/ prompt and pass specific invoice_id
response = llm_with_tools.invoke(query)

In [3]:
# Inspect the raw model reply, including any tool calls it requested.
print(response)

content='' additional_kwargs={'tool_calls': [{'id': 'chatcmpl-tool-cedd241362f1476eb6f98ff452c08805', 'function': {'arguments': '{"invoice_id": "invoice_001"}', 'name': 'fetch_invoice_info'}, 'type': 'function'}], 'refusal': None} response_metadata={'token_usage': {'completion_tokens': 22, 'prompt_tokens': 178, 'total_tokens': 200, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'meta-llama/Llama-3.1-8B-Instruct', 'system_fingerprint': None, 'finish_reason': 'tool_calls', 'logprobs': None} id='run-aa87a2cf-46b5-4d85-ba2a-b0e16c05f70f-0' tool_calls=[{'name': 'fetch_invoice_info', 'args': {'invoice_id': 'invoice_001'}, 'id': 'chatcmpl-tool-cedd241362f1476eb6f98ff452c08805', 'type': 'tool_call'}] usage_metadata={'input_tokens': 178, 'output_tokens': 22, 'total_tokens': 200, 'input_token_details': {}, 'output_token_details': {}}


In [4]:
# Conversation history for the follow-up request, in order: the system
# instructions, the user's original question, and the model's first reply
# (`response`) from the earlier invoke call.
messages = [
    SystemMessage(
        """You are provided with a tool to perform a database lookup when the user requests the status of an invoice with an invoice id. Perform the lookup and provide the status to the user in a clear, readable format. The status should be every field in the entry returned from the database on a separate line."""
    ),
    HumanMessage(query),
    response,
]

In [None]:
# Build the name -> tool lookup once, outside the loop, and derive it from
# `tools` so every tool bound to the model is also executable here.
# Names are lower-cased on both sides, so the lookup is case-insensitive.
tools_by_name = {registered.name.lower(): registered for registered in tools}

# Run every tool call the model requested and append each result to the
# conversation so the follow-up request can use it.
for tool_call in response.tool_calls:
    selected_tool = tools_by_name[tool_call["name"].lower()]
    # Pass the whole tool-call dict (not only its args) to the tool.
    tool_msg = selected_tool.invoke(tool_call)
    messages.append(tool_msg)

{'name': 'fetch_invoice_info', 'args': {'invoice_id': 'invoice_001'}, 'id': 'chatcmpl-tool-cedd241362f1476eb6f98ff452c08805', 'type': 'tool_call'}
content='{"supplier": "ABC Corp", "amount": 1500, "date": "2023-10-01", "status": "Paid"}' name='fetch_invoice_info' tool_call_id='chatcmpl-tool-cedd241362f1476eb6f98ff452c08805'


In [6]:
# Show the conversation that is about to be sent to the model.
print(messages)
# Send the full history back to the model and record its reply in the history.
messages.append(llm_with_tools.invoke(messages))

[SystemMessage(content='You are provided with a tool to perform a database lookup when the user requests the status of an invoice with an invoice id. Perform the lookup and provide the status to the user in a clear, readable format. The status should be every field in the entry returned from the database on a separate line.', additional_kwargs={}, response_metadata={}), HumanMessage(content="What's the status of invoice id invoice_001?", additional_kwargs={}, response_metadata={}), AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'chatcmpl-tool-cedd241362f1476eb6f98ff452c08805', 'function': {'arguments': '{"invoice_id": "invoice_001"}', 'name': 'fetch_invoice_info'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 22, 'prompt_tokens': 178, 'total_tokens': 200, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'meta-llama/Llama-3.1-8B-Instruct', 'system_fingerprint': None, 'finish_reason': 'tool

In [7]:
# Print the conversation again; it now also contains the model's latest reply.
print(messages)

[SystemMessage(content='You are provided with a tool to perform a database lookup when the user requests the status of an invoice with an invoice id. Perform the lookup and provide the status to the user in a clear, readable format. The status should be every field in the entry returned from the database on a separate line.', additional_kwargs={}, response_metadata={}), HumanMessage(content="What's the status of invoice id invoice_001?", additional_kwargs={}, response_metadata={}), AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'chatcmpl-tool-cedd241362f1476eb6f98ff452c08805', 'function': {'arguments': '{"invoice_id": "invoice_001"}', 'name': 'fetch_invoice_info'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 22, 'prompt_tokens': 178, 'total_tokens': 200, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'meta-llama/Llama-3.1-8B-Instruct', 'system_fingerprint': None, 'finish_reason': 'tool

Be sure to stop the vLLM server process by pressing Ctrl-C in its terminal. Otherwise, you will likely run out of GPU memory when executing the next notebook.