In [27]:
# first check the openai's function call apis

from openai import OpenAI
from openai.types import FunctionDefinition
from lightrag.utils import setup_env
import json

client = OpenAI()

# Example dummy function hard coded to return the same weather
# In production, this could be your backend API or an external API
def get_current_weather(location, unit="fahrenheit"):
    """Get the current weather in a given location"""
    # Canned readings, tried in order: (lowercase keyword, reported city, temperature).
    canned_readings = (
        ("tokyo", "Tokyo", "10"),
        ("san francisco", "San Francisco", "72"),
        ("paris", "Paris", "22"),
    )
    needle = location.lower()
    for keyword, city, temperature in canned_readings:
        if keyword in needle:
            return json.dumps({"location": city, "temperature": temperature, "unit": unit})
    # No known city matched: echo the location back; this payload has no "unit" key.
    return json.dumps({"location": location, "temperature": "unknown"})

def run_conversation():
    """Run one round of OpenAI tool calling for a fixed multi-city weather question.

    The question and the ``get_current_weather`` tool schema are sent to the
    model. Every tool call the model requests is executed locally, the results
    are appended to the conversation, and the model is asked again so it can
    answer using those results.

    Returns:
        The follow-up chat completion produced after the tool results were
        supplied, or the first completion when the model answered without
        requesting any tool call.
    """
    model = "gpt-4o"

    # Step 1: send the conversation and available functions to the model
    messages = [{"role": "user", "content": "What's the weather like in San Francisco, Tokyo, and Paris in celsius?"}]
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        tools=tools,
        tool_choice="auto",  # auto is default, but we'll be explicit
    )
    print(f"response: {response}")
    response_message = response.choices[0].message
    tool_calls = response_message.tool_calls

    print(f"tool_calls: {tool_calls}")
    # Step 2: check if the model wanted to call a function
    if not tool_calls:
        # The model answered directly; hand that answer back instead of None.
        return response

    # Step 3: call the function
    available_functions = {
        "get_current_weather": get_current_weather,
    }  # only one function in this example, but you can have multiple
    messages.append(response_message)  # extend conversation with assistant's reply

    # Step 4: send the info for each function call and function response to the model
    for tool_call in tool_calls:
        function_name = tool_call.function.name
        try:
            function_to_call = available_functions[function_name]
            # The arguments arrive as a JSON string; decode them into a dict and
            # unpack as keyword arguments so no per-function argument list is needed.
            function_args = json.loads(tool_call.function.arguments)
            function_response = function_to_call(**function_args)
        except (KeyError, TypeError, json.JSONDecodeError) as exc:
            # Unknown tool name, malformed JSON, or arguments that do not fit the
            # function: report the problem as the tool result so that every
            # tool_call_id still receives a reply.
            function_response = json.dumps({"error": f"{type(exc).__name__}: {exc}"})
        messages.append(
            {
                "tool_call_id": tool_call.id,
                "role": "tool",
                "name": function_name,
                "content": function_response,
            }
        )  # extend conversation with function response

    second_response = client.chat.completions.create(
        model=model,
        messages=messages,
    )  # get a new response from the model where it can see the function response
    return second_response
# Run the demo end to end and print whatever run_conversation() returns.
print(run_conversation())

response: ChatCompletion(id='chatcmpl-9e2lZetnCX7GZnscApud6eseKKLlQ', choices=[Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_rYuJw6IYXK0rAwgL5oG0lB8r', function=Function(arguments='{"location": "San Francisco, CA", "unit": "celsius"}', name='get_current_weather'), type='function'), ChatCompletionMessageToolCall(id='call_r4LiJjHWVEPv2L4GSedxQMq7', function=Function(arguments='{"location": "Tokyo, Japan", "unit": "celsius"}', name='get_current_weather'), type='function'), ChatCompletionMessageToolCall(id='call_2LnmgGhUzy3LSDjqXIy9vLPb', function=Function(arguments='{"location": "Paris, France", "unit": "celsius"}', name='get_current_weather'), type='function')]))], created=1719330781, model='gpt-4o-2024-05-13', object='chat.completion', service_tier=None, system_fingerprint='fp_5e6c71d4a8', usage=CompletionUsage(completion_tokens=83, prompt_tok

In [None]:
# Function(arguments='{"location": "Tokyo, Japan", "unit": "celsius"}', name='get_current_weather'

There are two important pieces. The first, getting the function schema, is not difficult and can be standardized.

The second piece is how the function call is expressed and how it is executed. The format we ask the LLM to produce for the call depends on how we execute it.

How to execute a function:
1. Eval: the LLM outputs the code that calls the function (as a string), and we evaluate it -> language generation.
2. Function map: we manage a function map and ask the LLM to output either a code string or a structure with the function name and the arguments, then use the function map to call the function. If it is a code string, we have to parse the function call into the name and the arguments. If it is a structure, we have to convert it into a data structure that can be used to call the function.

There are many different ways to do the actual function call, and different LLMs may differ in accuracy depending on the output format.

Function(arguments='{"location": "Paris, France"}', name='get_current_weather'), type='function')

In [2]:
def get_current_weather(location: str, unit: str = "fahrenheit"):
    """Get the current weather in a given location"""
    # Reported city name -> canned temperature; insertion order is the match order.
    city_temperatures = {"Tokyo": "10", "San Francisco": "72", "Paris": "22"}
    requested = location.lower()
    for city, temperature in city_temperatures.items():
        if city.lower() in requested:
            return json.dumps(
                {"location": city, "temperature": temperature, "unit": unit}
            )
    # Unrecognised place: return the location as given, without a "unit" key.
    return json.dumps({"location": location, "temperature": "unknown"})



In [3]:
from lightrag.core.tool_helper import FunctionTool

# Build a FunctionTool from the plain Python function and print the tool.
tool = FunctionTool.from_defaults(fn=get_current_weather)
print(tool)

name: location, parameter: location: str    <class 'str'>
name: unit, parameter: unit: str = 'fahrenheit'    <class 'str'>
FunctionTool(metadata=ToolMetadata(name='get_current_weather', description="get_current_weather(location: str, unit: str = 'fahrenheit')\nGet the current weather in a given location", parameters={'type': 'object', 'properties': {'location': {'type': 'str'}, 'unit': {'type': 'str', 'default': 'fahrenheit'}}, 'required': ['location']}), fn=<function get_current_weather at 0x1323cdb20>, async_fn=None)


In [4]:
# Print the tool metadata as returned by to_json().
print(tool.metadata.to_json())

{
    "name": "get_current_weather",
    "description": "get_current_weather(location: str, unit: str = 'fahrenheit')\nGet the current weather in a given location",
    "parameters": {
        "type": "object",
        "properties": {
            "location": {
                "type": "str"
            },
            "unit": {
                "type": "str",
                "default": "fahrenheit"
            }
        },
        "required": [
            "location"
        ]
    }
}


In [5]:
# v2

from lightrag.core.base_data_class import DataClass
from dataclasses import dataclass, field

# Single-argument container taken by get_current_weather_2; each field attaches
# extra information through dataclass field metadata.
@dataclass
class Weather(DataClass):
    # Place to look up; the metadata carries a human-readable description.
    location: str = field(metadata={"description": "The city and state, e.g. San Francisco, CA"})
    # Temperature unit; the metadata lists the two expected values.
    unit: str = field(metadata={"enum": ["celsius", "fahrenheit"]})

def get_current_weather_2(weather: Weather):
    """Get the current weather in a given location"""
    # Canned readings, tried in order: (lowercase keyword, reported city, temperature).
    known_cities = (
        ("tokyo", "Tokyo", "10"),
        ("san francisco", "San Francisco", "72"),
        ("paris", "Paris", "22"),
    )
    place = weather.location.lower()
    for keyword, city, temperature in known_cities:
        if keyword in place:
            return json.dumps(
                {"location": city, "temperature": temperature, "unit": weather.unit}
            )
    # No match: echo the requested location; this payload has no "unit" key.
    return json.dumps({"location": weather.location, "temperature": "unknown"})

In [7]:
# Create a tool from the function whose single argument is the Weather
# dataclass, then print the tool metadata as JSON.

tool_2 = FunctionTool.from_defaults(fn=get_current_weather_2)

print(tool_2.metadata.to_json())



name: weather, parameter: weather: __main__.Weather    <class '__main__.Weather'>
type_hints[name]: <class '__main__.Weather'>
name: location, parameter: location: str    <class 'str'>
name: unit, parameter: unit: str    <class 'str'>
{
    "name": "get_current_weather_2",
    "description": "get_current_weather_2(weather: __main__.Weather)\nGet the current weather in a given location",
    "parameters": {
        "type": "object",
        "properties": {
            "weather": {
                "type": "Weather",
                "description": "The city and state, e.g. San Francisco, CA",
                "enum": [
                    "celsius",
                    "fahrenheit"
                ]
            }
        },
        "required": [
            "weather"
        ],
        "definitions": {
            "weather": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "str"
                    },
    

LlamaIndex



In [None]:
# Function-parameter schema with a nested `weather` object, written as a plain
# dict literal. The opening brace has to share the line with `=`: a bare
# `name =` line is a SyntaxError in Python.
lightrag_fn_schema = {
    "type": "object",
    "properties": {
        "weather": {
            "type": "Weather",
            "desc": "The city and state, e.g. San Francisco, CA",
            "enum": [
                "celsius",
                "fahrenheit"
            ]
        }
    },
    "required": [
        "weather"
    ],
    "definitions": {
        "weather": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "str"
                },
                "unit": {
                    "type": "str"
                }
            },
            "required": [
                "location",
                "unit"
            ]
        }
    }
}

In [None]:
    # Parameter schema for the same function in the `$ref` / `definitions` layout
    # (presumably what LlamaIndex generates - confirm against its output).
    # Python spells the boolean as `False`; the JSON literal `false` is an
    # undefined name here and raises NameError.
    llama_fn_schema = {
        "type": "object",
        "properties": {"weather": {"$ref": "#/definitions/Weather"}},
        "required": ["weather"],
        "definitions": {
            "Weather": {
                "title": "Weather",
                "type": "object",
                "properties": {
                    "location": {
                        "title": "Location",
                        "desc": "The city and state, e.g. San Francisco, CA",
                        "type": "string",
                    },
                    "unit": {
                        "title": "Unit",
                        "enum": ["celsius", "fahrenheit"],
                        "type": "string",
                    },
                },
                "required": ["location", "unit"],
                "additionalProperties": False,
            }
        },
    }

In [22]:
# level 1, call function with default python data types
# such as str, int, float, list, dict, etc.

def _get_current_weather(location: str, unit: str = "fahrenheit"):
    """Get the current weather in a given location"""
    lowered = location.lower()
    # Pick the canned reading for the first recognised city; bail out early otherwise.
    if "tokyo" in lowered:
        city, temperature = "Tokyo", "10"
    elif "san francisco" in lowered:
        city, temperature = "San Francisco", "72"
    elif "paris" in lowered:
        city, temperature = "Paris", "22"
    else:
        # Unknown place: echo it back; this payload has no "unit" key.
        return json.dumps({"location": location, "temperature": "unknown"})
    return json.dumps({"location": city, "temperature": temperature, "unit": unit})

In [24]:
# Prepare the function tool for the level-1 function and print it.
weather_tool = FunctionTool.from_defaults(fn=_get_current_weather)
print(weather_tool)

FunctionTool(metadata=ToolMetadata(name='_get_current_weather', description="_get_current_weather(location: str, unit: str = 'fahrenheit')\nGet the current weather in a given location", parameters={'type': 'object', 'properties': {'location': {'type': 'str'}, 'unit': {'type': 'str', 'default': 'fahrenheit'}}, 'required': ['location']}), fn=<function _get_current_weather at 0x10806ff60>, async_fn=None)


In [31]:
# Minimal function-calling prompt for a single call, written with
# Jinja-style `{% ... %}` / `{{ ... }}` placeholders.
# Values filled in at render time: `tools` (each item is read through
# .metadata.name, .metadata.description and .metadata.fn_schema_str),
# `output_format_str` and `input_str`.
template = r"""<SYS>You have these tools available:
    <TOOLS>
    {% for tool in tools %}
    {{ loop.index }}. ToolName: {{ tool.metadata.name }}
        Tool Description: {{ tool.metadata.description }}
        Tool Parameters: {{ tool.metadata.fn_schema_str }}   
    __________
    {% endfor %}
    </TOOLS>
    {{output_format_str}}
    </SYS>
    User: {{input_str}}
    You:
    """

# Variant of the function-calling prompt that additionally tells the model it
# may return a list of calls in the given output format. It takes the same
# render-time values: `tools`, `output_format_str` and `input_str`.
multiple_function_call_template = r"""<SYS>You can answer user query with these tools:
    <TOOLS>
    {% for tool in tools %}
    {{ loop.index }}. ToolName: {{ tool.metadata.name }}
        Tool Description: {{ tool.metadata.description }}
        Tool Parameters: {{ tool.metadata.fn_schema_str }}   
    __________
    {% endfor %}
    </TOOLS>
    You can call multiple tools by return a list of the following format:
    {{output_format_str}}
    </SYS>
    User: {{input_str}}
    You:
    """

from typing import Dict, Any
from lightrag.core.generator import Generator
from lightrag.core.types import ModelClientType
from lightrag.components.output_parsers import YamlOutputParser

# Model settings handed to every Generator in this notebook via `model_kwargs=`.
model_kwargs = {"model": "gpt-3.5-turbo", "temperature": 0.3, "stream": False}

# Structure the LLM is asked to emit for one function call; instances are
# rebuilt from the parsed output with Function.from_dict(...).
@dataclass
class Function(DataClass):
    # Name of the function to call; later used as the key into function_map.
    name: str = field(metadata={"desc": "The name of the function"})
    # Keyword arguments; unpacked with ** when the function is invoked.
    args: Dict[str, Any] = field(metadata={"desc": "The arguments of the function"})

# Single-call generator: uses `template`, pre-fills the YAML format
# instructions derived from Function, and passes a YamlOutputParser as the
# output processor. `tools` and `input_str` are supplied at call time.
generator = Generator(
    model_client=ModelClientType.OPENAI(),
    model_kwargs=model_kwargs,
    template=template,
    prompt_kwargs={
        # "tools": [weather_tool],
        "output_format_str": YamlOutputParser(Function).format_instructions(),
        # "output_format_str": Function.to_yaml_signature(),
    },
    output_processors=YamlOutputParser(Function),
)
# Bare expression so the notebook displays the generator's repr.
generator

Generator(
  model_kwargs={'model': 'gpt-3.5-turbo', 'temperature': 0.3, 'stream': False}, 
  (prompt): Prompt(
    template: <SYS>You have these tools available:
        <TOOLS>
        {% for tool in tools %}
        {{ loop.index }}. ToolName: {{ tool.metadata.name }}
            Tool Description: {{ tool.metadata.description }}
            Tool Parameters: {{ tool.metadata.fn_schema_str }}   
        __________
        {% endfor %}
        </TOOLS>
        {{output_format_str}}
        </SYS>
        User: {{input_str}}
        You:
        , prompt_kwargs: {'output_format_str': 'Your output should be formatted as a standard YAML instance with the following schema:\n```\nname: The name of the function (str) (required)\nargs: The arguments of the function (Dict) (required)\n```\n\n-Make sure to always enclose the YAML output in triple backticks (```). Please do not add anything other than valid YAML output!\n-Follow the YAML formatting conventions with an indent of 2 spaces.\n-Quote

In [27]:
# Print the rendered prompt for a sample query to check how the tool list and
# the format instructions appear in it.

input_str = "What's the weather like in San Francisco, Tokyo, and Paris in celsius?"

generator.print_prompt(input_str=input_str, tools=[weather_tool])

Prompt:
<SYS>You have these tools available:
    <TOOLS>
    1. ToolName: _get_current_weather
        Tool Description: _get_current_weather(location: str, unit: str = 'fahrenheit')
Get the current weather in a given location
        Tool Parameters: {"type": "object", "properties": {"location": {"type": "str"}, "unit": {"type": "str", "default": "fahrenheit"}}, "required": ["location"]}   
    __________
    </TOOLS>
    Your output should be formatted as a standard YAML instance with the following schema:
```
name: The name of the function (str) (required)
args: The arguments of the function (Dict) (required)
```

-Make sure to always enclose the YAML output in triple backticks (```). Please do not add anything other than valid YAML output!
-Follow the YAML formatting conventions with an indent of 2 spaces.
-Quote the string values properly.

    </SYS>
    User: What's the weather like in San Francisco, Tokyo, and Paris in celsius?
    You:
    


In [28]:
# Call the generator with the query and the tool list, then turn the parsed
# output (output.data) into a Function instance.
prompt_kwargs = {
    "input_str": input_str,
    "tools": [weather_tool],
}
output = generator(prompt_kwargs=prompt_kwargs)
# NOTE(review): output.data is used without checking whether generation or
# parsing failed - presumably it is not a dict in that case; confirm.
structured_output = Function.from_dict(output.data)
print(structured_output)

Function(name='_get_current_weather', args={'location': 'San Francisco', 'unit': 'celsius'})


In [39]:
# Call the function named in `structured_output`.
# NOTE: `structured_output` is a notebook global, so the result depends on
# which cell assigned it last (the multi-call loop also rebinds it).

# Function name -> callable; the value here is the FunctionTool itself, which
# is invoked directly below.
function_map = {
    "_get_current_weather": weather_tool
}

function_name = structured_output.name
function_args = structured_output.args
function_to_call = function_map[function_name]
# The parsed args dict is unpacked as keyword arguments.
function_response = function_to_call(**function_args)
print(function_response)

{"location": "Paris", "temperature": "22", "unit": "celsius"}


# multiple function calls

In [32]:
# Rebind `generator` to use the multi-call template. The model settings,
# format instructions and output parser match the single-call generator.
generator = Generator(
    model_client=ModelClientType.OPENAI(),
    model_kwargs=model_kwargs,
    template=multiple_function_call_template,
    prompt_kwargs={
        # "tools": [weather_tool],
        "output_format_str": YamlOutputParser(Function).format_instructions(),
        # "output_format_str": Function.to_yaml_signature(),
    },
    output_processors=YamlOutputParser(Function),
)
# Bare expression so the notebook displays the generator's repr.
generator

Generator(
  model_kwargs={'model': 'gpt-3.5-turbo', 'temperature': 0.3, 'stream': False}, 
  (prompt): Prompt(
    template: <SYS>You can answer user query with these tools:
        <TOOLS>
        {% for tool in tools %}
        {{ loop.index }}. ToolName: {{ tool.metadata.name }}
            Tool Description: {{ tool.metadata.description }}
            Tool Parameters: {{ tool.metadata.fn_schema_str }}   
        __________
        {% endfor %}
        </TOOLS>
        You can call multiple tools by return a list of the following format:
        {{output_format_str}}
        </SYS>
        User: {{input_str}}
        You:
        , prompt_kwargs: {'output_format_str': 'Your output should be formatted as a standard YAML instance with the following schema:\n```\nname: The name of the function (str) (required)\nargs: The arguments of the function (Dict) (required)\n```\n\n-Make sure to always enclose the YAML output in triple backticks (```). Please do not add anything other than valid

In [34]:
# Run the query, reusing `prompt_kwargs` defined in the earlier single-call cell.

output = generator(prompt_kwargs=prompt_kwargs)
# output.data is iterated as a list here: one dict per requested function call.
list_structured_output = [Function.from_dict(item) for item in output.data]
print(output)
print(list_structured_output)

GeneratorOutput(data=[{'name': '_get_current_weather', 'args': {'location': 'San Francisco', 'unit': 'celsius'}}, {'name': '_get_current_weather', 'args': {'location': 'Tokyo', 'unit': 'celsius'}}, {'name': '_get_current_weather', 'args': {'location': 'Paris', 'unit': 'celsius'}}], error=None, usage=None, raw_response='```yaml\n- name: _get_current_weather\n  args:\n    location: "San Francisco"\n    unit: "celsius"\n- name: _get_current_weather\n  args:\n    location: "Tokyo"\n    unit: "celsius"\n- name: _get_current_weather\n  args:\n    location: "Paris"\n    unit: "celsius"\n```')
[Function(name='_get_current_weather', args={'location': 'San Francisco', 'unit': 'celsius'}), Function(name='_get_current_weather', args={'location': 'Tokyo', 'unit': 'celsius'}), Function(name='_get_current_weather', args={'location': 'Paris', 'unit': 'celsius'})]


In [35]:
# Execute each requested call in order: look the tool up by name and unpack the
# parsed args as keyword arguments. The loop variables are notebook globals and
# stay bound to the last call after the loop finishes.
for structured_output in list_structured_output:
    function_name = structured_output.name
    function_args = structured_output.args
    function_to_call = function_map[function_name]
    function_response = function_to_call(**function_args)
    print(function_response)

{"location": "San Francisco", "temperature": "72", "unit": "celsius"}
{"location": "Tokyo", "temperature": "10", "unit": "celsius"}
{"location": "Paris", "temperature": "22", "unit": "celsius"}


In [16]:
from dataclasses import dataclass, field
from typing import Any, Dict

@dataclass
class Address:
    """Postal address; used as the nested dataclass inside Person."""

    street: str
    city: str
    zipcode: str

@dataclass
class Person:
    """Person holding a single nested Address dataclass."""

    name: str
    age: int
    address: Address  # nested dataclass, converted recursively by to_dict

# Example instance with one nested dataclass (Person -> Address); printed to
# show the dataclass repr before conversion.
person = Person(name="John Doe", age=30, address=Address(street="123 Main St", city="Anytown", zipcode="12345"))
print(person)

def to_dict(obj: Any) -> Any:
    """Recursively convert dataclass instances into plain dictionaries.

    Dataclass instances become ``{field_name: converted_value}`` dicts, lists
    and dicts are rebuilt with their items/values converted, and every other
    value is returned unchanged.

    Args:
        obj: A dataclass instance, a list, a dict, or any leaf value.

    Returns:
        A dict for dataclass-instance or dict input, a list for list input,
        otherwise ``obj`` itself.
    """
    from dataclasses import fields, is_dataclass

    # is_dataclass() is also true for dataclass *classes*; only instances carry
    # field values, so a class stored as a value is treated as a leaf.
    if is_dataclass(obj) and not isinstance(obj, type):
        # fields() + getattr also works for dataclasses without a __dict__.
        return {f.name: to_dict(getattr(obj, f.name)) for f in fields(obj)}
    if isinstance(obj, list):
        return [to_dict(item) for item in obj]
    if isinstance(obj, dict):
        return {key: to_dict(value) for key, value in obj.items()}
    return obj

# Convert the person instance to a dictionary; the nested Address becomes a
# nested dict.
person_dict = to_dict(person)
print(person_dict)

Person(name='John Doe', age=30, address=Address(street='123 Main St', city='Anytown', zipcode='12345'))
{'name': 'John Doe', 'age': 30, 'address': {'street': '123 Main St', 'city': 'Anytown', 'zipcode': '12345'}}


In [18]:
from typing import List
# Redefinition: rebinds the notebook-level name Address from the earlier cell.
@dataclass
class Address:
    """Postal address; used as the list item type of Person.addresses."""

    street: str
    city: str
    zipcode: str

# Redefinition: rebinds the notebook-level name Person from the earlier cell.
@dataclass
class Person:
    """Person holding a list of nested Address dataclasses."""

    name: str
    age: int
    addresses: List[Address]  # list of nested dataclasses

# Example instance whose `addresses` field is a list of nested dataclasses.
person = Person(name="John Doe", age=30, addresses=[Address(street="123 Main St", city="Anytown", zipcode="12345"), Address(street="456 Elm St", city="Othertown", zipcode="67890")])
print(person)

Person(name='John Doe', age=30, addresses=[Address(street='123 Main St', city='Anytown', zipcode='12345'), Address(street='456 Elm St', city='Othertown', zipcode='67890')])


In [19]:
# Convert the person instance to a dictionary; `to_dict` comes from an earlier
# cell, and each Address in the list becomes its own dict.
person_dict = to_dict(person)
print(person_dict)

{'name': 'John Doe', 'age': 30, 'addresses': [{'street': '123 Main St', 'city': 'Anytown', 'zipcode': '12345'}, {'street': '456 Elm St', 'city': 'Othertown', 'zipcode': '67890'}]}


In [21]:
from typing import List, Dict, Optional
def dataclass_obj_to_dict(
    obj: Any, exclude: Optional[Dict[str, List[str]]] = None, parent_key: str = ""
) -> Any:
    r"""Convert a dataclass object to a dictionary.

    Supports nested dataclasses, lists, and dictionaries.
    Allow exclude keys for each dataclass object.

    Args:
        obj: A dataclass instance, a list, a dict, or any leaf value.
        exclude: Maps a dataclass *class name* to the field names that should
            be left out whenever an instance of that class is converted.
        parent_key: Not used by the conversion; kept so existing callers that
            pass it keep working.

    Returns:
        A dict for dataclass-instance or dict input, a list for list input,
        otherwise ``obj`` itself.

    Example:

    .. code-block:: python

       from dataclasses import dataclass
       from typing import List

       @dataclass
       class TrecData:
           question: str
           label: int

       @dataclass
       class TrecDataList:

           data: List[TrecData]
           name: str

       trec_data = TrecData(question="What is the capital of France?", label=0)
       trec_data_list = TrecDataList(data=[trec_data], name="trec_data_list")

       dataclass_obj_to_dict(trec_data_list, exclude={"TrecData": ["label"], "TrecDataList": ["name"]})

       # Output:
       # {'data': [{'question': 'What is the capital of France?'}]}

    """
    from dataclasses import fields, is_dataclass

    if exclude is None:
        exclude = {}

    # is_dataclass() is also true for dataclass *classes*; only instances carry
    # field values, so a class stored as a value is returned as a leaf.
    if is_dataclass(obj) and not isinstance(obj, type):
        excluded_fields = exclude.get(obj.__class__.__name__, [])
        return {
            f.name: dataclass_obj_to_dict(getattr(obj, f.name), exclude, parent_key=f.name)
            for f in fields(obj)
            if f.name not in excluded_fields
        }
    if isinstance(obj, list):
        return [dataclass_obj_to_dict(item, exclude, parent_key) for item in obj]
    if isinstance(obj, dict):
        return {
            key: dataclass_obj_to_dict(value, exclude, parent_key)
            for key, value in obj.items()
        }
    return obj

from dataclasses import dataclass
from typing import List

@dataclass
class TrecData:
    """One question/label record used in the conversion examples."""

    question: str
    label: int

@dataclass
class TrecDataList:
    """Named list of TrecData records; exercises a list of nested dataclasses."""

    data: List[TrecData]
    name: str

# Build a one-item example and convert it while excluding fields per class:
# `label` on every TrecData and `name` on the TrecDataList itself.
trec_data = TrecData(question="What is the capital of France?", label=0)
trec_data_list = TrecDataList(data=[trec_data], name="trec_data_list")

dataclass_obj_to_dict(trec_data_list, exclude={"TrecData": ["label"], "TrecDataList": ["name"]})

{'data': [{'question': 'What is the capital of France?'}]}

In [23]:
from typing import Type
def dataclass_obj_from_dict(cls: Type[Any], data: Dict[str, Any]) -> Any:
    r"""Rebuild a (possibly nested) dataclass object from plain data.

    The field annotations of ``cls`` drive the reconstruction: a dict is turned
    into an instance when the expected type is a dataclass, and lists / dicts
    are rebuilt item by item using the annotation's type arguments. When no
    usable type information is available (``Any``, bare ``list`` / ``dict``,
    string annotations, unions with several non-None members) the data is kept
    as plain Python values instead of raising.

    Args:
        cls: The expected type: a dataclass, a typing generic such as
            ``List[X]`` / ``Dict[K, V]`` / ``Optional[X]``, or any other type.
        data: The plain data to convert (dict, list, or leaf value).

    Returns:
        An instance of ``cls`` when it is a dataclass and ``data`` is a dict;
        otherwise a list / dict with converted items, or ``data`` unchanged.

    Raises:
        KeyError: If ``data`` contains a key that is not a field of the dataclass.
        TypeError: If a required field is missing from ``data``, for example
            because it was excluded when the dict was produced.
    """
    import types
    from dataclasses import fields, is_dataclass
    from typing import Union, get_args, get_origin

    if is_dataclass(cls) and isinstance(cls, type) and isinstance(data, dict):
        field_types = {f.name: f.type for f in fields(cls)}
        return cls(
            **{
                key: dataclass_obj_from_dict(field_types[key], value)
                for key, value in data.items()
            }
        )

    origin = get_origin(cls)
    type_args = get_args(cls)

    # Optional[X] / Union[X, None] (and `X | None` on Python 3.10+): convert
    # with the single non-None member; leave ambiguous unions untouched.
    union_type = getattr(types, "UnionType", None)
    if origin is Union or (union_type is not None and origin is union_type):
        if data is None:
            return None
        members = [arg for arg in type_args if arg is not type(None)]
        if len(members) == 1:
            return dataclass_obj_from_dict(members[0], data)
        return data

    if isinstance(data, list):
        # Without a type argument the items are converted as untyped values.
        item_type = type_args[0] if type_args else Any
        return [dataclass_obj_from_dict(item_type, item) for item in data]
    if isinstance(data, dict):
        # Dict[K, V]: the value type is the second argument when it is present.
        value_type = type_args[1] if len(type_args) > 1 else Any
        return {
            key: dataclass_obj_from_dict(value_type, value)
            for key, value in data.items()
        }
    return data

In [24]:
# Round trip with no exclusions: dataclass -> dict -> dataclass.
dataclass_obj_from_dict(TrecDataList, dataclass_obj_to_dict(trec_data_list))

TrecDataList(data=[TrecData(question='What is the capital of France?', label=0)], name='trec_data_list')

In [25]:
# Failure mode: `label` was excluded from the dict but has no default in
# TrecData, so rebuilding the object raises TypeError.
dataclass_obj_from_dict(TrecDataList, dataclass_obj_to_dict(trec_data_list, exclude={"TrecData": ["label"], "TrecDataList": ["name"]}))

TypeError: TrecData.__init__() missing 1 required positional argument: 'label'