# Function calling
#### **General idea**
Let the LLM know how to format its output so that it can be passed to your functions


#### **Explanation**
An LLM can't execute any functions by itself, but if it is initialized in the right way it can:
1. Receive the list of functions that may be executed with its output
2. Receive the list of expected inputs for each function
3. Choose which function from your list best fits your prompt
4. Prepare its result in JSON format that can be passed to the function later

# Let's start with a simple function

In [1]:
def count_elephants(some_str, case_sensitive=False, output_format="num"):
    """Count occurrences of the substring "elephant" in a text.

    Args:
        some_str: Text in which to look for elephants.
        case_sensitive: When False (the default) the text is lower-cased
            first, so "ELEPHANT" and "Elephant" are counted as well. When
            True only the exact lower-case spelling "elephant" is counted.
        output_format: "num" returns the bare count; any other value
            (conventionally "text") returns a human-readable sentence.

    Returns:
        The count as an int, or a sentence containing the count.
    """
    haystack = some_str if case_sensitive else some_str.lower()
    # Non-overlapping substring count.
    total = haystack.count("elephant")
    return total if output_format == "num" else f"Number of elephants is {total}"

count_elephants(
    "In this text we have John - The BIG ELEPHANT, and Susan, the little elephant", 
    case_sensitive=True, output_format="text"
    )

'Number of elephants is 1'

## Preparing function schema and passing it to LLM

#### 1. Using python dictionaries

In [2]:
# JSON Schema entry for every argument the model may fill in, listed in the
# same order as the parameters of count_elephants().
_count_elephants_properties = {
    "some_str": {
        "type": "string",
        "description": "Extracted part of user message that contains text to count elephants",
    },
    "case_sensitive": {
        "type": "boolean",
        "description": "True if user specifies he want to counts only SMALL or BIG elephants or if he expectly mentiond it should be case sensitive",
    },
    "output_format": {
        "type": "string",
        "description": "num (if user ask for counting elephants) text (if user ask to tell him how many elephants)",
        "enum": ["num", "text"],
    },
}

# Description of count_elephants() as a plain dictionary, in the shape that
# is passed to the model through the "functions" list.
count_elephants_schema = {
    "name": "count_elephants",
    "description": "Counts elephants in some text provided by user",
    "parameters": {
        "type": "object",
        "properties": _count_elephants_properties,
        # Only the text itself is mandatory; the other two arguments may be
        # left out by the model.
        "required": ["some_str"],
    },
}

**Initializing the model with functions**

In [3]:
# --------------------------------------------------------------
# WITHOUT FUNCTION_CALL SPECIFIED
# --------------------------------------------------------------
# Only the list of available functions is passed to the model. It answers
# with plain text unless the user's message clearly calls for one of the
# functions (compare the "Hey there!" and "count elephants" responses below).
from langchain_openai import ChatOpenAI
from langchain.schema import BaseMessage, HumanMessage, AIMessage

model_with_functions = ChatOpenAI(
    model_name="gpt-4-0613",
    model_kwargs={
        "functions": [count_elephants_schema]
        # "function_call" is deliberately left out here, so the model decides
        # on its own whether to call a function.
    }
)


**model_with_functions response for non-related to elephants stuff (no function_call specified)**

In [4]:
import json
result = model_with_functions.invoke([HumanMessage(content="Hey there!")])
print(json.dumps(json.loads(result.json()), indent=4))

{
    "content": "Hello! How can I assist you today?",
    "additional_kwargs": {},
    "response_metadata": {
        "token_usage": {
            "completion_tokens": 10,
            "prompt_tokens": 131,
            "total_tokens": 141
        },
        "model_name": "gpt-4-0613",
        "system_fingerprint": null,
        "finish_reason": "stop",
        "logprobs": null
    },
    "type": "ai",
    "name": null,
    "id": "run-e2faf76d-ebec-4a29-9799-2bf9b1a68df5-0",
    "example": false
}


**model_with_functions response for asking to count elephants (no function_call specified)**

In [5]:
import json
result = model_with_functions.invoke(
    [
        HumanMessage(content='Please, count elephants in the text "Elephants? I love elephants! Elephants are fucking amazing!')
    ]
)
print(json.dumps(json.loads(result.json()), indent=4))

{
    "content": "",
    "additional_kwargs": {
        "function_call": {
            "arguments": "{\n  \"some_str\": \"Elephants? I love elephants! Elephants are fucking amazing!\",\n  \"output_format\": \"num\"\n}",
            "name": "count_elephants"
        }
    },
    "response_metadata": {
        "token_usage": {
            "completion_tokens": 36,
            "prompt_tokens": 149,
            "total_tokens": 185
        },
        "model_name": "gpt-4-0613",
        "system_fingerprint": null,
        "finish_reason": "function_call",
        "logprobs": null
    },
    "type": "ai",
    "name": null,
    "id": "run-fe234ea4-89bc-4327-b17e-f1adee3d0829-0",
    "example": false
}


**model_with_functions response for non-related to elephants stuff (WITH function_call specified)**

In [6]:
# --------------------------------------------------------------
# WITH FUNCTION_CALL SPECIFIED (function_call = count_elephants)
# --------------------------------------------------------------
# Naming a function in "function_call" makes the model answer with a call to
# that function even when the message has nothing to do with elephants (see
# the "Hey there!" response below, which is still a count_elephants call).
from langchain_openai import ChatOpenAI

model_with_functions = ChatOpenAI(
    model_name="gpt-4-0613",
    model_kwargs={
        "functions": [count_elephants_schema],
        "function_call": {"name": "count_elephants"}, 
    }
)

import json
result = model_with_functions.invoke([HumanMessage(content="Hey there!")])
# Round-trip through json.loads/json.dumps only to pretty-print the message.
print(json.dumps(json.loads(result.json()), indent=4))

{
    "content": "",
    "additional_kwargs": {
        "function_call": {
            "arguments": "{\n  \"some_str\": \"Hey there!\"\n}",
            "name": "count_elephants"
        }
    },
    "response_metadata": {
        "token_usage": {
            "completion_tokens": 11,
            "prompt_tokens": 138,
            "total_tokens": 149
        },
        "model_name": "gpt-4-0613",
        "system_fingerprint": null,
        "finish_reason": "stop",
        "logprobs": null
    },
    "type": "ai",
    "name": null,
    "id": "run-d3902e91-3418-4060-bbde-9bb9b2ad8158-0",
    "example": false
}


#### 2. Using a pydantic BaseModel

**Defining pydantic BaseModel to let LLM know about structure**

```
NOTE: `output_format` is declared as a plain STR here instead of an ENUM.
Using an ENUM here requires the Enum library and defining a new class.
```

In [7]:
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage
from pydantic import BaseModel, Field

class CountElephantsInput(BaseModel):
    """Counts elephants in some text provided by user"""

    # NOTE: the docstring above is functional, not just documentation.
    # pydantic copies it into the generated schema as "description", and
    # LangChain forwards that as the function description sent to the model.
    # Without it the function is advertised with an empty description, unlike
    # the dictionary-based count_elephants_schema, whose wording it reuses.

    # Required: the part of the user message in which elephants are counted.
    some_str: str = Field(..., description="Extracted part of user message that contains text to count elephants")
    # Optional, defaults to False.
    case_sensitive: bool = Field(False, description="True if user specifies he want to counts only SMALL or BIG elephants or if he expectly mentiond it should be case sensitive")
    # Optional, defaults to "num"; declared as a plain str, so it is not
    # restricted to "num"/"text" by validation.
    output_format: str = Field("num", description="num (if user ask for counting elephants) text (if user ask to tell him how many elephants)")

    class Config:
        # Example payload added to the generated JSON schema.
        schema_extra = {
            "example": {
                "some_str": "There are 5 elephants in the room, and 2 more elephants just entered.",
                "case_sensitive": False,
                "output_format": "text"
            }
        }

**Initialize model with functions schema defined with pydantic**

In [8]:
# Initialize ChatOpenAI with the bound function
model = ChatOpenAI(
    model_name="gpt-4-0613",
    temperature=0
)

model_with_functions_pydantic = model.bind_functions(
    functions=[CountElephantsInput],
    function_call={"name": "CountElephantsInput"}
)

**model_with_functions_pydantic**

In [9]:
user_input = "Hey there! Lets count elephants in just single example: 'this is elephant, this is another elephant so we have two of them, right?'"
result = model_with_functions_pydantic.invoke([HumanMessage(content=user_input)])
print(json.dumps(json.loads(result.json()), indent=4))

{
    "content": "",
    "additional_kwargs": {
        "function_call": {
            "arguments": "{\n  \"some_str\": \"this is elephant, this is another elephant so we have two of them, right?\"\n}",
            "name": "CountElephantsInput"
        }
    },
    "response_metadata": {
        "token_usage": {
            "completion_tokens": 25,
            "prompt_tokens": 161,
            "total_tokens": 186
        },
        "model_name": "gpt-4-0613",
        "system_fingerprint": null,
        "finish_reason": "stop",
        "logprobs": null
    },
    "type": "ai",
    "name": null,
    "id": "run-a119ae15-9c22-4e62-84df-0dd55e596858-0",
    "example": false
}


#### PROS OF PYDANTIC - we can easily validate model outputs against the defined class

In [10]:
# Lets take previous example function args
function_name = result.additional_kwargs["function_call"]["name"]
print(function_name)

function_args = result.additional_kwargs["function_call"]["arguments"]
print(function_args)

# Lets validate&parse it
validated_args = CountElephantsInput.parse_raw(function_args)
validated_args

CountElephantsInput
{
  "some_str": "this is elephant, this is another elephant so we have two of them, right?"
}


CountElephantsInput(some_str='this is elephant, this is another elephant so we have two of them, right?', case_sensitive=False, output_format='num')

In [11]:
non_valid_args ='{"some_str": "Hey there!", "case_sensitive": "it should be boolean"}'
print(non_valid_args)

# Lets validate&parse it
validated_args = CountElephantsInput.parse_raw(non_valid_args)
validated_args

{"some_str": "Hey there!", "case_sensitive": "it should be boolean"}


ValidationError: 1 validation error for CountElephantsInput
case_sensitive
  value could not be parsed to a boolean (type=type_error.bool)

# Extracting function name & kwargs

In [12]:
# --------------------------------------------------------------
# Extract from model.invoke([])
# --------------------------------------------------------------
result = model_with_functions.invoke([ HumanMessage(content='Please, count elephants in the text "Elephants? I love elephants! Elephants are fucking amazing!')])
function_name = result.additional_kwargs["function_call"]["name"]
function_args = json.loads(result.additional_kwargs["function_call"]["arguments"])
print(type(function_name), function_name)
print(type(function_args), function_args)

<class 'str'> count_elephants
<class 'dict'> {'some_str': 'Elephants? I love elephants! Elephants are fucking amazing!'}


In [13]:
# --------------------------------------------------------------
# Extract from model.agenerate([[]])
# --------------------------------------------------------------
result = await model_with_functions.agenerate([[HumanMessage(content='Please, count elephants in the text "Elephants? I love elephants! Elephants are fucking amazing!')]])

result_dict = result.generations[0][0].dict()
function_call_data = result_dict['message']['additional_kwargs']['function_call']
function_name = function_call_data['name']
function_args = json.loads(function_call_data['arguments'])
print(type(function_name), function_name)
print(type(function_args), function_args)

<class 'str'> count_elephants
<class 'dict'> {'some_str': 'Elephants? I love elephants! Elephants are fucking amazing!', 'output_format': 'text'}


# Example of executing

In [17]:
# Map the function names the model may return to the Python callables that
# implement them.
function_mapping = {"count_elephants": count_elephants}

# count_elephants() does not validate its own arguments, so only pass it
# arguments that were validated beforehand. This example reuses
# `function_name` from the extraction cells above and the arguments validated
# with pydantic earlier (`validated_args`).
function_kwargs = validated_args.dict()
print(function_name, json.dumps(function_kwargs, indent=4))

# The name comes from model output, so fail with a clear message when it is
# not one of ours (e.g. "CountElephantsInput" from the pydantic example).
if function_name not in function_mapping:
    raise ValueError(f"Model requested an unknown function: {function_name!r}")

# Execute function
print("\n\nResult:")
function_mapping[function_name](**function_kwargs)

count_elephants {
    "some_str": "this is elephant, this is another elephant so we have two of them, right?",
    "case_sensitive": false,
    "output_format": "num"
}


Result:


2

# function_calling vs prompt_with_examples

In task C04L01 we have to classify user input as either a question answered directly by the LLM or one that requires asking some API for the response.

My attempt was to create a function schema:

In [21]:
# Function schema used to classify a user question: the model picks the
# knowledge source and extracts the data needed to query it.
select_source_schema = {
    "name": "select_source",
    "description": "Select knowledge source from specified list basing on question content",
    "parameters": {
        "type": "object",
        "properties": {
            "question": {
                "type": "string",
                "description": "Unchanged user question"
            },
            "knowledge_source": {
                "type": "string",
                # Every value named in the description must be one of the enum
                # values; the fallback was previously described as "other",
                # which the enum does not allow.
                "description": "currency (if user ask exhange rates) population (if user asks about population of the country), general (otherwise)",
                "enum": ["currency", "population", "general"]
            },
            "currency": {
                "type": "string",
                # Currency codes such as USD are three letters, not digits.
                "description": "3-letter Code for specific currency, f.e USD, PLN, EUR",
            },
            "country": {
                "type": "string",
                "description": "Name of country from question in English, f.e Poland, Germany, United Kingdom",
            },
            "answer": {
                "type": "string",
                "description": "Answer for user question. Required if knowledge_source is 'general'",
            },
        },
        # NOTE(review): "answer" is required unconditionally here although its
        # description asks for it only when knowledge_source is 'general'.
        # Left unchanged because the recorded comparison used this setup.
        "required": [
            "knowledge_source", "question", "answer"
        ]
    }
}

model_with_functions = ChatOpenAI(
    # model_name="gpt-4-0613",
    model_name="gpt-3.5-turbo",
    model_kwargs={
        "functions": [select_source_schema],
        "function_call": {"name": "select_source"}, 
    }
)

result = model_with_functions.invoke([HumanMessage('Czy złotówka jest droższa od euro?')])
result


AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"question":"Czy złotówka jest droższa od euro?","knowledge_source":"currency","currency":"PLN","answer":"Złotówka jest tańsza od euro."}', 'name': 'select_source'}}, response_metadata={'token_usage': {'completion_tokens': 44, 'prompt_tokens': 183, 'total_tokens': 227}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_b28b39ffa8', 'finish_reason': 'stop', 'logprobs': None}, id='run-51ae87ed-4653-4b23-9e3c-72597217b55e-0')

Meanwhile, the alternative mentioned by another person in the course was to just use a prompt with examples:

In [24]:
from langchain_core.prompts import ChatPromptTemplate
llm = ChatOpenAI()

# Few-shot prompt: the system message describes the classification task and
# shows three worked examples (population, currency, general); the second
# message carries the user's question via the {question} placeholder.
prompt = ChatPromptTemplate.from_messages([
    ("system",
     'I classify questions and extract needed data for question in json form. '
     'On questions about population I only return country name. '
     # NOTE(review): "country code" presumably should read "currency code" -
     # the currency example below returns "currency_code". Confirm before
     # changing, since the recorded comparison used this exact wording.
     'On questions about currency rate I return only country code. '
     'For other question I try response in answer field. For example:\n'
     # Braces in the examples are doubled, presumably so that the template
     # treats them as literal JSON braces rather than variables - TODO confirm.
     'Q: Jaka jest populacja Węgier?\n'
     'A: {{"question_type": "population", "country_name": "Hungary"}}\n'
     'Q: Jaki jest teraz kurs Forinta?\n'
     'A: {{"question_type": "currency", "currency_code": "HUF"}}\n'
     'Q: Kto to jest Adam Małysz?\n'
     'A: {{"question_type": "general", "answer": "Skoczek narciarski"}}\n'),
    ("user", "{question}")
])

chain = prompt | llm

result = chain.invoke({"question": 'Czy złotówka jest droższa od euro?'})
result

AIMessage(content='{"question_type": "general", "answer": "Nie, obecnie złotówka jest tańsza od euro."}', response_metadata={'token_usage': {'completion_tokens': 29, 'prompt_tokens': 161, 'total_tokens': 190}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_b28b39ffa8', 'finish_reason': 'stop', 'logprobs': None}, id='run-2faa626f-538b-48a9-984d-01150c8ee0bb-0')

Both variants work perfectly well on the examples provided with the task and solved it with 100% accuracy.

I ran some tests using different questions (close to the samples provided in the task, but with different expected results).

I will quote myself from the forum discussion about this:

### Quote from forum
Zrobiłem pare testów, na zasadzie "pytanie podobne do tego z puli, ale jednak inne":

`Ile ludzi mieszka w Augustowie;`

__**Dla modelu GPT-3**__

- z function_calling zwraca: "population" z Polską jako krajem ORAZ answerem z poprawną odpowiedzią❌

- bez function_calling zwraca: "population" z Polską jako krajem ❌

__**Dla modelu GPT-4**__

- z function_calling zwraca: "general" z odpowiedzią ✅

- bez function_calling zwraca: 'population', 'poland' ❌


`Czy złotówki są droższe od euro?`

__**Dla modelu GPT-3**__

- z function_calling zwraca: "general" i "Złotówki są tańsze od euro." ✅

- bez function_calling zwraca: "general" i informacje żeby sobie sprawdzić kursy wymiany walut ✅/❌

__**Dla modelu GPT-4**__

- z function_calling zwraca: "general" z poprawną odpowiedią ✅

- bez function_calling zwraca "currency", "PLN" ✅


Także wydaje się, że przy używaniu wersji 4rtej lepiej sobie radzi z pytaniami podobnymi, ale z drugiej strony mój prompt był dużo bardziej szczegółówo opisany więc nie wiem czy to zasługa użycia function_calling czy więcej czasu poświęciłem na opisanie poszczególnych pól.


# get schema from function

In [25]:
# Example from https://python.langchain.com/docs/modules/model_io/chat/function_calling/
import json

from langchain_core.utils.function_calling import convert_to_openai_tool


# The type hints and the Google-style docstring are the converter's input for
# the schema printed below: the summary line becomes "description", each
# "Args:" entry becomes that parameter's description, and the int hints
# become "integer" types. Keep the docstring format intact.
def multiply(a: int, b: int) -> int:
    """Multiply two integers together.

    Args:
        a: First integer
        b: Second integer
    """
    return a * b


print(json.dumps(convert_to_openai_tool(multiply), indent=2))

{
  "type": "function",
  "function": {
    "name": "multiply",
    "description": "Multiply two integers together.",
    "parameters": {
      "type": "object",
      "properties": {
        "a": {
          "type": "integer",
          "description": "First integer"
        },
        "b": {
          "type": "integer",
          "description": "Second integer"
        }
      },
      "required": [
        "a",
        "b"
      ]
    }
  }
}


#### Let's test with the original function (NO TYPES SPECIFIED) - not working well

In [28]:
from langchain_core.utils.function_calling import convert_to_openai_tool
def count_elephants(some_str, case_sensitive=False, output_format="num"):
    """
    It counts ELEPHANTS in given text
    INPUTS:
    some_str: str - text that we use to count elephants
    case_sensitive: bool - 
    output_format: str - ["num", "text]" - to present implementation enum in function_calling
    """
    # The docstring above is intentionally left exactly as written: this cell
    # demonstrates what the converter generates from an untyped function, and
    # the docstring text is copied into the schema printed below.
    text = some_str if case_sensitive else some_str.lower()
    elephants = text.count("elephant")
    if output_format != "num":
        return f"Number of elephants is {elephants}"
    return elephants
    
print(json.dumps(convert_to_openai_tool(count_elephants), indent=2))

{
  "type": "function",
  "function": {
    "name": "count_elephants",
    "description": "It counts ELEPHANTS in given text\nINPUTS:\nsome_str: str - text that we use to count elephants\ncase_sensitive: bool - \noutput_format: str - [\"num\", \"text]\" - to present implementation enum in function_calling",
    "parameters": {
      "type": "object",
      "properties": {},
      "required": [
        "some_str"
      ]
    }
  }
}


#### Improved function (types specified and variables described)
**NOTE: I wasn't able to make ENUM work here, however we can add it manually**

In [44]:
from langchain_core.utils.function_calling import convert_to_openai_tool
def count_elephants(some_str: str, case_sensitive: bool = False, output_format:str = "num") -> object:
    """Counts elephants in some text provided by user

    Args:
        some_str: Extracted part of user message that contains text to count elephants
        case_sensitive: True if user specifies he want to counts only SMALL or BIG elephants or if he expectly mentiond it should be case sensitive
        output_format: num (if user ask for counting elephants) text (if user ask to tell him how many elephants)
    """
    # The docstring is kept verbatim: its summary line and "Args:" entries
    # are copied into the function schema printed below.
    searched = some_str if case_sensitive else some_str.lower()
    found = searched.count("elephant")
    wants_number = output_format == "num"
    return found if wants_number else f"Number of elephants is {found}"

function_schema = convert_to_openai_tool(count_elephants)
print(json.dumps(function_schema, indent=2))

{
  "type": "function",
  "function": {
    "name": "count_elephants",
    "description": "Counts elephants in some text provided by user",
    "parameters": {
      "type": "object",
      "properties": {
        "some_str": {
          "type": "string",
          "description": "Extracted part of user message that contains text to count elephants"
        },
        "case_sensitive": {
          "type": "boolean",
          "description": "True if user specifies he want to counts only SMALL or BIG elephants or if he expectly mentiond it should be case sensitive"
        },
        "output_format": {
          "type": "string",
          "description": "num (if user ask for counting elephants) text (if user ask to tell him how many elephants)"
        }
      },
      "required": [
        "some_str"
      ]
    }
  }
}


In [45]:
from langchain_openai import ChatOpenAI

model_with_functions = ChatOpenAI(
    model_name="gpt-4-0613",
    model_kwargs={
        "functions": [function_schema['function']],
        "function_call": {"name": "count_elephants"}, 
    }
)

import json
result = model_with_functions.invoke([HumanMessage(content='Please, count elephants in the text "Elephants? I love elephants! Elephants are fucking amazing!')])
print(json.dumps(json.loads(result.json()), indent=4))

{
    "content": "",
    "additional_kwargs": {
        "function_call": {
            "arguments": "{\n\"some_str\": \"Elephants? I love elephants! Elephants are fucking amazing!\"\n}",
            "name": "count_elephants"
        }
    },
    "response_metadata": {
        "token_usage": {
            "completion_tokens": 20,
            "prompt_tokens": 151,
            "total_tokens": 171
        },
        "model_name": "gpt-4-0613",
        "system_fingerprint": null,
        "finish_reason": "stop",
        "logprobs": null
    },
    "type": "ai",
    "name": null,
    "id": "run-46129101-11ac-4b07-b71b-7a92217ceb9b-0",
    "example": false
}
