# Pydantic for LLM workflows

## Basics

In [6]:
from pydantic import BaseModel, Field, ValidationError, EmailStr
from typing import Optional
from datetime import date
class UserInput(BaseModel):
    name: str = Field(...,description="The name of the user")
    email: EmailStr = Field(...,description="The email of the user")
    query: str = Field(...,description="The query of the user")
    order_id : Optional[int] = Field(
        None, 
        description="5 digit order number (cannot start with 0)",
        ge=10000,
        le=99999
    )
    purchase_date: Optional[date] = None

In [7]:
## validate input with our simple Pydantic model
user = UserInput(
    name = "abc",
    email ="abc12@gmail.com",
    query ="How can I cancel my subscription?",
)
print(user)

name='abc' email='abc12@gmail.com' query='How can I cancel my subscription?' order_id=None purchase_date=None


In [8]:
# defining fucntions to handle user input validations safely
from typing import Union
def validate_user_input(input_data: dict) -> Union[UserInput, None]:
    try: 
        user_input = UserInput(**input_data)
        print(f"Validated user input: ✅ {input_data}")
        return user_input
    except ValidationError as e:
        print(f"Validation error: ❌ {e}")
        return None
    
# test the function
user_input = {
    'name': '123',
    'email': 'john.doe@example.com',
    'query': 'How can I cancel my subscription?'
}
validate_user_input(user_input)

Validated user input: ✅ {'name': '123', 'email': 'john.doe@example.com', 'query': 'How can I cancel my subscription?'}


UserInput(name='123', email='john.doe@example.com', query='How can I cancel my subscription?', order_id=None, purchase_date=None)

In [9]:
# test the function
user_input = {
    'email': 'john.doe@example.com',
    'query': 'How can I cancel my subscription?'
}
validate_user_input(user_input)

Validation error: ❌ 1 validation error for UserInput
name
  Field required [type=missing, input_value={'email': 'john.doe@examp...ancel my subscription?'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/missing


In [10]:
# test the function
user_input = {
    'email': 'john.doe@example.com',
    'query': 'How can I cancel my subscription?',
    'name':'abc',
    'order_id': 40000,
    'purchase_date':'2020-10-02'
}
validate_user_input(user_input)

Validated user input: ✅ {'email': 'john.doe@example.com', 'query': 'How can I cancel my subscription?', 'name': 'abc', 'order_id': 40000, 'purchase_date': '2020-10-02'}


UserInput(name='abc', email='john.doe@example.com', query='How can I cancel my subscription?', order_id=40000, purchase_date=datetime.date(2020, 10, 2))

# Validating Json + Data fields
- using model_validate_json

In [11]:
json_data_good = '''
{
  "email": "john.doe@example.com",
  "query": "How can I cancel my subscription?",
  "name": "abc",
  "order_id": 40000,
  "purchase_date": "2020-10-02"
}
'''
json_data_bad_json_fmt = '''

  "email": "john.doe@example.com",
  "query": "How can I cancel my subscription?",
  "name": "abc",
  "order_id": 40000,
  "purchase_date": "2020-10-02"
}
'''

json_data_bad_data_fmt = '''
{
  "email": "john.doe@example.com",
  "query": "How can I cancel my subscription?",
  "name": 123,
  "order_id": 03000,
  "purchase_date": "2020-10-02"
}
'''

print(f'Validating good json: ✅ {UserInput.model_validate_json(json_data_good)}')
print(f'Validating json with bad JSON formatting: ‼️ {UserInput.model_validate_json(json_data_bad_json_fmt)}')

Validating good json: ✅ name='abc' email='john.doe@example.com' query='How can I cancel my subscription?' order_id=40000 purchase_date=datetime.date(2020, 10, 2)


ValidationError: 1 validation error for UserInput
  Invalid JSON: trailing characters at line 3 column 10 [type=json_invalid, input_value='\n\n  "email": "john.doe...ate": "2020-10-02"\n}\n', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid

In [12]:
print(f'Validating json with bad JSON formatting: ‼️ {UserInput.model_validate_json(json_data_bad_data_fmt)}')

ValidationError: 1 validation error for UserInput
  Invalid JSON: invalid number at line 6 column 16 [type=json_invalid, input_value='\n{\n  "email": "john.do...ate": "2020-10-02"\n}\n', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid

# Validating LLM response with Pydantic

In [13]:
# imports
from pydantic import BaseModel, ValidationError, Field, EmailStr
from typing import List, Literal, Optional
import json
from datetime import date
from dotenv import load_dotenv
import openai
load_dotenv()

True

In [14]:
client = openai.OpenAI()

In [15]:
user_input_json = '''
{
  "email": "john.doe@example.com",
  "query": "How can I cancel my subscription?",
  "name": "abc",
  "order_id": null,
  "purchase_date": null
}
'''

In [16]:
class UserInput(BaseModel):
    name: str = Field(...,description="The name of the user")
    email: EmailStr = Field(...,description="The email of the user")
    query: str = Field(...,description="The query of the user")
    order_id : Optional[int] = Field(
        None, 
        description="5 digit order number (cannot start with 0)",
        ge=10000,
        le=99999
    )
    purchase_date: Optional[date] = None

In [17]:
user_input = UserInput.model_validate_json(user_input_json)

In [18]:
# Customer Query Model
class CustomerQuery(UserInput):
    priority:str = Field(...,
                         description="Priority level: low, medium, high")
    category: Literal['refund_request', 'information_request', 'other'] = Field(...,
                                                                                description='Query Category')
    is_complaint: bool = Field(...,
                               description='Whether this is a complaint or not?')
    tags: List[str] = Field(...,
                             description='Relevant keywords tags')

In [19]:
## give an example response to LLM to let it know what you expect as output
example_response_structure = f"""{{
    name ="Example User",
    email = "user@example.com"
    query = "I ordered a new computer monitor and it arrived with a broken screen"
    order_id = 12345
    purchase_date = "2023-11-10"
    priority = "medium"
    category = "refund_request"
    is_complaint = True
    tags = ["monitor","support","exchange"]
}}"""

In [20]:
llm_prompt = f""" 
Please analyse this user query\n{user_input.model_dump_json(indent=2)}:

Return your analysis as a JSON object matching this exact structure
and datatypes:
{example_response_structure}

Respond ONLY with valid JSON. Do not include andy explanations or 
other text or formatting before or after JSON object
"""
print(llm_prompt)

 
Please analyse this user query
{
  "name": "abc",
  "email": "john.doe@example.com",
  "query": "How can I cancel my subscription?",
  "order_id": null,
  "purchase_date": null
}:

Return your analysis as a JSON object matching this exact structure
and datatypes:
{
    name ="Example User",
    email = "user@example.com"
    query = "I ordered a new computer monitor and it arrived with a broken screen"
    order_id = 12345
    purchase_date = "2023-11-10"
    priority = "medium"
    category = "refund_request"
    is_complaint = True
    tags = ["monitor","support","exchange"]
}

Respond ONLY with valid JSON. Do not include andy explanations or 
other text or formatting before or after JSON object



In [21]:
# call LLM
def call_llm(prompt, model="gpt-4o"):
    response = client.chat.completions.create(
        model = model,
        messages=[{"role":"user", "content":prompt}]
    )
    return response.choices[0].message.content

In [22]:
response_content = call_llm(llm_prompt)
print(response_content)

```json
{
    "name": "abc",
    "email": "john.doe@example.com",
    "query": "How can I cancel my subscription?",
    "order_id": null,
    "purchase_date": null,
    "priority": "high",
    "category": "subscription_cancellation",
    "is_complaint": false,
    "tags": ["subscription", "cancellation", "support"]
}
```


In [23]:
CustomerQuery.model_validate_json(response_content)

ValidationError: 1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n    "name": ...on", "support"]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid

In [None]:
# function to catch validation errors gracefully
def validate_llm_responses(data_model, llm_response):
    try:
        validated_data = data_model.model_validate_json(llm_response)
        print(f"Data validation Successfull: ✅")
        print(validated_data.model_dump_json(indent=2))
        return validated_data, None
    except ValidationError as e:
        print(f"❌ Error Validating data: {e}")
        error_message = {
            f"This response generated a validation error: {e}"
        }
        return None, error_message

In [None]:
validated_data, validation_error = validate_llm_responses(CustomerQuery, response_content)
print(validation_error)

❌ Error Validating data: 1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n    "name": ...on", "support"]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid
{'This response generated a validation error: 1 validation error for CustomerQuery\n  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value=\'```json\\n{\\n    "name": ...on", "support"]\\n}\\n```\', input_type=str]\n    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid'}


In [None]:
# Create a retry feedback loop
def create_retry_prompt(
    original_prompt,
    original_response, 
    error_message
):
    retry_prompt = f""" 
    There is a request to fix an error in the structure of llm response. 
    Here is the original request
    <original_prompt>
    {original_prompt}
    </original_prompt>
    
    Here is the original llm response:
    <llm_response>
    {original_response}
    </llm_response>
    
    This response generated an error:
    <error_message>
    {error_message}
    </error_message>
    
    Compare the error message and the llm_response and identify what needs to be fixed or removed
    in the llm_response to resolve this error.
    
    Respond ONLY with valid JSON. Do not include any explanations or other text or formatting before or after the JSON string
    """
    return retry_prompt

In [None]:
validation_retry_prompt = create_retry_prompt(
    original_prompt=llm_prompt,
    original_response=response_content,
    error_message=validation_error
)
print(validation_retry_prompt)

 
    There is a request to fix an error in the structure of llm response. 
    Here is the original request
    <original_prompt>
     
Please analyse this user query
{
  "name": "abc",
  "email": "john.doe@example.com",
  "query": "How can I cancel my subscription?",
  "order_id": null,
  "purchase_date": null
}:

Return your analysis as a JSON object matching this exact structure
and datatypes:
{
    name ="Example User",
    email = "user@example.com"
    query = "I ordered a new computer monitor and it arrived with a broken screen"
    order_id = 12345
    purchase_date = "2023-11-10"
    priority = "medium"
    category = "refund_request"
    is_complaint = True
    tags = ["monitor","support","exchange"]
}

Respond ONLY with valid JSON. Do not include andy explanations or 
other text or formatting before or after JSON object

    </original_prompt>

    Here is the original llm response:
    <llm_response>
    ```json
{
    "name": "abc",
    "email": "john.doe@example.com",
   

In [None]:
validation_retry_response = call_llm(validation_retry_prompt)
print(validation_retry_response)

```json
{
    "name": "abc",
    "email": "john.doe@example.com",
    "query": "How can I cancel my subscription?",
    "order_id": null,
    "purchase_date": null,
    "priority": "high",
    "category": "subscription_cancellation",
    "is_complaint": false,
    "tags": ["subscription", "cancellation", "support"]
}
```


In [None]:
# Define a function to automatically retry an LLM call multiple times
def validate_llm_response(
    prompt, data_model, n_retry=5, model="gpt-4o"
):
    # Initial LLM call
    response_content = call_llm(prompt, model=model)
    current_prompt = prompt

    # Try to validate with the model
    # attempt: 0=initial, 1=first retry, ...
    for attempt in range(n_retry + 1):

        validated_data, validation_error = validate_llm_responses(
            data_model, response_content
        )

        if validation_error:
            if attempt < n_retry:
                print(f"retry {attempt} of {n_retry} failed, trying again...")
            else:
                print(f"Max retries reached. Last error: {validation_error}")
                return None, (
                    f"Max retries reached. Last error: {validation_error}"
                )

            validation_retry_prompt = create_retry_prompt(
                original_prompt=current_prompt,
                original_response=response_content,
                error_message=validation_error
            )
            response_content = call_llm(
                validation_retry_prompt, model=model
            )
            current_prompt = validation_retry_prompt
            continue

        # If you get here, both parsing and validation succeeded
        return validated_data, None

In [24]:
# Test your complete solution with the original prompt
validated_data, error = validate_llm_response(
    llm_prompt, CustomerQuery
)

NameError: name 'validate_llm_response' is not defined

In [25]:
# Investigate the model_json_schema for CustomerQuery
data_model_schema = json.dumps(
    CustomerQuery.model_json_schema(), indent=2
)
print(data_model_schema)

{
  "properties": {
    "name": {
      "description": "The name of the user",
      "title": "Name",
      "type": "string"
    },
    "email": {
      "description": "The email of the user",
      "format": "email",
      "title": "Email",
      "type": "string"
    },
    "query": {
      "description": "The query of the user",
      "title": "Query",
      "type": "string"
    },
    "order_id": {
      "anyOf": [
        {
          "maximum": 99999,
          "minimum": 10000,
          "type": "integer"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "description": "5 digit order number (cannot start with 0)",
      "title": "Order Id"
    },
    "purchase_date": {
      "anyOf": [
        {
          "format": "date",
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "title": "Purchase Date"
    },
    "priority": {
      "description": "Priority level: low, medi

In [26]:
# Create new prompt with user input and model_json_schema
prompt = f"""
Please analyze this user query\n {user_input.model_dump_json(indent=2)}:

Return your analysis as a JSON object matching the following schema:
{data_model_schema}

Respond ONLY with valid JSON. Do not include any explanations or 
other text or formatting before or after the JSON object.
"""

In [27]:
# Run your validate_llm_response function with the new prompt
final_analysis, error = validate_llm_response(
    prompt, CustomerQuery
)

NameError: name 'validate_llm_response' is not defined

# Using instructor to validate LLM responses with pydantic

In [28]:
from pydantic import BaseModel, Field, EmailStr
from typing import List, Literal, Optional
from openai import OpenAI
import instructor
import anthropic
from dotenv import load_dotenv
from datetime import date

In [29]:
class UserInput(BaseModel):
    name: str = Field(...,description="The name of the user")
    email: EmailStr = Field(...,description="The email of the user")
    query: str = Field(...,description="The query of the user")
    order_id : Optional[int] = Field(
        None, 
        description="5 digit order number (cannot start with 0)",
        ge=10000,
        le=99999
    )
    purchase_date: Optional[date] = None

In [30]:
# Customer Query Model
class CustomerQuery(UserInput):
    priority:str = Field(...,
                         description="Priority level: low, medium, high")
    category: Literal['refund_request', 'information_request', 'other'] = Field(...,
                                                                                description='Query Category')
    is_complaint: bool = Field(...,
                               description='Whether this is a complaint or not?')
    tags: List[str] = Field(...,
                             description='Relevant keywords tags')

In [31]:
user_input_json = '''
{
  "email": "john.doe@example.com",
  "query": "I ordered a product but it didn't arrive and now I want to cancel my order",
  "name": "abc",
  "order_id": 12345,
  "purchase_date": null
}
'''

In [32]:
user_input=  UserInput.model_validate_json(user_input_json)

In [33]:
prompt = (
    f"Analyze the following customer query {user_input} "
    f"and provide a structured response."
)

In [34]:
load_dotenv()

openai_client = instructor.from_openai(
    openai.OpenAI()
)
response = openai_client.beta.chat.completions.parse(
    model = 'gpt-4o',
    max_tokens = 1024,
    messages = [
        {
            'role':'user',
            'content':prompt
        }
    ],
    response_format = CustomerQuery,
)

In [35]:
print(response.choices[0].message.content)

{"name":"abc","email":"john.doe@example.com","query":"I ordered a product but it didn't arrive and now I want to cancel my order","order_id":12345,"purchase_date":null,"priority":"high","category":"refund_request","is_complaint":true,"tags":["order_not_received","cancel_order","customer_support"]}


In [36]:
response = openai_client.responses.parse(
    model = 'gpt-4o',
    input = [
        {
            'role':'user',
            'content':prompt
        }
    ],
    text_format = CustomerQuery,
)

In [37]:
print(response.output_text)

{"name":"abc","email":"john.doe@example.com","query":"I ordered a product but it didn't arrive and now I want to cancel my order","order_id":12345,"purchase_date":null,"priority":"high","category":"refund_request","is_complaint":true,"tags":["order cancellation","non-delivery","refund request","customer complaint"]}


# Pydantic for Tool Calling

In [38]:
from pydantic import BaseModel, Field, EmailStr, field_validator
from pydantic_ai import Agent
from typing import List, Literal, Optional
from datetime import datetime, date
import json
from openai import OpenAI
import anthropic
import instructor
from dotenv import load_dotenv
import nest_asyncio

load_dotenv()
nest_asyncio.apply()

In [39]:
class UserInput(BaseModel):
    name: str = Field(...,description="The name of the user")
    email: EmailStr = Field(...,description="The email of the user")
    query: str = Field(...,description="The query of the user")
    order_id : Optional[str] = Field(
        None, 
        description="Order ID if available (format: ABC-12345)"
    )
    # Validate order_id format (e.g should be of format ABC-12345)
    @field_validator('order_id')
    def validate_order_id(cls, order_id):
        import re
        if order_id is None:
            return order_id
        pattern = r"^[A-Z]{3}-\d{5}$"
        if not re.match(pattern, order_id):
            raise ValueError(
                "order_id must be in format ABC-12345"
                "(3 uppercase letters, dash, 5 digits)"
            )
    purchase_date: Optional[date] = None

In [40]:
# Customer Query Model
class CustomerQuery(UserInput):
    priority:str = Field(...,
                         description="Priority level: low, medium, high")
    category: Literal['refund_request', 'information_request', 'other'] = Field(...,
                                                                                description='Query Category')
    is_complaint: bool = Field(...,
                               description='Whether this is a complaint or not?')
    tags: List[str] = Field(...,
                             description='Relevant keywords tags')

In [41]:
from typing import Union
def validate_user_input(input_data: str) -> Union[UserInput, None]:
    try: 
        user_input = UserInput.model_validate_json(input_data)
        print(f"Validated user input: ✅ {input_data}")
        return user_input
    except ValidationError as e:
        print(f"Validation error: ❌ {e}")
        return None

In [74]:
def create_customer_query(valid_user_json: str) -> CustomerQuery:
    customer_query_agent = Agent(
        model = "gpt-4o-mini",
        output_type=CustomerQuery
    )
    response = customer_query_agent.run_sync(valid_user_json)
    print("CustomerQuery generated..")
    return response.output

In [97]:
user_input_json = '''
{
    "name": "Joe User",
    "email": "joe@example.com",
    "query": "When can I expect delivery of the headphones I ordered?",
    "order_id": "ABC-12345",
    "purchase_date": "2025-12-01"
}
'''

In [98]:
valid_data = validate_user_input(user_input_json).model_dump_json()
customer_query = create_customer_query(valid_data)
print(customer_query.model_dump_json(indent=2))


Validated user input: ✅ 
{
    "name": "Joe User",
    "email": "joe@example.com",
    "query": "When can I expect delivery of the headphones I ordered?",
    "order_id": "ABC-12345",
    "purchase_date": "2025-12-01"
}

CustomerQuery generated..
{
  "name": "Joe User",
  "email": "joe@example.com",
  "query": "When can I expect delivery of the headphones I ordered?",
  "order_id": null,
  "purchase_date": "2025-12-01",
  "priority": "medium",
  "category": "information_request",
  "is_complaint": false,
  "tags": [
    "delivery",
    "headphones",
    "order status"
  ]
}


In [99]:
# Create an FAQ lookup agent
class FAQLookupArgs(BaseModel):
    query:str = Field(..., description="User's query")
    tags: List[str] = Field(..., description="Relevant keyword tags from the customer query"
                            )

In [100]:
# Class to check order status
class CheckOrderStatusArgs(BaseModel):
    order_id:str = Field(...,
                         description = "Customer's order ID (format: ABC-12345)")
    email: EmailStr = Field(..., description="Customer's email address")
    @field_validator('order_id')
    def validate_order_id(cls, order_id):
        import re
        if order_id is None:
            return order_id
        pattern = r"^[A-Z]{3}-\d{5}$"
        if not re.match(pattern, order_id):
            raise ValueError(
                "order_id must be in format ABC-12345"
                "(3 uppercase letters, dash, 5 digits)"
            )
        return order_id

In [101]:
# Create a fake FAQ database as a list of entries with keywords
faq_db = [
    {
        "question": "How can I reset my password?",
        "answer": "To reset your password, click 'Forgot Password' on the sign-in page and follow the instructions sent to your email.",
        "keywords": ["password", "reset", "account"]
    },
    {
        "question": "How long does shipping take?",
        "answer": "Standard shipping takes 3-5 business days. You can track your order in your account dashboard.",
        "keywords": ["shipping", "delivery", "order", "tracking"]
    },
    {
        "question": "How can I return an item?",
        "answer": "You can return any item within 30 days of purchase. Visit our returns page to start the process.",
        "keywords": ["return", "refund", "exchange"]
    },
    {
        "question": "How can I delete my account?",
        "answer": "To delete your account, go to your account settings tab and select 'delete account'.",
        "keywords": ["delete", "account", "remove"]
    }
]

# Create a fake order database
order_db = {
    "ABC-12345": {
        "status": "shipped", "estimated_delivery": "2025-12-05",
        "purchase_date": "2025-12-01", "email": "joe@example.com"
    },
    "XYZ-23456": {
        "status": "processing", "estimated_delivery": "2025-12-15",
        "purchase_date": "2025-12-10", "email": "sue@example.com"
    },
    "QWE-34567": {
        "status": "delivered", "estimated_delivery": "2025-12-20",
        "purchase_date": "2025-12-18", "email": "bob@example.com"
    }
}

In [102]:
# Define your FAQ lookup tool
def lookup_faq_answer(args: FAQLookupArgs) -> str:
    """Look up an FAQ answer by matching tags and words in query 
    to FAQ entry keywords."""
    query_words = set(word.lower() for word in args.query.split())
    tag_set = set(tag.lower() for tag in args.tags)
    best_match = None
    best_score = 0
    for faq in faq_db:
        keywords = set(k.lower() for k in faq["keywords"])
        score = len(keywords & tag_set) + len(keywords & query_words)
        if score > best_score:
            best_score = score
            best_match = faq
    if best_match and best_score > 0:
        return best_match["answer"]
    return "Sorry, I couldn't find an FAQ answer for your question."

In [103]:
# Define your check order status tool
def check_order_status(args: CheckOrderStatusArgs):
    """Simulate checking the status of a customer's order by 
    order_id and email."""
    order = order_db.get(args.order_id)
    if not order:
        return {
            "order_id": args.order_id,
            "status": "not found",
            "estimated_delivery": None,
            "note": "order_id not found"
        }
    if args.email.lower() != order.get("email", "").lower():
        return {
            "order_id": args.order_id,
            "status": order["status"],
            "estimated_delivery": order["estimated_delivery"],
            "note": "order_id found but email mismatch"
        }
    return {
        "order_id": args.order_id,
        "status": order["status"],
        "estimated_delivery": order["estimated_delivery"],
        "note": "order_id and email match"
    }

In [104]:
# Define tools for your API call
tool_definitions = [
    {
        "type": "function",
        "function": {
            "name": "lookup_faq_answer",
            "description": "Look up an FAQ answer by matching tags to FAQ entry keywords.",
            "parameters": FAQLookupArgs.model_json_schema()
        }
    },
    {
        "type": "function",
        "function": {
            "name": "check_order_status",
            "description": "Check the status of a customer's order.",
            "parameters": CheckOrderStatusArgs.model_json_schema()
        }
    }
]

In [105]:
#Define your final output Pydantic models
class OrderDetails(BaseModel):
    status: str
    estimated_delivery: str
    note: str

class SupportTicket(CustomerQuery):
    recommended_next_action: Literal[
        'escalate_to_agent', 'send_faq_response', 
        'send_order_status', 'no_action_needed'
    ] = Field(
        ..., description="LLM's recommended next action for support"
    )
    order_details: Optional[OrderDetails] = Field(
        None, description="Order details if action is send_order_status"
    )
    faq_response: Optional[str] = Field(
        None, description="FAQ response if action is send_faq_response"
    )
    creation_date: datetime = Field(
        ..., description="Date and time the ticket was created"
    )

# Making an LLM call

In [106]:
# Initialize OpenAI client
client = OpenAI()

# Define a function to call OpenAI with tools
def decide_next_action_with_tools(customer_query: CustomerQuery):
    
    support_ticket_schema = json.dumps(
        SupportTicket.model_json_schema(), indent=2
    )
    system_prompt = f"""
        You are a helpful customer support agent. Your job is to 
        determine what support action should be taken for the customer, 
        based on the customer query and the expected fields in the 
        SupportTicket schema below. If more information on a particular 
        order_id or FAQ response would be helpful in responding to the 
        user query and can be obtained by calling a tool, call the 
        appropriate tool to get that information. If an order_id is 
        present in the query, always look up the order status to get 
        more information on the order.

        Here is the JSON schema for the SupportTicket model you must 
        use as context for what information is expected:
        {support_ticket_schema}
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": str(customer_query.model_dump())}
    ]
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        tools=tool_definitions,
        tool_choice="auto"
    )
    message = response.choices[0].message
    tool_calls = getattr(message, "tool_calls", None)
    return message, tool_calls, messages

In [107]:
customer_query

CustomerQuery(name='Joe User', email='joe@example.com', query='When can I expect delivery of the headphones I ordered?', order_id=None, purchase_date=datetime.date(2025, 12, 1), priority='medium', category='information_request', is_complaint=False, tags=['delivery', 'headphones', 'order status'])

In [108]:
message, tool_calls, messages = decide_next_action_wit_tools(customer_query)

# investigate LLM's outputs before proceeding
print(f"LLM's response: {json.dumps(message.model_dump(), indent=2)}")
print(f"\nTool calls\n: {json.dumps([call.model_dump() for call in tool_calls], indent=2)}")

LLM's response: {
  "content": null,
  "refusal": null,
  "role": "assistant",
  "annotations": [],
  "audio": null,
  "function_call": null,
  "tool_calls": [
    {
      "id": "call_D9hI7V9SK03trj2pOfHncEzh",
      "function": {
        "arguments": "{\"query\":\"When can I expect delivery of the headphones I ordered?\",\"tags\":[\"delivery\",\"headphones\",\"order status\"]}",
        "name": "lookup_faq_answer"
      },
      "type": "function"
    }
  ]
}

Tool calls
: [
  {
    "id": "call_D9hI7V9SK03trj2pOfHncEzh",
    "function": {
      "arguments": "{\"query\":\"When can I expect delivery of the headphones I ordered?\",\"tags\":[\"delivery\",\"headphones\",\"order status\"]}",
      "name": "lookup_faq_answer"
    },
    "type": "function"
  }
]


In [109]:
def get_tool_outputs(tool_calls):
    tool_outputs = []
    for tool_call in tool_calls:
        if tool_call.function.name == "lookup_faq_answer":
            print(f"Agent requested a call to look up FAQ tool..")
            args = FAQLookupArgs.model_validate_json(tool_call.function.arguments)
            result = lookup_faq_answer(args)
            tool_outputs.append({
                "tool_call_id":tool_call.id,
                "output":result
            })
            print(f"Lookup FAQ tool returned: {result}")
        elif tool_call.function.name == "check_order_status":
            print(f"Agent requested a call to check order status tool..")
            args = CheckOrderStatusArgs.model_validate_json(tool_call.function.arguments)
            result = check_order_status(args)
            tool_outputs.append({
                "tool_call_id":tool_call.id,
                "output":result
            })
            print(f"Check order status tool returned: {result}")
    return tool_outputs
            

In [110]:
# Stage 2: Gather amu meeded tools and generate s support ticket
tool_outputs = get_tool_outputs(tool_calls)
print(f"\nTool outputs:\n{json.dumps(tool_outputs, indent=2)}")

Agent requested a call to look up FAQ tool..
Lookup FAQ tool returned: Standard shipping takes 3-5 business days. You can track your order in your account dashboard.

Tool outputs:
[
  {
    "tool_call_id": "call_D9hI7V9SK03trj2pOfHncEzh",
    "output": "Standard shipping takes 3-5 business days. You can track your order in your account dashboard."
  }
]


In [116]:
openai_client = instructor.from_openai(OpenAI())

def generate_structured_support_ticket(customer_query: CustomerQuery,message, tool_outputs: list):
    tool_results_str = "\n".join([
        f"Tool:{out['tool_call_id']}, Output: {json.dumps(out['output'])}" for out in tool_outputs
    ])
    
    prompt = f""" 
    You're a support ticket agent. Use all the information below to generate a suppot ticket
    as a validated Pydantic model.
    Customer query: {customer_query.model_dump_json(indent=2)}
    LLM message: {str(message.model_dump())}
    Tool Results: {tool_results_str}
    """
    response = openai_client.chat.completions.create(
        model = "gpt-4o",
        messages = [
            {"role":"system", "content":prompt},
        ],
       response_model = SupportTicket
    )
    
    support_ticket = response
    support_ticket.creation_date = datetime.now()
    return support_ticket
    

In [117]:
support_ticket = generate_structured_support_ticket(customer_query,message, tool_outputs)
print(support_ticket.model_dump_json(indent=2))

{
  "name": "Joe User",
  "email": "joe@example.com",
  "query": "I'm really not happy with this product I bought",
  "order_id": null,
  "purchase_date": null,
  "priority": "high",
  "category": "other",
  "is_complaint": true,
  "tags": [
    "product dissatisfaction",
    "complaint"
  ],
  "recommended_next_action": "escalate_to_agent",
  "order_details": null,
  "faq_response": null,
  "creation_date": "2025-08-22T17:26:39.630300"
}


In [118]:
# Define new user input data
user_json = '''
{
    "name": "Joe User",
    "email": "joe@example.com",
    "query": "I'm really not happy with this product I bought",
    "order_id": "QWE-34567",
    "purchase_date": null
}
'''

In [120]:
# Run the entire pipeline
valid_user_json = validate_user_input(user_json).model_dump_json()
customer_query = create_customer_query(valid_user_json)
message, tool_calls, messages = decide_next_action_with_tools(
    customer_query
)
tool_outputs = get_tool_outputs(tool_calls)
support_ticket = generate_structured_support_ticket(
    customer_query, message, tool_outputs
)
print(support_ticket.model_dump_json(indent=2))

Validated user input: ✅ 
{
    "name": "Joe User",
    "email": "joe@example.com",
    "query": "I'm really not happy with this product I bought",
    "order_id": "QWE-34567",
    "purchase_date": null
}

CustomerQuery generated..
Agent requested a call to look up FAQ tool..
Lookup FAQ tool returned: Sorry, I couldn't find an FAQ answer for your question.
{
  "name": "Joe User",
  "email": "joe@example.com",
  "query": "I'm really not happy with this product I bought",
  "order_id": null,
  "purchase_date": null,
  "priority": "high",
  "category": "other",
  "is_complaint": true,
  "tags": [
    "product dissatisfaction",
    "complaint"
  ],
  "recommended_next_action": "escalate_to_agent",
  "order_details": null,
  "faq_response": "Sorry, I couldn't find an FAQ answer for your question.",
  "creation_date": "2025-08-22T17:27:57.889451"
}
