In [6]:
import getpass
import os

# Turn on LangSmith tracing for every LangChain call made by this kernel.
os.environ["LANGSMITH_TRACING"] = "true"

# Ask for a secret only when it is not already present in the environment, so
# no key is ever hard-coded in the notebook. Each prompt names the key it wants;
# a bare getpass() falls back to the generic "Password: " prompt, which does not
# tell the user which credential is being requested.
if not os.environ.get("LANGSMITH_API_KEY"):
    os.environ["LANGSMITH_API_KEY"] = getpass.getpass("Enter API key for LangSmith: ")

if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter API key for Google Gemini: ")


In [4]:
from typing import Optional
from pydantic import BaseModel, Field


class Person(BaseModel):
    """Information about a person."""

    # pydantic copies the class docstring and every Field description into the
    # model's JSON schema, which is what with_structured_output hands to the
    # model. Descriptions therefore act as extraction instructions, not just
    # documentation.
    #
    # Every field is Optional with default=None so the model can leave an
    # attribute empty instead of inventing a value.
    name: Optional[str] = Field(default=None, description="The person's name")
    hair_color: Optional[str] = Field(default=None, description="The person's hair color")
    # The unit is stated explicitly: with the unit-less description the model
    # echoed the source text (e.g. '6 feet') into a field named for meters.
    height_in_meters: Optional[str] = Field(default=None, description="Height measured in meters")
    weight: Optional[str] = Field(default=None, description="The person's weight")


# Build a Person without arguments to confirm the schema accepts an empty
# record: every field declares default=None, so all attributes print as None.
person = Person()
print(person)

name=None hair_color=None height_in_meters=None weight=None


In [10]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Two-message prompt: fixed system instructions followed by the text to extract
# from, supplied at invoke time through the {text} variable.
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            # Adjacent string literals are joined with no separator, so each
            # fragment except the last ends with a space; otherwise the
            # sentences run together ("algorithm.Only", "extract,return").
            "You are an expert extraction algorithm. "
            "Only extract relevant information from text. "
            "If you do not know the value of an attribute asked to extract, "
            "return null for the attribute's value.",
        ),
        # Please see the how-to about improving performance with
        # reference examples.
        # MessagesPlaceholder('examples'),
        ("human", "{text}"),
    ]
)

In [11]:
from langchain.chat_models import init_chat_model

# Model selection kept in named constants so it is easy to spot and change.
GEMINI_MODEL = "gemini-2.5-flash"
GEMINI_PROVIDER = "google_genai"

# Chat model reused by the later cells of this notebook.
llm = init_chat_model(GEMINI_MODEL, model_provider=GEMINI_PROVIDER)

# Wrapper around the model bound to the Person schema.
structured_llm = llm.with_structured_output(schema=Person)

In [12]:
# Single-person example: fill the prompt with one sentence, then run the
# structured model on the rendered prompt. The last expression is the cell output.
text = "Alan smith is 6 feet tall and has blond and black hair."
prompt = prompt_template.invoke(dict(text=text))
structured_llm.invoke(prompt)

Person(name='Alan smith', hair_color='blond and black', height_in_meters='6 feet', weight=None)

In [13]:
# Multiple entities
# In most cases, you should be extracting a list of entities rather than a single entity.

# Container schema holding a list of Person records, so one extraction call can
# return zero, one, or many people. `people` uses default_factory=list, so an
# instance created without arguments gets its own empty list.
class Data(BaseModel):
    people: list[Person] = Field(default_factory=list)


# Sentence mentioning two people; Anna's hair colour is only given by reference
# to Jeff's.
text = "My name is Jeff, my hair is black and i am 6 feet tall. Anna has the same color hair as me."
prompt = prompt_template.invoke(dict(text=text))

# Rebind the structured model to the list-valued Data schema before extracting.
structured_llm = llm.with_structured_output(schema=Data)
structured_llm.invoke(prompt)

Data(people=[Person(name='Jeff', hair_color='black', height_in_meters='6 feet', weight=None), Person(name='Anna', hair_color='black', height_in_meters=None, weight=None)])

In [14]:
# Few-shot demonstration: two solved user/assistant exchanges using the 🦜
# operator, followed by an unsolved question for the model to answer.
solved_examples = [("2 🦜 2", "4"), ("2 🦜 3", "5")]

messages = [
    turn
    for question, answer in solved_examples
    for turn in (
        {"role": "user", "content": question},
        {"role": "assistant", "content": answer},
    )
]
messages.append({"role": "user", "content": "3 🦜 4"})

response = llm.invoke(messages)
print(response.content)

7


In [16]:
from langchain_core.utils.function_calling import tool_example_to_messages

# Reference examples as (input text, expected extraction) pairs: one text with
# no people in it and one that names a person without any physical attributes.
examples = [
    (
        "The ocean is vast and blue. It's more than 20,000 feet deep.",
        Data(people=[]),
    ),
    (
        "Fiona traveled far from France to Spain.",
        Data(people=[Person(name="Fiona", hair_color=None, height_in_meters=None)]),
    ),
]

# Convert every example into chat messages and collect them in one flat list.
messages = []
for txt, tool_call in examples:
    # This final AI message is optional for some providers.
    ai_response = "Detected people." if tool_call.people else "Detected no people."
    messages += tool_example_to_messages(txt, [tool_call], ai_response=ai_response)

# Show the generated conversation.
for message in messages:
    message.pretty_print()


The ocean is vast and blue. It's more than 20,000 feet deep.
Tool Calls:
  Data (4342f9ec-26e2-4879-abbd-7ca1a6c42c15)
 Call ID: 4342f9ec-26e2-4879-abbd-7ca1a6c42c15
  Args:
    people: []

You have correctly called this tool.

Detected no people.

Fiona traveled far from France to Spain.
Tool Calls:
  Data (85b768ff-96b3-4537-befb-60e60d7162cd)
 Call ID: 85b768ff-96b3-4537-befb-60e60d7162cd
  Args:
    people: [{'name': 'Fiona', 'hair_color': None, 'height_in_meters': None, 'weight': None}]

You have correctly called this tool.

Detected people.


In [21]:
# Baseline without reference examples: the text mentions no people, yet the
# model is liable to erroneously generate person records for it.
message_no_extraction = dict(
    role="user",
    content="The solar system is large, but earth has only 1 moon.",
)

# Possible solution noted by the author (not applied here):
# class Data(BaseModel):
#     people: list[Person] = []


# Bind the Data schema again so this cell does not rely on an earlier binding,
# then send the message on its own.
structured_llm = llm.with_structured_output(schema=Data)
structured_llm.invoke([message_no_extraction])

Data(people=[Person(name='Earth', hair_color='blue', height_in_meters='12742000 meters', weight='5.972 × 10^24 kg')])

In [18]:
# Same input as the baseline, but preceded by the reference example messages.
structured_llm.invoke([*messages, message_no_extraction])

Data(people=[])