In [1]:
import getpass
import os

# Ask for the LangChain tracing key only when it is missing *or empty* (same
# check the OPENAI_API_KEY cell uses), and turn tracing on in the same step.
# The prompt is labelled so it cannot be confused with the next key prompt.
if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LANGCHAIN_API_KEY: ")

In [2]:
# Prompt for the OpenAI key only when it is missing or empty. The prompt is
# labelled so the user knows which secret is being requested.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OPENAI_API_KEY: ")

#### A. Extracción

In [4]:
from typing import Optional
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

#! Chat model shared by the cells below
llm = ChatOpenAI(model="gpt-4o-mini")

#! Schema
class Person(BaseModel):
    """Information about a person."""

    # The docstring above is kept short on purpose: LangChain forwards it to
    # the LLM as the schema description, together with each field description.
    # Every field is Optional with default None so the model can decline to
    # fill in an attribute it cannot find in the text.
    name: Optional[str] = Field(default=None, description="The name of the person")
    hair_color: Optional[str] = Field(default=None, description="The color of the person's hair if known")
    # NOTE(review): typed as str, so callers receive text such as '1.83'
    # rather than a number.
    height_in_meters: Optional[str] = Field(default=None, description="Height measured in meters")

#! Extractor
# System instruction: extract only what the text states and answer null for
# any attribute whose value is unknown.
system_instruction = (
    "You are an expert extraction algorithm. "
    "Only extract relevant information from the text. "
    "If you do not know the value of an attribute asked to extract, "
    "return null for the attribute's value."
)

# Two-message prompt: the fixed system instruction followed by a human turn
# whose {text} placeholder is filled in at invoke time.
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", system_instruction),
        ("human", "{text}"),
    ]
)

#! Configure the llm for extraction
structured_llm = llm.with_structured_output(schema=Person)


In [5]:
# English sample: the height is stated in feet.
text = "Alan Smith is 6 feet tall and has blond hair."
# Fill the {text} slot of the prompt, then run the structured-output model on it.
prompt = prompt_template.invoke({"text": text})
response = structured_llm.invoke(prompt)
# Bare last expression so the notebook displays the extracted object.
response

Person(name='Alan Smith', hair_color='blond', height_in_meters='1.83')

In [6]:
# Spanish sample: the height is stated in centimetres.
text = "Alan Smith mide 190cm tiene el cabello naranja"
# Fill the {text} slot of the prompt, then run the structured-output model on it.
prompt = prompt_template.invoke({"text": text})
response = structured_llm.invoke(prompt)
# Bare last expression so the notebook displays the extracted object.
response

Person(name='Alan Smith', hair_color='naranja', height_in_meters='1.90')

#### B. Extracción con múltiples entidades

In [9]:
from typing import List

class Data(BaseModel):
    """Extracted data about people."""

    # The docstring is kept short on purpose: LangChain forwards it to the LLM
    # as the schema description.
    # Wrapping Person in a list lets a single call return several people, or
    # an empty list when the text mentions none.
    people: List[Person]

# Rebind structured_llm so the model is asked for the Data schema.
# NOTE(review): this reuses the name bound earlier in the notebook; any cell
# that calls structured_llm after this one runs will get Data objects.
structured_llm = llm.with_structured_output(schema=Data)

In [13]:
# English sample with two people; Anna's hair colour is only given by
# reference to Jeff's.
text = "My name is Jeff, my hair is black and i am 6 feet tall. Anna has the same color hair as me."
# Fill the {text} slot of the prompt, then run the structured-output model on it.
prompt = prompt_template.invoke({"text": text})
response = structured_llm.invoke(prompt)
# Bare last expression so the notebook displays the extracted object.
response

Data(people=[Person(name='Jeff', hair_color='black', height_in_meters='1.83'), Person(name='Anna', hair_color='black', height_in_meters=None)])

In [14]:
# Spanish sample with two people; Anna's hair colour is only given by
# reference to Jeff's.
text = "Me llamo Jeff, tengo el pelo negro y mido 1,80 metros. Anna tiene el pelo del mismo color que yo"
# Fill the {text} slot of the prompt, then run the structured-output model on it.
prompt = prompt_template.invoke({"text": text})
response = structured_llm.invoke(prompt)
# Bare last expression so the notebook displays the extracted object.
response

Data(people=[Person(name='Jeff', hair_color='negro', height_in_meters='1.80'), Person(name='Anna', hair_color='negro', height_in_meters=None)])

#### C. Extracción con ejemplos

In [15]:
# Few-shot demonstration: two solved "🦜" questions are replayed as
# user/assistant turns, then a third question is left for the model to answer.
solved_examples = [("2 🦜 2", "4"), ("2 🦜 3", "5")]
messages = [
    {"role": role, "content": content}
    for question, answer in solved_examples
    for role, content in (("user", question), ("assistant", answer))
]
# The unanswered question goes last.
messages.append({"role": "user", "content": "3 🦜 4"})

response = llm.invoke(messages)
print(response.content)

7


In [21]:
from langchain_core.utils.function_calling import tool_example_to_messages

# Reference examples as (input text, expected extraction) pairs: one text with
# no person in it and one with a person whose other attributes are unknown.
examples = [
    (
        "The ocean is vast and blue. It's more than 20,000 feet deep.",
        Data(people=[]),
    ),
    (
        "Fiona traveled far from France to Spain.",
        Data(people=[Person(name="Fiona", hair_color=None, height_in_meters=None)]),
    ),
]

# Convert each pair into chat messages and collect them in one flat list.
messages = []
for txt, tool_call in examples:
    # Closing assistant text depends on whether the expected extraction is empty.
    ai_response = "Detected people" if tool_call.people else "Detected no people"
    messages += tool_example_to_messages(txt, [tool_call], ai_response=ai_response)

In [23]:
# Print every message in the list, one after another, to inspect the example conversation.
for message in messages:
    message.pretty_print()


The ocean is vast and blue. It's more than 20,000 feet deep.
Tool Calls:
  Data (86536756-ee2b-4d55-af5a-8835d6931ee2)
 Call ID: 86536756-ee2b-4d55-af5a-8835d6931ee2
  Args:
    people: []

You have correctly called this tool.

Detected no people

Fiona traveled far from France to Spain.
Tool Calls:
  Data (c0953198-b438-424a-a1cc-7769c11363f2)
 Call ID: c0953198-b438-424a-a1cc-7769c11363f2
  Args:
    people: [{'name': 'Fiona', 'hair_color': None, 'height_in_meters': None}]

You have correctly called this tool.

Detected people


In [24]:
# User message whose text mentions no person.
message_no_extraction = {
    "role": "user",
    "content": "The solar system is large, but earth has only 1 moon.",
}

# Baseline call: only this message is sent, without the contents of `messages`.
structured_llm = llm.with_structured_output(schema=Data)
structured_llm.invoke([message_no_extraction])

Data(people=[Person(name='Earth', hair_color='none', height_in_meters='0.0')])

In [25]:
# Same input as the baseline call, this time preceded by the contents of `messages`.
structured_llm.invoke(messages + [message_no_extraction])

Data(people=[])