In [20]:
from dotenv import load_dotenv
# Load settings from a local .env file before any model is created.
# NOTE(review): presumably this supplies the API key used by the chat model below — confirm.
load_dotenv()

True

### The Schema

In [21]:
from typing import Optional
from pydantic import BaseModel, Field

class Person(BaseModel):
    """Information about a person."""

    # The class docstring and the Field descriptions end up in the schema that
    # pydantic generates for this model, so their wording is what the LLM sees.
    # Every attribute is optional (default None) so that unknown values can be
    # left empty, matching the "return null" instruction in the system prompt.
    name: Optional[str] = Field(default=None, description="The name of the person.")
    hair_color: Optional[str] = Field(default=None, description="The color of the person's hair if known.")
    # Typed as a string, so the value is not validated as a number.
    height_in_meters: Optional[str] = Field(default=None, description="Height measured in meters.")

### The Extractor

In [22]:
from langchain_core.prompts import ChatPromptTemplate

# Instructions for the system turn: extract only what the text supports and
# use null for anything that cannot be determined.
system_instructions = (
    "You are an expert extraction algorithm. Only extract relevant "
    "information from the text. If you do not know value of an attribute "
    "asked to extract, return null for the attribute's value"
)

# Two-message template: fixed system instructions, then a human turn whose
# {text} variable is filled in when the template is invoked.
prompt_template = ChatPromptTemplate.from_messages(
    [("system", system_instructions), ("human", "{text}")]
)


In [23]:
from langchain.chat_models import init_chat_model

# Create the chat model with temperature 0, then wrap it so that replies are
# parsed into the Person schema.
chat_model = init_chat_model(model="gpt-4o-mini", temperature=0)
structured_llm = chat_model.with_structured_output(schema=Person)

In [24]:
# Sample sentence describing a single person.
text = "Alan Smith is 6 feet tall and has blonde hair."

# Fill the template's {text} variable, then pass the rendered prompt to the
# structured-output model.
prompt = prompt_template.invoke(dict(text=text))
response = structured_llm.invoke(prompt)

In [25]:
# Show the structured result returned by structured_llm.invoke above.
response

Person(name='Alan Smith', hair_color='blonde', height_in_meters='1.83')

### Multiple Entities

In [26]:
from typing import Optional
from pydantic import BaseModel, Field

class Person(BaseModel):
    """Information about a person."""

    # All three attributes are optional strings that fall back to None when
    # no value is provided.
    name: Optional[str] = Field(
        default=None,
        description="The name of the person.",
    )
    hair_color: Optional[str] = Field(
        default=None,
        description="The color of the person's hair if known.",
    )
    height_in_meters: Optional[str] = Field(
        default=None,
        description="Height measured in meters.",
    )
   

class Data(BaseModel):
    """Extract data about people."""
    
    # Container for the extracted Person records; required (no default).
    # NOTE(review): the capitalised name is unusual for a Python attribute, but
    # renaming it would change the schema key and the recorded output, so it is
    # left as is.
    People: list[Person]

In [27]:
# Same model settings as before, but the output is now parsed into the Data
# schema (a list of Person records) instead of a single Person.
chat_model = init_chat_model(model="gpt-4o-mini", temperature=0)
structured_llm = chat_model.with_structured_output(schema=Data)

In [28]:
# Passage mentioning two people; Anna's hair colour is given only by
# reference to Jeff's.
text = "My name is Jeff, my hair is black and I am 6 feet tall. Anna has the same color hair as me."

# Render the prompt with this passage and run the structured-output model on it.
prompt = prompt_template.invoke({"text": text})
response = structured_llm.invoke(prompt)

In [29]:
# Show the structured result returned by structured_llm.invoke above.
response

Data(People=[Person(name='Jeff', hair_color='black', height_in_meters='1.83'), Person(name='Anna', hair_color='black', height_in_meters=None)])

### Placeholder

In [41]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Extraction prompt with room for few-shot examples: system instructions,
# then any example messages, then the human turn carrying the text to analyse.
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert extraction algorithm. "
            "Only extract relevant information from the text. "
            "If you do not know value of an attribute asked to extract, "
            "return null for the attribute's value"
        ),
        # Slot for a list of example messages, supplied at invoke time under
        # the "examples" key.
        MessagesPlaceholder("examples"),
        ("human", "{text}"),
    ]
)

In [42]:
from langchain_core.messages import HumanMessage, AIMessage

# Few-shot demonstration: one human request followed by the AI reply to imitate.
examples = [
    HumanMessage(content="Extract info from: Steve Jobs founded Apple."),
    AIMessage(content='{"name": "Steve Jobs", "company": "Apple"}'),
]

# The new sentence that fills the template's {text} variable.
text_input = "Bill Gates founded Microsoft."

# Render the template: the examples land in the placeholder, the input in the
# final human turn.
prompt = prompt_template.invoke(dict(examples=examples, text=text_input))

print(prompt)


messages=[SystemMessage(content="You are an expert extraction algorithm. Only extract relevant information from the text. If you do not know value of an attribute asked to extract, to extract, return null for the attribute's value", additional_kwargs={}, response_metadata={}), HumanMessage(content='Extract info from: Steve Jobs founded Apple.', additional_kwargs={}, response_metadata={}), AIMessage(content='{"name": "Steve Jobs", "company": "Apple"}', additional_kwargs={}, response_metadata={}), HumanMessage(content='Bill Gates founded Microsoft.', additional_kwargs={}, response_metadata={})]
