In [16]:
from dotenv import load_dotenv
from typing import List, Optional
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI

# Read a local .env file into the process environment before any client is
# created. NOTE(review): presumably this is where the OpenAI API key comes
# from -- confirm against the project's .env. As the cell's last expression,
# the call's return value is what the cell displays.
load_dotenv()

True

In [17]:
class Person(BaseModel):
    """Information about a person"""

    # Every attribute is Optional and defaults to None, so a person can still
    # be returned when the text does not state a given detail.
    # NOTE(review): the class docstring and the Field descriptions are
    # presumably forwarded to the model as part of the schema -- treat them as
    # prompt text and confirm before rewording.
    name: Optional[str] = Field(None, description="The name of the person")
    hair_color: Optional[str] = Field(
        None, description="The color of the person's hair"
    )
    height_in_meters: Optional[float] = Field(
        None, description="Height of the person measured in meters"
    )
    
class Data(BaseModel):
    """Extracted data about people."""

    # Wrapper model: holding a list of Person records lets one extraction
    # call return several people instead of a single entity.
    people: List[Person]

In [18]:
# Custom prompt carrying the extraction instructions and any extra context.
# Two ways to extend it:
#   1) add examples to the prompt template to improve extraction quality
#      (see the commented-out MessagesPlaceholder entry below);
#   2) introduce additional input parameters so context can be taken into
#      account (e.g. metadata about the document the text was extracted from).
system_instructions = (
    "You are an expert extraction algorithm. "
    "Only extract relevant information from the text. "
    "If you do not know the value of an attribute asked to extract, "
    "return null for the attribute's value."
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_instructions),
        # MessagesPlaceholder('examples'),
        ("human", "{text}"),  # filled from the "text" key passed at invoke time
    ]
)

In [19]:
# Chat model used for the extraction. The previous pin, "gpt-3.5-turbo-0613",
# is a dated snapshot that OpenAI has retired (see the OpenAI API deprecations
# page), so requests against it fail on a fresh run; the maintained
# "gpt-3.5-turbo" alias keeps the notebook runnable.
# temperature=0 requests the least random sampling the API offers.
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")

# Compose the chain: the rendered prompt is piped into the model, which is
# configured to hand back its answer as a `Data` instance.
runnable = prompt | llm.with_structured_output(schema=Data)

In [24]:
# Sample passage describing two people; the second person's hair colour is
# given only by reference to the first.
text = (
    "Alan Smith is 6 feet tall and has blond hair. "
    "Anna Croft is 5 feet tall and has the same color hair"
)

# Run the chain on the passage; as the cell's last expression, the result is
# what the cell displays.
runnable.invoke({"text": text})

Data(people=[Person(name='Alan Smith', hair_color='blond', height_in_meters=1.83), Person(name='Anna Croft', hair_color='blond', height_in_meters=1.52)])