In [2]:
import os
from dotenv import load_dotenv, find_dotenv
# Locate a .env file and load its variables into the process environment.
# The return value is assigned to `_` because it is not needed.
_ = load_dotenv(find_dotenv())

# Indexing os.environ (instead of .get) raises KeyError right here if the key
# is not set, rather than failing later on the first API call.
openai_api_key = os.environ["OPENAI_API_KEY"]

In [3]:
from langchain_openai import ChatOpenAI

# Chat model shared by every extraction chain in this notebook.
# temperature=0 makes sampling as deterministic as the API allows, so the
# extracted values are repeatable when the notebook is re-run.
llm = ChatOpenAI(model='gpt-3.5-turbo-0125', temperature=0)

* **We'll use Pydantic to define a schema to extract personal information**
* Pydantic is a Python library used for data validation. It helps ensure that the data your program receives matches the format you expect, and it provides helpful error messages when the data doesn't conform to your specifications
* **Document the attributes and the schema itself:** This information is sent to the LLM and is used to improve the quality of information extraction

In [10]:
from typing import Optional, List
from langchain_core.pydantic_v1 import BaseModel, Field

class Person(BaseModel):
    """Information about a person."""

    # The class docstring above (not these `#` comments) is what gets sent to
    # the LLM as the description of the Person schema, so it is part of the
    # prompt and should stay short and literal.
    #
    # Conventions for the fields below:
    #   * Every field is Optional and defaults to None, which lets the model
    #     decline to extract a value instead of inventing one.
    #   * Every field carries a `description`, which the LLM also receives;
    #     a clear description can help improve extraction results.
    name: Optional[str] = Field(None, description="The name of the person")
    lastname: Optional[str] = Field(
        None, description="The lastname of person if known"
    )
    country: Optional[str] = Field(
        None, description="The country of the person if known"
    )

## Define the Extractor

Our extractor will be a chain that combines the prompt template (which carries the extraction instructions) with a chat model

In [5]:
from langchain_core.prompts import ChatPromptTemplate

# Two-message chat prompt used by the extraction chains.
# Python joins adjacent string literals with no separator, so every fragment
# of the system message ends with its own punctuation and trailing space;
# otherwise the sentences run together ("...algorithmOnly extract...").
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert extraction algorithm. "
            "Only extract relevant information from the text. "
            "If you do not know the value of an attribute asked to extract, "
            "return null for the attribute's value.",
        ),
        # `{text}` is the template variable filled from the dict passed to
        # `.invoke({'text': ...})`.
        ("human", "{text}"),
    ]
)

In [7]:
# Wrap the chat model so its reply is returned as a `Person` instance, then
# feed the formatted prompt into it.
person_extractor_llm = llm.with_structured_output(schema=Person)
extract_chain = prompt | person_extractor_llm

In [8]:
# Sample customer review used as extraction input. The leading space and the
# spelling are kept exactly as in the raw text; the author's name and country
# appear only in the trailing signature.
comment = (
    ' I really loved the product. '
    'The quality is top-notch and the customer service is outstanding. '
    'I´ve recommended it to all my frends - Sarah Johnson, USA'
)

In [9]:
# Run the single-person chain on the sample review. The dict key must match
# the `{text}` placeholder of the prompt; as the last expression of the cell,
# the result is displayed.
extract_chain.invoke(
    {"text": comment}
)

Person(name='Sarah', lastname='Johnson', country='USA')

* **Note that this extraction is generative**, which means that our model can perform a variety of tasks beyond what is expected. For instance, the model could infer the gender of a user from their name

In [12]:
# Nested pydantic models: `People` wraps a list of the `Person` schema.
class People(BaseModel):
    """Extracted data about people."""

    # A list-typed field lets a single extraction call return several
    # `Person` entries, one for each individual mentioned in the text.
    people: List[Person]

In [13]:
# Same prompt as the single-person chain, but the model's reply is returned
# as a `People` instance (a list of `Person` entries).
people_extractor_llm = llm.with_structured_output(schema=People)
people_chain = prompt | people_extractor_llm

In [15]:
# Sample passage that mentions two different people. The newlines right after
# the opening quotes and before the closing quotes are part of the input.
text_input = """
Alice Johnson from Canada recently reviewed a book she loved. Meanwhile, Bob Smith from the USA shared his insights
"""

# Run the multi-person chain; the dict key must match the `{text}` placeholder
# of the prompt.
response = people_chain.invoke(
    {"text": text_input}
)

# Last expression of the cell, so the result is displayed.
response

People(people=[Person(name='Alice', lastname='Johnson', country='Canada'), Person(name='Bob', lastname='Smith', country='USA')])