In [1]:
# Smoke test: prints a greeting to confirm the kernel executes cells.
print("Hello ")

Hello 


### Key Data Extraction App

In [2]:
import os

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser


from dotenv import load_dotenv, find_dotenv

In [3]:
# Load variables from the nearest .env file into the process environment.
# The return value is not needed, and assigning it to `_` would shadow
# IPython's "last output" variable, so the call is left bare.
load_dotenv(find_dotenv())

# Required by the Groq chat model used below: fail fast with a KeyError if
# the key is missing.
groq_api_key = os.environ['GROQ_API_KEY']

# Not referenced by any cell visible in this notebook, so a missing value must
# not abort the run; each is None when its variable is unset.
hf_token_api = os.environ.get('HF_TOKEN')
pinecone_api_key = os.environ.get('PINECONE_API_KEY')

In [7]:
from langchain_groq import ChatGroq

# Chat model that performs the extraction.
# temperature=0 keeps the output as repeatable as possible, so re-running the
# notebook does not sample a different answer for the same input text.
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0,
)

In [24]:
from typing import Optional

from langchain_core.pydantic_v1 import BaseModel, Field

class Person(BaseModel):
    """Information about a person mentioned in the text."""

    # Per the LangChain extraction guide, the class docstring and every Field
    # description are passed to the model as the schema description, so they
    # act as extraction instructions and should read cleanly.
    #
    # Each field is Optional with a None default so the model can return null
    # for a value that is not present in the text.
    name: Optional[str] = Field(
        default=None, description="The First name of the person"
    )
    lastname: Optional[str] = Field(
        default=None, description="The lastname of the person"
    )
    country: Optional[str] = Field(
        default=None, description="The country of the person if known"
    )

### Define the Extractor

In [11]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt template messages: a fixed system instruction plus the user's text.
# Adjacent string literals are concatenated with no separator, so every
# fragment except the last ends with an explicit space to keep the sentences
# apart.
messages = [
    ("system", "You are an expert extraction algorithm. "
               "Only extract relevant information from text. "
               "If you do not know the value of attribute asked to extract, "
               "return null for the attribute's value."),
    # {text} is the template variable filled in when the chain is invoked.
    ("human", "{text}")
]

# Build the chat prompt from the (role, template) pairs defined above.
prompt = ChatPromptTemplate.from_messages(messages)

### Using Structured Output with Chat Models

- We need to use a model that supports function/tool calling.  
- Please review the [documentation](https://platform.openai.com/docs/guides/function-calling) for background on function calling; this notebook uses the Groq-hosted `llama-3.3-70b-versatile` model for it.  
- We will use `.with_structured_output()` to add the extraction instructions to our chat model.


In [12]:
# Compose prompt -> model. The model is bound to the Person schema, so the
# chain yields a Person object instead of free text.
chain = prompt | llm.with_structured_output(Person)

In [14]:
# Sample single-person passage used as input to the extraction chain.
comment = (
    "James Carter is a data analyst living in Canada. "
    "He enjoys working with large datasets and creating insightful "
    "visualizations to help companies make better decisions."
)

In [25]:
# Run the extraction on the sample passage; leaving the call as the cell's
# last expression lets Jupyter display the returned object.
chain.invoke(dict(text=comment))

Person(name='James', lastname='Carter', country='Canada')

In [29]:
from typing import Optional, List

from langchain_core.pydantic_v1 import BaseModel, Field

class Person(BaseModel):
    """Information about a person mentioned in the text."""

    # Per the LangChain extraction guide, the class docstring and every Field
    # description are passed to the model as the schema description, so they
    # act as extraction instructions and should read cleanly.
    #
    # Each field is Optional with a None default so the model can return null
    # for a value that is not present in the text.
    name: Optional[str] = Field(
        default=None, description="The First name of the person"
    )
    lastname: Optional[str] = Field(
        default=None, description="The lastname of the person"
    )
    country: Optional[str] = Field(
        default=None, description="The country of the person if known"
    )

class Data(BaseModel):
    """Extracted data about every person mentioned in the text."""

    # Wrapping Person in a list lets a single call return several people.
    people: List[Person]

In [30]:
# Rebuild the chain with the Data schema so one call can return a list of
# people instead of a single Person.
chain = prompt | llm.with_structured_output(Data)

In [31]:
# Five one-sentence bios, one per line, used as multi-person extraction input.
# The value begins and ends with a newline.
raw_data = (
    "\n"
    "Emily Zhang from Australia is a software engineer specializing in AI.\n"
    "Mohammed Al-Farsi, based in Oman, works as a cybersecurity consultant.\n"
    "Lucas Romero is a data scientist from Argentina with a passion for machine learning.\n"
    "Amina Diouf, originally from Senegal, is currently studying data engineering in France.\n"
    "Tomoko Saito lives in Japan and develops mobile apps for health tracking.\n"
)

# Extract every person from the multi-line passage in a single call.
response = chain.invoke(dict(text=raw_data))

In [32]:
# Bare expression so Jupyter displays the extraction result.
response

Data(people=[Person(name='Emily', lastname='Zhang', country='Australia'), Person(name='Mohammed', lastname='Al-Farsi', country='Oman'), Person(name='Lucas', lastname='Romero', country='Argentina'), Person(name='Amina', lastname='Diouf', country='Senegal'), Person(name='Tomoko', lastname='Saito', country='Japan')])