In [None]:
!pip install langchain-groq
!pip install langchain-community

import os

os.environ["GROQ_API_KEY"] = "mi KEY"

from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
import json

In [17]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import Optional

Inicializamos el modelo

In [19]:
# Initialize Groq LLM
llm = ChatGroq(
    model_name="llama-3.3-70b-versatile",
    temperature=0.7
)

# Define qué información quieres extraer

creamos un clase Persona que sera la que defina cuales son los campos que el LLM va a estraer

In [18]:
class Person(BaseModel):
    """Information about a person."""

    # Doc-string for the entity Person.
    # This doc-string is sent to the LLM as the description of the schema Person,
    # and it can help to improve extraction results.

    # Note that:
    # 1. Each field is an `optional` -- this allows the model to decline to extract it!
    # 2. Each field has a `description` -- this description is used by the LLM.
    # Having a good description can help improve extraction results.
    name: Optional[str] = Field( #es un campo string Opcional por lo cual el si no exite puede asignarce un None por ej
        default=None, description="The name of the person"
    )
    lastname: Optional[str] = Field(
        default=None, description="The lastname of the person if known"
    )
    country: Optional[str] = Field(
        default=None, description="The country of the person if known"
    )




# Definimos el extractor de datos

In [20]:
# Define una solicitud personalizada para proporcionar instrucciones y contexto adicional.
# 1) Puedes añadir ejemplos a la plantilla de solicitud para mejorar la calidad de la extracción.
# 2) Puedes introducir parámetros adicionales para tener en cuenta el contexto (por ejemplo, incluir metadatos sobre el documento del que se extrajo el texto).


# Define a custom prompt to provide instructions and any additional context.
# 1) You can add examples into the prompt template to improve extraction quality
# 2) You can introduce additional parameters to take context into account (e.g., include metadata
#    about the document from which the text was extracted.)
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert extraction algorithm. "
            "Only extract relevant information from the text. "
            "If you do not know the value of an attribute asked to extract, "
            "return null for the attribute's value.",
        ),
        ("human", "{text}"),
    ]
)

In [21]:
chain = prompt | llm.with_structured_output(schema=Person)

In [22]:
comment = "I absolutely love this product! It's been a game-changer for my daily routine. The quality is top-notch and the customer service is outstanding. I've recommended it to all my friends and family. - Sarah Johnson, USA"
chain.invoke({"text": comment})

Person(name='Sarah Johnson', lastname='Johnson', country='USA')

# Extracción de una lista de entidades en lugar de una sola entidad

In [29]:
from typing import List

class Data(BaseModel):
    """Extracted data about people."""

    # Creates a model so that we can extract multiple entities.
    people: List[Person]

In [31]:
# Example input text that mentions multiple people
text_input = """
Alice Johnson from Canada recently reviewed a book she loved. Meanwhile, Bob Smith from the USA shared his insights on the same book in a different review. Both reviews were very insightful.
"""
#nueva chain agregamos with_structured_outout
chain = prompt | llm.with_structured_output(schema=Data)

# Invoke the processing chain on the text
response = chain.invoke({"text": text_input})

# Output the extracted data
response

Data(people=[Person(name='Alice Johnson', lastname='Johnson', country='Canada'), Person(name='Bob Smith', lastname='Smith', country='USA')])