## Output structured using an LLM

We need to configure a key in:

https://platform.openai.com/api-keys


In [1]:
from openai import OpenAI
from dotenv import load_dotenv
import os
import instructor
from typing import Union
load_dotenv()

# Initialize the OpenAI client
client = OpenAI(api_key=os.getenv('OPENAI_AP'))

from pydantic import BaseModel, Field, field_validator, model_validator
from typing import ClassVar

# Patch the OpenAI client
client = instructor.from_openai(client)

### Example 1

Notice how we can leverage pydantic to define a schema desired in response to our model, and also implement some basic validation.

In [38]:

class UserInfo(BaseModel):
    country: str = Field(description="The country where the user is located")
    city: str = Field(description="The unique identifier for the city where the user is located")
    language: str = Field(description="The primary language spoken by the user")
    age: int= Field(...,description="The age of the user")

    model_config: ClassVar[dict] = {
        "json_schema_extra": {
            "examples": [
                {
                    "country": "united states",
                    "city": "new york",
                    "language": "english",
                    "age": 30
                }
            ]
        }
    }


In [41]:


# Extract structured data from natural language
user_info = client.chat.completions.create(
    model="gpt-4o-mini",
    response_model=UserInfo,
    messages= [{"role": "system", "content": "Please identify entities in the input of user"},
        {"role": "user", "content": "I am Eduardo, I am from Miami United States and I speak English, I am 21"}
    ],
    max_retries=4
)

print(user_info.model_dump())


{'country': 'United States', 'city': 'Miami', 'language': 'English', 'age': 21}


### Example 2

In certain scenarios, it may be necessary to enforce the output to be in UPPERCASE. To achieve this, we can leverage the validation capabilities provided by Pydantic.

In [42]:
class UserInfo(BaseModel):
    country: str = Field(description="The country where the user is located")
    city: str = Field(description="The unique identifier for the city where the user is located")
    language: str = Field(description="The primary language spoken by the user")
    age: int = Field(description="The age of the user")

    model_config: ClassVar[dict] = {
        "json_schema_extra": {
            "examples": [
                {
                    "country": "united state",
                    "city": "new york",
                    "language": "english",
                    "age": 30
                }
            ]
        }
    }

    @field_validator('country', 'city', 'language')
    @classmethod
    def validate_and_uppercase_fields(cls, value: str) -> str:
        return value.upper()

    @model_validator(mode='after')
    def validate_age(self) -> 'UserInfo':
        if self.age <= 0 or self.age >= 120:
            self.age = 18  # Set default value if age is out of range
        return self


In [43]:

# Extract structured data from natural language
user_info = client.chat.completions.create(
    model="gpt-4o-mini",
    response_model=UserInfo,
    messages= [{"role": "system", "content": "Please identify entities in the input of user"},
        {"role": "user", "content": "I am Eduardo from Miami, United States and I speak English, also i am 2000 year old"}
    ],
    max_retries=4
)

print(user_info.model_dump())

{'country': 'UNITED STATES', 'city': 'MIAMI', 'language': 'ENGLISH', 'age': 18}


 Note that although the input specified an age of 2000 years, the system has defaulted to a value of 18 years.

### Example 3

We can nested model according to our needs

In [46]:
from enum import Enum

class SentimentEnum(str, Enum):
    POSITIVE = "POS"
    NEUTRAL = "NEG"

class TextInfo(BaseModel):
    sentiment: SentimentEnum = Field(description="The sentiment of the text")
    length_of_text: int = Field(description="The length of the text in characters")

    model_config: ClassVar[dict] = {
        "json_schema_extra": {
            "examples": [
                {
                    "sentiment": "positive",
                    "length_of_text": 123
                }
            ]
        }
    }

    @model_validator(mode='after')
    def validate_length_of_text(self) -> 'TextInfo':
        if self.length_of_text <= 0:
            raise ValueError('Length of text must be positive')
        return self


In [47]:

# Extract structured data from natural language
user_info = client.chat.completions.create(
    model="gpt-4o-mini",
    response_model=TextInfo,
    messages= [{"role": "system", "content": "Indentifica el sentiemiento y la longitud del texto"},
        {"role": "user", "content": "El dia de hoy fue un dia muy bueno, me siento muy feliz"}
    ],
    max_retries=2
)

print(user_info.model_dump())

{'sentiment': <SentimentEnum.POSITIVE: 'POS'>, 'length_of_text': 61}


In [48]:

# Extract structured data from natural language
user_info = client.chat.completions.create(
    model="gpt-4o-mini",
    response_model=TextInfo,
    messages= [{"role": "system", "content": "Indentifica el sentiemiento y la longitud del texto"},
        {"role": "user", "content": "La verdad hoy me he sentido algo triste, no se que hacer"}
    ],
    max_retries=2
)

print(user_info.model_dump())

{'sentiment': <SentimentEnum.NEUTRAL: 'NEG'>, 'length_of_text': 54}


### example 4

We can enforce a list of Python strings as a response.

In [50]:
from typing import List

class ListSchema(BaseModel):
    feelings: List[str] = Field(description="A list of string items")

    model_config: ClassVar[dict] = {
        "json_schema_extra": {
            "examples": [
                {
                    "feelings": ["happy", "sad", "angry", "excited"]
                }
            ]
        }
    }

    @model_validator(mode='after')
    def validate_feelings(self) -> 'ListSchema':
        if not isinstance(self.feelings, list):
            raise ValueError('Feelings must be a list')
        return self

In [52]:
prompt_system = """actua como un identificador de identidades y lista los sentimientos en los chats escritos por los usuarios
solo identifica las entidades en el texto
"""

prompt_user = """Me comunicque ayer para si me podrian ayudar con el problema de conexion pero la atencion no fue nada buena, estoy muy molesto y enojado,
luego me pasaron a otro operador y luego otro, la verdad terminé frustrado
"""

# Extract structured data from natural language
user_info = client.chat.completions.create(
    model="gpt-4o-mini",
    response_model=ListSchema,
    messages= [{"role": "system", "content": prompt_system},
        {"role": "user", "content": prompt_user}
    ],
    max_retries=2
)

print(user_info.model_dump())

{'feelings': ['molesto', 'enojado', 'frustrado']}


As we notice, we can restrict our output to a list of feelings in Spanish.

### Example 5

In [54]:
class FunctionTestRequest(BaseModel):
    function_name: str = Field(description="The name of the function to be tested")
    input_parameters: dict = Field(description="A dictionary of input parameters for the function")
    expected_output: Union[str, int, float, bool, list, dict] = Field(description="The expected output of the function")
    description: str = Field(description="A brief description of the test case")
    code: str = Field(description="The code to be tested")

    model_config: ClassVar[dict] = {
        "json_schema_extra": {
            "examples": [
                {
                    "function_name": "add",
                    "input_parameters": {"a": 1, "b": 2},
                    "expected_output": 3,
                    "description": "Test case for adding two numbers",
                    "code": "def add(a, b):\n    return a + b"
                }
            ]
        }
    }

In [57]:
prompt_system = """Eres un experto programador de python encargado de escribir pruebas unitarias para una funciones de acuerdo a al requerimiento
definido por el usuario
"""

prompt_user = """Escribe una prueba unitaria para la función que multiplica dos numeros
"""

# Extract structured data from natural language
user_info = client.chat.completions.create(
    model="gpt-4o-mini",
    response_model=FunctionTestRequest,
    messages= [{"role": "system", "content": prompt_system},
        {"role": "user", "content": prompt_user}
    ],
    max_retries=4
)

print(user_info.model_dump())

{'function_name': 'multiply', 'input_parameters': {'a': 2, 'b': 3}, 'expected_output': 6, 'description': 'Test case for multiplying two numbers', 'code': 'def multiply(a, b):\n    return a * b'}


In [61]:
objetivo1=user_info.model_dump()
objetivo1['code']

'def multiply(a, b):\n    return a * b'