# Structured Output

In [1]:
from devtools import debug
from dotenv import load_dotenv

# Load variables from a local `.env` file into this process's environment.
load_dotenv(verbose=True)

# NOTE(review): IPython runs `!` commands in a temporary subshell, so this export
# presumably does not persist into the kernel or change `sys.path` - confirm whether
# it is still needed (the imports below use the `python.` package prefix).
!export PYTHONPATH=":./python"

### Method #1 : Provide format instructions in the prompt

In [2]:
from langchain.output_parsers import PydanticOutputParser

# IMPORTANT : select Pydantic V1
from langchain_core.pydantic_v1 import BaseModel, Field

In [4]:
"""
The usual "tell me a joke" LLM call.
"""


from python.ai_core.llm import get_llm
from python.ai_core.prompts import def_prompt


# Schema of the structured answer expected from the LLM.
# This is the single definition of `Joke` for the notebook: defining the class twice
# in one cell lets the later definition silently shadow the earlier one.
# Documented with `#` comments rather than a class docstring on purpose: Pydantic
# copies a model docstring into the generated schema, which would change the format
# instructions sent to the model.
class Joke(BaseModel):
    # The joke text itself.
    the_joke: str = Field(description="a good joke")
    # The model's explanation of the humour.
    explanation: str = Field(description="explain why it's funny")
    # Self-assessed score; the 0-5 range is only stated in the description,
    # it is not enforced by validation.
    rate: float = Field(description="rate how the joke is funny between 0 and 5")

# The parser plays two roles: it generates the JSON format instructions injected
# into the prompt, and it turns the raw LLM reply into a `Joke` instance.
parser = PydanticOutputParser(pydantic_object=Joke)

# `{topic}` is supplied at invoke time; `{format_instructions}` is pre-filled below.
prompt_with_format = """
    tell me  a joke on {topic}     
    --- 
    {format_instructions}"""

# Build the prompt template first, then bind the parser's instructions so that
# callers only have to provide the topic.
base_prompt = def_prompt(user=prompt_with_format)
structured_prompt = base_prompt.partial(
    format_instructions=parser.get_format_instructions(),
)

# No explicit model id here; presumably `get_llm` then falls back to its
# configured default model - confirm in python.ai_core.llm.
LLM_ID = None
joke_llm = get_llm(llm_id=LLM_ID)

# prompt -> LLM -> parser
structured_joke = structured_prompt | joke_llm | parser

r = structured_joke.invoke({"topic": "cat"})
debug(r)

[32m2024-09-04 10:52:43.381[0m | [1mINFO    [0m | [36mpython.ai_core.llm[0m:[36mget_llm[0m:[36m319[0m - [1mget LLM:'gpt_4omini_edenai' -configurable: False - streaming: False[0m
[32m2024-09-04 10:52:43.390[0m | [1mINFO    [0m | [36mpython.ai_core.cache[0m:[36mset_cache[0m:[36m54[0m - [1mLLM cache : SQLiteCache[0m


/tmp/ipykernel_378732/4055731540.py:36 <module>
    r: Joke(
        the_joke='Why was the cat sitting on the computer? Because it wanted to keep an eye on the mouse!',
        explanation=(
            "This joke is funny because it plays on the double meaning of 'mouse'—the computer accessory and the small"
            ' animal that cats typically chase. The image of a cat being interested in a computer adds a humorous twis'
            't.'
        ),
        rate=4.0,
    ) (Joke)


Joke(the_joke='Why was the cat sitting on the computer? Because it wanted to keep an eye on the mouse!', explanation="This joke is funny because it plays on the double meaning of 'mouse'—the computer accessory and the small animal that cats typically chase. The image of a cat being interested in a computer adds a humorous twist.", rate=4.0)

In [5]:
# Inspect the prompt template object, including the pre-filled `format_instructions`
# partial variable.
debug(structured_prompt)

/tmp/ipykernel_378732/2639052080.py:1 <module>
    structured_prompt: ChatPromptTemplate(
        input_variables=['topic'],
        partial_variables={
            'format_instructions': (
                'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n'
                '\n'
                'As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strin'
                'gs", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\n'
                'the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"propertie'
                's": {"foo": ["bar", "baz"]}} is not well-formatted.\n'
                '\n'
                'Here is the output schema:\n'
                '```\n'
                '{"properties": {"the_joke": {"title": "The Joke", "description": "a good joke", "type": "string"}, "e'
                'xplanation": {"title": "Explanation", "descr

ChatPromptTemplate(input_variables=['topic'], partial_variables={'format_instructions': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"the_joke": {"title": "The Joke", "description": "a good joke", "type": "string"}, "explanation": {"title": "Explanation", "description": "explain why it\'s funny", "type": "string"}, "rate": {"title": "Rate", "description": "rate how the joke is funny between 0 and 5", "type": "number"}}, "required": ["the_joke", "explanation", "rate"]}\n```'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['format_instr

In [6]:
# Render the prompt for a sample topic and show the text of its first message,
# i.e. what is actually sent to the LLM.
rendered_prompt = structured_prompt.invoke({"topic": "cat"})
first_message = rendered_prompt.messages[0]
print(first_message.content)

tell me  a joke on cat     
--- 
The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"the_joke": {"title": "The Joke", "description": "a good joke", "type": "string"}, "explanation": {"title": "Explanation", "description": "explain why it's funny", "type": "string"}, "rate": {"title": "Rate", "description": "rate how the joke is funny between 0 and 5", "type": "number"}}, "required": ["the_joke", "explanation", "rate"]}
```


### Method #2 : Use "with_structured_output" (based on function calling)

In [7]:
# No format instructions in the prompt this time: the schema is passed to the
# model through `with_structured_output`.
prompt = "tell me  a joke on {topic}"

# Explicit model id for this method (alternative: MODEL = None, as in the
# previous method's LLM_ID).
MODEL = "gpt_4_azure"

joke_prompt = def_prompt(prompt)
# Wrap the LLM so that its answer comes back directly as a `Joke` instance.
structured_llm = get_llm(llm_id=MODEL).with_structured_output(Joke)
chain = joke_prompt | structured_llm

debug(chain.invoke(({"topic": "cat"})))

[32m2024-09-04 10:54:39.477[0m | [1mINFO    [0m | [36mpython.ai_core.llm[0m:[36mget_llm[0m:[36m319[0m - [1mget LLM:'gpt_4_azure' -configurable: False - streaming: False[0m
[32m2024-09-04 10:54:39.956[0m | [1mINFO    [0m | [36mpython.ai_core.cache[0m:[36mset_cache[0m:[36m54[0m - [1mLLM cache : SQLiteCache[0m


/tmp/ipykernel_378732/925907821.py:6 <module>
    chain.invoke(({"topic": "cat"})): Joke(
        the_joke='Why was the cat sitting on the computer? Because it wanted to keep an eye on the mouse!',
        explanation=(
            "This joke is a play on words. In the context of computers, a 'mouse' is a device used to navigate the int"
            'erface. However, cats are known for chasing and watching mice, which are small rodents. The humor comes f'
            "rom the double meaning of the word 'mouse' and the image of a cat literally sitting on a computer to watc"
            'h a computer mouse, as if it were a real mouse.'
        ),
        rate=3.0,
    ) (Joke)


Joke(the_joke='Why was the cat sitting on the computer? Because it wanted to keep an eye on the mouse!', explanation="This joke is a play on words. In the context of computers, a 'mouse' is a device used to navigate the interface. However, cats are known for chasing and watching mice, which are small rodents. The humor comes from the double meaning of the word 'mouse' and the image of a cat literally sitting on a computer to watch a computer mouse, as if it were a real mouse.", rate=3.0)

## Assignment (Optional)
Rate the above joke using an enum-based output parser.
See the LangChain enum output parser documentation: https://python.langchain.com/v0.1/docs/modules/model_io/output_parsers/types/enum/


In [None]:
from enum import Enum


class JokeRater(Enum):
    """Three-level rating scale for a joke, ordered from worst (0) to best (2)."""

    # NOTE(review): the enum output parser referenced in the assignment appears to
    # require string-valued members - confirm before passing this enum to it.
    NOT_SO_GOOD = 0  # lowest rating
    GOOD = 1  # middle rating
    VERY_GOOD = 2  # highest rating