In [3]:
from dotenv import load_dotenv

# Load settings from a .env file before any client is created.
# NOTE(review): presumably this supplies the OpenAI API key used by ChatOpenAI below — confirm.
load_dotenv()

True

# PydanticOutputParser

In [4]:
from langchain.chat_models import ChatOpenAI

from langchain import LLMChain
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser



In [5]:
from pydantic import BaseModel, Field, validator
from typing import List

In [6]:
# Chat model configuration. `model` is the LLM passed to every LLMChain
# built later in this notebook.
model_name = 'gpt-3.5-turbo'
temperature = 0.0
model = ChatOpenAI(model_name=model_name, temperature=temperature)

## Documentation Example

In [7]:

# Schema for a two-part joke: a question (`setup`) and its answer (`punchline`).
# The class is handed to PydanticOutputParser in a later cell.
class Joke(BaseModel):
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

    # You can add custom validation logic easily with Pydantic.
    @validator('setup')
    def question_ends_with_question_mark(cls, field):
        """Reject a ``setup`` that does not end with a question mark.

        Args:
            field: The candidate ``setup`` string.

        Returns:
            ``field`` unchanged when it ends with ``?``.

        Raises:
            ValueError: If ``field`` is empty or does not end with ``?``.
        """
        # endswith() is simply False for "", whereas indexing field[-1] would
        # raise IndexError on an empty setup instead of the intended ValueError.
        if not field.endswith('?'):
            raise ValueError("Badly formed question!")
        return field

In [8]:
# And a query intended to prompt a language model to populate the data structure.
joke_query = "Tell me a joke."

In [9]:
# Set up a parser for the Joke schema. Its format instructions are injected
# into the prompt template in the next cell.
parser = PydanticOutputParser(pydantic_object=Joke)

In [10]:
# Prompt with a single runtime input (`query`). `format_instructions` is
# pre-filled from the parser, so only the query is supplied when the chain runs.
prompt_template = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

In [11]:
# Tie the chat model to the joke prompt.
chain = LLMChain(llm=model, prompt=prompt_template)

In [12]:
# Run the LLMChain to get the AI-generated answer; the result is handed to
# parser.parse() in the next cell.
output = chain.run(joke_query)

In [13]:
# Turn the model's reply into a Joke instance.
parser.parse(output)

Joke(setup="Why couldn't the bicycle stand up by itself?", punchline='Because it was two tired!')

In [14]:
# Here's another example, but with a compound typed field:
# `film_names` is a list of strings rather than a single string.
class Actor(BaseModel):
    name: str = Field(description="name of an actor")
    film_names: List[str] = Field(description="list of names of films they starred in")

In [15]:
# Query used for the Actor example.
actor_query = "Generate the filmography for a random actor."

In [16]:
# Rebind `parser` to the Actor schema; the Joke parser is no longer reachable
# through this name after this cell runs.
parser = PydanticOutputParser(pydantic_object=Actor)

In [17]:
# Same template text as the Joke example, now pre-filled with the Actor
# parser's format instructions.
prompt_template = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

In [18]:
# Rebuild the chain so it uses the Actor prompt.
chain = LLMChain(llm=model, prompt=prompt_template)

In [19]:
# Run the LLMChain to get the AI-generated answer for the actor query.
output = chain.run(actor_query)

In [21]:
# Show the raw reply string before it is parsed.
output

'{\n  "name": "Tom Hanks",\n  "film_names": ["Forrest Gump", "Cast Away", "Saving Private Ryan", "Toy Story", "The Green Mile"]\n}'

In [20]:
# Turn the raw JSON reply into an Actor instance.
parser.parse(output)

Actor(name='Tom Hanks', film_names=['Forrest Gump', 'Cast Away', 'Saving Private Ryan', 'Toy Story', 'The Green Mile'])

 # parsing multiple outputs

In [23]:
# Schema for the word-substitution answer: a list of replacement words and a
# list of reasons. The Field descriptions are embedded in the format
# instructions sent to the model, so their text is kept verbatim.
# NOTE(review): nothing here checks that `words` and `reasons` have the same
# length — confirm whether one reason per word is required.
class Suggestions(BaseModel):
    words: List[str] = Field(description="list of substitue words based on context")
    reasons: List[str] = Field(description="the reasoning of why this word fits the context")

    @validator('words')
    def not_start_with_number(cls, field):
        """Reject the list if any word begins with a numeric character.

        Args:
            field: The candidate list of words.

        Returns:
            ``field`` unchanged when no word starts with a numeric character.

        Raises:
            ValueError: If any word starts with a numeric character.
        """
        for item in field:
            # item[:1] is "" for an empty word and "".isnumeric() is False, so
            # an empty string no longer raises IndexError as item[0] would.
            if item[:1].isnumeric():
                raise ValueError("The word can not start with numbers!")
        return field

    @validator('reasons')
    def end_with_dot(cls, field):
        """Make sure every reason ends with a period.

        Args:
            field: The candidate list of reasons.

        Returns:
            A new list in which each reason that did not already end with
            ``.`` has one appended (an empty reason becomes ``"."``).
        """
        # Build a fresh list instead of editing the input in place; endswith()
        # also copes with empty strings, where item[-1] would raise IndexError.
        return [item if item.endswith(".") else item + "." for item in field]
    

In [24]:
# Rebind `parser` to the Suggestions schema.
parser = PydanticOutputParser(pydantic_object=Suggestions)


In [25]:
# Prompt text for the substitution task. Placeholders: {format_instructions},
# {target_word} and {context}.
# NOTE(review): the wording contains typos ("substitue", "based the presented
# context"). Fixing them changes the prompt sent to the model — confirm before editing.
template = """
Offer a list of suggestions to substitue the specified target_word based the presented context and the reasoning for each word.
{format_instructions}
target_word={target_word}
context={context}
"""

In [27]:
# Two runtime inputs (`target_word`, `context`); `format_instructions` is
# pre-filled from the Suggestions parser.
prompt_template = PromptTemplate(
    template=template,
    input_variables=['target_word', 'context'],
    partial_variables={'format_instructions': parser.get_format_instructions()}
)

In [31]:
# Rebuild the chain so it uses the substitution prompt.
chain = LLMChain(llm=model, prompt=prompt_template)

In [41]:
# Inputs for the substitution prompt.
# NOTE(review): "1onstructive" looks like either a typo or a deliberate probe
# with a leading digit — confirm which was intended.
target_word = "1onstructive"
context = "in a relationship"

# Render the template locally so the exact prompt text can be inspected.
formatted_prompt = prompt_template.format(
    target_word=target_word,
    context=context,
)
print("🧠 Final Prompt Sent to the LLM:\n")
print(formatted_prompt)

🧠 Final Prompt Sent to the LLM:


Offer a list of suggestions to substitue the specified target_word based the presented context and the reasoning for each word.
The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"words": {"title": "Words", "description": "list of substitue words based on context", "type": "array", "items": {"type": "string"}}, "reasons": {"title": "Reasons", "description": "the reasoning of why this word fits the context", "type": "array", "items": {"type": "string"}}}, "required": ["words", "reasons"]}
```
target_word=1onstructive
context=in a relationship



In [39]:
# Run the chain, supplying both prompt inputs as a dict keyed by variable name.
output = chain.run({"target_word": target_word, "context":context})

In [42]:
# Show the raw reply string before it is parsed.
output

'{\n  "words": ["supportive", "encouraging", "positive", "helpful", "nurturing"],\n  "reasons": ["These words convey a sense of positivity and encouragement in a relationship, which is the opposite of being destructive or critical."]\n}'

In [40]:
# Turn the raw JSON reply into a Suggestions instance.
parser.parse(output)

Suggestions(words=['supportive', 'encouraging', 'positive', 'helpful', 'nurturing'], reasons=['These words convey a sense of positivity and encouragement in a relationship, which is the opposite of being destructive or critical.'])

# Comma seperated list output parser

In [43]:
from langchain.prompts import PromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser