In [1]:
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, validator
from typing import List

In [2]:
# Defining desired data structure.
class Suggestions(BaseModel):
    # Schema for the structured LLM answer: one field, `words`, holding the
    # proposed substitutes. (Kept as a comment rather than a class docstring:
    # pydantic copies a model docstring into the JSON schema, which would change
    # the format instructions sent to the model.)
    words: List[str] = Field(description='List of substitute words based on context')

    # Throw error in case of receiving a numbered-list from API
    @validator('words')
    def not_start_with_number(cls, field):
        """Reject the list if any word begins with a numeric character.

        Args:
            field: The list of words being validated.

        Returns:
            The same list, unchanged, when every word passes.

        Raises:
            ValueError: If a word starts with a numeric character, e.g. an
                entry such as "1. conduct" from a numbered list.
        """
        for item in field:
            # Slice instead of indexing: item[:1] is "" for an empty string, so an
            # empty word is simply "not numeric" rather than raising IndexError.
            if item[:1].isnumeric():
                raise ValueError("The word can not start with the number")
        return field
    

# Wrap the Suggestions model in LangChain's PydanticOutputParser. The resulting
# `parser` is used below both to produce the format instructions embedded in the
# prompt and to parse the model's reply.
parser = PydanticOutputParser(pydantic_object=Suggestions)

In [4]:
from langchain.prompts import PromptTemplate

# Prompt text, one line per literal. {format_instructions} is pre-filled from the
# parser; {target_word} and {context} are supplied when the prompt is formatted.
template = (
    "Give a list of suggestions to substitute the given target_word based on given context.\n"
    "{format_instructions}\n"
    "target_word = {target_word}\n"
    "context={context}\n"
)

prompt = PromptTemplate(
    template=template,
    input_variables=['target_word', 'context'],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Fill in the two remaining placeholders for the example sentence.
input_data = prompt.format_prompt(
    target_word='behaviour',
    context='The behaviour of the students in the classroom was disruptive and made it difficult for the teacher to conduct the lesson.',
)

In [5]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model used by the cells below.
# temperature=0 is presumably chosen to keep replies as repeatable as possible — TODO confirm.
llm = ChatGoogleGenerativeAI(model='gemini-pro', temperature=0)

In [10]:
# Show the fully rendered prompt text, i.e. the exact string passed to the model below.
input_data.to_string()

'Give a list of suggestions to substitute the given target_word based on given context.\nThe output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"words": {"title": "Words", "description": "List of substitute words based on context", "type": "array", "items": {"type": "string"}}}, "required": ["words"]}\n```\ntarget_word = behaviour\ncontext=The behaviour of the students in the classroom was disruptive and made it difficult for the teacher to conduct the lesson.\n'

In [12]:
# Send the rendered prompt to the model; the reply text is read from `output.content` below.
output = llm.invoke(input_data.to_string())

In [17]:
# Turn the reply text into a Suggestions instance.
parser.parse(output.content)

Suggestions(words=['conduct', 'actions', 'demeanor', 'mannerisms'])

# Multiple Outputs Example

In [27]:
class Suggestions(BaseModel):
    # Two-field variant of the answer schema: each substitute word plus the
    # reasoning for it. NOTE: this reuses the name `Suggestions`, so it replaces
    # the single-field model defined earlier in the notebook.
    # (Kept as a comment rather than a class docstring: pydantic copies a model
    # docstring into the JSON schema, which would change the format instructions.)
    words: List[str] = Field(description="list of substitue words based on context")
    reasons: List[str] = Field(description="the reasoning of why this word fits the context")

    @validator('words')
    def not_start_with_number(cls, field):
        """Reject the list if any word begins with a numeric character.

        Args:
            field: The list of words being validated.

        Returns:
            The same list, unchanged, when every word passes.

        Raises:
            ValueError: If a word starts with a numeric character, e.g. an
                entry such as "1. conduct" from a numbered list.
        """
        for item in field:
            # item[:1] is "" for an empty string, so an empty word no longer
            # raises IndexError the way item[0] did.
            if item[:1].isnumeric():
                raise ValueError("The word can not start with numbers!")
        return field

    @validator('reasons')
    def end_with_dot(cls, field):
        """Make sure every reason ends with a period.

        Args:
            field: The list of reasons being validated.

        Returns:
            A new list in which "." has been appended to each reason that did
            not already end with one. An empty reason becomes "." instead of
            raising IndexError, which is what indexing item[-1] did.
        """
        return [item if item.endswith(".") else item + "." for item in field]
    
# Rebind `parser` so it targets the Suggestions model defined just above
# (this replaces the parser created earlier in the notebook).
parser = PydanticOutputParser(pydantic_object=Suggestions)


# Prompt text, one line per literal (the leading "\n" is part of the template).
# {format_instructions} is pre-filled from the parser; {target_word} and
# {context} are supplied when the prompt is formatted.
template = (
    "\n"
    "Offer a list of suggestions to substitute the specified target_word based on the presented context and the reasoning for each word.\n"
    "{format_instructions}\n"
    "target_word={target_word}\n"
    "context={context}\n"
)

prompt = PromptTemplate(
    template=template,
    input_variables=['target_word', 'context'],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Fill in the two remaining placeholders for the example sentence.
input_data = prompt.format_prompt(
    target_word='behaviour',
    context='The behaviour of the students in the classroom was disruptive and made it difficult for the teacher to conduct the lesson.',
)



In [39]:
# Ask the model again, this time with the two-field schema in the prompt.
output = llm.invoke(input_data.to_string())
# NOTE: in the recorded run this parse call raises OutputParserException: the reply
# echoes the schema next to the data, so it is not a single JSON object
# ("Extra data" in the error shown below). The failure is kept on purpose; the
# following sections use this reply to demonstrate the fixing parser.
parser.parse(output.content)

OutputParserException: Failed to parse Suggestions from completion ```
{
 "properties": {
  "words": {
   "title": "Words",
   "description": "list of substitue words based on context",
   "type": "array",
   "items": {
    "type": "string"
   }
  },
  "reasons": {
   "title": "Reasons",
   "description": "the reasoning of why this word fits the context",
   "type": "array",
   "items": {
    "type": "string"
   }
  }
 },
 "required": [
  "words",
  "reasons"
 ]
},
 "words": [
  "conduct",
  "actions",
  "demeanor"
 ],
 "reasons": [
  "Conduct is a synonym of behavior and is commonly used in the context of education.",
  "Actions is a more general term that can refer to any type of behavior, but it is often used to describe specific, observable behaviors.",
  "Demeanor refers to the way a person behaves or carries themselves, and it can be used to describe both positive and negative behaviors."
 ]
}
```. Got: Extra data: line 24 column 2 (char 397)

In [33]:
# Display the raw model message to inspect the reply text that failed to parse.
output

AIMessage(content='```\n{\n "properties": {\n  "words": {\n   "title": "Words",\n   "description": "list of substitue words based on context",\n   "type": "array",\n   "items": {\n    "type": "string"\n   }\n  },\n  "reasons": {\n   "title": "Reasons",\n   "description": "the reasoning of why this word fits the context",\n   "type": "array",\n   "items": {\n    "type": "string"\n   }\n  }\n },\n "required": [\n  "words",\n  "reasons"\n ]\n},\n "words": [\n  "conduct",\n  "actions",\n  "demeanor"\n ],\n "reasons": [\n  "Conduct is a synonym of behavior and is commonly used in the context of education.",\n  "Actions is a more general term that can refer to any type of behavior, but it is often used to describe specific, observable behaviors.",\n  "Demeanor refers to the way a person behaves or carries themselves, and it can be used to describe both positive and negative behaviors."\n ]\n}\n```')

# OutputFixingParser


In [40]:
from langchain.output_parsers import OutputFixingParser

In [42]:
# Build a parser around the existing `parser`, giving it `llm` so it can repair
# replies that fail to parse.
outputfixing_parser = OutputFixingParser.from_llm(parser=parser, llm=llm)
# Re-parse the same reply text that raised OutputParserException earlier.
# NOTE: in the recorded output below, the repaired result contains different
# words and reasons than the failed reply, not just a corrected format.
outputfixing_parser.parse(output.content)

Suggestions(words=['behavior', 'actions', 'conduct'], reasons=["Behavior is a general term that encompasses all aspects of a person's actions and reactions.", 'Actions are specific, observable things that a person does.', 'Conduct is behavior that is in accordance with a set of rules or standards.'])

`OutputFixingParser.from_llm()` takes the original parser and a language model as input parameters. It then returns a new parser that wraps the original one and can use the language model to fix output that fails to parse.

# RetryWithErrorOutputParser


In [43]:
from langchain.output_parsers import RetryWithErrorOutputParser

# Deliberately incomplete reply: valid JSON, but it has no "reasons" key, which
# the current Suggestions model declares alongside "words".
missformatted_output = '{"words": ["conduct", "manner"]}'

# Wrap the existing `parser` with a retry parser backed by `llm`.
retry_parser = RetryWithErrorOutputParser.from_llm(parser=parser, llm=llm)

# Unlike parser.parse(), this call is also given the original prompt value
# (`input_data`) in addition to the bad reply.
retry_parser.parse_with_prompt(missformatted_output, input_data)

Suggestions(words=['demeanor', 'actions'], reasons=['Demeanor refers to the way a person behaves or conducts themselves, which is appropriate in the context of students in a classroom.', "Actions is a more general term for the things that people do, which also fits the context of students' behavior in a classroom."])