In [56]:
from dotenv import load_dotenv

# Load environment variables from the .env file two directories above this notebook
# (presumably this supplies the Hugging Face API token used by the LLM cell — confirm).
load_dotenv('../../.env')

True

# Output Parsers

# Pydantic Output Parser

In [57]:
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, validator
from typing import List

# Define your desired data structure.
class Suggestions(BaseModel):
    # Schema for the model's answer: a list of substitute words.
    # (A class docstring is deliberately avoided: pydantic copies it into the
    # JSON schema, which would change the format instructions sent to the LLM.)
    words: List[str] = Field(description="list of substitue words based on context")

    # Throw error in case of receiving a numbered-list from API
    @validator('words')
    def not_start_with_number(cls, field):
        """Reject entries that begin with a numeric character (e.g. "1. conduct").

        Args:
            field: The parsed list of words.

        Returns:
            The list unchanged when every entry passes the check.

        Raises:
            ValueError: If any entry starts with a numeric character.
        """
        for item in field:
            # Slice rather than index: an empty string is then simply "not numeric"
            # instead of raising IndexError, which pydantic would not report as a
            # validation error.
            if item[:1].isnumeric():
                raise ValueError("The word can not start with numbers!")
        return field

# Parser that validates the model's output against the Suggestions schema.
parser = PydanticOutputParser(pydantic_object=Suggestions)

# Use the parser in template

In [58]:
from langchain.prompts import PromptTemplate

# Instruction text sent to the model. Spelling and grammar are corrected here
# ("substitute", "based on") because this string is passed verbatim to the LLM.
template = """
Offer a list of suggestions to substitute the specified target_word based on the presented context.
{format_instructions}
target_word={target_word}
context={context}
"""

# The parser's format instructions are bound once; only the target word and
# its context are supplied per call.
prompt = PromptTemplate(
    template=template,
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

# format_prompt returns a prompt value object; it is rendered with .to_string()
# before being sent to the model.
model_input = prompt.format_prompt(
    target_word="behaviour",
    context="The behaviour of the students in the classroom was disruptive and made it difficult for the teacher to conduct the lesson."
)

In [59]:
from langchain import PromptTemplate, LLMChain, HuggingFaceHub

# Initialize the Hugging Face Hub-hosted Mistral instruct model.
# NOTE(review): model_kwargs sets both "max_length" (64) and "max_new_tokens" (512).
# These look like competing length limits — confirm which one the endpoint honors
# and drop the other.
llm_mistral = HuggingFaceHub(
    repo_id='mistralai/Mistral-7B-Instruct-v0.2',
    model_kwargs={'temperature':0.5,"max_length": 64,"max_new_tokens":512}
)



# Custom Parser

In [60]:
import ast
def custom_parser(output):
    """Parse an LLM reply containing a JSON object or a Python literal.

    The reply may be wrapped in a Markdown code fence (optionally tagged
    ``json``). Only the fence and its language tag are removed; backticks or
    the word "json" occurring inside the payload are left intact.

    Args:
        output: Raw text returned by the model.

    Returns:
        The decoded Python object (typically a dict).

    Raises:
        ValueError / SyntaxError: If the payload is neither valid JSON nor a
            valid Python literal (raised by ``ast.literal_eval``).
    """
    import json
    import re

    text = output.strip()

    # Prefer the contents of a complete fenced block if one is present.
    fenced = re.search(r"```(?:json\b)?(.*?)```", text, flags=re.DOTALL | re.IGNORECASE)
    if fenced:
        text = fenced.group(1)
    else:
        # Tolerate a dangling/partial fence or a bare leading "json" tag.
        text = re.sub(r"^`*\s*(?:json\b)?", "", text, flags=re.IGNORECASE)
        text = text.rstrip("`")
    text = text.strip()

    try:
        # JSON first, so true/false/null are understood.
        return json.loads(text)
    except ValueError:
        # Fall back to Python-literal syntax (single quotes, True/None, ...).
        # Newlines are dropped, as before, so that raw line breaks inside
        # quoted strings do not break the literal.
        return ast.literal_eval(text.replace("\n", ""))

In [61]:
# Send the rendered prompt to the model and decode the raw completion with the
# hand-written parser.
# NOTE: in the recorded run the model nested its answer under the schema's
# 'properties'/'required' keys instead of returning a bare {"words": [...]} object.
output = llm_mistral(model_input.to_string())

print(custom_parser(output))

{'properties': {'words': ['actions', 'conduct', 'performance', 'responses', 'reactions']}, 'required': ['words']}


# Multiple Output

In [66]:
# Prompt variant that additionally asks the model for the reasoning behind each
# suggested word. Placeholders: format_instructions, target_word, context.
template = """
Offer a list of suggestions to substitute the specified target_word based on the presented context and the reasoning for each word.
{format_instructions}
target_word={target_word}
context={context}
"""

In [67]:
class Suggestions(BaseModel):
    # Schema for an answer with substitute words plus the reasoning for them.
    # (No class docstring on purpose: pydantic would copy it into the JSON
    # schema that is injected into the prompt.)
    words: List[str] = Field(description="list of substitue words based on context")
    reasons: List[str] = Field(description="the reasoning of why this word fits the context")

    @validator('words')
    def not_start_with_number(cls, field):
        """Reject entries that begin with a numeric character (e.g. "1. conduct").

        Args:
            field: The parsed list of words.

        Returns:
            The list unchanged when every entry passes the check.

        Raises:
            ValueError: If any entry starts with a numeric character.
        """
        for item in field:
            # Slicing keeps an empty string from raising IndexError.
            if item[:1].isnumeric():
                raise ValueError("The word can not start with numbers!")
        return field

    @validator('reasons')
    def end_with_dot(cls, field):
        """Make sure every non-empty reason ends with a period.

        Args:
            field: The parsed list of reasons.

        Returns:
            A new list in which each non-empty reason ends with ".".
            Empty strings are passed through untouched rather than raising
            IndexError.
        """
        # Build a new list instead of mutating the incoming one in place.
        return [
            reason if (not reason or reason.endswith(".")) else reason + "."
            for reason in field
        ]

# Comma Separated Output Parser

In [68]:
from langchain.output_parsers import CommaSeparatedListOutputParser

# Parser that turns a comma-separated reply into a list of strings.
parser = CommaSeparatedListOutputParser()

In [69]:
# Prepare the prompt.
# The instruction sentence is passed verbatim to the LLM, so its grammar is
# corrected here ("based on the following text").
template = """
Offer a list of suggestions to substitute the word '{target_word}' based on the following text: {context}.
{format_instructions}
"""

# The parser's format instructions are bound once; the target word and the
# context are supplied per call.
prompt = PromptTemplate(
    template=template,
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

# prompt.format returns the rendered prompt as a plain string.
model_input = prompt.format(
    target_word="behaviour",
    context="The behaviour of the students in the classroom was disruptive and made it difficult for the teacher to conduct the lesson."
)

In [71]:

# Send the request, show the raw reply, then parse it.
# NOTE: in the recorded run the model added a preamble and numbering; the parser
# split purely on commas, so that extra text ended up inside the list items
# (see the output below).
output = llm_mistral(model_input)
print(output)
parser.parse(output)


Here are some suggestions to substitute the word 'behaviour' in the given text:
1. Conduct, 2. Performance, 3. Action, 4. Actions, 5. Demeanor, 6. Manner, 7. Act, 8. Response, 9. Reaction, 10. Execution.

So the list would be: conduct, performance, action, actions, demeanor, manner, act, response, reaction, execution.


["Here are some suggestions to substitute the word 'behaviour' in the given text:\n1. Conduct",
 '2. Performance',
 '3. Action',
 '4. Actions',
 '5. Demeanor',
 '6. Manner',
 '7. Act',
 '8. Response',
 '9. Reaction',
 '10. Execution.\n\nSo the list would be: conduct',
 'performance',
 'action',
 'actions',
 'demeanor',
 'manner',
 'act',
 'response',
 'reaction',
 'execution.']

# Structured Output Parser

In [72]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

# One ResponseSchema per named field the model is asked to return.
response_schemas = [
    ResponseSchema(name="words", description="A substitue word based on context"),
    ResponseSchema(name="reasons", description="the reasoning of why this word fits the context.")
]

# Build the structured parser from those field definitions.
parser = StructuredOutputParser.from_response_schemas(response_schemas)

# Fix Output

In [77]:
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from typing import List

# Define your desired data structure.
class Suggestions(BaseModel):
    # Substitute words proposed for the target word.
    words: List[str] = Field(
        description="list of substitue words based on context",
    )
    # Explanations of why the proposed words fit the context.
    reasons: List[str] = Field(
        description="the reasoning of why this word fits the context",
    )


# Parser that validates model output against the Suggestions schema.
parser = PydanticOutputParser(
    pydantic_object=Suggestions,
)


In [78]:
from langchain.output_parsers import OutputFixingParser

# Wrap the strict parser with an LLM-backed fixer (presumably invoked when the
# wrapped parser fails — confirm against the LangChain docs).
outputfixing_parser = OutputFixingParser.from_llm(parser=parser, llm=llm_mistral)
# Deliberately malformed input: the key is "reasoning" while the schema field is "reasons".
missformatted_output = '{"words": ["conduct", "manner"], "reasoning": ["refers to the way someone acts in a particular situation.", "refers to the way someone behaves in a particular situation."]}'
# NOTE: in the recorded run this call raised OutputParserException — the completion
# contained extra text around the JSON ("Extra data" in the error below).
outputfixing_parser.parse(missformatted_output)

OutputParserException: Failed to parse Suggestions from completion 

Output:
--------------
{
  "properties": {
    "words": {
      "title": "Words",
      "description": "list of substitute words based on context",
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "reasons": {
      "title": "Reasons",
      "description": "the reasoning of why this word fits the context",
      "type": "array",
      "items": {
        "type": "string"
      }
    }
  },
  "required": ["words", "reasons"]
}
-----------------------------
Completion:
-----------------------------
{
  "words": ["behave", "act"],
  "reasons": ["refers to the way someone conducts themselves.", "refers to the way someone performs an action."]
}
-----------------------------

This Completion satisfies the constraints given in the Instructions.

As an assistant I don't have the ability to directly output a JSON formatted string, but I can describe it for you. The output should be a JSON object that conforms to the following schema:

```
{
  "properties": {
    "words": {
      "title": "Words",
      "description": "list of substitute words based on context",
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "reasons": {
      "title": "Reasons",
      "description": "the reasoning of why this word fits the context",
      "type": "array",
      "items": {
        "type": "string"
      }
    }
  },
  "required": ["words", "reasons"]
}
```

The Completion provided above is a valid JSON instance of this schema. It includes an array of words, each of which is a string, and an array of reasons, each of which is also a string. The words and reasons are provided as substitutes based on the context.

For example, if the context is "describe how someone behaved in a situation", then the words might be "behave", "act", "conduct", or "perform", and the reasons might. Got: Extra data: line 22 column 1 (char 441)

# Retry Output Parser

In [79]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from typing import List

# Define data structure.
class Suggestions(BaseModel):
    # Substitute words proposed for the target word.
    words: List[str] = Field(
        description="list of substitue words based on context",
    )
    # Explanations of why the proposed words fit the context.
    reasons: List[str] = Field(
        description="the reasoning of why this word fits the context",
    )


# Parser that validates model output against the Suggestions schema.
parser = PydanticOutputParser(
    pydantic_object=Suggestions,
)

# Define prompt.
# The instruction sentence is passed verbatim to the LLM; spelling and grammar
# are corrected ("substitute", "based on").
template = """
Offer a list of suggestions to substitute the specified target_word based on the presented context and the reasoning for each word.
{format_instructions}
target_word={target_word}
context={context}
"""

# The parser's format instructions are bound once; the target word and the
# context are supplied per call.
prompt = PromptTemplate(
    template=template,
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

# Kept as a prompt value (not a string) because it is later passed to
# parse_with_prompt alongside the completion.
model_input = prompt.format_prompt(
    target_word="behaviour",
    context="The behaviour of the students in the classroom was disruptive and made it difficult for the teacher to conduct the lesson.",
)


In [80]:
from langchain.output_parsers import RetryWithErrorOutputParser

# Deliberately incomplete input: only "words" is present, "reasons" is missing.
missformatted_output = '{"words": ["conduct", "manner"]}'

# Retry parser built around the strict parser and the same LLM.
retry_parser = RetryWithErrorOutputParser.from_llm(parser=parser, llm=llm_mistral)

# The original prompt value is passed together with the bad completion
# (presumably so the LLM can regenerate a complete answer — confirm).
retry_parser.parse_with_prompt(missformatted_output, model_input)

Suggestions(words=['conduct', 'performance', 'actions'], reasons=["The teacher is having trouble managing the class and needs to focus on teaching. 'Conduct' and 'performance' describe the students' behavior in a way that highlights their impact on the learning environment. 'Actions' is a more general term that can encompass a range of behaviors, both positive and negative."])