In [1]:
! pip install gpt4all



In [2]:
import os
from dotenv import load_dotenv

load_dotenv()
base_url = os.environ.get('OPENAI_BASE_URL')
api_key = os.environ.get('OPENAI_API_KEY')
model_name = os.environ.get('OPENAI_MODEL_NAME')

In [3]:
from langchain_openai import ChatOpenAI as LLM

llm = LLM(api_key=api_key, model_name=model_name, base_url=base_url)

In [4]:
from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, field_validator
from typing import List

class MyPydanticParsingModel(BaseModel):
    words: List[str] = Field(
        description='list of substitute words based on the context '
    )
    
    reasons: List[str] = Field(
        description='list of reason words based on the context '
    )
    
    #throw an error if the subsituties starts with a number
    
    @field_validator('words')
    def not_start_with_number(cls, info):
        for item in info:
            if item[0].isnumeric():
                raise ValueError("Error the word can't start with number")
            return info
        
    @field_validator('reasons')
    def end_with_dot(cls, info):
        for idx, item in enumerate(info):
            if item[-1] != ".":
                info[idx] += "."
        return info       

In [5]:
my_parser = PydanticOutputParser(
 pydantic_object=MyPydanticParsingModel,   
)

In [11]:
from langchain_core.prompts import PromptTemplate

template = ("""
            Offer a list of substitutions the specified target_word based on the present context and the reasoning for each word.
            
            target_word={target_word}
            context={context}
            
            with {format_instructions}
            """)

prompt = PromptTemplate(template=template,
                        input_variables=['target_word', 'context'],
                        partial_variables={
                            "format_instructions": my_parser.get_format_instructions()
                        })

In [12]:
user_input = prompt.format_prompt(
    target_word="loyalty",
    context="""
    The loyalty of a solider was so great that even under sever torture, he refused to betray his comrades.
    """
)

In [13]:
output = llm.invoke(user_input.to_string())

In [20]:
my_parser.parse(output.content)

OutputParserException: Failed to parse MyPydanticParsingModel from completion [{"words": ["devotion", "faithfulness", "allegiance"], "reasons": ["refused to betray comrades under torture"]}]. Got: 1 validation error for MyPydanticParsingModel
  Input should be a valid dictionary or instance of MyPydanticParsingModel [type=model_type, input_value=[{'words': ['devotion', '...mrades under torture']}], input_type=list]
    For further information visit https://errors.pydantic.dev/2.7/v/model_type

In [18]:
output.content

'[\n    {"words": ["devotion", "faithfulness", "allegiance"], "reasons": ["refused to betray comrades under torture"]}\n]'

In [17]:
user_input

StringPromptValue(text='\n            Offer a list of substitutions the specified target_word based on the present context and the reasoning for each word.\n            \n            target_word=loyalty\n            context=\n    The loyalty of a solider was so great that even under sever torture, he refused to betray his comrades.\n    \n            \n            with The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"words": {"description": "list of substitute words based on the context ", "items": {"type": "string"}, "title": "Words", "type": "array"}, "reasons": {"d