In [2]:
from typing import List
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain.output_parsers.datetime import DatetimeOutputParser
from langchain.output_parsers.boolean import BooleanOutputParser
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.exceptions import OutputParserException
from langchain.output_parsers import OutputFixingParser
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
llm = ChatOpenAI(
  model="gpt-4o-mini",
  temperature=0.0,
  base_url="https://openai.vocareum.com/v1"
)

#### Output Parsers

String parsers

In [4]:
llm.invoke("Hello")

AIMessage(content='Hello! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 8, 'total_tokens': 17, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c', 'id': 'chatcmpl-Bjvj62ptpxrwmSHknneDIeBhWEVDq', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--6d139680-cbca-4cbc-94fa-f6d53001f510-0', usage_metadata={'input_tokens': 8, 'output_tokens': 9, 'total_tokens': 17, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [5]:
llm.invoke("hello").content

'Hello! How can I assist you today?'

In [6]:
parser = StrOutputParser()

In [7]:
parser.invoke(
  llm.invoke("Hello")
)

'Hello! How can I assist you today?'

### Other Parsers

Datetime

In [8]:
llm.invoke(
  "Output a random date time in %Y-%m-%dT%H:%M:%S.%fZ. "
  "Don't say anything else."
)

AIMessage(content='2023-05-14T08:23:45.123456Z', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 35, 'total_tokens': 51, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c', 'id': 'chatcmpl-BjvllBrsJrdv66KrZzHDNwrUPfqg9', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--6c445f65-4e24-4c06-a5d1-2607ccf33dbb-0', usage_metadata={'input_tokens': 35, 'output_tokens': 16, 'total_tokens': 51, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [9]:
parser = DatetimeOutputParser()

In [10]:
parser.invoke(
  llm.invoke(
    "Output a random date time in %Y-%m-%dT%H:%M:%S.%fZ. "
    "Don't say anything else."
  )
)

datetime.datetime(2023, 5, 14, 8, 23, 45, 123456)

Boolean

In [11]:
llm.invoke("Are tour an AI? Yes or no only")

AIMessage(content='Yes.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 2, 'prompt_tokens': 16, 'total_tokens': 18, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c', 'id': 'chatcmpl-BjvnnsMPWxF9Om3Vgfa4muWEP9rby', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--ab6985f2-74a7-498f-9936-11dd45aeb6a8-0', usage_metadata={'input_tokens': 16, 'output_tokens': 2, 'total_tokens': 18, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [12]:
parser = BooleanOutputParser()

In [13]:
parser.invoke(
  llm.invoke("Are tour an AI? Yes or no only")
)

True

### Structured

Dict Schema

In [14]:
from typing_extensions import Annotated, TypedDict

class UserInfo(TypedDict):
  """User's info."""
  name: Annotated[str, "", "User's name, Defaults to ''"]
  country: Annotated[str, "", "Where the user lives. Defaults to ''"]

In [15]:
llm_with_structure = llm.with_structured_output(UserInfo)

In [16]:
llm_with_structure.invoke("My name is Henrique, and I am from Brazil.")

{'name': 'Henrique', 'country': 'Brazil'}

In [17]:
llm_with_structure.invoke("The sky is blue.")

{'name': '', 'country': ''}

In [18]:
llm_with_structure.invoke(
  "Hello, my name is the same as the capital of the U.S. "
  "But I'm from a country where we usually associate with kangaroos."
)

{'name': 'Washington', 'country': 'Australia'}

Pydantic

In [19]:
from pydantic import BaseModel, Field

class PydanticUserInfo(BaseModel):
  """User's info."""
  name: Annotated[str, Field(description="User's name, Defaults to ''", defaul=None)]
  country: Annotated[str, Field(description="Where the user lives. Defaults to ''", default=None)]

In [20]:
llm_with_structure = llm.with_structured_output(PydanticUserInfo)

In [21]:
structured_output = llm_with_structure.invoke(
  "The sky is blue"
)

In [22]:
structured_output

PydanticUserInfo(name='', country='')

In [23]:
structured_output = llm_with_structure.invoke(
  "Hello, my name is the same as the capital of the U.S. "
  "But I'm from a country where we usually associate with kangaroos."
)

In [24]:
structured_output

PydanticUserInfo(name='Washington', country='Australia')

### Dealing with Errors

In [25]:
class Performer(BaseModel):
  """FIlmography info about an actor/actress."""
  name: Annotated[str, Field(description="name of an actor/actress")]
  film_names: Annotated[List[str], Field(description="List of names of films they starred in")]

In [26]:
llm_with_structure = llm.with_structured_output(Performer)

In [27]:
response = llm_with_structure.invoke(
  "Generate the filmography for Scarlett Johansson. Top 5 only"
)

response

Performer(name='Scarlett Johansson', film_names=['Lost in Translation (2003)', 'The Avengers (2012)', 'Her (2013)', 'Lucy (2014)', 'Marriage Story (2019)'])

Fixing Parser

In [28]:
response.json()

/tmp/ipykernel_50201/690762135.py:1: PydanticDeprecatedSince20: The `json` method is deprecated; use `model_dump_json` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  response.json()


'{"name":"Scarlett Johansson","film_names":["Lost in Translation (2003)","The Avengers (2012)","Her (2013)","Lucy (2014)","Marriage Story (2019)"]}'

In [31]:
parser = PydanticOutputParser(pydantic_object=Performer)

In [32]:
parser.parse(response.json())

/tmp/ipykernel_50201/2130552313.py:1: PydanticDeprecatedSince20: The `json` method is deprecated; use `model_dump_json` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  parser.parse(response.json())


Performer(name='Scarlett Johansson', film_names=['Lost in Translation (2003)', 'The Avengers (2012)', 'Her (2013)', 'Lucy (2014)', 'Marriage Story (2019)'])

In [33]:
missformatted_result = "{'name': 'Scarlett Johansson', 'film_names': ['The Avengers']}"

In [34]:
try:
  parser.parse(missformatted_result)
except OutputParserException as e:
  print(f"Error parsing output: {e}")

Error parsing output: Invalid json output: {'name': 'Scarlett Johansson', 'film_names': ['The Avengers']}
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE 


In [35]:
new_parser = OutputFixingParser.from_llm(
  llm=llm,
  parser=parser,
)

In [36]:
new_parser.parse(missformatted_result)

Performer(name='Scarlett Johansson', film_names=['The Avengers', 'Lost in Translation', 'Marriage Story', 'Black Widow'])