### Output parsers 
- Output parsers are classes that help structure language model responses. An output parser must implement two main methods:

1. `Get format instructions` (`get_format_instructions()`): A method that returns a string containing instructions for how the output of a language model should be formatted.
2. `Parse` (`parse()`): A method that takes in a string (assumed to be the response from a language model) and parses it into some structure.


- Output parsers implement the Runnable interface, the basic building block of the LangChain Expression Language (LCEL). This means they support `invoke`, `ainvoke`, `stream`, `astream`, `batch`, `abatch`, `astream_log` calls.
- Output parsers accept a `string` or `BaseMessage` as input and can return an arbitrary type.

In [3]:
from dotenv import load_dotenv
load_dotenv()

from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser
from langchain.pydantic_v1 import BaseModel, Field
from typing import List

# Target schema for the parser examples: an actor plus the films they starred in.
# Documented with a comment rather than a docstring so the model definition
# itself (and anything derived from it) stays exactly as before.
class Actor(BaseModel):
    # Actor's name.
    name: str = Field(
        description="name of an actor",
    )
    # Titles of the films the actor starred in.
    film_names: List[str] = Field(
        description="list of names of films they starred in",
    )

# Query text for the filmography example; the statements in this cell do not read it.
actor_query = "Generate the filmography for a random actor."

# Payload written with Python-style single quotes instead of JSON double quotes
# (presumably why the strict parser rejects it).
misformatted = "{'name': 'Tom Hanks', 'film_names': ['Forrest Gump']}"

# Strict parser bound to the Actor schema.
parser = PydanticOutputParser(pydantic_object=Actor)
# parser.parse(misformatted)  # >> OutputParserException

from langchain.output_parsers import OutputFixingParser

# Wrap the strict parser together with a chat model and parse the same payload
# again; the recorded cell output shows a well-formed Actor coming back.
new_parser = OutputFixingParser.from_llm(
    llm=ChatOpenAI(),
    parser=parser,
)
new_parser.parse(misformatted)

Actor(name='Tom Hanks', film_names=['Forrest Gump'])

In [5]:
from langchain.prompts import (
    PromptTemplate,
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import (
    PydanticOutputParser,
    OutputFixingParser,
    RetryOutputParser,
)
from pydantic import BaseModel, Field, validator
from typing import List
from langchain.output_parsers import RetryWithErrorOutputParser

# Prompt text with {format_instructions} and {query} placeholders.
# NOTE(review): nothing in the visible cells reads this variable -- the
# PromptTemplate built further down in this cell uses its own inline template
# string, and a later cell rebinds `template`. Confirm whether it is still needed.
template = """Based on the user question, provide an Action and Action Input for what step should be taken.
{format_instructions}
Question: {query}
Response:"""


# Target schema for the retry example: an action and the input to give it.
# Documented with a comment rather than a docstring so the model definition
# itself (and anything derived from it) stays exactly as before.
class Action(BaseModel):
    # Which action to take.
    action: str = Field(
        description="action to take",
    )
    # What to pass to that action.
    action_input: str = Field(
        description="input to the action",
    )


# Strict parser for the Action schema; its format instructions are baked into
# the prompt as a partial variable, leaving only `query` to be supplied.
parser = PydanticOutputParser(pydantic_object=Action)
prompt = PromptTemplate(
    input_variables=["query"],
    partial_variables=dict(format_instructions=parser.get_format_instructions()),
    template="Answer the user query.\n{format_instructions}\n{query}\n",
)
prompt_value = prompt.format_prompt(query="who is leo di caprios gf?")

# A response that is valid JSON but omits the `action_input` field.
bad_response = '{"action": "search"}'
# parser.parse(bad_response)  # > OutputParserException

# OutputFixingParser is handed only the bad response, so it has nothing to base
# `action_input` on; the recorded output shows a made-up placeholder value.
fix_parser = OutputFixingParser.from_llm(llm=ChatOpenAI(), parser=parser)
fixed_action = fix_parser.parse(bad_response)
print(fixed_action)

# RetryWithErrorOutputParser is additionally given the original prompt value,
# and the recorded output shows the user's question filled in as `action_input`.
from langchain.output_parsers import RetryWithErrorOutputParser
retry_parser = RetryWithErrorOutputParser.from_llm(
    llm=OpenAI(temperature=0),
    parser=parser,
)
retried_action = retry_parser.parse_with_prompt(bad_response, prompt_value)
print(retried_action)

action='search' action_input='input_value'
action='search' action_input='who is leo di caprios gf?'


In [11]:
## XMLOutputParser

from langchain.prompts import PromptTemplate
from langchain.output_parsers import XMLOutputParser

# Completion model used for the raw-prompt call in this cell.
model = OpenAI()
actor_query = "Generate the shortened filmography for Tom Hanks."

# Deliberately a plain string, not an f-string: `{actor_query}` must survive as
# a placeholder so that the `.format(...)` call below is what fills it in.
# With an `f` prefix the query would be substituted here and then re-parsed by
# `.format`, which ignores its argument and breaks on any `{`/`}` in the query.
template = """

H:
{actor_query}
Please enclose the movies in <movie></movie> tags
Assistant:
"""

# Ask the model directly (no output parser) and show the raw text it returns.
output = model(template.format(actor_query=actor_query))
print(output)

# XML parser configured with the tag names to use for the structured answer.
parser = XMLOutputParser(
    tags=[
        "movies",
        "actor",
        "film",
        "name",
        "genre",
    ],
)

# The parser's format instructions are pre-filled as a partial variable, so the
# only value supplied at call time is `query`.
prompt = PromptTemplate(
    input_variables=["query"],
    partial_variables=dict(format_instructions=parser.get_format_instructions()),
    template="""
    
    Human:
    {query}
    {format_instructions}
    AI:""",
)

# Compose prompt -> model -> parser and run it once on the actor query.
chain = prompt | model | parser
chain_input = {"query": actor_query}
output = chain.invoke(chain_input)
print(output)

<movie>Big</movie><movie>Forrest Gump</movie><movie>Saving Private Ryan</movie><movie>Cast Away</movie><movie>The Green Mile</movie><movie>Toy Story</movie><movie>A League of Their Own</movie><movie>Apollo 13</movie><movie>The Da Vinci Code</movie><movie>Sully</movie>
{'movies': [{'actor': [{'name': 'Tom Hanks'}, {'film': [{'name': 'Forrest Gump'}, {'genre': 'Drama'}]}, {'film': [{'name': 'Cast Away'}, {'genre': 'Adventure'}]}, {'film': [{'name': 'Saving Private Ryan'}, {'genre': 'War'}]}]}]}
