Setup: create the model clients used by the examples below

In [1]:
from rich import print as pprint

In [2]:
from langchain_google_vertexai import VertexAI

# Pin the model explicitly rather than relying on the library's default model,
# which is not guaranteed to stay the same across langchain-google-vertexai
# releases. This is the same model version that `chat_model` uses.
llm = VertexAI(model_name="gemini-2.0-flash-001")

In [3]:
from langchain_google_vertexai import ChatVertexAI

# Chat-model client, pinned to a specific Gemini model version.
chat_model = ChatVertexAI(model_name="gemini-2.0-flash-001")

In [6]:
from langchain.prompts import PromptTemplate

# Output parser

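An LLM returns a plain string, but in application code we usually want structured data. An output parser bridges that gap: it supplies format instructions to include in the prompt and then converts the model's text reply into a Python object.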

## 1. CommaSeparatedListOutputParser

In [10]:
from langchain.output_parsers import CommaSeparatedListOutputParser

In [13]:
# Create the list parser and fetch the format instructions that are later
# embedded in the prompt.
output_parser = CommaSeparatedListOutputParser()
instructions = output_parser.get_format_instructions()
# Show the instruction text so the reader can see what gets appended to the prompt.
pprint(instructions)

In [16]:
# The format instructions are fixed for this parser, so they are bound once as
# a partial variable; {input} is the only placeholder left to fill per request.
prompt_template = PromptTemplate.from_template(
    "Please return 3 representative {input}.\n{instructions}",
    partial_variables=dict(instructions=instructions),
)
prompt = prompt_template.format(input="Programming Languages")
pprint(prompt)

In [17]:
# Send the rendered prompt to the model and show the raw reply before parsing it.
response = llm.invoke(prompt)
pprint(response)

In [18]:
# Check which Python type the raw model reply has.
type(response)

str

In [21]:
# Hand the raw reply to the same parser that produced the format instructions.
result = output_parser.parse(response)

# A bare name on the last line displays the parsed value as the cell output.
result

['Python', 'Java', 'C++']

In [20]:
# Check which Python type the parser returned.
type(result)

list

## 2. StructuredOutputParser
We can use `StructuredOutputParser` to convert the LLM response into a Python dictionary.

In [39]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

Define the response schemas and build the parser that returns a dictionary.

In [40]:
# One ResponseSchema per field requested from the model: a name plus a
# description of what that field should contain.
schemas = [
    ResponseSchema(name=key, description=meaning)
    for key, meaning in (
        ("answer", "Content of response"),
        ("source", "the source of content of response"),
    )
]
# Build the parser from the schemas, then fetch the format instructions that
# will be embedded in the prompt.
output_parser = StructuredOutputParser.from_response_schemas(schemas)
instruction = output_parser.get_format_instructions()


In [41]:
# Show the format instructions obtained from the parser.
pprint(instruction)

In [42]:
# Bind the parser's format instructions once; only {question} varies per call.
prompt_template = PromptTemplate.from_template(
    "Please reply user's question. \n{question}\n{instruction}",
    partial_variables=dict(instruction=instruction),
)
prompt = prompt_template.format(question="There are how many nationalities in China?")

In [43]:
# Query the model with the rendered question prompt.
response = llm.invoke(prompt)

# Show the raw reply before it is parsed.
pprint(response)

In [44]:
# Parse the raw reply with the current parser; the bare name on the last line
# displays the parsed value as the cell output.
output = output_parser.parse(response)
output

{'answer': 'There are 56 officially recognized nationalities (or ethnic groups) in China. The Han Chinese are the majority, and the other 55 are officially recognized as minority groups.',
 'source': 'Various sources including official government websites and academic studies on ethnicity in China.'}

In [45]:
# Check which Python type the parser returned.
type(output)

dict

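## 3. XMLOutputParser
`XMLOutputParser` asks the model to reply in XML using the given tags and parses that reply into a nested Python dictionary.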

In [46]:
from langchain.output_parsers import XMLOutputParser

In [47]:
# Tag names passed to the XML parser; its format instructions are shown below.
output_parser = XMLOutputParser(
    tags=[
        "movies",
        "movie",
        "name",
        "director",
        "year",
    ],
)
instruction = output_parser.get_format_instructions()
pprint(instruction)

In [49]:
# The only placeholder is already bound through partial_variables, so
# format() is called without arguments.
prompt_template = PromptTemplate.from_template(
    "Tell me two representative Japanese movies\n{instruction}",
    partial_variables=dict(instruction=instruction),
)
prompt = prompt_template.format()
pprint(prompt)

In [54]:
# Query the model with the rendered movie prompt.
response = llm.invoke(prompt)

# Show the raw reply before it is parsed.
pprint(response)

In [55]:
# Parse the raw reply with the current parser.
output = output_parser.parse(response)

In [56]:
# Check which Python type the parser returned.
type(output)

dict

In [57]:
# Display the parsed value; in the recorded output each tag becomes a dict key
# and child elements appear as a list of single-key dicts.
output

{'movies': [{'movie': [{'name': 'Seven Samurai'},
    {'director': 'Akira Kurosawa'},
    {'year': '1954'}]},
  {'movie': [{'name': 'Spirited Away'},
    {'director': 'Hayao Miyazaki'},
    {'year': '2001'}]}]}