## Output parsers

# Output Parsers are responsible for taking the output of an LLM and parsing it into a more structured format.

In [1]:
import os
from dotenv import load_dotenv, find_dotenv

# Load variables from the .env file located by find_dotenv() into the environment.
_ = load_dotenv(find_dotenv())

# Groq API key, read from the GROQ_API environment variable (None when unset).
GROQ_API = os.getenv("GROQ_API")
from langchain_groq import ChatGroq

# Groq-hosted chat model shared by the chains in this notebook.
chat_groq_model = ChatGroq(
    model="llama-3.1-70b-versatile",
    api_key=GROQ_API,
    temperature=0.7,
    max_retries=3,
)

## Comma Separated List OutputParser

### Parse the output of an LLM call to a comma-separated list.



In [2]:
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate

In [15]:
# Parser that converts a comma-separated LLM reply into a Python list.
output_parser = CommaSeparatedListOutputParser()
# Instruction text for the model; it is injected into the prompt template.
formated_instructions = output_parser.get_format_instructions()

In [17]:
# Prompt asking for five items of a subject. The format instructions are
# pre-filled as a partial variable, so callers only supply `subject`.
list_prompt_template = PromptTemplate(
    template="List the five {subject}. \n {formated_instructions}",
    input_variables=["subject"],
    partial_variables={"formated_instructions": formated_instructions},
)

In [18]:
# LCEL pipeline: prompt -> Groq chat model -> comma-separated list parser.
chain = list_prompt_template | chat_groq_model | output_parser

In [19]:
# Run the pipeline for one subject and show the parsed result.
output = chain.invoke({'subject': 'largest cities in the world'})
print(output)

['Tokyo', 'Delhi', 'Shanghai', 'Mumbai', 'Sao Paulo']


In [20]:
# Show the type of the parsed result (the parser's return value, not the raw model reply).
print(type(output))

<class 'list'>


## Date time OutputParser

### This OutputParser can be used to parse LLM output into datetime format.

In [21]:
from langchain.output_parsers import DatetimeOutputParser

In [48]:
# Parse the model's reply as a date using the date-only pattern.
# The field is named `format`; the previous `Hformat` keyword was a typo,
# so the custom pattern was never actually configured on the parser.
output_datetime_parser = DatetimeOutputParser(format="%Y-%m-%d")
formated_instructions = output_datetime_parser.get_format_instructions()

# Prompt that forwards the user's question followed by the parser's
# format instructions (pre-filled as a partial variable).
date_time_prompt_template = PromptTemplate(
    template="{question}\n{formated_instructions}",
    input_variables=['question'],
    partial_variables={'formated_instructions': formated_instructions},
)

In [49]:
# LCEL pipeline: prompt -> Groq chat model -> datetime parser.
date_time_chain = date_time_prompt_template | chat_groq_model | output_datetime_parser

In [50]:
# Ask a question whose answer is a date; `ans` is the parser's output.
ans = date_time_chain.invoke({'question': 'What is birth date of Narendra modi?'})

In [51]:
# Display the parsed date.
print(ans)

1950-09-17 00:00:00


## Simple Json Output parser

In [6]:
from langchain.output_parsers.json import SimpleJsonOutputParser
from langchain.chains import LLMChain

In [5]:
# Schema-less JSON parser; the last line displays its format instructions.
json_parser = SimpleJsonOutputParser()
json_parser.get_format_instructions()

'Return a JSON object.'

In [7]:
# Raw template text; {cuisine} is substituted when the chain is invoked.
restaurant_name_template = """
  I want to Open a resturant for {cuisine} food. Suggest 
  a fancy name for this
"""

# Wrap the text in a PromptTemplate with a single input variable.
prompt_template = PromptTemplate(
    template=restaurant_name_template,
    input_variables=["cuisine"],
)

In [8]:
# Legacy LLMChain wiring of prompt + model; verbose=True logs the formatted prompt.
# NOTE(review): `json_parser` is not attached to this chain, so the result
# carries the model's raw text rather than parsed JSON.
json_chain = LLMChain(
    llm=chat_groq_model,
    prompt=prompt_template,
    verbose=True,
)

  json_chain = LLMChain(llm = chat_groq_model, prompt = prompt_template,


In [10]:
# A bare string is passed as the chain input; the printed result shows it stored under 'cuisine'.
res = json_chain.invoke("Italian")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
  I want to Open a resturant for Italian food. Suggest 
  a fancy name for this
[0m

[1m> Finished chain.[0m


In [11]:
# Show the chain result: a dict with the input ('cuisine') and the model reply ('text').
print(res)

{'cuisine': 'Italian', 'text': 'That sounds like a delicious venture. Here are some fancy name suggestions for your Italian restaurant:\n\n1. **Bella Vita**: Meaning "Beautiful Life" in Italian, this name evokes a sense of elegance and sophistication.\n2. **Casa Toscana**: Inspired by the Tuscany region in Italy, this name conveys a sense of warmth and hospitality.\n3. **Il Palazzo**: Meaning "The Palace" in Italian, this name suggests a grand and luxurious dining experience.\n4. **Vino e Cucina**: Translating to "Wine and Kitchen," this name highlights the importance of wine pairings and culinary expertise.\n5. **La Dolce Vita**: Meaning "The Sweet Life" in Italian, this name captures the essence of Italian cuisine and culture.\n6. **Ristorante Firenze**: Named after the city of Florence, this name exudes refinement and culture.\n7. **Caffè Milano**: Inspired by the fashion capital of Italy, this name combines the idea of a casual café with the elegance of Milan.\n8. **Tavola Italiana

## JsonOutputParser (With Pydantic)

### This output parser allows users to specify an arbitrary JSON schema and query LLMs for outputs that conform to that schema.


In [13]:
# Pydantic class for the case summary
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain.pydantic_v1 import BaseModel,Field
from langchain.chains import LLMChain

In [32]:
# Schema for a summarized support case; every field is a list of strings.
# The Field descriptions are passed to the parser via `pydantic_object`.
class CaseSummary(BaseModel):
    # Problems reported in the case.
    issue: list[str] = Field(description="Issue of support case")
    # Underlying cause(s) identified for the case.
    root_cause: list[str] = Field(description="Root Cause of support case")
    # Steps taken or recommended to resolve the case.
    resolution: list[str] = Field(description="Resolution of support case")
    # Current status of the case.
    case_status: list[str] = Field(description="Case Status")

In [33]:
# JSON parser configured with the CaseSummary schema.
json_parser = JsonOutputParser(pydantic_object = CaseSummary)

In [34]:
# Prompt for summarizing a support case. The parser's format instructions
# are pre-filled, so only `case` is supplied at invoke time.
# NOTE(review): `description` does not look like a PromptTemplate field;
# confirm it has any effect.
prompt_template = PromptTemplate(
    template="""
    You are an expert support engineer.
    Help me to summarize the below case into issues, root cause , resolution and case status.
    Each bullet point should be a full sentance.
    Avoid showing an personal information.
    If you do not know answer , say "I do not know"
    {formated_instructions}
    case: \n{case}
    """,
    description="Summarize the support case",
    input_variables=["case"],
    partial_variables={
        "formated_instructions": json_parser.get_format_instructions(),
    },
)

In [35]:
# Legacy LLMChain for the case-summary prompt; verbose=True logs the formatted prompt.
json_chain = LLMChain(
    llm=chat_groq_model,
    prompt=prompt_template,
    verbose=True,
)

In [36]:
# Summarize a sample case. The whole string is passed as the chain input.
# NOTE(review): the template already contains a "case:" label, so the
# "case:" prefix in this text is duplicated in the final prompt.
output = json_chain.invoke("""
                  case: \nCustomer is facing issue with the product. The product is not working as expected. The customer is not able to login to system. 
                  The customer is not able to access the data.  Root cause is system os is got corrupted. The resolution is to reinstall the os. cASE status is closed.
                  """)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
    You are an expert support engineer.
    Help me to summarize the below case into issues, root cause , resolution and case status.
    Each bullet point should be a full sentance.
    Avoid showing an personal information.
    If you do not know answer , say "I do not know"
    The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"issue": {"title": "Issue", "description": "Issue of support case", "type": "array", "items": {"type": "string"}}, "root_cause": {"title": "Root Cause", "description": "

In [37]:
# Show the model's raw reply stored under the "text" key.
print(output["text"])

Here is a JSON instance that summarizes the case:

```
{
  "issue": ["The product is not working as expected.", "The customer is not able to login to system.", "The customer is not able to access the data."],
  "root_cause": ["The system os is corrupted."],
  "resolution": ["Reinstall the os."],
  "case_status": ["The case is closed."]
}
```


In [31]:
# Schema for a two-part joke, used as the pydantic object of a JSON parser.
class Joker(BaseModel):
    # The question that sets up the joke.
    setup: str = Field(description="question to setup joke")
    # The answer that resolves the joke.
    punchline: str = Field(description="Answer to resolve the joke")

In [38]:
# JSON parser configured with the Joker schema.
joke_json_pareser  = JsonOutputParser(pydantic_object = Joker)

In [39]:
joke_query = "tell me a joke on cat"

# Prompt that places the parser's format instructions before the user query.
# NOTE(review): `description` does not look like a PromptTemplate field;
# confirm it has any effect.
joke_prompt_template = PromptTemplate(
    template="Answer the user query. \n {formated_instructions} \n {query} \n",
    description="Tell me a joke",
    input_variables=["query"],
    partial_variables={
        "formated_instructions": joke_json_pareser.get_format_instructions(),
    },
)

In [40]:
# Legacy LLMChain for the joke prompt; verbose=True logs the formatted prompt.
joke_chain = LLMChain(
    llm=chat_groq_model,
    prompt=joke_prompt_template,
    verbose=True,
)

In [43]:
# Run the legacy chain; the model's reply is read from output["text"].
output = joke_chain.invoke({"query": joke_query})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mAnswer the user query. 
 The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"setup": {"title": "Setup", "description": "question to setup joke", "type": "string"}, "punchline": {"title": "Punchline", "description": "Answer to resolve the joke", "type": "string"}}, "required": ["setup", "punchline"]}
``` 
 tell me a joke  
[0m

[1m> Finished chain.[0m


In [44]:
# Show the model's raw reply stored under the "text" key.
print(output["text"])

```json
{
  "setup": "Why couldn't the bicycle stand up by itself?",
  "punchline": "Because it was two-tired."
}
```


In [45]:
# Define the chain the new way (LCEL): prompt -> model -> JSON parser.

joke_chain_v1 = joke_prompt_template | chat_groq_model | joke_json_pareser

In [46]:
joke_query = "tell me a joke on cat"
# NOTE(review): this invokes the legacy `joke_chain` (LLMChain), not the LCEL
# `joke_chain_v1`, so `joke_json_pareser` is never applied here.
# Switching to `joke_chain_v1` should yield the parser's output instead of a
# dict with a "text" key, so any later output["text"] access must change too.
output = joke_chain.invoke({"query": joke_query})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mAnswer the user query. 
 The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"setup": {"title": "Setup", "description": "question to setup joke", "type": "string"}, "punchline": {"title": "Punchline", "description": "Answer to resolve the joke", "type": "string"}}, "required": ["setup", "punchline"]}
``` 
 tell me a joke on cat 
[0m

[1m> Finished chain.[0m


In [47]:
# Show the model's raw reply stored under the "text" key.
print(output["text"])

```json
{
  "setup": "Why did the cat join a band?",
  "punchline": "Because it wanted to be the purr-cussionist."
}
```


## JsonOutputParser (Without Pydantic)

### You can also use this without Pydantic. This will prompt the model to return JSON, but doesn't provide specifics about what the schema should be.

In [69]:
# JSON parser created without a pydantic schema.
json_parser = JsonOutputParser()

# Instruction text injected into the prompt below.
formated_instructions = json_parser.get_format_instructions()

In [71]:
joke_query_v1 = """ Tell me a joke"""

# Prompt with the JSON format instructions pre-filled; only `query` is supplied at invoke time.
prompt_template = PromptTemplate(
    template="Answer the user query as best as possible \n{formated_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"formated_instructions": formated_instructions},
)

In [72]:
# LCEL pipeline: prompt -> Groq chat model -> schema-less JSON parser.
chain_joke = prompt_template | chat_groq_model | json_parser
# The parsed result is printed; its keys are chosen by the model since no schema is given.
output = chain_joke.invoke({'query': joke_query_v1})
print(output)

{'joke': "Why don't scientists trust atoms? Because they make up everything.", 'category': 'chemistry', 'type': 'pun'}


## StructuredOutputParser

### It is used when you want to parse an LLM’s response into a structured format like a dict, or JSON.
### The StructuredOutputParser allows you to define a custom schema that matches the expected structure of the LLM’s response.
### You would use the StructuredOutputParser when:

#### <li>The LLM’s response contains multiple fields/values you want to extract</li>
#### <li>The fields have predictable names you can define in a schema</li>
#### <li>You want the output parsed into a dict rather than raw text</li>
#### <li>The built-in parsers don’t handle the structure you need</li>

In [48]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

In [49]:
# Fields the structured parser should extract from the model's reply.
response_schema = [
    ResponseSchema(
        name="answer",
        description="answer to user question",
    ),
    ResponseSchema(
        name="fact",
        description="an intresting fact about answer to user question",
    ),
]

In [50]:
# Build the structured parser from the response schemas.
output_structured_parser = StructuredOutputParser.from_response_schemas(response_schema)
# Instruction text injected into the prompt template.
formated_instructions = output_structured_parser.get_format_instructions() 

In [54]:
# Prompt with the structured-output instructions pre-filled; only `query` is supplied at invoke time.
prompt_template = PromptTemplate(
    template="answer the user question as best as possible \n{formated_instructions} \n{query}",
    input_variables=["query"],
    partial_variables={"formated_instructions": formated_instructions},
)

In [55]:
# LCEL pipeline: prompt -> Groq chat model -> structured output parser.
chain = prompt_template | chat_groq_model | output_structured_parser

In [63]:
# Render the template with a sample question; `_input` holds the formatted prompt value.
_input = prompt_template.format_prompt(query="what's the capital of Manitoba?")

In [64]:
# The chain's first step is the prompt template, which formats `query` itself.
# Pass the plain question; passing pre-formatted messages applied the template
# twice and sent a message-list repr as the query.
output = chain.invoke({'query': "what's the capital of Manitoba?"})

In [68]:
# The parsed result is indexed by the schema names defined for the parser.
print(output['answer'])
print(output['fact'])

Winnipeg
Winnipeg is not only the capital of Manitoba but also the largest city in the province, known for its vibrant arts and culture scene, as well as its historic significance as a major transportation hub in Canada.


## Enum parser


In [75]:
from langchain.output_parsers.enum import EnumOutputParser
from enum import Enum

In [83]:
class Color(Enum):
    """Closed set of colours accepted as an answer by the enum parser."""

    # Member values are the lowercase colour words.
    RED = "red"
    GREEN = "green"
    BLUE = "blue"
    BROWN = "brown"

In [84]:
# Parser that maps the model's reply onto a Color member.
parser = EnumOutputParser(enum = Color)

In [92]:
# Prompt asking for a one-word eye colour; the parser's format instructions
# are bound up front via .partial(), leaving `person` as the only input.
prompt_template = PromptTemplate.from_template(
    """What color eyes does this person have ? You should give only color and no other thing.
    Answer should in one word only that is color in small letter.
     
    > Person : {person}

    Instructions : {instructions}   
     """
).partial(instructions = parser.get_format_instructions())

In [93]:
# LCEL pipeline: prompt -> Groq chat model -> enum parser.
chain = prompt_template | chat_groq_model | parser

In [101]:
# Ask about one person; `output` is the enum parser's result.
output = chain.invoke({'person' : 'Sardar Patel'})

In [102]:
# Display the parsed enum member.
output

<Color.BROWN: 'brown'>

In [28]:
from langchain_mistralai import ChatMistralAI

# Mistral chat model. The key is read with os.environ[...], so a missing
# MISTRAL_API_KEY raises KeyError instead of silently passing None.
chat_mistral_model = ChatMistralAI(
    model="mistral-large-latest",
    temperature=0.8,
    api_key=os.environ["MISTRAL_API_KEY"],
)

## Pandas DataFrame Parser

### This output parser allows users to specify an arbitrary Pandas DataFrame and query LLMs for data in the form of a formatted dictionary that extracts data from the corresponding DataFrame.

In [33]:
import pprint
import pandas as pd
from langchain.output_parsers import PandasDataFrameOutputParser
from langchain_core.prompts import PromptTemplate
from typing import Any

In [34]:
def format_parser_output(parser_output : dict[str, Any]) -> None:
    """Pretty-print a parser result, converting pandas values to plain dicts.

    Args:
        parser_output: Mapping of result name to value. Values exposing
            ``to_dict()`` (e.g. a pandas Series or DataFrame) are converted
            with it; any other value (such as a scalar aggregate) is printed
            unchanged.

    Returns:
        None. The formatted mapping is written to stdout.
    """
    # Build a fresh dict rather than overwriting entries in place, so the
    # caller's object is not mutated and re-running the cell stays safe.
    printable = {
        key: value.to_dict() if hasattr(value, "to_dict") else value
        for key, value in parser_output.items()
    }
    # A narrow width forces one entry per line for nested dicts.
    pprint.PrettyPrinter(width=4, compact=True).pprint(printable)

In [35]:
# Small sample frame that the DataFrame parser is queried against.
sample_columns = {
    "num_legs": [1, 2, 3, 4, 5],
    "num_wings": [3, 4, 5, 7, 7],
    "num_specimen_seen": [4, 5, 6, 6, 2],
}
df = pd.DataFrame(sample_columns)

In [41]:
# Display the sample frame.
df

Unnamed: 0,num_legs,num_wings,num_specimen_seen
0,1,3,4
1,2,4,5
2,3,5,6
3,4,7,6
4,5,7,2


In [36]:
# Parser bound to `df`.
parser_dataframe = PandasDataFrameOutputParser(dataframe=df)

In [37]:
df_query = """Retribe num_wings column data"""

# Prompt with the DataFrame parser's format instructions pre-filled; only `user_query` is supplied.
prompt_dataframe = PromptTemplate(
    template="Answer the user Query \n{formated_instructions} \n {user_query}",
    input_variables=["user_query"],
    partial_variables={
        "formated_instructions": parser_dataframe.get_format_instructions(),
    },
)

In [38]:
# LCEL pipeline: prompt -> Groq chat model -> DataFrame parser.
chain_pandas = prompt_dataframe | chat_groq_model | parser_dataframe

In [42]:
# Run the column query and pretty-print the parsed result.
format_parser_output(chain_pandas.invoke({'user_query':df_query}))

{'num_wings': {0: 3,
               1: 4,
               2: 5,
               3: 7,
               4: 7}}


In [51]:
# Row query: ask for the first row and pretty-print the parsed result.
df_query_1 = "Retrieve the first row"
format_parser_output(chain_pandas.invoke({'user_query':df_query_1}))

{'0': {'num_legs': 1,
       'num_specimen_seen': 4,
       'num_wings': 3}}


In [56]:
# Aggregate query: the parsed result is displayed directly, without format_parser_output.
df_query_2 = "Retrieve the mean of the num_wings column"
chain_pandas.invoke({'user_query':df_query_2})

{'mean': 5.2}

## XML parser

### This output parser allows users to obtain results from LLM in the popular XML format.

In [18]:
from langchain.output_parsers import XMLOutputParser
from langchain_cohere import ChatCohere
import os
from langchain_core.prompts import PromptTemplate

In [19]:
# Cohere chat model. The key is read with os.environ[...], so a missing
# COHERE_API_KEY raises KeyError.
chat_cohere_model = ChatCohere(
    temperature=0.8,
    api_key=os.environ["COHERE_API_KEY"],
)

In [20]:
actor_query = "Generate the shortend filmography for tom Hanks"

# Call the model directly (no parser), asking it to wrap each movie in <movie> tags.
movie_tag_prompt = f""" {actor_query} Please enclose the movies in <movie> </movie> tags """
output = chat_cohere_model.invoke(movie_tag_prompt)

print(output.content)

Here is a shortened filmography of Tom Hanks, featuring some of his most notable films:

<movie>Forrest Gump</movie>
<movie>Saving Private Ryan</movie>
<movie>Philadelphia</movie>
<movie>Apollo 13</movie>
<movie>Cast Away</movie>
<movie>Big</movie>
<movie>Toy Story</movie>
<movie>Catch Me If You Can</movie>
<movie>Captain Phillips</movie>
<movie>The Green Mile</movie>
<movie>Sully</movie>
<movie>A Beautiful Day in the Neighborhood</movie>
<movie>The Post</movie>

This list includes award-winning dramas, iconic comedies, and animated classics, showcasing Hanks' versatility as an actor.


In [21]:
# XML parser with no tag constraints.
parser = XMLOutputParser()

# Prompt: the user query followed by the parser's XML format instructions.
prompt_template = PromptTemplate(
    template="""{query}\n{formated_instruction}""",
    input_variables=["query"],
    partial_variables={
        "formated_instruction": parser.get_format_instructions(),
    },
)

In [23]:
# LCEL pipeline: prompt -> Cohere chat model -> XML parser.
chain = prompt_template | chat_cohere_model | parser

# Run the filmography query through the pipeline.
output = chain.invoke({'query' : actor_query})

# Show the parser's output.
print(output)

{'filmography': [{'film': [{'title': 'Forrest Gump'}, {'year': '1994'}]}, {'film': [{'title': 'Apollo 13'}, {'year': '1995'}]}, {'film': [{'title': 'Saving Private Ryan'}, {'year': '1998'}]}, {'film': [{'title': 'Cast Away'}, {'year': '2000'}]}, {'film': [{'title': 'Catch Me If You Can'}, {'year': '2002'}]}, {'film': [{'title': 'The Polar Express'}, {'year': '2004'}]}, {'film': [{'title': 'The Da Vinci Code'}, {'year': '2006'}]}, {'film': [{'title': 'Captain Phillips'}, {'year': '2013'}]}, {'film': [{'title': 'Bridge of Spies'}, {'year': '2015'}]}, {'film': [{'title': 'A Beautiful Day in the Neighborhood'}, {'year': '2019'}]}]}


#### Add custom XML Tags

In [24]:
# XML parser configured with a custom list of tags.
parser_xml = XMLOutputParser(
    tags=["movies", "Actor", "films", "director", "name", "genre"],
)

# NOTE(review): the chained assignment also rebinds `prompt_template` to this
# prompt; confirm that overwriting the earlier template is intended.
prompt_xml = prompt_template = PromptTemplate(
    template="""{query}\n{formated_instruction}""",
    input_variables=["query"],
    partial_variables={
        "formated_instruction": parser_xml.get_format_instructions(),
    },
)

In [28]:
# LCEL pipeline: custom-tag prompt -> Cohere chat model -> custom-tag XML parser.
chain = prompt_xml | chat_cohere_model | parser_xml

actor_query = "Generate the short filmography for tom Hanks "

# Run the query once; `output` holds the parser's result.
output = chain.invoke({'query' : actor_query})

In [30]:
# Stream the chain and print each item as it is yielded.
for chunk in chain.stream({"query": actor_query}):
    print(chunk)

{'movies': [{'Actor': [{'films': [{'film': [{'genre': 'Romantic Comedy'}]}]}]}]}
{'movies': [{'Actor': [{'films': [{'film': [{'genre': 'Comedy'}]}]}]}]}
{'movies': [{'Actor': [{'films': [{'film': [{'genre': 'Drama'}]}]}]}]}
{'movies': [{'Actor': [{'films': [{'film': [{'genre': 'War'}]}]}]}]}
{'movies': [{'Actor': [{'films': [{'film': [{'genre': 'Adventure'}]}]}]}]}
{'movies': [{'Actor': [{'director': [{'film': [{'genre': 'Musical Comedy'}]}]}]}]}
{'movies': [{'Actor': [{'director': [{'film': [{'genre': 'Romantic Comedy'}]}]}]}]}


## YAML parser

### This output parser allows users to specify an arbitrary schema and query LLMs for outputs that conform to that schema, using YAML to format their response.

In [40]:
from typing import List
from pydantic import BaseModel,Field
from langchain.output_parsers import YamlOutputParser

In [49]:
# Schema for a two-part joke, used as the pydantic object of the YAML parser.
class Joke(BaseModel):
    # The question that sets up the joke.
    setup : str = Field(description='question to setup joke')
    # The answer that resolves the joke.
    punchline : str = Field(description='answer to resolve joke')

In [50]:
# YAML parser configured with the Joke schema.
parser = YamlOutputParser(pydantic_object=Joke)

user_query = "Tell me a Joke"

# Prompt with the YAML format instructions pre-filled; only `user_query` is supplied.
prompt_yaml = PromptTemplate(
    template="Answer the user Query \n{format_instruction}\n{user_query}\n",
    input_variables=["user_query"],
    partial_variables={
        "format_instruction": parser.get_format_instructions(),
    },
)

In [52]:
# LCEL pipeline: prompt -> Cohere chat model -> YAML parser.
chain = prompt_yaml | chat_cohere_model | parser


# Run the query; the printed result shows `setup` and `punchline` fields.
output = chain.invoke({'user_query':user_query})

print(output)

setup='What did the hat say to the head?' punchline="You go on ahead, I'll catch up in a minute."
