## OutputParsers

`langchain.output_parsers` — user-facing, ready-to-use parsers; the best fit for most plug-and-play use cases ✅

and

`langchain_core.output_parsers` — core abstractions and base APIs; use it for workflows built with LCEL (Runnable chains)

In [None]:
# hf llm model 
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from dotenv import load_dotenv
from langchain_core.prompts import PromptTemplate

# Load environment variables (e.g. API tokens) from a local .env file.
load_dotenv()

# define the model: a hosted text-generation endpoint wrapped in the chat-model interface
llm = HuggingFaceEndpoint(repo_id="google/gemma-2-2b-it", task="text-generation")
model = ChatHuggingFace(llm=llm)

# 1st prompt -> detailed report
template1 = PromptTemplate(
    template='Write a detailed report on {topic}',
    input_variables=['topic']
)

# 2nd prompt -> summary
# NOTE: the separator is the newline escape '\n'; writing '/n' would send the two
# literal characters "/n" to the model instead of a line break.
template2 = PromptTemplate(
    template='Write a 5 point summary on the following text. \n {text}',
    input_variables=['text']
)

prompt1 = template1.invoke({'topic': 'black hole'})

result = model.invoke(prompt1)

# The text is read from the reply's `.content` attribute before it can be fed to the
# second template, which is why these steps cannot be piped together directly.
# Fix -> StrOutputParser:
#   chain = template1 | model | parser | template2 | model | parser
#   chain.invoke({'topic': ...})
prompt2 = template2.invoke({'text': result.content})

result1 = model.invoke(prompt2)

print(result1.content)


### StrOutputParser

In [None]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Load environment variables (e.g. API keys) from a local .env file.
load_dotenv()

model = ChatOpenAI()

# 1st prompt -> detailed report
template1 = PromptTemplate(
    template='Write a detailed report on {topic}',
    input_variables=['topic']
)

# 2nd prompt -> summary
# NOTE: the separator is the newline escape '\n'; writing '/n' would send the two
# literal characters "/n" to the model instead of a line break.
template2 = PromptTemplate(
    template='Write a 5 point summary on the following text. \n {text}',
    input_variables=['text']  # with several input variables here, RunnablePassthrough is needed in the chain
)

# Placed after each model step so the next stage receives plain text.
parser = StrOutputParser()

# report prompt -> model -> text -> summary prompt -> model -> text
chain = template1 | model | parser | template2 | model | parser

result = chain.invoke({'topic': 'alpha centauri 200'})

print(result)


### Using RunnablePassthrough 
#### For when the subsequent prompt needs multiple input variables

In [None]:
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from dotenv import load_dotenv
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough # Needed for more complex chaining

# Load environment variables (e.g. API tokens) from a local .env file.
load_dotenv()

llm = HuggingFaceEndpoint(repo_id="google/gemma-2-2b-it", task="text-generation")
model = ChatHuggingFace(llm=llm)

parser = StrOutputParser()  # Using 'parser' for brevity

# 1st prompt -> detailed report
template1 = PromptTemplate(
    template='Write a detailed report on {topic}',
    input_variables=['topic']
)

# 2nd prompt -> summary
template2 = PromptTemplate(
    template='Write a 5 point summary on the following text. \n {text}',
    input_variables=['text']
)

# Build the whole pipeline as one chain.
#
# NOTE: the chain must not begin with two bare dict literals piped together.
# `{...} | {...}` is evaluated by Python itself before LangChain is involved: on
# Python 3.9+ it is an ordinary dict merge (the second "text" entry replaces the report
# sub-chain, so invoking the chain fails with KeyError: 'text'), and on older versions
# it is a TypeError. Starting with a Runnable avoids both.
full_chain = (
    # Step 1: input comes in as {'topic': '...'}.
    # `assign` passes those keys through unchanged and adds 'text', whose value is the
    # output of template1 | model | parser (the detailed report).
    RunnablePassthrough.assign(text=template1 | model | parser)
    # Step 2: the value is now a dictionary like {'topic': '...', 'text': 'report...'}.
    # We only need the 'text' for template2.
    | {
        "text": lambda x: x["text"]  # Extract the 'text' (detailed report) from the previous step's output
    }
    # Step 3: Apply the second template
    | template2
    # Step 4: Pass to the model
    | model
    # Step 5: Parse the final output
    | parser
)

# Invoke the single chain with the initial topic
final_summary = full_chain.invoke({'topic': 'black hole'})

print(final_summary)

### JsonOutputParser
#### Customizable parser

In [None]:
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from dotenv import load_dotenv
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser, XMLOutputParser

load_dotenv()  # read environment variables (e.g. API tokens) from a local .env file

# Chat model backed by a hosted text-generation endpoint.
llm = HuggingFaceEndpoint(
    repo_id="google/gemma-2-2b-it",
    task="text-generation",
)
model = ChatHuggingFace(llm=llm)

# Parser for the model's reply.
# Other parsers that can be swapped in here: XMLOutputParser, SimpleJsonOutputParser,
# PydanticOutputParser, MarkdownListOutputParser, etc.
parser = JsonOutputParser()

# `format_instruction` is filled in right here (before runtime) as a partial variable,
# so only `topic` has to be supplied when the chain is invoked.
template = PromptTemplate(
    template="Give me 3 mind-blowing facts about {topic} \n {format_instruction}",
    input_variables=["topic"],
    partial_variables={
        "format_instruction": parser.get_format_instructions(),
    },
)

# prompt -> model -> parser
chain = template | model | parser

result = chain.invoke({"topic": "UY Scuti"})
print(result)


In [None]:
# Switch to the XML parser; XMLOutputParser comes from the import in the JsonOutputParser cell.
parser = XMLOutputParser()
# Left as a bare trailing expression so the notebook displays the returned instructions.
parser.get_format_instructions()

### StructuredOutputParser - gives result following json or pydantic schema

In [None]:
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from dotenv import load_dotenv
from langchain_core.prompts import PromptTemplate
from langchain.output_parsers import StructuredOutputParser, ResponseSchema # N.B.

load_dotenv()  # read environment variables (e.g. API tokens) from a local .env file

# Chat model backed by a hosted text-generation endpoint.
llm = HuggingFaceEndpoint(
    repo_id="google/gemma-2-2b-it",
    task="text-generation",
)
model = ChatHuggingFace(llm=llm)

# Three string fields, fact_1 .. fact_3, each with a matching description.
schema = [
    ResponseSchema(
        name=f"fact_{n}",
        description=f"Fact {n} about the topic",
        type="string",
    )
    for n in (1, 2, 3)
]

parser = StructuredOutputParser.from_response_schemas(schema)

# The parser's format instructions are bound as a partial variable, leaving `topic`
# as the only value to supply at invoke time.
template = PromptTemplate(
    template="Give 3 fact about {topic} \n {format_instruction}",
    input_variables=["topic"],
    partial_variables={
        "format_instruction": parser.get_format_instructions(),
    },
)

# The same flow can be run by hand, one step at a time:
#   template.invoke({...}) -> model.invoke(prompt) -> parser.parse(output.content)
# Here it is composed as a single chain instead.
chain = template | model | parser

result = chain.invoke({"topic": "black hole"})
print(result)

### PydanticOutputParser

In [None]:
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from dotenv import load_dotenv
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field

load_dotenv()  # read environment variables (e.g. API tokens) from a local .env file

# Chat model backed by a hosted text-generation endpoint.
llm = HuggingFaceEndpoint(
    repo_id="google/gemma-2-2b-it",
    task="text-generation",
)
model = ChatHuggingFace(llm=llm)

# Schema the model's reply is parsed into and validated against.
# NOTE(review): documented with comments rather than a class docstring because pydantic
# presumably copies a docstring into the generated schema's description, which would
# change the format instructions sent to the model — confirm before adding one.
class Person(BaseModel):

    # Free-text name.
    name: str = Field(description='Name of the person')
    # gt=18 is a strict bound: an age of 18 or below fails validation.
    age: int = Field(gt=18, description='Age of the person')
    # City associated with the person.
    city: str = Field(description='Name of the city the person belongs to')

# Parser that converts the model's reply into a Person instance.
parser = PydanticOutputParser(pydantic_object=Person)

# The parser's format instructions are bound as a partial variable, leaving `place`
# as the only value to supply at invoke time.
template = PromptTemplate(
    template="Generate the name, age and city of a fictional {place} person \n {format_instruction}",
    input_variables=["place"],
    partial_variables={
        "format_instruction": parser.get_format_instructions(),
    },
)

# prompt -> model -> parser
chain = template | model | parser

final_result = chain.invoke({"place": "south-african"})

# What gets printed has already been validated by the parser.
print(final_result)