# LangChain Parsers

In [None]:
!pip install --upgrade --user google-cloud-aiplatform langchain langchain-google-genai langchain_core

Collecting google-cloud-aiplatform
  Downloading google_cloud_aiplatform-1.54.1-py2.py3-none-any.whl (5.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.1/5.1 MB[0m [31m27.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain
  Downloading langchain-0.2.3-py3-none-any.whl (974 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m974.0/974.0 kB[0m [31m23.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-google-genai
  Downloading langchain_google_genai-1.0.6-py3-none-any.whl (35 kB)
Collecting langchain_core
  Downloading langchain_core-0.2.5-py3-none-any.whl (314 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m314.7/314.7 kB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
Collecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_text_splitters-0.2.1-py3-none-any.whl (23 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.75-py3-none-any.whl (124 kB)


In [None]:
import os
from google.colab import userdata

# LangSmith-related settings are exported as environment variables; the API
# key comes from Colab's secret store (`userdata`) rather than being hard-coded.
os.environ['LANGCHAIN_TRACING_V2'] = "true"
os.environ['LANGCHAIN_ENDPOINT'] = "https://api.smith.langchain.com"
os.environ['LANGCHAIN_API_KEY'] = userdata.get("LANGCHAIN_API_KEY")
os.environ['LANGCHAIN_PROJECT'] = "test-for-kai-v2"

# Only read the Google key from Colab secrets when the environment does not
# already define one.
if "GOOGLE_API_KEY" not in os.environ:
  os.environ['GOOGLE_API_KEY'] = userdata.get("GOOGLE_API_KEY")

In [None]:
from langchain_google_genai import GoogleGenerativeAI
from google.colab import userdata

# Shared model instance used by every chain in this notebook.
llm = GoogleGenerativeAI(
    model="gemini-1.0-pro",
    # The keyword was misspelled `temperate`, so the intended sampling
    # temperature was never passed under its real name.
    temperature=1,
    # NOTE(review): `location` looks like a Vertex AI style setting; confirm
    # that GoogleGenerativeAI actually honours it.
    location="us-central1",
)

# String Parser

In [None]:
from langchain_core.output_parsers import StrOutputParser
# PromptTemplate is used in this cell; import it here so the cell runs on a
# fresh kernel (the next import of it only appears in a later cell).
from langchain_core.prompts import PromptTemplate

# A plain free-text query; no structured output is requested here.
joke_query = "Tell me a joke."

prompt = PromptTemplate(
    template="Answer the user query.\n{query}\n",
    input_variables=["query"],
)

# Pipe prompt -> model -> string parser.
chain = prompt | llm | StrOutputParser()

chain.invoke({"query": joke_query})

'Why did the golfer wear two pairs of pants?\n\nIn case he got a hole-in-one!'

# JSON Parser

In [None]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

In [None]:
# Define your desired data structure.
# A joke is modelled as a question (`setup`) plus its answer (`punchline`).
# Comments are used instead of a class docstring on purpose: a docstring would
# become part of the model's schema.
class Joke(BaseModel):
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

In [None]:
# And a query intended to prompt a language model to populate the data structure.
joke_query = "Tell me a joke."

# Set up a parser + inject instructions into the prompt template.
parser = JsonOutputParser(pydantic_object=Joke)

prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    # Pre-fill the parser's format instructions so callers only supply `query`.
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Pipe prompt -> model -> JSON parser.
chain = prompt | llm | parser

chain.invoke({"query": joke_query})

{'setup': "What do you call a bee that can't make up its mind?",
 'punchline': 'A maybe'}

In [None]:
from langchain_core.pydantic_v1 import BaseModel, Field, validator
from typing import List
import re

class Quiz(BaseModel):
    # Multiple-choice question: exactly three options, each written as
    # "<A|B|C>: <text>", and an answer that must be one of those options.
    question: str = Field(...)
    choices: List[str] = Field(..., min_items=3, max_items=3)
    answer: str = Field(...)
    explanation: str = Field(...)

    @validator('choices', each_item=True)
    def check_choices_format(cls, v):
        # Applied to each option individually (each_item=True).
        if re.match(r'^[A-C]: .+$', v) is None:
            raise ValueError('Each choice must be in the format "A: ...", "B: ...", "C: ..."')
        return v

    @validator('answer')
    def check_answer_format(cls, v, values):
        # The answer uses the same "<letter>: <text>" shape as the options.
        if re.match(r'^[A-C]: .+$', v) is None:
            raise ValueError('Answer must be in the format "A: ...", "B: ...", "C: ..."')

        # Cross-check against the options only when they are present in `values`.
        if 'choices' in values:
            if v not in values['choices']:
                raise ValueError('Answer must match one of the provided choices')

        return v

# Example usage
# Construct a Quiz that satisfies both validators; any ValueError raised while
# building it is printed instead of propagating.
try:
    quiz = Quiz(
        question="What is the capital of France?",
        choices=["A: Berlin", "B: Madrid", "C: Paris"],
        answer="C: Paris",
        explanation="Paris is the capital city of France."
    )
    print(quiz)
except ValueError as e:
    print(e)


question='What is the capital of France?' choices=['A: Berlin', 'B: Madrid', 'C: Paris'] answer='C: Paris' explanation='Paris is the capital city of France.'


In [None]:
# JSON parser configured with the Quiz model.
parser2 = JsonOutputParser(pydantic_object=Quiz)

In [None]:
# Keep the request aligned with the Quiz schema: exactly three choices, each
# written as "A: ...", "B: ...", "C: ...". Asking for A-D contradicts the
# model's min_items/max_items and its [A-C] format check.
question_query = 'What is the physic Domain of Knowledge used for Quantum Computing? (Give exactly three choices, formatted as "A: ...", "B: ..." and "C: ...")'

prompt2 = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    # Pre-fill the parser's format instructions so callers only supply `query`.
    partial_variables={"format_instructions": parser2.get_format_instructions()},
)

# Pipe prompt -> model -> JSON parser.
chain2 = prompt2 | llm | parser2
chain2.invoke({"query": question_query})

{'question': 'What is the physic Domain of Knowledge used for Quantum Computing?',
 'choices': ['A. Classical Mechanics',
  'B. Quantum Mechanics',
  'C. Electromagnetism',
  'D. Special Relativity'],
 'answer': 'B. Quantum Mechanics',
 'explanation': 'Quantum Mechanics is the physic Domain of Knowledge used for Quantum Computing because it provides the mathematical framework to describe and predict the behavior of quantum systems, which are the fundamental building blocks of quantum computers.'}

### Streaming:

In [None]:
# Iterate over the chunks yielded by `chain.stream` and print each one.
for s in chain.stream({"query": joke_query}):
    print(s)

{'setup': 'Why did the golfer wear two pairs of pants?'}
{'setup': 'Why did the golfer wear two pairs of pants?', 'punchline': 'In case he got a hole-in-one!'}


# XML Parser

In [None]:
from langchain.output_parsers import XMLOutputParser
from langchain_core.prompts import PromptTemplate

In [None]:
actor_query = "Generate the shortened filmography for Leonardo Di Caprio."
# Call the model directly (no parser), appending an inline instruction that
# asks for <movie> tags.
output = llm.invoke(
    f"""{actor_query}
Please enclose the movies in <movie></movie> tags"""
)
print(output)

<movie>Titanic</movie>
<movie>The Wolf of Wall Street</movie>
<movie>Inception</movie>
<movie>The Revenant</movie>
<movie>Once Upon a Time in Hollywood</movie>


In [None]:
# XML parser created without any tag hints.
parser = XMLOutputParser()

prompt = PromptTemplate(
    template="""{query}\n{format_instructions}""",
    input_variables=["query"],
    # Pre-fill the parser's format instructions so callers only supply `query`.
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Pipe prompt -> model -> XML parser.
chain = prompt | llm | parser

output = chain.invoke({"query": actor_query})
print(output)

{'filmography': [{'film': [{'title': 'Titanic'}, {'year': '1997'}]}, {'film': [{'title': 'The Beach'}, {'year': '2000'}]}, {'film': [{'title': 'Catch Me If You Can'}, {'year': '2002'}]}, {'film': [{'title': 'The Aviator'}, {'year': '2004'}]}, {'film': [{'title': 'The Departed'}, {'year': '2006'}]}, {'film': [{'title': 'Blood Diamond'}, {'year': '2006'}]}, {'film': [{'title': 'Inception'}, {'year': '2010'}]}, {'film': [{'title': 'Django Unchained'}, {'year': '2012'}]}, {'film': [{'title': 'The Wolf of Wall Street'}, {'year': '2013'}]}, {'film': [{'title': 'The Revenant'}, {'year': '2015'}]}, {'film': [{'title': 'Once Upon a Time in Hollywood'}, {'year': '2019'}]}, {'film': [{'title': "Don't Look Up"}, {'year': '2021'}]}]}


In [None]:
# Same pipeline as before, but the parser is given an explicit list of tags.
parser = XMLOutputParser(tags=["movies", "actor", "film", "name", "genre"])
prompt = PromptTemplate(
    template="""{query}\n{format_instructions}""",
    input_variables=["query"],
    # Pre-fill the parser's format instructions so callers only supply `query`.
    partial_variables={"format_instructions": parser.get_format_instructions()},
)


# Pipe prompt -> model -> XML parser.
chain = prompt | llm | parser

output = chain.invoke({"query": actor_query})

print(output)

{'movies': [{'actor': [{'film': [{'name': 'Titanic'}, {'genre': 'Romance/Disaster'}]}, {'film': [{'name': 'The Wolf of Wall Street'}, {'genre': 'Comedy/Crime'}]}, {'film': [{'name': 'Inception'}, {'genre': 'Science Fiction/Action'}]}, {'film': [{'name': 'The Revenant'}, {'genre': 'Adventure/Drama'}]}, {'film': [{'name': 'Once Upon a Time in Hollywood'}, {'genre': 'Comedy/Drama'}]}]}]}


In [None]:
# Iterate over the chunks yielded by `chain.stream` and print each one.
for s in chain.stream({"query": actor_query}):
    print(s)

{'movies': [{'actor': 'Leonardo DiCaprio'}]}
{'movies': [{'film': [{'name': 'Titanic'}]}]}
{'movies': [{'film': [{'genre': 'Drama/Romance'}]}]}
{'movies': [{'film': [{'name': 'The Wolf of Wall Street'}]}]}
{'movies': [{'film': [{'genre': 'Drama/Comedy'}]}]}
{'movies': [{'film': [{'name': 'The Revenant'}]}]}
{'movies': [{'film': [{'genre': 'Adventure/Drama'}]}]}
{'movies': [{'film': [{'name': 'Once Upon a Time in Hollywood'}]}]}
{'movies': [{'film': [{'genre': 'Comedy/Drama'}]}]}


# CSV Parser

In [None]:
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain_core.prompts import PromptTemplate

In [None]:
# Parser for comma-separated list output.
output_parser = CommaSeparatedListOutputParser()

# Fetch the instructions once and pre-fill them, so callers only supply `subject`.
format_instructions = output_parser.get_format_instructions()
prompt = PromptTemplate(
    template="List five {subject}.\n{format_instructions}",
    input_variables=["subject"],
    partial_variables={"format_instructions": format_instructions},
)

# Pipe prompt -> model -> list parser.
chain = prompt | llm | output_parser

In [None]:
# Run the CSV chain once; the cell's value is the parsed list.
chain.invoke({"subject": "LLM"})

['GPT-3', 'BLOOM', 'T5', 'Codex', 'ELECTRA']

In [None]:
# Iterate over the chunks yielded by `chain.stream` and print each one.
for s in chain.stream({"subject": "AI Systems"}):
    print(s)

['- ChatGPT\n- DALL-E 2\n- Stable Diffusion\n- AlphaFold\n- GPT-3']


# Output-fixing parser


In [None]:
from typing import List

from langchain.output_parsers import PydanticOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field

In [None]:
# An actor's name together with the list of films they starred in.
class Actor(BaseModel):
    name: str = Field(description="name of an actor")
    film_names: List[str] = Field(description="list of names of films they starred in")

actor_query = "Generate the filmography for a random actor."

# Pydantic parser that targets the Actor model.
parser = PydanticOutputParser(pydantic_object=Actor)

In [None]:
# Python-style single quotes make this payload invalid JSON.
misformatted = "{'name': 'Tom Hanks', 'film_names': ['Forrest Gump']}"

# Demonstrate the failure of the plain parser instead of leaving the call
# commented out; the error is caught and shown so the notebook still runs
# top to bottom.
from langchain_core.exceptions import OutputParserException

try:
    parser.parse(misformatted)
except OutputParserException as err:
    print(f"Plain parser failed: {err}")

In [None]:
from langchain.output_parsers import OutputFixingParser

# Wrap `parser` in an OutputFixingParser backed by the same `llm`.
new_parser = OutputFixingParser.from_llm(parser=parser, llm=llm)

In [None]:
# Parse the same malformed string through the fixing parser.
new_parser.parse(misformatted)

Actor(name='Tom Hanks', film_names=['Forrest Gump', 'Cast Away', 'Saving Private Ryan'])

# Retry parser


In [None]:
from langchain.output_parsers import (
    OutputFixingParser,
    PydanticOutputParser,
)
from langchain_core.prompts import (
    PromptTemplate,
)
from langchain_core.pydantic_v1 import BaseModel, Field

In [None]:
# Prompt text for an action-selection task; it exposes `format_instructions`
# and `query` placeholders.
template = """Based on the user question, provide an Action and Action Input for what step should be taken.
{format_instructions}
Question: {query}
Response:"""


# The step to take: which action, and the input to give it. Both are required.
class Action(BaseModel):
    action: str = Field(description="action to take")
    action_input: str = Field(description="input to the action")


# Pydantic parser that targets the Action model.
parser = PydanticOutputParser(pydantic_object=Action)

In [None]:
# Build the prompt from the action-selection `template` defined earlier in
# this section rather than a generic inline template, so the model is actually
# asked for an Action and Action Input.
prompt = PromptTemplate(
    template=template,
    input_variables=["query"],
    # Pre-fill the parser's format instructions so callers only supply `query`.
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

In [None]:
# Render the prompt for one concrete question; the retry parser is given this
# value further down.
prompt_value = prompt.format_prompt(query="who is leo di caprios gf?")

In [None]:
from langchain.output_parsers import RetryOutputParser

In [None]:
# Wrap `parser` in a RetryOutputParser backed by the same `llm`.
retry_parser = RetryOutputParser.from_llm(parser=parser, llm=llm)

In [None]:
# A completion that lacks the required `action_input` field.
bad_response = '{"action": "search"}'
# Give the retry parser both the bad completion and the prompt that produced it.
retry_parser.parse_with_prompt(bad_response, prompt_value)

Action(action='entity_search', action_input='leo di caprios gf')

In [None]:
from langchain_core.runnables import RunnableLambda, RunnableParallel

# Plain completion: the prompt piped straight into the model.
completion_chain = prompt | llm

# Fan the input out to the completion and to the rendered prompt (the dict is
# coerced into a parallel step), then pass both to the retry parser by keyword.
main_chain = {
    "completion": completion_chain,
    "prompt_value": prompt,
} | RunnableLambda(lambda outputs: retry_parser.parse_with_prompt(**outputs))


main_chain.invoke({"query": "who is leo di caprios gf?"})

Action(action='get_celebrity_info', action_input="leo di caprio's gf")

# Pydantic parser

In [None]:
from typing import List

from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field, validator

In [None]:
# Define your desired data structure.
# A joke is modelled as a question (`setup`) plus its answer (`punchline`).
class Joke(BaseModel):
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

    # You can add custom validation logic easily with Pydantic.
    @validator("setup")
    def question_ends_with_question_mark(cls, field):
        # `endswith` also covers an empty setup, where indexing `field[-1]`
        # would raise IndexError instead of a validation error.
        if not field.endswith("?"):
            raise ValueError("Badly formed question!")
        return field

# And a query intended to prompt a language model to populate the data structure.
joke_query = "Tell me a joke."

# Set up a parser + inject instructions into the prompt template.
parser = PydanticOutputParser(pydantic_object=Joke)

prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    # Pre-fill the parser's format instructions so callers only supply `query`.
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Pipe prompt -> model -> Pydantic parser.
chain = prompt | llm | parser

chain.invoke({"query": joke_query})

Joke(setup='What do you call a deer with no eyes?', punchline='No idea')

In [None]:
# Here's another example, but with a compound typed field.
# `film_names` is a list of strings rather than a single scalar.
class Actor(BaseModel):
    name: str = Field(description="name of an actor")
    film_names: List[str] = Field(description="list of names of films they starred in")


actor_query = "Generate the filmography for a random actor."

# Pydantic parser that targets the Actor model.
parser = PydanticOutputParser(pydantic_object=Actor)

prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    # Pre-fill the parser's format instructions so callers only supply `query`.
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Pipe prompt -> model -> Pydantic parser.
chain = prompt | llm | parser

chain.invoke({"query": actor_query})

Actor(name='Michael Gambon', film_names=['Harry Potter and the Deathly Hallows – Part 2', 'Harry Potter and the Deathly Hallows – Part 1', 'Harry Potter and the Half-Blood Prince', 'Harry Potter and the Order of the Phoenix', 'Harry Potter and the Goblet of Fire', 'Harry Potter and the Prisoner of Azkaban', 'Harry Potter and the Chamber of Secrets', "Harry Potter and the Sorcerer's Stone", 'Gosford Park', 'Sleepy Hollow', 'The Insider', 'The Singing Detective', 'Mary Reilly', 'A Fish Called Wanda', 'The Cook, the Thief, His Wife & Her Lover', 'The Dead', 'Paris by Night', 'Brideshead Revisited', 'Turtle Diary', 'Langrishe, Go Down', 'The Good Father', 'Another Time, Another Place', 'Heat and Dust', 'The Last September', 'The Honorary Consul', 'The Sea Wolves', 'Conduct Unbecoming', 'Soldier of Orange', 'O Lucky Man!', 'The Ruling Class', 'Loot', 'The Boyfriend', 'The Wrong Box', 'Oh! What a Lovely War', 'The Quiller Memorandum', 'Alfie', 'The Whisperers', 'The Wrong Arm of the Law', 'T

# YAML parser


In [None]:
from typing import List

from langchain.output_parsers import YamlOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

In [None]:
# Define your desired data structure.
# A joke is modelled as a question (`setup`) plus its answer (`punchline`).
# This redefinition has no custom validator.
class Joke(BaseModel):
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

In [None]:
# And a query intended to prompt a language model to populate the data structure.
joke_query = "Tell me a joke."

# Set up a parser + inject instructions into the prompt template.
parser = YamlOutputParser(pydantic_object=Joke)

prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    # Pre-fill the parser's format instructions so callers only supply `query`.
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Pipe prompt -> model -> YAML parser.
chain = prompt | llm | parser

chain.invoke({"query": joke_query})

Joke(setup='Why did the golfer wear two pairs of pants?', punchline='In case he got a hole-in-one!')

# Pandas DataFrame Parser

In [None]:
import pprint
from typing import Any, Dict

import pandas as pd
from langchain.output_parsers import PandasDataFrameOutputParser
from langchain_core.prompts import PromptTemplate

In [None]:
# Solely for documentation purposes.
def format_parser_output(parser_output: Dict[str, Any]) -> None:
    """Pretty-print a parser result without modifying it.

    Args:
        parser_output: Mapping from result name to value. Values that expose
            ``to_dict()`` are converted for display; any other value is
            printed as-is.

    Returns:
        None. The formatted mapping is written to standard output.
    """
    # Build a fresh dict instead of overwriting the caller's values, so the
    # same `parser_output` can be formatted again (e.g. when a cell is re-run).
    printable = {
        key: value.to_dict() if hasattr(value, "to_dict") else value
        for key, value in parser_output.items()
    }
    pprint.PrettyPrinter(width=4, compact=True).pprint(printable)

In [None]:
# Define your desired Pandas DataFrame.
# Four rows, three integer columns.
df = pd.DataFrame(
    {
        "num_legs": [2, 4, 8, 0],
        "num_wings": [2, 0, 0, 0],
        "num_specimen_seen": [10, 2, 1, 8],
    }
)

# Set up a parser + inject instructions into the prompt template.
parser = PandasDataFrameOutputParser(dataframe=df)

In [None]:
# Here's an example of a column operation being performed.
df_query = "Retrieve the num_wings column."

# Set up the prompt.
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    # Pre-fill the parser's format instructions so callers only supply `query`.
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Pipe prompt -> model -> DataFrame parser.
chain = prompt | llm | parser
parser_output = chain.invoke({"query": df_query})

format_parser_output(parser_output)

{'num_wings': {0: 2,
               1: 0,
               2: 0,
               3: 0}}


In [None]:
# Here's an example of a row operation being performed.
# NOTE(review): the recorded output shows row index 1 (num_legs 4), not
# index 0 — confirm whether that is the intended "first row".
df_query = "Retrieve the first row."

# Set up the prompt.
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    # Pre-fill the parser's format instructions so callers only supply `query`.
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Pipe prompt -> model -> DataFrame parser.
chain = prompt | llm | parser
parser_output = chain.invoke({"query": df_query})

format_parser_output(parser_output)

{'1': {'num_legs': 4,
       'num_specimen_seen': 2,
       'num_wings': 0}}


In [None]:
# Here's an example of a random Pandas DataFrame operation limiting the number of rows
df_query = "Retrieve the average of the num_legs column from rows 1 to 3."

# Set up the prompt.
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    # Pre-fill the parser's format instructions so callers only supply `query`.
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Pipe prompt -> model -> DataFrame parser.
chain = prompt | llm | parser
parser_output = chain.invoke({"query": df_query})

# Printed directly rather than through format_parser_output.
print(parser_output)

{'mean': 4.0}


# Enum parser

In [None]:
from langchain.output_parsers.enum import EnumOutputParser
from enum import Enum

# Candidate values handed to the enum parser.
class Colors(Enum):
    RED = "red"
    GREEN = "green"
    BLUE = "blue"

# Enum parser configured with Colors.
parser = EnumOutputParser(enum=Colors)

In [None]:
from langchain_core.prompts import PromptTemplate

# Build the prompt from a template string, then pre-fill `instructions` with
# the parser's format instructions so callers only supply `person`.
prompt = PromptTemplate.from_template(
    """What color eyes does this person have?

> Person: {person}

Instructions: {instructions}"""
).partial(instructions=parser.get_format_instructions())
# Pipe prompt -> model -> enum parser.
chain = prompt | llm | parser

In [None]:
# Run the enum chain for one person; the cell's value is the parsed result.
chain.invoke({"person": "Frank Sinatra"})

<Colors.BLUE: 'blue'>

# Datetime Parser

In [None]:
from langchain.output_parsers import DatetimeOutputParser
from langchain_core.prompts import PromptTemplate

# Datetime parser created with its default settings.
output_parser = DatetimeOutputParser()
# Prompt text with `question` and `format_instructions` placeholders.
template = """Answer the users question:

{question}

{format_instructions}"""
prompt = PromptTemplate.from_template(
    template,
    # Pre-fill the parser's format instructions so callers only supply `question`.
    partial_variables={"format_instructions": output_parser.get_format_instructions()},
)

In [None]:
# Display the assembled prompt, including its pre-filled partial variables.
prompt

PromptTemplate(input_variables=['question'], partial_variables={'format_instructions': "Write a datetime string that matches the following pattern: '%Y-%m-%dT%H:%M:%S.%fZ'.\n\nExamples: 339-11-29T12:26:50.185690Z, 184-01-27T12:00:45.532358Z, 94-10-31T11:42:31.193366Z\n\nReturn ONLY this string, no other words!"}, template='Answer the users question:\n\n{question}\n\n{format_instructions}')

In [None]:
# Pipe prompt -> model -> datetime parser, then print the parsed value.
chain = prompt | llm | output_parser
output = chain.invoke({"question": "when was OpenAI founded?"})
print(output)

2015-12-11 00:00:00


# Structured output parser

In [None]:
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from langchain_core.prompts import PromptTemplate

# Two named fields are requested from the model: the answer and its source.
response_schemas = [
    ResponseSchema(name="answer", description="answer to the user's question"),
    ResponseSchema(
        name="source",
        description="source used to answer the user's question, should be a website.",
    ),
]
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

# Fetch the instructions once and pre-fill them, so callers only supply `question`.
format_instructions = output_parser.get_format_instructions()
prompt = PromptTemplate(
    template="answer the users question as best as possible.\n{format_instructions}\n{question}",
    input_variables=["question"],
    partial_variables={"format_instructions": format_instructions},
)

# Pipe prompt -> model -> structured parser.
chain = prompt | llm | output_parser

chain.invoke({"question": "what's the capital of france?"})

{'answer': 'Paris is the capital of France.',
 'source': 'https://en.wikipedia.org/wiki/Paris'}

In [None]:
# Run the structured chain on a second question.
chain.invoke({"question": "who's OpenAI founder?"})

{'answer': 'Sam Altman', 'source': 'https://en.wikipedia.org/wiki/OpenAI'}

In [None]:
# Iterate over the chunks yielded by `chain.stream` and print each one.
for s in chain.stream({"question": "who's MIT founder?"}):
    print(s)

{'answer': 'William Barton Rogers', 'source': 'https://en.wikipedia.org/wiki/Massachusetts_Institute_of_Technology'}
